mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
Dynamic (#77)
This commit is contained in:
parent
55247b7fcd
commit
991c84a1eb
1464 changed files with 225448 additions and 1985 deletions
437
src/surface/build.rs
Normal file
437
src/surface/build.rs
Normal file
|
|
@ -0,0 +1,437 @@
|
|||
//! Top-level [`SurfaceMap`] builder.
|
||||
//!
|
||||
//! Phase 22 dispatch:
|
||||
//!
|
||||
//! 1. Per-file framework probes (one parser per language) emit
|
||||
//! [`SurfaceNode::EntryPoint`](crate::surface::SurfaceNode::EntryPoint) nodes for every recognised route /
|
||||
//! handler.
|
||||
//! 2. [`super::datastore::detect_data_stores`] walks
|
||||
//! [`GlobalSummaries`] and emits [`SurfaceNode::DataStore`](crate::surface::SurfaceNode::DataStore) nodes
|
||||
//! for every recognised driver call.
|
||||
//! 3. [`super::external::detect_external_services`] walks summaries +
|
||||
//! SSRF caps and emits [`SurfaceNode::ExternalService`](crate::surface::SurfaceNode::ExternalService) nodes.
|
||||
//! 4. [`super::dangerous::detect_dangerous_locals`] walks summaries
|
||||
//! and emits [`SurfaceNode::DangerousLocal`](crate::surface::SurfaceNode::DangerousLocal) nodes for every
|
||||
//! function whose `sink_caps` include CODE_EXEC / DESERIALIZE /
|
||||
//! SSTI / FMT_STRING.
|
||||
//! 5. [`super::reachability::populate_reaches_edges`] runs a BFS over
|
||||
//! the [`CallGraph`] from each entry-point handler, emitting
|
||||
//! [`super::EdgeKind::Reaches`] edges to every reachable
|
||||
//! DataStore / ExternalService / DangerousLocal.
|
||||
//! 6. [`SurfaceMap::canonicalize`] sorts nodes + edges so the
|
||||
//! serialised JSON is byte-deterministic across rescans.
|
||||
//!
|
||||
//! Per-file errors (parse failure, unsupported language) are
|
||||
//! swallowed so a single bad file does not kill the whole map.
|
||||
|
||||
use crate::callgraph::CallGraph;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::surface::{
|
||||
SurfaceMap, dangerous, datastore, external,
|
||||
lang::{
|
||||
go_gin, go_http, java_quarkus, java_servlet, java_spring, js_express, js_koa, php_laravel,
|
||||
php_slim, python_django, python_fastapi, python_flask, ruby_rails, ruby_sinatra,
|
||||
rust_actix, rust_axum, ts_next,
|
||||
},
|
||||
reachability,
|
||||
};
|
||||
use crate::utils::config::Config;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tree_sitter::Parser;
|
||||
|
||||
/// Borrowed inputs consumed by [`build_surface_map`].
///
/// Every field is a shared reference; the builder only reads from them.
pub struct SurfaceBuildInputs<'a> {
    /// Source files to probe for framework entry points. Each one is read
    /// from disk and dispatched on its file extension.
    pub files: &'a [PathBuf],
    /// Optional scan root, forwarded unchanged to every per-language route
    /// detector.
    pub scan_root: Option<&'a Path>,
    /// Function summaries handed to the data-store, external-service and
    /// dangerous-local detectors and to the reachability pass.
    pub global_summaries: &'a GlobalSummaries,
    /// Call graph handed to the reachability pass.
    pub call_graph: &'a CallGraph,
    /// Scanner configuration. [`build_surface_map`] does not consult it yet.
    pub config: &'a Config,
}
|
||||
|
||||
pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
||||
let mut map = SurfaceMap::new();
|
||||
let _ = inputs.config;
|
||||
|
||||
let mut parsers = Parsers::new();
|
||||
for path in inputs.files {
|
||||
let Ok(bytes) = std::fs::read(path) else {
|
||||
continue;
|
||||
};
|
||||
let kind = classify_file(path);
|
||||
let nodes = match kind {
|
||||
FileKind::Python => parsers
|
||||
.python
|
||||
.as_mut()
|
||||
.and_then(|p| p.parse(&bytes, None))
|
||||
.map(|tree| {
|
||||
let mut all =
|
||||
python_flask::detect_flask_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
all.extend(python_fastapi::detect_fastapi_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all.extend(python_django::detect_django_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::JavaScript => parsers
|
||||
.javascript
|
||||
.as_mut()
|
||||
.and_then(|p| p.parse(&bytes, None))
|
||||
.map(|tree| {
|
||||
let mut all =
|
||||
js_express::detect_express_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
all.extend(js_koa::detect_koa_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::TypeScript => parsers
|
||||
.typescript
|
||||
.as_mut()
|
||||
.and_then(|p| p.parse(&bytes, None))
|
||||
.map(|tree| {
|
||||
let mut all =
|
||||
js_express::detect_express_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
all.extend(js_koa::detect_koa_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all.extend(ts_next::detect_next_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::Java => parsers
|
||||
.java
|
||||
.as_mut()
|
||||
.and_then(|p| p.parse(&bytes, None))
|
||||
.map(|tree| {
|
||||
let mut all =
|
||||
java_spring::detect_spring_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
all.extend(java_servlet::detect_servlet_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all.extend(java_quarkus::detect_quarkus_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::Go => parsers
|
||||
.go
|
||||
.as_mut()
|
||||
.and_then(|p| p.parse(&bytes, None))
|
||||
.map(|tree| {
|
||||
let mut all =
|
||||
go_http::detect_go_http_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
all.extend(go_gin::detect_gin_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::Php => parsers
|
||||
.php
|
||||
.as_mut()
|
||||
.and_then(|p| p.parse(&bytes, None))
|
||||
.map(|tree| {
|
||||
let mut all =
|
||||
php_laravel::detect_laravel_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
all.extend(php_slim::detect_slim_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::Ruby => parsers
|
||||
.ruby
|
||||
.as_mut()
|
||||
.and_then(|p| p.parse(&bytes, None))
|
||||
.map(|tree| {
|
||||
let mut all =
|
||||
ruby_sinatra::detect_sinatra_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
all.extend(ruby_rails::detect_rails_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::Rust => parsers
|
||||
.rust
|
||||
.as_mut()
|
||||
.and_then(|p| p.parse(&bytes, None))
|
||||
.map(|tree| {
|
||||
let mut all =
|
||||
rust_actix::detect_actix_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
all.extend(rust_axum::detect_axum_routes(
|
||||
&tree,
|
||||
&bytes,
|
||||
path,
|
||||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::Other => Vec::new(),
|
||||
};
|
||||
for n in nodes {
|
||||
map.nodes.push(n);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 22 — Track F.3: data-store / external-service /
|
||||
// dangerous-local detection from summaries.
|
||||
map.nodes
|
||||
.extend(datastore::detect_data_stores(inputs.global_summaries));
|
||||
map.nodes
|
||||
.extend(external::detect_external_services(inputs.global_summaries));
|
||||
map.nodes
|
||||
.extend(dangerous::detect_dangerous_locals(inputs.global_summaries));
|
||||
|
||||
// Canonicalise so node indices are stable before reachability
|
||||
// builds edges referring to those indices.
|
||||
map.canonicalize();
|
||||
|
||||
// Phase 22 — Track F.3: transitive closure over the call graph.
|
||||
reachability::populate_reaches_edges(&mut map, inputs.global_summaries, inputs.call_graph);
|
||||
|
||||
// Re-canonicalise: edges added by reachability need to be sorted
|
||||
// so the serialised JSON stays byte-deterministic.
|
||||
map.canonicalize();
|
||||
map
|
||||
}
|
||||
|
||||
/// Source-language bucket for a file, decided purely from its extension by
/// `classify_file`.
#[derive(Copy, Clone, PartialEq, Eq)]
enum FileKind {
    /// `.py`, `.pyi`
    Python,
    /// `.js`, `.jsx`, `.mjs`, `.cjs`
    JavaScript,
    /// `.ts`, `.tsx`, `.mts`, `.cts`
    TypeScript,
    /// `.java`
    Java,
    /// `.go`
    Go,
    /// `.php`
    Php,
    /// `.rb`
    Ruby,
    /// `.rs`
    Rust,
    /// Any other extension, or none at all.
    Other,
}
|
||||
|
||||
fn classify_file(path: &Path) -> FileKind {
|
||||
match path.extension().and_then(|s| s.to_str()) {
|
||||
Some("py") | Some("pyi") => FileKind::Python,
|
||||
Some("js") | Some("jsx") | Some("mjs") | Some("cjs") => FileKind::JavaScript,
|
||||
Some("ts") | Some("tsx") | Some("mts") | Some("cts") => FileKind::TypeScript,
|
||||
Some("java") => FileKind::Java,
|
||||
Some("go") => FileKind::Go,
|
||||
Some("php") => FileKind::Php,
|
||||
Some("rb") => FileKind::Ruby,
|
||||
Some("rs") => FileKind::Rust,
|
||||
_ => FileKind::Other,
|
||||
}
|
||||
}
|
||||
|
||||
/// One reusable tree-sitter [`Parser`] per supported language.
///
/// A slot is `None` when `parser_for` could not install the grammar; files
/// of that language then contribute no entry-point nodes.
struct Parsers {
    /// Parser for files classified as `FileKind::Python`.
    python: Option<Parser>,
    /// Parser for files classified as `FileKind::JavaScript`.
    javascript: Option<Parser>,
    /// Parser for files classified as `FileKind::TypeScript`.
    typescript: Option<Parser>,
    /// Parser for files classified as `FileKind::Java`.
    java: Option<Parser>,
    /// Parser for files classified as `FileKind::Go`.
    go: Option<Parser>,
    /// Parser for files classified as `FileKind::Php`.
    php: Option<Parser>,
    /// Parser for files classified as `FileKind::Ruby`.
    ruby: Option<Parser>,
    /// Parser for files classified as `FileKind::Rust`.
    rust: Option<Parser>,
}
|
||||
|
||||
impl Parsers {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
python: parser_for(tree_sitter_python::LANGUAGE.into()),
|
||||
javascript: parser_for(tree_sitter_javascript::LANGUAGE.into()),
|
||||
typescript: parser_for(tree_sitter_typescript::LANGUAGE_TSX.into()),
|
||||
java: parser_for(tree_sitter_java::LANGUAGE.into()),
|
||||
go: parser_for(tree_sitter_go::LANGUAGE.into()),
|
||||
php: parser_for(tree_sitter_php::LANGUAGE_PHP.into()),
|
||||
ruby: parser_for(tree_sitter_ruby::LANGUAGE.into()),
|
||||
rust: parser_for(tree_sitter_rust::LANGUAGE.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parser_for(language: tree_sitter::Language) -> Option<Parser> {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(&language).ok()?;
|
||||
Some(parser)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::SurfaceNode;
|
||||
use std::fs;
|
||||
use tempfile::tempdir;
|
||||
|
||||
fn empty_inputs<'a>(
|
||||
files: &'a [PathBuf],
|
||||
scan_root: Option<&'a Path>,
|
||||
gs: &'a GlobalSummaries,
|
||||
cg: &'a CallGraph,
|
||||
cfg: &'a Config,
|
||||
) -> SurfaceBuildInputs<'a> {
|
||||
SurfaceBuildInputs {
|
||||
files,
|
||||
scan_root,
|
||||
global_summaries: gs,
|
||||
call_graph: cg,
|
||||
config: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
fn empty_call_graph() -> CallGraph {
|
||||
CallGraph {
|
||||
graph: petgraph::graph::DiGraph::new(),
|
||||
index: Default::default(),
|
||||
unresolved_not_found: vec![],
|
||||
unresolved_ambiguous: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// With no files and no summaries the builder must return a map with no
/// nodes and no edges.
#[test]
fn empty_inputs_produce_empty_map() {
    let dir = tempdir().unwrap();
    let cfg = Config::default();
    let gs = GlobalSummaries::new();
    let cg = empty_call_graph();
    let files: Vec<PathBuf> = Vec::new();

    let map = build_surface_map(&empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg));

    assert_eq!(map.node_count(), 0);
    assert_eq!(map.edge_count(), 0);
}
|
||||
|
||||
#[test]
|
||||
fn flask_file_produces_entry_points() {
|
||||
let dir = tempdir().unwrap();
|
||||
let py = dir.path().join("app.py");
|
||||
fs::write(
|
||||
&py,
|
||||
r#"
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
return "hi"
|
||||
|
||||
@app.post("/submit")
|
||||
def submit():
|
||||
return "ok"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let cfg = Config::default();
|
||||
let gs = GlobalSummaries::new();
|
||||
let cg = empty_call_graph();
|
||||
let files = vec![py];
|
||||
let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg);
|
||||
let map = build_surface_map(&inputs);
|
||||
assert_eq!(map.node_count(), 2);
|
||||
let methods: Vec<HttpMethod> = map.entry_points().map(|ep| ep.method).collect();
|
||||
assert!(methods.contains(&HttpMethod::GET));
|
||||
assert!(methods.contains(&HttpMethod::POST));
|
||||
}
|
||||
|
||||
/// A FastAPI module with one GET and one POST route yields two nodes.
#[test]
fn fastapi_file_produces_entry_points() {
    let dir = tempdir().unwrap();
    let py = dir.path().join("api.py");
    let source = "from fastapi import FastAPI\napp = FastAPI()\n@app.get('/users')\ndef list_users(): pass\n@app.post('/items')\ndef create(): pass\n";
    fs::write(&py, source).unwrap();

    let cfg = Config::default();
    let gs = GlobalSummaries::new();
    let cg = empty_call_graph();
    let files = vec![py];

    let map = build_surface_map(&empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg));

    assert_eq!(map.node_count(), 2);
}
|
||||
|
||||
#[test]
|
||||
fn dangerous_local_emits_node_and_reaches_edge_to_same_file_entry() {
|
||||
use crate::labels::Cap;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::{FuncKey, Lang};
|
||||
let dir = tempdir().unwrap();
|
||||
let py = dir.path().join("app.py");
|
||||
fs::write(
|
||||
&py,
|
||||
r#"
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/eval")
|
||||
def evaluator():
|
||||
return ""
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let cfg = Config::default();
|
||||
let mut gs = GlobalSummaries::new();
|
||||
gs.insert(
|
||||
FuncKey::new_function(Lang::Python, "app.py", "evaluator", None),
|
||||
FuncSummary {
|
||||
name: "evaluator".to_string(),
|
||||
file_path: "app.py".to_string(),
|
||||
lang: "python".to_string(),
|
||||
sink_caps: Cap::CODE_EXEC.bits(),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
let cg = empty_call_graph();
|
||||
let files = vec![py];
|
||||
let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg);
|
||||
let map = build_surface_map(&inputs);
|
||||
assert!(
|
||||
map.nodes
|
||||
.iter()
|
||||
.any(|n| matches!(n, SurfaceNode::DangerousLocal(_)))
|
||||
);
|
||||
assert!(
|
||||
map.edges
|
||||
.iter()
|
||||
.any(|e| matches!(e.kind, crate::surface::EdgeKind::Reaches))
|
||||
);
|
||||
}
|
||||
}
|
||||
88
src/surface/dangerous.rs
Normal file
88
src/surface/dangerous.rs
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
//! Dangerous-local sink detection.
|
||||
//!
|
||||
//! Walks the post-pass-2 [`GlobalSummaries`] looking for functions
|
||||
//! that themselves consume `Cap::CODE_EXEC`, `Cap::DESERIALIZE`,
|
||||
//! `Cap::SSTI`, or `Cap::FMT_STRING` (the canonical "no externally
|
||||
//! observable side effect" sinks) and emits one
|
||||
//! [`SurfaceNode::DangerousLocal`] per such function.
|
||||
//!
|
||||
//! The cap bits are taken straight from the existing label-rule
|
||||
//! registry — every Phase 22 sink class continues to land on the same
|
||||
//! `sink_caps` field downstream rules already populate. No new
|
||||
//! detection pass is added here; the surface layer just lifts the
|
||||
//! cap-bit information out of the summary.
|
||||
|
||||
use super::{DangerousLocal, SourceLocation, SurfaceNode};
|
||||
use crate::labels::Cap;
|
||||
use crate::summary::GlobalSummaries;
|
||||
|
||||
/// Cap bits that indicate the function is a *local* sink — code exec,
|
||||
/// unsafe deserialisation, server-side template injection, format
|
||||
/// string injection. Other sink caps (SQL_QUERY → DataStore;
|
||||
/// SSRF → ExternalService) live elsewhere in the surface layer so the
|
||||
/// node taxonomy matches the chain composer's expectations.
|
||||
fn dangerous_caps() -> Cap {
|
||||
Cap::CODE_EXEC | Cap::DESERIALIZE | Cap::SSTI | Cap::FMT_STRING
|
||||
}
|
||||
|
||||
pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
|
||||
let mask = dangerous_caps();
|
||||
let mut out: Vec<SurfaceNode> = Vec::new();
|
||||
for (key, summary) in summaries.iter() {
|
||||
let caps = summary.sink_caps() & mask;
|
||||
if caps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
out.push(SurfaceNode::DangerousLocal(DangerousLocal {
|
||||
location: SourceLocation {
|
||||
file: summary.file_path.clone(),
|
||||
line: 0,
|
||||
col: 0,
|
||||
},
|
||||
function_name: key.qualified_name(),
|
||||
cap_bits: caps.bits(),
|
||||
}));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::summary::FuncSummary;
    use crate::symbol::{FuncKey, Lang};

    /// Builds a Python function key/summary pair whose `sink_caps` is `caps`.
    fn summary_with_caps(name: &str, file: &str, caps: Cap) -> (FuncKey, FuncSummary) {
        let summary = FuncSummary {
            name: name.to_string(),
            file_path: file.to_string(),
            lang: "python".to_string(),
            sink_caps: caps.bits(),
            ..Default::default()
        };
        (
            FuncKey::new_function(Lang::Python, file, name, None),
            summary,
        )
    }

    /// A CODE_EXEC sink produces exactly one dangerous-local node that
    /// carries the CODE_EXEC bit.
    #[test]
    fn detects_eval_sink() {
        let mut gs = GlobalSummaries::new();
        let (key, summary) = summary_with_caps("run", "danger.py", Cap::CODE_EXEC);
        gs.insert(key, summary);

        let nodes = detect_dangerous_locals(&gs);

        assert_eq!(nodes.len(), 1);
        let SurfaceNode::DangerousLocal(local) = &nodes[0] else {
            panic!()
        };
        let code_exec = Cap::CODE_EXEC.bits();
        assert_eq!(local.cap_bits & code_exec, code_exec);
    }

    /// A function whose only sink cap is SQL_QUERY is not a dangerous local.
    #[test]
    fn ignores_sql_only() {
        let mut gs = GlobalSummaries::new();
        let (key, summary) = summary_with_caps("query", "data.py", Cap::SQL_QUERY);
        gs.insert(key, summary);

        assert!(detect_dangerous_locals(&gs).is_empty());
    }
}
|
||||
614
src/surface/datastore.rs
Normal file
614
src/surface/datastore.rs
Normal file
|
|
@ -0,0 +1,614 @@
|
|||
//! Data-store detection.
|
||||
//!
|
||||
//! Walks the post-pass-2 [`GlobalSummaries`] looking for callees whose
|
||||
//! name is a known database / cache / blob-store driver entry point,
|
||||
//! and emits one [`SurfaceNode::DataStore`] per resolved store.
|
||||
//!
|
||||
//! The detector is name-based on purpose: the receiver's full type is
|
||||
//! often unknown after pass 2, but the leaf name of a driver call
|
||||
//! (`psycopg2.connect`, `mysql.createConnection`, `gorm.Open`,
|
||||
//! `Eloquent::find`, `ActiveRecord::Base.connection`) carries enough
|
||||
//! signal for surface-level chain composition. False positives here
|
||||
//! are forgiving — the surface map is informational, not a finding
|
||||
//! that fires on its own.
|
||||
|
||||
use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode};
|
||||
use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
|
||||
|
||||
/// One detection rule: callee pattern → store kind + label. Stored
/// as a flat list so adding a new ORM / driver is a one-line edit.
struct DriverRule {
    /// Pattern compared ASCII-case-insensitively against the callee text by
    /// `match_rule`. A pattern containing `.` or `::` is matched as a
    /// substring of the whole callee; a bare pattern must equal one whole
    /// `.` / `::`-separated segment of it.
    leaf: &'static str,
    /// Store category recorded on the emitted node.
    kind: DataStoreKind,
    /// Human-readable label attached to the emitted node. Used by the
    /// chain composer and the `nyx surface` CLI tree.
    label: &'static str,
}
|
||||
|
||||
const DRIVER_RULES: &[DriverRule] = &[
|
||||
// Python — relational
|
||||
DriverRule {
|
||||
leaf: "psycopg2.connect",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "PostgreSQL (psycopg2)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "psycopg.connect",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "PostgreSQL (psycopg3)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "mysql.connector.connect",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "MySQL (mysql.connector)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "MySQLdb.connect",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "MySQL (MySQLdb)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "pymysql.connect",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "MySQL (PyMySQL)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "sqlite3.connect",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "SQLite (sqlite3)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "sqlalchemy.create_engine",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "SQLAlchemy",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "django.db.connection",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Django ORM",
|
||||
},
|
||||
// Python — kv / doc
|
||||
DriverRule {
|
||||
leaf: "redis.Redis",
|
||||
kind: DataStoreKind::KeyValue,
|
||||
label: "Redis",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "redis.from_url",
|
||||
kind: DataStoreKind::KeyValue,
|
||||
label: "Redis",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "pymongo.MongoClient",
|
||||
kind: DataStoreKind::Document,
|
||||
label: "MongoDB",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "boto3.client",
|
||||
kind: DataStoreKind::BlobStore,
|
||||
label: "AWS (boto3)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "boto3.resource",
|
||||
kind: DataStoreKind::BlobStore,
|
||||
label: "AWS (boto3)",
|
||||
},
|
||||
// JavaScript / TypeScript — relational
|
||||
DriverRule {
|
||||
leaf: "knex",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Knex.js",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "createConnection",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "MySQL/Postgres (mysql/pg)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "Sequelize",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Sequelize",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "TypeORM.createConnection",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "TypeORM",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "PrismaClient",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Prisma",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "pool.query",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "pg/mysql pool",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "client.query",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "pg client",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "db.query",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Generic SQL driver",
|
||||
},
|
||||
// JS — kv / doc
|
||||
DriverRule {
|
||||
leaf: "redis.createClient",
|
||||
kind: DataStoreKind::KeyValue,
|
||||
label: "Redis (node-redis)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "ioredis",
|
||||
kind: DataStoreKind::KeyValue,
|
||||
label: "ioredis",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "MongoClient.connect",
|
||||
kind: DataStoreKind::Document,
|
||||
label: "MongoDB (node)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "AWS.S3",
|
||||
kind: DataStoreKind::BlobStore,
|
||||
label: "AWS S3",
|
||||
},
|
||||
// Java — JDBC / Hibernate
|
||||
DriverRule {
|
||||
leaf: "DriverManager.getConnection",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "JDBC",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "JdbcTemplate",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Spring JdbcTemplate",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "EntityManager",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "JPA EntityManager",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "SessionFactory.openSession",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Hibernate",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "Jedis",
|
||||
kind: DataStoreKind::KeyValue,
|
||||
label: "Jedis (Redis)",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "MongoClients.create",
|
||||
kind: DataStoreKind::Document,
|
||||
label: "MongoDB (java-driver)",
|
||||
},
|
||||
// Go — sql + ORM
|
||||
DriverRule {
|
||||
leaf: "sql.Open",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "database/sql",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "gorm.Open",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "GORM",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "sqlx.Connect",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "sqlx",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "sqlx.Open",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "sqlx",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "redis.NewClient",
|
||||
kind: DataStoreKind::KeyValue,
|
||||
label: "go-redis",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "mongo.Connect",
|
||||
kind: DataStoreKind::Document,
|
||||
label: "MongoDB (go-driver)",
|
||||
},
|
||||
// PHP — Eloquent / PDO
|
||||
DriverRule {
|
||||
leaf: "PDO",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "PDO",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "Eloquent::find",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Laravel Eloquent",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "Eloquent::where",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Laravel Eloquent",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "DB::connection",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Laravel DB",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "Doctrine",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Doctrine ORM",
|
||||
},
|
||||
// Ruby — ActiveRecord
|
||||
DriverRule {
|
||||
leaf: "ActiveRecord::Base.connection",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "ActiveRecord",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "ActiveRecord::Base.find",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "ActiveRecord",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: ".find_by_sql",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "ActiveRecord raw SQL",
|
||||
},
|
||||
// Rust — sqlx / diesel
|
||||
DriverRule {
|
||||
leaf: "sqlx::query",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "sqlx",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "sqlx::query_as",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "sqlx",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "diesel::sql_query",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Diesel",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "PgConnection::establish",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Diesel",
|
||||
},
|
||||
// Type-qualified — fires when the SSA type-fact engine resolves a
|
||||
// receiver to `TypeKind::DatabaseConnection` regardless of the bare
|
||||
// callee name (e.g. `conn = psycopg2.connect(); conn.cursor()` →
|
||||
// typed_call_receivers maps the `.cursor` ordinal to "DatabaseConnection").
|
||||
DriverRule {
|
||||
leaf: "DatabaseConnection.cursor",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Database connection",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "DatabaseConnection.execute",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Database connection",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "DatabaseConnection.query",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Database connection",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "DatabaseConnection.exec",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Database connection",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "DatabaseConnection.prepare",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Database connection",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "DatabaseConnection.commit",
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "Database connection",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "FileHandle.read",
|
||||
kind: DataStoreKind::Filesystem,
|
||||
label: "Filesystem",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "FileHandle.write",
|
||||
kind: DataStoreKind::Filesystem,
|
||||
label: "Filesystem",
|
||||
},
|
||||
DriverRule {
|
||||
leaf: "FileHandle.close",
|
||||
kind: DataStoreKind::Filesystem,
|
||||
label: "Filesystem",
|
||||
},
|
||||
// Filesystem (best-effort: language-agnostic open()-family)
|
||||
DriverRule {
|
||||
leaf: "open",
|
||||
kind: DataStoreKind::Filesystem,
|
||||
label: "Filesystem",
|
||||
},
|
||||
];
|
||||
|
||||
/// Walk every function summary's callee list and emit one
|
||||
/// [`SurfaceNode::DataStore`] per matched driver call. De-duped on
|
||||
/// `(file, line, label)`.
|
||||
///
|
||||
/// When the bare callee name does not hit a rule, the type-fact engine's
|
||||
/// per-call `typed_call_receivers` map (read off the matching
|
||||
/// [`crate::summary::ssa_summary::SsaFuncSummary`]) is consulted: a callee whose
|
||||
/// receiver was resolved to `TypeKind::DatabaseConnection` or
|
||||
/// `TypeKind::FileHandle` is retried under the type-qualified name
|
||||
/// `"DatabaseConnection.<method>"` / `"FileHandle.<method>"`, picking up
|
||||
/// the bound-receiver call shapes (`conn.cursor()` after
|
||||
/// `conn = psycopg2.connect()`) that the name-only matcher misses.
|
||||
pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
|
||||
let mut out: Vec<SurfaceNode> = Vec::new();
|
||||
let mut seen: std::collections::HashSet<(String, u32, String)> =
|
||||
std::collections::HashSet::new();
|
||||
for (key, summary) in summaries.iter() {
|
||||
let typed = summaries
|
||||
.get_ssa(key)
|
||||
.map(|s| s.typed_call_receivers.as_slice());
|
||||
for callee in &summary.callees {
|
||||
let rule = match_rule(&callee.name).or_else(|| {
|
||||
typed
|
||||
.and_then(|t| container_for_ordinal(t, callee.ordinal))
|
||||
.and_then(|c| match_rule(&qualify(c, &callee.name)))
|
||||
});
|
||||
let Some(rule) = rule else { continue };
|
||||
let location = call_site_location(summary, callee);
|
||||
let dedup = (location.file.clone(), location.line, rule.label.to_string());
|
||||
if !seen.insert(dedup) {
|
||||
continue;
|
||||
}
|
||||
out.push(SurfaceNode::DataStore(DataStore {
|
||||
location,
|
||||
kind: rule.kind,
|
||||
label: rule.label.to_string(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Returns the part of `name` after its final `::`, and within that the
/// part after its final `.`. A name with neither separator comes back
/// unchanged.
fn leaf_segment(name: &str) -> &str {
    let tail = match name.rsplit_once("::") {
        Some((_, rest)) => rest,
        None => name,
    };
    match tail.rsplit_once('.') {
        Some((_, leaf)) => leaf,
        None => tail,
    }
}
|
||||
|
||||
/// Build a type-qualified callee name (`"{container}.{method}"`) for
|
||||
/// retry-matching when the bare callee text did not hit any rule.
|
||||
fn qualify(container: &str, callee_name: &str) -> String {
|
||||
format!("{}.{}", container, leaf_segment(callee_name))
|
||||
}
|
||||
|
||||
/// Looks up the container recorded for `ordinal`, returning the first
/// matching entry.
///
/// This is a plain linear scan: `typed_call_receivers` is a small
/// `Vec<(ordinal, container)>` per function (typically 0 to a few dozen
/// entries), so a HashMap per summary would be wasteful.
fn container_for_ordinal(typed: &[(u32, String)], ordinal: u32) -> Option<&str> {
    typed
        .iter()
        .find_map(|(site, container)| (*site == ordinal).then_some(container.as_str()))
}
|
||||
|
||||
fn match_rule(callee: &str) -> Option<&'static DriverRule> {
|
||||
let cl = callee.trim().to_ascii_lowercase();
|
||||
// Normalize `::` → `.` so segment-split treats both as separators.
|
||||
let cl_segments = cl.replace("::", ".");
|
||||
DRIVER_RULES.iter().find(|r| {
|
||||
let rl = r.leaf.to_ascii_lowercase();
|
||||
if r.leaf.contains('.') || r.leaf.contains("::") {
|
||||
// Qualified pattern (e.g. `psycopg2.connect`, `Eloquent::find`):
|
||||
// substring on the full callee text. Qualified shapes are
|
||||
// unambiguous so substring is precise enough.
|
||||
cl.contains(&rl)
|
||||
} else {
|
||||
// Bare leaf (e.g. `open`, `fetch`, `PrismaClient`): require a
|
||||
// whole-segment match. Prevents `fopen` / `OpenSearch` /
|
||||
// `getPrismaClient` from FP-matching short bare leaves.
|
||||
cl_segments.split('.').any(|seg| seg == rl)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Source location of a call site. Reads the 1-based `(line, col)`
|
||||
/// recorded on the [`CalleeSite`] at CFG-build time (populated for every
|
||||
/// summary produced after the span field landed); for legacy summaries
|
||||
/// loaded from SQLite with no span, falls back to the function's host
|
||||
/// file with line 0.
|
||||
fn call_site_location(summary: &FuncSummary, callee: &CalleeSite) -> SourceLocation {
|
||||
let (line, col) = callee.span.unwrap_or((0, 0));
|
||||
SourceLocation {
|
||||
file: summary.file_path.clone(),
|
||||
line,
|
||||
col,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::symbol::{FuncKey, Lang};

    /// Build a Python function summary named `name` in `file` whose callee
    /// list is one bare [`CalleeSite`] per entry of `callees`.
    fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) {
        let summary = FuncSummary {
            name: name.to_string(),
            file_path: file.to_string(),
            lang: "python".to_string(),
            param_count: 0,
            callees: callees
                .iter()
                .map(|callee| CalleeSite::bare(callee.to_string()))
                .collect(),
            ..Default::default()
        };
        (FuncKey::new_function(Lang::Python, file, name, None), summary)
    }

    /// Run the detector over a summary set holding exactly one function.
    fn detect_single(key: FuncKey, summary: FuncSummary) -> Vec<SurfaceNode> {
        let mut gs = GlobalSummaries::new();
        gs.insert(key, summary);
        detect_data_stores(&gs)
    }

    #[test]
    fn datastore_carries_callee_span_when_present() {
        // A span recorded on the callee must flow through to the node's
        // `SourceLocation` instead of the `(0, 0)` fallback.
        let (key, mut summary) = summary_with_callees("init", "app.py", &["psycopg2.connect"]);
        summary.callees[0].span = Some((42, 13));
        let nodes = detect_single(key, summary);
        assert_eq!(nodes.len(), 1);
        let [SurfaceNode::DataStore(ds)] = nodes.as_slice() else {
            panic!()
        };
        assert_eq!(ds.location.line, 42);
        assert_eq!(ds.location.col, 13);
    }

    #[test]
    fn detects_psycopg2_connect() {
        let (key, summary) = summary_with_callees("init", "app.py", &["psycopg2.connect"]);
        let nodes = detect_single(key, summary);
        assert_eq!(nodes.len(), 1);
        let [SurfaceNode::DataStore(ds)] = nodes.as_slice() else {
            panic!()
        };
        assert_eq!(ds.kind, DataStoreKind::Sql);
        assert_eq!(ds.label, "PostgreSQL (psycopg2)");
    }

    #[test]
    fn detects_gorm_open() {
        let (key, summary) = summary_with_callees("init", "main.go", &["gorm.Open"]);
        let nodes = detect_single(key, summary);
        assert_eq!(nodes.len(), 1);
        let [SurfaceNode::DataStore(ds)] = nodes.as_slice() else {
            panic!()
        };
        assert_eq!(ds.label, "GORM");
    }

    #[test]
    fn dedup_collapses_repeats_in_same_file() {
        let (key, summary) =
            summary_with_callees("init", "app.py", &["psycopg2.connect", "psycopg2.connect"]);
        assert_eq!(detect_single(key, summary).len(), 1);
    }

    #[test]
    fn bare_open_rule_does_not_match_fopen_or_opensearch() {
        // None of these has a whole segment equal to `open`.
        let lookalikes = [
            "fopen",
            "popen",
            "OpenSearch",
            "openssl_encrypt",
            "MongoClient.openSession",
        ];
        let (key, summary) = summary_with_callees("init", "app.py", &lookalikes);
        let nodes = detect_single(key, summary);
        assert!(
            nodes.is_empty(),
            "bare `open` rule should not FP on {nodes:?}",
        );
    }

    #[test]
    fn bare_open_rule_still_matches_real_open() {
        // Plain `open` call.
        let (key, summary) = summary_with_callees("loader", "app.py", &["open"]);
        let nodes = detect_single(key, summary);
        assert_eq!(nodes.len(), 1);
        let [SurfaceNode::DataStore(ds)] = nodes.as_slice() else {
            panic!()
        };
        assert_eq!(ds.kind, DataStoreKind::Filesystem);

        // Module-qualified `builtins.open` still has `open` as a segment.
        let (key, summary) = summary_with_callees("loader", "app.py", &["builtins.open"]);
        assert_eq!(detect_single(key, summary).len(), 1);
    }

    #[test]
    fn typed_receiver_database_connection_resolves_bound_cursor() {
        // `conn = psycopg2.connect(); conn.cursor()`: the bare callee
        // `conn.cursor` matches no rule by name, but `typed_call_receivers`
        // maps its ordinal to "DatabaseConnection", so the detector retries
        // under `DatabaseConnection.cursor` and emits a Sql node.
        use crate::summary::ssa_summary::SsaFuncSummary;

        let (key, mut summary) = summary_with_callees("load", "app.py", &["conn.cursor"]);
        summary.callees[0].ordinal = 7;
        summary.callees[0].span = Some((4, 8));

        let mut ssa = SsaFuncSummary::default();
        ssa.typed_call_receivers
            .push((7, "DatabaseConnection".into()));

        let mut gs = GlobalSummaries::new();
        gs.insert(key.clone(), summary);
        gs.insert_ssa(key, ssa);

        let nodes = detect_data_stores(&gs);
        assert_eq!(nodes.len(), 1, "expected typed retry to hit; got {nodes:?}");
        let [SurfaceNode::DataStore(ds)] = nodes.as_slice() else {
            panic!()
        };
        assert_eq!(ds.kind, DataStoreKind::Sql);
        assert_eq!(ds.label, "Database connection");
        assert_eq!(ds.location.line, 4);
    }

    #[test]
    fn typed_receiver_without_ssa_summary_falls_through() {
        // With no SsaFuncSummary registered there is nothing to retry with:
        // `client.cursor` matches no rule by name, so the detector must
        // return no nodes (and must not panic on the missing SSA side).
        let (key, summary) = summary_with_callees("load", "app.py", &["client.cursor"]);
        assert!(detect_single(key, summary).is_empty());
    }
}
|
||||
529
src/surface/external.rs
Normal file
529
src/surface/external.rs
Normal file
|
|
@ -0,0 +1,529 @@
|
|||
//! External-service detection.
|
||||
//!
|
||||
//! Walks the post-pass-2 [`GlobalSummaries`] looking for callees that
//! launch outbound network requests (HTTP, gRPC, SMTP, DNS) and emits
//! one [`SurfaceNode::ExternalService`] per distinct `(file, label)`
//! pair, located at the first matching call seen. Detection is by callee
//! name plus a `sink_caps & SSRF` heuristic: both signals are consulted,
//! so a client with no SSRF cap (DNS resolver, SMTP sender) still
//! surfaces as an external service.
|
||||
|
||||
use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode};
|
||||
use crate::labels::Cap;
|
||||
use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
|
||||
|
||||
struct ClientRule {
|
||||
leaf: &'static str,
|
||||
kind: ExternalServiceKind,
|
||||
label: &'static str,
|
||||
}
|
||||
|
||||
const CLIENT_RULES: &[ClientRule] = &[
|
||||
// HTTP
|
||||
ClientRule {
|
||||
leaf: "requests.get",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "requests (Python)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "requests.post",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "requests (Python)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "httpx.get",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "httpx (Python)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "httpx.post",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "httpx (Python)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "urllib.request.urlopen",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "urllib",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "fetch",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "fetch (JS)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "axios.get",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "axios",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "axios.post",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "axios",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "http.request",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "node http",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "got",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "got (JS)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.send",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Java HttpClient",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.execute",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Java HttpClient",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "RestTemplate.exchange",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Spring RestTemplate",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "RestTemplate.getForObject",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Spring RestTemplate",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "OkHttpClient.newCall",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "OkHttp",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "http.Get",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "net/http (Go)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "http.Post",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "net/http (Go)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "http.NewRequest",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "net/http (Go)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "client.Do",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "go http client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "reqwest::get",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "reqwest (Rust)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "reqwest::Client",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "reqwest (Rust)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "Net::HTTP",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Net::HTTP (Ruby)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HTTParty.get",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTParty",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "Faraday",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Faraday (Ruby)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "curl_exec",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "PHP curl",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "file_get_contents",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "PHP file_get_contents",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "Guzzle",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Guzzle (PHP)",
|
||||
},
|
||||
// Message brokers
|
||||
ClientRule {
|
||||
leaf: "kafka.send",
|
||||
kind: ExternalServiceKind::MessageBroker,
|
||||
label: "Kafka",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "KafkaProducer.send",
|
||||
kind: ExternalServiceKind::MessageBroker,
|
||||
label: "Kafka",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "rabbitmq.publish",
|
||||
kind: ExternalServiceKind::MessageBroker,
|
||||
label: "RabbitMQ",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "amqp.publish",
|
||||
kind: ExternalServiceKind::MessageBroker,
|
||||
label: "AMQP",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "sqs.send_message",
|
||||
kind: ExternalServiceKind::MessageBroker,
|
||||
label: "AWS SQS",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "sns.publish",
|
||||
kind: ExternalServiceKind::MessageBroker,
|
||||
label: "AWS SNS",
|
||||
},
|
||||
// Search indices
|
||||
ClientRule {
|
||||
leaf: "Elasticsearch",
|
||||
kind: ExternalServiceKind::SearchIndex,
|
||||
label: "Elasticsearch",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "elasticsearch.search",
|
||||
kind: ExternalServiceKind::SearchIndex,
|
||||
label: "Elasticsearch",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "OpenSearch",
|
||||
kind: ExternalServiceKind::SearchIndex,
|
||||
label: "OpenSearch",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "Algolia",
|
||||
kind: ExternalServiceKind::SearchIndex,
|
||||
label: "Algolia",
|
||||
},
|
||||
// Auth providers
|
||||
ClientRule {
|
||||
leaf: "auth0",
|
||||
kind: ExternalServiceKind::AuthProvider,
|
||||
label: "Auth0",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "passport.authenticate",
|
||||
kind: ExternalServiceKind::AuthProvider,
|
||||
label: "Passport.js",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "OAuth2Client",
|
||||
kind: ExternalServiceKind::AuthProvider,
|
||||
label: "OAuth2 client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "google.oauth2",
|
||||
kind: ExternalServiceKind::AuthProvider,
|
||||
label: "Google OAuth2",
|
||||
},
|
||||
// SMTP
|
||||
ClientRule {
|
||||
leaf: "smtplib.SMTP",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "SMTP (Python)",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "Mail::send",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Laravel Mail",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "ActionMailer",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Rails ActionMailer",
|
||||
},
|
||||
// DNS
|
||||
ClientRule {
|
||||
leaf: "socket.gethostbyname",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "DNS resolver",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "dns.lookup",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "DNS resolver",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "net.LookupIP",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "DNS resolver",
|
||||
},
|
||||
// Type-qualified — fires when the SSA type-fact engine resolves a
|
||||
// receiver to `TypeKind::HttpClient` regardless of the bare callee
|
||||
// name (`session = requests.Session(); session.get(url)` →
|
||||
// typed_call_receivers maps the `.get` ordinal to "HttpClient", so
|
||||
// the bound-receiver call surfaces as an outbound HTTP node even
|
||||
// though `requests.get` is the only direct-import rule above).
|
||||
ClientRule {
|
||||
leaf: "HttpClient.get",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.post",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.put",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.delete",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.patch",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.request",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.head",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "HttpClient.options",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP client",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "RequestBuilder.send",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "HTTP request builder",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "URL.openConnection",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "URL connection",
|
||||
},
|
||||
ClientRule {
|
||||
leaf: "URL.openStream",
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "URL connection",
|
||||
},
|
||||
];
|
||||
|
||||
/// Walk every function summary's callee list and emit one
|
||||
/// [`SurfaceNode::ExternalService`] per matched outbound-client call.
|
||||
///
|
||||
/// When the bare callee name does not hit a rule, the type-fact engine's
|
||||
/// per-call `typed_call_receivers` map (read off the matching
|
||||
/// [`crate::summary::ssa_summary::SsaFuncSummary`]) is consulted: a callee whose
|
||||
/// receiver was resolved to `TypeKind::HttpClient` /
|
||||
/// `TypeKind::RequestBuilder` / `TypeKind::Url` is retried under the
|
||||
/// type-qualified name `"{container}.<method>"`, picking up the
|
||||
/// bound-receiver call shapes (`client = requests.Session();
|
||||
/// client.get(url)`) that the name-only matcher misses.
|
||||
pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
|
||||
let mut out: Vec<SurfaceNode> = Vec::new();
|
||||
let mut seen: std::collections::HashSet<(String, String)> = std::collections::HashSet::new();
|
||||
for (key, summary) in summaries.iter() {
|
||||
let typed = summaries
|
||||
.get_ssa(key)
|
||||
.map(|s| s.typed_call_receivers.as_slice());
|
||||
for callee in &summary.callees {
|
||||
let rule = match_rule(&callee.name).or_else(|| {
|
||||
typed
|
||||
.and_then(|t| container_for_ordinal(t, callee.ordinal))
|
||||
.and_then(|c| match_rule(&qualify(c, &callee.name)))
|
||||
});
|
||||
let Some(rule) = rule else { continue };
|
||||
let location = call_site_location(summary, Some(callee));
|
||||
if !seen.insert((location.file.clone(), rule.label.to_string())) {
|
||||
continue;
|
||||
}
|
||||
out.push(SurfaceNode::ExternalService(ExternalService {
|
||||
location,
|
||||
kind: rule.kind,
|
||||
label: rule.label.to_string(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
// Also surface any function whose own sink_caps include SSRF — the
|
||||
// function itself is an outbound network call site even if the
|
||||
// direct callee did not match the rule list. Use the function's
|
||||
// file as the location and synthesise a generic label.
|
||||
for (_key, summary) in summaries.iter() {
|
||||
if summary.sink_caps().contains(Cap::SSRF) {
|
||||
let loc = call_site_location(summary, None);
|
||||
let dedup = (loc.file.clone(), "Outbound HTTP".to_string());
|
||||
if seen.insert(dedup) {
|
||||
out.push(SurfaceNode::ExternalService(ExternalService {
|
||||
location: loc,
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Outbound HTTP".to_string(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Return the final path segment of `name`: the text after the last `::`,
/// and within that, the text after the last `.`.
fn leaf_segment(name: &str) -> &str {
    let tail = match name.rfind("::") {
        Some(at) => &name[at + 2..],
        None => name,
    };
    match tail.rfind('.') {
        Some(at) => &tail[at + 1..],
        None => tail,
    }
}
|
||||
|
||||
fn qualify(container: &str, callee_name: &str) -> String {
|
||||
format!("{}.{}", container, leaf_segment(callee_name))
|
||||
}
|
||||
|
||||
/// Look up the receiver container name recorded for call `ordinal`.
///
/// Returns the name from the first `(ordinal, name)` pair whose ordinal
/// matches, or `None` when the call has no entry.
fn container_for_ordinal(typed: &[(u32, String)], ordinal: u32) -> Option<&str> {
    typed
        .iter()
        .find_map(|(recorded, container)| (*recorded == ordinal).then_some(container.as_str()))
}
|
||||
|
||||
fn match_rule(callee: &str) -> Option<&'static ClientRule> {
|
||||
let cl = callee.trim().to_ascii_lowercase();
|
||||
let cl_segments = cl.replace("::", ".");
|
||||
CLIENT_RULES.iter().find(|r| {
|
||||
let rl = r.leaf.to_ascii_lowercase();
|
||||
if r.leaf.contains('.') || r.leaf.contains("::") {
|
||||
// Qualified pattern: substring on full callee text.
|
||||
cl.contains(&rl)
|
||||
} else {
|
||||
// Bare leaf: whole-segment match only. Stops `prefetch` from
|
||||
// matching `fetch`, `Faraday` substrings, etc.
|
||||
cl_segments.split('.').any(|seg| seg == rl)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Source location of an external-service call site. Reads the 1-based
|
||||
/// `(line, col)` recorded on the [`CalleeSite`] at CFG-build time when
|
||||
/// available; otherwise (sink-cap–only fallback path, or legacy summaries
|
||||
/// loaded from SQLite) returns the function's host file with line 0.
|
||||
fn call_site_location(summary: &FuncSummary, callee: Option<&CalleeSite>) -> SourceLocation {
|
||||
let (line, col) = callee.and_then(|c| c.span).unwrap_or((0, 0));
|
||||
SourceLocation {
|
||||
file: summary.file_path.clone(),
|
||||
line,
|
||||
col,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::summary::CalleeSite;
    use crate::symbol::{FuncKey, Lang};

    /// Build a zero-parameter function summary plus its key.
    fn fixture(
        lang: Lang,
        lang_tag: &str,
        file: &str,
        name: &str,
        callees: Vec<CalleeSite>,
    ) -> (FuncKey, FuncSummary) {
        let summary = FuncSummary {
            name: name.to_string(),
            file_path: file.to_string(),
            lang: lang_tag.to_string(),
            param_count: 0,
            callees,
            ..Default::default()
        };
        (FuncKey::new_function(lang, file, name, None), summary)
    }

    /// Run the detector over a summary set holding exactly one function.
    fn detect_single(key: FuncKey, summary: FuncSummary) -> Vec<SurfaceNode> {
        let mut gs = GlobalSummaries::new();
        gs.insert(key, summary);
        detect_external_services(&gs)
    }

    #[test]
    fn detects_requests_get() {
        let (key, summary) = fixture(
            Lang::Python,
            "python",
            "client.py",
            "fetch_user",
            vec![CalleeSite::bare("requests.get")],
        );
        let nodes = detect_single(key, summary);
        assert_eq!(nodes.len(), 1);
        let [SurfaceNode::ExternalService(es)] = nodes.as_slice() else {
            panic!()
        };
        assert_eq!(es.label, "requests (Python)");
    }

    #[test]
    fn bare_fetch_rule_does_not_match_prefetch_or_cachekey() {
        // None of these has a whole segment equal to a bare rule.
        let lookalikes = ["prefetch", "cacheKeyFetch", "Faraday_token"]
            .into_iter()
            .map(CalleeSite::bare)
            .collect();
        let (key, summary) = fixture(
            Lang::JavaScript,
            "javascript",
            "client.js",
            "load",
            lookalikes,
        );
        let nodes = detect_single(key, summary);
        assert!(nodes.is_empty(), "bare rules FP-matched on {nodes:?}");
    }

    #[test]
    fn typed_receiver_http_client_resolves_bound_session_get() {
        // `client = requests.Session(); client.get(url)`: the bare callee
        // `client.get` matches no rule by name, but `typed_call_receivers`
        // maps its ordinal to "HttpClient", so the detector retries under
        // `HttpClient.get` and emits an HTTP external-service node.
        use crate::summary::ssa_summary::SsaFuncSummary;

        let mut site = CalleeSite::bare("client.get");
        site.ordinal = 3;
        site.span = Some((9, 5));
        let (key, summary) = fixture(Lang::Python, "python", "client.py", "fetch", vec![site]);

        let mut ssa = SsaFuncSummary::default();
        ssa.typed_call_receivers.push((3, "HttpClient".into()));

        let mut gs = GlobalSummaries::new();
        gs.insert(key.clone(), summary);
        gs.insert_ssa(key, ssa);

        let nodes = detect_external_services(&gs);
        assert_eq!(nodes.len(), 1, "expected typed retry to hit; got {nodes:?}");
        let [SurfaceNode::ExternalService(es)] = nodes.as_slice() else {
            panic!()
        };
        assert_eq!(es.label, "HTTP client");
    }

    #[test]
    fn bare_got_rule_matches_segmented_callee() {
        let (key, summary) = fixture(
            Lang::JavaScript,
            "javascript",
            "client.js",
            "load",
            vec![CalleeSite::bare("got.post")],
        );
        let nodes = detect_single(key, summary);
        assert_eq!(nodes.len(), 1);
        let [SurfaceNode::ExternalService(es)] = nodes.as_slice() else {
            panic!()
        };
        assert_eq!(es.label, "got (JS)");
    }
}
|
||||
107
src/surface/graph.rs
Normal file
107
src/surface/graph.rs
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
//! petgraph-backed read-only view over a [`SurfaceMap`].
|
||||
//!
|
||||
//! The on-disk shape is two parallel `Vec`s (deterministic ordering,
|
||||
//! byte-identical JSON), but downstream consumers — the Track G chain
|
||||
//! composer, the `nyx surface` CLI walker — want graph queries:
|
||||
//! neighbours, reachability, topological order. [`petgraph_view`]
|
||||
//! constructs a `DiGraph<NodeRef<'_>, EdgeRef<'_>>` on demand without
|
||||
//! cloning the underlying nodes or edges.
|
||||
|
||||
use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode};
|
||||
use petgraph::graph::{DiGraph, NodeIndex};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Borrowed handle to one [`SurfaceNode`] inside the petgraph view.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct NodeRef<'a> {
|
||||
pub idx: u32,
|
||||
pub node: &'a SurfaceNode,
|
||||
}
|
||||
|
||||
/// Borrowed handle to one [`SurfaceEdge`] inside the petgraph view.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct EdgeRef<'a> {
|
||||
pub edge: &'a SurfaceEdge,
|
||||
}
|
||||
|
||||
impl<'a> EdgeRef<'a> {
|
||||
pub fn kind(&self) -> EdgeKind {
|
||||
self.edge.kind
|
||||
}
|
||||
}
|
||||
|
||||
/// Materialise a petgraph view of `map`. Node indices in the returned
|
||||
/// graph match `map.nodes` ordering 1:1, and the `lookup` map lets
|
||||
/// callers translate from the surface index (`u32`) to the petgraph
|
||||
/// [`NodeIndex`]. Walking edges respects `map.edges` order.
|
||||
pub fn petgraph_view(map: &SurfaceMap) -> SurfaceGraphView<'_> {
|
||||
let mut graph: DiGraph<NodeRef<'_>, EdgeRef<'_>> = DiGraph::new();
|
||||
let mut lookup: HashMap<u32, NodeIndex> = HashMap::with_capacity(map.nodes.len());
|
||||
for (i, node) in map.nodes.iter().enumerate() {
|
||||
let nx = graph.add_node(NodeRef {
|
||||
idx: i as u32,
|
||||
node,
|
||||
});
|
||||
lookup.insert(i as u32, nx);
|
||||
}
|
||||
for edge in &map.edges {
|
||||
if let (Some(&from), Some(&to)) = (lookup.get(&edge.from), lookup.get(&edge.to)) {
|
||||
graph.add_edge(from, to, EdgeRef { edge });
|
||||
}
|
||||
}
|
||||
SurfaceGraphView { graph, lookup }
|
||||
}
|
||||
|
||||
/// petgraph view returned by [`petgraph_view`].
|
||||
pub struct SurfaceGraphView<'a> {
|
||||
pub graph: DiGraph<NodeRef<'a>, EdgeRef<'a>>,
|
||||
pub lookup: HashMap<u32, NodeIndex>,
|
||||
}
|
||||
|
||||
impl<'a> SurfaceGraphView<'a> {
|
||||
/// Resolve a surface index back to its petgraph [`NodeIndex`].
|
||||
pub fn node_index(&self, surface_idx: u32) -> Option<NodeIndex> {
|
||||
self.lookup.get(&surface_idx).copied()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entry_points::HttpMethod;
    use crate::surface::{EntryPoint, Framework, SourceLocation};

    /// Flask entry-point fixture declared on line 1 of `file`, with its
    /// handler `h` on line 2.
    fn flask_entry(file: &str, method: HttpMethod, route: &str) -> SurfaceNode {
        SurfaceNode::EntryPoint(EntryPoint {
            location: SourceLocation::new(file, 1, 1),
            framework: Framework::Flask,
            method,
            route: route.into(),
            handler_name: "h".into(),
            handler_location: SourceLocation::new(file, 2, 1),
            auth_required: false,
        })
    }

    #[test]
    fn petgraph_view_preserves_indices() {
        let mut map = SurfaceMap::new();
        map.nodes.push(flask_entry("a.py", HttpMethod::GET, "/a"));
        map.nodes.push(flask_entry("b.py", HttpMethod::POST, "/b"));
        map.edges.push(SurfaceEdge {
            from: 0,
            to: 1,
            kind: EdgeKind::Calls,
        });

        let view = petgraph_view(&map);
        assert_eq!(view.graph.node_count(), 2);
        assert_eq!(view.graph.edge_count(), 1);

        // The single edge must connect the petgraph nodes for surface 0 → 1.
        let first = view.node_index(0).unwrap();
        let second = view.node_index(1).unwrap();
        assert!(view.graph.find_edge(first, second).is_some());
    }
}
|
||||
303
src/surface/lang/common.rs
Normal file
303
src/surface/lang/common.rs
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
//! Shared helpers used by the per-(language, framework) probes.
|
||||
//!
|
||||
//! Each probe extracts an [`EntryPoint`](crate::surface::EntryPoint) node from a parsed source file
|
||||
//! by walking the framework's route declaration shape. These helpers
|
||||
//! cover the bookkeeping common to every probe: building a stable
|
||||
//! [`SourceLocation`] from a tree-sitter node, decoding common string
|
||||
//! literal shapes, and identifier-based auth marker lookups.
|
||||
|
||||
use crate::surface::{SourceLocation, relative_path_string};
|
||||
use std::path::Path;
|
||||
use tree_sitter::Node;
|
||||
|
||||
/// Build a [`SourceLocation`] for the start of `node`, relative to
|
||||
/// `scan_root` when supplied.
|
||||
pub fn loc_for(node: Node<'_>, file_rel: &str) -> SourceLocation {
|
||||
let pos = node.start_position();
|
||||
SourceLocation::new(file_rel, (pos.row + 1) as u32, (pos.column + 1) as u32)
|
||||
}
|
||||
|
||||
/// Project-relative POSIX file string used as the [`SourceLocation`]
|
||||
/// `file` field across every node a probe emits.
|
||||
pub fn rel_file(path: &Path, scan_root: Option<&Path>) -> String {
|
||||
relative_path_string(path, scan_root)
|
||||
}
|
||||
|
||||
/// Strip string-literal decoration from `raw` and return the content.
///
/// Surrounding whitespace is trimmed first. A Python literal prefix (one
/// or two letters drawn from `b`, `r`, `f`, `u`, in either case, such as
/// `b"…"`, `r'…'`, `rb"…"`, `Fr'…'`) is removed only when it is
/// immediately followed by a `'` or `"`. Bare identifiers that merely
/// start with one of those letters (`route`, `bar`) are therefore left
/// untouched. Finally every leading and trailing `'`, `"` or `` ` `` is
/// trimmed, which also unwraps triple-quoted and backtick-quoted
/// literals.
///
/// Used by every probe that lifts a route path out of a string node.
pub fn unquote(raw: &str) -> String {
    const QUOTES: [char; 3] = ['\'', '"', '`'];
    // Python accepts at most two prefix letters (`rb`, `br`, `fr`, `rf`).
    const MAX_PREFIX_LEN: usize = 2;

    let trimmed = raw.trim();

    // Length of the leading run of candidate prefix letters. They are all
    // ASCII, so the count is also a valid byte offset into `trimmed`.
    let prefix_len = trimmed
        .bytes()
        .take_while(|b| matches!(b.to_ascii_lowercase(), b'b' | b'r' | b'f' | b'u'))
        .count();
    let after_prefix = &trimmed[prefix_len..];

    // Only treat the run as a literal prefix when a quote follows it
    // directly; otherwise it is part of the content.
    let literal = if prefix_len <= MAX_PREFIX_LEN && after_prefix.starts_with(['\'', '"']) {
        after_prefix
    } else {
        trimmed
    };

    literal
        .trim_start_matches(QUOTES)
        .trim_end_matches(QUOTES)
        .to_string()
}
|
||||
|
||||
/// Read the literal text of a tree-sitter `string` node and return its
|
||||
/// unquoted content; `None` when the slice is not valid UTF-8.
|
||||
pub fn string_node_value(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||||
Some(unquote(node.utf8_text(bytes).ok()?))
|
||||
}
|
||||
|
||||
/// Report whether the last segment of `text` is one of `markers`.
///
/// The last segment is whatever follows the final `.` or `:` in `text`
/// (the whole string when neither occurs), with surrounding whitespace
/// trimmed. It is compared against each marker ignoring ASCII case, and
/// must match a marker in full. Used by every probe's auth-decorator
/// allowlist.
pub fn leaf_matches(text: &str, markers: &[&str]) -> bool {
    // Both separators are one byte wide, so `+ 1` lands on a char boundary.
    let tail = match text.rfind(['.', ':']) {
        Some(at) => &text[at + 1..],
        None => text,
    };
    let leaf = tail.trim();
    for marker in markers {
        if leaf.eq_ignore_ascii_case(marker) {
            return true;
        }
    }
    false
}
|
||||
|
||||
/// Walk every descendant of `root` whose kind matches `target_kind`,
|
||||
/// invoking `visit` on each match. Bounded by recursion on tree-sitter
|
||||
/// node count.
|
||||
pub fn for_each_node<'tree, F>(root: Node<'tree>, target_kind: &str, mut visit: F)
|
||||
where
|
||||
F: FnMut(Node<'tree>),
|
||||
{
|
||||
fn recurse<'tree, F>(node: Node<'tree>, kind: &str, visit: &mut F)
|
||||
where
|
||||
F: FnMut(Node<'tree>),
|
||||
{
|
||||
if node.kind() == kind {
|
||||
visit(node);
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
recurse(child, kind, visit);
|
||||
}
|
||||
}
|
||||
recurse(root, target_kind, &mut visit);
|
||||
}
|
||||
|
||||
/// Find the first child of `parent` whose kind matches `kind`, with a
|
||||
/// `child_by_field_name(kind)` fast path. Used by Java probes where
|
||||
/// `class_declaration` / `method_declaration` modifiers / body live as
|
||||
/// unnamed children rather than fielded children in tree-sitter-java.
|
||||
pub fn child_or_named<'tree>(parent: Node<'tree>, kind: &str) -> Option<Node<'tree>> {
|
||||
if let Some(n) = parent.child_by_field_name(kind) {
|
||||
return Some(n);
|
||||
}
|
||||
let mut cursor = parent.walk();
|
||||
parent.children(&mut cursor).find(|c| c.kind() == kind)
|
||||
}
|
||||
|
||||
/// Return `true` when `bytes` contains a Python `import` /
/// `from … import …` statement naming a package whose leading segment
/// starts with one of `modules` (ASCII-case-insensitive prefix match).
/// This means `["flask"]` matches `flask`, `flask_login`, and
/// `flask_jwt_extended` (the canonical Flask framework family) but does
/// not match `os.flask_helper` or a `#` comment line that mentions flask.
///
/// The scan is line-based, not a parse:
///
/// * leading indentation is ignored, so imports nested in a function or
///   `try` block count too;
/// * every module of a comma-separated `import a, b as c` list is
///   checked, not just the first;
/// * for `import`, only the first `;`-separated statement on a line is
///   inspected and a trailing `# comment` is ignored.
///
/// Returns `false` when `bytes` is not valid UTF-8.
pub fn python_imports_any(bytes: &[u8], modules: &[&str]) -> bool {
    let Ok(text) = std::str::from_utf8(bytes) else {
        return false;
    };

    // A dotted package matches on its first segment only (`os.flask_helper`
    // is tested as `os`).
    let names_module =
        |pkg: &str| matches_prefix_ci(pkg.split('.').next().unwrap_or(pkg), modules);

    text.lines().any(|line| {
        let line = line.trim_start();
        if let Some(rest) = line.strip_prefix("from ") {
            // `from pkg.sub import name`: exactly one package, the first token.
            matches!(rest.split_whitespace().next(), Some(pkg) if names_module(pkg))
        } else if let Some(rest) = line.strip_prefix("import ") {
            // `import a, b as c`: each comma-separated item starts with the
            // module name; anything after it (`as alias`) is dropped.
            let statement = rest.split([';', '#']).next().unwrap_or("");
            statement
                .split(',')
                .filter_map(|item| item.split_whitespace().next())
                .any(|pkg| names_module(pkg))
        } else {
            false
        }
    })
}

/// Return `true` when `head` starts with any of `prefixes`, comparing
/// ASCII letters case-insensitively. Compares bytes in place, so no
/// lowercase copies are allocated.
fn matches_prefix_ci(head: &str, prefixes: &[&str]) -> bool {
    let head = head.as_bytes();
    prefixes.iter().any(|prefix| {
        let prefix = prefix.as_bytes();
        head.len() >= prefix.len() && head[..prefix.len()].eq_ignore_ascii_case(prefix)
    })
}
|
||||
|
||||
/// Return `true` when `bytes` contains a top-level Rust `use` (or
|
||||
/// `extern crate`) statement whose leading path segment matches one of
|
||||
/// `crates` (case-insensitive). Optional `pub` / `pub(crate)` /
|
||||
/// `pub(super)` visibility prefixes are stripped before the `use`
|
||||
/// keyword check.
|
||||
pub fn rust_uses_any(bytes: &[u8], crates: &[&str]) -> bool {
|
||||
let text = match std::str::from_utf8(bytes) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return false,
|
||||
};
|
||||
for line in text.lines() {
|
||||
let mut line = line.trim_start();
|
||||
if let Some(rest) = line.strip_prefix("pub") {
|
||||
let rest = rest.trim_start();
|
||||
line = if let Some(r) = rest.strip_prefix("(crate)") {
|
||||
r.trim_start()
|
||||
} else if let Some(r) = rest.strip_prefix("(super)") {
|
||||
r.trim_start()
|
||||
} else if let Some(r) = rest.strip_prefix("(self)") {
|
||||
r.trim_start()
|
||||
} else {
|
||||
rest
|
||||
};
|
||||
}
|
||||
let rest = if let Some(r) = line.strip_prefix("use ") {
|
||||
r
|
||||
} else if let Some(r) = line.strip_prefix("extern crate ") {
|
||||
r
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
let head = rest
|
||||
.split(['{', ';', ' ', ':', '/'])
|
||||
.next()
|
||||
.unwrap_or("")
|
||||
.trim();
|
||||
if head.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if matches_prefix_ci(head, crates) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Report whether `bytes` holds a Java `import` line (plain or
/// `import static`) whose imported path starts with one of `prefixes`.
///
/// Leading whitespace on a line is ignored. A line must begin with the
/// `import ` keyword to count, so a `// import …` comment does not
/// match. Input that is not valid UTF-8 yields `false`.
pub fn java_imports_any(bytes: &[u8], prefixes: &[&str]) -> bool {
    let Ok(text) = std::str::from_utf8(bytes) else {
        return false;
    };
    text.lines()
        .filter_map(|raw| raw.trim_start().strip_prefix("import "))
        .map(|tail| {
            // Normalise `static com.foo.Bar.baz ;` down to `com.foo.Bar.baz`.
            tail.strip_prefix("static ")
                .unwrap_or(tail)
                .trim()
                .trim_end_matches(';')
                .trim()
        })
        .any(|imported| prefixes.iter().any(|prefix| imported.starts_with(prefix)))
}
|
||||
|
||||
/// Walk every descendant of `root`, invoking `visit` once per node.
|
||||
/// Useful when a probe needs to look at multiple node kinds in a single
|
||||
/// pass (e.g. annotations + method declarations on the same walk).
|
||||
pub fn for_each_node_any<'tree, F>(root: Node<'tree>, mut visit: F)
|
||||
where
|
||||
F: FnMut(Node<'tree>),
|
||||
{
|
||||
fn recurse<'tree, F>(node: Node<'tree>, visit: &mut F)
|
||||
where
|
||||
F: FnMut(Node<'tree>),
|
||||
{
|
||||
visit(node);
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
recurse(child, visit);
|
||||
}
|
||||
}
|
||||
recurse(root, &mut visit);
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn unquote_strips_python_prefixes() {
        // (quoted literal, expected inner text)
        let cases = [
            ("b\"path\"", "path"),
            ("r'/api'", "/api"),
            ("f\"/users/{id}\"", "/users/{id}"),
            ("\"plain\"", "plain"),
        ];
        for (literal, inner) in cases {
            assert_eq!(unquote(literal), inner);
        }
    }

    #[test]
    fn leaf_matches_handles_dot_and_colon_paths() {
        let dotted = leaf_matches("flask_login.login_required", &["login_required"]);
        let scoped = leaf_matches("Auth::JwtRequired", &["JwtRequired"]);
        let unrelated = leaf_matches("OtherDecorator", &["login_required"]);
        assert!(dotted);
        assert!(scoped);
        assert!(!unrelated);
    }

    #[test]
    fn python_imports_any_matches_actual_imports() {
        let importing: [&[u8]; 3] = [
            b"from flask import Flask\n",
            b"import flask\n",
            b"from flask.app import Flask\n",
        ];
        for src in importing {
            assert!(python_imports_any(src, &["flask"]));
        }
        assert!(python_imports_any(b"import django.urls\n", &["django"]));

        // In order: comment-only mention, string-only mention, wrong module.
        let not_importing: [&[u8]; 3] = [b"# flask is great\n", b"x = 'flask'\n", b"import os\n"];
        for src in not_importing {
            assert!(!python_imports_any(src, &["flask"]));
        }
    }

    #[test]
    fn rust_uses_any_matches_use_statements() {
        let actix: [&[u8]; 2] = [b"use actix_web::web;\n", b"use actix_web;\n"];
        for src in actix {
            assert!(rust_uses_any(src, &["actix_web"]));
        }
        let axum: [&[u8]; 3] = [
            b"pub use axum::Router;\n",
            b"pub(crate) use axum::extract::Path;\n",
            b"extern crate axum;\n",
        ];
        for src in axum {
            assert!(rust_uses_any(src, &["axum"]));
        }

        // In order: comment-only mention, wrong crate.
        let rejected: [&[u8]; 2] = [b"// use actix_web::web;\n", b"use serde::Deserialize;\n"];
        for src in rejected {
            assert!(!rust_uses_any(src, &["actix_web"]));
        }
    }

    #[test]
    fn java_imports_any_matches_package_prefix() {
        // (source, prefix list, expected result)
        let cases: [(&[u8], &[&str], bool); 5] = [
            (b"import io.quarkus.runtime.Quarkus;\n", &["io.quarkus"], true),
            (b"import jakarta.ws.rs.GET;\n", &["jakarta.ws.rs"], true),
            (
                b"import static io.quarkus.runtime.Quarkus.run;\n",
                &["io.quarkus"],
                true,
            ),
            // Comment-only mention must not match.
            (
                b"// import io.quarkus.runtime.Quarkus;\n",
                &["io.quarkus"],
                false,
            ),
            // Wrong prefix.
            (
                b"import org.springframework.web.bind.annotation.GetMapping;\n",
                &["io.quarkus"],
                false,
            ),
        ];
        for (src, prefixes, expected) in cases {
            assert_eq!(java_imports_any(src, prefixes), expected);
        }
    }
}
|
||||
167
src/surface/lang/go_gin.rs
Normal file
167
src/surface/lang/go_gin.rs
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
//! Go + gin framework probe.
|
||||
//!
|
||||
//! Detects gin route registration:
|
||||
//!
|
||||
//! * `r.GET("/path", handler)` / `.POST(...)` / `.PUT` / `.DELETE`
|
||||
//! on a `*gin.Engine` or `*gin.RouterGroup`.
|
||||
//! * `r.Group("/prefix").GET("/sub", ...)` chained shapes.
|
||||
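//! * Middleware passed inline on the registration call, e.g.
//!   `r.GET("/path", AuthRequired(), handler)` — every argument after
//!   the route other than the handler is checked against
//!   [`AUTH_MIDDLEWARES`]. Middleware attached separately through
//!   `r.Use(...)` is not inspected by this probe.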
|
||||
//!
|
||||
//! Also recognises echo (`e.GET(...)`) and chi (`r.Get(...)`) by the
|
||||
//! same shape — receiver name `e` / `r` / `router` / `engine`.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::GIN_MIDDLEWARES as AUTH_MIDDLEWARES;
|
||||
|
||||
/// Selector names treated as route registrations: the upper-case verbs
/// and `Any`, followed by the title-case spellings of the same verbs.
const VERBS: &[&str] = &[
    // Upper-case spellings, plus the catch-all `Any`.
    "GET",
    "POST",
    "PUT",
    "DELETE",
    "PATCH",
    "OPTIONS",
    "HEAD",
    "Any",
    // Title-case spellings.
    "Get",
    "Post",
    "Put",
    "Delete",
    "Patch",
    "Options",
    "Head",
];
|
||||
|
||||
pub fn detect_gin_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_calls(tree.root_node(), &mut |call| {
|
||||
if let Some(node) = match_gin_call(call, bytes, &file_rel) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
|
||||
if node.kind() == "call_expression" {
|
||||
visit(node);
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
walk_calls(child, visit);
|
||||
}
|
||||
}
|
||||
|
||||
fn match_gin_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
|
||||
let func = call.child_by_field_name("function")?;
|
||||
if func.kind() != "selector_expression" {
|
||||
return None;
|
||||
}
|
||||
let operand = func.child_by_field_name("operand")?;
|
||||
let field = func.child_by_field_name("field")?;
|
||||
let field_text = field.utf8_text(bytes).ok()?;
|
||||
if !VERBS.contains(&field_text) {
|
||||
return None;
|
||||
}
|
||||
let operand_text = operand.utf8_text(bytes).ok()?;
|
||||
if !receiver_is_gin(operand_text) {
|
||||
return None;
|
||||
}
|
||||
let method = HttpMethod::from_ident(&field_text.to_ascii_uppercase())?;
|
||||
let args = call.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let positional: Vec<Node> = args
|
||||
.children(&mut cursor)
|
||||
.filter(|n| !matches!(n.kind(), "(" | ")" | ","))
|
||||
.collect();
|
||||
let route = positional
|
||||
.first()
|
||||
.and_then(|n| string_node_value(*n, bytes))?;
|
||||
let handler_node = positional.iter().rev().find(|n| {
|
||||
matches!(
|
||||
n.kind(),
|
||||
"identifier" | "selector_expression" | "func_literal"
|
||||
)
|
||||
})?;
|
||||
let handler_name = handler_node
|
||||
.utf8_text(bytes)
|
||||
.ok()
|
||||
.map(str::to_string)
|
||||
.unwrap_or_default();
|
||||
let auth_required = positional[1..]
|
||||
.iter()
|
||||
.filter(|n| !std::ptr::eq(*n, handler_node))
|
||||
.any(|n| arg_is_auth_marker(*n, bytes));
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(call, file_rel),
|
||||
framework: Framework::Gin,
|
||||
method,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(handler_node.start_position().row + 1) as u32,
|
||||
(handler_node.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Name-based guess at whether a call receiver is a router.
///
/// Only the segment after the last `.` is considered, trimmed and
/// compared ASCII case-insensitively. It is accepted when it is one of
/// the short names `r` / `g` / `e`, or when it ends with `router`,
/// `group`, or `engine` (which covers the bare words as well).
fn receiver_is_gin(text: &str) -> bool {
    let leaf = match text.rfind('.') {
        Some(dot) => &text[dot + 1..],
        None => text,
    };
    let leaf = leaf.trim().to_ascii_lowercase();
    matches!(leaf.as_str(), "r" | "g" | "e")
        || ["router", "group", "engine"]
            .iter()
            .any(|suffix| leaf.ends_with(suffix))
}
|
||||
|
||||
fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool {
|
||||
match node.kind() {
|
||||
"identifier" | "selector_expression" => node
|
||||
.utf8_text(bytes)
|
||||
.map(|t| leaf_matches(t, AUTH_MIDDLEWARES))
|
||||
.unwrap_or(false),
|
||||
"call_expression" => {
|
||||
let Some(callee) = node.child_by_field_name("function") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = callee.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
leaf_matches(text, AUTH_MIDDLEWARES)
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the Go grammar and hand back the tree together
    /// with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter_go::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    #[test]
    fn detects_get() {
        let src = "package main\nimport \"github.com/gin-gonic/gin\"\nfunc main() {\n r := gin.Default()\n r.GET(\"/users\", listUsers)\n}\nfunc listUsers(c *gin.Context) {}\n";
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("main.go");
        let nodes = detect_gin_routes(&tree, &bytes, &file, None);
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/users");
            }
            _ => panic!(),
        }
    }
}
|
||||
129
src/surface/lang/go_http.rs
Normal file
129
src/surface/lang/go_http.rs
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
//! Go + `net/http` framework probe.
|
||||
//!
|
||||
//! Recognises the canonical route registration shapes:
|
||||
//!
|
||||
//! * `http.HandleFunc("/path", handler)`
|
||||
//! * `http.Handle("/path", handler)`
|
||||
//! * `mux.HandleFunc("/path", handler)` (any `*http.ServeMux` receiver)
|
||||
//! * `http.NewServeMux()` derived receivers
|
||||
//!
|
||||
//! Method is `GET` by default — `net/http` registrations are
|
||||
//! method-agnostic at the routing layer; the handler dispatches on
|
||||
//! `r.Method` internally.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub fn detect_go_http_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_calls(tree.root_node(), &mut |call| {
|
||||
if let Some(node) = match_handle_call(call, bytes, &file_rel) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
|
||||
if node.kind() == "call_expression" {
|
||||
visit(node);
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
walk_calls(child, visit);
|
||||
}
|
||||
}
|
||||
|
||||
fn match_handle_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
|
||||
let func = call.child_by_field_name("function")?;
|
||||
if func.kind() != "selector_expression" {
|
||||
return None;
|
||||
}
|
||||
let operand = func.child_by_field_name("operand")?;
|
||||
let field = func.child_by_field_name("field")?;
|
||||
let field_text = field.utf8_text(bytes).ok()?;
|
||||
if field_text != "HandleFunc" && field_text != "Handle" {
|
||||
return None;
|
||||
}
|
||||
let operand_text = operand.utf8_text(bytes).ok()?;
|
||||
let leaf = operand_text.rsplit('.').next().unwrap_or(operand_text);
|
||||
if leaf != "http"
|
||||
&& !operand_text.contains("Mux")
|
||||
&& !operand_text.contains("mux")
|
||||
&& !operand_text.contains("Server")
|
||||
&& !operand_text.contains("Router")
|
||||
&& !operand_text.contains("router")
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let args = call.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let positional: Vec<Node> = args
|
||||
.children(&mut cursor)
|
||||
.filter(|n| !matches!(n.kind(), "(" | ")" | ","))
|
||||
.collect();
|
||||
if positional.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
let route = string_node_value(positional[0], bytes)?;
|
||||
let handler_node = positional[1];
|
||||
let handler_name = handler_function_name(handler_node, bytes).unwrap_or_default();
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(call, file_rel),
|
||||
framework: Framework::NetHttp,
|
||||
method: HttpMethod::GET,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(handler_node.start_position().row + 1) as u32,
|
||||
(handler_node.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required: false,
|
||||
}))
|
||||
}
|
||||
|
||||
fn handler_function_name(node: Node, bytes: &[u8]) -> Option<String> {
|
||||
match node.kind() {
|
||||
"identifier" | "selector_expression" => node.utf8_text(bytes).ok().map(str::to_string),
|
||||
"func_literal" => Some("anonymous".to_string()),
|
||||
_ => node.utf8_text(bytes).ok().map(str::to_string),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the Go grammar and hand back the tree together
    /// with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter_go::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    #[test]
    fn detects_handle_func() {
        let src = "package main\nimport \"net/http\"\nfunc main() {\n http.HandleFunc(\"/users\", listUsers)\n}\nfunc listUsers(w http.ResponseWriter, r *http.Request) {}\n";
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("main.go");
        let nodes = detect_go_http_routes(&tree, &bytes, &file, None);
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.framework, Framework::NetHttp);
                assert_eq!(ep.route, "/users");
                assert_eq!(ep.handler_name, "listUsers");
            }
            _ => panic!(),
        }
    }
}
|
||||
300
src/surface/lang/java_quarkus.rs
Normal file
300
src/surface/lang/java_quarkus.rs
Normal file
|
|
@ -0,0 +1,300 @@
|
|||
//! Java + Quarkus framework probe.
|
||||
//!
|
||||
//! Quarkus uses JAX-RS (`jakarta.ws.rs`) for HTTP routing on top of
|
||||
//! `RESTEasy Reactive` / `Quarkus REST`. The annotations are
|
||||
//! identical to plain JAX-RS, so this probe overlaps with
|
||||
//! [`super::java_servlet`] but emits the [`Framework::Quarkus`] tag
|
||||
//! via a Quarkus-specific recogniser:
|
||||
//!
|
||||
//! * The file has an `import` of an `io.quarkus` or `jakarta.ws.rs`
//!   package; AND
//! * The class is annotated with `@ApplicationScoped`,
//!   `@RequestScoped`, `@Singleton`, `@Dependent`, or `@Path`
//!   (the `QUARKUS_DI` list).
|
||||
//!
|
||||
//! Auth markers: `@Authenticated`, `@RolesAllowed`, `@PermitAll`,
|
||||
//! `@DenyAll` (Quarkus Security).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{java_imports_any, loc_for, rel_file};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::QUARKUS_ANNOTATIONS as AUTH_ANNOTATIONS;
|
||||
|
||||
/// Class-level annotation names (package qualifier already stripped)
/// that make `class_is_quarkus_resource` accept a class.
const QUARKUS_DI: &[&str] = &[
    // Bean scope annotations.
    "ApplicationScoped",
    "RequestScoped",
    "Singleton",
    "Dependent",
    // Resource root annotation.
    "Path",
];
|
||||
|
||||
const JAXRS_VERBS: &[(&str, HttpMethod)] = &[
|
||||
("GET", HttpMethod::GET),
|
||||
("POST", HttpMethod::POST),
|
||||
("PUT", HttpMethod::PUT),
|
||||
("DELETE", HttpMethod::DELETE),
|
||||
("PATCH", HttpMethod::PATCH),
|
||||
("HEAD", HttpMethod::HEAD),
|
||||
("OPTIONS", HttpMethod::OPTIONS),
|
||||
];
|
||||
|
||||
pub fn detect_quarkus_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
// Phase 23 follow-up: tighten witness to top-level `import`
|
||||
// statements with the strict package prefix, replacing the
|
||||
// previous AST `import_declaration.contains(...)` substring scan.
|
||||
if !java_imports_any(bytes, &["io.quarkus", "jakarta.ws.rs"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let mut out = Vec::new();
|
||||
walk_classes(tree.root_node(), &mut |class| {
|
||||
if !class_is_quarkus_resource(class, bytes) {
|
||||
return;
|
||||
}
|
||||
let class_path = class_path_annotation(class, bytes).unwrap_or_default();
|
||||
let class_auth = class_has_auth_annotation(class, bytes);
|
||||
let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else {
|
||||
return;
|
||||
};
|
||||
let mut cursor = body.walk();
|
||||
for member in body.children(&mut cursor) {
|
||||
if member.kind() != "method_declaration" {
|
||||
continue;
|
||||
}
|
||||
if let Some((method, method_path, method_auth)) =
|
||||
method_mapping(member, bytes, &class_path)
|
||||
{
|
||||
let name = method_name(member, bytes).unwrap_or_default();
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(member, &file_rel),
|
||||
framework: Framework::Quarkus,
|
||||
method,
|
||||
route: method_path,
|
||||
handler_name: name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(member.start_position().row + 1) as u32,
|
||||
(member.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required: class_auth || method_auth,
|
||||
}));
|
||||
}
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
fn class_is_quarkus_resource(class: Node, bytes: &[u8]) -> bool {
|
||||
let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") {
|
||||
Some(m) => m,
|
||||
None => return false,
|
||||
};
|
||||
let mut cursor = modifiers.walk();
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
if let Some(name) = annotation_name(ann, bytes) {
|
||||
let leaf = name.rsplit('.').next().unwrap_or(&name);
|
||||
if QUARKUS_DI.iter().any(|d| leaf.eq_ignore_ascii_case(d)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn walk_classes<'tree, F>(node: Node<'tree>, visit: &mut F)
|
||||
where
|
||||
F: FnMut(Node<'tree>),
|
||||
{
|
||||
if node.kind() == "class_declaration" {
|
||||
visit(node);
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
walk_classes(child, visit);
|
||||
}
|
||||
}
|
||||
|
||||
fn class_path_annotation(class: Node, bytes: &[u8]) -> Option<String> {
|
||||
annotation_string_arg(class, bytes, "Path")
|
||||
}
|
||||
|
||||
fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool {
|
||||
let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") {
|
||||
Some(m) => m,
|
||||
None => return false,
|
||||
};
|
||||
let mut cursor = modifiers.walk();
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
if let Some(name) = annotation_name(ann, bytes) {
|
||||
let leaf = name.rsplit('.').next().unwrap_or(&name);
|
||||
if AUTH_ANNOTATIONS
|
||||
.iter()
|
||||
.any(|a| leaf.eq_ignore_ascii_case(a))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn method_mapping(
|
||||
method: Node,
|
||||
bytes: &[u8],
|
||||
class_path: &str,
|
||||
) -> Option<(HttpMethod, String, bool)> {
|
||||
let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?;
|
||||
let mut cursor = modifiers.walk();
|
||||
let mut verb: Option<HttpMethod> = None;
|
||||
let mut method_path = String::new();
|
||||
let mut auth = false;
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = annotation_name(ann, bytes) else {
|
||||
continue;
|
||||
};
|
||||
let leaf = name.rsplit('.').next().unwrap_or(&name);
|
||||
if let Some((_, m)) = JAXRS_VERBS
|
||||
.iter()
|
||||
.find(|(n, _)| n.eq_ignore_ascii_case(leaf))
|
||||
{
|
||||
verb = Some(*m);
|
||||
}
|
||||
if leaf == "Path"
|
||||
&& let Some(p) = annotation_string_arg_from_node(ann, bytes)
|
||||
{
|
||||
method_path = p;
|
||||
}
|
||||
if AUTH_ANNOTATIONS
|
||||
.iter()
|
||||
.any(|a| leaf.eq_ignore_ascii_case(a))
|
||||
{
|
||||
auth = true;
|
||||
}
|
||||
}
|
||||
let v = verb?;
|
||||
let combined = if class_path.is_empty() {
|
||||
method_path
|
||||
} else if method_path.is_empty() {
|
||||
class_path.to_string()
|
||||
} else {
|
||||
format!(
|
||||
"{}/{}",
|
||||
class_path.trim_end_matches('/'),
|
||||
method_path.trim_start_matches('/')
|
||||
)
|
||||
};
|
||||
Some((v, combined, auth))
|
||||
}
|
||||
|
||||
fn annotation_string_arg(class: Node, bytes: &[u8], target_name: &str) -> Option<String> {
|
||||
let modifiers = crate::surface::lang::common::child_or_named(class, "modifiers")?;
|
||||
let mut cursor = modifiers.walk();
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = annotation_name(ann, bytes) else {
|
||||
continue;
|
||||
};
|
||||
let leaf = name.rsplit('.').next().unwrap_or(&name);
|
||||
if leaf == target_name {
|
||||
return annotation_string_arg_from_node(ann, bytes);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn annotation_string_arg_from_node(ann: Node, bytes: &[u8]) -> Option<String> {
|
||||
let args = ann.child_by_field_name("arguments")?;
|
||||
let raw = args.utf8_text(bytes).ok()?;
|
||||
let start = raw.find('"')? + 1;
|
||||
let end = raw[start..].find('"')? + start;
|
||||
Some(raw[start..end].to_string())
|
||||
}
|
||||
|
||||
fn annotation_name(ann: Node, bytes: &[u8]) -> Option<String> {
|
||||
ann.child_by_field_name("name")
|
||||
.and_then(|n| n.utf8_text(bytes).ok())
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
fn method_name(method: Node, bytes: &[u8]) -> Option<String> {
|
||||
method
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| n.utf8_text(bytes).ok())
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
fn is_annotation(node: Node) -> bool {
|
||||
matches!(node.kind(), "annotation" | "marker_annotation")
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the Java grammar and hand back the tree together
    /// with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter_java::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    #[test]
    fn detects_quarkus_resource() {
        let src = r#"
import io.quarkus.runtime.Quarkus;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;

@ApplicationScoped
@Path("/api")
public class GreetResource {
    @GET
    @Path("/hello")
    public String hello() { return "hi"; }
}
"#;
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("GreetResource.java");
        let nodes = detect_quarkus_routes(&tree, &bytes, &file, None);
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/api/hello");
            }
            _ => panic!(),
        }
    }

    #[test]
    fn ignores_non_quarkus_class() {
        let src = r#"
public class C {
    @GetMapping("/x")
    public void x() {}
}
"#;
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("C.java");
        let nodes = detect_quarkus_routes(&tree, &bytes, &file, None);
        assert!(nodes.is_empty());
    }
}
|
||||
295
src/surface/lang/java_servlet.rs
Normal file
295
src/surface/lang/java_servlet.rs
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
//! Java + Servlet (JAX-RS / Jakarta REST) framework probe.
|
||||
//!
|
||||
//! Recognises:
|
||||
//!
|
||||
//! * `@WebServlet("/path")` annotated `HttpServlet` subclasses — every
|
||||
//! `doGet` / `doPost` / `doPut` / `doDelete` method is one entry-point.
|
||||
//! * `@Path("/path")` annotated JAX-RS resource methods with verb
|
||||
//! annotation `@GET` / `@POST` / `@PUT` / `@DELETE` / `@PATCH`.
|
||||
//!
|
||||
//! Auth markers: `@DenyAll`, `@RolesAllowed`, `@PermitAll` — the
|
||||
//! presence of any of these implies a security configuration is
|
||||
//! actively gating the resource (we report `auth_required = true`
|
||||
//! conservatively for `@RolesAllowed` and `@DenyAll`).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::SERVLET_ANNOTATIONS as AUTH_ANNOTATIONS;
|
||||
|
||||
const SERVLET_VERBS: &[(&str, HttpMethod)] = &[
|
||||
("doGet", HttpMethod::GET),
|
||||
("doPost", HttpMethod::POST),
|
||||
("doPut", HttpMethod::PUT),
|
||||
("doDelete", HttpMethod::DELETE),
|
||||
("doHead", HttpMethod::HEAD),
|
||||
("doOptions", HttpMethod::OPTIONS),
|
||||
];
|
||||
|
||||
const JAXRS_VERBS: &[(&str, HttpMethod)] = &[
|
||||
("GET", HttpMethod::GET),
|
||||
("POST", HttpMethod::POST),
|
||||
("PUT", HttpMethod::PUT),
|
||||
("DELETE", HttpMethod::DELETE),
|
||||
("PATCH", HttpMethod::PATCH),
|
||||
("HEAD", HttpMethod::HEAD),
|
||||
("OPTIONS", HttpMethod::OPTIONS),
|
||||
];
|
||||
|
||||
pub fn detect_servlet_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_classes(tree.root_node(), &mut |class| {
|
||||
let class_path_servlet = class_web_servlet_path(class, bytes);
|
||||
let class_path_jaxrs = class_jaxrs_path(class, bytes);
|
||||
let class_auth = class_has_auth_annotation(class, bytes);
|
||||
let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else {
|
||||
return;
|
||||
};
|
||||
let mut cursor = body.walk();
|
||||
for member in body.children(&mut cursor) {
|
||||
if member.kind() != "method_declaration" {
|
||||
continue;
|
||||
}
|
||||
let name = method_name(member, bytes).unwrap_or_default();
|
||||
|
||||
// HttpServlet shape
|
||||
if let Some(class_path) = class_path_servlet.as_deref()
|
||||
&& let Some((_, method)) = SERVLET_VERBS
|
||||
.iter()
|
||||
.find(|(verb, _)| *verb == name.as_str())
|
||||
{
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(member, &file_rel),
|
||||
framework: Framework::JaxRs,
|
||||
method: *method,
|
||||
route: class_path.to_string(),
|
||||
handler_name: name.clone(),
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(member.start_position().row + 1) as u32,
|
||||
(member.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required: class_auth,
|
||||
}));
|
||||
continue;
|
||||
}
|
||||
|
||||
// JAX-RS shape
|
||||
if let Some((method, method_path, method_auth)) =
|
||||
jaxrs_method_mapping(member, bytes, class_path_jaxrs.as_deref().unwrap_or(""))
|
||||
{
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(member, &file_rel),
|
||||
framework: Framework::JaxRs,
|
||||
method,
|
||||
route: method_path,
|
||||
handler_name: name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(member.start_position().row + 1) as u32,
|
||||
(member.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required: class_auth || method_auth,
|
||||
}));
|
||||
}
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
fn walk_classes<'tree, F>(node: Node<'tree>, visit: &mut F)
|
||||
where
|
||||
F: FnMut(Node<'tree>),
|
||||
{
|
||||
if node.kind() == "class_declaration" {
|
||||
visit(node);
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
walk_classes(child, visit);
|
||||
}
|
||||
}
|
||||
|
||||
fn class_web_servlet_path(class: Node, bytes: &[u8]) -> Option<String> {
|
||||
annotation_string_arg(class, bytes, "WebServlet")
|
||||
}
|
||||
|
||||
fn class_jaxrs_path(class: Node, bytes: &[u8]) -> Option<String> {
|
||||
annotation_string_arg(class, bytes, "Path")
|
||||
}
|
||||
|
||||
fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool {
|
||||
let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") {
|
||||
Some(m) => m,
|
||||
None => return false,
|
||||
};
|
||||
let mut cursor = modifiers.walk();
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
if let Some(name) = annotation_name(ann, bytes)
|
||||
&& AUTH_ANNOTATIONS.iter().any(|a| {
|
||||
name.rsplit('.')
|
||||
.next()
|
||||
.unwrap_or(&name)
|
||||
.eq_ignore_ascii_case(a)
|
||||
})
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn jaxrs_method_mapping(
|
||||
method: Node,
|
||||
bytes: &[u8],
|
||||
class_path: &str,
|
||||
) -> Option<(HttpMethod, String, bool)> {
|
||||
let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?;
|
||||
let mut cursor = modifiers.walk();
|
||||
let mut verb: Option<HttpMethod> = None;
|
||||
let mut method_path = String::new();
|
||||
let mut auth = false;
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = annotation_name(ann, bytes) else {
|
||||
continue;
|
||||
};
|
||||
let leaf = name.rsplit('.').next().unwrap_or(&name);
|
||||
if let Some((_, m)) = JAXRS_VERBS
|
||||
.iter()
|
||||
.find(|(n, _)| n.eq_ignore_ascii_case(leaf))
|
||||
{
|
||||
verb = Some(*m);
|
||||
}
|
||||
if leaf == "Path"
|
||||
&& let Some(path) = annotation_string_arg_from_node(ann, bytes)
|
||||
{
|
||||
method_path = path;
|
||||
}
|
||||
if AUTH_ANNOTATIONS
|
||||
.iter()
|
||||
.any(|a| leaf.eq_ignore_ascii_case(a))
|
||||
{
|
||||
auth = true;
|
||||
}
|
||||
}
|
||||
let v = verb?;
|
||||
let combined = if class_path.is_empty() {
|
||||
method_path
|
||||
} else if method_path.is_empty() {
|
||||
class_path.to_string()
|
||||
} else {
|
||||
format!(
|
||||
"{}/{}",
|
||||
class_path.trim_end_matches('/'),
|
||||
method_path.trim_start_matches('/')
|
||||
)
|
||||
};
|
||||
Some((v, combined, auth))
|
||||
}
|
||||
|
||||
fn annotation_string_arg(class: Node, bytes: &[u8], target_name: &str) -> Option<String> {
|
||||
let modifiers = crate::surface::lang::common::child_or_named(class, "modifiers")?;
|
||||
let mut cursor = modifiers.walk();
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = annotation_name(ann, bytes) else {
|
||||
continue;
|
||||
};
|
||||
let leaf = name.rsplit('.').next().unwrap_or(&name);
|
||||
if leaf == target_name {
|
||||
return annotation_string_arg_from_node(ann, bytes);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn annotation_string_arg_from_node(ann: Node, bytes: &[u8]) -> Option<String> {
|
||||
let args = ann.child_by_field_name("arguments")?;
|
||||
let raw = args.utf8_text(bytes).ok()?;
|
||||
let start = raw.find('"')? + 1;
|
||||
let end = raw[start..].find('"')? + start;
|
||||
Some(raw[start..end].to_string())
|
||||
}
|
||||
|
||||
fn annotation_name(ann: Node, bytes: &[u8]) -> Option<String> {
|
||||
ann.child_by_field_name("name")
|
||||
.and_then(|n| n.utf8_text(bytes).ok())
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
fn method_name(method: Node, bytes: &[u8]) -> Option<String> {
|
||||
method
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| n.utf8_text(bytes).ok())
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
fn is_annotation(node: Node) -> bool {
|
||||
matches!(node.kind(), "annotation" | "marker_annotation")
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses `src` with the Java grammar; returns the tree and the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_java::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// A class-level `@Path` plus a method-level `@GET` / `@Path` yields one
    /// GET entry point with the two paths joined.
    #[test]
    fn detects_jaxrs_get() {
        let src = r#"
@Path("/users")
public class UsersResource {
    @GET
    @Path("/{id}")
    public User get() { return null; }
}
"#;
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("UsersResource.java");
        let nodes = detect_servlet_routes(&tree, &bytes, &file, None);
        assert!(!nodes.is_empty());
        let Some(SurfaceNode::EntryPoint(ep)) = nodes.first() else {
            panic!()
        };
        assert_eq!(ep.method, HttpMethod::GET);
        assert_eq!(ep.route, "/users/{id}");
    }

    /// A `@WebServlet` class with `doGet` and `doPost` yields two entry points.
    #[test]
    fn detects_servlet_doget() {
        let src = r#"
@WebServlet("/admin")
public class Admin extends HttpServlet {
    public void doGet(HttpServletRequest req, HttpServletResponse resp) {}
    public void doPost(HttpServletRequest req, HttpServletResponse resp) {}
}
"#;
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("Admin.java");
        let nodes = detect_servlet_routes(&tree, &bytes, &file, None);
        assert_eq!(nodes.len(), 2);
    }
}
|
||||
288
src/surface/lang/java_spring.rs
Normal file
288
src/surface/lang/java_spring.rs
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
//! Java + Spring framework probe.
|
||||
//!
|
||||
//! Recognises Spring controller methods annotated with
|
||||
//! `@RequestMapping` / `@GetMapping` / `@PostMapping` / `@PutMapping`
|
||||
//! / `@PatchMapping` / `@DeleteMapping`. The route path is the
|
||||
//! concatenation of class-level `@RequestMapping(value=...)` /
|
||||
//! `@RestController` and method-level `value=...` arguments.
|
||||
//!
|
||||
//! `auth_required` fires when the method, the enclosing class, or the
|
||||
//! `value=` argument lists a Spring-Security annotation
|
||||
//! ([`AUTH_ANNOTATIONS`]).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::SPRING_ANNOTATIONS as AUTH_ANNOTATIONS;
|
||||
|
||||
const MAPPING_ANNOTATIONS: &[(&str, Option<HttpMethod>)] = &[
|
||||
("RequestMapping", None),
|
||||
("GetMapping", Some(HttpMethod::GET)),
|
||||
("PostMapping", Some(HttpMethod::POST)),
|
||||
("PutMapping", Some(HttpMethod::PUT)),
|
||||
("PatchMapping", Some(HttpMethod::PATCH)),
|
||||
("DeleteMapping", Some(HttpMethod::DELETE)),
|
||||
];
|
||||
|
||||
pub fn detect_spring_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_classes(tree.root_node(), &mut |class| {
|
||||
let class_path = class_request_mapping_path(class, bytes);
|
||||
let class_auth = class_has_auth_annotation(class, bytes);
|
||||
let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else {
|
||||
return;
|
||||
};
|
||||
let mut cursor = body.walk();
|
||||
for member in body.children(&mut cursor) {
|
||||
if member.kind() != "method_declaration" {
|
||||
continue;
|
||||
}
|
||||
if let Some((method, route_path, auth)) = method_mapping(member, bytes, &class_path) {
|
||||
let auth_required = class_auth || auth;
|
||||
let handler_name = method_name(member, bytes).unwrap_or_default();
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(member, &file_rel),
|
||||
framework: Framework::Spring,
|
||||
method,
|
||||
route: route_path,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(member.start_position().row + 1) as u32,
|
||||
(member.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}));
|
||||
}
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
fn walk_classes<'tree, F>(node: Node<'tree>, visit: &mut F)
|
||||
where
|
||||
F: FnMut(Node<'tree>),
|
||||
{
|
||||
if node.kind() == "class_declaration" {
|
||||
visit(node);
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
walk_classes(child, visit);
|
||||
}
|
||||
}
|
||||
|
||||
fn class_request_mapping_path(class: Node, bytes: &[u8]) -> String {
|
||||
let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") {
|
||||
Some(m) => m,
|
||||
None => return String::new(),
|
||||
};
|
||||
let mut cursor = modifiers.walk();
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
let Some((name, args_text)) = annotation_name_and_args(ann, bytes) else {
|
||||
continue;
|
||||
};
|
||||
if name == "RequestMapping" {
|
||||
return extract_first_path(&args_text);
|
||||
}
|
||||
}
|
||||
String::new()
|
||||
}
|
||||
|
||||
fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool {
|
||||
let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") {
|
||||
Some(m) => m,
|
||||
None => return false,
|
||||
};
|
||||
let mut cursor = modifiers.walk();
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
if let Some((name, _)) = annotation_name_and_args(ann, bytes)
|
||||
&& AUTH_ANNOTATIONS.iter().any(|a| leaf_matches(&name, &[a]))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn method_mapping(
|
||||
method: Node,
|
||||
bytes: &[u8],
|
||||
class_path: &str,
|
||||
) -> Option<(HttpMethod, String, bool)> {
|
||||
let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?;
|
||||
let mut cursor = modifiers.walk();
|
||||
let mut auth = false;
|
||||
let mut found: Option<(HttpMethod, String)> = None;
|
||||
for ann in modifiers.children(&mut cursor) {
|
||||
if !is_annotation(ann) {
|
||||
continue;
|
||||
}
|
||||
let Some((name, args_text)) = annotation_name_and_args(ann, bytes) else {
|
||||
continue;
|
||||
};
|
||||
if AUTH_ANNOTATIONS.iter().any(|a| leaf_matches(&name, &[a])) {
|
||||
auth = true;
|
||||
}
|
||||
if found.is_some() {
|
||||
continue;
|
||||
}
|
||||
for (ann_name, default_method) in MAPPING_ANNOTATIONS {
|
||||
if name == *ann_name {
|
||||
let mut method_route = extract_first_path(&args_text);
|
||||
if method_route.is_empty() && !class_path.is_empty() {
|
||||
// Class-only mapping; method has no path.
|
||||
method_route = class_path.to_string();
|
||||
} else if !class_path.is_empty() {
|
||||
method_route = format!(
|
||||
"{}/{}",
|
||||
class_path.trim_end_matches('/'),
|
||||
method_route.trim_start_matches('/')
|
||||
);
|
||||
}
|
||||
let method = default_method
|
||||
.or_else(|| extract_request_method_from_args(&args_text))
|
||||
.unwrap_or(HttpMethod::GET);
|
||||
found = Some((method, method_route));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
let (m, p) = found?;
|
||||
Some((m, p, auth))
|
||||
}
|
||||
|
||||
fn is_annotation(node: Node) -> bool {
|
||||
matches!(node.kind(), "annotation" | "marker_annotation")
|
||||
}
|
||||
|
||||
/// Returns `(annotation_name, raw_args_text)` for an annotation node.
|
||||
fn annotation_name_and_args(ann: Node, bytes: &[u8]) -> Option<(String, String)> {
|
||||
let name_node = ann.child_by_field_name("name")?;
|
||||
let raw_name = name_node.utf8_text(bytes).ok()?;
|
||||
let leaf = raw_name.rsplit('.').next().unwrap_or(raw_name).to_string();
|
||||
let args_text = ann
|
||||
.child_by_field_name("arguments")
|
||||
.and_then(|a| a.utf8_text(bytes).ok())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
Some((leaf, args_text))
|
||||
}
|
||||
|
||||
/// Returns the contents of the first complete `"..."` literal in `args_text`.
///
/// The scan is purely textual (backslash escapes are not interpreted). An
/// empty string is returned when there is no opening quote or the first
/// opening quote is never closed.
fn extract_first_path(args_text: &str) -> String {
    let Some((_, after_open)) = args_text.split_once('"') else {
        return String::new();
    };
    match after_open.split_once('"') {
        Some((literal, _)) => literal.to_string(),
        None => String::new(),
    }
}
|
||||
|
||||
fn extract_request_method_from_args(args_text: &str) -> Option<HttpMethod> {
|
||||
// RequestMapping(method = RequestMethod.POST)
|
||||
for verb in ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"] {
|
||||
if args_text.contains(&format!("RequestMethod.{}", verb)) {
|
||||
return HttpMethod::from_ident(verb);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn method_name(method: Node, bytes: &[u8]) -> Option<String> {
|
||||
method
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| n.utf8_text(bytes).ok())
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses `src` with the Java grammar; returns the tree and the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_java::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// Runs the probe over `src` as if it were the file `file_name`.
    fn routes(src: &str, file_name: &str) -> Vec<SurfaceNode> {
        let (tree, bytes) = parse(src);
        detect_spring_routes(&tree, &bytes, &PathBuf::from(file_name), None)
    }

    #[test]
    fn detects_get_mapping() {
        let nodes = routes(
            r#"
@RestController
public class UserController {
    @GetMapping("/users")
    public List<User> list() { return null; }
}
"#,
            "UserController.java",
        );
        assert_eq!(nodes.len(), 1);
        let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
            panic!()
        };
        assert_eq!(ep.method, HttpMethod::GET);
        assert_eq!(ep.route, "/users");
        assert_eq!(ep.handler_name, "list");
    }

    #[test]
    fn class_request_mapping_prefix_concatenates() {
        let nodes = routes(
            r#"
@RequestMapping("/api")
public class C {
    @PostMapping("/users")
    public void create() {}
}
"#,
            "C.java",
        );
        let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
            panic!()
        };
        assert_eq!(ep.route, "/api/users");
    }

    #[test]
    fn pre_authorize_marks_auth() {
        let nodes = routes(
            r#"
public class C {
    @PreAuthorize("hasRole('ADMIN')")
    @GetMapping("/admin")
    public void admin() {}
}
"#,
            "C.java",
        );
        let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
            panic!()
        };
        assert!(ep.auth_required);
    }
}
|
||||
253
src/surface/lang/js_express.rs
Normal file
253
src/surface/lang/js_express.rs
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
//! JavaScript / TypeScript + Express framework probe.
|
||||
//!
|
||||
//! Detects route registration calls of the form `app.METHOD(path, ...)`
|
||||
//! / `router.METHOD(path, ...)` for the standard set of HTTP verbs plus
|
||||
//! `all` (`use` is not in `VERBS`, so `app.use(...)` is never reported). The handler is the *last* function-shaped argument
|
||||
//! (Express convention: `(path, ...middleware, handler)`).
|
||||
//!
|
||||
//! `auth_required` fires when any positional argument before the
|
||||
//! handler is an identifier matching one of the auth-middleware names
|
||||
//! in [`AUTH_MIDDLEWARES`] (passport's `requireAuth`, custom guards),
|
||||
//! or when an inline `passport.authenticate(...)` call appears in the
|
||||
//! middleware list.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::EXPRESS_MIDDLEWARES as AUTH_MIDDLEWARES;
|
||||
|
||||
/// Method names on an Express app / router that register a route.
/// `use` is intentionally absent from this list.
const VERBS: &[&str] = &[
    "get", "post", "put", "delete", "patch", "options", "head", "all",
];
|
||||
|
||||
pub fn detect_express_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_calls(tree.root_node(), &mut |call| {
|
||||
if let Some(node) = match_express_call(call, bytes, &file_rel) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Calls `visit` on every `call_expression` node in the subtree rooted at
/// `node`, in pre-order (an outer call is visited before the calls nested in
/// its callee or arguments).
///
/// The traversal is driven by one `TreeCursor` rather than by recursion, so
/// stack usage stays constant no matter how deeply the parsed source nests.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    let mut cursor = node.walk();
    loop {
        let current = cursor.node();
        if current.kind() == "call_expression" {
            visit(current);
        }
        if cursor.goto_first_child() {
            continue;
        }
        // No children: move to the next sibling, climbing until one exists.
        // The cursor is rooted at `node`, so once the subtree is exhausted
        // both moves fail and the walk ends.
        while !cursor.goto_next_sibling() {
            if !cursor.goto_parent() {
                return;
            }
        }
    }
}
|
||||
|
||||
fn match_express_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
|
||||
let func = call.child_by_field_name("function")?;
|
||||
if func.kind() != "member_expression" {
|
||||
return None;
|
||||
}
|
||||
let object = func.child_by_field_name("object")?;
|
||||
let file_text = std::str::from_utf8(bytes).unwrap_or("");
|
||||
let has_express_witness = file_text.contains("express");
|
||||
if !receiver_is_express(object, bytes, has_express_witness) {
|
||||
return None;
|
||||
}
|
||||
let prop = func.child_by_field_name("property")?;
|
||||
let prop_text = prop.utf8_text(bytes).ok()?;
|
||||
if !VERBS.contains(&prop_text) {
|
||||
return None;
|
||||
}
|
||||
let method = HttpMethod::from_ident(prop_text).unwrap_or(HttpMethod::GET);
|
||||
let args = call.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let mut positional: Vec<Node> = args.children(&mut cursor).collect();
|
||||
positional.retain(|n| n.kind() != "(" && n.kind() != ")" && n.kind() != ",");
|
||||
let route = positional
|
||||
.first()
|
||||
.filter(|n| n.kind() == "string" || n.kind() == "template_string")
|
||||
.and_then(|n| string_node_value(*n, bytes))
|
||||
.unwrap_or_default();
|
||||
if route.is_empty() && prop_text != "use" {
|
||||
// bare `app.use(handler)` is middleware, not an entry point
|
||||
return None;
|
||||
}
|
||||
let handler_node = find_handler(&positional)?;
|
||||
let handler_id = handler_node.id();
|
||||
let auth_required = positional[1..]
|
||||
.iter()
|
||||
.filter(|n| n.id() != handler_id)
|
||||
.any(|n| arg_is_auth_marker(*n, bytes));
|
||||
let handler_name = handler_function_name(handler_node, bytes).unwrap_or_default();
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(call, file_rel),
|
||||
framework: Framework::Express,
|
||||
method,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(handler_node.start_position().row + 1) as u32,
|
||||
(handler_node.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Returns the last argument whose node kind can denote a handler: an inline
/// function of any flavour, or an identifier / member expression referring to
/// one. Returns `None` when no argument qualifies.
fn find_handler<'a>(positional: &[Node<'a>]) -> Option<Node<'a>> {
    const HANDLER_KINDS: [&str; 6] = [
        "arrow_function",
        "function",
        "function_expression",
        "function_declaration",
        "identifier",
        "member_expression",
    ];
    positional
        .iter()
        .copied()
        .rfind(|arg| HANDLER_KINDS.contains(&arg.kind()))
}
|
||||
|
||||
fn handler_function_name(node: Node, bytes: &[u8]) -> Option<String> {
|
||||
if matches!(node.kind(), "identifier" | "member_expression") {
|
||||
return node.utf8_text(bytes).ok().map(str::to_string);
|
||||
}
|
||||
if let Some(name_node) = node.child_by_field_name("name")
|
||||
&& let Ok(name) = name_node.utf8_text(bytes)
|
||||
{
|
||||
return Some(name.to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool {
|
||||
match node.kind() {
|
||||
"identifier" | "member_expression" => node
|
||||
.utf8_text(bytes)
|
||||
.map(|t| leaf_matches(t, AUTH_MIDDLEWARES))
|
||||
.unwrap_or(false),
|
||||
"call_expression" => {
|
||||
let Some(callee) = node.child_by_field_name("function") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = callee.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
leaf_matches(text, AUTH_MIDDLEWARES) || text.contains("passport.authenticate")
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn receiver_is_express(object: Node, bytes: &[u8], has_express_witness: bool) -> bool {
|
||||
fn name_matches_strong(text: &str) -> bool {
|
||||
let lower = text.to_ascii_lowercase();
|
||||
lower == "app" || lower == "server" || lower.ends_with("_app") || lower.ends_with("api")
|
||||
}
|
||||
fn name_matches_router(text: &str) -> bool {
|
||||
let lower = text.to_ascii_lowercase();
|
||||
lower == "router" || lower.ends_with("router")
|
||||
}
|
||||
let check_name = |text: &str| -> bool {
|
||||
// `router` / `*router` is ambiguous with koa-router; require a
|
||||
// file-level `express` witness before claiming it. Strong
|
||||
// shapes (`app`, `server`, `*_app`, `*api`) are Express-only
|
||||
// conventions and don't need a witness.
|
||||
if name_matches_strong(text) {
|
||||
return true;
|
||||
}
|
||||
if name_matches_router(text) {
|
||||
return has_express_witness;
|
||||
}
|
||||
false
|
||||
};
|
||||
match object.kind() {
|
||||
"identifier" => object.utf8_text(bytes).ok().is_some_and(check_name),
|
||||
"member_expression" => object
|
||||
.child_by_field_name("property")
|
||||
.and_then(|p| p.utf8_text(bytes).ok())
|
||||
.is_some_and(check_name),
|
||||
"call_expression" => {
|
||||
let Some(callee) = object.child_by_field_name("function") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = callee.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text);
|
||||
leaf == "express" || leaf == "Router" || leaf == "createApp"
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses `src` with the JavaScript grammar; returns the tree and the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_javascript::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// Runs the probe over `src` as if it were `server.js`.
    fn routes(src: &str) -> Vec<SurfaceNode> {
        let (tree, bytes) = parse(src);
        detect_express_routes(&tree, &bytes, &PathBuf::from("server.js"), None)
    }

    #[test]
    fn detects_get_route() {
        let nodes =
            routes("const app = express();\napp.get('/users', (req, res) => res.send('ok'));\n");
        assert_eq!(nodes.len(), 1);
        let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
            panic!()
        };
        assert_eq!(ep.framework, Framework::Express);
        assert_eq!(ep.method, HttpMethod::GET);
        assert_eq!(ep.route, "/users");
    }

    #[test]
    fn detects_auth_middleware() {
        let nodes = routes("app.post('/secret', requireAuth, (req, res) => {});\n");
        let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
            panic!()
        };
        assert!(ep.auth_required);
    }

    #[test]
    fn router_receiver_without_express_witness_does_not_match() {
        // Pure koa-router file — express probe must not claim it.
        let nodes = routes(
            "const Router = require('@koa/router');\nconst router = new Router();\nrouter.get('/users', async ctx => {});\n",
        );
        assert!(
            nodes.is_empty(),
            "express probe FP'd on koa-only file: {nodes:?}"
        );
    }

    #[test]
    fn router_receiver_with_express_witness_still_matches() {
        // express + Router.get is a real Express idiom — must still detect.
        let nodes = routes(
            "const express = require('express');\nconst router = express.Router();\nrouter.get('/users', (req, res) => {});\n",
        );
        assert_eq!(nodes.len(), 1);
    }
}
|
||||
180
src/surface/lang/js_koa.rs
Normal file
180
src/surface/lang/js_koa.rs
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
//! JavaScript / TypeScript + Koa framework probe.
|
||||
//!
|
||||
//! Koa apps register routes through `koa-router` (or `@koa/router`):
|
||||
//! `router.get(path, handler)`, `router.post(path, ...middleware,
|
||||
//! handler)`, etc. The receiver is named `router`, `r`, or has a
|
||||
//! `_router`/`Router` suffix. Additional Koa-specific recognition:
|
||||
//!
|
||||
//! * `router.use('/path', subrouter.routes())` is *not* an
|
||||
//! entry-point — the inner middleware chain is. Filtered by
|
||||
//! ignoring `use` for path-less middleware mounting.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::KOA_MIDDLEWARES as AUTH_MIDDLEWARES;
|
||||
|
||||
/// Method names on a Koa router that register a route.
/// `use` is intentionally absent from this list.
const VERBS: &[&str] = &[
    "get", "post", "put", "delete", "patch", "options", "head", "all",
];
|
||||
|
||||
pub fn detect_koa_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_calls(tree.root_node(), &mut |call| {
|
||||
if let Some(node) = match_koa_call(call, bytes, &file_rel) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Calls `visit` on every `call_expression` node in the subtree rooted at
/// `node`, in pre-order (an outer call is visited before the calls nested in
/// its callee or arguments).
///
/// The traversal is driven by one `TreeCursor` rather than by recursion, so
/// stack usage stays constant no matter how deeply the parsed source nests.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    let mut cursor = node.walk();
    loop {
        let current = cursor.node();
        if current.kind() == "call_expression" {
            visit(current);
        }
        if cursor.goto_first_child() {
            continue;
        }
        // No children: move to the next sibling, climbing until one exists.
        // The cursor is rooted at `node`, so once the subtree is exhausted
        // both moves fail and the walk ends.
        while !cursor.goto_next_sibling() {
            if !cursor.goto_parent() {
                return;
            }
        }
    }
}
|
||||
|
||||
fn match_koa_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
|
||||
let func = call.child_by_field_name("function")?;
|
||||
if func.kind() != "member_expression" {
|
||||
return None;
|
||||
}
|
||||
let object = func.child_by_field_name("object")?;
|
||||
if !receiver_is_koa_router(object, bytes) {
|
||||
return None;
|
||||
}
|
||||
let prop = func.child_by_field_name("property")?;
|
||||
let prop_text = prop.utf8_text(bytes).ok()?;
|
||||
if !VERBS.contains(&prop_text) {
|
||||
return None;
|
||||
}
|
||||
let method = HttpMethod::from_ident(prop_text).unwrap_or(HttpMethod::GET);
|
||||
let args = call.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let mut positional: Vec<Node> = args.children(&mut cursor).collect();
|
||||
positional.retain(|n| n.kind() != "(" && n.kind() != ")" && n.kind() != ",");
|
||||
let route_idx = positional
|
||||
.iter()
|
||||
.position(|n| matches!(n.kind(), "string" | "template_string"))?;
|
||||
let route = string_node_value(positional[route_idx], bytes).unwrap_or_default();
|
||||
let handler_node = positional.iter().rev().find(|n| {
|
||||
matches!(
|
||||
n.kind(),
|
||||
"arrow_function"
|
||||
| "function"
|
||||
| "function_expression"
|
||||
| "function_declaration"
|
||||
| "identifier"
|
||||
| "member_expression"
|
||||
)
|
||||
})?;
|
||||
let auth_required = positional
|
||||
.iter()
|
||||
.filter(|n| !std::ptr::eq(*n, handler_node))
|
||||
.any(|n| arg_is_auth_marker(*n, bytes));
|
||||
let handler_name = handler_function_name(*handler_node, bytes).unwrap_or_default();
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(call, file_rel),
|
||||
framework: Framework::Koa,
|
||||
method,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(handler_node.start_position().row + 1) as u32,
|
||||
(handler_node.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}))
|
||||
}
|
||||
|
||||
fn handler_function_name(node: Node, bytes: &[u8]) -> Option<String> {
|
||||
if matches!(node.kind(), "identifier" | "member_expression") {
|
||||
return node.utf8_text(bytes).ok().map(str::to_string);
|
||||
}
|
||||
if let Some(name_node) = node.child_by_field_name("name")
|
||||
&& let Ok(name) = name_node.utf8_text(bytes)
|
||||
{
|
||||
return Some(name.to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool {
|
||||
match node.kind() {
|
||||
"identifier" | "member_expression" => node
|
||||
.utf8_text(bytes)
|
||||
.map(|t| leaf_matches(t, AUTH_MIDDLEWARES))
|
||||
.unwrap_or(false),
|
||||
"call_expression" => {
|
||||
let Some(callee) = node.child_by_field_name("function") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = callee.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
leaf_matches(text, AUTH_MIDDLEWARES)
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn receiver_is_koa_router(object: Node, bytes: &[u8]) -> bool {
|
||||
fn name_matches(text: &str) -> bool {
|
||||
let lower = text.to_ascii_lowercase();
|
||||
lower == "router" || lower == "r" || lower.ends_with("_router") || lower.ends_with("router")
|
||||
}
|
||||
match object.kind() {
|
||||
"identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches),
|
||||
"member_expression" => object
|
||||
.child_by_field_name("property")
|
||||
.and_then(|p| p.utf8_text(bytes).ok())
|
||||
.is_some_and(name_matches),
|
||||
"call_expression" => {
|
||||
let Some(callee) = object.child_by_field_name("function") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = callee.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text);
|
||||
leaf == "Router" || leaf == "KoaRouter"
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses `src` with the JavaScript grammar; returns the tree and the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_javascript::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// A `router.get(path, handler)` call yields exactly one entry point.
    #[test]
    fn detects_router_get() {
        let src = "const router = new Router();\nrouter.get('/users', async ctx => { ctx.body = []; });\n";
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("server.js");
        let nodes = detect_koa_routes(&tree, &bytes, &file, None);
        assert_eq!(nodes.len(), 1);
    }
}
|
||||
37
src/surface/lang/mod.rs
Normal file
37
src/surface/lang/mod.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
//! Per-language framework probes.
|
||||
//!
|
||||
//! Phase 21 shipped Python + Flask. Phase 22 generalises detection to:
|
||||
//! Python (FastAPI, Django), JS/TS (Express, Koa, Next.js), Java
|
||||
//! (Spring, Servlet/JAX-RS, Quarkus), Go (`net/http`, gin), PHP
|
||||
//! (Laravel, Slim), Ruby (Sinatra, Rails), Rust (axum, actix-web).
|
||||
//!
|
||||
//! Every probe exposes one public `detect_<framework>_routes` function
|
||||
//! returning `Vec<SurfaceNode>` (one [`super::SurfaceNode::EntryPoint`]
|
||||
//! per recognised route). Probes are pure functions — no I/O, no
|
||||
//! state.
|
||||
|
||||
pub mod common;
|
||||
|
||||
pub mod python_django;
|
||||
pub mod python_fastapi;
|
||||
pub mod python_flask;
|
||||
|
||||
pub mod js_express;
|
||||
pub mod js_koa;
|
||||
pub mod ts_next;
|
||||
|
||||
pub mod java_quarkus;
|
||||
pub mod java_servlet;
|
||||
pub mod java_spring;
|
||||
|
||||
pub mod go_gin;
|
||||
pub mod go_http;
|
||||
|
||||
pub mod php_laravel;
|
||||
pub mod php_slim;
|
||||
|
||||
pub mod ruby_rails;
|
||||
pub mod ruby_sinatra;
|
||||
|
||||
pub mod rust_actix;
|
||||
pub mod rust_axum;
|
||||
169
src/surface/lang/php_laravel.rs
Normal file
169
src/surface/lang/php_laravel.rs
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
//! PHP + Laravel framework probe.
|
||||
//!
|
||||
//! Recognises Laravel route declarations:
|
||||
//!
|
||||
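//! * `Route::get('/path', $handler)` / `::post(...)` / `::put` /
//!   `::patch` / `::delete` / `::options` / `::head` — the `VERBS` table
//!   below is the authoritative list. `::match` (whose first argument is
//!   the verb list rather than the path) is not recognised.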
|
||||
//! * `Route::resource('users', UserController::class)` (omitted —
|
||||
//! resource controller dispatch is path-derived; Phase 22 ships the
|
||||
//! primary verb shape only)
|
||||
//!
|
||||
//! `auth_required` fires when the route call is followed by a
|
||||
//! `->middleware('auth')` chain or the closure is wrapped in
|
||||
//! `Route::middleware(['auth'])->group(...)`.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
const VERBS: &[(&str, HttpMethod)] = &[
|
||||
("get", HttpMethod::GET),
|
||||
("post", HttpMethod::POST),
|
||||
("put", HttpMethod::PUT),
|
||||
("patch", HttpMethod::PATCH),
|
||||
("delete", HttpMethod::DELETE),
|
||||
("options", HttpMethod::OPTIONS),
|
||||
("head", HttpMethod::HEAD),
|
||||
];
|
||||
|
||||
pub fn detect_laravel_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_calls(tree.root_node(), &mut |call| {
|
||||
if let Some(node) = match_laravel_call(call, bytes, &file_rel) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Pre-order traversal of the subtree rooted at `node`, invoking `visit`
/// on every function, scoped (`A::b()`) and member (`$a->b()`) call
/// expression. Parents are visited before their children and siblings in
/// source order.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    const CALL_KINDS: [&str; 3] = [
        "function_call_expression",
        "scoped_call_expression",
        "member_call_expression",
    ];
    // Explicit stack; children are pushed in reverse so they pop in
    // source order, reproducing a recursive pre-order walk.
    let mut pending = vec![node];
    while let Some(current) = pending.pop() {
        if CALL_KINDS.contains(&current.kind()) {
            visit(current);
        }
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        pending.extend(children.into_iter().rev());
    }
}
|
||||
|
||||
fn match_laravel_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
|
||||
if call.kind() != "scoped_call_expression" {
|
||||
return None;
|
||||
}
|
||||
let scope = call.child_by_field_name("scope")?;
|
||||
let scope_text = scope.utf8_text(bytes).ok()?;
|
||||
if scope_text != "Route" && !scope_text.contains("Route") {
|
||||
return None;
|
||||
}
|
||||
let name = call.child_by_field_name("name")?;
|
||||
let name_text = name.utf8_text(bytes).ok()?;
|
||||
let (_, method) = VERBS
|
||||
.iter()
|
||||
.find(|(v, _)| v.eq_ignore_ascii_case(name_text))?;
|
||||
let args = call.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let positional: Vec<Node> = args
|
||||
.children(&mut cursor)
|
||||
.filter(|n| n.kind() == "argument")
|
||||
.collect();
|
||||
if positional.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
let route_node = first_inner(positional[0]);
|
||||
let route = string_node_value(route_node, bytes).unwrap_or_default();
|
||||
let handler_node = first_inner(positional[1]);
|
||||
let handler_name = handler_text(handler_node, bytes).unwrap_or_default();
|
||||
let auth_required = check_chained_middleware(call, bytes);
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(call, file_rel),
|
||||
framework: Framework::Laravel,
|
||||
method: *method,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(handler_node.start_position().row + 1) as u32,
|
||||
(handler_node.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Returns the first named child of `arg` (the expression wrapped by an
/// `argument` node), or `arg` itself when it has no named children.
fn first_inner(arg: Node) -> Node {
    arg.named_child(0).unwrap_or(arg)
}
|
||||
|
||||
fn handler_text(node: Node, bytes: &[u8]) -> Option<String> {
|
||||
Some(node.utf8_text(bytes).ok()?.to_string())
|
||||
}
|
||||
|
||||
/// Reports whether the route declared by `call` is guarded by an
/// auth-looking middleware.
///
/// Two shapes are recognised while walking up the ancestors of `call`:
///
/// * a chained call, `Route::get(...)->middleware('auth')`, where the
///   ancestor itself is the `middleware(...)` member call;
/// * a group wrapper, `Route::middleware(['auth'])->group(function () {
///   Route::get(...); })`, where the ancestor is the `group(...)` member
///   call and the `middleware(...)` call sits in its receiver chain.
///
/// The argument check is a textual, best-effort match (see
/// `is_auth_middleware_call`).
fn check_chained_middleware(call: Node, bytes: &[u8]) -> bool {
    let mut cur = call.parent();
    while let Some(ancestor) = cur {
        if ancestor.kind() == "member_call_expression" {
            if is_auth_middleware_call(ancestor, bytes) {
                return true;
            }
            // The receiver of `->group(...)` is a child of the group call,
            // not an ancestor of the route, so it needs a separate look.
            if call_name(ancestor, bytes) == Some("group")
                && receiver_chain_has_auth_middleware(ancestor, bytes)
            {
                return true;
            }
        }
        cur = ancestor.parent();
    }
    false
}

/// Source text of the `name` field of a member or scoped call.
fn call_name<'a>(call: Node, bytes: &'a [u8]) -> Option<&'a str> {
    call.child_by_field_name("name")?.utf8_text(bytes).ok()
}

/// True when `call` is named `middleware` and its argument text contains
/// an auth marker. `auth` also covers spellings such as `authenticated`.
fn is_auth_middleware_call(call: Node, bytes: &[u8]) -> bool {
    if call_name(call, bytes) != Some("middleware") {
        return false;
    }
    call.child_by_field_name("arguments")
        .and_then(|args| args.utf8_text(bytes).ok())
        .is_some_and(|text| ["auth", "jwt"].iter().any(|marker| text.contains(marker)))
}

/// Follows the receiver chain of a `->group(...)` call looking for an
/// auth `middleware(...)` call, e.g.
/// `Route::prefix('admin')->middleware('auth')->group(...)`.
fn receiver_chain_has_auth_middleware(group_call: Node, bytes: &[u8]) -> bool {
    let mut receiver = group_call.child_by_field_name("object");
    while let Some(node) = receiver {
        match node.kind() {
            "member_call_expression" => {
                if is_auth_middleware_call(node, bytes) {
                    return true;
                }
                receiver = node.child_by_field_name("object");
            }
            // A static call such as `Route::middleware(...)` is the head
            // of the chain; nothing precedes it.
            "scoped_call_expression" => return is_auth_middleware_call(node, bytes),
            _ => return false,
        }
    }
    false
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses `src` with the tree-sitter PHP grammar and returns the tree
    /// together with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_php::LANGUAGE_PHP.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// A plain `Route::get` yields one GET entry point with its path.
    #[test]
    fn detects_laravel_get() {
        let (tree, bytes) = parse("<?php\nRoute::get('/users', 'UserController@index');\n");
        let detected = detect_laravel_routes(&tree, &bytes, &PathBuf::from("routes.php"), None);
        assert_eq!(detected.len(), 1);
        match &detected[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/users");
            }
            _ => panic!(),
        }
    }

    /// A trailing `->middleware('auth')` marks the route as authenticated.
    #[test]
    fn detects_middleware_chain() {
        let (tree, bytes) = parse(
            "<?php\nRoute::post('/admin', 'AdminController@create')->middleware('auth');\n",
        );
        let detected = detect_laravel_routes(&tree, &bytes, &PathBuf::from("routes.php"), None);
        match &detected[0] {
            SurfaceNode::EntryPoint(ep) => assert!(ep.auth_required),
            _ => panic!(),
        }
    }
}
|
||||
139
src/surface/lang/php_slim.rs
Normal file
139
src/surface/lang/php_slim.rs
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
//! PHP + Slim framework probe.
|
||||
//!
|
||||
//! Recognises Slim route registrations:
|
||||
//!
|
||||
//! * `$app->get('/path', $handler)` / `->post(...)` / `->put` /
|
||||
//! `->delete` / `->patch` / `->options` / `->any`
|
||||
//! * `$app->group('/api', function ($g) { $g->get(...); })` — routes
//!   registered on the group receiver (`$g`, `$group`, ...) inside the
//!   closure are detected. The enclosing group prefix is not yet
//!   prepended: the emitted route is the path literal of the inner call.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
const VERBS: &[(&str, HttpMethod)] = &[
|
||||
("get", HttpMethod::GET),
|
||||
("post", HttpMethod::POST),
|
||||
("put", HttpMethod::PUT),
|
||||
("patch", HttpMethod::PATCH),
|
||||
("delete", HttpMethod::DELETE),
|
||||
("options", HttpMethod::OPTIONS),
|
||||
("head", HttpMethod::HEAD),
|
||||
("any", HttpMethod::GET),
|
||||
];
|
||||
|
||||
pub fn detect_slim_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_calls(tree.root_node(), &mut |call| {
|
||||
if let Some(node) = match_slim_call(call, bytes, &file_rel) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Pre-order traversal of the subtree rooted at `node`, invoking `visit`
/// on every `member_call_expression` (`$a->b(...)`). Parents are visited
/// before their children and siblings in source order.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    // Explicit stack; children are pushed in reverse so they pop in
    // source order, reproducing a recursive pre-order walk.
    let mut pending = vec![node];
    while let Some(current) = pending.pop() {
        if current.kind() == "member_call_expression" {
            visit(current);
        }
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        pending.extend(children.into_iter().rev());
    }
}
|
||||
|
||||
fn match_slim_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
|
||||
let object = call.child_by_field_name("object")?;
|
||||
let object_text = object.utf8_text(bytes).ok()?;
|
||||
if !receiver_is_slim_app(object_text) {
|
||||
return None;
|
||||
}
|
||||
let name = call.child_by_field_name("name")?;
|
||||
let name_text = name.utf8_text(bytes).ok()?;
|
||||
let (_, method) = VERBS
|
||||
.iter()
|
||||
.find(|(v, _)| v.eq_ignore_ascii_case(name_text))?;
|
||||
let args = call.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let positional: Vec<Node> = args
|
||||
.children(&mut cursor)
|
||||
.filter(|n| n.kind() == "argument")
|
||||
.collect();
|
||||
if positional.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
let route_node = first_inner(positional[0]);
|
||||
let route = string_node_value(route_node, bytes).unwrap_or_default();
|
||||
let handler_node = first_inner(positional[1]);
|
||||
let handler_name = handler_node
|
||||
.utf8_text(bytes)
|
||||
.ok()
|
||||
.map(str::to_string)
|
||||
.unwrap_or_default();
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(call, file_rel),
|
||||
framework: Framework::Slim,
|
||||
method: *method,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(handler_node.start_position().row + 1) as u32,
|
||||
(handler_node.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required: false,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Returns the first named child of `arg` (the expression wrapped by an
/// `argument` node), or `arg` itself when it has no named children.
fn first_inner(arg: Node) -> Node {
    match arg.named_child(0) {
        Some(inner) => inner,
        None => arg,
    }
}
|
||||
|
||||
/// Heuristic: does the receiver text of a member call look like a Slim
/// app, route group or router?
///
/// After trimming and ASCII-lowercasing, the text is accepted when it is
/// exactly `$g` or ends in `app`, `group` or `router` (so `$app`,
/// `$group`, `$router` and e.g. `$this->app` all match).
fn receiver_is_slim_app(text: &str) -> bool {
    const SUFFIXES: [&str; 3] = ["app", "group", "router"];
    let receiver = text.trim().to_ascii_lowercase();
    receiver == "$g" || SUFFIXES.iter().any(|suffix| receiver.ends_with(*suffix))
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses `src` with the tree-sitter PHP grammar and returns the tree
    /// together with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_php::LANGUAGE_PHP.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// A plain `$app->get` yields one GET entry point with its path.
    #[test]
    fn detects_slim_get() {
        let (tree, bytes) = parse("<?php\n$app->get('/users', 'UsersController:list');\n");
        let detected = detect_slim_routes(&tree, &bytes, &PathBuf::from("routes.php"), None);
        assert_eq!(detected.len(), 1);
        match &detected[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/users");
            }
            _ => panic!(),
        }
    }
}
|
||||
353
src/surface/lang/python_django.rs
Normal file
353
src/surface/lang/python_django.rs
Normal file
|
|
@ -0,0 +1,353 @@
|
|||
//! Python + Django framework probe.
|
||||
//!
|
||||
//! Recognises two route shapes:
|
||||
//!
|
||||
//! 1. `urls.py`-style routing: `path("/admin", admin_view)`,
|
||||
//! `re_path(r"^api/", api_view)`, `url(r"^foo$", foo_view)`.
|
||||
//! The probe walks the URL configuration list and emits one
|
||||
//! EntryPoint per `path` / `re_path` / `url` call, resolving the
|
||||
//! handler to the function with the same name in the file when
|
||||
//! possible.
|
||||
//! 2. Class-based view methods: a `get` / `post` / `put` / `delete`
|
||||
//! method on a class derived from `View`, `APIView`, `ViewSet`,
|
||||
//! `TemplateView`. The route path is `""` because URL config lives
|
||||
//! in a separate `urls.py`.
|
||||
//!
|
||||
//! `auth_required` follows the standard Django decorators
|
||||
//! ([`AUTH_DECORATORS`]) plus the DRF permission classes pattern
|
||||
//! (`permission_classes = [IsAuthenticated]`).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{
|
||||
leaf_matches, loc_for, python_imports_any, rel_file, string_node_value,
|
||||
};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::DJANGO_DECORATORS as AUTH_DECORATORS;
|
||||
|
||||
/// Base-class names that mark a class as a Django / DRF class-based
/// view. `class_is_django_view` tests each name as a substring of the
/// last dotted segment of every superclass expression.
const CBV_BASES: &[&str] = &[
    "View",
    "APIView",
    "ViewSet",
    "ModelViewSet",
    "ReadOnlyModelViewSet",
    "TemplateView",
    "ListView",
    "DetailView",
    "CreateView",
    "UpdateView",
    "DeleteView",
    "RedirectView",
    "FormView",
];
|
||||
|
||||
pub fn detect_django_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
// File-level gate: only fire when the file actually imports
|
||||
// django or DRF. Phase 23 follow-up tightens the witness to
|
||||
// top-level `import` / `from` statements so a comment or string
|
||||
// mention of "django" / "rest_framework" cannot trigger detection.
|
||||
if !python_imports_any(bytes, &["django", "rest_framework"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
let function_index = collect_function_definitions(tree.root_node(), bytes);
|
||||
detect_url_dispatch(
|
||||
tree.root_node(),
|
||||
bytes,
|
||||
&file_rel,
|
||||
&function_index,
|
||||
&mut out,
|
||||
);
|
||||
detect_class_based_views(tree.root_node(), bytes, &file_rel, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
fn collect_function_definitions<'tree>(
|
||||
root: Node<'tree>,
|
||||
bytes: &'tree [u8],
|
||||
) -> HashMap<String, (Node<'tree>, bool)> {
|
||||
let mut index: HashMap<String, (Node<'tree>, bool)> = HashMap::new();
|
||||
fn walk<'tree>(
|
||||
node: Node<'tree>,
|
||||
bytes: &'tree [u8],
|
||||
index: &mut HashMap<String, (Node<'tree>, bool)>,
|
||||
) {
|
||||
if node.kind() == "function_definition"
|
||||
&& let Some(name_node) = node.child_by_field_name("name")
|
||||
&& let Ok(name) = name_node.utf8_text(bytes)
|
||||
{
|
||||
// Detect if any decorator is an auth marker.
|
||||
let mut auth = false;
|
||||
if let Some(parent) = node.parent()
|
||||
&& parent.kind() == "decorated_definition"
|
||||
{
|
||||
let mut cursor = parent.walk();
|
||||
for child in parent.children(&mut cursor) {
|
||||
if child.kind() == "decorator" && decorator_is_auth_marker(child, bytes) {
|
||||
auth = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
index.insert(name.to_string(), (node, auth));
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
walk(child, bytes, index);
|
||||
}
|
||||
}
|
||||
walk(root, bytes, &mut index);
|
||||
index
|
||||
}
|
||||
|
||||
fn detect_url_dispatch<'tree>(
|
||||
root: Node<'tree>,
|
||||
bytes: &[u8],
|
||||
file_rel: &str,
|
||||
function_index: &HashMap<String, (Node<'tree>, bool)>,
|
||||
out: &mut Vec<SurfaceNode>,
|
||||
) {
|
||||
fn recurse<'tree>(
|
||||
node: Node<'tree>,
|
||||
bytes: &[u8],
|
||||
file_rel: &str,
|
||||
function_index: &HashMap<String, (Node<'tree>, bool)>,
|
||||
out: &mut Vec<SurfaceNode>,
|
||||
) {
|
||||
if node.kind() == "call"
|
||||
&& let Some((route, handler_name)) = parse_url_call(node, bytes)
|
||||
{
|
||||
let (handler_loc, auth_required) = function_index
|
||||
.get(&handler_name)
|
||||
.map(|(h, a)| (loc_for(*h, file_rel), *a))
|
||||
.unwrap_or_else(|| (loc_for(node, file_rel), false));
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(node, file_rel),
|
||||
framework: Framework::Django,
|
||||
method: HttpMethod::GET,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: handler_loc,
|
||||
auth_required,
|
||||
}));
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
recurse(child, bytes, file_rel, function_index, out);
|
||||
}
|
||||
}
|
||||
recurse(root, bytes, file_rel, function_index, out);
|
||||
}
|
||||
|
||||
fn parse_url_call(call: Node, bytes: &[u8]) -> Option<(String, String)> {
|
||||
let target = call.child_by_field_name("function")?;
|
||||
let target_text = target.utf8_text(bytes).ok()?;
|
||||
let leaf = target_text.rsplit('.').next().unwrap_or(target_text);
|
||||
if !matches!(leaf, "path" | "re_path" | "url") {
|
||||
return None;
|
||||
}
|
||||
let args = call.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let mut route: Option<String> = None;
|
||||
let mut handler: Option<String> = None;
|
||||
for arg in args.children(&mut cursor) {
|
||||
match arg.kind() {
|
||||
"string" if route.is_none() => {
|
||||
route = string_node_value(arg, bytes);
|
||||
}
|
||||
"identifier" if handler.is_none() => {
|
||||
handler = arg.utf8_text(bytes).ok().map(str::to_string);
|
||||
}
|
||||
"attribute" if handler.is_none() => {
|
||||
handler = arg.utf8_text(bytes).ok().map(str::to_string);
|
||||
}
|
||||
"call" if handler.is_none() => {
|
||||
// `MyView.as_view()` shape — extract `MyView`.
|
||||
if let Some(callee) = arg.child_by_field_name("function")
|
||||
&& let Ok(text) = callee.utf8_text(bytes)
|
||||
{
|
||||
handler = Some(text.split('.').next().unwrap_or(text).to_string());
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Some((route?, handler?))
|
||||
}
|
||||
|
||||
fn detect_class_based_views(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
|
||||
fn recurse(node: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
|
||||
if node.kind() == "class_definition" && class_is_django_view(node, bytes) {
|
||||
let class_auth = class_has_auth_permission(node, bytes);
|
||||
// Walk the body for HTTP-named methods.
|
||||
if let Some(body) = node.child_by_field_name("body") {
|
||||
let mut bcur = body.walk();
|
||||
for stmt in body.children(&mut bcur) {
|
||||
let func = match stmt.kind() {
|
||||
"function_definition" => stmt,
|
||||
"decorated_definition" => stmt
|
||||
.child_by_field_name("definition")
|
||||
.or_else(|| {
|
||||
let mut c = stmt.walk();
|
||||
stmt.children(&mut c)
|
||||
.find(|n| n.kind() == "function_definition")
|
||||
})
|
||||
.unwrap_or(stmt),
|
||||
_ => continue,
|
||||
};
|
||||
if func.kind() != "function_definition" {
|
||||
continue;
|
||||
}
|
||||
let Some(name_node) = func.child_by_field_name("name") else {
|
||||
continue;
|
||||
};
|
||||
let Ok(name) = name_node.utf8_text(bytes) else {
|
||||
continue;
|
||||
};
|
||||
let Some(method) = HttpMethod::from_ident(name) else {
|
||||
continue;
|
||||
};
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(func, file_rel),
|
||||
framework: Framework::Django,
|
||||
method,
|
||||
route: String::new(),
|
||||
handler_name: name.to_string(),
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(func.start_position().row + 1) as u32,
|
||||
(func.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required: class_auth,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
recurse(child, bytes, file_rel, out);
|
||||
}
|
||||
}
|
||||
recurse(root, bytes, file_rel, out);
|
||||
}
|
||||
|
||||
fn class_is_django_view(class: Node, bytes: &[u8]) -> bool {
|
||||
let Some(supers) = class.child_by_field_name("superclasses") else {
|
||||
return false;
|
||||
};
|
||||
let mut cursor = supers.walk();
|
||||
for sup in supers.named_children(&mut cursor) {
|
||||
let Ok(text) = sup.utf8_text(bytes) else {
|
||||
continue;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text);
|
||||
if CBV_BASES.iter().any(|b| leaf.contains(b)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// True when the class body directly contains an assignment
/// `permission_classes = ...` whose right-hand side text mentions
/// `IsAuthenticated`, `IsAdminUser` or `DjangoModelPermissions`.
///
/// Only `assignment` nodes that are direct children of an
/// `expression_statement` in the class body are inspected.
fn class_has_auth_permission(class: Node, bytes: &[u8]) -> bool {
    const AUTH_PERMISSION_CLASSES: [&str; 3] =
        ["IsAuthenticated", "IsAdminUser", "DjangoModelPermissions"];

    /// Checks one `assignment` node for the auth permission shape.
    fn assigns_auth_permission(assignment: Node, bytes: &[u8]) -> bool {
        let target = assignment
            .child_by_field_name("left")
            .and_then(|n| n.utf8_text(bytes).ok());
        if target != Some("permission_classes") {
            return false;
        }
        assignment
            .child_by_field_name("right")
            .and_then(|n| n.utf8_text(bytes).ok())
            .is_some_and(|rhs| AUTH_PERMISSION_CLASSES.iter().any(|p| rhs.contains(p)))
    }

    let Some(body) = class.child_by_field_name("body") else {
        return false;
    };
    let mut body_cursor = body.walk();
    for stmt in body.children(&mut body_cursor) {
        if stmt.kind() != "expression_statement" {
            continue;
        }
        let mut stmt_cursor = stmt.walk();
        let found = stmt
            .children(&mut stmt_cursor)
            .any(|child| child.kind() == "assignment" && assigns_auth_permission(child, bytes));
        if found {
            return true;
        }
    }
    false
}
|
||||
|
||||
fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool {
|
||||
let mut cursor = decorator.walk();
|
||||
let Some(expr) = decorator
|
||||
.children(&mut cursor)
|
||||
.find(|c| c.kind() != "@" && c.kind() != "comment")
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let target = match expr.kind() {
|
||||
"call" => expr.child_by_field_name("function"),
|
||||
_ => Some(expr),
|
||||
};
|
||||
let Some(target) = target else { return false };
|
||||
let Ok(text) = target.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
leaf_matches(text, AUTH_DECORATORS)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses `src` with the tree-sitter Python grammar and returns the
    /// tree together with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// A `path(...)` entry resolves its route and handler name.
    #[test]
    fn detects_path_call() {
        let (tree, bytes) = parse(
            "from django.urls import path\n\ndef admin_view(request): pass\n\nurlpatterns = [\n path('admin/', admin_view),\n]\n",
        );
        let detected = detect_django_routes(&tree, &bytes, &PathBuf::from("urls.py"), None);
        assert!(!detected.is_empty());
        match &detected[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.framework, Framework::Django);
                assert_eq!(ep.handler_name, "admin_view");
                assert_eq!(ep.route, "admin/");
            }
            _ => panic!(),
        }
    }

    /// Each HTTP-named method of an `APIView` subclass is an entry point.
    #[test]
    fn detects_class_based_view() {
        let (tree, bytes) = parse(
            "from rest_framework.views import APIView\n\nclass UserList(APIView):\n def get(self, request): pass\n def post(self, request): pass\n",
        );
        let detected = detect_django_routes(&tree, &bytes, &PathBuf::from("views.py"), None);
        assert_eq!(detected.len(), 2);
    }
}
|
||||
325
src/surface/lang/python_fastapi.rs
Normal file
325
src/surface/lang/python_fastapi.rs
Normal file
|
|
@ -0,0 +1,325 @@
|
|||
//! Python + FastAPI framework probe.
|
||||
//!
|
||||
//! Recognises FastAPI / Starlette route declarations:
|
||||
//!
|
||||
//! * `@app.get("/path")` / `.post("/path")` / `.put` / `.patch` / `.delete`
|
||||
//! * `@router.get("/path")` / `.post(...)` / etc. on an `APIRouter`
|
||||
//! * `@app.api_route("/path", methods=["GET","POST"])`
|
||||
//! * `@app.websocket("/ws")` (treated as GET)
|
||||
//!
|
||||
//! `auth_required` is inferred from `Depends(<auth>)` parameters in the
|
||||
//! handler signature (FastAPI's idiomatic auth pattern) and from
|
||||
//! decorator-stack guards drawn from [`AUTH_DECORATORS`].
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{
|
||||
leaf_matches, loc_for, python_imports_any, rel_file, string_node_value,
|
||||
};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
/// Auth markers recognised in the decorator stack. FastAPI's primary
|
||||
/// auth idiom is `Depends(...)` parameter injection, handled separately.
|
||||
pub use crate::auth_analysis::auth_markers::FASTAPI_DECORATORS as AUTH_DECORATORS;
|
||||
|
||||
/// Callee names that mark a `Depends(...)` / `Security(...)` parameter
/// default as an authentication dependency. Matched against each
/// argument's text with `leaf_matches` in
/// `function_signature_uses_auth_depends`.
const AUTH_DEPENDS_CALLEES: &[&str] = &[
    "get_current_user",
    "get_current_active_user",
    "current_user",
    "require_user",
    "require_auth",
    "auth",
    "verify_token",
    "verify_jwt",
    "validate_token",
];
|
||||
|
||||
pub fn detect_fastapi_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
// File-level gate: avoid double-detection on Flask files that
|
||||
// also use `app.get(...)` shape. Phase 23 follow-up tightens the
|
||||
// witness to actual top-level `import` / `from` statements so a
|
||||
// comment or string mention of "fastapi" cannot trigger detection.
|
||||
if !python_imports_any(bytes, &["fastapi", "starlette"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_decorated(tree.root_node(), &mut |func, decorators| {
|
||||
let auth_via_decorator = decorators
|
||||
.iter()
|
||||
.any(|d| decorator_is_auth_marker(*d, bytes));
|
||||
let auth_via_depends = function_signature_uses_auth_depends(*func, bytes);
|
||||
let auth_required = auth_via_decorator || auth_via_depends;
|
||||
for dec in decorators {
|
||||
if let Some((method, route_path)) = fastapi_route_decorator(*dec, bytes) {
|
||||
let handler_name = function_name(*func, bytes).unwrap_or_default();
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(*dec, &file_rel),
|
||||
framework: Framework::FastApi,
|
||||
method,
|
||||
route: route_path,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(func.start_position().row + 1) as u32,
|
||||
(func.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}));
|
||||
}
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
fn walk_decorated<'tree, F>(root: Node<'tree>, visit: &mut F)
|
||||
where
|
||||
F: FnMut(&Node<'tree>, &[Node<'tree>]),
|
||||
{
|
||||
if root.kind() == "decorated_definition" {
|
||||
let mut cursor = root.walk();
|
||||
let mut decorators: Vec<Node<'tree>> = Vec::new();
|
||||
let mut func: Option<Node<'tree>> = None;
|
||||
for child in root.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"decorator" => decorators.push(child),
|
||||
"function_definition" => func = Some(child),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if let Some(f) = func {
|
||||
visit(&f, &decorators);
|
||||
}
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.children(&mut cursor) {
|
||||
walk_decorated(child, visit);
|
||||
}
|
||||
}
|
||||
|
||||
fn fastapi_route_decorator(decorator: Node, bytes: &[u8]) -> Option<(HttpMethod, String)> {
|
||||
let mut cursor = decorator.walk();
|
||||
let expr = decorator
|
||||
.children(&mut cursor)
|
||||
.find(|c| c.kind() != "@" && c.kind() != "comment")?;
|
||||
if expr.kind() != "call" {
|
||||
return None;
|
||||
}
|
||||
let target = expr.child_by_field_name("function")?;
|
||||
let args = expr.child_by_field_name("arguments");
|
||||
if target.kind() != "attribute" {
|
||||
return None;
|
||||
}
|
||||
let object = target.child_by_field_name("object")?;
|
||||
if !receiver_is_fastapi(object, bytes) {
|
||||
return None;
|
||||
}
|
||||
let attr = target.child_by_field_name("attribute")?;
|
||||
let attr_text = attr.utf8_text(bytes).ok()?;
|
||||
let route_path = args
|
||||
.and_then(|a| first_string_arg(a, bytes))
|
||||
.unwrap_or_default();
|
||||
if let Some(m) = HttpMethod::from_ident(attr_text) {
|
||||
return Some((m, route_path));
|
||||
}
|
||||
let lower = attr_text.to_ascii_lowercase();
|
||||
if lower == "websocket" || lower == "websocket_route" {
|
||||
return Some((HttpMethod::GET, route_path));
|
||||
}
|
||||
if lower == "api_route" {
|
||||
let method = args
|
||||
.and_then(|a| first_methods_kwarg(a, bytes))
|
||||
.unwrap_or(HttpMethod::GET);
|
||||
return Some((method, route_path));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn receiver_is_fastapi(object: Node, bytes: &[u8]) -> bool {
|
||||
fn name_matches(text: &str) -> bool {
|
||||
let lower = text.to_ascii_lowercase();
|
||||
lower == "app"
|
||||
|| lower == "router"
|
||||
|| lower == "api"
|
||||
|| lower.ends_with("_app")
|
||||
|| lower.ends_with("_router")
|
||||
|| lower.ends_with("_api")
|
||||
}
|
||||
match object.kind() {
|
||||
"identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches),
|
||||
"attribute" => object
|
||||
.child_by_field_name("attribute")
|
||||
.and_then(|a| a.utf8_text(bytes).ok())
|
||||
.is_some_and(name_matches),
|
||||
"call" => {
|
||||
let Some(callee) = object.child_by_field_name("function") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = callee.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text).trim();
|
||||
leaf == "FastAPI" || leaf == "APIRouter" || leaf == "Starlette"
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn first_string_arg(args: Node, bytes: &[u8]) -> Option<String> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if arg.kind() == "string" {
|
||||
return string_node_value(arg, bytes);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn first_methods_kwarg(args: Node, bytes: &[u8]) -> Option<HttpMethod> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if arg.kind() != "keyword_argument" {
|
||||
continue;
|
||||
}
|
||||
let name = arg.child_by_field_name("name")?;
|
||||
if name.utf8_text(bytes).ok()? != "methods" {
|
||||
continue;
|
||||
}
|
||||
let value = arg.child_by_field_name("value")?;
|
||||
let mut vw = value.walk();
|
||||
for child in value.children(&mut vw) {
|
||||
if child.kind() == "string"
|
||||
&& let Some(v) = string_node_value(child, bytes)
|
||||
&& let Some(m) = HttpMethod::from_ident(&v)
|
||||
{
|
||||
return Some(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool {
|
||||
let mut cursor = decorator.walk();
|
||||
let Some(expr) = decorator
|
||||
.children(&mut cursor)
|
||||
.find(|c| c.kind() != "@" && c.kind() != "comment")
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let target = match expr.kind() {
|
||||
"call" => expr.child_by_field_name("function"),
|
||||
_ => Some(expr),
|
||||
};
|
||||
let Some(target) = target else { return false };
|
||||
let Ok(text) = target.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
leaf_matches(text, AUTH_DECORATORS)
|
||||
}
|
||||
|
||||
/// Look for a parameter with default `Depends(<auth_callee>)`.
|
||||
fn function_signature_uses_auth_depends(func: Node, bytes: &[u8]) -> bool {
|
||||
let Some(params) = func.child_by_field_name("parameters") else {
|
||||
return false;
|
||||
};
|
||||
let mut cursor = params.walk();
|
||||
for param in params.children(&mut cursor) {
|
||||
if !matches!(
|
||||
param.kind(),
|
||||
"default_parameter" | "typed_default_parameter"
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
let Some(value) = param.child_by_field_name("value") else {
|
||||
continue;
|
||||
};
|
||||
if value.kind() != "call" {
|
||||
continue;
|
||||
}
|
||||
let Some(call_target) = value.child_by_field_name("function") else {
|
||||
continue;
|
||||
};
|
||||
let Ok(text) = call_target.utf8_text(bytes) else {
|
||||
continue;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text).trim();
|
||||
if leaf != "Depends" && leaf != "Security" {
|
||||
continue;
|
||||
}
|
||||
let Some(args) = value.child_by_field_name("arguments") else {
|
||||
continue;
|
||||
};
|
||||
let mut aw = args.walk();
|
||||
for arg in args.children(&mut aw) {
|
||||
if let Ok(arg_text) = arg.utf8_text(bytes)
|
||||
&& leaf_matches(arg_text, AUTH_DEPENDS_CALLEES)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn function_name(func: Node, bytes: &[u8]) -> Option<String> {
|
||||
let name_node = func.child_by_field_name("name")?;
|
||||
name_node.utf8_text(bytes).ok().map(str::to_string)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the Python grammar, returning the tree and its bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// Run the FastAPI probe over `src` as if it were `api.py`.
    fn routes(src: &str) -> Vec<SurfaceNode> {
        let (tree, bytes) = parse(src);
        detect_fastapi_routes(&tree, &bytes, &PathBuf::from("api.py"), None)
    }

    #[test]
    fn detects_get_route() {
        let nodes = routes(
            "from fastapi import FastAPI\napp = FastAPI()\n@app.get('/users')\ndef list_users(): pass\n",
        );
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/users");
                assert_eq!(ep.framework, Framework::FastApi);
            }
            _ => panic!(),
        }
    }

    #[test]
    fn detects_router_post() {
        let nodes = routes(
            "from fastapi import APIRouter\nrouter = APIRouter()\n@router.post('/items')\ndef create(): pass\n",
        );
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => assert_eq!(ep.method, HttpMethod::POST),
            _ => panic!(),
        }
    }

    #[test]
    fn detects_depends_auth() {
        let nodes = routes(
            "from fastapi import Depends\n@app.get('/me')\ndef me(user = Depends(get_current_user)): pass\n",
        );
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => assert!(ep.auth_required),
            _ => panic!(),
        }
    }
}
|
||||
411
src/surface/lang/python_flask.rs
Normal file
411
src/surface/lang/python_flask.rs
Normal file
|
|
@ -0,0 +1,411 @@
|
|||
//! Python + Flask framework probe.
|
||||
//!
|
||||
//! Walks a parsed Python file looking for the four canonical Flask
|
||||
//! route shapes:
|
||||
//!
|
||||
//! * `@app.route("/path", methods=[...])`
|
||||
//! * `@app.get("/path")` / `.post(...)` / etc. (Flask ≥ 2.0)
|
||||
//! * `@bp.route("/path", methods=[...])` on a `Blueprint`
|
||||
//! * `@bp.get("/path")` / `.post(...)` / etc.
|
||||
//!
|
||||
//! `auth_required` is inferred from the decorator stack: any decorator
|
||||
//! whose textual representation matches one of [`AUTH_DECORATORS`] is
|
||||
//! treated as an auth boundary on the following route. This catches
|
||||
//! the canonical `@login_required` (Flask-Login), `@auth_required`
|
||||
//! (custom guards), and `@jwt_required` / `@jwt_required()` (Flask-JWT
|
||||
//! and -JWT-Extended).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::python_imports_any;
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode, relative_path_string};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
/// Decorator names that mark a route as requiring authentication.
|
||||
/// Matched against the *leaf* of the decorator expression — i.e. the
|
||||
/// last `attribute` / `identifier` segment — so `@login_required`,
|
||||
/// `@auth.login_required`, and `@flask_login.login_required` all
|
||||
/// match. Match is case-insensitive on the underscored form.
|
||||
pub use crate::auth_analysis::auth_markers::FLASK_DECORATORS as AUTH_DECORATORS;
|
||||
|
||||
/// Detect every Flask route in a parsed Python file.
|
||||
///
|
||||
/// `scan_root` is used to convert the file path to a project-relative
|
||||
/// POSIX path; pass `None` to record absolute paths. Returns one
|
||||
/// [`SurfaceNode::EntryPoint`] per `@route` / `@get` / `@post` / …
|
||||
/// decorator that targets a Flask-shaped receiver (`app`, `bp`,
|
||||
/// `blueprint`, or anything ending in `_bp` / `Blueprint`).
|
||||
pub fn detect_flask_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
// File-level gate: avoid double-detection on FastAPI files where
|
||||
// `app.get(...)` shape overlaps. Phase 21 was lenient because no
|
||||
// sibling probe existed; Phase 22 split per-framework via free
|
||||
// text witness; Phase 23 follow-up tightens the witness to actual
|
||||
// top-level `import` / `from` statements so a comment or vendored
|
||||
// license header that names "flask" cannot trigger detection.
|
||||
if !python_imports_any(bytes, &["flask"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = relative_path_string(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_decorated(tree.root_node(), bytes, &mut |func_node, decorators| {
|
||||
// Reverse pass: find Flask-route decorators and collect auth
|
||||
// markers seen at *any* position in the decorator stack —
|
||||
// Flask honours decorators in stacked order regardless of
|
||||
// sequence relative to the route.
|
||||
let auth_required = decorators
|
||||
.iter()
|
||||
.any(|d| decorator_is_auth_marker(*d, bytes));
|
||||
for dec in decorators {
|
||||
if let Some((method, route_path)) = flask_route_decorator(*dec, bytes) {
|
||||
let dec_pos = dec.start_position();
|
||||
let handler_pos = func_node.start_position();
|
||||
let handler_name = function_name(*func_node, bytes).unwrap_or_default();
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(dec_pos.row + 1) as u32,
|
||||
(dec_pos.column + 1) as u32,
|
||||
),
|
||||
framework: Framework::Flask,
|
||||
method,
|
||||
route: route_path,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(handler_pos.row + 1) as u32,
|
||||
(handler_pos.column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}));
|
||||
}
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Walk every `function_definition` in `root` and invoke `visit` with
|
||||
/// the function node plus the list of decorator nodes wrapping it.
|
||||
/// Handles both `decorated_definition` (one or more decorators) and
|
||||
/// bare `function_definition` (zero decorators, visit skipped).
|
||||
fn walk_decorated<'tree, F>(root: Node<'tree>, bytes: &[u8], visit: &mut F)
|
||||
where
|
||||
F: FnMut(&Node<'tree>, &[Node<'tree>]),
|
||||
{
|
||||
if root.kind() == "decorated_definition" {
|
||||
let mut cursor = root.walk();
|
||||
let mut decorators: Vec<Node<'tree>> = Vec::new();
|
||||
let mut func: Option<Node<'tree>> = None;
|
||||
for child in root.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"decorator" => decorators.push(child),
|
||||
"function_definition" => func = Some(child),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if let Some(func_node) = func {
|
||||
visit(&func_node, &decorators);
|
||||
}
|
||||
let _ = bytes;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.children(&mut cursor) {
|
||||
walk_decorated(child, bytes, visit);
|
||||
}
|
||||
}
|
||||
|
||||
/// Classify a `decorator` node as a Flask route, returning the
|
||||
/// `(method, path)` pair. Recognises both the `@app.route(...)` and
|
||||
/// `@app.<verb>(...)` shapes and the Blueprint equivalents.
|
||||
fn flask_route_decorator(decorator: Node, bytes: &[u8]) -> Option<(HttpMethod, String)> {
|
||||
let mut walker = decorator.walk();
|
||||
let expr = decorator
|
||||
.children(&mut walker)
|
||||
.find(|c| c.kind() != "@" && c.kind() != "comment")?;
|
||||
let (call_target, args) = match expr.kind() {
|
||||
"call" => (
|
||||
expr.child_by_field_name("function")?,
|
||||
expr.child_by_field_name("arguments"),
|
||||
),
|
||||
_ => return None,
|
||||
};
|
||||
if call_target.kind() != "attribute" {
|
||||
return None;
|
||||
}
|
||||
let object = call_target.child_by_field_name("object")?;
|
||||
if !receiver_is_flask(object, bytes) {
|
||||
return None;
|
||||
}
|
||||
let attr = call_target.child_by_field_name("attribute")?;
|
||||
let attr_text = attr.utf8_text(bytes).ok()?;
|
||||
let route_path = args
|
||||
.and_then(|a| first_string_arg(a, bytes))
|
||||
.unwrap_or_default();
|
||||
if attr_text == "route" {
|
||||
let method = args
|
||||
.and_then(|a| extract_first_method(a, bytes))
|
||||
.unwrap_or(HttpMethod::GET);
|
||||
return Some((method, route_path));
|
||||
}
|
||||
if let Some(method) = HttpMethod::from_ident(attr_text) {
|
||||
return Some((method, route_path));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// `true` when the decorator receiver looks like a Flask app or
|
||||
/// Blueprint binding. Allowlist over identifier names + a structural
|
||||
/// match on call expressions like `Blueprint("name", __name__)`.
|
||||
fn receiver_is_flask(object: Node, bytes: &[u8]) -> bool {
|
||||
fn name_matches(text: &str) -> bool {
|
||||
let lower = text.to_ascii_lowercase();
|
||||
lower == "app"
|
||||
|| lower == "bp"
|
||||
|| lower == "blueprint"
|
||||
|| lower.ends_with("_app")
|
||||
|| lower.ends_with("_bp")
|
||||
|| lower.ends_with("blueprint")
|
||||
|| lower.ends_with("api")
|
||||
}
|
||||
match object.kind() {
|
||||
"identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches),
|
||||
"attribute" => object
|
||||
.child_by_field_name("attribute")
|
||||
.and_then(|a| a.utf8_text(bytes).ok())
|
||||
.is_some_and(name_matches),
|
||||
"call" => {
|
||||
let Some(callee) = object.child_by_field_name("function") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = callee.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text).trim();
|
||||
leaf == "Flask" || leaf == "Blueprint"
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Pull the first string literal positional argument out of a
|
||||
/// `argument_list` node. Used to extract the route path from
|
||||
/// `@app.route("/path", ...)`.
|
||||
fn first_string_arg(args: Node, bytes: &[u8]) -> Option<String> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if arg.kind() == "string" {
|
||||
return Some(string_literal_text(arg, bytes));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Strip Python quotes / prefix bytes (`b"..."`, `r"..."`) and return
|
||||
/// the literal content. Falls back to the raw slice when the literal
|
||||
/// has an unfamiliar shape.
|
||||
fn string_literal_text(node: Node, bytes: &[u8]) -> String {
|
||||
let raw = node.utf8_text(bytes).unwrap_or("");
|
||||
let trimmed = raw.trim();
|
||||
let mut s = trimmed;
|
||||
while let Some(rest) = s.strip_prefix(['b', 'r', 'B', 'R', 'f', 'F']) {
|
||||
s = rest;
|
||||
}
|
||||
let stripped = s
|
||||
.trim_start_matches(['\'', '"'])
|
||||
.trim_end_matches(['\'', '"']);
|
||||
stripped.to_string()
|
||||
}
|
||||
|
||||
/// Extract the first HTTP method named in a `methods=[...]` kwarg, or
|
||||
/// `None` when the decorator omits the kwarg. The first method in
|
||||
/// the list wins; multi-method routes are recorded as the first
|
||||
/// (Flask itself runs the same handler for every listed method).
|
||||
fn extract_first_method(args: Node, bytes: &[u8]) -> Option<HttpMethod> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if arg.kind() != "keyword_argument" {
|
||||
continue;
|
||||
}
|
||||
let name_node = arg.child_by_field_name("name")?;
|
||||
let Ok(name) = name_node.utf8_text(bytes) else {
|
||||
continue;
|
||||
};
|
||||
if name != "methods" {
|
||||
continue;
|
||||
}
|
||||
let value = arg.child_by_field_name("value")?;
|
||||
let mut cur = value.walk();
|
||||
for child in value.children(&mut cur) {
|
||||
if child.kind() == "string" {
|
||||
let text = string_literal_text(child, bytes);
|
||||
if let Some(m) = HttpMethod::from_ident(&text) {
|
||||
return Some(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// `true` when the decorator is an auth-guard marker. Matches the
|
||||
/// last segment of the decorator expression against
|
||||
/// [`AUTH_DECORATORS`].
|
||||
fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool {
|
||||
let mut walker = decorator.walk();
|
||||
let Some(expr) = decorator
|
||||
.children(&mut walker)
|
||||
.find(|c| c.kind() != "@" && c.kind() != "comment")
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let target = match expr.kind() {
|
||||
"call" => expr.child_by_field_name("function"),
|
||||
_ => Some(expr),
|
||||
};
|
||||
let Some(target) = target else { return false };
|
||||
let Ok(text) = target.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text).trim();
|
||||
AUTH_DECORATORS.iter().any(|d| leaf.eq_ignore_ascii_case(d))
|
||||
}
|
||||
|
||||
/// Read the function name from a `function_definition` node.
|
||||
fn function_name(func: Node, bytes: &[u8]) -> Option<String> {
|
||||
let name_node = func.child_by_field_name("name")?;
|
||||
name_node.utf8_text(bytes).ok().map(str::to_string)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the Python grammar, returning the tree and its bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        (parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
    }

    /// Run the Flask probe over `src` as if it were `app.py`.
    fn detect(src: &str) -> Vec<SurfaceNode> {
        let (tree, bytes) = parse(src);
        detect_flask_routes(&tree, &bytes, &PathBuf::from("app.py"), None)
    }

    /// Borrow the first detected node as an entry point.
    fn first_entry_point(nodes: &[SurfaceNode]) -> &EntryPoint {
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => ep,
            _ => panic!("not an EntryPoint"),
        }
    }

    #[test]
    fn detects_basic_route() {
        let nodes = detect(
            r#"
from flask import Flask
app = Flask(__name__)

@app.route("/hello")
def hello():
    return "hi"
"#,
        );
        assert_eq!(nodes.len(), 1);
        let ep = first_entry_point(&nodes);
        assert_eq!(ep.route, "/hello");
        assert_eq!(ep.method, HttpMethod::GET);
        assert_eq!(ep.handler_name, "hello");
        assert!(!ep.auth_required);
    }

    #[test]
    fn detects_methods_kwarg() {
        let nodes = detect(
            r#"
from flask import Flask
app = Flask(__name__)

@app.route("/submit", methods=["POST"])
def submit():
    return "ok"
"#,
        );
        assert_eq!(first_entry_point(&nodes).method, HttpMethod::POST);
    }

    #[test]
    fn detects_verb_decorator() {
        let nodes = detect(
            r#"
from flask import Flask
app = Flask(__name__)

@app.post("/users")
def create():
    return "ok"
"#,
        );
        assert_eq!(first_entry_point(&nodes).method, HttpMethod::POST);
    }

    #[test]
    fn detects_blueprint() {
        let nodes = detect(
            r#"
from flask import Blueprint
bp = Blueprint("admin", __name__)

@bp.get("/admin")
def admin():
    return "secret"
"#,
        );
        assert_eq!(first_entry_point(&nodes).route, "/admin");
    }

    #[test]
    fn detects_auth_decorator() {
        let nodes = detect(
            r#"
from flask import Flask
from flask_login import login_required
app = Flask(__name__)

@app.route("/secret")
@login_required
def secret():
    return "shh"
"#,
        );
        assert!(first_entry_point(&nodes).auth_required);
    }

    #[test]
    fn rejects_non_flask_receiver() {
        let nodes = detect(
            r#"
client = requests.Session()

@client.get("/whatever")
def x():
    pass
"#,
        );
        // `client` does not match the Flask receiver allowlist.
        assert!(nodes.is_empty());
    }
}
|
||||
214
src/surface/lang/ruby_rails.rs
Normal file
214
src/surface/lang/ruby_rails.rs
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
//! Ruby + Rails framework probe.
|
||||
//!
|
||||
//! Recognises two Rails route shapes:
|
||||
//!
|
||||
//! 1. `config/routes.rb` declarations — `get '/path', to: 'controller#action'`,
|
||||
//!    `post '/path' => 'controller#action'`. Only the verb helpers listed
//!    in `VERBS` are matched; `resources :users` is not expanded.
|
||||
//! 2. Controller actions — public instance methods on a class
|
||||
//! inheriting from `ApplicationController` / `ActionController::Base`.
|
||||
//!
|
||||
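//! `auth_required` is only inferred for controller actions, from a
//! class-level `before_action` that names an `authenticate` /
//! `login_required` filter. `routes.rb` entries always record it as `false`.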
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
const VERBS: &[(&str, HttpMethod)] = &[
|
||||
("get", HttpMethod::GET),
|
||||
("post", HttpMethod::POST),
|
||||
("put", HttpMethod::PUT),
|
||||
("patch", HttpMethod::PATCH),
|
||||
("delete", HttpMethod::DELETE),
|
||||
("match", HttpMethod::GET),
|
||||
];
|
||||
|
||||
pub fn detect_rails_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
detect_routes_dsl(tree.root_node(), bytes, &file_rel, &mut out);
|
||||
detect_controllers(tree.root_node(), bytes, &file_rel, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
fn detect_routes_dsl(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
|
||||
fn recurse(node: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
|
||||
if matches!(node.kind(), "call" | "method_call")
|
||||
&& let Some(method_node) = node.child_by_field_name("method")
|
||||
&& let Ok(method_text) = method_node.utf8_text(bytes)
|
||||
&& let Some((_, method)) = VERBS.iter().find(|(v, _)| *v == method_text)
|
||||
{
|
||||
let args_opt = node.child_by_field_name("arguments").or_else(|| {
|
||||
let mut c = node.walk();
|
||||
node.children(&mut c).find(|n| n.kind() == "argument_list")
|
||||
});
|
||||
if let Some(args) = args_opt {
|
||||
let mut cursor = args.walk();
|
||||
let positional: Vec<Node> = args.named_children(&mut cursor).collect();
|
||||
if let Some(route_node) = positional.first()
|
||||
&& let Some(route) = string_node_value(*route_node, bytes)
|
||||
{
|
||||
let handler_name = positional
|
||||
.iter()
|
||||
.find_map(|n| extract_to_handler(*n, bytes))
|
||||
.unwrap_or_default();
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(node, file_rel),
|
||||
framework: Framework::Rails,
|
||||
method: *method,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: loc_for(node, file_rel),
|
||||
auth_required: false,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
recurse(child, bytes, file_rel, out);
|
||||
}
|
||||
}
|
||||
recurse(root, bytes, file_rel, out);
|
||||
}
|
||||
|
||||
fn extract_to_handler(node: Node, bytes: &[u8]) -> Option<String> {
|
||||
// Shapes:
|
||||
// `to: 'controller#action'` — pair with hash key `to`
|
||||
// `'controller#action'` — second positional string
|
||||
// `=> 'controller#action'` — assoc with hashrocket
|
||||
if node.kind() == "string"
|
||||
&& let Some(s) = string_node_value(node, bytes)
|
||||
&& s.contains('#')
|
||||
{
|
||||
return Some(s);
|
||||
}
|
||||
if node.kind() == "pair" {
|
||||
let mut cursor = node.walk();
|
||||
let children: Vec<Node> = node.named_children(&mut cursor).collect();
|
||||
for child in &children {
|
||||
if child.kind() == "string"
|
||||
&& let Some(s) = string_node_value(*child, bytes)
|
||||
&& s.contains('#')
|
||||
{
|
||||
return Some(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn detect_controllers(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
|
||||
fn recurse(node: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
|
||||
if node.kind() == "class" && class_is_controller(node, bytes) {
|
||||
let class_auth = class_has_before_authenticate(node, bytes);
|
||||
walk_methods(node, bytes, &mut |method_node, name| {
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(method_node, file_rel),
|
||||
framework: Framework::Rails,
|
||||
method: HttpMethod::GET,
|
||||
route: String::new(),
|
||||
handler_name: name.to_string(),
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(method_node.start_position().row + 1) as u32,
|
||||
(method_node.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required: class_auth,
|
||||
}));
|
||||
});
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
recurse(child, bytes, file_rel, out);
|
||||
}
|
||||
}
|
||||
recurse(root, bytes, file_rel, out);
|
||||
}
|
||||
|
||||
fn class_is_controller(class: Node, bytes: &[u8]) -> bool {
|
||||
let Some(super_node) = class.child_by_field_name("superclass") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = super_node.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
text.contains("ApplicationController") || text.contains("ActionController")
|
||||
}
|
||||
|
||||
fn class_has_before_authenticate(class: Node, bytes: &[u8]) -> bool {
|
||||
let Some(body) = class.child_by_field_name("body") else {
|
||||
return false;
|
||||
};
|
||||
let mut cursor = body.walk();
|
||||
for child in body.children(&mut cursor) {
|
||||
if let Ok(text) = child.utf8_text(bytes)
|
||||
&& text.contains("before_action")
|
||||
&& (text.contains("authenticate") || text.contains("login_required"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn walk_methods<'tree, F>(class: Node<'tree>, bytes: &[u8], visit: &mut F)
|
||||
where
|
||||
F: FnMut(Node<'tree>, &str),
|
||||
{
|
||||
let Some(body) = class.child_by_field_name("body") else {
|
||||
return;
|
||||
};
|
||||
let mut cursor = body.walk();
|
||||
for child in body.children(&mut cursor) {
|
||||
if child.kind() == "method"
|
||||
&& let Some(name_node) = child.child_by_field_name("name")
|
||||
&& let Ok(name) = name_node.utf8_text(bytes)
|
||||
&& !name.starts_with('_')
|
||||
{
|
||||
visit(child, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the Ruby grammar, returning the tree and its bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// Run the Rails probe over `src` as if it lived at `file`.
    fn detect(src: &str, file: &str) -> Vec<SurfaceNode> {
        let (tree, bytes) = parse(src);
        detect_rails_routes(&tree, &bytes, &PathBuf::from(file), None)
    }

    #[test]
    fn detects_routes_dsl() {
        let nodes = detect(
            "Rails.application.routes.draw do\n  get '/users', to: 'users#index'\nend\n",
            "config/routes.rb",
        );
        assert!(!nodes.is_empty());
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/users");
            }
            _ => panic!(),
        }
    }

    #[test]
    fn detects_controller_actions() {
        let nodes = detect(
            "class UsersController < ApplicationController\n  def index\n  end\n  def show\n  end\nend\n",
            "users_controller.rb",
        );
        assert_eq!(nodes.len(), 2);
    }
}
|
||||
105
src/surface/lang/ruby_sinatra.rs
Normal file
105
src/surface/lang/ruby_sinatra.rs
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
//! Ruby + Sinatra framework probe.
|
||||
//!
|
||||
//! Sinatra routes are top-level method calls of the form
|
||||
//! `get '/path' do ... end`, `post '/path' do ... end`, etc. The
|
||||
//! handler is the block; we synthesise the handler name from the
|
||||
//! route string (Sinatra blocks are anonymous).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
const VERBS: &[(&str, HttpMethod)] = &[
|
||||
("get", HttpMethod::GET),
|
||||
("post", HttpMethod::POST),
|
||||
("put", HttpMethod::PUT),
|
||||
("patch", HttpMethod::PATCH),
|
||||
("delete", HttpMethod::DELETE),
|
||||
("head", HttpMethod::HEAD),
|
||||
("options", HttpMethod::OPTIONS),
|
||||
];
|
||||
|
||||
pub fn detect_sinatra_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_calls(tree.root_node(), &mut |call| {
|
||||
if let Some(node) = match_sinatra_call(call, bytes, &file_rel) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Visit every `call` / `method_call` node at or below `node` in pre-order.
///
/// The traversal uses an explicit stack rather than recursion. Children
/// are pushed in reverse so they are popped, and therefore visited, in
/// source order.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    let mut stack = vec![node];
    while let Some(current) = stack.pop() {
        if matches!(current.kind(), "call" | "method_call") {
            visit(current);
        }
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        stack.extend(children.into_iter().rev());
    }
}
|
||||
|
||||
fn match_sinatra_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
|
||||
let method_name_node = call.child_by_field_name("method")?;
|
||||
let method_text = method_name_node.utf8_text(bytes).ok()?;
|
||||
let (_, method) = VERBS.iter().find(|(v, _)| *v == method_text)?;
|
||||
// Must have a block to be a Sinatra route.
|
||||
let block = call.child_by_field_name("block").or_else(|| {
|
||||
let mut c = call.walk();
|
||||
call.children(&mut c)
|
||||
.find(|n| matches!(n.kind(), "do_block" | "block"))
|
||||
})?;
|
||||
// Args: Sinatra accepts a string literal as the first positional arg.
|
||||
let args = call.child_by_field_name("arguments").or_else(|| {
|
||||
let mut c = call.walk();
|
||||
call.children(&mut c).find(|n| n.kind() == "argument_list")
|
||||
})?;
|
||||
let mut cursor = args.walk();
|
||||
let route_node = args.named_children(&mut cursor).next()?;
|
||||
let route = string_node_value(route_node, bytes)?;
|
||||
let handler_name = format!("{}_{}", method_text, route.replace(['/', '-'], "_"));
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(call, file_rel),
|
||||
framework: Framework::Sinatra,
|
||||
method: *method,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: loc_for(block, file_rel),
|
||||
auth_required: false,
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the Ruby grammar, returning the tree and its bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    #[test]
    fn detects_sinatra_get() {
        let (tree, bytes) = parse("get '/users' do\n  'hi'\nend\n");
        let nodes = detect_sinatra_routes(&tree, &bytes, &PathBuf::from("app.rb"), None);
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/users");
            }
            _ => panic!(),
        }
    }
}
|
||||
187
src/surface/lang/rust_actix.rs
Normal file
187
src/surface/lang/rust_actix.rs
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
//! Rust + actix-web framework probe.
|
||||
//!
|
||||
//! Recognises actix-web routing macros (`#[get("/path")]`,
|
||||
//! `#[post("/path")]`, `#[put]`, `#[delete]`, `#[patch]`, `#[head]`,
|
||||
//! `#[options]`, `#[route("/path", method = ...)]`) attached to a
|
||||
//! `function_item`. The route path is extracted from the macro
|
||||
//! argument string literal.
|
||||
//!
|
||||
//! `auth_required` fires when the function signature has a parameter
|
||||
//! whose type matches one of [`AUTH_EXTRACTORS`] (`Identity`,
|
||||
//! `BearerAuth`, `JwtClaims`, etc.).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file, rust_uses_any};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::ACTIX_EXTRACTORS as AUTH_EXTRACTORS;
|
||||
|
||||
const ROUTE_MACROS: &[(&str, Option<HttpMethod>)] = &[
|
||||
("get", Some(HttpMethod::GET)),
|
||||
("post", Some(HttpMethod::POST)),
|
||||
("put", Some(HttpMethod::PUT)),
|
||||
("delete", Some(HttpMethod::DELETE)),
|
||||
("patch", Some(HttpMethod::PATCH)),
|
||||
("head", Some(HttpMethod::HEAD)),
|
||||
("options", Some(HttpMethod::OPTIONS)),
|
||||
("route", None),
|
||||
];
|
||||
|
||||
pub fn detect_actix_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
// Phase 23 follow-up: gate on a real top-level `use actix_web…` /
|
||||
// `extern crate actix_web` so a comment or string literal
|
||||
// mentioning actix_web cannot trigger detection on a Rocket /
|
||||
// generic Rust file that also defines a `#[get]` user macro.
|
||||
if !rust_uses_any(bytes, &["actix_web"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_functions(tree.root_node(), &mut |func| {
|
||||
if let Some(node) = match_actix_function(func, bytes, &file_rel) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Visit every `function_item` at or below `node` in pre-order, including
/// functions nested inside other items.
///
/// The traversal uses an explicit stack; children are pushed in reverse so
/// they are visited in source order.
fn walk_functions<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    let mut stack = vec![node];
    while let Some(current) = stack.pop() {
        if current.kind() == "function_item" {
            visit(current);
        }
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        stack.extend(children.into_iter().rev());
    }
}
|
||||
|
||||
fn match_actix_function(func: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
|
||||
let attrs = collect_preceding_attributes(func);
|
||||
let mut method: Option<HttpMethod> = None;
|
||||
let mut route_path = String::new();
|
||||
for attr in attrs {
|
||||
let raw = attr.utf8_text(bytes).ok()?;
|
||||
let inner = raw.trim_start_matches(['#', '!']).trim_matches(['[', ']']);
|
||||
for (name, default_method) in ROUTE_MACROS {
|
||||
let prefix = format!("{}(", name);
|
||||
if inner.starts_with(&prefix) {
|
||||
method = default_method.or_else(|| extract_route_method(inner));
|
||||
if route_path.is_empty()
|
||||
&& let Some(start) = inner.find('"')
|
||||
{
|
||||
let rest = &inner[start + 1..];
|
||||
if let Some(end) = rest.find('"') {
|
||||
route_path = rest[..end].to_string();
|
||||
}
|
||||
}
|
||||
} else if inner == *name && method.is_none() {
|
||||
method = *default_method;
|
||||
}
|
||||
}
|
||||
}
|
||||
let m = method?;
|
||||
let handler_name = function_name(func, bytes).unwrap_or_default();
|
||||
let auth_required = signature_uses_auth_extractor(func, bytes);
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(func, file_rel),
|
||||
framework: Framework::Actix,
|
||||
method: m,
|
||||
route: route_path,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(func.start_position().row + 1) as u32,
|
||||
(func.start_position().column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Collect the `attribute` nodes attached to `func`.
///
/// Walks the siblings that precede `func` under its parent and gathers the
/// `attribute` children of every `attribute_item` / `inner_attribute_item`
/// in the run directly before the function. Any other sibling resets the
/// run, so attributes belonging to an earlier item are not picked up.
///
/// Comment nodes (`line_comment` / `block_comment`, which is also how doc
/// comments appear in the tree) are skipped rather than treated as a reset,
/// so a handler written as
///
/// ```text
/// #[get("/users")]
/// /// Lists users.
/// async fn list_users() { .. }
/// ```
///
/// keeps its route attribute.
///
/// Returns an empty vector when `func` has no parent or is not found among
/// the parent's children.
fn collect_preceding_attributes(func: Node) -> Vec<Node> {
    let Some(parent) = func.parent() else {
        return Vec::new();
    };
    let mut cursor = parent.walk();
    let mut pending: Vec<Node> = Vec::new();
    for sib in parent.children(&mut cursor) {
        if sib.id() == func.id() {
            return pending;
        }
        match sib.kind() {
            "attribute_item" | "inner_attribute_item" => {
                let mut aw = sib.walk();
                pending.extend(sib.children(&mut aw).filter(|n| n.kind() == "attribute"));
            }
            // Comments may sit between an attribute and its item without
            // detaching the attribute.
            "line_comment" | "block_comment" => {}
            _ => pending.clear(),
        }
    }
    Vec::new()
}
|
||||
|
||||
fn extract_route_method(inner: &str) -> Option<HttpMethod> {
|
||||
for verb in ["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"] {
|
||||
if inner.contains(verb) {
|
||||
return HttpMethod::from_ident(verb);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn signature_uses_auth_extractor(func: Node, bytes: &[u8]) -> bool {
|
||||
let Some(params) = func.child_by_field_name("parameters") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = params.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
AUTH_EXTRACTORS.iter().any(|n| text.contains(n))
|
||||
}
|
||||
|
||||
fn function_name(func: Node, bytes: &[u8]) -> Option<String> {
|
||||
func.child_by_field_name("name")
|
||||
.and_then(|n| n.utf8_text(bytes).ok())
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the tree-sitter Rust grammar and return the tree
    /// together with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    #[test]
    fn detects_actix_get() {
        let src = r#"
use actix_web::{get, HttpResponse};
#[get("/users")]
async fn list_users() -> HttpResponse { HttpResponse::Ok().finish() }
"#;
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("main.rs");
        let nodes = detect_actix_routes(&tree, &bytes, &file, None);
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/users");
            }
            _ => panic!(),
        }
    }
}
|
||||
187
src/surface/lang/rust_axum.rs
Normal file
187
src/surface/lang/rust_axum.rs
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
//! Rust + axum framework probe.
|
||||
//!
|
||||
//! Detects axum route registration:
|
||||
//!
|
||||
//! * `Router::new().route("/path", get(handler))` /
|
||||
//! `.route("/path", post(handler))` / etc.
|
||||
//! * Bare extractor-shaped function items in files that import axum
|
||||
//! (handler typing alone is treated as a candidate, but only when a
|
||||
//! `Router::route(...)` registration in the same file references it).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file, rust_uses_any, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
const VERBS: &[(&str, HttpMethod)] = &[
|
||||
("get", HttpMethod::GET),
|
||||
("post", HttpMethod::POST),
|
||||
("put", HttpMethod::PUT),
|
||||
("delete", HttpMethod::DELETE),
|
||||
("patch", HttpMethod::PATCH),
|
||||
("head", HttpMethod::HEAD),
|
||||
("options", HttpMethod::OPTIONS),
|
||||
];
|
||||
|
||||
pub use crate::auth_analysis::auth_markers::AXUM_EXTRACTORS as AUTH_EXTRACTORS;
|
||||
|
||||
pub fn detect_axum_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
// Phase 23 follow-up: gate on a real top-level `use axum…` /
|
||||
// `extern crate axum` so a comment / string literal mentioning
|
||||
// axum cannot trigger detection.
|
||||
if !rust_uses_any(bytes, &["axum"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let function_index = collect_functions(tree.root_node(), bytes);
|
||||
let mut out = Vec::new();
|
||||
walk_calls(tree.root_node(), &mut |call| {
|
||||
if let Some(node) = match_router_route(call, bytes, &file_rel, &function_index) {
|
||||
out.push(node);
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Call `visit` on every `call_expression` node in the subtree rooted at
/// `node`, in pre-order (outer calls before the calls nested inside them).
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    let mut stack = vec![node];
    while let Some(current) = stack.pop() {
        if current.kind() == "call_expression" {
            visit(current);
        }
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        // Reverse so the leftmost child ends up on top of the stack.
        stack.extend(children.into_iter().rev());
    }
}
|
||||
|
||||
fn collect_functions<'tree>(
|
||||
root: Node<'tree>,
|
||||
bytes: &'tree [u8],
|
||||
) -> HashMap<String, (Node<'tree>, bool)> {
|
||||
let mut out: HashMap<String, (Node<'tree>, bool)> = HashMap::new();
|
||||
fn walk<'tree>(
|
||||
node: Node<'tree>,
|
||||
bytes: &'tree [u8],
|
||||
out: &mut HashMap<String, (Node<'tree>, bool)>,
|
||||
) {
|
||||
if node.kind() == "function_item"
|
||||
&& let Some(name_node) = node.child_by_field_name("name")
|
||||
&& let Ok(name) = name_node.utf8_text(bytes)
|
||||
{
|
||||
let auth = node
|
||||
.child_by_field_name("parameters")
|
||||
.and_then(|p| p.utf8_text(bytes).ok())
|
||||
.map(|t| AUTH_EXTRACTORS.iter().any(|x| t.contains(x)))
|
||||
.unwrap_or(false);
|
||||
out.insert(name.to_string(), (node, auth));
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
walk(child, bytes, out);
|
||||
}
|
||||
}
|
||||
walk(root, bytes, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
fn match_router_route<'tree>(
|
||||
call: Node<'tree>,
|
||||
bytes: &[u8],
|
||||
file_rel: &str,
|
||||
function_index: &HashMap<String, (Node<'tree>, bool)>,
|
||||
) -> Option<SurfaceNode> {
|
||||
let func = call.child_by_field_name("function")?;
|
||||
if func.kind() != "field_expression" {
|
||||
return None;
|
||||
}
|
||||
let field = func.child_by_field_name("field")?;
|
||||
if field.utf8_text(bytes).ok()? != "route" {
|
||||
return None;
|
||||
}
|
||||
let args = call.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let positional: Vec<Node> = args
|
||||
.children(&mut cursor)
|
||||
.filter(|n| !matches!(n.kind(), "(" | ")" | ","))
|
||||
.collect();
|
||||
if positional.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
let route = string_node_value(positional[0], bytes)?;
|
||||
let method_args = positional[1];
|
||||
if method_args.kind() != "call_expression" {
|
||||
return None;
|
||||
}
|
||||
let method_callee = method_args.child_by_field_name("function")?;
|
||||
let method_text = method_callee.utf8_text(bytes).ok()?;
|
||||
let leaf = method_text.rsplit("::").next().unwrap_or(method_text);
|
||||
let (_, method) = VERBS.iter().find(|(v, _)| *v == leaf)?;
|
||||
let method_args_node = method_args.child_by_field_name("arguments")?;
|
||||
let mut hcur = method_args_node.walk();
|
||||
let handler_node = method_args_node
|
||||
.children(&mut hcur)
|
||||
.find(|n| n.kind() == "identifier" || n.kind() == "scoped_identifier")?;
|
||||
let handler_name = handler_node.utf8_text(bytes).ok()?.to_string();
|
||||
let auth_required = function_index
|
||||
.get(&handler_name)
|
||||
.map(|(_, a)| *a)
|
||||
.unwrap_or(false);
|
||||
let handler_loc = function_index
|
||||
.get(&handler_name)
|
||||
.map(|(node, _)| {
|
||||
SourceLocation::new(
|
||||
file_rel,
|
||||
(node.start_position().row + 1) as u32,
|
||||
(node.start_position().column + 1) as u32,
|
||||
)
|
||||
})
|
||||
.unwrap_or_else(|| loc_for(handler_node, file_rel));
|
||||
Some(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(call, file_rel),
|
||||
framework: Framework::Axum,
|
||||
method: *method,
|
||||
route,
|
||||
handler_name,
|
||||
handler_location: handler_loc,
|
||||
auth_required,
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the tree-sitter Rust grammar and return the tree
    /// together with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    #[test]
    fn detects_router_get() {
        let src = r#"
use axum::{Router, routing::get};
async fn list_users() -> &'static str { "ok" }
fn app() -> Router {
Router::new().route("/users", get(list_users))
}
"#;
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("main.rs");
        let nodes = detect_axum_routes(&tree, &bytes, &file, None);
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert_eq!(ep.route, "/users");
            }
            _ => panic!(),
        }
    }
}
|
||||
310
src/surface/lang/ts_next.rs
Normal file
310
src/surface/lang/ts_next.rs
Normal file
|
|
@ -0,0 +1,310 @@
|
|||
//! TypeScript + Next.js framework probe.
|
||||
//!
|
||||
//! Recognises Next.js App Router route handlers (`app/**/route.{ts,tsx,js,jsx}`)
|
||||
//! by walking exported function declarations whose name is one of the
|
||||
//! HTTP method idents (`GET` / `POST` / …). Also recognises Pages
|
||||
//! Router API routes (`pages/api/**/*.{ts,tsx,js,jsx}`) via the
|
||||
//! `export default handler` pattern.
|
||||
//!
|
||||
//! Server actions (`'use server'` directive at file or function scope)
|
||||
//! are also reported as entry points because they expose a function
|
||||
//! callable from a React client over the wire.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub fn detect_next_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
let app_router = is_app_router_route(path);
|
||||
let pages_api = is_pages_api_route(path);
|
||||
let route_path = derive_route_path(path);
|
||||
let file_use_server = file_level_use_server(tree.root_node(), bytes);
|
||||
|
||||
if app_router {
|
||||
collect_named_exports(tree.root_node(), bytes, &file_rel, &route_path, &mut out);
|
||||
}
|
||||
if pages_api {
|
||||
collect_default_export(tree.root_node(), bytes, &file_rel, &route_path, &mut out);
|
||||
}
|
||||
if file_use_server {
|
||||
collect_use_server_exports(tree.root_node(), bytes, &file_rel, &route_path, &mut out);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// `true` when the file is named `route.{ts,tsx,js,jsx}` and some component
/// of `path` is exactly `app`.
fn is_app_router_route(path: &Path) -> bool {
    let is_route_file = matches!(
        path.file_name().and_then(|n| n.to_str()),
        Some("route.ts" | "route.tsx" | "route.js" | "route.jsx")
    );
    is_route_file && path.components().any(|c| c.as_os_str() == "app")
}
|
||||
|
||||
/// `true` when `path` contains a `pages` component immediately followed by
/// an `api` component (e.g. `pages/api/users.ts`, `src/pages/api/a/b.ts`).
///
/// The two components must be adjacent: a path such as
/// `pages/blog/api/client.ts` is an ordinary page file, not an API route,
/// and yields `false`.
fn is_pages_api_route(path: &Path) -> bool {
    let mut prev_was_pages = false;
    for comp in path.components() {
        let name = comp.as_os_str();
        if prev_was_pages && name == "api" {
            return true;
        }
        prev_was_pages = name == "pages";
    }
    false
}
|
||||
|
||||
/// Derive a route string from a file path.
///
/// Everything up to and including the first `app`, `api`, or `pages`
/// component is discarded. Of the remaining components, a trailing
/// `route.*` / `index.*` file name is dropped, and any other trailing name
/// loses its final extension. The rest is joined with `/` behind a leading
/// slash; an empty result becomes `/`.
///
/// * `app/users/[id]/route.ts` → `/users/[id]`
/// * `pages/api/users/index.ts` → `/api/users`
/// * `pages/api/users/[id].ts` → `/api/users/[id]`
fn derive_route_path(path: &Path) -> String {
    let mut segments: Vec<String> = path
        .components()
        .map(|comp| comp.as_os_str().to_string_lossy().into_owned())
        .skip_while(|text| !matches!(text.as_str(), "app" | "api" | "pages"))
        .skip(1) // the marker component itself
        .collect();

    match segments.pop() {
        // Route/index files contribute no segment of their own.
        Some(file) if file.starts_with("route.") || file.starts_with("index.") => {}
        Some(mut file) => {
            if let Some(dot) = file.rfind('.') {
                file.truncate(dot);
            }
            segments.push(file);
        }
        None => {}
    }

    let joined = segments.join("/");
    if joined.is_empty() {
        "/".to_string()
    } else {
        format!("/{}", joined)
    }
}
|
||||
|
||||
fn collect_named_exports(
|
||||
root: Node,
|
||||
bytes: &[u8],
|
||||
file_rel: &str,
|
||||
route_path: &str,
|
||||
out: &mut Vec<SurfaceNode>,
|
||||
) {
|
||||
fn recurse(
|
||||
node: Node,
|
||||
bytes: &[u8],
|
||||
file_rel: &str,
|
||||
route_path: &str,
|
||||
out: &mut Vec<SurfaceNode>,
|
||||
) {
|
||||
if node.kind() == "export_statement" {
|
||||
// Look for `export async function NAME(...)` or `export const NAME = ...`
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if let Some((name, span)) = extract_named_function(child, bytes)
|
||||
&& let Some(method) = HttpMethod::from_ident(&name)
|
||||
{
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(node, file_rel),
|
||||
framework: Framework::NextAppRouter,
|
||||
method,
|
||||
route: route_path.to_string(),
|
||||
handler_name: name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(span.0 + 1) as u32,
|
||||
(span.1 + 1) as u32,
|
||||
),
|
||||
auth_required: false,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
recurse(child, bytes, file_rel, route_path, out);
|
||||
}
|
||||
}
|
||||
recurse(root, bytes, file_rel, route_path, out);
|
||||
}
|
||||
|
||||
fn extract_named_function(node: Node, bytes: &[u8]) -> Option<(String, (usize, usize))> {
|
||||
match node.kind() {
|
||||
"function_declaration" => {
|
||||
let name_node = node.child_by_field_name("name")?;
|
||||
let name = name_node.utf8_text(bytes).ok()?.to_string();
|
||||
let pos = node.start_position();
|
||||
Some((name, (pos.row, pos.column)))
|
||||
}
|
||||
"lexical_declaration" | "variable_declaration" => {
|
||||
let mut cursor = node.walk();
|
||||
for decl in node.children(&mut cursor) {
|
||||
if decl.kind() == "variable_declarator"
|
||||
&& let Some(name_node) = decl.child_by_field_name("name")
|
||||
&& let Ok(name) = name_node.utf8_text(bytes)
|
||||
{
|
||||
let pos = decl.start_position();
|
||||
return Some((name.to_string(), (pos.row, pos.column)));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_default_export(
|
||||
root: Node,
|
||||
bytes: &[u8],
|
||||
file_rel: &str,
|
||||
route_path: &str,
|
||||
out: &mut Vec<SurfaceNode>,
|
||||
) {
|
||||
fn recurse(
|
||||
node: Node,
|
||||
bytes: &[u8],
|
||||
file_rel: &str,
|
||||
route_path: &str,
|
||||
out: &mut Vec<SurfaceNode>,
|
||||
) {
|
||||
if node.kind() == "export_statement" {
|
||||
let raw = node.utf8_text(bytes).unwrap_or("");
|
||||
if raw.contains("default") {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
let name = match child.kind() {
|
||||
"function_declaration" => child
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| n.utf8_text(bytes).ok())
|
||||
.map(str::to_string),
|
||||
"identifier" => child.utf8_text(bytes).ok().map(str::to_string),
|
||||
"arrow_function" | "function" | "function_expression" => {
|
||||
Some("default".to_string())
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
if let Some(name) = name {
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(node, file_rel),
|
||||
framework: Framework::NextAppRouter,
|
||||
method: HttpMethod::GET,
|
||||
route: route_path.to_string(),
|
||||
handler_name: name,
|
||||
handler_location: loc_for(child, file_rel),
|
||||
auth_required: false,
|
||||
}));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
recurse(child, bytes, file_rel, route_path, out);
|
||||
}
|
||||
}
|
||||
recurse(root, bytes, file_rel, route_path, out);
|
||||
}
|
||||
|
||||
fn collect_use_server_exports(
|
||||
root: Node,
|
||||
bytes: &[u8],
|
||||
file_rel: &str,
|
||||
route_path: &str,
|
||||
out: &mut Vec<SurfaceNode>,
|
||||
) {
|
||||
let mut cursor = root.walk();
|
||||
for child in root.children(&mut cursor) {
|
||||
if child.kind() == "export_statement"
|
||||
&& let Some((name, span)) = export_function_name(child, bytes)
|
||||
{
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc_for(child, file_rel),
|
||||
framework: Framework::NextServerAction,
|
||||
method: HttpMethod::POST,
|
||||
route: route_path.to_string(),
|
||||
handler_name: name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel,
|
||||
(span.0 + 1) as u32,
|
||||
(span.1 + 1) as u32,
|
||||
),
|
||||
auth_required: false,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn export_function_name(node: Node, bytes: &[u8]) -> Option<(String, (usize, usize))> {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if let Some(extracted) = extract_named_function(child, bytes) {
|
||||
return Some(extracted);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn file_level_use_server(root: Node, bytes: &[u8]) -> bool {
|
||||
let mut cursor = root.walk();
|
||||
for child in root.children(&mut cursor) {
|
||||
if child.kind() == "expression_statement" {
|
||||
let mut cs = child.walk();
|
||||
for c in child.children(&mut cs) {
|
||||
if c.kind() == "string"
|
||||
&& let Ok(text) = c.utf8_text(bytes)
|
||||
{
|
||||
let trimmed = text.trim().trim_matches(['\'', '"']);
|
||||
if trimmed == "use server" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if !matches!(child.kind(), "comment" | "import_statement") {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` with the tree-sitter TSX grammar and return the tree
    /// together with an owned copy of the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter_typescript::LANGUAGE_TSX.into())
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    #[test]
    fn detects_app_router_get() {
        let src = "export async function GET(req: Request) { return new Response('ok'); }\n";
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("app/users/route.ts");
        let nodes = detect_next_routes(&tree, &bytes, &file, None);
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::EntryPoint(ep) => {
                assert_eq!(ep.method, HttpMethod::GET);
                assert!(ep.route.contains("users"));
            }
            _ => panic!(),
        }
    }
}
|
||||
406
src/surface/mod.rs
Normal file
406
src/surface/mod.rs
Normal file
|
|
@ -0,0 +1,406 @@
|
|||
//! Phase 21 — attack-surface map.
|
||||
//!
|
||||
//! The `SurfaceMap` graph names the externally-reachable shape of the
|
||||
//! project under analysis: HTTP route entry-points (Flask, FastAPI,
|
||||
//! Spring, Express, …), the data stores they read/write, the external
|
||||
//! services they talk to, and the local sinks they ultimately reach.
|
||||
//!
|
||||
//! Track G's chain composer walks this graph to translate findings into
|
||||
//! cross-feature attack chains, and the `nyx surface` CLI prints a
|
||||
//! human-readable tree from it. Phase 21 ships the graph types plus
|
||||
//! the first framework probe (Python + Flask); Phase 22 generalises the
|
||||
//! probe to the remaining languages and Phase 23 wires the CLI.
|
||||
//!
|
||||
//! Storage shape: a flat `Vec<SurfaceNode>` sorted by [`SourceLocation`]
|
||||
//! and a flat `Vec<SurfaceEdge>` sorted by `(from_idx, to_idx, kind)`.
|
||||
//! Both vectors are byte-deterministic, so two scans of the same source
|
||||
//! produce byte-identical JSON when round-tripped through SQLite. See
|
||||
//! [`graph::petgraph_view`] for a petgraph-backed view used by the
|
||||
//! chain composer.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub mod build;
|
||||
pub mod dangerous;
|
||||
pub mod datastore;
|
||||
pub mod external;
|
||||
pub mod graph;
|
||||
pub mod lang;
|
||||
pub mod reachability;
|
||||
|
||||
/// Stable source location used as the primary key for every
|
||||
/// [`SurfaceNode`]. `file` is a project-relative POSIX path so the
|
||||
/// SurfaceMap is portable across machines; `line` and `col` are
|
||||
/// 1-indexed. Ordering is `(file, line, col)` lexicographic, matching
|
||||
/// the determinism the rest of the analyser uses for spans.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct SourceLocation {
|
||||
pub file: String,
|
||||
pub line: u32,
|
||||
pub col: u32,
|
||||
}
|
||||
|
||||
impl SourceLocation {
|
||||
pub fn new(file: impl Into<String>, line: u32, col: u32) -> Self {
|
||||
Self {
|
||||
file: file.into(),
|
||||
line,
|
||||
col,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Web-framework tag attached to every [`EntryPoint`]. The set is
|
||||
/// fixed in Phase 21 + 22 and matches the set of framework probes
|
||||
/// behind [`lang`]. New frameworks land as new variants.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum Framework {
|
||||
Flask,
|
||||
FastApi,
|
||||
Django,
|
||||
Express,
|
||||
Koa,
|
||||
Spring,
|
||||
JaxRs,
|
||||
Quarkus,
|
||||
Rails,
|
||||
Sinatra,
|
||||
Laravel,
|
||||
Slim,
|
||||
Axum,
|
||||
Actix,
|
||||
Rocket,
|
||||
NetHttp,
|
||||
Gin,
|
||||
NextAppRouter,
|
||||
NextServerAction,
|
||||
}
|
||||
|
||||
/// HTTP-handler entry-point recognised by a framework probe.
|
||||
///
|
||||
/// Every node carries the route's declared path string, HTTP method,
|
||||
/// and a resolved handler [`SourceLocation`] pointing at the function
|
||||
/// definition. `auth_required` is `true` when the decorator stack
|
||||
/// (or framework equivalent) contains an auth guard the probe was
|
||||
/// able to identify; Phase 21 recognises Flask's `@login_required`,
|
||||
/// `@auth_required`, and `@jwt_required` decorators.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct EntryPoint {
|
||||
pub location: SourceLocation,
|
||||
pub framework: Framework,
|
||||
pub method: HttpMethod,
|
||||
pub route: String,
|
||||
pub handler_name: String,
|
||||
pub handler_location: SourceLocation,
|
||||
pub auth_required: bool,
|
||||
}
|
||||
|
||||
/// Persistent data store reachable from the surface — SQL database,
|
||||
/// key-value store, document DB, blob store. Phase 22 populates this
|
||||
/// from label-rule data-source matches and ORM-receiver type facts;
|
||||
/// Phase 21 ships the type for forward-compat only and emits no
|
||||
/// `DataStore` nodes.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DataStore {
|
||||
pub location: SourceLocation,
|
||||
pub kind: DataStoreKind,
|
||||
pub label: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum DataStoreKind {
|
||||
Sql,
|
||||
KeyValue,
|
||||
Document,
|
||||
BlobStore,
|
||||
Filesystem,
|
||||
Unknown,
|
||||
}
|
||||
|
||||
/// External service the surface talks to over a network — third-party
|
||||
/// HTTP API, message broker, search index. Phase 22 fills this in;
|
||||
/// Phase 21 ships the type.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ExternalService {
|
||||
pub location: SourceLocation,
|
||||
pub kind: ExternalServiceKind,
|
||||
pub label: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ExternalServiceKind {
|
||||
HttpApi,
|
||||
MessageBroker,
|
||||
SearchIndex,
|
||||
AuthProvider,
|
||||
Unknown,
|
||||
}
|
||||
|
||||
/// Local sink with no externally observable side-effect — `eval`,
|
||||
/// `pickle.loads`, `subprocess.Popen`, raw SQL execute, etc. Phase 22
|
||||
/// fills this in from the existing label-rule registry; Phase 21
|
||||
/// ships the type.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DangerousLocal {
|
||||
pub location: SourceLocation,
|
||||
pub function_name: String,
|
||||
pub cap_bits: u32,
|
||||
}
|
||||
|
||||
/// A node in the [`SurfaceMap`]. Every variant carries a
|
||||
/// [`SourceLocation`] so the surface ordering is total and stable.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(tag = "node", rename_all = "snake_case")]
|
||||
pub enum SurfaceNode {
|
||||
EntryPoint(EntryPoint),
|
||||
DataStore(DataStore),
|
||||
ExternalService(ExternalService),
|
||||
DangerousLocal(DangerousLocal),
|
||||
}
|
||||
|
||||
impl SurfaceNode {
|
||||
pub fn location(&self) -> &SourceLocation {
|
||||
match self {
|
||||
SurfaceNode::EntryPoint(n) => &n.location,
|
||||
SurfaceNode::DataStore(n) => &n.location,
|
||||
SurfaceNode::ExternalService(n) => &n.location,
|
||||
SurfaceNode::DangerousLocal(n) => &n.location,
|
||||
}
|
||||
}
|
||||
|
||||
/// Discriminator used as a secondary sort key so two nodes that
|
||||
/// happen to share a [`SourceLocation`] (e.g. multiple route
|
||||
/// decorators on one function) keep a deterministic relative
|
||||
/// order. Returns the variant index in the enum declaration.
|
||||
fn kind_ordinal(&self) -> u8 {
|
||||
match self {
|
||||
SurfaceNode::EntryPoint(_) => 0,
|
||||
SurfaceNode::DataStore(_) => 1,
|
||||
SurfaceNode::ExternalService(_) => 2,
|
||||
SurfaceNode::DangerousLocal(_) => 3,
|
||||
}
|
||||
}
|
||||
|
||||
/// Tertiary sort key used to disambiguate nodes that share both
|
||||
/// [`SourceLocation`] and kind — e.g. a single Flask function with
|
||||
/// two `@app.route(...)` decorators ending up at the same handler
|
||||
/// location.
|
||||
fn dedup_tag(&self) -> String {
|
||||
match self {
|
||||
SurfaceNode::EntryPoint(n) => format!("{:?}:{:?}:{}", n.framework, n.method, n.route),
|
||||
SurfaceNode::DataStore(n) => format!("{:?}:{}", n.kind, n.label),
|
||||
SurfaceNode::ExternalService(n) => format!("{:?}:{}", n.kind, n.label),
|
||||
SurfaceNode::DangerousLocal(n) => format!("{}:{:#x}", n.function_name, n.cap_bits),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Semantic kind of an edge in the [`SurfaceMap`]. Encodes the
|
||||
/// seven edge classes the chain composer walks; persistence is via
|
||||
/// JSON so adding a variant is a non-breaking schema change as long
|
||||
/// as the SQLite-level migration drops the old surface_map rows.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum EdgeKind {
|
||||
/// Caller → callee. Wraps the call-graph edge so consumers do
|
||||
/// not have to consult [`crate::callgraph::CallGraph`] directly.
|
||||
Calls,
|
||||
/// Function or entry-point reads from a data store / external
|
||||
/// service.
|
||||
ReadsFrom,
|
||||
/// Function or entry-point writes to a data store.
|
||||
WritesTo,
|
||||
/// Function or entry-point sends a request to an external
|
||||
/// service.
|
||||
TalksTo,
|
||||
/// Entry-point reaches a dangerous-local sink through some
|
||||
/// transitive call chain.
|
||||
Reaches,
|
||||
/// Entry-point triggers a side-effecting action (job, email,
|
||||
/// webhook) other than a direct call.
|
||||
Triggers,
|
||||
/// Entry-point gates downstream access on a successful auth
|
||||
/// check. The `from` is the auth-check node, the `to` is the
|
||||
/// entry-point.
|
||||
AuthRequiredOn,
|
||||
}
|
||||
|
||||
/// A single edge in the [`SurfaceMap`]. `from` and `to` are indices
|
||||
/// into [`SurfaceMap::nodes`]; the surface ordering keeps these
|
||||
/// stable across rescans.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
|
||||
pub struct SurfaceEdge {
|
||||
pub from: u32,
|
||||
pub to: u32,
|
||||
pub kind: EdgeKind,
|
||||
}
|
||||
|
||||
/// The attack-surface graph for a project. Stored as parallel
|
||||
/// `Vec`s keyed on [`SourceLocation`] so JSON serialisation is
|
||||
/// byte-deterministic and SQLite round-trips are stable.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SurfaceMap {
|
||||
pub nodes: Vec<SurfaceNode>,
|
||||
pub edges: Vec<SurfaceEdge>,
|
||||
}
|
||||
|
||||
impl SurfaceMap {
|
||||
/// Construct an empty map.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Total node count. Cheap.
|
||||
pub fn node_count(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
|
||||
/// Total edge count. Cheap.
|
||||
pub fn edge_count(&self) -> usize {
|
||||
self.edges.len()
|
||||
}
|
||||
|
||||
/// Return the first entry-point node matching `(method, route)`.
|
||||
/// Linear scan; the SurfaceMap is small (one node per route +
|
||||
/// store + service + sink) so this is fine in practice.
|
||||
pub fn entry_for_route(&self, method: HttpMethod, route: &str) -> Option<&EntryPoint> {
|
||||
self.nodes.iter().find_map(|n| match n {
|
||||
SurfaceNode::EntryPoint(ep) if ep.method == method && ep.route == route => Some(ep),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Iterate over every entry-point node in surface order.
|
||||
pub fn entry_points(&self) -> impl Iterator<Item = &EntryPoint> {
|
||||
self.nodes.iter().filter_map(|n| match n {
|
||||
SurfaceNode::EntryPoint(ep) => Some(ep),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Sort nodes by `(SourceLocation, kind_ordinal, dedup_tag)` and
|
||||
/// rewrite every edge's `from`/`to` accordingly. Two structurally
|
||||
/// identical maps are byte-identical after [`canonicalize`] +
|
||||
/// `serde_json::to_vec` regardless of insertion order.
|
||||
///
|
||||
/// [`canonicalize`]: SurfaceMap::canonicalize
|
||||
pub fn canonicalize(&mut self) {
|
||||
if self.nodes.is_empty() {
|
||||
self.edges.sort();
|
||||
self.edges.dedup();
|
||||
return;
|
||||
}
|
||||
let mut indexed: Vec<(usize, &SurfaceNode)> = self.nodes.iter().enumerate().collect();
|
||||
indexed.sort_by(|(_, a), (_, b)| {
|
||||
let key_a = (a.location(), a.kind_ordinal(), a.dedup_tag());
|
||||
let key_b = (b.location(), b.kind_ordinal(), b.dedup_tag());
|
||||
key_a.cmp(&key_b)
|
||||
});
|
||||
let mut remap: BTreeMap<u32, u32> = BTreeMap::new();
|
||||
let mut new_nodes: Vec<SurfaceNode> = Vec::with_capacity(self.nodes.len());
|
||||
for (new_idx, (old_idx, _)) in indexed.iter().enumerate() {
|
||||
remap.insert(*old_idx as u32, new_idx as u32);
|
||||
}
|
||||
for (_, node) in indexed {
|
||||
new_nodes.push(node.clone());
|
||||
}
|
||||
for edge in &mut self.edges {
|
||||
if let Some(&new_from) = remap.get(&edge.from) {
|
||||
edge.from = new_from;
|
||||
}
|
||||
if let Some(&new_to) = remap.get(&edge.to) {
|
||||
edge.to = new_to;
|
||||
}
|
||||
}
|
||||
self.nodes = new_nodes;
|
||||
self.edges.sort();
|
||||
self.edges.dedup();
|
||||
}
|
||||
|
||||
/// Serialize to deterministic JSON. The map is canonicalised
|
||||
/// first; structurally identical maps emit byte-identical JSON.
|
||||
pub fn to_json(&mut self) -> serde_json::Result<Vec<u8>> {
|
||||
self.canonicalize();
|
||||
serde_json::to_vec(self)
|
||||
}
|
||||
|
||||
/// Deserialize from JSON. Does not canonicalise; the producer is
|
||||
/// responsible for emitting a canonicalised payload.
|
||||
pub fn from_json(bytes: &[u8]) -> serde_json::Result<Self> {
|
||||
serde_json::from_slice(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Render `path` as a POSIX-style string relative to `scan_root`.
///
/// When `scan_root` is `Some` and is a prefix of `path`, the prefix is
/// stripped. Otherwise (no root given, or the file lies outside it) the
/// full input path is used. In both cases backslashes are rewritten to
/// `/` and non-UTF-8 bytes are replaced lossily.
pub fn relative_path_string(path: &Path, scan_root: Option<&Path>) -> String {
    let shown = scan_root
        .and_then(|root| path.strip_prefix(root).ok())
        .unwrap_or(path);
    shown.to_string_lossy().replace('\\', "/")
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a [`SourceLocation`].
    fn loc(file: &str, line: u32, col: u32) -> SourceLocation {
        SourceLocation::new(file, line, col)
    }

    /// Build a Flask entry-point node declared at `file:line`, with its
    /// handler located on the following line.
    fn ep(file: &str, line: u32, route: &str, method: HttpMethod) -> SurfaceNode {
        let declared_at = loc(file, line, 1);
        let handler_at = loc(file, line + 1, 1);
        SurfaceNode::EntryPoint(EntryPoint {
            location: declared_at,
            framework: Framework::Flask,
            method,
            route: route.into(),
            handler_name: "h".into(),
            handler_location: handler_at,
            auth_required: false,
        })
    }

    #[test]
    fn canonicalize_sorts_nodes_and_remaps_edges() {
        // Nodes are inserted out of order on purpose: b.py first, a.py second.
        let mut m = SurfaceMap {
            nodes: vec![
                ep("b.py", 10, "/b", HttpMethod::GET),
                ep("a.py", 5, "/a", HttpMethod::GET),
            ],
            edges: vec![SurfaceEdge {
                from: 0,
                to: 1,
                kind: EdgeKind::Calls,
            }],
        };

        m.canonicalize();

        assert_eq!(m.nodes[0].location().file, "a.py");
        assert_eq!(m.nodes[1].location().file, "b.py");
        // edge `from=0` was b.py (now index 1), `to=1` was a.py (now index 0)
        assert_eq!(m.edges[0].from, 1);
        assert_eq!(m.edges[0].to, 0);
    }

    #[test]
    fn json_round_trip_byte_identical() {
        let mut original = SurfaceMap {
            nodes: vec![
                ep("a.py", 1, "/a", HttpMethod::GET),
                ep("b.py", 2, "/b", HttpMethod::POST),
            ],
            edges: vec![SurfaceEdge {
                from: 0,
                to: 1,
                kind: EdgeKind::Calls,
            }],
        };

        let first = original.to_json().unwrap();
        let mut decoded = SurfaceMap::from_json(&first).unwrap();
        let second = decoded.to_json().unwrap();

        assert_eq!(first, second);
    }
}
|
||||
220
src/surface/reachability.rs
Normal file
220
src/surface/reachability.rs
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
//! Transitive-closure pass: connect [`SurfaceNode::EntryPoint`] nodes
|
||||
//! to the [`SurfaceNode::DataStore`] / [`SurfaceNode::ExternalService`]
|
||||
//! / [`SurfaceNode::DangerousLocal`] nodes they can reach via the
|
||||
//! whole-program [`CallGraph`].
|
||||
//!
|
||||
//! For each entry-point we first locate the matching call-graph
|
||||
//! [`FuncKey`](crate::symbol::FuncKey) by `(namespace, function_name)` (the entry-point's
|
||||
//! `handler_location.file` is the project-relative POSIX path used as
|
||||
//! `FuncKey::namespace`, and `handler_name` is the leaf function
|
||||
//! name). From that node we run a BFS over forward call-graph edges
|
||||
//! up to a small depth bound, and for every visited
|
||||
//! `(file, function_name)` we look for a matching DataStore /
|
||||
//! ExternalService / DangerousLocal node in the SurfaceMap, emitting
|
||||
//! one [`EdgeKind::Reaches`] edge per match.
|
||||
//!
|
||||
//! Node match policy: the destination's `location.file` must equal
|
||||
//! the visited call-graph node's namespace. This is best-effort but
|
||||
//! deterministic — an entry-point that calls into a helper which then
|
||||
//! calls `eval()` will surface the eval as a `Reaches` of the entry
|
||||
//! point as long as the eval's host file is on the BFS frontier.
|
||||
|
||||
use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode};
|
||||
use crate::callgraph::CallGraph;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use petgraph::Direction;
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
/// Maximum BFS depth from an entry-point node. Surface chains beyond
/// eight call-graph hops are rare in practice and the cost of a deeper
/// walk is paid per entry-point per scan. A depth-bounded traversal
/// also prevents recursive cycles from blowing up.
const MAX_BFS_DEPTH: usize = 8;
|
||||
|
||||
/// Populate [`EdgeKind::Reaches`] edges on `map`. Mutates the edge
|
||||
/// list in place; the caller is expected to follow up with
|
||||
/// [`SurfaceMap::canonicalize`] before serialisation.
|
||||
pub fn populate_reaches_edges(
|
||||
map: &mut SurfaceMap,
|
||||
summaries: &GlobalSummaries,
|
||||
call_graph: &CallGraph,
|
||||
) {
|
||||
if map.nodes.is_empty() {
|
||||
return;
|
||||
}
|
||||
let dst_index = build_destination_index(map);
|
||||
if dst_index.is_empty() {
|
||||
return;
|
||||
}
|
||||
let _ = summaries;
|
||||
|
||||
let mut new_edges: HashSet<SurfaceEdge> = HashSet::new();
|
||||
for (entry_idx, node) in map.nodes.iter().enumerate() {
|
||||
let SurfaceNode::EntryPoint(ep) = node else {
|
||||
continue;
|
||||
};
|
||||
let mut reachable_files: HashSet<String> = HashSet::new();
|
||||
// Seed with the handler's host file — the entry-point itself
|
||||
// counts as reachable, so any DataStore / ExternalService /
|
||||
// DangerousLocal in the same file is connected even when the
|
||||
// call graph cannot resolve the seed FuncKey.
|
||||
reachable_files.insert(ep.handler_location.file.clone());
|
||||
|
||||
// Locate seed FuncKeys whose `namespace` (project-relative
|
||||
// POSIX path, optionally prefixed with `@pkg/name::`) matches
|
||||
// the entry's file and whose `name` matches the handler. More
|
||||
// than one seed is possible (overloaded methods, duplicate
|
||||
// definitions).
|
||||
//
|
||||
// Phase 23 follow-up: this used to be an `ends_with` substring
|
||||
// check on both sides, which silently aliased same-basename
|
||||
// files in sibling directories — `subdir/app.py` and
|
||||
// `other/app.py` would both seed when the entry-point pointed
|
||||
// at `app.py`. We now compare the file part exactly so a
|
||||
// handler in `subdir/app.py` only seeds the FuncKey whose
|
||||
// namespace strips to `subdir/app.py`.
|
||||
let seeds = call_graph
|
||||
.index
|
||||
.iter()
|
||||
.filter(|(k, _)| k.name == ep.handler_name)
|
||||
.filter(|(k, _)| file_part_of_namespace(&k.namespace) == ep.handler_location.file)
|
||||
.map(|(_, idx)| *idx)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut visited: HashSet<_> = seeds.iter().copied().collect();
|
||||
let mut queue: VecDeque<(petgraph::graph::NodeIndex, usize)> =
|
||||
seeds.iter().map(|n| (*n, 0)).collect();
|
||||
while let Some((node_idx, depth)) = queue.pop_front() {
|
||||
if let Some(key) = call_graph.graph.node_weight(node_idx) {
|
||||
reachable_files.insert(key.namespace.clone());
|
||||
}
|
||||
if depth >= MAX_BFS_DEPTH {
|
||||
continue;
|
||||
}
|
||||
for neighbour in call_graph
|
||||
.graph
|
||||
.neighbors_directed(node_idx, Direction::Outgoing)
|
||||
{
|
||||
if visited.insert(neighbour) {
|
||||
queue.push_back((neighbour, depth + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (dst_idx, dst_file) in &dst_index {
|
||||
if reachable_files.contains(dst_file) {
|
||||
new_edges.insert(SurfaceEdge {
|
||||
from: entry_idx as u32,
|
||||
to: *dst_idx as u32,
|
||||
kind: EdgeKind::Reaches,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
map.edges.extend(new_edges);
|
||||
}
|
||||
|
||||
/// Return the file-path portion of a `FuncKey` namespace.
///
/// Namespaces for JS/TS files inside resolved packages look like
/// `"@scope/name::src/file.ts"` (as produced by `namespace_with_package`);
/// everything up to and including the last `::` is dropped. A namespace
/// without `::` is returned unchanged. The result is what gets compared
/// against an entry-point's `handler_location.file`.
fn file_part_of_namespace(ns: &str) -> &str {
    match ns.rsplit_once("::") {
        Some((_package, file)) => file,
        None => ns,
    }
}
|
||||
|
||||
/// Build a lookup from destination node index → destination file.
|
||||
/// Restricted to the three reachable-from-entry-point variants.
|
||||
fn build_destination_index(map: &SurfaceMap) -> Vec<(usize, String)> {
|
||||
let mut out: Vec<(usize, String)> = Vec::new();
|
||||
for (idx, node) in map.nodes.iter().enumerate() {
|
||||
let file = match node {
|
||||
SurfaceNode::DataStore(n) => n.location.file.clone(),
|
||||
SurfaceNode::ExternalService(n) => n.location.file.clone(),
|
||||
SurfaceNode::DangerousLocal(n) => n.location.file.clone(),
|
||||
SurfaceNode::EntryPoint(_) => continue,
|
||||
};
|
||||
out.push((idx, file));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Cheap by-file inverted index of the destination nodes — exposed for
|
||||
/// future callers (chain composer, CLI tree printer) that want a
|
||||
/// constant-time "what does this file expose" lookup without rerunning
|
||||
/// reachability.
|
||||
#[allow(dead_code)]
|
||||
pub fn destinations_by_file(map: &SurfaceMap) -> HashMap<String, Vec<usize>> {
|
||||
let mut out: HashMap<String, Vec<usize>> = HashMap::new();
|
||||
for (idx, node) in map.nodes.iter().enumerate() {
|
||||
let file = match node {
|
||||
SurfaceNode::DataStore(n) => &n.location.file,
|
||||
SurfaceNode::ExternalService(n) => &n.location.file,
|
||||
SurfaceNode::DangerousLocal(n) => &n.location.file,
|
||||
SurfaceNode::EntryPoint(_) => continue,
|
||||
};
|
||||
out.entry(file.clone()).or_default().push(idx);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entry_points::HttpMethod;
    use crate::surface::{
        DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
    };

    /// Flask `GET /` entry point declared on line 1 of `file`, with its
    /// handler `handler` on line 2.
    fn ep(file: &str, handler: &str) -> SurfaceNode {
        let declared_at = SourceLocation::new(file, 1, 1);
        let handler_at = SourceLocation::new(file, 2, 1);
        SurfaceNode::EntryPoint(EntryPoint {
            location: declared_at,
            framework: Framework::Flask,
            method: HttpMethod::GET,
            route: "/".into(),
            handler_name: handler.into(),
            handler_location: handler_at,
            auth_required: false,
        })
    }

    /// Dangerous-local node for function `name` hosted in `file`.
    fn dl(file: &str, name: &str) -> SurfaceNode {
        SurfaceNode::DangerousLocal(DangerousLocal {
            location: SourceLocation::new(file, 0, 0),
            function_name: name.into(),
            cap_bits: 0x1,
        })
    }

    #[test]
    fn entry_in_same_file_as_dangerous_emits_reaches() {
        let mut map = SurfaceMap::new();
        map.nodes.push(ep("app.py", "index"));
        map.nodes.push(dl("app.py", "do_eval"));

        // An empty call graph: the edge must come from the same-file seed alone.
        let empty_graph = CallGraph {
            graph: petgraph::graph::DiGraph::new(),
            index: Default::default(),
            unresolved_not_found: vec![],
            unresolved_ambiguous: vec![],
        };
        let summaries = GlobalSummaries::new();

        populate_reaches_edges(&mut map, &summaries, &empty_graph);

        assert_eq!(map.edges.len(), 1);
        let edge = &map.edges[0];
        assert_eq!(edge.kind, EdgeKind::Reaches);
        assert_eq!(edge.from, 0);
        assert_eq!(edge.to, 1);
    }

    #[test]
    fn file_part_of_namespace_strips_package_prefix() {
        let cases = [
            ("app.py", "app.py"),
            ("src/main.rs", "src/main.rs"),
            ("@scope/name::src/file.ts", "src/file.ts"),
            // Last `::` wins, matching `namespace_with_package`'s shape.
            ("@a/b::@c/d::lib/x.ts", "lib/x.ts"),
        ];
        for (namespace, expected) in cases {
            assert_eq!(file_part_of_namespace(namespace), expected);
        }
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue