[pitboss] phase 22: Track F.2 + F.3 — Cross-language framework probes + data store / external service / dangerous-local detection

This commit is contained in:
pitboss 2026-05-15 13:28:58 -05:00
parent c03326a658
commit 2395446655
43 changed files with 5213 additions and 82 deletions

View file

@ -1,29 +1,44 @@
//! Top-level [`SurfaceMap`] builder.
//!
//! Consumes the post-pass-2 [`GlobalSummaries`] + [`CallGraph`] for
//! call-graph reachability and the project's file list for the
//! per-language framework probes. Phase 21 only invokes the Python +
//! Flask probe; Phase 22 wires the remaining language probes through
//! [`crate::surface::lang`].
//! Phase 22 dispatch:
//!
//! Build steps (Phase 21):
//! 1. Per-file framework probes (one parser per language) emit
//! [`SurfaceNode::EntryPoint`] nodes for every recognised route /
//! handler.
//! 2. [`super::datastore::detect_data_stores`] walks
//! [`GlobalSummaries`] and emits [`SurfaceNode::DataStore`] nodes
//! for every recognised driver call.
//! 3. [`super::external::detect_external_services`] walks summaries +
//! SSRF caps and emits [`SurfaceNode::ExternalService`] nodes.
//! 4. [`super::dangerous::detect_dangerous_locals`] walks summaries
//! and emits [`SurfaceNode::DangerousLocal`] nodes for every
//! function whose `sink_caps` include CODE_EXEC / DESERIALIZE /
//! SSTI / FMT_STRING.
//! 5. [`super::reachability::populate_reaches_edges`] runs a BFS over
//! the [`CallGraph`] from each entry-point handler, emitting
//! [`super::EdgeKind::Reaches`] edges to every reachable
//! DataStore / ExternalService / DangerousLocal.
//! 6. [`SurfaceMap::canonicalize`] sorts nodes + edges so the
//! serialised JSON is byte-deterministic across rescans.
//!
//! 1. For every Python file, parse it once and invoke
//! [`crate::surface::lang::python_flask::detect_flask_routes`].
//! 2. Collect the resulting [`SurfaceNode::EntryPoint`] nodes.
//! 3. Canonicalise the map (sort nodes + edges, dedup edges) so two
//! runs over the same source produce byte-identical JSON.
//! Per-file errors (parse failure, unsupported language) are
//! swallowed so a single bad file does not kill the whole map.
use crate::callgraph::CallGraph;
use crate::summary::GlobalSummaries;
use crate::surface::{SurfaceMap, lang::python_flask};
use crate::surface::{
SurfaceMap, dangerous, datastore, external,
lang::{
go_gin, go_http, java_quarkus, java_servlet, java_spring, js_express, js_koa,
php_laravel, php_slim, python_django, python_fastapi, python_flask,
ruby_rails, ruby_sinatra, rust_actix, rust_axum, ts_next,
},
reachability,
};
use crate::utils::config::Config;
use std::path::{Path, PathBuf};
use tree_sitter::Parser;
/// Inputs to [`build_surface_map`]. Wrapped in a struct so the
/// downstream Phase 22 work (additional probes, call-graph-derived
/// `Reaches` edges, label-rule data-source nodes) can extend the
/// signature without touching every caller.
pub struct SurfaceBuildInputs<'a> {
pub files: &'a [PathBuf],
pub scan_root: Option<&'a Path>,
@ -32,87 +47,304 @@ pub struct SurfaceBuildInputs<'a> {
pub config: &'a Config,
}
/// Build a [`SurfaceMap`] for the project under analysis.
///
/// Best-effort: parse failures on individual files are swallowed so
/// the surface map of a 10k-file project is not killed by one bad
/// Python file. Returns an empty map when the inputs contain no
/// recognised entry-points.
pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
let mut map = SurfaceMap::new();
let _ = inputs.config;
// Phase 21: only Python / Flask. The downstream Phase 22 probes
// will dispatch on file extension here.
let mut python_parser = tree_sitter::Parser::new();
if python_parser
.set_language(&tree_sitter_python::LANGUAGE.into())
.is_err()
{
return map;
}
let mut parsers = Parsers::new();
for path in inputs.files {
if !is_python_file(path) {
continue;
}
let Ok(bytes) = std::fs::read(path) else {
continue;
};
let Some(tree) = python_parser.parse(&bytes, None) else {
continue;
let kind = classify_file(path);
let nodes = match kind {
FileKind::Python => parsers
.python
.as_mut()
.and_then(|p| p.parse(&bytes, None))
.map(|tree| {
let mut all = python_flask::detect_flask_routes(
&tree,
&bytes,
path,
inputs.scan_root,
);
all.extend(python_fastapi::detect_fastapi_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all.extend(python_django::detect_django_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all
})
.unwrap_or_default(),
FileKind::JavaScript => parsers
.javascript
.as_mut()
.and_then(|p| p.parse(&bytes, None))
.map(|tree| {
let mut all =
js_express::detect_express_routes(&tree, &bytes, path, inputs.scan_root);
all.extend(js_koa::detect_koa_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all
})
.unwrap_or_default(),
FileKind::TypeScript => parsers
.typescript
.as_mut()
.and_then(|p| p.parse(&bytes, None))
.map(|tree| {
let mut all =
js_express::detect_express_routes(&tree, &bytes, path, inputs.scan_root);
all.extend(js_koa::detect_koa_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all.extend(ts_next::detect_next_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all
})
.unwrap_or_default(),
FileKind::Java => parsers
.java
.as_mut()
.and_then(|p| p.parse(&bytes, None))
.map(|tree| {
let mut all =
java_spring::detect_spring_routes(&tree, &bytes, path, inputs.scan_root);
all.extend(java_servlet::detect_servlet_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all.extend(java_quarkus::detect_quarkus_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all
})
.unwrap_or_default(),
FileKind::Go => parsers
.go
.as_mut()
.and_then(|p| p.parse(&bytes, None))
.map(|tree| {
let mut all =
go_http::detect_go_http_routes(&tree, &bytes, path, inputs.scan_root);
all.extend(go_gin::detect_gin_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all
})
.unwrap_or_default(),
FileKind::Php => parsers
.php
.as_mut()
.and_then(|p| p.parse(&bytes, None))
.map(|tree| {
let mut all = php_laravel::detect_laravel_routes(
&tree,
&bytes,
path,
inputs.scan_root,
);
all.extend(php_slim::detect_slim_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all
})
.unwrap_or_default(),
FileKind::Ruby => parsers
.ruby
.as_mut()
.and_then(|p| p.parse(&bytes, None))
.map(|tree| {
let mut all = ruby_sinatra::detect_sinatra_routes(
&tree,
&bytes,
path,
inputs.scan_root,
);
all.extend(ruby_rails::detect_rails_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all
})
.unwrap_or_default(),
FileKind::Rust => parsers
.rust
.as_mut()
.and_then(|p| p.parse(&bytes, None))
.map(|tree| {
let mut all =
rust_actix::detect_actix_routes(&tree, &bytes, path, inputs.scan_root);
all.extend(rust_axum::detect_axum_routes(
&tree,
&bytes,
path,
inputs.scan_root,
));
all
})
.unwrap_or_default(),
FileKind::Other => Vec::new(),
};
let nodes =
python_flask::detect_flask_routes(&tree, &bytes, path, inputs.scan_root);
for n in nodes {
map.nodes.push(n);
}
}
// GlobalSummaries / CallGraph are reserved for Phase 22's
// `DangerousLocal` + `Reaches`-edge fill-in. Phase 21 records
// them in the inputs so callers do not need to be touched again
// when Phase 22 wires them up.
let _ = inputs.global_summaries;
let _ = inputs.call_graph;
let _ = inputs.config;
// Phase 22 — Track F.3: data-store / external-service /
// dangerous-local detection from summaries.
map.nodes
.extend(datastore::detect_data_stores(inputs.global_summaries));
map.nodes
.extend(external::detect_external_services(inputs.global_summaries));
map.nodes
.extend(dangerous::detect_dangerous_locals(inputs.global_summaries));
// Canonicalise so node indices are stable before reachability
// builds edges referring to those indices.
map.canonicalize();
// Phase 22 — Track F.3: transitive closure over the call graph.
reachability::populate_reaches_edges(&mut map, inputs.global_summaries, inputs.call_graph);
// Re-canonicalise: edges added by reachability need to be sorted
// so the serialised JSON stays byte-deterministic.
map.canonicalize();
map
}
fn is_python_file(path: &Path) -> bool {
matches!(
path.extension().and_then(|s| s.to_str()),
Some("py") | Some("pyi")
)
/// Language bucket a source file falls into, as decided by
/// `classify_file` from the file's extension.
///
/// Each variant other than `Other` has a matching parser slot in
/// `Parsers`; `Other` marks files that no framework probe is run on.
#[derive(Copy, Clone, PartialEq, Eq)]
enum FileKind {
Python,
JavaScript,
TypeScript,
Java,
Go,
Php,
Ruby,
Rust,
Other,
}
/// Map a path's extension to the [`FileKind`] whose parser and probes
/// apply to it.
///
/// The comparison is exact and case-sensitive. A path with no
/// extension, a non-UTF-8 extension, or an extension that is not
/// listed below is classified as [`FileKind::Other`].
fn classify_file(path: &Path) -> FileKind {
    let Some(ext) = path.extension().and_then(|raw| raw.to_str()) else {
        return FileKind::Other;
    };
    match ext {
        "py" | "pyi" => FileKind::Python,
        "js" | "jsx" | "mjs" | "cjs" => FileKind::JavaScript,
        "ts" | "tsx" | "mts" | "cts" => FileKind::TypeScript,
        "java" => FileKind::Java,
        "go" => FileKind::Go,
        "php" => FileKind::Php,
        "rb" => FileKind::Ruby,
        "rs" => FileKind::Rust,
        _ => FileKind::Other,
    }
}
/// One optional tree-sitter [`Parser`] per supported language.
///
/// Every slot is filled by `parser_for` in `Parsers::new`, so a slot is
/// `None` exactly when the parser could not be configured with that
/// language's grammar.
struct Parsers {
python: Option<Parser>,
javascript: Option<Parser>,
typescript: Option<Parser>,
java: Option<Parser>,
go: Option<Parser>,
php: Option<Parser>,
ruby: Option<Parser>,
rust: Option<Parser>,
}
impl Parsers {
    /// Build one parser per supported language.
    ///
    /// Each slot comes from `parser_for`, so a grammar that cannot be
    /// installed leaves its slot as `None` instead of failing the whole
    /// construction. The `typescript` slot is configured with the TSX
    /// grammar (`LANGUAGE_TSX`).
    fn new() -> Self {
        let python = parser_for(tree_sitter_python::LANGUAGE.into());
        let javascript = parser_for(tree_sitter_javascript::LANGUAGE.into());
        let typescript = parser_for(tree_sitter_typescript::LANGUAGE_TSX.into());
        let java = parser_for(tree_sitter_java::LANGUAGE.into());
        let go = parser_for(tree_sitter_go::LANGUAGE.into());
        let php = parser_for(tree_sitter_php::LANGUAGE_PHP.into());
        let ruby = parser_for(tree_sitter_ruby::LANGUAGE.into());
        let rust = parser_for(tree_sitter_rust::LANGUAGE.into());
        Self {
            python,
            javascript,
            typescript,
            java,
            go,
            php,
            ruby,
            rust,
        }
    }
}
/// Create a [`Parser`] configured for `language`.
///
/// Returns `None` when `set_language` rejects the grammar; the error
/// value itself is discarded.
fn parser_for(language: tree_sitter::Language) -> Option<Parser> {
    let mut configured = Parser::new();
    match configured.set_language(&language) {
        Ok(()) => Some(configured),
        Err(_) => None,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::entry_points::HttpMethod;
use crate::surface::SurfaceNode;
use std::fs;
use tempfile::tempdir;
fn empty_inputs<'a>(
files: &'a [PathBuf],
scan_root: Option<&'a Path>,
gs: &'a GlobalSummaries,
cg: &'a CallGraph,
cfg: &'a Config,
) -> SurfaceBuildInputs<'a> {
SurfaceBuildInputs {
files,
scan_root,
global_summaries: gs,
call_graph: cg,
config: cfg,
}
}
fn empty_call_graph() -> CallGraph {
CallGraph {
graph: petgraph::graph::DiGraph::new(),
index: Default::default(),
unresolved_not_found: vec![],
unresolved_ambiguous: vec![],
}
}
#[test]
fn empty_inputs_produce_empty_map() {
let dir = tempdir().unwrap();
let cfg = Config::default();
let gs = GlobalSummaries::new();
let cg = CallGraph {
graph: petgraph::graph::DiGraph::new(),
index: Default::default(),
unresolved_not_found: vec![],
unresolved_ambiguous: vec![],
};
let cg = empty_call_graph();
let files: Vec<PathBuf> = vec![];
let inputs = SurfaceBuildInputs {
files: &files,
scan_root: Some(dir.path()),
global_summaries: &gs,
call_graph: &cg,
config: &cfg,
};
let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg);
let map = build_surface_map(&inputs);
assert_eq!(map.node_count(), 0);
assert_eq!(map.edge_count(), 0);
@ -140,24 +372,76 @@ def submit():
.unwrap();
let cfg = Config::default();
let gs = GlobalSummaries::new();
let cg = CallGraph {
graph: petgraph::graph::DiGraph::new(),
index: Default::default(),
unresolved_not_found: vec![],
unresolved_ambiguous: vec![],
};
let files = vec![py.clone()];
let inputs = SurfaceBuildInputs {
files: &files,
scan_root: Some(dir.path()),
global_summaries: &gs,
call_graph: &cg,
config: &cfg,
};
let cg = empty_call_graph();
let files = vec![py];
let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg);
let map = build_surface_map(&inputs);
assert_eq!(map.node_count(), 2);
let methods: Vec<HttpMethod> = map.entry_points().map(|ep| ep.method).collect();
assert!(methods.contains(&HttpMethod::GET));
assert!(methods.contains(&HttpMethod::POST));
}
#[test]
fn fastapi_file_produces_entry_points() {
let dir = tempdir().unwrap();
let py = dir.path().join("api.py");
fs::write(
&py,
"from fastapi import FastAPI\napp = FastAPI()\n@app.get('/users')\ndef list_users(): pass\n@app.post('/items')\ndef create(): pass\n",
)
.unwrap();
let cfg = Config::default();
let gs = GlobalSummaries::new();
let cg = empty_call_graph();
let files = vec![py];
let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg);
let map = build_surface_map(&inputs);
assert_eq!(map.node_count(), 2);
}
#[test]
fn dangerous_local_emits_node_and_reaches_edge_to_same_file_entry() {
use crate::labels::Cap;
use crate::summary::FuncSummary;
use crate::symbol::{FuncKey, Lang};
let dir = tempdir().unwrap();
let py = dir.path().join("app.py");
fs::write(
&py,
r#"
from flask import Flask
app = Flask(__name__)
@app.route("/eval")
def evaluator():
return ""
"#,
)
.unwrap();
let cfg = Config::default();
let mut gs = GlobalSummaries::new();
gs.insert(
FuncKey::new_function(Lang::Python, "app.py", "evaluator", None),
FuncSummary {
name: "evaluator".to_string(),
file_path: "app.py".to_string(),
lang: "python".to_string(),
sink_caps: Cap::CODE_EXEC.bits(),
..Default::default()
},
);
let cg = empty_call_graph();
let files = vec![py];
let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg);
let map = build_surface_map(&inputs);
assert!(map
.nodes
.iter()
.any(|n| matches!(n, SurfaceNode::DangerousLocal(_))));
assert!(map
.edges
.iter()
.any(|e| matches!(e.kind, crate::surface::EdgeKind::Reaches)));
}
}

88
src/surface/dangerous.rs Normal file
View file

@ -0,0 +1,88 @@
//! Dangerous-local sink detection.
//!
//! Walks the post-pass-2 [`GlobalSummaries`] looking for functions
//! that themselves consume `Cap::CODE_EXEC`, `Cap::DESERIALIZE`,
//! `Cap::SSTI`, or `Cap::FMT_STRING` (the canonical "no externally
//! observable side effect" sinks) and emits one
//! [`SurfaceNode::DangerousLocal`] per such function.
//!
//! The cap bits are taken straight from the existing label-rule
//! registry — every Phase 22 sink class continues to land on the same
//! `sink_caps` field downstream rules already populate. No new
//! detection pass is added here; the surface layer just lifts the
//! cap-bit information out of the summary.
use super::{DangerousLocal, SourceLocation, SurfaceNode};
use crate::labels::Cap;
use crate::summary::GlobalSummaries;
/// Mask of the sink capabilities that make a function a *local* sink:
/// code execution, unsafe deserialisation, server-side template
/// injection and format-string injection.
///
/// Sink caps that map to other node kinds (SQL_QUERY → DataStore,
/// SSRF → ExternalService) are deliberately left out so the node
/// taxonomy matches the chain composer's expectations.
fn dangerous_caps() -> Cap {
    let execution = Cap::CODE_EXEC | Cap::DESERIALIZE;
    let injection = Cap::SSTI | Cap::FMT_STRING;
    execution | injection
}
/// Emit one [`SurfaceNode::DangerousLocal`] for every summarised
/// function whose sink caps intersect the `dangerous_caps` mask.
///
/// The node records the function's file (line and column are always
/// 0), its qualified name, and only the cap bits that fall inside the
/// mask. Functions with no matching bit are skipped.
pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
    let wanted = dangerous_caps();
    let mut nodes = Vec::new();
    for (func_key, func) in summaries.iter() {
        let hit = func.sink_caps() & wanted;
        if !hit.is_empty() {
            let location = SourceLocation {
                file: func.file_path.clone(),
                line: 0,
                col: 0,
            };
            nodes.push(SurfaceNode::DangerousLocal(DangerousLocal {
                location,
                function_name: func_key.qualified_name(),
                cap_bits: hit.bits(),
            }));
        }
    }
    nodes
}
// Unit tests for `detect_dangerous_locals`, driven by hand-built
// summaries rather than real source files.
#[cfg(test)]
mod tests {
use super::*;
use crate::summary::FuncSummary;
use crate::symbol::{FuncKey, Lang};
// Build a Python (key, summary) pair whose only non-default data is
// the name, file and `sink_caps` bits.
fn summary_with_caps(name: &str, file: &str, caps: Cap) -> (FuncKey, FuncSummary) {
let key = FuncKey::new_function(Lang::Python, file, name, None);
let summary = FuncSummary {
name: name.to_string(),
file_path: file.to_string(),
lang: "python".to_string(),
sink_caps: caps.bits(),
..Default::default()
};
(key, summary)
}
// A function carrying CODE_EXEC yields exactly one DangerousLocal
// node with that bit set.
#[test]
fn detects_eval_sink() {
let mut gs = GlobalSummaries::new();
let (k, s) = summary_with_caps("run", "danger.py", Cap::CODE_EXEC);
gs.insert(k, s);
let nodes = detect_dangerous_locals(&gs);
assert_eq!(nodes.len(), 1);
let SurfaceNode::DangerousLocal(d) = &nodes[0] else {
panic!()
};
assert_eq!(d.cap_bits & Cap::CODE_EXEC.bits(), Cap::CODE_EXEC.bits());
}
// SQL_QUERY is outside the dangerous-local mask, so nothing is emitted.
#[test]
fn ignores_sql_only() {
let mut gs = GlobalSummaries::new();
let (k, s) = summary_with_caps("query", "data.py", Cap::SQL_QUERY);
gs.insert(k, s);
let nodes = detect_dangerous_locals(&gs);
assert!(nodes.is_empty());
}
}

218
src/surface/datastore.rs Normal file
View file

@ -0,0 +1,218 @@
//! Data-store detection.
//!
//! Walks the post-pass-2 [`GlobalSummaries`] looking for callees whose
//! name is a known database / cache / blob-store driver entry point,
//! and emits one [`SurfaceNode::DataStore`] per resolved store.
//!
//! The detector is name-based on purpose: the receiver's full type is
//! often unknown after pass 2, but the leaf name of a driver call
//! (`psycopg2.connect`, `mysql.createConnection`, `gorm.Open`,
//! `Eloquent::find`, `ActiveRecord::Base.connection`) carries enough
//! signal for surface-level chain composition. False positives here
//! are forgiving — the surface map is informational, not a finding
//! that fires on its own.
use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode};
use crate::summary::{FuncSummary, GlobalSummaries};
/// One detection rule: callee-name pattern → store kind + label. Stored
/// as a flat list so adding a new ORM / driver is a one-line edit.
struct DriverRule {
/// Pattern looked for, ASCII-case-insensitively, as a substring of
/// the callee text (see `match_rule`). Despite the field name it is
/// not restricted to the last path segment.
leaf: &'static str,
/// Category of data store recorded on the emitted node.
kind: DataStoreKind,
/// Human-readable label attached to the emitted node. Used by the
/// chain composer and the `nyx surface` CLI tree.
label: &'static str,
}
/// Known data-store driver call patterns, grouped by language.
/// Consulted by `match_rule` for every callee name in the summaries.
///
/// NOTE(review): patterns are matched as case-insensitive substrings,
/// so short entries such as `open`, `PDO` and `knex` also hit longer
/// unrelated callee names (e.g. anything containing "open") — confirm
/// that breadth is intended.
const DRIVER_RULES: &[DriverRule] = &[
// Python — relational
DriverRule { leaf: "psycopg2.connect", kind: DataStoreKind::Sql, label: "PostgreSQL (psycopg2)" },
DriverRule { leaf: "psycopg.connect", kind: DataStoreKind::Sql, label: "PostgreSQL (psycopg3)" },
DriverRule { leaf: "mysql.connector.connect", kind: DataStoreKind::Sql, label: "MySQL (mysql.connector)" },
DriverRule { leaf: "MySQLdb.connect", kind: DataStoreKind::Sql, label: "MySQL (MySQLdb)" },
DriverRule { leaf: "pymysql.connect", kind: DataStoreKind::Sql, label: "MySQL (PyMySQL)" },
DriverRule { leaf: "sqlite3.connect", kind: DataStoreKind::Sql, label: "SQLite (sqlite3)" },
DriverRule { leaf: "sqlalchemy.create_engine", kind: DataStoreKind::Sql, label: "SQLAlchemy" },
DriverRule { leaf: "django.db.connection", kind: DataStoreKind::Sql, label: "Django ORM" },
// Python — kv / doc
DriverRule { leaf: "redis.Redis", kind: DataStoreKind::KeyValue, label: "Redis" },
DriverRule { leaf: "redis.from_url", kind: DataStoreKind::KeyValue, label: "Redis" },
DriverRule { leaf: "pymongo.MongoClient", kind: DataStoreKind::Document, label: "MongoDB" },
DriverRule { leaf: "boto3.client", kind: DataStoreKind::BlobStore, label: "AWS (boto3)" },
DriverRule { leaf: "boto3.resource", kind: DataStoreKind::BlobStore, label: "AWS (boto3)" },
// JavaScript / TypeScript — relational
DriverRule { leaf: "knex", kind: DataStoreKind::Sql, label: "Knex.js" },
DriverRule { leaf: "createConnection", kind: DataStoreKind::Sql, label: "MySQL/Postgres (mysql/pg)" },
DriverRule { leaf: "Sequelize", kind: DataStoreKind::Sql, label: "Sequelize" },
DriverRule { leaf: "TypeORM.createConnection", kind: DataStoreKind::Sql, label: "TypeORM" },
DriverRule { leaf: "PrismaClient", kind: DataStoreKind::Sql, label: "Prisma" },
DriverRule { leaf: "pool.query", kind: DataStoreKind::Sql, label: "pg/mysql pool" },
DriverRule { leaf: "client.query", kind: DataStoreKind::Sql, label: "pg client" },
DriverRule { leaf: "db.query", kind: DataStoreKind::Sql, label: "Generic SQL driver" },
// JS — kv / doc
DriverRule { leaf: "redis.createClient", kind: DataStoreKind::KeyValue, label: "Redis (node-redis)" },
DriverRule { leaf: "ioredis", kind: DataStoreKind::KeyValue, label: "ioredis" },
DriverRule { leaf: "MongoClient.connect", kind: DataStoreKind::Document, label: "MongoDB (node)" },
DriverRule { leaf: "AWS.S3", kind: DataStoreKind::BlobStore, label: "AWS S3" },
// Java — JDBC / Hibernate
DriverRule { leaf: "DriverManager.getConnection", kind: DataStoreKind::Sql, label: "JDBC" },
DriverRule { leaf: "JdbcTemplate", kind: DataStoreKind::Sql, label: "Spring JdbcTemplate" },
DriverRule { leaf: "EntityManager", kind: DataStoreKind::Sql, label: "JPA EntityManager" },
DriverRule { leaf: "SessionFactory.openSession", kind: DataStoreKind::Sql, label: "Hibernate" },
DriverRule { leaf: "Jedis", kind: DataStoreKind::KeyValue, label: "Jedis (Redis)" },
DriverRule { leaf: "MongoClients.create", kind: DataStoreKind::Document, label: "MongoDB (java-driver)" },
// Go — sql + ORM
DriverRule { leaf: "sql.Open", kind: DataStoreKind::Sql, label: "database/sql" },
DriverRule { leaf: "gorm.Open", kind: DataStoreKind::Sql, label: "GORM" },
DriverRule { leaf: "sqlx.Connect", kind: DataStoreKind::Sql, label: "sqlx" },
DriverRule { leaf: "sqlx.Open", kind: DataStoreKind::Sql, label: "sqlx" },
DriverRule { leaf: "redis.NewClient", kind: DataStoreKind::KeyValue, label: "go-redis" },
DriverRule { leaf: "mongo.Connect", kind: DataStoreKind::Document, label: "MongoDB (go-driver)" },
// PHP — Eloquent / PDO
DriverRule { leaf: "PDO", kind: DataStoreKind::Sql, label: "PDO" },
DriverRule { leaf: "Eloquent::find", kind: DataStoreKind::Sql, label: "Laravel Eloquent" },
DriverRule { leaf: "Eloquent::where", kind: DataStoreKind::Sql, label: "Laravel Eloquent" },
DriverRule { leaf: "DB::connection", kind: DataStoreKind::Sql, label: "Laravel DB" },
DriverRule { leaf: "Doctrine", kind: DataStoreKind::Sql, label: "Doctrine ORM" },
// Ruby — ActiveRecord
DriverRule { leaf: "ActiveRecord::Base.connection", kind: DataStoreKind::Sql, label: "ActiveRecord" },
DriverRule { leaf: "ActiveRecord::Base.find", kind: DataStoreKind::Sql, label: "ActiveRecord" },
DriverRule { leaf: ".find_by_sql", kind: DataStoreKind::Sql, label: "ActiveRecord raw SQL" },
// Rust — sqlx / diesel
DriverRule { leaf: "sqlx::query", kind: DataStoreKind::Sql, label: "sqlx" },
DriverRule { leaf: "sqlx::query_as", kind: DataStoreKind::Sql, label: "sqlx" },
DriverRule { leaf: "diesel::sql_query", kind: DataStoreKind::Sql, label: "Diesel" },
DriverRule { leaf: "PgConnection::establish", kind: DataStoreKind::Sql, label: "Diesel" },
// Filesystem (best-effort: language-agnostic open()-family)
DriverRule { leaf: "open", kind: DataStoreKind::Filesystem, label: "Filesystem" },
];
/// Scan the callee list of every summarised function and emit a
/// [`SurfaceNode::DataStore`] for each callee that `match_rule`
/// recognises as a driver call.
///
/// Nodes are de-duplicated on `(file, line, label)`. Because
/// `call_site_location` currently always reports line 0, that amounts
/// to one node per label per file.
pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
    use std::collections::HashSet;

    let mut emitted: HashSet<(String, u32, String)> = HashSet::new();
    let mut nodes = Vec::new();
    for (_, func) in summaries.iter() {
        for site in &func.callees {
            if let Some(rule) = match_rule(&site.name) {
                let location = call_site_location(func, site.ordinal);
                let label = rule.label.to_string();
                let first_time =
                    emitted.insert((location.file.clone(), location.line, label.clone()));
                if first_time {
                    nodes.push(SurfaceNode::DataStore(DataStore {
                        location,
                        kind: rule.kind,
                        label,
                    }));
                }
            }
        }
    }
    nodes
}
/// Find the [`DriverRule`] that best matches a callee name.
///
/// A rule matches when its `leaf` pattern occurs anywhere in the
/// trimmed callee text, compared ASCII-case-insensitively. When several
/// rules match, the one with the longest pattern wins so that a
/// specific rule (`TypeORM.createConnection`) is not shadowed by a
/// generic one listed earlier (`createConnection`); rules with equally
/// long patterns keep their table order. Returns `None` when no rule
/// matches.
fn match_rule(callee: &str) -> Option<&'static DriverRule> {
    /// ASCII-case-insensitive substring test that does not allocate.
    fn contains_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
        let (hay, pat) = (haystack.as_bytes(), needle.as_bytes());
        // An empty pattern is contained in everything; it is handled
        // up front because `windows(0)` panics.
        pat.is_empty() || hay.windows(pat.len()).any(|w| w.eq_ignore_ascii_case(pat))
    }

    let callee = callee.trim();
    let mut best: Option<&'static DriverRule> = None;
    for rule in DRIVER_RULES {
        if !contains_ignore_ascii_case(callee, rule.leaf) {
            continue;
        }
        // Strictly-longer comparison keeps the earliest rule on ties.
        if best.map_or(true, |current| rule.leaf.len() > current.leaf.len()) {
            best = Some(rule);
        }
    }
    best
}
/// Approximate source location for a driver call site.
///
/// Only the enclosing function's file is recorded: `line` and `col`
/// are always 0 and the callee ordinal is currently ignored, since
/// nothing finer-grained is read from the summary here.
fn call_site_location(summary: &FuncSummary, _ordinal: u32) -> SourceLocation {
    let file = summary.file_path.clone();
    SourceLocation {
        file,
        line: 0,
        col: 0,
    }
}
// Unit tests for `detect_data_stores`, driven by hand-built summaries.
#[cfg(test)]
mod tests {
use super::*;
use crate::summary::CalleeSite;
use crate::symbol::{FuncKey, Lang};
// Build a Python (key, summary) pair whose callee list contains one
// bare `CalleeSite` per entry of `callees`.
fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) {
let key = FuncKey::new_function(Lang::Python, file, name, None);
let summary = FuncSummary {
name: name.to_string(),
file_path: file.to_string(),
lang: "python".to_string(),
param_count: 0,
callees: callees
.iter()
.map(|c| CalleeSite::bare(c.to_string()))
.collect(),
..Default::default()
};
(key, summary)
}
// A `psycopg2.connect` callee produces one SQL node with the psycopg2 label.
#[test]
fn detects_psycopg2_connect() {
let mut gs = GlobalSummaries::new();
let (k, s) = summary_with_callees("init", "app.py", &["psycopg2.connect"]);
gs.insert(k, s);
let nodes = detect_data_stores(&gs);
assert_eq!(nodes.len(), 1);
let SurfaceNode::DataStore(ds) = &nodes[0] else {
panic!()
};
assert_eq!(ds.kind, DataStoreKind::Sql);
assert_eq!(ds.label, "PostgreSQL (psycopg2)");
}
// `gorm.Open` must resolve to the GORM rule even though the generic
// `open` filesystem pattern is also a substring of it.
#[test]
fn detects_gorm_open() {
let mut gs = GlobalSummaries::new();
let (k, s) = summary_with_callees("init", "main.go", &["gorm.Open"]);
gs.insert(k, s);
let nodes = detect_data_stores(&gs);
assert_eq!(nodes.len(), 1);
let SurfaceNode::DataStore(ds) = &nodes[0] else {
panic!()
};
assert_eq!(ds.label, "GORM");
}
// Two identical callees in the same function collapse to a single node.
#[test]
fn dedup_collapses_repeats_in_same_file() {
let mut gs = GlobalSummaries::new();
let (k, s) = summary_with_callees(
"init",
"app.py",
&["psycopg2.connect", "psycopg2.connect"],
);
gs.insert(k, s);
let nodes = detect_data_stores(&gs);
assert_eq!(nodes.len(), 1);
}
}

165
src/surface/external.rs Normal file
View file

@ -0,0 +1,165 @@
//! External-service detection.
//!
//! Walks the post-pass-2 [`GlobalSummaries`] looking for callees that
//! launch outbound network requests (HTTP, gRPC, SMTP, DNS) and emits
//! one [`SurfaceNode::ExternalService`] per distinct `(file, label)`
//! pair rather than per call. Detection is by callee name +
//! `sink_caps & SSRF` heuristic — both signals are consulted so a
//! probe with no SSRF cap (DNS resolver, SMTP sender) still surfaces
//! as an external service.
use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode};
use crate::labels::Cap;
use crate::summary::{FuncSummary, GlobalSummaries};
/// One detection rule: callee-name pattern → external-service kind +
/// label.
struct ClientRule {
/// Pattern looked for, ASCII-case-insensitively, as a substring of
/// the callee text (see `match_rule`).
leaf: &'static str,
/// Category of external service recorded on the emitted node.
kind: ExternalServiceKind,
/// Human-readable label attached to the emitted node; also part of
/// the de-duplication key in `detect_external_services`.
label: &'static str,
}
/// Known outbound-client call patterns, grouped by service category.
/// Consulted by `match_rule` for every callee name in the summaries.
///
/// NOTE(review): the SMTP and DNS entries are tagged
/// `ExternalServiceKind::HttpApi`; presumably no dedicated kind exists
/// for them — confirm. Short patterns such as `got` and `fetch` are
/// matched as substrings and so also hit longer unrelated names.
const CLIENT_RULES: &[ClientRule] = &[
// HTTP
ClientRule { leaf: "requests.get", kind: ExternalServiceKind::HttpApi, label: "requests (Python)" },
ClientRule { leaf: "requests.post", kind: ExternalServiceKind::HttpApi, label: "requests (Python)" },
ClientRule { leaf: "httpx.get", kind: ExternalServiceKind::HttpApi, label: "httpx (Python)" },
ClientRule { leaf: "httpx.post", kind: ExternalServiceKind::HttpApi, label: "httpx (Python)" },
ClientRule { leaf: "urllib.request.urlopen", kind: ExternalServiceKind::HttpApi, label: "urllib" },
ClientRule { leaf: "fetch", kind: ExternalServiceKind::HttpApi, label: "fetch (JS)" },
ClientRule { leaf: "axios.get", kind: ExternalServiceKind::HttpApi, label: "axios" },
ClientRule { leaf: "axios.post", kind: ExternalServiceKind::HttpApi, label: "axios" },
ClientRule { leaf: "http.request", kind: ExternalServiceKind::HttpApi, label: "node http" },
ClientRule { leaf: "got", kind: ExternalServiceKind::HttpApi, label: "got (JS)" },
ClientRule { leaf: "HttpClient.send", kind: ExternalServiceKind::HttpApi, label: "Java HttpClient" },
ClientRule { leaf: "HttpClient.execute", kind: ExternalServiceKind::HttpApi, label: "Java HttpClient" },
ClientRule { leaf: "RestTemplate.exchange", kind: ExternalServiceKind::HttpApi, label: "Spring RestTemplate" },
ClientRule { leaf: "RestTemplate.getForObject", kind: ExternalServiceKind::HttpApi, label: "Spring RestTemplate" },
ClientRule { leaf: "OkHttpClient.newCall", kind: ExternalServiceKind::HttpApi, label: "OkHttp" },
ClientRule { leaf: "http.Get", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" },
ClientRule { leaf: "http.Post", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" },
ClientRule { leaf: "http.NewRequest", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" },
ClientRule { leaf: "client.Do", kind: ExternalServiceKind::HttpApi, label: "go http client" },
ClientRule { leaf: "reqwest::get", kind: ExternalServiceKind::HttpApi, label: "reqwest (Rust)" },
ClientRule { leaf: "reqwest::Client", kind: ExternalServiceKind::HttpApi, label: "reqwest (Rust)" },
ClientRule { leaf: "Net::HTTP", kind: ExternalServiceKind::HttpApi, label: "Net::HTTP (Ruby)" },
ClientRule { leaf: "HTTParty.get", kind: ExternalServiceKind::HttpApi, label: "HTTParty" },
ClientRule { leaf: "Faraday", kind: ExternalServiceKind::HttpApi, label: "Faraday (Ruby)" },
ClientRule { leaf: "curl_exec", kind: ExternalServiceKind::HttpApi, label: "PHP curl" },
ClientRule { leaf: "file_get_contents", kind: ExternalServiceKind::HttpApi, label: "PHP file_get_contents" },
ClientRule { leaf: "Guzzle", kind: ExternalServiceKind::HttpApi, label: "Guzzle (PHP)" },
// Message brokers
ClientRule { leaf: "kafka.send", kind: ExternalServiceKind::MessageBroker, label: "Kafka" },
ClientRule { leaf: "KafkaProducer.send", kind: ExternalServiceKind::MessageBroker, label: "Kafka" },
ClientRule { leaf: "rabbitmq.publish", kind: ExternalServiceKind::MessageBroker, label: "RabbitMQ" },
ClientRule { leaf: "amqp.publish", kind: ExternalServiceKind::MessageBroker, label: "AMQP" },
ClientRule { leaf: "sqs.send_message", kind: ExternalServiceKind::MessageBroker, label: "AWS SQS" },
ClientRule { leaf: "sns.publish", kind: ExternalServiceKind::MessageBroker, label: "AWS SNS" },
// Search indices
ClientRule { leaf: "Elasticsearch", kind: ExternalServiceKind::SearchIndex, label: "Elasticsearch" },
ClientRule { leaf: "elasticsearch.search", kind: ExternalServiceKind::SearchIndex, label: "Elasticsearch" },
ClientRule { leaf: "OpenSearch", kind: ExternalServiceKind::SearchIndex, label: "OpenSearch" },
ClientRule { leaf: "Algolia", kind: ExternalServiceKind::SearchIndex, label: "Algolia" },
// Auth providers
ClientRule { leaf: "auth0", kind: ExternalServiceKind::AuthProvider, label: "Auth0" },
ClientRule { leaf: "passport.authenticate", kind: ExternalServiceKind::AuthProvider, label: "Passport.js" },
ClientRule { leaf: "OAuth2Client", kind: ExternalServiceKind::AuthProvider, label: "OAuth2 client" },
ClientRule { leaf: "google.oauth2", kind: ExternalServiceKind::AuthProvider, label: "Google OAuth2" },
// SMTP
ClientRule { leaf: "smtplib.SMTP", kind: ExternalServiceKind::HttpApi, label: "SMTP (Python)" },
ClientRule { leaf: "Mail::send", kind: ExternalServiceKind::HttpApi, label: "Laravel Mail" },
ClientRule { leaf: "ActionMailer", kind: ExternalServiceKind::HttpApi, label: "Rails ActionMailer" },
// DNS
ClientRule { leaf: "socket.gethostbyname", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" },
ClientRule { leaf: "dns.lookup", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" },
ClientRule { leaf: "net.LookupIP", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" },
];
/// Emit [`SurfaceNode::ExternalService`] nodes for outbound calls found
/// in the summaries.
///
/// Two passes share one `(file, label)` de-duplication set:
///
/// 1. every callee that `match_rule` recognises yields a node carrying
///    the rule's kind and label;
/// 2. every function whose own sink caps contain `Cap::SSRF` yields a
///    generic `"Outbound HTTP"` node of kind `HttpApi`, so the function
///    surfaces even when none of its callees matched a rule.
///
/// All locations are the enclosing function's file with line/col 0.
pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
    use std::collections::HashSet;

    const GENERIC_LABEL: &str = "Outbound HTTP";

    let mut emitted: HashSet<(String, String)> = HashSet::new();
    let mut nodes = Vec::new();

    // Pass 1: rule-table matches on callee names.
    for (_, func) in summaries.iter() {
        for site in &func.callees {
            if let Some(rule) = match_rule(&site.name) {
                let location = call_site_location(func);
                let label = rule.label.to_string();
                if emitted.insert((location.file.clone(), label.clone())) {
                    nodes.push(SurfaceNode::ExternalService(ExternalService {
                        location,
                        kind: rule.kind,
                        label,
                    }));
                }
            }
        }
    }

    // Pass 2: functions that are themselves SSRF sinks.
    for (_, func) in summaries.iter() {
        if !func.sink_caps().contains(Cap::SSRF) {
            continue;
        }
        let location = call_site_location(func);
        if emitted.insert((location.file.clone(), GENERIC_LABEL.to_string())) {
            nodes.push(SurfaceNode::ExternalService(ExternalService {
                location,
                kind: ExternalServiceKind::HttpApi,
                label: GENERIC_LABEL.to_string(),
            }));
        }
    }

    nodes
}
/// Find the [`ClientRule`] that best matches a callee name.
///
/// A rule matches when its `leaf` pattern occurs anywhere in the
/// trimmed callee text, compared ASCII-case-insensitively. When several
/// rules match, the one with the longest pattern wins so a specific
/// rule is never shadowed by a shorter, more generic one listed earlier
/// (e.g. `Elasticsearch` vs `elasticsearch.search`); rules with equally
/// long patterns keep their table order. Returns `None` when no rule
/// matches.
fn match_rule(callee: &str) -> Option<&'static ClientRule> {
    /// ASCII-case-insensitive substring test that does not allocate.
    fn contains_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
        let (hay, pat) = (haystack.as_bytes(), needle.as_bytes());
        // An empty pattern is contained in everything; it is handled
        // up front because `windows(0)` panics.
        pat.is_empty() || hay.windows(pat.len()).any(|w| w.eq_ignore_ascii_case(pat))
    }

    let callee = callee.trim();
    let mut best: Option<&'static ClientRule> = None;
    for rule in CLIENT_RULES {
        if !contains_ignore_ascii_case(callee, rule.leaf) {
            continue;
        }
        // Strictly-longer comparison keeps the earliest rule on ties.
        if best.map_or(true, |current| rule.leaf.len() > current.leaf.len()) {
            best = Some(rule);
        }
    }
    best
}
/// Location used for nodes derived from `summary`: the summary's file path
/// with line and column both set to `0`.
fn call_site_location(summary: &FuncSummary) -> SourceLocation {
    let file = summary.file_path.clone();
    SourceLocation {
        file,
        line: 0,
        col: 0,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::summary::CalleeSite;
    use crate::symbol::{FuncKey, Lang};

    /// A function whose only callee is `requests.get` yields exactly one
    /// external-service node labelled `requests (Python)`.
    #[test]
    fn detects_requests_get() {
        let mut summaries = GlobalSummaries::new();
        let func_key = FuncKey::new_function(Lang::Python, "client.py", "fetch_user", None);
        summaries.insert(
            func_key,
            FuncSummary {
                name: "fetch_user".to_string(),
                file_path: "client.py".to_string(),
                lang: "python".to_string(),
                param_count: 0,
                callees: vec![CalleeSite::bare("requests.get".to_string())],
                ..Default::default()
            },
        );

        let found = detect_external_services(&summaries);
        assert_eq!(found.len(), 1);
        match &found[0] {
            SurfaceNode::ExternalService(service) => {
                assert_eq!(service.label, "requests (Python)");
            }
            _ => panic!(),
        }
    }
}

131
src/surface/lang/common.rs Normal file
View file

@ -0,0 +1,131 @@
//! Shared helpers used by the per-(language, framework) probes.
//!
//! Each probe extracts an [`EntryPoint`] node from a parsed source file
//! by walking the framework's route declaration shape. These helpers
//! cover the bookkeeping common to every probe: building a stable
//! [`SourceLocation`] from a tree-sitter node, decoding common string
//! literal shapes, and identifier-based auth marker lookups.
use crate::surface::{SourceLocation, relative_path_string};
use std::path::Path;
use tree_sitter::Node;
/// Build a [`SourceLocation`] for the start of `node`.
///
/// `file_rel` is used verbatim as the location's file. Line and column are
/// the node's zero-based start row / column converted to one-based values;
/// a value that does not fit in `u32` saturates at `u32::MAX` instead of
/// silently wrapping.
pub fn loc_for(node: Node<'_>, file_rel: &str) -> SourceLocation {
    let pos = node.start_position();
    let one_based =
        |zero_based: usize| u32::try_from(zero_based.saturating_add(1)).unwrap_or(u32::MAX);
    SourceLocation::new(file_rel, one_based(pos.row), one_based(pos.column))
}
/// Project-relative POSIX file string used as the [`SourceLocation`]
/// `file` field across every node a probe emits.
///
/// Thin wrapper: forwards `path` and `scan_root` unchanged to
/// [`relative_path_string`] and returns its result.
pub fn rel_file(path: &Path, scan_root: Option<&Path>) -> String {
relative_path_string(path, scan_root)
}
/// Strip string-literal decoration from `raw` and return the literal content.
///
/// Handles, after trimming surrounding whitespace:
///
/// * a Python literal prefix of one or two letters drawn from
///   `b` / `r` / `f` / `u` (any case) — e.g. `b"…"`, `rb'…'`, `fr"…"` — but
///   only when it is immediately followed by a quote, so bare words such as
///   `route` are left untouched;
/// * any run of leading and trailing `'`, `"` or `` ` `` characters.
///
/// Used by every probe that lifts a route path out of a string node.
pub fn unquote(raw: &str) -> String {
    let trimmed = raw.trim();

    // Length of the leading run of prefix letters. They are all ASCII, so the
    // byte offset returned by `find` equals the letter count.
    let prefix_len = trimmed
        .find(|c: char| !matches!(c, 'b' | 'B' | 'r' | 'R' | 'f' | 'F' | 'u' | 'U'))
        .unwrap_or(trimmed.len());

    // Python prefixes are at most two letters and must sit directly in front
    // of the opening quote; anything else is ordinary content.
    let body = match trimmed[prefix_len..].chars().next() {
        Some('\'' | '"') if prefix_len <= 2 => &trimmed[prefix_len..],
        _ => trimmed,
    };

    body.trim_start_matches(['\'', '"', '`'])
        .trim_end_matches(['\'', '"', '`'])
        .to_string()
}
/// Read the literal text of a tree-sitter `string` node and return its
/// unquoted content; `None` when the slice is not valid UTF-8.
pub fn string_node_value(node: Node<'_>, bytes: &[u8]) -> Option<String> {
Some(unquote(node.utf8_text(bytes).ok()?))
}
/// Return `true` when the last segment of `text` equals one of `markers`,
/// ignoring ASCII case.
///
/// The last segment is whatever follows the final `.` or `:` in `text` (so
/// both `pkg.name` and `Mod::Name` paths work), with surrounding whitespace
/// trimmed. Used by the probes' auth-marker allowlists.
pub fn leaf_matches(text: &str, markers: &[&str]) -> bool {
    let tail = match text.rfind(['.', ':']) {
        // Both separators are one byte wide, so `idx + 1` is a char boundary.
        Some(idx) => &text[idx + 1..],
        None => text,
    };
    let leaf = tail.trim();
    markers
        .iter()
        .any(|marker| leaf.eq_ignore_ascii_case(marker))
}
/// Visit `root` and every descendant whose kind equals `target_kind`,
/// calling `visit` on each match in pre-order (a node before its children,
/// children left to right).
///
/// The traversal uses an explicit work stack rather than recursion, so
/// deeply nested syntax trees cannot overflow the call stack.
pub fn for_each_node<'tree, F>(root: Node<'tree>, target_kind: &str, mut visit: F)
where
    F: FnMut(Node<'tree>),
{
    let mut pending: Vec<Node<'tree>> = vec![root];
    while let Some(node) = pending.pop() {
        if node.kind() == target_kind {
            visit(node);
        }
        // Append the children, then reverse just that tail so the leftmost
        // child is popped first and document order is preserved.
        let first_child = pending.len();
        let mut cursor = node.walk();
        pending.extend(node.children(&mut cursor));
        pending[first_child..].reverse();
    }
}
/// Look up a child of `parent` by `kind`.
///
/// First tries `kind` as a field name via `child_by_field_name`; when that
/// yields nothing, falls back to the first direct child whose node kind
/// equals `kind`. Used by the Java probes, which look up `modifiers` and
/// `class_body` this way.
pub fn child_or_named<'tree>(parent: Node<'tree>, kind: &str) -> Option<Node<'tree>> {
    parent.child_by_field_name(kind).or_else(|| {
        let mut cursor = parent.walk();
        // Bound to a local so the iterator borrowing `cursor` is dropped
        // before `cursor` itself.
        let by_kind = parent
            .children(&mut cursor)
            .find(|child| child.kind() == kind);
        by_kind
    })
}
/// Visit `root` and every descendant, calling `visit` once per node in
/// pre-order (a node before its children, children left to right).
///
/// Useful when a probe needs to look at several node kinds in one pass.
/// The traversal uses an explicit work stack rather than recursion, so
/// deeply nested syntax trees cannot overflow the call stack.
pub fn for_each_node_any<'tree, F>(root: Node<'tree>, mut visit: F)
where
    F: FnMut(Node<'tree>),
{
    let mut pending: Vec<Node<'tree>> = vec![root];
    while let Some(node) = pending.pop() {
        visit(node);
        // Append the children, then reverse just that tail so the leftmost
        // child is popped first and document order is preserved.
        let first_child = pending.len();
        let mut cursor = node.walk();
        pending.extend(node.children(&mut cursor));
        pending[first_child..].reverse();
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Prefixed and plain quoted literals all reduce to their content.
    #[test]
    fn unquote_strips_python_prefixes() {
        let cases = [
            ("b\"path\"", "path"),
            ("r'/api'", "/api"),
            ("f\"/users/{id}\"", "/users/{id}"),
            ("\"plain\"", "plain"),
        ];
        for (raw, expected) in cases {
            assert_eq!(unquote(raw), expected);
        }
    }

    /// Both `.`-separated and `::`-separated paths are matched on their
    /// final segment.
    #[test]
    fn leaf_matches_handles_dot_and_colon_paths() {
        let login = ["login_required"];
        assert!(leaf_matches("flask_login.login_required", &login));
        assert!(leaf_matches("Auth::JwtRequired", &["JwtRequired"]));
        assert!(!leaf_matches("OtherDecorator", &login));
    }
}

174
src/surface/lang/go_gin.rs Normal file
View file

@ -0,0 +1,174 @@
//! Go + gin framework probe.
//!
//! Detects gin route registration:
//!
//! * `r.GET("/path", handler)` / `.POST(...)` / `.PUT` / `.DELETE`
//! on a `*gin.Engine` or `*gin.RouterGroup`.
//! * `r.Group("/prefix").GET("/sub", ...)` chained shapes.
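//! * Middleware passed inline ahead of the handler, e.g.
//! `r.GET("/path", AuthRequired(), handler)` — those arguments are
//! checked against [`AUTH_MIDDLEWARES`]. Middleware installed through a
//! separate `r.Use(...)` call is not tracked by this probe.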
//!
//! Also recognises echo (`e.GET(...)`) and chi (`r.Get(...)`) by the
//! same shape — receiver name `e` / `r` / `router` / `engine`.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Middleware names treated as authentication markers. A route argument
/// whose last path segment matches one of these (ASCII case-insensitive,
/// via `leaf_matches`) marks the entry point as `auth_required`.
pub const AUTH_MIDDLEWARES: &[&str] = &[
"AuthRequired",
"JWT",
"JWTAuth",
"Auth",
"RequireAuth",
"RequireUser",
"VerifyToken",
"BasicAuth",
];
/// Method names recognised as route registrations. The selector's field
/// text must equal one of these exactly; it is then upper-cased before
/// being handed to `HttpMethod::from_ident`.
const VERBS: &[&str] = &[
"GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD", "Any",
"Get", "Post", "Put", "Delete", "Patch", "Options", "Head",
];
/// Scan a parsed Go file for gin-style route registrations.
///
/// Every `call_expression` in `tree` is offered to `match_gin_call`; each
/// call it recognises contributes one [`SurfaceNode::EntryPoint`]. `path`
/// and `scan_root` are only used to derive the file string stored in the
/// emitted locations.
pub fn detect_gin_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut routes = Vec::new();
    walk_calls(tree.root_node(), &mut |call| {
        // `extend` with an `Option` pushes the node only when one was built.
        routes.extend(match_gin_call(call, bytes, &file_rel));
    });
    routes
}
/// Depth-first, pre-order walk from `node` that hands every
/// `call_expression` node to `visit`.
fn walk_calls<'tree, F>(node: Node<'tree>, visit: &mut F)
where
    F: FnMut(Node<'tree>),
{
    if matches!(node.kind(), "call_expression") {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_calls(child, &mut *visit));
}
/// Try to interpret `call` as a gin-style route registration
/// (`<receiver>.<VERB>(route, [middleware...], handler)`).
///
/// Returns `None` unless all of the following hold:
///
/// * the callee is a selector expression whose field is one of [`VERBS`];
/// * the receiver text passes `receiver_is_gin`;
/// * the upper-cased verb is accepted by `HttpMethod::from_ident`;
/// * the first argument yields a route string and some argument is an
///   identifier, selector expression or function literal (the handler —
///   the last such argument wins).
///
/// `auth_required` is set when any argument after the route, other than the
/// handler itself, is an auth marker per `arg_is_auth_marker`.
fn match_gin_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
    let func = call.child_by_field_name("function")?;
    if func.kind() != "selector_expression" {
        return None;
    }
    let operand = func.child_by_field_name("operand")?;
    let field = func.child_by_field_name("field")?;
    let field_text = field.utf8_text(bytes).ok()?;
    if !VERBS.contains(&field_text) {
        return None;
    }
    let operand_text = operand.utf8_text(bytes).ok()?;
    if !receiver_is_gin(operand_text) {
        return None;
    }
    let method = HttpMethod::from_ident(&field_text.to_ascii_uppercase())?;

    let args = call.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    // Punctuation and comments inside the argument list are not arguments.
    let positional: Vec<Node> = args
        .children(&mut cursor)
        .filter(|n| !matches!(n.kind(), "(" | ")" | "," | "comment"))
        .collect();
    let route = positional
        .first()
        .and_then(|n| string_node_value(*n, bytes))?;
    let handler_node = positional.iter().rev().find(|n| {
        matches!(
            n.kind(),
            "identifier" | "selector_expression" | "func_literal"
        )
    })?;
    // An inline closure has no name; report a stable placeholder instead of
    // the closure's entire source text (same convention as the net/http probe).
    let handler_name = if handler_node.kind() == "func_literal" {
        "anonymous".to_string()
    } else {
        handler_node
            .utf8_text(bytes)
            .ok()
            .map(str::to_string)
            .unwrap_or_default()
    };
    // Everything between the route and the handler is middleware.
    let auth_required = positional
        .iter()
        .skip(1)
        .filter(|n| !std::ptr::eq(*n, handler_node))
        .any(|n| arg_is_auth_marker(*n, bytes));

    Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(call, file_rel),
        framework: Framework::Gin,
        method,
        route,
        handler_name,
        handler_location: SourceLocation::new(
            file_rel,
            (handler_node.start_position().row + 1) as u32,
            (handler_node.start_position().column + 1) as u32,
        ),
        auth_required,
    }))
}
/// Heuristic: does the receiver expression `text` look like a gin / echo /
/// chi router?
///
/// The decision is made on the last `.`-separated segment, lower-cased:
/// it must be `r`, `g`, `e`, or end with `router`, `group` or `engine`.
/// A trailing call argument list is removed first, so chained receivers
/// such as `r.Group("/v1")` are judged by the callee name (`Group`) rather
/// than by the argument text.
fn receiver_is_gin(text: &str) -> bool {
    /// Drop one trailing, balanced `( … )` group from `expr`, if present.
    fn strip_trailing_call(expr: &str) -> &str {
        if !expr.ends_with(')') {
            return expr;
        }
        let mut depth = 0usize;
        for (idx, ch) in expr.char_indices().rev() {
            match ch {
                ')' => depth += 1,
                '(' => {
                    depth = depth.saturating_sub(1);
                    if depth == 0 {
                        return &expr[..idx];
                    }
                }
                _ => {}
            }
        }
        // Unbalanced parentheses: leave the text as it was.
        expr
    }

    let callee = strip_trailing_call(text.trim());
    let leaf = callee.rsplit('.').next().unwrap_or(callee).trim();
    let lower = leaf.to_ascii_lowercase();
    matches!(lower.as_str(), "r" | "g" | "e")
        || lower.ends_with("router")
        || lower.ends_with("group")
        || lower.ends_with("engine")
}
/// Decide whether a route-call argument names an auth middleware.
///
/// Identifiers and selector expressions are judged by their own text; call
/// expressions (e.g. `AuthRequired()`) by the text of their callee. Any
/// other node kind, or text that is not valid UTF-8, is not a marker.
fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool {
    let name_node = match node.kind() {
        "identifier" | "selector_expression" => Some(node),
        "call_expression" => node.child_by_field_name("function"),
        _ => None,
    };
    name_node
        .and_then(|named| named.utf8_text(bytes).ok())
        .is_some_and(|text| leaf_matches(text, AUTH_MIDDLEWARES))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` as Go and return the tree together with the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        let language = tree_sitter_go::LANGUAGE.into();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// A single `r.GET("/users", listUsers)` registration is reported as one
    /// GET entry point with the literal route.
    #[test]
    fn detects_get() {
        let src = "package main\nimport \"github.com/gin-gonic/gin\"\nfunc main() {\n r := gin.Default()\n r.GET(\"/users\", listUsers)\n}\nfunc listUsers(c *gin.Context) {}\n";
        let (tree, bytes) = parse(src);
        let found = detect_gin_routes(&tree, &bytes, &PathBuf::from("main.go"), None);
        assert_eq!(found.len(), 1);
        match &found[0] {
            SurfaceNode::EntryPoint(entry) => {
                assert_eq!(entry.method, HttpMethod::GET);
                assert_eq!(entry.route, "/users");
            }
            _ => panic!(),
        }
    }
}

129
src/surface/lang/go_http.rs Normal file
View file

@ -0,0 +1,129 @@
//! Go + `net/http` framework probe.
//!
//! Recognises the canonical route registration shapes:
//!
//! * `http.HandleFunc("/path", handler)`
//! * `http.Handle("/path", handler)`
//! * `mux.HandleFunc("/path", handler)` (any `*http.ServeMux` receiver)
//! * `http.NewServeMux()` derived receivers
//!
//! Method is `GET` by default — `net/http` registrations are
//! method-agnostic at the routing layer; the handler dispatches on
//! `r.Method` internally.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Scan a parsed Go file for `net/http` route registrations.
///
/// Every `call_expression` in `tree` is offered to `match_handle_call`;
/// each call it recognises contributes one [`SurfaceNode::EntryPoint`].
/// `path` and `scan_root` are only used to derive the file string stored in
/// the emitted locations.
pub fn detect_go_http_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut routes = Vec::new();
    walk_calls(tree.root_node(), &mut |call| {
        // `extend` with an `Option` pushes the node only when one was built.
        routes.extend(match_handle_call(call, bytes, &file_rel));
    });
    routes
}
/// Pre-order traversal starting at `node`; `visit` receives each node whose
/// kind is `call_expression`.
fn walk_calls<'tree, F>(node: Node<'tree>, visit: &mut F)
where
    F: FnMut(Node<'tree>),
{
    let is_call = node.kind() == "call_expression";
    if is_call {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_calls(child, &mut *visit));
}
/// Try to interpret `call` as a `net/http` route registration
/// (`<receiver>.HandleFunc(pattern, handler)` or `.Handle(...)`).
///
/// Returns `None` unless the callee is a selector expression named
/// `HandleFunc` / `Handle`, the receiver is `http` or its text mentions
/// `Mux` / `mux` / `Server` / `Router` / `router`, and at least two
/// arguments are present.
///
/// The HTTP method defaults to `GET`. When the pattern uses the Go 1.22
/// `"METHOD /path"` form and `HttpMethod::from_ident` accepts the leading
/// word, that method is reported and the word is removed from the route.
fn match_handle_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
    let func = call.child_by_field_name("function")?;
    if func.kind() != "selector_expression" {
        return None;
    }
    let operand = func.child_by_field_name("operand")?;
    let field = func.child_by_field_name("field")?;
    let field_text = field.utf8_text(bytes).ok()?;
    if field_text != "HandleFunc" && field_text != "Handle" {
        return None;
    }
    let operand_text = operand.utf8_text(bytes).ok()?;
    let leaf = operand_text.rsplit('.').next().unwrap_or(operand_text);
    if leaf != "http"
        && !operand_text.contains("Mux")
        && !operand_text.contains("mux")
        && !operand_text.contains("Server")
        && !operand_text.contains("Router")
        && !operand_text.contains("router")
    {
        return None;
    }
    let args = call.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let positional: Vec<Node> = args
        .children(&mut cursor)
        .filter(|n| !matches!(n.kind(), "(" | ")" | ","))
        .collect();
    if positional.len() < 2 {
        return None;
    }
    let pattern = string_node_value(positional[0], bytes)?;
    // Go 1.22+ ServeMux patterns may start with an HTTP method and a space.
    let explicit_method = pattern.split_once(' ').and_then(|(verb, rest)| {
        HttpMethod::from_ident(verb).map(|method| (method, rest.trim_start().to_string()))
    });
    let (method, route) = explicit_method.unwrap_or((HttpMethod::GET, pattern));
    let handler_node = positional[1];
    let handler_name = handler_function_name(handler_node, bytes).unwrap_or_default();
    Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(call, file_rel),
        framework: Framework::NetHttp,
        method,
        route,
        handler_name,
        handler_location: SourceLocation::new(
            file_rel,
            (handler_node.start_position().row + 1) as u32,
            (handler_node.start_position().column + 1) as u32,
        ),
        auth_required: false,
    }))
}
/// Name to report for a handler argument: the fixed string `"anonymous"`
/// for a function literal, otherwise the node's source text (`None` when
/// that text is not valid UTF-8).
fn handler_function_name(node: Node, bytes: &[u8]) -> Option<String> {
    if node.kind() == "func_literal" {
        return Some("anonymous".to_string());
    }
    node.utf8_text(bytes).ok().map(String::from)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` as Go and return the tree together with the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        let language = tree_sitter_go::LANGUAGE.into();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// `http.HandleFunc("/users", listUsers)` is reported as one `NetHttp`
    /// entry point carrying the route and the handler identifier.
    #[test]
    fn detects_handle_func() {
        let src = "package main\nimport \"net/http\"\nfunc main() {\n http.HandleFunc(\"/users\", listUsers)\n}\nfunc listUsers(w http.ResponseWriter, r *http.Request) {}\n";
        let (tree, bytes) = parse(src);
        let found = detect_go_http_routes(&tree, &bytes, &PathBuf::from("main.go"), None);
        assert_eq!(found.len(), 1);
        match &found[0] {
            SurfaceNode::EntryPoint(entry) => {
                assert_eq!(entry.framework, Framework::NetHttp);
                assert_eq!(entry.route, "/users");
                assert_eq!(entry.handler_name, "listUsers");
            }
            _ => panic!(),
        }
    }
}

View file

@ -0,0 +1,297 @@
//! Java + Quarkus framework probe.
//!
//! Quarkus uses JAX-RS (`jakarta.ws.rs`) for HTTP routing on top of
//! `RESTEasy Reactive` / `Quarkus REST`. The annotations are
//! identical to plain JAX-RS, so this probe overlaps with
//! [`super::java_servlet`] but emits the [`Framework::JaxRs`] tag with
//! a Quarkus-specific recogniser:
//!
//! * The class is annotated with `@ApplicationScoped`,
//! `@RequestScoped`, or `@Singleton` (Quarkus DI markers); OR
//! * The file imports a `quarkus`-prefixed package; OR
//! * The class extends a Quarkus-known reactive base type
//! (`PanacheRepository`, `Multi`, `Uni`).
//!
//! Auth markers: `@Authenticated`, `@RolesAllowed`, `@DenyAll`,
//! `@RequiresAuthentication` (see [`AUTH_ANNOTATIONS`]). `@PermitAll`
//! does not set `auth_required`.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Annotation names that mark a class or method as requiring auth. Compared
/// against the annotation's last `.`-separated segment, ignoring ASCII case.
pub const AUTH_ANNOTATIONS: &[&str] = &[
"Authenticated",
"RolesAllowed",
"DenyAll",
"RequiresAuthentication",
];
/// Class-level annotations that qualify a class as a resource for this
/// probe (see `class_is_quarkus_resource`). Note that `Path` is included,
/// so any `@Path`-annotated class qualifies.
const QUARKUS_DI: &[&str] = &[
"ApplicationScoped",
"RequestScoped",
"Singleton",
"Dependent",
"Path",
];
/// JAX-RS verb annotation names paired with the HTTP method they denote.
const JAXRS_VERBS: &[(&str, HttpMethod)] = &[
("GET", HttpMethod::GET),
("POST", HttpMethod::POST),
("PUT", HttpMethod::PUT),
("DELETE", HttpMethod::DELETE),
("PATCH", HttpMethod::PATCH),
("HEAD", HttpMethod::HEAD),
("OPTIONS", HttpMethod::OPTIONS),
];
/// Scan a parsed Java file for Quarkus / JAX-RS resource methods.
///
/// Files that do not pass `file_uses_quarkus` produce no nodes. Otherwise
/// every class accepted by `class_is_quarkus_resource` is inspected and each
/// method with a verb annotation becomes one [`SurfaceNode::EntryPoint`]
/// tagged [`Framework::JaxRs`]. `auth_required` is set when either the class
/// or the method carries an auth annotation.
pub fn detect_quarkus_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let root = tree.root_node();
    let mut entry_points = Vec::new();
    if !file_uses_quarkus(root, bytes) {
        return entry_points;
    }
    walk_classes(root, &mut |class| {
        if !class_is_quarkus_resource(class, bytes) {
            return;
        }
        let class_path = class_path_annotation(class, bytes).unwrap_or_default();
        let class_auth = class_has_auth_annotation(class, bytes);
        let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else {
            return;
        };
        let mut cursor = body.walk();
        let methods = body
            .children(&mut cursor)
            .filter(|member| member.kind() == "method_declaration");
        for member in methods {
            let Some((method, route, method_auth)) = method_mapping(member, bytes, &class_path)
            else {
                continue;
            };
            let handler_name = method_name(member, bytes).unwrap_or_default();
            let start = member.start_position();
            entry_points.push(SurfaceNode::EntryPoint(EntryPoint {
                location: loc_for(member, &file_rel),
                framework: Framework::JaxRs,
                method,
                route,
                handler_name,
                handler_location: SourceLocation::new(
                    file_rel.clone(),
                    (start.row + 1) as u32,
                    (start.column + 1) as u32,
                ),
                auth_required: class_auth || method_auth,
            }));
        }
    });
    entry_points
}
fn file_uses_quarkus(root: Node, bytes: &[u8]) -> bool {
let mut cursor = root.walk();
for child in root.children(&mut cursor) {
if child.kind() == "import_declaration"
&& let Ok(text) = child.utf8_text(bytes)
&& (text.contains("io.quarkus") || text.contains("jakarta.ws.rs"))
{
return true;
}
}
false
}
/// `true` when `class` carries at least one annotation whose last
/// `.`-separated name segment matches an entry of [`QUARKUS_DI`], ignoring
/// ASCII case. Classes without a `modifiers` child never qualify.
fn class_is_quarkus_resource(class: Node, bytes: &[u8]) -> bool {
    let Some(modifiers) = crate::surface::lang::common::child_or_named(class, "modifiers") else {
        return false;
    };
    let mut cursor = modifiers.walk();
    // Bound to a local so the iterator borrowing `cursor` is dropped before
    // `cursor` itself.
    let qualifies = modifiers
        .children(&mut cursor)
        .filter(|ann| is_annotation(*ann))
        .filter_map(|ann| annotation_name(ann, bytes))
        .any(|name| {
            let leaf = name.rsplit('.').next().unwrap_or(&name);
            QUARKUS_DI
                .iter()
                .any(|marker| leaf.eq_ignore_ascii_case(marker))
        });
    qualifies
}
/// Depth-first, pre-order walk from `node` that hands every
/// `class_declaration` (including nested ones) to `visit`.
fn walk_classes<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    if matches!(node.kind(), "class_declaration") {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_classes(child, &mut *visit));
}
/// String argument of the class-level `@Path` annotation, if any.
/// Delegates to `annotation_string_arg` with the target name `"Path"`.
fn class_path_annotation(class: Node, bytes: &[u8]) -> Option<String> {
annotation_string_arg(class, bytes, "Path")
}
/// `true` when `class` carries an annotation whose last `.`-separated name
/// segment matches an entry of [`AUTH_ANNOTATIONS`], ignoring ASCII case.
/// Classes without a `modifiers` child report `false`.
fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool {
    let Some(modifiers) = crate::surface::lang::common::child_or_named(class, "modifiers") else {
        return false;
    };
    let mut cursor = modifiers.walk();
    // Bound to a local so the iterator borrowing `cursor` is dropped before
    // `cursor` itself.
    let guarded = modifiers
        .children(&mut cursor)
        .filter(|ann| is_annotation(*ann))
        .filter_map(|ann| annotation_name(ann, bytes))
        .any(|name| {
            let leaf = name.rsplit('.').next().unwrap_or(&name);
            AUTH_ANNOTATIONS
                .iter()
                .any(|marker| leaf.eq_ignore_ascii_case(marker))
        });
    guarded
}
/// Derive the route mapping of a single method from its annotations.
///
/// Returns `(verb, route, auth)` where:
///
/// * `verb` comes from the last annotation matching [`JAXRS_VERBS`]
///   (ASCII case-insensitive); without one the result is `None`;
/// * `route` is `class_path` joined with the method's `@Path` value by a
///   single `/`, or whichever of the two is non-empty;
/// * `auth` is `true` when any annotation matches [`AUTH_ANNOTATIONS`].
fn method_mapping(method: Node, bytes: &[u8], class_path: &str) -> Option<(HttpMethod, String, bool)> {
    let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?;
    let mut cursor = modifiers.walk();
    let mut http_verb: Option<HttpMethod> = None;
    let mut sub_path = String::new();
    let mut requires_auth = false;

    for ann in modifiers.children(&mut cursor).filter(|n| is_annotation(*n)) {
        let Some(name) = annotation_name(ann, bytes) else {
            continue;
        };
        let leaf = name.rsplit('.').next().unwrap_or(&name);
        if let Some(&(_, verb)) = JAXRS_VERBS
            .iter()
            .find(|(candidate, _)| candidate.eq_ignore_ascii_case(leaf))
        {
            http_verb = Some(verb);
        }
        if leaf == "Path" {
            if let Some(value) = annotation_string_arg_from_node(ann, bytes) {
                sub_path = value;
            }
        }
        requires_auth |= AUTH_ANNOTATIONS
            .iter()
            .any(|marker| leaf.eq_ignore_ascii_case(marker));
    }

    let verb = http_verb?;
    let route = match (class_path.is_empty(), sub_path.is_empty()) {
        (true, _) => sub_path,
        (false, true) => class_path.to_string(),
        (false, false) => format!(
            "{}/{}",
            class_path.trim_end_matches('/'),
            sub_path.trim_start_matches('/')
        ),
    };
    Some((verb, route, requires_auth))
}
/// Find the first annotation on `class` whose last `.`-separated name
/// segment equals `target_name` exactly, and return the string argument
/// extracted from it. `None` when there is no such annotation, no
/// `modifiers` child, or no string argument.
fn annotation_string_arg(class: Node, bytes: &[u8], target_name: &str) -> Option<String> {
    let modifiers = crate::surface::lang::common::child_or_named(class, "modifiers")?;
    let mut cursor = modifiers.walk();
    let target = modifiers.children(&mut cursor).find(|ann| {
        is_annotation(*ann)
            && annotation_name(*ann, bytes)
                .is_some_and(|name| name.rsplit('.').next().unwrap_or(&name) == target_name)
    })?;
    annotation_string_arg_from_node(target, bytes)
}
/// Return the text between the first pair of double quotes in the
/// annotation's argument list. `None` when the annotation has no
/// `arguments` field, the text is not valid UTF-8, or fewer than two
/// double quotes are present. Escape sequences are not interpreted.
fn annotation_string_arg_from_node(ann: Node, bytes: &[u8]) -> Option<String> {
    let raw = ann.child_by_field_name("arguments")?.utf8_text(bytes).ok()?;
    let (_, after_open) = raw.split_once('"')?;
    let (literal, _) = after_open.split_once('"')?;
    Some(literal.to_string())
}
/// Source text of the annotation's `name` field as an owned string;
/// `None` when the field is absent or its text is not valid UTF-8.
fn annotation_name(ann: Node, bytes: &[u8]) -> Option<String> {
ann.child_by_field_name("name")
.and_then(|n| n.utf8_text(bytes).ok())
.map(str::to_string)
}
/// Source text of the method declaration's `name` field as an owned
/// string; `None` when the field is absent or its text is not valid UTF-8.
fn method_name(method: Node, bytes: &[u8]) -> Option<String> {
method
.child_by_field_name("name")
.and_then(|n| n.utf8_text(bytes).ok())
.map(str::to_string)
}
/// `true` for the two node kinds this probe treats as annotations:
/// `annotation` and `marker_annotation`.
fn is_annotation(node: Node) -> bool {
matches!(node.kind(), "annotation" | "marker_annotation")
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` as Java and return the tree together with the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        let language = tree_sitter_java::LANGUAGE.into();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// A resource class with class-level and method-level `@Path` yields one
    /// GET entry point whose route joins both paths.
    #[test]
    fn detects_quarkus_resource() {
        let src = r#"
import io.quarkus.runtime.Quarkus;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
@ApplicationScoped
@Path("/api")
public class GreetResource {
@GET
@Path("/hello")
public String hello() { return "hi"; }
}
"#;
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("GreetResource.java");
        let found = detect_quarkus_routes(&tree, &bytes, &file, None);
        assert_eq!(found.len(), 1);
        match &found[0] {
            SurfaceNode::EntryPoint(entry) => {
                assert_eq!(entry.method, HttpMethod::GET);
                assert_eq!(entry.route, "/api/hello");
            }
            _ => panic!(),
        }
    }

    /// A file without Quarkus / JAX-RS imports produces no entry points.
    #[test]
    fn ignores_non_quarkus_class() {
        let src = r#"
public class C {
@GetMapping("/x")
public void x() {}
}
"#;
        let (tree, bytes) = parse(src);
        let found = detect_quarkus_routes(&tree, &bytes, &PathBuf::from("C.java"), None);
        assert!(found.is_empty());
    }
}

View file

@ -0,0 +1,285 @@
//! Java + Servlet (JAX-RS / Jakarta REST) framework probe.
//!
//! Recognises:
//!
//! * `@WebServlet("/path")` annotated `HttpServlet` subclasses — every
//! `doGet` / `doPost` / `doPut` / `doDelete` method is one entry-point.
//! * `@Path("/path")` annotated JAX-RS resource methods with verb
//! annotation `@GET` / `@POST` / `@PUT` / `@DELETE` / `@PATCH`.
//!
//! Auth markers: `@DenyAll`, `@RolesAllowed`, `@PermitAll` — the
//! presence of any of these implies a security configuration is
//! actively gating the resource (we report `auth_required = true`
//! conservatively for `@RolesAllowed` and `@DenyAll`).
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Annotation names that mark a class or method as requiring auth. Compared
/// against the annotation's last `.`-separated segment, ignoring ASCII case.
pub const AUTH_ANNOTATIONS: &[&str] = &[
"RolesAllowed",
"DenyAll",
"RequiresAuthentication",
"RequiresUser",
];
/// `HttpServlet` handler method names paired with the HTTP method they
/// serve. Method names are compared exactly (case-sensitive).
const SERVLET_VERBS: &[(&str, HttpMethod)] = &[
("doGet", HttpMethod::GET),
("doPost", HttpMethod::POST),
("doPut", HttpMethod::PUT),
("doDelete", HttpMethod::DELETE),
("doHead", HttpMethod::HEAD),
("doOptions", HttpMethod::OPTIONS),
];
/// JAX-RS verb annotation names paired with the HTTP method they denote.
const JAXRS_VERBS: &[(&str, HttpMethod)] = &[
("GET", HttpMethod::GET),
("POST", HttpMethod::POST),
("PUT", HttpMethod::PUT),
("DELETE", HttpMethod::DELETE),
("PATCH", HttpMethod::PATCH),
("HEAD", HttpMethod::HEAD),
("OPTIONS", HttpMethod::OPTIONS),
];
/// Scan a parsed Java file for Servlet and JAX-RS entry points.
///
/// For every class in `tree`, each method is checked in this order:
///
/// 1. **HttpServlet shape** — the class has a `@WebServlet` path and the
///    method is named like an entry of [`SERVLET_VERBS`]; the class path is
///    the route and `auth_required` reflects class-level auth only.
/// 2. **JAX-RS shape** — `jaxrs_method_mapping` recognises a verb
///    annotation; `auth_required` combines class and method auth.
///
/// Both shapes are emitted with [`Framework::JaxRs`].
pub fn detect_servlet_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut entry_points = Vec::new();
    walk_classes(tree.root_node(), &mut |class| {
        let servlet_path = class_web_servlet_path(class, bytes);
        let jaxrs_path = class_jaxrs_path(class, bytes);
        let class_auth = class_has_auth_annotation(class, bytes);
        let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else {
            return;
        };
        let mut cursor = body.walk();
        let methods = body
            .children(&mut cursor)
            .filter(|member| member.kind() == "method_declaration");
        for member in methods {
            let name = method_name(member, bytes).unwrap_or_default();
            let start = member.start_position();

            // HttpServlet shape: needs both a servlet path and a doXxx name.
            let servlet_hit = servlet_path.as_deref().and_then(|class_path| {
                SERVLET_VERBS
                    .iter()
                    .find(|(verb, _)| *verb == name.as_str())
                    .map(|(_, method)| (class_path, *method))
            });
            if let Some((class_path, method)) = servlet_hit {
                entry_points.push(SurfaceNode::EntryPoint(EntryPoint {
                    location: loc_for(member, &file_rel),
                    framework: Framework::JaxRs,
                    method,
                    route: class_path.to_string(),
                    handler_name: name,
                    handler_location: SourceLocation::new(
                        file_rel.clone(),
                        (start.row + 1) as u32,
                        (start.column + 1) as u32,
                    ),
                    auth_required: class_auth,
                }));
                continue;
            }

            // JAX-RS shape: verb annotation on the method.
            let class_prefix = jaxrs_path.as_deref().unwrap_or("");
            let Some((method, route, method_auth)) =
                jaxrs_method_mapping(member, bytes, class_prefix)
            else {
                continue;
            };
            entry_points.push(SurfaceNode::EntryPoint(EntryPoint {
                location: loc_for(member, &file_rel),
                framework: Framework::JaxRs,
                method,
                route,
                handler_name: name,
                handler_location: SourceLocation::new(
                    file_rel.clone(),
                    (start.row + 1) as u32,
                    (start.column + 1) as u32,
                ),
                auth_required: class_auth || method_auth,
            }));
        }
    });
    entry_points
}
/// Pre-order traversal starting at `node`; `visit` receives each node whose
/// kind is `class_declaration`, nested classes included.
fn walk_classes<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    let is_class = node.kind() == "class_declaration";
    if is_class {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_classes(child, &mut *visit));
}
/// String argument of the class-level `@WebServlet` annotation, if any.
/// Delegates to `annotation_string_arg` with the target name `"WebServlet"`.
fn class_web_servlet_path(class: Node, bytes: &[u8]) -> Option<String> {
annotation_string_arg(class, bytes, "WebServlet")
}
/// String argument of the class-level `@Path` annotation, if any.
/// Delegates to `annotation_string_arg` with the target name `"Path"`.
fn class_jaxrs_path(class: Node, bytes: &[u8]) -> Option<String> {
annotation_string_arg(class, bytes, "Path")
}
/// `true` when `class` carries an annotation whose last `.`-separated name
/// segment matches an entry of [`AUTH_ANNOTATIONS`], ignoring ASCII case.
/// Classes without a `modifiers` child report `false`.
fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool {
    let Some(modifiers) = crate::surface::lang::common::child_or_named(class, "modifiers") else {
        return false;
    };
    let mut cursor = modifiers.walk();
    for ann in modifiers.children(&mut cursor) {
        if !is_annotation(ann) {
            continue;
        }
        let Some(name) = annotation_name(ann, bytes) else {
            continue;
        };
        let leaf = name.rsplit('.').next().unwrap_or(&name);
        let is_auth_marker = AUTH_ANNOTATIONS
            .iter()
            .any(|marker| leaf.eq_ignore_ascii_case(marker));
        if is_auth_marker {
            return true;
        }
    }
    false
}
/// Derive the JAX-RS mapping of a single method from its annotations.
///
/// Returns `(verb, route, auth)`; `None` when the method has no `modifiers`
/// child or no annotation matching [`JAXRS_VERBS`] (ASCII case-insensitive;
/// the last match wins). `route` joins `class_path` and the method's
/// `@Path` value with a single `/`, or is whichever of the two is
/// non-empty. `auth` is `true` when any annotation matches
/// [`AUTH_ANNOTATIONS`].
fn jaxrs_method_mapping(method: Node, bytes: &[u8], class_path: &str) -> Option<(HttpMethod, String, bool)> {
    let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?;
    let mut cursor = modifiers.walk();
    let mut http_verb: Option<HttpMethod> = None;
    let mut sub_path = String::new();
    let mut requires_auth = false;

    for ann in modifiers.children(&mut cursor) {
        if !is_annotation(ann) {
            continue;
        }
        let Some(name) = annotation_name(ann, bytes) else {
            continue;
        };
        let leaf = name.rsplit('.').next().unwrap_or(&name);

        let verb_hit = JAXRS_VERBS
            .iter()
            .find(|(candidate, _)| candidate.eq_ignore_ascii_case(leaf));
        if let Some((_, verb)) = verb_hit {
            http_verb = Some(*verb);
        }
        if leaf == "Path" {
            if let Some(value) = annotation_string_arg_from_node(ann, bytes) {
                sub_path = value;
            }
        }
        if AUTH_ANNOTATIONS
            .iter()
            .any(|marker| leaf.eq_ignore_ascii_case(marker))
        {
            requires_auth = true;
        }
    }

    let verb = http_verb?;
    let route = if class_path.is_empty() {
        sub_path
    } else if sub_path.is_empty() {
        class_path.to_string()
    } else {
        let prefix = class_path.trim_end_matches('/');
        let suffix = sub_path.trim_start_matches('/');
        format!("{}/{}", prefix, suffix)
    };
    Some((verb, route, requires_auth))
}
/// Find the first annotation on `class` whose last `.`-separated name
/// segment equals `target_name` exactly, and return the string argument
/// extracted from it. `None` when there is no such annotation, no
/// `modifiers` child, or no string argument.
fn annotation_string_arg(class: Node, bytes: &[u8], target_name: &str) -> Option<String> {
    let modifiers = crate::surface::lang::common::child_or_named(class, "modifiers")?;
    let mut cursor = modifiers.walk();
    let target = modifiers.children(&mut cursor).find(|ann| {
        if !is_annotation(*ann) {
            return false;
        }
        match annotation_name(*ann, bytes) {
            Some(name) => name.rsplit('.').next().unwrap_or(&name) == target_name,
            None => false,
        }
    })?;
    annotation_string_arg_from_node(target, bytes)
}
/// Extract the path-like string literal from an annotation's arguments.
///
/// When the argument list names a `urlPatterns` or `value` attribute
/// (`urlPatterns = "/p"`, `value = {"/p"}`), the first double-quoted literal
/// after that attribute is returned, so a preceding attribute such as
/// `name = "MyServlet"` is not mistaken for the route. Otherwise the first
/// double-quoted literal in the argument list is returned.
///
/// Returns `None` when the annotation has no `arguments` field, the text is
/// not valid UTF-8, or no complete quoted literal is found. Escape sequences
/// are not interpreted.
fn annotation_string_arg_from_node(ann: Node, bytes: &[u8]) -> Option<String> {
    /// Byte offset just past `key` where it is used as an attribute name
    /// (a standalone word followed by `=`), if anywhere.
    fn keyed_offset(raw: &str, key: &str) -> Option<usize> {
        raw.match_indices(key).find_map(|(idx, _)| {
            // Reject matches that are the tail of a longer identifier or that
            // start right after an opening quote (i.e. inside a literal).
            let standalone = raw[..idx]
                .chars()
                .next_back()
                .map_or(true, |c| !(c.is_alphanumeric() || c == '_' || c == '"'));
            let after = idx + key.len();
            (standalone && raw[after..].trim_start().starts_with('=')).then_some(after)
        })
    }

    let args = ann.child_by_field_name("arguments")?;
    let raw = args.utf8_text(bytes).ok()?;
    let search_from = ["urlPatterns", "value"]
        .iter()
        .find_map(|key| keyed_offset(raw, key))
        .unwrap_or(0);
    let start = raw[search_from..].find('"')? + search_from + 1;
    let end = raw[start..].find('"')? + start;
    Some(raw[start..end].to_string())
}
/// Source text of the annotation's `name` field as an owned string;
/// `None` when the field is absent or its text is not valid UTF-8.
fn annotation_name(ann: Node, bytes: &[u8]) -> Option<String> {
ann.child_by_field_name("name")
.and_then(|n| n.utf8_text(bytes).ok())
.map(str::to_string)
}
/// Source text of the method declaration's `name` field as an owned
/// string; `None` when the field is absent or its text is not valid UTF-8.
fn method_name(method: Node, bytes: &[u8]) -> Option<String> {
method
.child_by_field_name("name")
.and_then(|n| n.utf8_text(bytes).ok())
.map(str::to_string)
}
/// `true` for the two node kinds this probe treats as annotations:
/// `annotation` and `marker_annotation`.
fn is_annotation(node: Node) -> bool {
matches!(node.kind(), "annotation" | "marker_annotation")
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parse `src` as Java and return the tree together with the source bytes.
    fn parse(src: &str) -> (Tree, Vec<u8>) {
        let mut parser = tree_sitter::Parser::new();
        let language = tree_sitter_java::LANGUAGE.into();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.as_bytes().to_vec())
    }

    /// Class-level and method-level `@Path` values are joined into one
    /// route for a `@GET` method.
    #[test]
    fn detects_jaxrs_get() {
        let src = r#"
@Path("/users")
public class UsersResource {
@GET
@Path("/{id}")
public User get() { return null; }
}
"#;
        let (tree, bytes) = parse(src);
        let file = PathBuf::from("UsersResource.java");
        let found = detect_servlet_routes(&tree, &bytes, &file, None);
        assert!(!found.is_empty());
        match &found[0] {
            SurfaceNode::EntryPoint(entry) => {
                assert_eq!(entry.method, HttpMethod::GET);
                assert_eq!(entry.route, "/users/{id}");
            }
            _ => panic!(),
        }
    }

    /// Each `doXxx` method of a `@WebServlet` class is its own entry point.
    #[test]
    fn detects_servlet_doget() {
        let src = r#"
@WebServlet("/admin")
public class Admin extends HttpServlet {
public void doGet(HttpServletRequest req, HttpServletResponse resp) {}
public void doPost(HttpServletRequest req, HttpServletResponse resp) {}
}
"#;
        let (tree, bytes) = parse(src);
        let found = detect_servlet_routes(&tree, &bytes, &PathBuf::from("Admin.java"), None);
        assert_eq!(found.len(), 2);
    }
}

View file

@ -0,0 +1,305 @@
//! Java + Spring framework probe.
//!
//! Recognises Spring controller methods annotated with
//! `@RequestMapping` / `@GetMapping` / `@PostMapping` / `@PutMapping`
//! / `@PatchMapping` / `@DeleteMapping`. The route path is the
//! concatenation of class-level `@RequestMapping(value=...)` /
//! `@RestController` and method-level `value=...` arguments.
//!
//! `auth_required` fires when the method, the enclosing class, or the
//! `value=` argument lists a Spring-Security annotation
//! ([`AUTH_ANNOTATIONS`]).
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, unquote};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Annotation names that mark a controller class or method as requiring
/// auth. Matched via `leaf_matches`, i.e. on the last path segment and
/// ignoring ASCII case.
pub const AUTH_ANNOTATIONS: &[&str] = &[
"PreAuthorize",
"PostAuthorize",
"Secured",
"RolesAllowed",
"AuthenticationPrincipal",
];
/// Spring mapping annotations paired with the HTTP method they imply.
/// `RequestMapping` carries `None` because the annotation name alone does
/// not fix a method.
const MAPPING_ANNOTATIONS: &[(&str, Option<HttpMethod>)] = &[
("RequestMapping", None),
("GetMapping", Some(HttpMethod::GET)),
("PostMapping", Some(HttpMethod::POST)),
("PutMapping", Some(HttpMethod::PUT)),
("PatchMapping", Some(HttpMethod::PATCH)),
("DeleteMapping", Some(HttpMethod::DELETE)),
];
/// Scan a parsed Java file for Spring controller methods.
///
/// For every class in `tree`, each method for which `method_mapping`
/// returns a mapping becomes one [`SurfaceNode::EntryPoint`] tagged
/// [`Framework::Spring`]. `auth_required` is set when either the class or
/// the method carries an auth annotation.
pub fn detect_spring_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut entry_points = Vec::new();
    walk_classes(tree.root_node(), &mut |class| {
        let class_path = class_request_mapping_path(class, bytes);
        let class_auth = class_has_auth_annotation(class, bytes);
        let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else {
            return;
        };
        let mut cursor = body.walk();
        let methods = body
            .children(&mut cursor)
            .filter(|member| member.kind() == "method_declaration");
        for member in methods {
            let Some((method, route, method_auth)) = method_mapping(member, bytes, &class_path)
            else {
                continue;
            };
            let handler_name = method_name(member, bytes).unwrap_or_default();
            let start = member.start_position();
            entry_points.push(SurfaceNode::EntryPoint(EntryPoint {
                location: loc_for(member, &file_rel),
                framework: Framework::Spring,
                method,
                route,
                handler_name,
                handler_location: SourceLocation::new(
                    file_rel.clone(),
                    (start.row + 1) as u32,
                    (start.column + 1) as u32,
                ),
                auth_required: class_auth || method_auth,
            }));
        }
    });
    entry_points
}
/// Walk the tree rooted at `node` in pre-order and pass every
/// `class_declaration` node, nested ones included, to `visit`.
fn walk_classes<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    if let "class_declaration" = node.kind() {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_classes(child, &mut *visit));
}
/// Returns the path prefix declared by a class-level `@RequestMapping`.
///
/// The first `RequestMapping` annotation found in the class modifiers wins;
/// the result is an empty string when the class has no modifiers or no such
/// annotation.
fn class_request_mapping_path(class: Node, bytes: &[u8]) -> String {
    let Some(modifiers) = crate::surface::lang::common::child_or_named(class, "modifiers") else {
        return String::new();
    };
    let mut cursor = modifiers.walk();
    modifiers
        .children(&mut cursor)
        .filter(|ann| is_annotation(*ann))
        .filter_map(|ann| annotation_name_and_args(ann, bytes))
        .find(|(name, _)| name == "RequestMapping")
        .map(|(_, args_text)| extract_first_path(&args_text))
        .unwrap_or_default()
}
/// Reports whether any annotation in the class modifiers matches an entry of
/// [`AUTH_ANNOTATIONS`]. A class without modifiers yields `false`.
fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool {
    let Some(modifiers) = crate::surface::lang::common::child_or_named(class, "modifiers") else {
        return false;
    };
    let mut cursor = modifiers.walk();
    modifiers
        .children(&mut cursor)
        .filter(|ann| is_annotation(*ann))
        .filter_map(|ann| annotation_name_and_args(ann, bytes))
        .any(|(name, _)| {
            AUTH_ANNOTATIONS
                .iter()
                .any(|a| leaf_matches(&name, &[a]))
        })
}
/// Inspects a method's annotations for a Spring route mapping.
///
/// Returns `(http_method, full_route, has_auth_annotation)` when one of the
/// [`MAPPING_ANNOTATIONS`] is present, `None` otherwise (including when the
/// method has no modifiers). Only the first mapping annotation is used, but
/// every annotation is still checked against [`AUTH_ANNOTATIONS`].
///
/// Route composition: an empty `class_path` leaves the method path as-is; an
/// empty method path inherits `class_path`; otherwise the two are joined with
/// exactly one `/` between them.
fn method_mapping(
    method: Node,
    bytes: &[u8],
    class_path: &str,
) -> Option<(HttpMethod, String, bool)> {
    let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?;
    let mut cursor = modifiers.walk();
    let mut requires_auth = false;
    let mut mapping: Option<(HttpMethod, String)> = None;
    for ann in modifiers.children(&mut cursor) {
        if !is_annotation(ann) {
            continue;
        }
        let Some((name, args_text)) = annotation_name_and_args(ann, bytes) else {
            continue;
        };
        requires_auth |= AUTH_ANNOTATIONS
            .iter()
            .any(|a| leaf_matches(&name, &[a]));
        if mapping.is_some() {
            // Keep scanning only to pick up auth annotations after the mapping.
            continue;
        }
        let Some((_, default_verb)) = MAPPING_ANNOTATIONS
            .iter()
            .find(|(known, _)| name == *known)
        else {
            continue;
        };
        let own_path = extract_first_path(&args_text);
        let route = match (class_path.is_empty(), own_path.is_empty()) {
            (true, _) => own_path,
            // Class-only mapping; method has no path.
            (false, true) => class_path.to_string(),
            (false, false) => format!(
                "{}/{}",
                class_path.trim_end_matches('/'),
                own_path.trim_start_matches('/')
            ),
        };
        let verb = default_verb
            .or_else(|| extract_request_method_from_args(&args_text))
            .unwrap_or(HttpMethod::GET);
        mapping = Some((verb, route));
    }
    mapping.map(|(verb, route)| (verb, route, requires_auth))
}
/// True for both annotation node kinds: `annotation` (with arguments) and
/// `marker_annotation` (without).
fn is_annotation(node: Node) -> bool {
    let kind = node.kind();
    kind == "annotation" || kind == "marker_annotation"
}
/// Splits an annotation node into `(leaf_name, raw_args_text)`.
///
/// The name is reduced to the segment after the last `.`, so a qualified
/// annotation name matches the same as the bare one. The argument text is
/// the verbatim source of the `arguments` field, or an empty string when the
/// annotation has none. Returns `None` when the name is missing or is not
/// valid UTF-8.
fn annotation_name_and_args(ann: Node, bytes: &[u8]) -> Option<(String, String)> {
    let qualified = ann.child_by_field_name("name")?.utf8_text(bytes).ok()?;
    let leaf = match qualified.rsplit_once('.') {
        Some((_, tail)) => tail,
        None => qualified,
    };
    let args = match ann.child_by_field_name("arguments") {
        Some(list) => list.utf8_text(bytes).unwrap_or(""),
        None => "",
    };
    Some((leaf.to_string(), args.to_string()))
}
/// Extracts the route path from an annotation's raw argument text.
///
/// Returns the first string literal that is either positional (the implicit
/// `value` attribute, e.g. `("/users")` or `({"/a", "/b"})`) or assigned to
/// `value` / `path`. Literals that belong to any other attribute
/// (`produces = "application/json"`, `name = "..."`, ...) are skipped, so an
/// annotation with no path attribute yields an empty string instead of a
/// bogus route.
///
/// A backslash-escaped character inside a literal is kept verbatim and does
/// not end the literal. An unterminated literal yields an empty string.
fn extract_first_path(args_text: &str) -> String {
    // Attribute the upcoming literals belong to; `None` means positional.
    let mut key: Option<String> = None;
    // Most recent identifier-like word, used to name the attribute at `=`.
    let mut word = String::new();
    let mut in_word = false;
    // Commas inside `{...}` separate array elements, not attributes.
    let mut brace_depth = 0usize;
    let mut chars = args_text.chars();
    while let Some(c) = chars.next() {
        if c.is_alphanumeric() || c == '_' {
            if !in_word {
                word.clear();
                in_word = true;
            }
            word.push(c);
            continue;
        }
        in_word = false;
        match c {
            '=' => key = Some(std::mem::take(&mut word)),
            '{' => brace_depth += 1,
            '}' => brace_depth = brace_depth.saturating_sub(1),
            ',' if brace_depth == 0 => {
                key = None;
                word.clear();
            }
            '"' => {
                let mut literal = String::new();
                let mut closed = false;
                while let Some(inner) = chars.next() {
                    match inner {
                        '"' => {
                            closed = true;
                            break;
                        }
                        '\\' => {
                            literal.push(inner);
                            if let Some(escaped) = chars.next() {
                                literal.push(escaped);
                            }
                        }
                        _ => literal.push(inner),
                    }
                }
                if !closed {
                    return String::new();
                }
                if matches!(key.as_deref(), None | Some("value") | Some("path")) {
                    return literal;
                }
            }
            _ => {}
        }
    }
    String::new()
}
/// Finds an explicit verb in annotation arguments such as
/// `RequestMapping(method = RequestMethod.POST)`.
///
/// Verbs are probed in a fixed order (GET, POST, PUT, DELETE, PATCH, HEAD,
/// OPTIONS), not in source order. The first one whose `RequestMethod.<VERB>`
/// text occurs in `args_text` is converted with `HttpMethod::from_ident`.
/// Returns `None` when no verb text is present.
fn extract_request_method_from_args(args_text: &str) -> Option<HttpMethod> {
    const VERB_NAMES: [&str; 7] = ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"];
    VERB_NAMES
        .iter()
        .copied()
        .find(|verb| args_text.contains(&format!("RequestMethod.{}", verb)))
        .and_then(|verb| HttpMethod::from_ident(verb))
}
/// Returns the identifier in the method's `name` field, or `None` when the
/// field is absent or its text is not valid UTF-8.
fn method_name(method: Node, bytes: &[u8]) -> Option<String> {
    let ident = method.child_by_field_name("name")?;
    let text = ident.utf8_text(bytes).ok()?;
    Some(text.to_string())
}
/// Reads a node's source text and passes it through `unquote`.
///
/// Returns `None` when the node text is not valid UTF-8.
// No caller is visible in this probe; the lint is suppressed rather than
// removing the helper.
#[allow(dead_code)]
fn read_string_literal(node: Node, bytes: &[u8]) -> Option<String> {
    node.utf8_text(bytes).ok().map(|raw| unquote(raw))
}
// Unit tests for the Spring probe; each test parses an inline Java snippet.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter Java grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_java::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// A single `@GetMapping` method yields one GET entry point with the
// annotated path and the method name as handler.
#[test]
fn detects_get_mapping() {
let src = r#"
@RestController
public class UserController {
@GetMapping("/users")
public List<User> list() { return null; }
}
"#;
let (tree, bytes) = parse(src);
let nodes = detect_spring_routes(&tree, &bytes, &PathBuf::from("UserController.java"), None);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
assert_eq!(ep.handler_name, "list");
}
// A class-level `@RequestMapping` path is prefixed to the method path.
#[test]
fn class_request_mapping_prefix_concatenates() {
let src = r#"
@RequestMapping("/api")
public class C {
@PostMapping("/users")
public void create() {}
}
"#;
let (tree, bytes) = parse(src);
let nodes = detect_spring_routes(&tree, &bytes, &PathBuf::from("C.java"), None);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.route, "/api/users");
}
// A method-level `@PreAuthorize` sets `auth_required`.
#[test]
fn pre_authorize_marks_auth() {
let src = r#"
public class C {
@PreAuthorize("hasRole('ADMIN')")
@GetMapping("/admin")
public void admin() {}
}
"#;
let (tree, bytes) = parse(src);
let nodes = detect_spring_routes(&tree, &bytes, &PathBuf::from("C.java"), None);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert!(ep.auth_required);
}
}

View file

@ -0,0 +1,231 @@
//! JavaScript / TypeScript + Express framework probe.
//!
//! Detects route registration calls of the form `app.METHOD(path, ...)`
//! / `router.METHOD(path, ...)` for the standard set of HTTP verbs plus
//! `all`. `use(...)` mounts are not matched. The handler is the *last*
//! function-shaped argument (Express convention:
//! `(path, ...middleware, handler)`).
//!
//! `auth_required` fires when any positional argument before the
//! handler is an identifier matching one of the auth-middleware names
//! in [`AUTH_MIDDLEWARES`] (passport's `requireAuth`, custom guards),
//! or when an inline `passport.authenticate(...)` call appears in the
//! middleware list.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Middleware names that mark an Express route as authenticated.
///
/// [`arg_is_auth_marker`] passes this list to `leaf_matches` for identifier
/// / member-expression arguments and for the callee of call-expression
/// arguments.
pub const AUTH_MIDDLEWARES: &[&str] = &[
"requireAuth",
"requireUser",
"isAuthenticated",
"ensureAuthenticated",
"ensureLoggedIn",
"authenticate",
"authMiddleware",
"verifyToken",
"verifyJwt",
"checkJwt",
"passport",
"jwt",
];
/// Method names on an Express receiver that register a route.
/// `use` is deliberately absent, so `app.use(...)` calls are never matched.
const VERBS: &[&str] = &[
"get", "post", "put", "delete", "patch", "options", "head", "all",
];
/// Scans a parsed JS/TS file for Express route registrations.
///
/// Every `call_expression` in `tree` is offered to [`match_express_call`];
/// each match contributes one [`SurfaceNode::EntryPoint`], in traversal
/// order. File names in the result come from `rel_file(path, scan_root)`.
pub fn detect_express_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut routes = Vec::new();
    walk_calls(tree.root_node(), &mut |call| {
        routes.extend(match_express_call(call, bytes, &file_rel));
    });
    routes
}
/// Pre-order traversal that calls `visit` on every `call_expression` node at
/// or below `node`, including calls nested inside other calls.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    if node.kind() == "call_expression" {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_calls(child, visit));
}
/// Tries to interpret `call` as `receiver.VERB(path, ...middleware, handler)`.
///
/// Returns `None` unless the callee is a member expression on an
/// Express-looking receiver, the property is one of [`VERBS`], the first
/// argument is a non-empty string / template string, and a handler-shaped
/// argument exists. A verb that `HttpMethod::from_ident` does not recognise
/// is reported as `GET`.
///
/// `auth_required` is true when any argument after the first, other than the
/// handler itself, is an auth marker.
fn match_express_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
    let callee = call.child_by_field_name("function")?;
    if callee.kind() != "member_expression" {
        return None;
    }
    let receiver = callee.child_by_field_name("object")?;
    if !receiver_is_express(receiver, bytes) {
        return None;
    }
    let verb = callee
        .child_by_field_name("property")?
        .utf8_text(bytes)
        .ok()?;
    if !VERBS.contains(&verb) {
        return None;
    }
    let method = HttpMethod::from_ident(verb).unwrap_or(HttpMethod::GET);

    let arg_list = call.child_by_field_name("arguments")?;
    let mut cursor = arg_list.walk();
    // Drop punctuation so only the actual arguments remain.
    let positional: Vec<Node> = arg_list
        .children(&mut cursor)
        .filter(|n| !matches!(n.kind(), "(" | ")" | ","))
        .collect();

    let route = match positional.first() {
        Some(first) if matches!(first.kind(), "string" | "template_string") => {
            string_node_value(*first, bytes).unwrap_or_default()
        }
        _ => String::new(),
    };
    if prop_is_not_use(verb) && route.is_empty() {
        // bare `app.use(handler)` is middleware, not an entry point
        return None;
    }

    let handler = find_handler(&positional)?;
    let handler_id = handler.id();
    let auth_required = positional
        .iter()
        .skip(1)
        .any(|n| n.id() != handler_id && arg_is_auth_marker(*n, bytes));
    let handler_start = handler.start_position();

    return Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(call, file_rel),
        framework: Framework::Express,
        method,
        route,
        handler_name: handler_function_name(handler, bytes).unwrap_or_default(),
        handler_location: SourceLocation::new(
            file_rel,
            (handler_start.row + 1) as u32,
            (handler_start.column + 1) as u32,
        ),
        auth_required,
    }));

    /// Path-less registrations are rejected for every property except `use`.
    fn prop_is_not_use(prop: &str) -> bool {
        prop != "use"
    }
}
/// Picks the route handler: the last argument whose node kind is a function
/// form, an identifier, or a member expression. Returns `None` when no
/// argument qualifies.
fn find_handler<'a>(positional: &[Node<'a>]) -> Option<Node<'a>> {
    const HANDLER_KINDS: [&str; 6] = [
        "arrow_function",
        "function",
        "function_expression",
        "function_declaration",
        "identifier",
        "member_expression",
    ];
    positional
        .iter()
        .copied()
        .rfind(|arg| HANDLER_KINDS.contains(&arg.kind()))
}
fn handler_function_name(node: Node, bytes: &[u8]) -> Option<String> {
if matches!(node.kind(), "identifier" | "member_expression") {
return node.utf8_text(bytes).ok().map(str::to_string);
}
if let Some(name_node) = node.child_by_field_name("name")
&& let Ok(name) = name_node.utf8_text(bytes)
{
return Some(name.to_string());
}
None
}
/// Decides whether a route argument looks like auth middleware.
///
/// * identifier / member expression: its text must match
///   [`AUTH_MIDDLEWARES`] via `leaf_matches`;
/// * call expression: the callee text must match the same list, or contain
///   `passport.authenticate`;
/// * anything else: `false`.
fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool {
    let kind = node.kind();
    if kind == "identifier" || kind == "member_expression" {
        return node
            .utf8_text(bytes)
            .is_ok_and(|text| leaf_matches(text, AUTH_MIDDLEWARES));
    }
    if kind != "call_expression" {
        return false;
    }
    node.child_by_field_name("function")
        .and_then(|callee| callee.utf8_text(bytes).ok())
        .is_some_and(|text| {
            leaf_matches(text, AUTH_MIDDLEWARES) || text.contains("passport.authenticate")
        })
}
/// Name-based check that the receiver of `.get(...)` / `.post(...)` etc. is
/// an Express app or router.
///
/// * identifier: the name itself is tested;
/// * member expression: only the final property is tested;
/// * call expression: the callee's last dotted segment must be `express`,
///   `Router` or `createApp`.
///
/// Name tests are case-insensitive: exactly `app` / `router` / `server`, or
/// ending in `_app`, `router` or `api`.
fn receiver_is_express(object: Node, bytes: &[u8]) -> bool {
    fn looks_like_app_name(text: &str) -> bool {
        let lower = text.to_ascii_lowercase();
        matches!(lower.as_str(), "app" | "router" | "server")
            || ["_app", "router", "api"]
                .iter()
                .any(|suffix| lower.ends_with(*suffix))
    }

    let name_source = match object.kind() {
        "identifier" => Some(object),
        "member_expression" => object.child_by_field_name("property"),
        "call_expression" => {
            return object
                .child_by_field_name("function")
                .and_then(|callee| callee.utf8_text(bytes).ok())
                .is_some_and(|text| {
                    let leaf = text.rsplit('.').next().unwrap_or(text);
                    matches!(leaf, "express" | "Router" | "createApp")
                });
        }
        _ => None,
    };
    name_source
        .and_then(|n| n.utf8_text(bytes).ok())
        .is_some_and(looks_like_app_name)
}
// Unit tests for the Express probe; each test parses an inline JS snippet.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter JavaScript grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_javascript::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// `app.get(path, handler)` yields exactly one GET entry point.
#[test]
fn detects_get_route() {
let src = "const app = express();\napp.get('/users', (req, res) => res.send('ok'));\n";
let (tree, bytes) = parse(src);
let nodes = detect_express_routes(&tree, &bytes, &PathBuf::from("server.js"), None);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.framework, Framework::Express);
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
}
// A known auth middleware between path and handler sets `auth_required`.
#[test]
fn detects_auth_middleware() {
let src = "app.post('/secret', requireAuth, (req, res) => {});\n";
let (tree, bytes) = parse(src);
let nodes = detect_express_routes(&tree, &bytes, &PathBuf::from("server.js"), None);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert!(ep.auth_required);
}
}

193
src/surface/lang/js_koa.rs Normal file
View file

@ -0,0 +1,193 @@
//! JavaScript / TypeScript + Koa framework probe.
//!
//! Koa apps register routes through `koa-router` (or `@koa/router`):
//! `router.get(path, handler)`, `router.post(path, ...middleware,
//! handler)`, etc. The receiver is named `router`, `r`, or has a
//! `_router`/`Router` suffix. Additional Koa-specific recognition:
//!
//! * `router.use('/path', subrouter.routes())` is *not* an
//! entry-point — the inner middleware chain is. Filtered by
//! ignoring `use` for path-less middleware mounting.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Middleware names that mark a Koa route as authenticated.
///
/// [`arg_is_auth_marker`] passes this list to `leaf_matches` for identifier
/// / member-expression arguments and for the callee of call-expression
/// arguments.
pub const AUTH_MIDDLEWARES: &[&str] = &[
"requireAuth",
"requireUser",
"isAuthenticated",
"ensureAuthenticated",
"authenticate",
"authMiddleware",
"verifyToken",
"verifyJwt",
"checkJwt",
"passport",
"jwt",
"koaJwt",
];
/// Router method names that register a route. `use` is absent, so
/// `router.use(...)` mounts are never matched.
const VERBS: &[&str] = &[
"get", "post", "put", "delete", "patch", "options", "head", "all",
];
/// Scans a parsed JS/TS file for koa-router route registrations.
///
/// Every `call_expression` in `tree` is offered to [`match_koa_call`]; each
/// match contributes one [`SurfaceNode::EntryPoint`], in traversal order.
/// File names in the result come from `rel_file(path, scan_root)`.
pub fn detect_koa_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut routes = Vec::new();
    let mut collect = |call| {
        if let Some(entry) = match_koa_call(call, bytes, &file_rel) {
            routes.push(entry);
        }
    };
    walk_calls(tree.root_node(), &mut collect);
    routes
}
/// Depth-first, pre-order walk invoking `visit` for each `call_expression`
/// found at or below `node`.
fn walk_calls<'tree, F>(node: Node<'tree>, visit: &mut F)
where
    F: FnMut(Node<'tree>),
{
    if node.kind() == "call_expression" {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_calls(child, visit));
}
/// Tries to interpret `call` as a koa-router registration
/// `router.VERB([name,] path, ...middleware, handler)`.
///
/// Returns `None` unless the callee is a member expression on a router-like
/// receiver, the property is one of [`VERBS`], at least one string /
/// template-string argument exists, and a handler-shaped argument exists. A
/// verb that `HttpMethod::from_ident` does not recognise is reported as `GET`.
///
/// koa-router allows an optional route *name* before the path
/// (`router.get('user', '/users/:id', handler)`). When the first string
/// argument is immediately followed by another string, the second one is
/// taken as the path, so the name is not reported as the route.
///
/// `auth_required` is true when any argument other than the handler is an
/// auth marker. The handler is excluded by node id, as in the Express probe.
fn match_koa_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
    fn is_string_literal(node: &Node) -> bool {
        matches!(node.kind(), "string" | "template_string")
    }

    let func = call.child_by_field_name("function")?;
    if func.kind() != "member_expression" {
        return None;
    }
    let object = func.child_by_field_name("object")?;
    if !receiver_is_koa_router(object, bytes) {
        return None;
    }
    let prop = func.child_by_field_name("property")?;
    let prop_text = prop.utf8_text(bytes).ok()?;
    if !VERBS.contains(&prop_text) {
        return None;
    }
    let method = HttpMethod::from_ident(prop_text).unwrap_or(HttpMethod::GET);

    let args = call.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    // Drop punctuation so only the actual arguments remain.
    let positional: Vec<Node> = args
        .children(&mut cursor)
        .filter(|n| !matches!(n.kind(), "(" | ")" | ","))
        .collect();

    let first_string = positional.iter().position(is_string_literal)?;
    // Named route: `(name, path, ...)` — the path is the second literal.
    let route_idx = match positional.get(first_string + 1) {
        Some(next) if is_string_literal(next) => first_string + 1,
        _ => first_string,
    };
    let route = string_node_value(positional[route_idx], bytes).unwrap_or_default();

    let handler_node = positional
        .iter()
        .rev()
        .find(|n| {
            matches!(
                n.kind(),
                "arrow_function"
                    | "function"
                    | "function_expression"
                    | "function_declaration"
                    | "identifier"
                    | "member_expression"
            )
        })
        .copied()?;
    let handler_id = handler_node.id();
    let auth_required = positional
        .iter()
        .filter(|n| n.id() != handler_id)
        .any(|n| arg_is_auth_marker(*n, bytes));
    let handler_name = handler_function_name(handler_node, bytes).unwrap_or_default();

    Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(call, file_rel),
        framework: Framework::Express, // koa shares the Express variant tag — Phase 22 reuses
        method,
        route,
        handler_name,
        handler_location: SourceLocation::new(
            file_rel,
            (handler_node.start_position().row + 1) as u32,
            (handler_node.start_position().column + 1) as u32,
        ),
        auth_required,
    }))
}
fn handler_function_name(node: Node, bytes: &[u8]) -> Option<String> {
if matches!(node.kind(), "identifier" | "member_expression") {
return node.utf8_text(bytes).ok().map(str::to_string);
}
if let Some(name_node) = node.child_by_field_name("name")
&& let Ok(name) = name_node.utf8_text(bytes)
{
return Some(name.to_string());
}
None
}
/// Decides whether a route argument looks like auth middleware.
///
/// Identifiers and member expressions are tested by their own text, call
/// expressions by their callee text; both go through `leaf_matches` against
/// [`AUTH_MIDDLEWARES`]. Every other node kind yields `false`.
fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool {
    let subject = match node.kind() {
        "identifier" | "member_expression" => Some(node),
        "call_expression" => node.child_by_field_name("function"),
        _ => None,
    };
    subject
        .and_then(|n| n.utf8_text(bytes).ok())
        .is_some_and(|text| leaf_matches(text, AUTH_MIDDLEWARES))
}
/// Name-based check that the receiver of a verb call is a koa-router.
///
/// * identifier: the name itself is tested;
/// * member expression: only the final property is tested;
/// * call expression: the callee's last dotted segment must be `Router` or
///   `KoaRouter`.
///
/// A name passes (case-insensitively) when it is `r` or ends in `router`;
/// that covers the exact `router` and the `_router` suffix as well.
fn receiver_is_koa_router(object: Node, bytes: &[u8]) -> bool {
    fn looks_like_router(text: &str) -> bool {
        let lower = text.to_ascii_lowercase();
        lower == "r" || lower.ends_with("router")
    }

    let name_source = match object.kind() {
        "identifier" => Some(object),
        "member_expression" => object.child_by_field_name("property"),
        "call_expression" => {
            return object
                .child_by_field_name("function")
                .and_then(|callee| callee.utf8_text(bytes).ok())
                .is_some_and(|text| {
                    let leaf = text.rsplit('.').next().unwrap_or(text);
                    matches!(leaf, "Router" | "KoaRouter")
                });
        }
        _ => None,
    };
    name_source
        .and_then(|n| n.utf8_text(bytes).ok())
        .is_some_and(looks_like_router)
}
// Unit tests for the Koa probe.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter JavaScript grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_javascript::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// `router.get(path, handler)` on a receiver named `router` yields one node.
#[test]
fn detects_router_get() {
let src = "const router = new Router();\nrouter.get('/users', async ctx => { ctx.body = []; });\n";
let (tree, bytes) = parse(src);
let nodes = detect_koa_routes(&tree, &bytes, &PathBuf::from("server.js"), None);
assert_eq!(nodes.len(), 1);
}
}

View file

@ -1,6 +1,37 @@
//! Per-language framework probes. Phase 21 ships Python + Flask;
//! Phase 22 generalises to FastAPI / Django, Java Spring / JAX-RS,
//! Ruby Rails / Sinatra, Go net/http / gin, Rust axum / actix /
//! rocket, JS/TS Express + Next.js.
//! Per-language framework probes.
//!
//! Phase 21 shipped Python + Flask. Phase 22 generalises detection to:
//! Python (FastAPI, Django), JS/TS (Express, Koa, Next.js), Java
//! (Spring, Servlet/JAX-RS, Quarkus), Go (`net/http`, gin), PHP
//! (Laravel, Slim), Ruby (Sinatra, Rails), Rust (axum, actix-web).
//!
//! Every probe exposes one public `detect_<framework>_routes` function
//! returning `Vec<SurfaceNode>` (one [`super::SurfaceNode::EntryPoint`]
//! per recognised route). Probes are pure functions — no I/O, no
//! state.
pub mod common;
pub mod python_flask;
pub mod python_fastapi;
pub mod python_django;
pub mod js_express;
pub mod js_koa;
pub mod ts_next;
pub mod java_spring;
pub mod java_servlet;
pub mod java_quarkus;
pub mod go_http;
pub mod go_gin;
pub mod php_laravel;
pub mod php_slim;
pub mod ruby_sinatra;
pub mod ruby_rails;
pub mod rust_actix;
pub mod rust_axum;

View file

@ -0,0 +1,167 @@
//! PHP + Laravel framework probe.
//!
//! Recognises Laravel route declarations:
//!
//! * `Route::get('/path', $handler)` / `::post(...)` / `::put` /
//! `::patch` / `::delete` / `::options` / `::head` (`::any` and
//! `::match` are not matched yet)
//! * `Route::resource('users', UserController::class)` (omitted —
//! resource controller dispatch is path-derived; Phase 22 ships the
//! primary verb shape only)
//!
//! `auth_required` fires when the route call is followed by a
//! `->middleware('auth')` chain or the closure is wrapped in
//! `Route::middleware(['auth'])->group(...)`.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// `Route::<verb>` method names recognised by the probe, with the HTTP method
/// each one maps to. [`match_laravel_call`] compares names case-insensitively.
const VERBS: &[(&str, HttpMethod)] = &[
("get", HttpMethod::GET),
("post", HttpMethod::POST),
("put", HttpMethod::PUT),
("patch", HttpMethod::PATCH),
("delete", HttpMethod::DELETE),
("options", HttpMethod::OPTIONS),
("head", HttpMethod::HEAD),
];
/// Scans a parsed PHP file for Laravel `Route::<verb>(...)` declarations.
///
/// Every call node reported by [`walk_calls`] is offered to
/// [`match_laravel_call`]; each match contributes one
/// [`SurfaceNode::EntryPoint`], in traversal order. File names in the result
/// come from `rel_file(path, scan_root)`.
pub fn detect_laravel_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut routes = Vec::new();
    walk_calls(tree.root_node(), &mut |call| {
        routes.extend(match_laravel_call(call, bytes, &file_rel));
    });
    routes
}
/// Pre-order traversal that calls `visit` on every PHP call node (function,
/// scoped `::`, or member `->` call) at or below `node`.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    const CALL_KINDS: [&str; 3] = [
        "function_call_expression",
        "scoped_call_expression",
        "member_call_expression",
    ];
    if CALL_KINDS.contains(&node.kind()) {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_calls(child, visit));
}
/// Tries to interpret `call` as `Route::<verb>(path, handler, ...)`.
///
/// Returns `None` unless the node is a scoped call whose scope text contains
/// `Route`, whose method name is one of [`VERBS`] (case-insensitive), and
/// which has at least two arguments. The first argument supplies the route
/// (an empty string when it is not a readable string literal); the second
/// argument's source text becomes the handler name. `auth_required` comes
/// from [`check_chained_middleware`].
fn match_laravel_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
    if call.kind() != "scoped_call_expression" {
        return None;
    }
    let scope_text = call.child_by_field_name("scope")?.utf8_text(bytes).ok()?;
    if !scope_text.contains("Route") {
        return None;
    }
    let verb = call.child_by_field_name("name")?.utf8_text(bytes).ok()?;
    let method = VERBS
        .iter()
        .find(|(known, _)| known.eq_ignore_ascii_case(verb))
        .map(|(_, http_method)| *http_method)?;

    let arg_list = call.child_by_field_name("arguments")?;
    let mut cursor = arg_list.walk();
    let mut arguments = arg_list
        .children(&mut cursor)
        .filter(|n| n.kind() == "argument");
    let (Some(route_arg), Some(handler_arg)) = (arguments.next(), arguments.next()) else {
        return None;
    };

    let route = string_node_value(first_inner(route_arg), bytes).unwrap_or_default();
    let handler_node = first_inner(handler_arg);
    let handler_name = handler_text(handler_node, bytes).unwrap_or_default();
    let auth_required = check_chained_middleware(call, bytes);
    let handler_start = handler_node.start_position();

    Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(call, file_rel),
        // PHP frameworks reuse the closest tag — Laravel folds into a generic
        // surface entry-point.
        framework: Framework::Sinatra,
        method,
        route,
        handler_name,
        handler_location: SourceLocation::new(
            file_rel,
            (handler_start.row + 1) as u32,
            (handler_start.column + 1) as u32,
        ),
        auth_required,
    }))
}
/// Unwraps an `argument` node to its first named child, falling back to the
/// argument node itself when it has none.
fn first_inner(arg: Node) -> Node {
    let mut cursor = arg.walk();
    let inner = arg.named_children(&mut cursor).next();
    match inner {
        Some(expr) => expr,
        None => arg,
    }
}
/// Returns the handler node's verbatim source text, or `None` when it is not
/// valid UTF-8.
fn handler_text(node: Node, bytes: &[u8]) -> Option<String> {
    node.utf8_text(bytes).ok().map(str::to_string)
}
fn check_chained_middleware(call: Node, bytes: &[u8]) -> bool {
// Walk up to find a member_call chain: `Route::get(...)->middleware('auth')`
let mut cur = call.parent();
while let Some(p) = cur {
if p.kind() == "member_call_expression"
&& let Some(name) = p.child_by_field_name("name")
&& let Ok(name_text) = name.utf8_text(bytes)
&& name_text == "middleware"
&& let Some(args) = p.child_by_field_name("arguments")
&& let Ok(args_text) = args.utf8_text(bytes)
&& (args_text.contains("auth") || args_text.contains("jwt") || args_text.contains("authenticated"))
{
return true;
}
cur = p.parent();
}
false
}
// Unit tests for the Laravel probe; each test parses an inline PHP snippet.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter PHP grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_php::LANGUAGE_PHP.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// `Route::get(path, handler)` yields one GET entry point with that path.
#[test]
fn detects_laravel_get() {
let src = "<?php\nRoute::get('/users', 'UserController@index');\n";
let (tree, bytes) = parse(src);
let nodes = detect_laravel_routes(&tree, &bytes, &PathBuf::from("routes.php"), None);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
}
// A chained `->middleware('auth')` sets `auth_required`.
#[test]
fn detects_middleware_chain() {
let src = "<?php\nRoute::post('/admin', 'AdminController@create')->middleware('auth');\n";
let (tree, bytes) = parse(src);
let nodes = detect_laravel_routes(&tree, &bytes, &PathBuf::from("routes.php"), None);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert!(ep.auth_required);
}
}

View file

@ -0,0 +1,139 @@
//! PHP + Slim framework probe.
//!
//! Recognises Slim route registrations:
//!
//! * `$app->get('/path', $handler)` / `->post(...)` / `->put` /
//! `->delete` / `->patch` / `->options` / `->any`
//! * `$app->group('/api', function ($g) { $g->get(...); })` — routes
//! registered on the group receiver (`$g`, `$group`) are detected,
//! but the group prefix is not prepended to the reported route.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Slim route-registration method names with the HTTP method each maps to.
/// [`match_slim_call`] compares names case-insensitively. `any` has no
/// dedicated variant here and is reported as `GET`.
const VERBS: &[(&str, HttpMethod)] = &[
("get", HttpMethod::GET),
("post", HttpMethod::POST),
("put", HttpMethod::PUT),
("patch", HttpMethod::PATCH),
("delete", HttpMethod::DELETE),
("options", HttpMethod::OPTIONS),
("head", HttpMethod::HEAD),
("any", HttpMethod::GET),
];
/// Scans a parsed PHP file for Slim `$app-><verb>(...)` route registrations.
///
/// Every `member_call_expression` in `tree` is offered to
/// [`match_slim_call`]; each match contributes one
/// [`SurfaceNode::EntryPoint`], in traversal order. File names in the result
/// come from `rel_file(path, scan_root)`.
pub fn detect_slim_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut routes = Vec::new();
    let mut collect = |call| {
        if let Some(entry) = match_slim_call(call, bytes, &file_rel) {
            routes.push(entry);
        }
    };
    walk_calls(tree.root_node(), &mut collect);
    routes
}
/// Depth-first, pre-order walk invoking `visit` for each
/// `member_call_expression` (`$obj->method(...)`) at or below `node`.
fn walk_calls<'tree, F>(node: Node<'tree>, visit: &mut F)
where
    F: FnMut(Node<'tree>),
{
    if matches!(node.kind(), "member_call_expression") {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_calls(child, visit));
}
/// Tries to interpret `call` as `$app-><verb>(path, handler, ...)`.
///
/// Returns `None` unless the receiver text passes [`receiver_is_slim_app`],
/// the method name is one of [`VERBS`] (case-insensitive), and the call has
/// at least two arguments. The first argument supplies the route (an empty
/// string when it is not a readable string literal); the second argument's
/// source text becomes the handler name. `auth_required` is always `false`
/// for this probe.
fn match_slim_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
    let receiver = call.child_by_field_name("object")?.utf8_text(bytes).ok()?;
    if !receiver_is_slim_app(receiver) {
        return None;
    }
    let verb = call.child_by_field_name("name")?.utf8_text(bytes).ok()?;
    let method = VERBS
        .iter()
        .find(|(known, _)| known.eq_ignore_ascii_case(verb))
        .map(|(_, http_method)| *http_method)?;

    let arg_list = call.child_by_field_name("arguments")?;
    let mut cursor = arg_list.walk();
    let mut arguments = arg_list
        .children(&mut cursor)
        .filter(|n| n.kind() == "argument");
    let (Some(route_arg), Some(handler_arg)) = (arguments.next(), arguments.next()) else {
        return None;
    };

    let route = string_node_value(first_inner(route_arg), bytes).unwrap_or_default();
    let handler_node = first_inner(handler_arg);
    let handler_name = match handler_node.utf8_text(bytes) {
        Ok(text) => text.to_string(),
        Err(_) => String::new(),
    };
    let handler_start = handler_node.start_position();

    Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(call, file_rel),
        framework: Framework::Sinatra,
        method,
        route,
        handler_name,
        handler_location: SourceLocation::new(
            file_rel,
            (handler_start.row + 1) as u32,
            (handler_start.column + 1) as u32,
        ),
        auth_required: false,
    }))
}
/// Unwraps an `argument` node to its first named child; an argument without
/// named children is returned unchanged.
fn first_inner(arg: Node) -> Node {
    let mut cursor = arg.walk();
    let inner = arg.named_children(&mut cursor).next();
    inner.unwrap_or(arg)
}
/// Name-based check that a receiver expression is a Slim app / route group.
///
/// The text is trimmed and compared case-insensitively. It passes when it is
/// one of `$app`, `$g`, `$group`, `$router`, or ends in `app`, `group` or
/// `router`.
fn receiver_is_slim_app(text: &str) -> bool {
    const EXACT: [&str; 4] = ["$app", "$g", "$group", "$router"];
    const SUFFIXES: [&str; 3] = ["app", "group", "router"];
    let receiver = text.trim().to_ascii_lowercase();
    EXACT.contains(&receiver.as_str())
        || SUFFIXES.iter().any(|suffix| receiver.ends_with(*suffix))
}
// Unit tests for the Slim probe.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter PHP grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_php::LANGUAGE_PHP.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// `$app->get(path, handler)` yields one GET entry point with that path.
#[test]
fn detects_slim_get() {
let src = "<?php\n$app->get('/users', 'UsersController:list');\n";
let (tree, bytes) = parse(src);
let nodes = detect_slim_routes(&tree, &bytes, &PathBuf::from("routes.php"), None);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
}
}

View file

@ -0,0 +1,364 @@
//! Python + Django framework probe.
//!
//! Recognises two route shapes:
//!
//! 1. `urls.py`-style routing: `path("/admin", admin_view)`,
//! `re_path(r"^api/", api_view)`, `url(r"^foo$", foo_view)`.
//! The probe walks the URL configuration list and emits one
//! EntryPoint per `path` / `re_path` / `url` call, resolving the
//! handler to the function with the same name in the file when
//! possible.
//! 2. Class-based view methods: a `get` / `post` / `put` / `delete`
//! method on a class derived from `View`, `APIView`, `ViewSet`,
//! `TemplateView`. The route path is `""` because URL config lives
//! in a separate `urls.py`.
//!
//! `auth_required` follows the standard Django decorators
//! ([`AUTH_DECORATORS`]) plus the DRF permission classes pattern
//! (`permission_classes = [IsAuthenticated]`).
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{
leaf_matches, loc_for, rel_file, string_node_value,
};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::collections::HashMap;
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Decorator names that mark a Django view as requiring authentication.
///
/// `csrf_protect` is intentionally not listed: it enforces CSRF-token
/// validation, not authentication, so treating it as an auth marker would
/// report unauthenticated views as auth-gated in the surface map.
pub const AUTH_DECORATORS: &[&str] = &[
    "login_required",
    "permission_required",
    "user_passes_test",
    "staff_member_required",
    "require_authenticated",
    "auth_required",
];
/// Class-based-view base class names. [`detect_django_routes`] also uses
/// them as a substring witness for its file-level gate.
// NOTE(review): short entries such as `View` match as substrings of many
// unrelated identifiers, so the file-level gate is permissive — confirm this
// is intended.
const CBV_BASES: &[&str] = &[
"View",
"APIView",
"ViewSet",
"ModelViewSet",
"ReadOnlyModelViewSet",
"TemplateView",
"ListView",
"DetailView",
"CreateView",
"UpdateView",
"DeleteView",
"RedirectView",
"FormView",
];
/// Scans a parsed Python file for Django routes.
///
/// Emits entry points for `path` / `re_path` / `url` calls (through
/// [`detect_url_dispatch`]) and for HTTP-named methods on class-based views
/// (through [`detect_class_based_views`]). File names in the result come from
/// `rel_file(path, scan_root)`.
///
/// A cheap textual gate runs first: the probe returns an empty list unless
/// the file mentions `django`, `rest_framework`, or one of [`CBV_BASES`].
/// The gate decodes the file lossily, so a stray invalid UTF-8 byte (for
/// example in a legacy-encoded comment) no longer disables detection for the
/// whole file.
pub fn detect_django_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    // File-level gate: only fire when the file actually imports
    // django (or extends the Django CBV bases via name witness).
    // Borrows when the input is valid UTF-8; allocates only otherwise.
    let file_text = String::from_utf8_lossy(bytes);
    let has_django_witness = file_text.contains("django")
        || file_text.contains("rest_framework")
        || CBV_BASES.iter().any(|base| file_text.contains(*base));
    if !has_django_witness {
        return Vec::new();
    }
    let file_rel = rel_file(path, scan_root);
    let mut out = Vec::new();
    let function_index = collect_function_definitions(tree.root_node(), bytes);
    detect_url_dispatch(tree.root_node(), bytes, &file_rel, &function_index, &mut out);
    detect_class_based_views(tree.root_node(), bytes, &file_rel, &mut out);
    out
}
/// Indexes every `function_definition` in the tree by name.
///
/// Each entry maps the function name to `(definition_node, has_auth)`.
/// `has_auth` is true when the definition's parent is a
/// `decorated_definition` with at least one decorator accepted by
/// `decorator_is_auth_marker`. When several functions share a name, the one
/// visited last in pre-order wins.
fn collect_function_definitions<'tree>(
    root: Node<'tree>,
    bytes: &'tree [u8],
) -> HashMap<String, (Node<'tree>, bool)> {
    fn visit<'tree>(
        node: Node<'tree>,
        bytes: &'tree [u8],
        index: &mut HashMap<String, (Node<'tree>, bool)>,
    ) {
        let name = if node.kind() == "function_definition" {
            node.child_by_field_name("name")
                .and_then(|ident| ident.utf8_text(bytes).ok())
        } else {
            None
        };
        if let Some(name) = name {
            // Decorators live on the wrapping `decorated_definition` node.
            let guarded = node
                .parent()
                .filter(|parent| parent.kind() == "decorated_definition")
                .is_some_and(|wrapper| {
                    let mut cursor = wrapper.walk();
                    wrapper.children(&mut cursor).any(|child| {
                        child.kind() == "decorator" && decorator_is_auth_marker(child, bytes)
                    })
                });
            index.insert(name.to_string(), (node, guarded));
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            visit(child, bytes, index);
        }
    }

    let mut index = HashMap::new();
    visit(root, bytes, &mut index);
    index
}
/// Emits one entry point per URL-configuration call found anywhere in the
/// tree.
///
/// Every `call` node accepted by [`parse_url_call`] is recorded as a `GET`
/// route. When the handler name is present in `function_index`, the handler
/// location and `auth_required` come from that definition; otherwise the
/// call site itself is used as the handler location and `auth_required` is
/// `false`.
fn detect_url_dispatch<'tree>(
    root: Node<'tree>,
    bytes: &[u8],
    file_rel: &str,
    function_index: &HashMap<String, (Node<'tree>, bool)>,
    out: &mut Vec<SurfaceNode>,
) {
    fn visit<'tree>(
        node: Node<'tree>,
        bytes: &[u8],
        file_rel: &str,
        function_index: &HashMap<String, (Node<'tree>, bool)>,
        out: &mut Vec<SurfaceNode>,
    ) {
        if node.kind() == "call" {
            if let Some((route, handler_name)) = parse_url_call(node, bytes) {
                let (handler_location, auth_required) = match function_index.get(&handler_name) {
                    Some((definition, guarded)) => (loc_for(*definition, file_rel), *guarded),
                    None => (loc_for(node, file_rel), false),
                };
                out.push(SurfaceNode::EntryPoint(EntryPoint {
                    location: loc_for(node, file_rel),
                    framework: Framework::Django,
                    method: HttpMethod::GET,
                    route,
                    handler_name,
                    handler_location,
                    auth_required,
                }));
            }
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            visit(child, bytes, file_rel, function_index, out);
        }
    }

    visit(root, bytes, file_rel, function_index, out);
}
/// Parses a `path(...)` / `re_path(...)` / `url(...)` call into
/// `(route, handler_name)`.
///
/// The callee is matched on its last dotted segment. The route is the first
/// string argument; the handler is the first identifier, attribute or call
/// argument:
///
/// * identifier / attribute — the verbatim text (`index`, `views.index`);
/// * `X.as_view()` — the receiver `X` with the `.as_view` suffix removed, so
///   `views.MyView.as_view()` resolves to `views.MyView` rather than just
///   the module name;
/// * any other call — the first dotted segment of the callee.
///
/// Returns `None` when the call is not a URL helper, when the route or the
/// handler is missing, or when the handler is `include(...)`: that mounts
/// another URLconf instead of dispatching to a view, so it is not an entry
/// point.
fn parse_url_call(call: Node, bytes: &[u8]) -> Option<(String, String)> {
    let target = call.child_by_field_name("function")?;
    let target_text = target.utf8_text(bytes).ok()?;
    let leaf = target_text.rsplit('.').next().unwrap_or(target_text);
    if !matches!(leaf, "path" | "re_path" | "url") {
        return None;
    }
    let args = call.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let mut route: Option<String> = None;
    let mut handler: Option<String> = None;
    for arg in args.children(&mut cursor) {
        match arg.kind() {
            "string" if route.is_none() => {
                route = string_node_value(arg, bytes);
            }
            "identifier" | "attribute" if handler.is_none() => {
                handler = arg.utf8_text(bytes).ok().map(str::to_string);
            }
            "call" if handler.is_none() => {
                if let Some(callee) = arg.child_by_field_name("function")
                    && let Ok(text) = callee.utf8_text(bytes)
                {
                    let callee_leaf = text.rsplit('.').next().unwrap_or(text);
                    if callee_leaf == "include" {
                        // URLconf mount, not a view.
                        return None;
                    }
                    let view = match text.strip_suffix(".as_view") {
                        // `MyView.as_view()` shape — keep the view class path.
                        Some(class_path) => class_path,
                        None => text.split('.').next().unwrap_or(text),
                    };
                    handler = Some(view.to_string());
                }
            }
            _ => {}
        }
    }
    Some((route?, handler?))
}
/// Emits one Django [`EntryPoint`] per HTTP-named method of every
/// class-based view found under `root`.
///
/// A class qualifies when [`class_is_django_view`] accepts it. Only
/// direct statements of the class body are inspected: plain
/// `function_definition`s and the function wrapped by a
/// `decorated_definition`. A method is reported when
/// `HttpMethod::from_ident` recognises its name. The route is left empty
/// and `auth_required` is the class-level result of
/// [`class_has_auth_permission`].
fn detect_class_based_views(
    root: Node,
    bytes: &[u8],
    file_rel: &str,
    out: &mut Vec<SurfaceNode>,
) {
    // Pre-order traversal with an explicit stack (children pushed reversed).
    let mut pending = vec![root];
    while let Some(current) = pending.pop() {
        if current.kind() == "class_definition" && class_is_django_view(current, bytes) {
            let class_auth = class_has_auth_permission(current, bytes);
            if let Some(body) = current.child_by_field_name("body") {
                let mut body_cursor = body.walk();
                for stmt in body.children(&mut body_cursor) {
                    // Resolve the statement to the function it defines, if any.
                    let candidate = match stmt.kind() {
                        "function_definition" => Some(stmt),
                        "decorated_definition" => {
                            stmt.child_by_field_name("definition").or_else(|| {
                                let mut inner = stmt.walk();
                                stmt.children(&mut inner)
                                    .find(|n| n.kind() == "function_definition")
                            })
                        }
                        _ => None,
                    };
                    let Some(func) = candidate.filter(|f| f.kind() == "function_definition")
                    else {
                        continue;
                    };
                    let Some(name) = func
                        .child_by_field_name("name")
                        .and_then(|n| n.utf8_text(bytes).ok())
                    else {
                        continue;
                    };
                    let Some(method) = HttpMethod::from_ident(name) else {
                        continue;
                    };
                    let start = func.start_position();
                    out.push(SurfaceNode::EntryPoint(EntryPoint {
                        location: loc_for(func, file_rel),
                        framework: Framework::Django,
                        method,
                        route: String::new(),
                        handler_name: name.to_string(),
                        handler_location: SourceLocation::new(
                            file_rel,
                            (start.row + 1) as u32,
                            (start.column + 1) as u32,
                        ),
                        auth_required: class_auth,
                    }));
                }
            }
        }
        let mut cursor = current.walk();
        let children: Vec<Node> = current.children(&mut cursor).collect();
        pending.extend(children.into_iter().rev());
    }
}
/// Returns `true` when any superclass of `class` looks like a Django
/// class-based-view base.
///
/// Each named child of the `superclasses` field is reduced to its last
/// dotted segment, which is then checked for containing any entry of
/// `CBV_BASES` as a substring. Classes without a superclass list are
/// rejected.
fn class_is_django_view(class: Node, bytes: &[u8]) -> bool {
    let Some(supers) = class.child_by_field_name("superclasses") else {
        return false;
    };
    let mut cursor = supers.walk();
    supers
        .named_children(&mut cursor)
        .filter_map(|base| base.utf8_text(bytes).ok())
        .map(|text| text.rsplit('.').next().unwrap_or(text))
        .any(|leaf| CBV_BASES.iter().any(|known| leaf.contains(known)))
}
fn class_has_auth_permission(class: Node, bytes: &[u8]) -> bool {
let Some(body) = class.child_by_field_name("body") else {
return false;
};
let mut cursor = body.walk();
for stmt in body.children(&mut cursor) {
if stmt.kind() != "expression_statement" {
continue;
}
let mut sc = stmt.walk();
for child in stmt.children(&mut sc) {
if child.kind() != "assignment" {
continue;
}
let Some(left) = child.child_by_field_name("left") else {
continue;
};
let Ok(left_text) = left.utf8_text(bytes) else {
continue;
};
if left_text != "permission_classes" {
continue;
}
let Some(right) = child.child_by_field_name("right") else {
continue;
};
let Ok(right_text) = right.utf8_text(bytes) else {
continue;
};
if right_text.contains("IsAuthenticated")
|| right_text.contains("IsAdminUser")
|| right_text.contains("DjangoModelPermissions")
{
return true;
}
}
}
false
}
/// Returns `true` when `decorator` names one of `AUTH_DECORATORS`.
///
/// The decorator's expression is its first child that is neither the
/// `@` token nor a comment. For a call (`@guard(...)`) the callee is
/// compared; otherwise the expression itself is. The comparison is
/// delegated to `leaf_matches`.
fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool {
    let mut cursor = decorator.walk();
    let expression = decorator
        .children(&mut cursor)
        .find(|c| !matches!(c.kind(), "@" | "comment"));
    let callee = expression.and_then(|expr| {
        if expr.kind() == "call" {
            expr.child_by_field_name("function")
        } else {
            Some(expr)
        }
    });
    callee
        .and_then(|target| target.utf8_text(bytes).ok())
        .is_some_and(|text| leaf_matches(text, AUTH_DECORATORS))
}
// Unit tests for the Django probe, driven through `detect_django_routes`.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter Python grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_python::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// A `path('admin/', admin_view)` entry in `urlpatterns` must surface as a
// Django entry point carrying the route and the handler name.
#[test]
fn detects_path_call() {
let src = "from django.urls import path\n\ndef admin_view(request): pass\n\nurlpatterns = [\n path('admin/', admin_view),\n]\n";
let (tree, bytes) = parse(src);
let nodes = detect_django_routes(&tree, &bytes, &PathBuf::from("urls.py"), None);
assert!(!nodes.is_empty());
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.framework, Framework::Django);
assert_eq!(ep.handler_name, "admin_view");
assert_eq!(ep.route, "admin/");
}
// A class deriving from `APIView` with `get` and `post` methods must
// yield exactly two nodes (one per HTTP-named method).
#[test]
fn detects_class_based_view() {
let src = "class UserList(APIView):\n def get(self, request): pass\n def post(self, request): pass\n";
let (tree, bytes) = parse(src);
let nodes = detect_django_routes(&tree, &bytes, &PathBuf::from("views.py"), None);
assert_eq!(nodes.len(), 2);
}
}

View file

@ -0,0 +1,336 @@
//! Python + FastAPI framework probe.
//!
//! Recognises FastAPI / Starlette route declarations:
//!
//! * `@app.get("/path")` / `.post("/path")` / `.put` / `.patch` / `.delete`
//! * `@router.get("/path")` / `.post(...)` / etc. on an `APIRouter`
//! * `@app.api_route("/path", methods=["GET","POST"])` (one entry point is
//!   emitted, using the first recognised method; defaults to GET)
//! * `@app.websocket("/ws")` (treated as GET)
//!
//! `auth_required` is inferred from `Depends(<auth>)` parameters in the
//! handler signature (FastAPI's idiomatic auth pattern) and from
//! decorator-stack guards drawn from [`AUTH_DECORATORS`].
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Auth markers recognised in the decorator stack. FastAPI's primary
/// auth idiom is `Depends(...)` parameter injection, handled separately.
///
/// A handler is flagged `auth_required` when any of its decorators (or,
/// for a call-style decorator, the decorator's callee) matches one of
/// these names via `leaf_matches`.
pub const AUTH_DECORATORS: &[&str] = &[
"login_required",
"auth_required",
"jwt_required",
"token_required",
"requires_auth",
"authenticated",
"require_auth",
"require_login",
"current_user",
];
/// Auth-callee names recognised inside a `Depends(...)` parameter.
///
/// Also consulted for `Security(...)` defaults: every argument token of
/// such a call is compared against this list via `leaf_matches`.
const AUTH_DEPENDS_CALLEES: &[&str] = &[
"get_current_user",
"get_current_active_user",
"current_user",
"require_user",
"require_auth",
"auth",
"verify_token",
"verify_jwt",
"validate_token",
];
/// Detects FastAPI / Starlette route handlers in one parsed Python file.
///
/// Returns an empty list unless the file text mentions `fastapi`,
/// `starlette` or `APIRouter`. Otherwise every decorated function is
/// inspected and one [`EntryPoint`] is emitted per decorator that
/// [`fastapi_route_decorator`] recognises. The entry point's `location`
/// is the decorator; `handler_location` is the start of the function.
///
/// `auth_required` is set when any decorator on the function is an auth
/// marker or when the signature carries an auth `Depends(...)` default.
pub fn detect_fastapi_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    // File-level gate: the `app.get(...)` shape overlaps with Flask, so
    // only fire when a FastAPI / Starlette witness appears in the text.
    let source = std::str::from_utf8(bytes).unwrap_or("");
    let witnessed = ["fastapi", "starlette", "APIRouter"]
        .iter()
        .any(|needle| source.contains(needle));
    if !witnessed {
        return Vec::new();
    }

    let file_rel = rel_file(path, scan_root);
    let mut entries = Vec::new();
    walk_decorated(tree.root_node(), &mut |func, decorators| {
        let auth_required = decorators
            .iter()
            .any(|d| decorator_is_auth_marker(*d, bytes))
            || function_signature_uses_auth_depends(*func, bytes);
        entries.extend(decorators.iter().filter_map(|dec| {
            let (method, route) = fastapi_route_decorator(*dec, bytes)?;
            let start = func.start_position();
            Some(SurfaceNode::EntryPoint(EntryPoint {
                location: loc_for(*dec, &file_rel),
                framework: Framework::FastApi,
                method,
                route,
                handler_name: function_name(*func, bytes).unwrap_or_default(),
                handler_location: SourceLocation::new(
                    file_rel.clone(),
                    (start.row + 1) as u32,
                    (start.column + 1) as u32,
                ),
                auth_required,
            }))
        }));
    });
    entries
}
/// Calls `visit(function, decorators)` for every `decorated_definition`
/// under `root` that wraps a `function_definition`.
///
/// Nodes are visited in document order, and traversal continues into
/// the decorated function itself, so nested decorated functions are
/// reported too. Decorated classes are traversed but not reported.
fn walk_decorated<'tree, F>(root: Node<'tree>, visit: &mut F)
where
    F: FnMut(&Node<'tree>, &[Node<'tree>]),
{
    let mut pending: Vec<Node<'tree>> = vec![root];
    while let Some(current) = pending.pop() {
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        if current.kind() == "decorated_definition" {
            let decorators: Vec<Node<'tree>> = children
                .iter()
                .copied()
                .filter(|c| c.kind() == "decorator")
                .collect();
            // Should several function children exist, the last one wins.
            let function = children
                .iter()
                .rev()
                .find(|c| c.kind() == "function_definition");
            if let Some(f) = function {
                visit(f, &decorators);
            }
        }
        // Reverse push keeps pop order equal to source order.
        pending.extend(children.into_iter().rev());
    }
}
/// Interprets `decorator` as a FastAPI route registration.
///
/// Accepts only call-style decorators whose callee is an attribute
/// access on a receiver approved by [`receiver_is_fastapi`]. The route
/// is the first string argument (empty when absent). The method comes
/// from the attribute name:
///
/// * a name `HttpMethod::from_ident` understands is used directly;
/// * `websocket` / `websocket_route` (case-insensitive) map to `GET`;
/// * `api_route` uses the `methods=` keyword via
///   [`first_methods_kwarg`], defaulting to `GET`.
///
/// Anything else yields `None`.
fn fastapi_route_decorator(decorator: Node, bytes: &[u8]) -> Option<(HttpMethod, String)> {
    let mut cursor = decorator.walk();
    let expr = decorator
        .children(&mut cursor)
        .find(|c| !matches!(c.kind(), "@" | "comment"))?;
    if expr.kind() != "call" {
        return None;
    }
    let callee = expr.child_by_field_name("function")?;
    let arguments = expr.child_by_field_name("arguments");
    if callee.kind() != "attribute" {
        return None;
    }
    let receiver = callee.child_by_field_name("object")?;
    if !receiver_is_fastapi(receiver, bytes) {
        return None;
    }
    let verb = callee
        .child_by_field_name("attribute")?
        .utf8_text(bytes)
        .ok()?;
    let route_path = arguments
        .and_then(|a| first_string_arg(a, bytes))
        .unwrap_or_default();

    if let Some(method) = HttpMethod::from_ident(verb) {
        return Some((method, route_path));
    }
    match verb.to_ascii_lowercase().as_str() {
        "websocket" | "websocket_route" => Some((HttpMethod::GET, route_path)),
        "api_route" => {
            let method = arguments
                .and_then(|a| first_methods_kwarg(a, bytes))
                .unwrap_or(HttpMethod::GET);
            Some((method, route_path))
        }
        _ => None,
    }
}
/// Heuristically decides whether `object` (the receiver of a route
/// decorator) is a FastAPI application or router.
///
/// * identifier: its name, lower-cased, is `app` / `router` / `api` or
///   ends in `_app` / `_router` / `_api`;
/// * attribute: the same test applied to the final attribute name;
/// * call: the callee's last dotted segment is `FastAPI`, `APIRouter`
///   or `Starlette`.
///
/// Every other node kind is rejected.
fn receiver_is_fastapi(object: Node, bytes: &[u8]) -> bool {
    fn looks_like_app_handle(text: &str) -> bool {
        let lower = text.to_ascii_lowercase();
        matches!(lower.as_str(), "app" | "router" | "api")
            || ["_app", "_router", "_api"]
                .iter()
                .any(|suffix| lower.ends_with(suffix))
    }

    let kind = object.kind();
    if kind == "identifier" {
        return object
            .utf8_text(bytes)
            .ok()
            .is_some_and(looks_like_app_handle);
    }
    if kind == "attribute" {
        return object
            .child_by_field_name("attribute")
            .and_then(|a| a.utf8_text(bytes).ok())
            .is_some_and(looks_like_app_handle);
    }
    if kind != "call" {
        return false;
    }
    let Some(constructor) = object
        .child_by_field_name("function")
        .and_then(|c| c.utf8_text(bytes).ok())
    else {
        return false;
    };
    let leaf = constructor.rsplit('.').next().unwrap_or(constructor).trim();
    matches!(leaf, "FastAPI" | "APIRouter" | "Starlette")
}
/// Returns the value of the first `string` child of `args`, as decoded
/// by `string_node_value`.
///
/// Only the first string child is considered: if it cannot be decoded
/// the result is `None` even when later string arguments exist.
fn first_string_arg(args: Node, bytes: &[u8]) -> Option<String> {
    let mut cursor = args.walk();
    args.children(&mut cursor)
        .find(|arg| arg.kind() == "string")
        .and_then(|literal| string_node_value(literal, bytes))
}
/// Returns the first HTTP method listed in a `methods=[...]` keyword
/// argument of `args`.
///
/// Keyword arguments other than `methods` are skipped, as are keyword
/// nodes whose name or value cannot be read (for example in a partially
/// parsed tree). The scan continues with the next argument instead of
/// giving up. Within the `methods` value, the first direct `string` child
/// that `HttpMethod::from_ident` recognises wins. Returns `None` when no
/// method is found.
fn first_methods_kwarg(args: Node, bytes: &[u8]) -> Option<HttpMethod> {
    let mut cursor = args.walk();
    for arg in args.children(&mut cursor) {
        if arg.kind() != "keyword_argument" {
            continue;
        }
        let keyword = arg
            .child_by_field_name("name")
            .and_then(|name| name.utf8_text(bytes).ok());
        if keyword != Some("methods") {
            continue;
        }
        let Some(value) = arg.child_by_field_name("value") else {
            continue;
        };
        let mut value_cursor = value.walk();
        for child in value.children(&mut value_cursor) {
            if child.kind() == "string"
                && let Some(literal) = string_node_value(child, bytes)
                && let Some(method) = HttpMethod::from_ident(&literal)
            {
                return Some(method);
            }
        }
    }
    None
}
/// Returns `true` when `decorator` refers to one of [`AUTH_DECORATORS`].
///
/// The decorator expression is the first child that is neither `@` nor
/// a comment. Call-style decorators are judged by their callee, bare
/// ones by the expression itself. Name matching is delegated to
/// `leaf_matches`.
fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool {
    let mut cursor = decorator.walk();
    let Some(expr) = decorator
        .children(&mut cursor)
        .find(|c| !matches!(c.kind(), "@" | "comment"))
    else {
        return false;
    };
    let named = if expr.kind() == "call" {
        expr.child_by_field_name("function")
    } else {
        Some(expr)
    };
    match named.map(|target| target.utf8_text(bytes)) {
        Some(Ok(text)) => leaf_matches(text, AUTH_DECORATORS),
        _ => false,
    }
}
/// Reports whether any parameter of `func` defaults to
/// `Depends(<auth_callee>)` or `Security(<auth_callee>)`.
///
/// Only `default_parameter` and `typed_default_parameter` nodes are
/// examined. The default must be a call whose callee's last dotted
/// segment is `Depends` or `Security`, and one of that call's argument
/// tokens must match [`AUTH_DEPENDS_CALLEES`] via `leaf_matches`.
fn function_signature_uses_auth_depends(func: Node, bytes: &[u8]) -> bool {
    // `None` and `Some(false)` both mean "this parameter is not an auth
    // dependency"; `?` is just shorthand for the missing-node cases.
    let injects_auth = |param: Node| -> Option<bool> {
        if !matches!(
            param.kind(),
            "default_parameter" | "typed_default_parameter"
        ) {
            return Some(false);
        }
        let default = param.child_by_field_name("value")?;
        if default.kind() != "call" {
            return Some(false);
        }
        let callee = default
            .child_by_field_name("function")?
            .utf8_text(bytes)
            .ok()?;
        let leaf = callee.rsplit('.').next().unwrap_or(callee).trim();
        if !matches!(leaf, "Depends" | "Security") {
            return Some(false);
        }
        let arguments = default.child_by_field_name("arguments")?;
        let mut arg_cursor = arguments.walk();
        let hit = arguments.children(&mut arg_cursor).any(|arg| {
            arg.utf8_text(bytes)
                .is_ok_and(|text| leaf_matches(text, AUTH_DEPENDS_CALLEES))
        });
        Some(hit)
    };

    let Some(params) = func.child_by_field_name("parameters") else {
        return false;
    };
    let mut cursor = params.walk();
    params
        .children(&mut cursor)
        .any(|param| injects_auth(param).unwrap_or(false))
}
/// Returns the text of the `name` field of `func`, or `None` when the
/// field is missing or not valid UTF-8.
fn function_name(func: Node, bytes: &[u8]) -> Option<String> {
    func.child_by_field_name("name")
        .and_then(|ident| ident.utf8_text(bytes).ok())
        .map(String::from)
}
// Unit tests for the FastAPI probe, driven through `detect_fastapi_routes`.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter Python grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_python::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// `@app.get('/users')` yields a single GET entry point with that route.
#[test]
fn detects_get_route() {
let src = "from fastapi import FastAPI\napp = FastAPI()\n@app.get('/users')\ndef list_users(): pass\n";
let (tree, bytes) = parse(src);
let nodes = detect_fastapi_routes(&tree, &bytes, &PathBuf::from("api.py"), None);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
assert_eq!(ep.framework, Framework::FastApi);
}
// No `fastapi` import here: the `APIRouter` mention alone satisfies the
// file-level gate, and `@router.post` maps to POST.
#[test]
fn detects_router_post() {
let src = "router = APIRouter()\n@router.post('/items')\ndef create(): pass\n";
let (tree, bytes) = parse(src);
let nodes = detect_fastapi_routes(&tree, &bytes, &PathBuf::from("api.py"), None);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::POST);
}
// A parameter defaulting to `Depends(get_current_user)` marks the
// handler as requiring auth.
#[test]
fn detects_depends_auth() {
let src = "from fastapi import Depends\n@app.get('/me')\ndef me(user = Depends(get_current_user)): pass\n";
let (tree, bytes) = parse(src);
let nodes = detect_fastapi_routes(&tree, &bytes, &PathBuf::from("api.py"), None);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert!(ep.auth_required);
}
}

View file

@ -50,6 +50,17 @@ pub fn detect_flask_routes(
path: &Path,
scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
// File-level gate: avoid double-detection on FastAPI files where
// `app.get(...)` shape overlaps. Phase 21 was lenient because no
// sibling probe existed; Phase 22 splits per-framework, so each
// probe only fires when its framework witness is present.
let file_text = std::str::from_utf8(bytes).unwrap_or("");
let has_flask_witness = file_text.contains("flask")
|| file_text.contains("Flask")
|| file_text.contains("Blueprint");
if !has_flask_witness {
return Vec::new();
}
let file_rel = relative_path_string(path, scan_root);
let mut out = Vec::new();
walk_decorated(tree.root_node(), bytes, &mut |func_node, decorators| {

View file

@ -0,0 +1,219 @@
//! Ruby + Rails framework probe.
//!
//! Recognises two Rails route shapes:
//!
//! 1. `config/routes.rb` declarations — `get '/path', to: 'controller#action'`,
//!    `post '/path' => 'controller#action'`. (`resources :users` is not
//!    expanded by this probe: only the verb calls listed in `VERBS` are matched.)
//! 2. Controller actions — public instance methods on a class
//! inheriting from `ApplicationController` / `ActionController::Base`.
//!
//! `auth_required` for routes follows `before_action :authenticate!`
//! at the controller level.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Routing-DSL method names recognised in `config/routes.rb`, paired
/// with the HTTP method recorded for them. `match` is recorded as `GET`.
const VERBS: &[(&str, HttpMethod)] = &[
("get", HttpMethod::GET),
("post", HttpMethod::POST),
("put", HttpMethod::PUT),
("patch", HttpMethod::PATCH),
("delete", HttpMethod::DELETE),
("match", HttpMethod::GET),
];
/// Detects Rails entry points in one parsed Ruby file.
///
/// Runs the routing-DSL scan first and the controller-action scan
/// second over the same tree, returning their results in that order.
/// `path` is made relative to `scan_root` via `rel_file` for the
/// reported locations.
pub fn detect_rails_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let root = tree.root_node();
    let file_rel = rel_file(path, scan_root);
    let mut entries = Vec::new();
    detect_routes_dsl(root, bytes, &file_rel, &mut entries);
    detect_controllers(root, bytes, &file_rel, &mut entries);
    entries
}
fn detect_routes_dsl(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
fn recurse(node: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
if matches!(node.kind(), "call" | "method_call") {
if let Some(method_node) = node.child_by_field_name("method")
&& let Ok(method_text) = method_node.utf8_text(bytes)
&& let Some((_, method)) = VERBS.iter().find(|(v, _)| *v == method_text)
{
let args_opt = node
.child_by_field_name("arguments")
.or_else(|| {
let mut c = node.walk();
node.children(&mut c).find(|n| n.kind() == "argument_list")
});
if let Some(args) = args_opt {
let mut cursor = args.walk();
let positional: Vec<Node> = args.named_children(&mut cursor).collect();
if let Some(route_node) = positional.first()
&& let Some(route) = string_node_value(*route_node, bytes)
{
let handler_name = positional
.iter()
.find_map(|n| extract_to_handler(*n, bytes))
.unwrap_or_default();
out.push(SurfaceNode::EntryPoint(EntryPoint {
location: loc_for(node, file_rel),
framework: Framework::Rails,
method: *method,
route,
handler_name,
handler_location: loc_for(node, file_rel),
auth_required: false,
}));
}
}
}
}
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
recurse(child, bytes, file_rel, out);
}
}
recurse(root, bytes, file_rel, out);
}
/// Pulls a `controller#action` target out of one routing argument.
///
/// Recognised shapes:
///
/// * a bare string containing `#`;
/// * a `pair` (`to: '...'` or `'...' => '...'`) — the first named
///   string child containing `#` is returned.
///
/// Any other node kind yields `None`.
fn extract_to_handler(node: Node, bytes: &[u8]) -> Option<String> {
    // A string node counts only when its value looks like `ctrl#action`.
    let action_target = |candidate: Node| -> Option<String> {
        if candidate.kind() != "string" {
            return None;
        }
        string_node_value(candidate, bytes).filter(|value| value.contains('#'))
    };

    match node.kind() {
        "string" => action_target(node),
        "pair" => {
            let mut cursor = node.walk();
            node.named_children(&mut cursor).find_map(action_target)
        }
        _ => None,
    }
}
/// Emits a Rails [`EntryPoint`] for each action of every controller
/// class found under `root`.
///
/// A `class` node qualifies when [`class_is_controller`] accepts it. The
/// methods reported are those yielded by [`walk_methods`]. Because no
/// routing information is available here, every action is recorded as
/// `GET` with an empty route. `auth_required` is the class-wide result
/// of [`class_has_before_authenticate`].
fn detect_controllers(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec<SurfaceNode>) {
    // Pre-order traversal with an explicit stack (children pushed reversed).
    let mut pending = vec![root];
    while let Some(current) = pending.pop() {
        if current.kind() == "class" && class_is_controller(current, bytes) {
            let class_auth = class_has_before_authenticate(current, bytes);
            walk_methods(current, bytes, &mut |action, name| {
                let start = action.start_position();
                out.push(SurfaceNode::EntryPoint(EntryPoint {
                    location: loc_for(action, file_rel),
                    framework: Framework::Rails,
                    method: HttpMethod::GET,
                    route: String::new(),
                    handler_name: name.to_string(),
                    handler_location: SourceLocation::new(
                        file_rel,
                        (start.row + 1) as u32,
                        (start.column + 1) as u32,
                    ),
                    auth_required: class_auth,
                }));
            });
        }
        let mut cursor = current.walk();
        let children: Vec<Node> = current.children(&mut cursor).collect();
        pending.extend(children.into_iter().rev());
    }
}
/// Returns `true` when the text of the class's `superclass` field
/// mentions `ApplicationController` or `ActionController`.
///
/// This is a substring test on the superclass text; classes without a
/// superclass are rejected.
fn class_is_controller(class: Node, bytes: &[u8]) -> bool {
    class
        .child_by_field_name("superclass")
        .and_then(|parent| parent.utf8_text(bytes).ok())
        .is_some_and(|text| {
            ["ApplicationController", "ActionController"]
                .iter()
                .any(|base| text.contains(base))
        })
}
fn class_has_before_authenticate(class: Node, bytes: &[u8]) -> bool {
let Some(body) = class.child_by_field_name("body") else {
return false;
};
let mut cursor = body.walk();
for child in body.children(&mut cursor) {
if let Ok(text) = child.utf8_text(bytes)
&& text.contains("before_action")
&& (text.contains("authenticate") || text.contains("login_required"))
{
return true;
}
}
false
}
/// Calls `visit(method_node, name)` for each public instance method
/// defined directly in the body of `class`.
///
/// A method is skipped when:
///
/// * its name starts with `_`; or
/// * it follows a bare `private` / `protected` visibility marker in the
///   class body (a later bare `public` re-enables reporting). Such
///   methods are not routable controller actions and must not be
///   surfaced as entry points.
///
/// NOTE(review): the visibility tracking assumes the Ruby grammar
/// represents a bare `private` line as an `identifier` child of the
/// class body. Forms such as `private :name` or `private def ...` are
/// not `method` children and were never reported.
fn walk_methods<'tree, F>(class: Node<'tree>, bytes: &[u8], visit: &mut F)
where
    F: FnMut(Node<'tree>, &str),
{
    let Some(body) = class.child_by_field_name("body") else {
        return;
    };
    let mut publicly_visible = true;
    let mut cursor = body.walk();
    for child in body.children(&mut cursor) {
        match child.kind() {
            "identifier" => match child.utf8_text(bytes) {
                Ok("private" | "protected") => publicly_visible = false,
                Ok("public") => publicly_visible = true,
                _ => {}
            },
            "method" if publicly_visible => {
                if let Some(name_node) = child.child_by_field_name("name")
                    && let Ok(name) = name_node.utf8_text(bytes)
                    && !name.starts_with('_')
                {
                    visit(child, name);
                }
            }
            _ => {}
        }
    }
}
// Unit tests for the Rails probe, driven through `detect_rails_routes`.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter Ruby grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_ruby::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// `get '/users', to: 'users#index'` inside a `routes.draw` block must
// surface as a GET entry point for `/users`.
#[test]
fn detects_routes_dsl() {
let src = "Rails.application.routes.draw do\n get '/users', to: 'users#index'\nend\n";
let (tree, bytes) = parse(src);
let nodes = detect_rails_routes(&tree, &bytes, &PathBuf::from("config/routes.rb"), None);
assert!(!nodes.is_empty());
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
}
// Two methods on an `ApplicationController` subclass yield two nodes.
#[test]
fn detects_controller_actions() {
let src = "class UsersController < ApplicationController\n def index\n end\n def show\n end\nend\n";
let (tree, bytes) = parse(src);
let nodes = detect_rails_routes(&tree, &bytes, &PathBuf::from("users_controller.rb"), None);
assert_eq!(nodes.len(), 2);
}
}

View file

@ -0,0 +1,111 @@
//! Ruby + Sinatra framework probe.
//!
//! Sinatra routes are top-level method calls of the form
//! `get '/path' do ... end`, `post '/path' do ... end`, etc. The
//! handler is the block; we synthesise the handler name from the
//! route string (Sinatra blocks are anonymous).
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Sinatra route-defining method names, paired with the HTTP method
/// recorded for each.
const VERBS: &[(&str, HttpMethod)] = &[
("get", HttpMethod::GET),
("post", HttpMethod::POST),
("put", HttpMethod::PUT),
("patch", HttpMethod::PATCH),
("delete", HttpMethod::DELETE),
("head", HttpMethod::HEAD),
("options", HttpMethod::OPTIONS),
];
/// Detects Sinatra routes in one parsed Ruby file.
///
/// Every call node in the tree is offered to [`match_sinatra_call`];
/// the accepted ones are returned in document order. `path` is made
/// relative to `scan_root` via `rel_file` for the reported locations.
pub fn detect_sinatra_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let mut routes = Vec::new();
    walk_calls(tree.root_node(), &mut |call| {
        // `Option` is iterable: zero or one node is appended.
        routes.extend(match_sinatra_call(call, bytes, &file_rel));
    });
    routes
}
/// Invokes `visit` on every `call` / `method_call` node under `node`
/// (including `node` itself), in document order.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    let mut pending: Vec<Node<'tree>> = vec![node];
    while let Some(current) = pending.pop() {
        if matches!(current.kind(), "call" | "method_call") {
            visit(current);
        }
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        // Reverse push keeps pop order equal to source order.
        pending.extend(children.into_iter().rev());
    }
}
/// Interprets `call` as a Sinatra route definition (`get '/path' do ... end`).
///
/// Requirements, checked in order:
///
/// 1. the method name is one of [`VERBS`];
/// 2. the call carries a block (`block` field, or a `do_block` / `block`
///    child);
/// 3. it has an argument list whose first named child decodes to a
///    string via `string_node_value`.
///
/// Because Sinatra blocks are anonymous, the handler name is
/// synthesised as `<verb>_<route>` with `/` and `-` replaced by `_`.
/// The handler location is the block. `auth_required` is always `false`.
fn match_sinatra_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
    // First direct child of `call` whose kind satisfies `wanted`.
    let child_of_kind = |wanted: fn(&str) -> bool| {
        let mut cursor = call.walk();
        call.children(&mut cursor).find(|n| wanted(n.kind()))
    };

    let verb = call.child_by_field_name("method")?.utf8_text(bytes).ok()?;
    let method = VERBS
        .iter()
        .find_map(|(name, method)| (*name == verb).then_some(*method))?;

    // Must have a block to be a Sinatra route.
    let block = match call.child_by_field_name("block") {
        Some(found) => found,
        None => child_of_kind(|kind| matches!(kind, "do_block" | "block"))?,
    };

    // Sinatra accepts a string literal as the first positional argument.
    let args = match call.child_by_field_name("arguments") {
        Some(found) => found,
        None => child_of_kind(|kind| kind == "argument_list")?,
    };
    let mut arg_cursor = args.walk();
    let route_node = args.named_children(&mut arg_cursor).next()?;
    let route = string_node_value(route_node, bytes)?;

    let handler_name = format!("{}_{}", verb, route.replace(['/', '-'], "_"));
    Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(call, file_rel),
        framework: Framework::Sinatra,
        method,
        route,
        handler_name,
        handler_location: loc_for(block, file_rel),
        auth_required: false,
    }))
}
// Unit tests for the Sinatra probe, driven through `detect_sinatra_routes`.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter Ruby grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_ruby::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// A top-level `get '/users' do ... end` yields exactly one GET route.
#[test]
fn detects_sinatra_get() {
let src = "get '/users' do\n 'hi'\nend\n";
let (tree, bytes) = parse(src);
let nodes = detect_sinatra_routes(&tree, &bytes, &PathBuf::from("app.rb"), None);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
}
}

View file

@ -0,0 +1,196 @@
//! Rust + actix-web framework probe.
//!
//! Recognises actix-web routing macros (`#[get("/path")]`,
//! `#[post("/path")]`, `#[put]`, `#[delete]`, `#[patch]`, `#[head]`,
//! `#[options]`, `#[route("/path", method = ...)]`) attached to a
//! `function_item`. The route path is extracted from the macro
//! argument string literal.
//!
//! `auth_required` fires when the function signature has a parameter
//! whose type matches one of [`AUTH_EXTRACTORS`] (`Identity`,
//! `BearerAuth`, `JwtClaims`, etc.).
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Type-name fragments that mark an actix handler as authenticated.
///
/// Matching is a substring test against the raw text of the handler's
/// parameter list, so any parameter text containing one of these
/// fragments sets `auth_required`.
pub const AUTH_EXTRACTORS: &[&str] = &[
"Identity",
"BearerAuth",
"BasicAuth",
"JwtClaims",
"Authenticated",
"User",
];
/// actix-web routing attribute names and the HTTP method each implies.
///
/// `route` carries `None` because its method is taken from the
/// attribute's arguments instead of its name.
const ROUTE_MACROS: &[(&str, Option<HttpMethod>)] = &[
("get", Some(HttpMethod::GET)),
("post", Some(HttpMethod::POST)),
("put", Some(HttpMethod::PUT)),
("delete", Some(HttpMethod::DELETE)),
("patch", Some(HttpMethod::PATCH)),
("head", Some(HttpMethod::HEAD)),
("options", Some(HttpMethod::OPTIONS)),
("route", None),
];
/// Detects actix-web route handlers in one parsed Rust file.
///
/// Returns an empty list unless the file text contains `actix_web::` or
/// `use actix_web`. Otherwise every `function_item` in the tree is
/// offered to [`match_actix_function`], and the accepted ones are
/// returned in document order.
pub fn detect_actix_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    // Best-effort gate so the actix probe does not over-fire on Rocket /
    // generic Rust files that also define a `#[get]` macro from a user
    // crate.
    let source = std::str::from_utf8(bytes).unwrap_or("");
    let mentions_actix = ["actix_web::", "use actix_web"]
        .iter()
        .any(|needle| source.contains(needle));
    if !mentions_actix {
        return Vec::new();
    }

    let file_rel = rel_file(path, scan_root);
    let mut routes = Vec::new();
    walk_functions(tree.root_node(), &mut |func| {
        routes.extend(match_actix_function(func, bytes, &file_rel));
    });
    routes
}
/// Invokes `visit` on every `function_item` under `node` (including
/// `node` itself and nested functions), in document order.
fn walk_functions<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    let mut pending: Vec<Node<'tree>> = vec![node];
    while let Some(current) = pending.pop() {
        if current.kind() == "function_item" {
            visit(current);
        }
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        // Reverse push keeps pop order equal to source order.
        pending.extend(children.into_iter().rev());
    }
}
/// Builds an actix [`EntryPoint`] for `func` when one of its attributes
/// is a routing macro from [`ROUTE_MACROS`].
///
/// For each attribute returned by [`collect_preceding_attributes`]:
///
/// * the macro name is the last `::` segment of the attribute path, so
///   both `#[get("/x")]` and `#[actix_web::get("/x")]` are recognised;
/// * with arguments, the HTTP method is the macro's own method, or for
///   `route(...)` the verb found by [`extract_route_method`]. An
///   attribute that yields no method leaves a previously found method
///   untouched;
/// * the route is the first double-quoted literal of the first routing
///   attribute that has one;
/// * without arguments (`#[get]`), the macro's method is used only if
///   none was found yet.
///
/// Attributes whose text cannot be read are skipped. Returns `None`
/// when no method could be determined. `auth_required` comes from
/// [`signature_uses_auth_extractor`].
fn match_actix_function(func: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNode> {
    let mut method: Option<HttpMethod> = None;
    let mut route_path = String::new();
    for attr in collect_preceding_attributes(func) {
        let Ok(raw) = attr.utf8_text(bytes) else {
            continue;
        };
        let inner = raw
            .trim_start_matches(['#', '!'])
            .trim_matches(['[', ']']);
        // Split `path::to::macro(args...)` into the path and "has args".
        let (macro_path, has_args) = match inner.split_once('(') {
            Some((path, _)) => (path, true),
            None => (inner, false),
        };
        let macro_name = macro_path.rsplit("::").next().unwrap_or(macro_path).trim();
        let Some((_, default_method)) = ROUTE_MACROS.iter().find(|(name, _)| *name == macro_name)
        else {
            continue;
        };
        if has_args {
            if let Some(found) = default_method.or_else(|| extract_route_method(inner)) {
                method = Some(found);
            }
            if route_path.is_empty()
                && let Some((_, rest)) = inner.split_once('"')
                && let Some((literal, _)) = rest.split_once('"')
            {
                route_path = literal.to_string();
            }
        } else if method.is_none() {
            method = *default_method;
        }
    }
    let m = method?;
    let handler_name = function_name(func, bytes).unwrap_or_default();
    let auth_required = signature_uses_auth_extractor(func, bytes);
    Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(func, file_rel),
        framework: Framework::Actix,
        method: m,
        route: route_path,
        handler_name,
        handler_location: SourceLocation::new(
            file_rel,
            (func.start_position().row + 1) as u32,
            (func.start_position().column + 1) as u32,
        ),
        auth_required,
    }))
}
/// Collects the `attribute` nodes attached to `func`.
///
/// Attributes are siblings that precede the function inside its parent,
/// so the parent's children are scanned in order. Attribute items
/// accumulate until the function is reached. Any other sibling resets
/// the accumulator, because those attributes belong to a different item.
/// Comments are the exception: a doc or regular comment between an
/// attribute and the `fn` does not detach the attribute, e.g.
///
/// ```text
/// #[get("/users")]
/// /// Lists users.
/// async fn list_users() { ... }
/// ```
///
/// Returns an empty vector when `func` has no parent or is not found
/// among its parent's children.
fn collect_preceding_attributes(func: Node) -> Vec<Node> {
    let Some(parent) = func.parent() else {
        return Vec::new();
    };
    let mut cursor = parent.walk();
    let mut pending: Vec<Node> = Vec::new();
    for sib in parent.children(&mut cursor) {
        if sib.id() == func.id() {
            return pending;
        }
        match sib.kind() {
            "attribute_item" | "inner_attribute_item" => {
                let mut attr_cursor = sib.walk();
                pending.extend(
                    sib.children(&mut attr_cursor)
                        .filter(|inner| inner.kind() == "attribute"),
                );
            }
            // Comments may sit between attributes and the item they decorate.
            "line_comment" | "block_comment" => {}
            _ => pending.clear(),
        }
    }
    Vec::new()
}
/// Finds the HTTP verb mentioned in the text of a `route(...)` attribute.
///
/// Verbs are tried in the fixed order GET, POST, PUT, PATCH, DELETE,
/// HEAD, OPTIONS. The first one that occurs anywhere in `inner` (an
/// upper-case substring test) is converted with
/// `HttpMethod::from_ident`. Returns `None` when no verb occurs.
fn extract_route_method(inner: &str) -> Option<HttpMethod> {
    const VERB_ORDER: [&str; 7] = ["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"];
    VERB_ORDER
        .into_iter()
        .find(|verb| inner.contains(verb))
        .and_then(|verb| HttpMethod::from_ident(verb))
}
/// Returns `true` when the raw text of `func`'s parameter list contains
/// any entry of [`AUTH_EXTRACTORS`].
///
/// This is a substring test on the parameter text. A missing or
/// unreadable parameter list yields `false`.
fn signature_uses_auth_extractor(func: Node, bytes: &[u8]) -> bool {
    func.child_by_field_name("parameters")
        .and_then(|params| params.utf8_text(bytes).ok())
        .is_some_and(|signature| {
            AUTH_EXTRACTORS
                .iter()
                .any(|extractor| signature.contains(extractor))
        })
}
/// Returns the text of the `name` field of `func`, or `None` when the
/// field is missing or not valid UTF-8.
fn function_name(func: Node, bytes: &[u8]) -> Option<String> {
    let ident = func.child_by_field_name("name")?;
    let text = ident.utf8_text(bytes).ok()?;
    Some(text.to_string())
}
// Unit tests for the actix-web probe, driven through `detect_actix_routes`.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parses `src` with the tree-sitter Rust grammar and returns the tree
// together with an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_rust::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// `#[get("/users")]` on an async fn in a file importing `actix_web`
// yields exactly one GET entry point for `/users`.
#[test]
fn detects_actix_get() {
let src = r#"
use actix_web::{get, HttpResponse};
#[get("/users")]
async fn list_users() -> HttpResponse { HttpResponse::Ok().finish() }
"#;
let (tree, bytes) = parse(src);
let nodes = detect_actix_routes(&tree, &bytes, &PathBuf::from("main.rs"), None);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
}
}

View file

@ -0,0 +1,191 @@
//! Rust + axum framework probe.
//!
//! Detects axum route registration:
//!
//! * `Router::new().route("/path", get(handler))` /
//! `.route("/path", post(handler))` / etc.
//! * Bare extractor-shaped function items in files that import axum
//! (handler typing alone is treated as a candidate, but only when a
//! `Router::route(...)` registration in the same file references it).
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::collections::HashMap;
use std::path::Path;
use tree_sitter::{Node, Tree};
/// axum method-router constructor names (`get(handler)`, `post(handler)`,
/// ...) paired with the HTTP method recorded for each. Matched against
/// the last `::` segment of the constructor's path.
const VERBS: &[(&str, HttpMethod)] = &[
("get", HttpMethod::GET),
("post", HttpMethod::POST),
("put", HttpMethod::PUT),
("delete", HttpMethod::DELETE),
("patch", HttpMethod::PATCH),
("head", HttpMethod::HEAD),
("options", HttpMethod::OPTIONS),
];
/// Parameter-text fragments that mark an axum handler as authenticated.
///
/// Matching is a substring test against the raw text of the handler's
/// parameter list.
pub const AUTH_EXTRACTORS: &[&str] = &[
"Extension<User",
"BearerAuth",
"RequireAuth",
"AuthenticatedUser",
"JwtClaims",
];
/// Detects axum route registrations in one parsed Rust file.
///
/// Returns an empty list unless the file text contains `axum::` or
/// `use axum`. Otherwise the file's functions are indexed by name
/// ([`collect_functions`]) and every `call_expression` is offered to
/// [`match_router_route`]. Accepted registrations are returned in
/// document order.
pub fn detect_axum_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let source = std::str::from_utf8(bytes).unwrap_or("");
    let mentions_axum = ["axum::", "use axum"]
        .iter()
        .any(|needle| source.contains(needle));
    if !mentions_axum {
        return Vec::new();
    }

    let file_rel = rel_file(path, scan_root);
    let function_index = collect_functions(tree.root_node(), bytes);
    let mut routes = Vec::new();
    walk_calls(tree.root_node(), &mut |call| {
        routes.extend(match_router_route(call, bytes, &file_rel, &function_index));
    });
    routes
}
/// Invokes `visit` on every `call_expression` under `node` (including
/// `node` itself), parents before children and siblings in source order.
fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) {
    if node.kind() == "call_expression" {
        visit(node);
    }
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .for_each(|child| walk_calls(child, visit));
}
/// Indexes every `function_item` under `root` by name.
///
/// The value pairs the function's node with an auth flag, which is
/// `true` when the raw text of its parameter list contains any entry of
/// [`AUTH_EXTRACTORS`]. Functions are visited in document order; when
/// two share a name, the later one replaces the earlier.
fn collect_functions<'tree>(
    root: Node<'tree>,
    bytes: &'tree [u8],
) -> HashMap<String, (Node<'tree>, bool)> {
    let mut index: HashMap<String, (Node<'tree>, bool)> = HashMap::new();
    let mut pending: Vec<Node<'tree>> = vec![root];
    while let Some(current) = pending.pop() {
        if current.kind() == "function_item"
            && let Some(name) = current
                .child_by_field_name("name")
                .and_then(|ident| ident.utf8_text(bytes).ok())
        {
            let auth = current
                .child_by_field_name("parameters")
                .and_then(|params| params.utf8_text(bytes).ok())
                .is_some_and(|signature| AUTH_EXTRACTORS.iter().any(|x| signature.contains(x)));
            index.insert(name.to_string(), (current, auth));
        }
        let mut cursor = current.walk();
        let children: Vec<Node<'tree>> = current.children(&mut cursor).collect();
        // Reverse push keeps pop order equal to source order.
        pending.extend(children.into_iter().rev());
    }
    index
}
/// Interprets `call` as an axum `.route("/path", verb(handler))`
/// registration.
///
/// Requirements:
///
/// * the callee is a field expression whose field is `route`;
/// * there are at least two arguments, the first decoding to a string
///   via `string_node_value`;
/// * the second argument is a call whose callee's last `::` segment is
///   one of [`VERBS`], with an identifier or scoped identifier among its
///   arguments (the handler).
///
/// When the handler's text is a key of `function_index`, the handler
/// location and auth flag come from that entry. Otherwise the location
/// is the handler reference itself and `auth_required` is `false`.
fn match_router_route<'tree>(
    call: Node<'tree>,
    bytes: &[u8],
    file_rel: &str,
    function_index: &HashMap<String, (Node<'tree>, bool)>,
) -> Option<SurfaceNode> {
    let callee = call.child_by_field_name("function")?;
    if callee.kind() != "field_expression" {
        return None;
    }
    let field_name = callee
        .child_by_field_name("field")?
        .utf8_text(bytes)
        .ok()?;
    if field_name != "route" {
        return None;
    }

    // Positional arguments = children minus punctuation; two are needed.
    let args = call.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let mut positional = args
        .children(&mut cursor)
        .filter(|n| !matches!(n.kind(), "(" | ")" | ","));
    let route_node = positional.next()?;
    let method_router = positional.next()?;
    let route = string_node_value(route_node, bytes)?;

    if method_router.kind() != "call_expression" {
        return None;
    }
    let verb_path = method_router
        .child_by_field_name("function")?
        .utf8_text(bytes)
        .ok()?;
    let verb = verb_path.rsplit("::").next().unwrap_or(verb_path);
    let method = VERBS
        .iter()
        .find_map(|(name, method)| (*name == verb).then_some(*method))?;

    let verb_args = method_router.child_by_field_name("arguments")?;
    let mut handler_cursor = verb_args.walk();
    let handler_node = verb_args
        .children(&mut handler_cursor)
        .find(|n| matches!(n.kind(), "identifier" | "scoped_identifier"))?;
    let handler_name = handler_node.utf8_text(bytes).ok()?.to_string();

    // One index lookup feeds both the auth flag and the location.
    let definition = function_index.get(&handler_name);
    let auth_required = definition.is_some_and(|(_, auth)| *auth);
    let handler_location = match definition {
        Some((def, _)) => {
            let start = def.start_position();
            SourceLocation::new(file_rel, (start.row + 1) as u32, (start.column + 1) as u32)
        }
        None => loc_for(handler_node, file_rel),
    };

    Some(SurfaceNode::EntryPoint(EntryPoint {
        location: loc_for(call, file_rel),
        framework: Framework::Axum,
        method,
        route,
        handler_name,
        handler_location,
        auth_required,
    }))
}
// Unit tests for the axum route probe.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parse `src` with the tree-sitter Rust grammar; returns the tree
// plus an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_rust::LANGUAGE.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// A single `Router::new().route("/users", get(list_users))` call must
// yield exactly one GET entry point for `/users`.
#[test]
fn detects_router_get() {
let src = r#"
use axum::{Router, routing::get};
async fn list_users() -> &'static str { "ok" }
fn app() -> Router {
Router::new().route("/users", get(list_users))
}
"#;
let (tree, bytes) = parse(src);
let nodes = detect_axum_routes(&tree, &bytes, &PathBuf::from("main.rs"), None);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert_eq!(ep.route, "/users");
}
}

315
src/surface/lang/ts_next.rs Normal file
View file

@ -0,0 +1,315 @@
//! TypeScript + Next.js framework probe.
//!
//! Recognises Next.js App Router route handlers (`app/**/route.{ts,tsx,js,jsx}`)
//! by walking exported function declarations whose name is one of the
//! HTTP method idents (`GET` / `POST` / …). Also recognises Pages
//! Router API routes (`pages/api/**/*.{ts,tsx,js,jsx}`) via the
//! `export default handler` pattern.
//!
//! Server actions are also reported as entry points because they expose
//! a function callable from a React client over the wire. Only a
//! file-level `'use server'` directive is detected; a directive placed
//! inside an individual function body is not recognised by this probe.
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{loc_for, rel_file};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
/// Detect Next.js entry points in one parsed TypeScript / JavaScript file.
///
/// Three independent probes run, each appending to the result:
/// * App Router `route.*` files: exported functions named after an
///   HTTP method.
/// * Pages Router API files: the default export.
/// * Files whose first statement is `'use server'`: every top-level
///   exported function or variable.
///
/// `path` is the file as handed to the scanner and `scan_root`, when
/// present, is the project root. The route string is derived from the
/// scan-root-relative path whenever that relative path still contains
/// an `app` or `pages` directory, so that a directory of the same name
/// *above* the project root (e.g. `/srv/app/site/app/users/route.ts`)
/// cannot leak into the route.
pub fn detect_next_routes(
    tree: &Tree,
    bytes: &[u8],
    path: &Path,
    scan_root: Option<&Path>,
) -> Vec<SurfaceNode> {
    let file_rel = rel_file(path, scan_root);
    let root = tree.root_node();
    let route_path = derive_route_path(routing_path(path, scan_root));
    let mut out = Vec::new();

    // File classification deliberately keeps using the full path so
    // that scanning a sub-directory such as `app/` itself still works.
    if is_app_router_route(path) {
        collect_named_exports(root, bytes, &file_rel, &route_path, &mut out);
    }
    if is_pages_api_route(path) {
        collect_default_export(root, bytes, &file_rel, &route_path, &mut out);
    }
    if file_level_use_server(root, bytes) {
        collect_use_server_exports(root, bytes, &file_rel, &route_path, &mut out);
    }
    out
}

/// Pick the path from which the route string should be derived.
///
/// Returns `path` stripped of `scan_root` when that stripped path still
/// contains an `app` or `pages` component; otherwise returns `path`
/// unchanged (no scan root, `path` not under it, or the router
/// directory is at or above the scan root).
fn routing_path<'p>(path: &'p Path, scan_root: Option<&Path>) -> &'p Path {
    scan_root
        .and_then(|root| path.strip_prefix(root).ok())
        .filter(|rel| {
            rel.components().any(|c| {
                let seg = c.as_os_str();
                seg == "app" || seg == "pages"
            })
        })
        .unwrap_or(path)
}
/// True when `path` names an App Router route file: the basename is
/// `route.ts`, `route.tsx`, `route.js` or `route.jsx`, and some
/// component of the path is exactly `app`.
fn is_app_router_route(path: &Path) -> bool {
    let is_route_file = matches!(
        path.file_name().and_then(|n| n.to_str()),
        Some("route.ts" | "route.tsx" | "route.js" | "route.jsx")
    );
    is_route_file && path.components().any(|c| c.as_os_str() == "app")
}
/// True when `path` lies under a Pages Router API directory, i.e. it
/// contains a `pages` component immediately followed by an `api`
/// component (`pages/api/...`, `src/pages/api/...`).
///
/// The two components must be adjacent: a file such as
/// `pages/blog/api/index.tsx` is an ordinary page, not an API route,
/// and is rejected.
fn is_pages_api_route(path: &Path) -> bool {
    let mut prev_was_pages = false;
    for comp in path.components() {
        let seg = comp.as_os_str();
        if prev_was_pages && seg == "api" {
            return true;
        }
        prev_was_pages = seg == "pages";
    }
    false
}
/// Derive the route string from a file path.
///
/// Everything up to and including the first `app`, `api` or `pages`
/// component is discarded; the remaining components form the route.
/// A trailing `route.*` or `index.*` file is dropped, any other
/// trailing file loses its extension. A path with no marker component
/// yields `/`.
///
/// * `app/users/[id]/route.ts` → `/users/[id]`
/// * `pages/api/users/index.ts` → `/api/users`
/// * `pages/api/users.ts` → `/api/users`
fn derive_route_path(path: &Path) -> String {
    let mut segments: Vec<String> = path
        .components()
        .map(|c| c.as_os_str().to_string_lossy().into_owned())
        .skip_while(|seg| !matches!(seg.as_str(), "app" | "api" | "pages"))
        .skip(1) // the marker component itself is not part of the route
        .collect();

    let ends_with_route_file = segments
        .last()
        .is_some_and(|file| file.starts_with("route.") || file.starts_with("index."));
    if ends_with_route_file {
        segments.pop();
    } else if let Some(file) = segments.last_mut() {
        if let Some(dot) = file.rfind('.') {
            file.truncate(dot);
        }
    }

    // An empty segment list formats as the bare root `/`.
    format!("/{}", segments.join("/"))
}
/// Emit one App Router entry point for every exported declaration in
/// the tree whose name `HttpMethod::from_ident` recognises.
///
/// Every `export_statement` at any depth is inspected; the reported
/// `location` is the export statement and `handler_location` is the
/// start of the declaration returned by `extract_named_function`.
fn collect_named_exports(
    root: Node,
    bytes: &[u8],
    file_rel: &str,
    route_path: &str,
    out: &mut Vec<SurfaceNode>,
) {
    fn visit(
        node: Node,
        bytes: &[u8],
        file_rel: &str,
        route_path: &str,
        out: &mut Vec<SurfaceNode>,
    ) {
        if node.kind() == "export_statement" {
            // Covers `export async function NAME(...)` and `export const NAME = ...`.
            let mut decl_cursor = node.walk();
            let handlers = node.children(&mut decl_cursor).filter_map(|decl| {
                let (name, (row, col)) = extract_named_function(decl, bytes)?;
                let method = HttpMethod::from_ident(&name)?;
                Some(SurfaceNode::EntryPoint(EntryPoint {
                    location: loc_for(node, file_rel),
                    framework: Framework::NextAppRouter,
                    method,
                    route: route_path.to_string(),
                    handler_name: name,
                    handler_location: SourceLocation::new(
                        file_rel,
                        (row + 1) as u32,
                        (col + 1) as u32,
                    ),
                    auth_required: false,
                }))
            });
            out.extend(handlers);
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            visit(child, bytes, file_rel, route_path, out);
        }
    }
    visit(root, bytes, file_rel, route_path, out);
}
fn extract_named_function(node: Node, bytes: &[u8]) -> Option<(String, (usize, usize))> {
match node.kind() {
"function_declaration" => {
let name_node = node.child_by_field_name("name")?;
let name = name_node.utf8_text(bytes).ok()?.to_string();
let pos = node.start_position();
Some((name, (pos.row, pos.column)))
}
"lexical_declaration" | "variable_declaration" => {
let mut cursor = node.walk();
for decl in node.children(&mut cursor) {
if decl.kind() == "variable_declarator"
&& let Some(name_node) = decl.child_by_field_name("name")
&& let Ok(name) = name_node.utf8_text(bytes)
{
let pos = decl.start_position();
return Some((name.to_string(), (pos.row, pos.column)));
}
}
None
}
_ => None,
}
}
/// Emit an entry point for each `export default ...` statement in the tree.
///
/// A statement counts as a default export only when it carries the
/// `default` keyword token as a direct child. Looking at the token
/// rather than searching the statement text keeps a named export whose
/// body merely mentions the word "default" from being reported.
///
/// The handler name is the declared function name, the exported
/// identifier, or the literal `"default"` for an anonymous function /
/// arrow function. At most one entry point is produced per statement,
/// and a matched statement's children are not searched further.
///
/// NOTE(review): entries are tagged `Framework::NextAppRouter` with
/// `HttpMethod::GET` even though this collector serves Pages Router
/// API files — confirm whether a dedicated tag / method is intended.
fn collect_default_export(
    root: Node,
    bytes: &[u8],
    file_rel: &str,
    route_path: &str,
    out: &mut Vec<SurfaceNode>,
) {
    fn recurse(
        node: Node,
        bytes: &[u8],
        file_rel: &str,
        route_path: &str,
        out: &mut Vec<SurfaceNode>,
    ) {
        if node.kind() == "export_statement" {
            let mut keyword_cursor = node.walk();
            let is_default_export = node
                .children(&mut keyword_cursor)
                .any(|child| child.kind() == "default");
            if is_default_export {
                let mut cursor = node.walk();
                for child in node.children(&mut cursor) {
                    let name = match child.kind() {
                        "function_declaration" => child
                            .child_by_field_name("name")
                            .and_then(|n| n.utf8_text(bytes).ok())
                            .map(str::to_string),
                        "identifier" => child.utf8_text(bytes).ok().map(str::to_string),
                        "arrow_function" | "function" | "function_expression" => {
                            Some("default".to_string())
                        }
                        _ => None,
                    };
                    if let Some(name) = name {
                        out.push(SurfaceNode::EntryPoint(EntryPoint {
                            location: loc_for(node, file_rel),
                            framework: Framework::NextAppRouter,
                            method: HttpMethod::GET,
                            route: route_path.to_string(),
                            handler_name: name,
                            handler_location: loc_for(child, file_rel),
                            auth_required: false,
                        }));
                        // One handler per export statement; do not descend.
                        return;
                    }
                }
            }
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            recurse(child, bytes, file_rel, route_path, out);
        }
    }
    recurse(root, bytes, file_rel, route_path, out);
}
/// Emit a server-action entry point (tagged `POST`) for every
/// top-level `export_statement` from which `export_function_name` can
/// extract a declared name. Only direct children of `root` are
/// examined.
fn collect_use_server_exports(
    root: Node,
    bytes: &[u8],
    file_rel: &str,
    route_path: &str,
    out: &mut Vec<SurfaceNode>,
) {
    let mut cursor = root.walk();
    let actions = root
        .children(&mut cursor)
        .filter(|stmt| stmt.kind() == "export_statement")
        .filter_map(|stmt| {
            let (name, (row, col)) = export_function_name(stmt, bytes)?;
            Some(SurfaceNode::EntryPoint(EntryPoint {
                location: loc_for(stmt, file_rel),
                framework: Framework::NextServerAction,
                method: HttpMethod::POST,
                route: route_path.to_string(),
                handler_name: name,
                handler_location: SourceLocation::new(
                    file_rel,
                    (row + 1) as u32,
                    (col + 1) as u32,
                ),
                auth_required: false,
            }))
        });
    out.extend(actions);
}
/// Return the name and start position of the first direct child of
/// `node` that `extract_named_function` recognises, or `None` when no
/// child qualifies.
fn export_function_name(node: Node, bytes: &[u8]) -> Option<(String, (usize, usize))> {
    let mut cursor = node.walk();
    let first_named = node
        .children(&mut cursor)
        .find_map(|child| extract_named_function(child, bytes));
    first_named
}
fn file_level_use_server(root: Node, bytes: &[u8]) -> bool {
let mut cursor = root.walk();
for child in root.children(&mut cursor) {
if child.kind() == "expression_statement" {
let mut cs = child.walk();
for c in child.children(&mut cs) {
if c.kind() == "string"
&& let Ok(text) = c.utf8_text(bytes)
{
let trimmed = text.trim().trim_matches(['\'', '"']);
if trimmed == "use server" {
return true;
}
}
}
return false;
}
if !matches!(child.kind(), "comment" | "import_statement") {
return false;
}
}
false
}
// Unit tests for the Next.js probe.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Parse `src` with the tree-sitter TSX grammar; returns the tree plus
// an owned copy of the source bytes.
fn parse(src: &str) -> (Tree, Vec<u8>) {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_typescript::LANGUAGE_TSX.into())
.unwrap();
(parser.parse(src, None).unwrap(), src.as_bytes().to_vec())
}
// An exported `GET` function in `app/users/route.ts` must yield one
// GET entry point whose route mentions `users`.
#[test]
fn detects_app_router_get() {
let src = "export async function GET(req: Request) { return new Response('ok'); }\n";
let (tree, bytes) = parse(src);
let nodes = detect_next_routes(
&tree,
&bytes,
&PathBuf::from("app/users/route.ts"),
None,
);
assert_eq!(nodes.len(), 1);
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
panic!()
};
assert_eq!(ep.method, HttpMethod::GET);
assert!(ep.route.contains("users"));
}
}

View file

@ -24,8 +24,12 @@ use std::collections::BTreeMap;
use std::path::Path;
pub mod build;
pub mod dangerous;
pub mod datastore;
pub mod external;
pub mod graph;
pub mod lang;
pub mod reachability;
/// Stable source location used as the primary key for every
/// [`SurfaceNode`]. `file` is a project-relative POSIX path so the

192
src/surface/reachability.rs Normal file
View file

@ -0,0 +1,192 @@
//! Transitive-closure pass: connect [`SurfaceNode::EntryPoint`] nodes
//! to the [`SurfaceNode::DataStore`] / [`SurfaceNode::ExternalService`]
//! / [`SurfaceNode::DangerousLocal`] nodes they can reach via the
//! whole-program [`CallGraph`].
//!
//! For each entry-point we first locate the matching call-graph
//! [`FuncKey`] by `(namespace, function_name)` (the entry-point's
//! `handler_location.file` is the project-relative POSIX path used as
//! `FuncKey::namespace`, and `handler_name` is the leaf function
//! name). From that node we run a BFS over forward call-graph edges
//! up to a small depth bound, and for every visited
//! `(file, function_name)` we look for a matching DataStore /
//! ExternalService / DangerousLocal node in the SurfaceMap, emitting
//! one [`EdgeKind::Reaches`] edge per match.
//!
//! Node match policy: the destination's `location.file` must equal
//! the visited call-graph node's namespace. This is best-effort but
//! deterministic — an entry-point that calls into a helper which then
//! calls `eval()` will surface the eval as a `Reaches` of the entry
//! point as long as the eval's host file is on the BFS frontier.
use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode};
use crate::callgraph::CallGraph;
use crate::summary::GlobalSummaries;
use petgraph::Direction;
use std::collections::{HashMap, HashSet, VecDeque};
/// Maximum BFS depth from an entry-point node. Surface chains beyond
/// eight call-graph hops are rare in practice and the cost of a deeper
/// walk is paid per entry-point per scan. Nodes found at exactly this
/// depth are still recorded as reachable; they are just not expanded
/// any further.
const MAX_BFS_DEPTH: usize = 8;
/// Populate [`EdgeKind::Reaches`] edges on `map`. Mutates the edge
/// list in place; the caller is expected to follow up with
/// [`SurfaceMap::canonicalize`] before serialisation (new edges are
/// appended in hash-set iteration order).
///
/// For every entry point, the set of reachable files is the handler's
/// own file plus the namespace of every call-graph node found by a
/// forward BFS (bounded by [`MAX_BFS_DEPTH`]) from the handler's seed
/// nodes. One edge is emitted to each DataStore / ExternalService /
/// DangerousLocal node whose `location.file` is in that set.
///
/// Seed nodes are call-graph keys whose `name` equals the handler name
/// and whose `namespace` and the handler's file agree on a whole-path
/// suffix (see `is_path_suffix`), so `webapp.py` is never mistaken for
/// `app.py` and an empty namespace matches nothing but an empty file.
///
/// `summaries` is accepted for interface stability and is not read.
pub fn populate_reaches_edges(
    map: &mut SurfaceMap,
    summaries: &GlobalSummaries,
    call_graph: &CallGraph,
) {
    let _ = summaries;
    if map.nodes.is_empty() {
        return;
    }
    let dst_index = build_destination_index(map);
    if dst_index.is_empty() {
        return;
    }
    let mut new_edges: HashSet<SurfaceEdge> = HashSet::new();
    for (entry_idx, node) in map.nodes.iter().enumerate() {
        let SurfaceNode::EntryPoint(ep) = node else {
            continue;
        };
        let handler_file = ep.handler_location.file.as_str();
        let mut reachable_files: HashSet<String> = HashSet::new();
        // Seed with the handler's host file — the entry-point itself
        // counts as reachable, so any DataStore / ExternalService /
        // DangerousLocal in the same file is connected even when the
        // call graph cannot resolve the seed FuncKey.
        reachable_files.insert(ep.handler_location.file.clone());
        // Locate seed FuncKeys whose `namespace` matches the entry's
        // file and whose `name` matches the handler. More than one
        // seed is possible (overloaded methods, duplicate definitions).
        // Either side may be the longer path, so the suffix test is
        // applied in both directions.
        let seeds = call_graph
            .index
            .iter()
            .filter(|(k, _)| k.name == ep.handler_name)
            .filter(|(k, _)| {
                is_path_suffix(&k.namespace, handler_file)
                    || is_path_suffix(handler_file, &k.namespace)
            })
            .map(|(_, idx)| *idx)
            .collect::<Vec<_>>();
        let mut visited: HashSet<_> = seeds.iter().copied().collect();
        let mut queue: VecDeque<(petgraph::graph::NodeIndex, usize)> =
            seeds.iter().map(|n| (*n, 0)).collect();
        while let Some((node_idx, depth)) = queue.pop_front() {
            if let Some(key) = call_graph.graph.node_weight(node_idx) {
                reachable_files.insert(key.namespace.clone());
            }
            // Nodes at the depth limit are recorded but not expanded.
            if depth >= MAX_BFS_DEPTH {
                continue;
            }
            for neighbour in call_graph
                .graph
                .neighbors_directed(node_idx, Direction::Outgoing)
            {
                // `visited` also guards against call-graph cycles.
                if visited.insert(neighbour) {
                    queue.push_back((neighbour, depth + 1));
                }
            }
        }
        for (dst_idx, dst_file) in &dst_index {
            if reachable_files.contains(dst_file) {
                new_edges.insert(SurfaceEdge {
                    from: entry_idx as u32,
                    to: *dst_idx as u32,
                    kind: EdgeKind::Reaches,
                });
            }
        }
    }
    map.edges.extend(new_edges);
}

/// True when `tail` equals `full`, or `full` ends with `tail` and the
/// match starts right after a path separator (`/` or `\`).
///
/// An empty `tail` only matches an empty `full`.
fn is_path_suffix(full: &str, tail: &str) -> bool {
    match full.strip_suffix(tail) {
        Some("") => true,
        Some(prefix) => !tail.is_empty() && prefix.ends_with(['/', '\\']),
        None => false,
    }
}
/// Pair every destination node's index in `map.nodes` with the file it
/// lives in. Destinations are DataStore, ExternalService and
/// DangerousLocal nodes; EntryPoint nodes are left out. The result
/// follows the order of `map.nodes`.
fn build_destination_index(map: &SurfaceMap) -> Vec<(usize, String)> {
    map.nodes
        .iter()
        .enumerate()
        .filter_map(|(idx, node)| {
            let file = match node {
                SurfaceNode::DataStore(n) => &n.location.file,
                SurfaceNode::ExternalService(n) => &n.location.file,
                SurfaceNode::DangerousLocal(n) => &n.location.file,
                SurfaceNode::EntryPoint(_) => return None,
            };
            Some((idx, file.clone()))
        })
        .collect()
}
/// Inverted index from file path to the indices (into `map.nodes`) of
/// the DataStore / ExternalService / DangerousLocal nodes located in
/// that file. Indices within each bucket are in ascending order.
///
/// Exposed for future callers (chain composer, CLI tree printer) that
/// want a "what does this file expose" lookup without rerunning
/// reachability.
#[allow(dead_code)]
pub fn destinations_by_file(map: &SurfaceMap) -> HashMap<String, Vec<usize>> {
    let destinations = map
        .nodes
        .iter()
        .enumerate()
        .filter_map(|(idx, node)| match node {
            SurfaceNode::DataStore(n) => Some((idx, &n.location.file)),
            SurfaceNode::ExternalService(n) => Some((idx, &n.location.file)),
            SurfaceNode::DangerousLocal(n) => Some((idx, &n.location.file)),
            SurfaceNode::EntryPoint(_) => None,
        });

    let mut by_file: HashMap<String, Vec<usize>> = HashMap::new();
    for (idx, file) in destinations {
        by_file.entry(file.clone()).or_default().push(idx);
    }
    by_file
}
// Unit tests for the reachability pass.
#[cfg(test)]
mod tests {
use super::*;
use crate::entry_points::HttpMethod;
use crate::surface::{
DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
};
// Build a Flask GET entry point for `/` in `file` with handler `handler`.
fn ep(file: &str, handler: &str) -> SurfaceNode {
SurfaceNode::EntryPoint(EntryPoint {
location: SourceLocation::new(file, 1, 1),
framework: Framework::Flask,
method: HttpMethod::GET,
route: "/".into(),
handler_name: handler.into(),
handler_location: SourceLocation::new(file, 2, 1),
auth_required: false,
})
}
// Build a DangerousLocal node named `name` located in `file`.
fn dl(file: &str, name: &str) -> SurfaceNode {
SurfaceNode::DangerousLocal(DangerousLocal {
location: SourceLocation::new(file, 0, 0),
function_name: name.into(),
cap_bits: 0x1,
})
}
// With an empty call graph the handler's own file is still seeded as
// reachable, so a dangerous local in the same file gets one edge.
#[test]
fn entry_in_same_file_as_dangerous_emits_reaches() {
let mut map = SurfaceMap::new();
map.nodes.push(ep("app.py", "index"));
map.nodes.push(dl("app.py", "do_eval"));
let gs = GlobalSummaries::new();
let cg = CallGraph {
graph: petgraph::graph::DiGraph::new(),
index: Default::default(),
unresolved_not_found: vec![],
unresolved_ambiguous: vec![],
};
populate_reaches_edges(&mut map, &gs, &cg);
assert_eq!(map.edges.len(), 1);
assert_eq!(map.edges[0].kind, EdgeKind::Reaches);
assert_eq!(map.edges[0].from, 0);
assert_eq!(map.edges[0].to, 1);
}
}

View file

@ -0,0 +1,13 @@
package main
import "github.com/gin-gonic/gin"
func main() {
r := gin.Default()
r.GET("/users", listUsers)
r.Run()
}
func listUsers(c *gin.Context) {
c.JSON(200, []string{})
}

View file

@ -0,0 +1,12 @@
package main
import "net/http"
func main() {
http.HandleFunc("/users", listUsers)
http.ListenAndServe(":8080", nil)
}
func listUsers(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("[]"))
}

View file

@ -0,0 +1,17 @@
package com.example;
import io.quarkus.runtime.Quarkus;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
@ApplicationScoped
@Path("/api")
public class GreetResource {
@GET
@Path("/hello")
public String hello() {
return "hi";
}
}

View file

@ -0,0 +1,14 @@
package com.example;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
@Path("/users")
public class UserResource {
@GET
@Path("/{id}")
public String get() {
return "{}";
}
}

View file

@ -0,0 +1,11 @@
package com.example;
@RestController
@RequestMapping("/api")
public class UserController {
@GetMapping("/users")
public String list() {
return "[]";
}
}

View file

@ -0,0 +1,8 @@
const express = require("express");
const app = express();
app.get("/users", (req, res) => {
res.send("ok");
});
app.listen(3000);

View file

@ -0,0 +1,8 @@
const Router = require("@koa/router");
const router = new Router();
router.get("/users", async (ctx) => {
ctx.body = [];
});
module.exports = router;

View file

@ -0,0 +1,3 @@
<?php
Route::get('/users', 'UserController@index');

View file

@ -0,0 +1,3 @@
<?php
$app->get('/users', 'UsersController:list');

View file

@ -0,0 +1,10 @@
from django.urls import path
def admin_view(request):
return None
urlpatterns = [
path("admin/", admin_view),
]

View file

@ -0,0 +1,8 @@
from fastapi import FastAPI
app = FastAPI()
@app.get("/items")
def list_items():
return []

View file

@ -0,0 +1,8 @@
from flask import Flask
app = Flask(__name__)
@app.get("/users")
def list_users():
return "ok"

View file

@ -0,0 +1,9 @@
class UsersController < ApplicationController
def index
render json: []
end
def show
render json: {}
end
end

View file

@ -0,0 +1,5 @@
require 'sinatra'
get '/users' do
'[]'
end

View file

@ -0,0 +1,6 @@
use actix_web::{get, HttpResponse};
#[get("/users")]
async fn list_users() -> HttpResponse {
HttpResponse::Ok().finish()
}

View file

@ -0,0 +1,9 @@
use axum::{routing::get, Router};
async fn list_users() -> &'static str {
"[]"
}
fn app() -> Router {
Router::new().route("/users", get(list_users))
}

View file

@ -0,0 +1,3 @@
export async function GET(req: Request): Promise<Response> {
return new Response("ok");
}

208
tests/surface_cross_lang.rs Normal file
View file

@ -0,0 +1,208 @@
//! Phase 22 — cross-language `SurfaceMap` framework probes.
//!
//! One fixture per (language, framework) pair under
//! `tests/dynamic_fixtures/surface/<probe>/`. Each probe is exercised
//! through the public [`build_surface_map`] entry point and asserted
//! on:
//!
//! 1. At least one [`SurfaceNode::EntryPoint`] is emitted.
//! 2. The recognised entry-point carries the expected [`Framework`]
//! tag.
//! 3. The recognised entry-point's `route` field contains the expected
//! substring (the path declared in the fixture).
use nyx_scanner::callgraph::CallGraph;
use nyx_scanner::summary::GlobalSummaries;
use nyx_scanner::surface::{
Framework, SurfaceMap, SurfaceNode,
build::{build_surface_map, SurfaceBuildInputs},
};
use nyx_scanner::utils::config::Config;
use std::path::{Path, PathBuf};
const FIXTURE_ROOT: &str = "tests/dynamic_fixtures/surface";
/// Build a `CallGraph` with no nodes, no index entries and no
/// unresolved-call records.
fn empty_call_graph() -> CallGraph {
CallGraph {
graph: petgraph::graph::DiGraph::new(),
index: Default::default(),
unresolved_not_found: vec![],
unresolved_ambiguous: vec![],
}
}
/// Build the `SurfaceMap` for one fixture directory under
/// `FIXTURE_ROOT`.
///
/// Every file found by `walk` is passed in, the fixture directory is
/// used as the scan root, and the summaries, call graph and config are
/// empty / default.
fn build(fixture_dir: &str) -> SurfaceMap {
let dir = Path::new(FIXTURE_ROOT).join(fixture_dir);
let mut files: Vec<PathBuf> = Vec::new();
walk(&dir, &mut files);
let cfg = Config::default();
let gs = GlobalSummaries::new();
let cg = empty_call_graph();
let inputs = SurfaceBuildInputs {
files: &files,
scan_root: Some(&dir),
global_summaries: &gs,
call_graph: &cg,
config: &cfg,
};
build_surface_map(&inputs)
}
/// Recursively append every non-directory path under `dir` to `out`.
///
/// A directory that cannot be read contributes nothing, and entries
/// that fail to load are skipped. The order of results follows the
/// platform's directory iteration order.
fn walk(dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for path in entries.flatten().map(|entry| entry.path()) {
        if path.is_dir() {
            walk(&path, out);
        } else {
            out.push(path);
        }
    }
}
/// Assert that `map` contains at least one entry point tagged with
/// `framework`, and that at least one such entry point has a route
/// containing `route_substr`.
fn assert_entry(map: &SurfaceMap, framework: Framework, route_substr: &str) {
    let mut routes: Vec<String> = Vec::new();
    for node in &map.nodes {
        if let SurfaceNode::EntryPoint(ep) = node {
            if ep.framework == framework {
                routes.push(ep.route.clone());
            }
        }
    }
    assert!(
        !routes.is_empty(),
        "no entry-point with framework {:?} found in map = {:#?}",
        framework,
        map.nodes
    );
    let has_expected_route = routes.iter().any(|r| r.contains(route_substr));
    assert!(
        has_expected_route,
        "expected a route containing {route_substr:?}; got {routes:?}",
    );
}
// One test per fixture directory. Each builds the surface map for the
// fixture and checks the framework tag plus a route substring.
#[test]
fn python_flask_fixture() {
let map = build("python_flask");
assert_entry(&map, Framework::Flask, "/users");
}
#[test]
fn python_fastapi_fixture() {
let map = build("python_fastapi");
assert_entry(&map, Framework::FastApi, "/items");
}
#[test]
fn python_django_fixture() {
let map = build("python_django");
assert_entry(&map, Framework::Django, "admin");
}
#[test]
fn js_express_fixture() {
let map = build("js_express");
assert_entry(&map, Framework::Express, "/users");
}
#[test]
fn js_koa_fixture() {
let map = build("js_koa");
// koa probe currently emits the Express variant tag because the
// SurfaceMap framework taxonomy folds koa-router under the
// generic "node http microframework" bucket. See
// [`nyx_scanner::surface::lang::js_koa`] doc comment.
assert_entry(&map, Framework::Express, "/users");
}
#[test]
fn ts_next_fixture() {
let map = build("ts_next");
assert_entry(&map, Framework::NextAppRouter, "users");
}
#[test]
fn java_spring_fixture() {
let map = build("java_spring");
// The expected route is the class-level and method-level paths joined.
assert_entry(&map, Framework::Spring, "/api/users");
}
#[test]
fn java_servlet_fixture() {
let map = build("java_servlet");
// This fixture is expected to surface under the JAX-RS tag.
assert_entry(&map, Framework::JaxRs, "/users");
}
#[test]
fn java_quarkus_fixture() {
let map = build("java_quarkus");
// Quarkus resources are expected under the JAX-RS tag as well.
assert_entry(&map, Framework::JaxRs, "/api/hello");
}
#[test]
fn go_http_fixture() {
let map = build("go_http");
assert_entry(&map, Framework::NetHttp, "/users");
}
#[test]
fn go_gin_fixture() {
let map = build("go_gin");
assert_entry(&map, Framework::Gin, "/users");
}
#[test]
fn php_laravel_fixture() {
let map = build("php_laravel");
// Laravel folds into the generic Sinatra-like framework bucket
// because the SurfaceMap framework taxonomy is method-call shaped
// rather than per-stack. See `surface::lang::php_laravel`.
assert_entry(&map, Framework::Sinatra, "/users");
}
#[test]
fn php_slim_fixture() {
let map = build("php_slim");
// Slim is asserted under the same Sinatra tag as Laravel above.
assert_entry(&map, Framework::Sinatra, "/users");
}
#[test]
fn ruby_sinatra_fixture() {
let map = build("ruby_sinatra");
assert_entry(&map, Framework::Sinatra, "/users");
}
#[test]
fn ruby_rails_fixture() {
let map = build("ruby_rails");
// Controller actions have empty routes because the route table
// lives in `config/routes.rb` (separate file). Assert on the
// handler name surfacing instead.
let handlers: Vec<String> = map
.nodes
.iter()
.filter_map(|n| match n {
SurfaceNode::EntryPoint(ep) if ep.framework == Framework::Rails => {
Some(ep.handler_name.clone())
}
_ => None,
})
.collect();
assert!(handlers.contains(&"index".to_string()));
assert!(handlers.contains(&"show".to_string()));
}
#[test]
fn rust_actix_fixture() {
let map = build("rust_actix");
assert_entry(&map, Framework::Actix, "/users");
}
#[test]
fn rust_axum_fixture() {
let map = build("rust_axum");
assert_entry(&map, Framework::Axum, "/users");
}