From 2395446655e3d2c4ac3ba42ed01a7488173fd4ad Mon Sep 17 00:00:00 2001 From: pitboss Date: Fri, 15 May 2026 13:28:58 -0500 Subject: [PATCH] =?UTF-8?q?[pitboss]=20phase=2022:=20Track=20F.2=20+=20F.3?= =?UTF-8?q?=20=E2=80=94=20Cross-language=20framework=20probes=20+=20data?= =?UTF-8?q?=20store=20/=20external=20service=20/=20dangerous-local=20detec?= =?UTF-8?q?tion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/surface/build.rs | 440 ++++++++++++++---- src/surface/dangerous.rs | 88 ++++ src/surface/datastore.rs | 218 +++++++++ src/surface/external.rs | 165 +++++++ src/surface/lang/common.rs | 131 ++++++ src/surface/lang/go_gin.rs | 174 +++++++ src/surface/lang/go_http.rs | 129 +++++ src/surface/lang/java_quarkus.rs | 297 ++++++++++++ src/surface/lang/java_servlet.rs | 285 ++++++++++++ src/surface/lang/java_spring.rs | 305 ++++++++++++ src/surface/lang/js_express.rs | 231 +++++++++ src/surface/lang/js_koa.rs | 193 ++++++++ src/surface/lang/mod.rs | 39 +- src/surface/lang/php_laravel.rs | 167 +++++++ src/surface/lang/php_slim.rs | 139 ++++++ src/surface/lang/python_django.rs | 364 +++++++++++++++ src/surface/lang/python_fastapi.rs | 336 +++++++++++++ src/surface/lang/python_flask.rs | 11 + src/surface/lang/ruby_rails.rs | 219 +++++++++ src/surface/lang/ruby_sinatra.rs | 111 +++++ src/surface/lang/rust_actix.rs | 196 ++++++++ src/surface/lang/rust_axum.rs | 191 ++++++++ src/surface/lang/ts_next.rs | 315 +++++++++++++ src/surface/mod.rs | 4 + src/surface/reachability.rs | 192 ++++++++ tests/dynamic_fixtures/surface/go_gin/main.go | 13 + .../dynamic_fixtures/surface/go_http/main.go | 12 + .../surface/java_quarkus/GreetResource.java | 17 + .../surface/java_servlet/UserResource.java | 14 + .../surface/java_spring/UserController.java | 11 + .../surface/js_express/server.js | 8 + .../dynamic_fixtures/surface/js_koa/server.js | 8 + .../surface/php_laravel/routes.php | 3 + .../surface/php_slim/routes.php | 3 + 
.../surface/python_django/urls.py | 10 + .../surface/python_fastapi/api.py | 8 + .../surface/python_flask/app.py | 8 + .../surface/ruby_rails/users_controller.rb | 9 + .../surface/ruby_sinatra/app.rb | 5 + .../surface/rust_actix/main.rs | 6 + .../surface/rust_axum/main.rs | 9 + .../surface/ts_next/app/users/route.ts | 3 + tests/surface_cross_lang.rs | 208 +++++++++ 43 files changed, 5213 insertions(+), 82 deletions(-) create mode 100644 src/surface/dangerous.rs create mode 100644 src/surface/datastore.rs create mode 100644 src/surface/external.rs create mode 100644 src/surface/lang/common.rs create mode 100644 src/surface/lang/go_gin.rs create mode 100644 src/surface/lang/go_http.rs create mode 100644 src/surface/lang/java_quarkus.rs create mode 100644 src/surface/lang/java_servlet.rs create mode 100644 src/surface/lang/java_spring.rs create mode 100644 src/surface/lang/js_express.rs create mode 100644 src/surface/lang/js_koa.rs create mode 100644 src/surface/lang/php_laravel.rs create mode 100644 src/surface/lang/php_slim.rs create mode 100644 src/surface/lang/python_django.rs create mode 100644 src/surface/lang/python_fastapi.rs create mode 100644 src/surface/lang/ruby_rails.rs create mode 100644 src/surface/lang/ruby_sinatra.rs create mode 100644 src/surface/lang/rust_actix.rs create mode 100644 src/surface/lang/rust_axum.rs create mode 100644 src/surface/lang/ts_next.rs create mode 100644 src/surface/reachability.rs create mode 100644 tests/dynamic_fixtures/surface/go_gin/main.go create mode 100644 tests/dynamic_fixtures/surface/go_http/main.go create mode 100644 tests/dynamic_fixtures/surface/java_quarkus/GreetResource.java create mode 100644 tests/dynamic_fixtures/surface/java_servlet/UserResource.java create mode 100644 tests/dynamic_fixtures/surface/java_spring/UserController.java create mode 100644 tests/dynamic_fixtures/surface/js_express/server.js create mode 100644 tests/dynamic_fixtures/surface/js_koa/server.js create mode 100644 
tests/dynamic_fixtures/surface/php_laravel/routes.php create mode 100644 tests/dynamic_fixtures/surface/php_slim/routes.php create mode 100644 tests/dynamic_fixtures/surface/python_django/urls.py create mode 100644 tests/dynamic_fixtures/surface/python_fastapi/api.py create mode 100644 tests/dynamic_fixtures/surface/python_flask/app.py create mode 100644 tests/dynamic_fixtures/surface/ruby_rails/users_controller.rb create mode 100644 tests/dynamic_fixtures/surface/ruby_sinatra/app.rb create mode 100644 tests/dynamic_fixtures/surface/rust_actix/main.rs create mode 100644 tests/dynamic_fixtures/surface/rust_axum/main.rs create mode 100644 tests/dynamic_fixtures/surface/ts_next/app/users/route.ts create mode 100644 tests/surface_cross_lang.rs diff --git a/src/surface/build.rs b/src/surface/build.rs index ec2a3c26..89fb7605 100644 --- a/src/surface/build.rs +++ b/src/surface/build.rs @@ -1,29 +1,44 @@ //! Top-level [`SurfaceMap`] builder. //! -//! Consumes the post-pass-2 [`GlobalSummaries`] + [`CallGraph`] for -//! call-graph reachability and the project's file list for the -//! per-language framework probes. Phase 21 only invokes the Python + -//! Flask probe; Phase 22 wires the remaining language probes through -//! [`crate::surface::lang`]. +//! Phase 22 dispatch: //! -//! Build steps (Phase 21): +//! 1. Per-file framework probes (one parser per language) emit +//! [`SurfaceNode::EntryPoint`] nodes for every recognised route / +//! handler. +//! 2. [`super::datastore::detect_data_stores`] walks +//! [`GlobalSummaries`] and emits [`SurfaceNode::DataStore`] nodes +//! for every recognised driver call. +//! 3. [`super::external::detect_external_services`] walks summaries + +//! SSRF caps and emits [`SurfaceNode::ExternalService`] nodes. +//! 4. [`super::dangerous::detect_dangerous_locals`] walks summaries +//! and emits [`SurfaceNode::DangerousLocal`] nodes for every +//! function whose `sink_caps` include CODE_EXEC / DESERIALIZE / +//! SSTI / FMT_STRING. +//! 5. 
[`super::reachability::populate_reaches_edges`] runs a BFS over +//! the [`CallGraph`] from each entry-point handler, emitting +//! [`super::EdgeKind::Reaches`] edges to every reachable +//! DataStore / ExternalService / DangerousLocal. +//! 6. [`SurfaceMap::canonicalize`] sorts nodes + edges so the +//! serialised JSON is byte-deterministic across rescans. //! -//! 1. For every Python file, parse it once and invoke -//! [`crate::surface::lang::python_flask::detect_flask_routes`]. -//! 2. Collect the resulting [`SurfaceNode::EntryPoint`] nodes. -//! 3. Canonicalise the map (sort nodes + edges, dedup edges) so two -//! runs over the same source produce byte-identical JSON. +//! Per-file errors (parse failure, unsupported language) are +//! swallowed so a single bad file does not kill the whole map. use crate::callgraph::CallGraph; use crate::summary::GlobalSummaries; -use crate::surface::{SurfaceMap, lang::python_flask}; +use crate::surface::{ + SurfaceMap, dangerous, datastore, external, + lang::{ + go_gin, go_http, java_quarkus, java_servlet, java_spring, js_express, js_koa, + php_laravel, php_slim, python_django, python_fastapi, python_flask, + ruby_rails, ruby_sinatra, rust_actix, rust_axum, ts_next, + }, + reachability, +}; use crate::utils::config::Config; use std::path::{Path, PathBuf}; +use tree_sitter::Parser; -/// Inputs to [`build_surface_map`]. Wrapped in a struct so the -/// downstream Phase 22 work (additional probes, call-graph-derived -/// `Reaches` edges, label-rule data-source nodes) can extend the -/// signature without touching every caller. pub struct SurfaceBuildInputs<'a> { pub files: &'a [PathBuf], pub scan_root: Option<&'a Path>, @@ -32,87 +47,304 @@ pub struct SurfaceBuildInputs<'a> { pub config: &'a Config, } -/// Build a [`SurfaceMap`] for the project under analysis. -/// -/// Best-effort: parse failures on individual files are swallowed so -/// the surface map of a 10k-file project is not killed by one bad -/// Python file. 
Returns an empty map when the inputs contain no -/// recognised entry-points. pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { let mut map = SurfaceMap::new(); + let _ = inputs.config; - // Phase 21: only Python / Flask. The downstream Phase 22 probes - // will dispatch on file extension here. - let mut python_parser = tree_sitter::Parser::new(); - if python_parser - .set_language(&tree_sitter_python::LANGUAGE.into()) - .is_err() - { - return map; - } - + let mut parsers = Parsers::new(); for path in inputs.files { - if !is_python_file(path) { - continue; - } let Ok(bytes) = std::fs::read(path) else { continue; }; - let Some(tree) = python_parser.parse(&bytes, None) else { - continue; + let kind = classify_file(path); + let nodes = match kind { + FileKind::Python => parsers + .python + .as_mut() + .and_then(|p| p.parse(&bytes, None)) + .map(|tree| { + let mut all = python_flask::detect_flask_routes( + &tree, + &bytes, + path, + inputs.scan_root, + ); + all.extend(python_fastapi::detect_fastapi_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all.extend(python_django::detect_django_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all + }) + .unwrap_or_default(), + FileKind::JavaScript => parsers + .javascript + .as_mut() + .and_then(|p| p.parse(&bytes, None)) + .map(|tree| { + let mut all = + js_express::detect_express_routes(&tree, &bytes, path, inputs.scan_root); + all.extend(js_koa::detect_koa_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all + }) + .unwrap_or_default(), + FileKind::TypeScript => parsers + .typescript + .as_mut() + .and_then(|p| p.parse(&bytes, None)) + .map(|tree| { + let mut all = + js_express::detect_express_routes(&tree, &bytes, path, inputs.scan_root); + all.extend(js_koa::detect_koa_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all.extend(ts_next::detect_next_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all + }) + .unwrap_or_default(), + 
FileKind::Java => parsers + .java + .as_mut() + .and_then(|p| p.parse(&bytes, None)) + .map(|tree| { + let mut all = + java_spring::detect_spring_routes(&tree, &bytes, path, inputs.scan_root); + all.extend(java_servlet::detect_servlet_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all.extend(java_quarkus::detect_quarkus_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all + }) + .unwrap_or_default(), + FileKind::Go => parsers + .go + .as_mut() + .and_then(|p| p.parse(&bytes, None)) + .map(|tree| { + let mut all = + go_http::detect_go_http_routes(&tree, &bytes, path, inputs.scan_root); + all.extend(go_gin::detect_gin_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all + }) + .unwrap_or_default(), + FileKind::Php => parsers + .php + .as_mut() + .and_then(|p| p.parse(&bytes, None)) + .map(|tree| { + let mut all = php_laravel::detect_laravel_routes( + &tree, + &bytes, + path, + inputs.scan_root, + ); + all.extend(php_slim::detect_slim_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all + }) + .unwrap_or_default(), + FileKind::Ruby => parsers + .ruby + .as_mut() + .and_then(|p| p.parse(&bytes, None)) + .map(|tree| { + let mut all = ruby_sinatra::detect_sinatra_routes( + &tree, + &bytes, + path, + inputs.scan_root, + ); + all.extend(ruby_rails::detect_rails_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all + }) + .unwrap_or_default(), + FileKind::Rust => parsers + .rust + .as_mut() + .and_then(|p| p.parse(&bytes, None)) + .map(|tree| { + let mut all = + rust_actix::detect_actix_routes(&tree, &bytes, path, inputs.scan_root); + all.extend(rust_axum::detect_axum_routes( + &tree, + &bytes, + path, + inputs.scan_root, + )); + all + }) + .unwrap_or_default(), + FileKind::Other => Vec::new(), }; - let nodes = - python_flask::detect_flask_routes(&tree, &bytes, path, inputs.scan_root); for n in nodes { map.nodes.push(n); } } - // GlobalSummaries / CallGraph are reserved for Phase 22's - // 
`DangerousLocal` + `Reaches`-edge fill-in. Phase 21 records - // them in the inputs so callers do not need to be touched again - // when Phase 22 wires them up. - let _ = inputs.global_summaries; - let _ = inputs.call_graph; - let _ = inputs.config; + // Phase 22 — Track F.3: data-store / external-service / + // dangerous-local detection from summaries. + map.nodes + .extend(datastore::detect_data_stores(inputs.global_summaries)); + map.nodes + .extend(external::detect_external_services(inputs.global_summaries)); + map.nodes + .extend(dangerous::detect_dangerous_locals(inputs.global_summaries)); + // Canonicalise so node indices are stable before reachability + // builds edges referring to those indices. + map.canonicalize(); + + // Phase 22 — Track F.3: transitive closure over the call graph. + reachability::populate_reaches_edges(&mut map, inputs.global_summaries, inputs.call_graph); + + // Re-canonicalise: edges added by reachability need to be sorted + // so the serialised JSON stays byte-deterministic. 
map.canonicalize(); map } -fn is_python_file(path: &Path) -> bool { - matches!( - path.extension().and_then(|s| s.to_str()), - Some("py") | Some("pyi") - ) +#[derive(Copy, Clone, PartialEq, Eq)] +enum FileKind { + Python, + JavaScript, + TypeScript, + Java, + Go, + Php, + Ruby, + Rust, + Other, +} + +fn classify_file(path: &Path) -> FileKind { + match path.extension().and_then(|s| s.to_str()) { + Some("py") | Some("pyi") => FileKind::Python, + Some("js") | Some("jsx") | Some("mjs") | Some("cjs") => FileKind::JavaScript, + Some("ts") | Some("tsx") | Some("mts") | Some("cts") => FileKind::TypeScript, + Some("java") => FileKind::Java, + Some("go") => FileKind::Go, + Some("php") => FileKind::Php, + Some("rb") => FileKind::Ruby, + Some("rs") => FileKind::Rust, + _ => FileKind::Other, + } +} + +struct Parsers { + python: Option, + javascript: Option, + typescript: Option, + java: Option, + go: Option, + php: Option, + ruby: Option, + rust: Option, +} + +impl Parsers { + fn new() -> Self { + Self { + python: parser_for(tree_sitter_python::LANGUAGE.into()), + javascript: parser_for(tree_sitter_javascript::LANGUAGE.into()), + typescript: parser_for(tree_sitter_typescript::LANGUAGE_TSX.into()), + java: parser_for(tree_sitter_java::LANGUAGE.into()), + go: parser_for(tree_sitter_go::LANGUAGE.into()), + php: parser_for(tree_sitter_php::LANGUAGE_PHP.into()), + ruby: parser_for(tree_sitter_ruby::LANGUAGE.into()), + rust: parser_for(tree_sitter_rust::LANGUAGE.into()), + } + } +} + +fn parser_for(language: tree_sitter::Language) -> Option { + let mut parser = Parser::new(); + parser.set_language(&language).ok()?; + Some(parser) } #[cfg(test)] mod tests { use super::*; use crate::entry_points::HttpMethod; + use crate::surface::SurfaceNode; use std::fs; use tempfile::tempdir; + fn empty_inputs<'a>( + files: &'a [PathBuf], + scan_root: Option<&'a Path>, + gs: &'a GlobalSummaries, + cg: &'a CallGraph, + cfg: &'a Config, + ) -> SurfaceBuildInputs<'a> { + SurfaceBuildInputs { + files, + 
scan_root, + global_summaries: gs, + call_graph: cg, + config: cfg, + } + } + + fn empty_call_graph() -> CallGraph { + CallGraph { + graph: petgraph::graph::DiGraph::new(), + index: Default::default(), + unresolved_not_found: vec![], + unresolved_ambiguous: vec![], + } + } + #[test] fn empty_inputs_produce_empty_map() { let dir = tempdir().unwrap(); let cfg = Config::default(); let gs = GlobalSummaries::new(); - let cg = CallGraph { - graph: petgraph::graph::DiGraph::new(), - index: Default::default(), - unresolved_not_found: vec![], - unresolved_ambiguous: vec![], - }; + let cg = empty_call_graph(); let files: Vec = vec![]; - let inputs = SurfaceBuildInputs { - files: &files, - scan_root: Some(dir.path()), - global_summaries: &gs, - call_graph: &cg, - config: &cfg, - }; + let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); let map = build_surface_map(&inputs); assert_eq!(map.node_count(), 0); assert_eq!(map.edge_count(), 0); @@ -140,24 +372,76 @@ def submit(): .unwrap(); let cfg = Config::default(); let gs = GlobalSummaries::new(); - let cg = CallGraph { - graph: petgraph::graph::DiGraph::new(), - index: Default::default(), - unresolved_not_found: vec![], - unresolved_ambiguous: vec![], - }; - let files = vec![py.clone()]; - let inputs = SurfaceBuildInputs { - files: &files, - scan_root: Some(dir.path()), - global_summaries: &gs, - call_graph: &cg, - config: &cfg, - }; + let cg = empty_call_graph(); + let files = vec![py]; + let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); let map = build_surface_map(&inputs); assert_eq!(map.node_count(), 2); let methods: Vec = map.entry_points().map(|ep| ep.method).collect(); assert!(methods.contains(&HttpMethod::GET)); assert!(methods.contains(&HttpMethod::POST)); } + + #[test] + fn fastapi_file_produces_entry_points() { + let dir = tempdir().unwrap(); + let py = dir.path().join("api.py"); + fs::write( + &py, + "from fastapi import FastAPI\napp = FastAPI()\n@app.get('/users')\ndef 
list_users(): pass\n@app.post('/items')\ndef create(): pass\n", + ) + .unwrap(); + let cfg = Config::default(); + let gs = GlobalSummaries::new(); + let cg = empty_call_graph(); + let files = vec![py]; + let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); + let map = build_surface_map(&inputs); + assert_eq!(map.node_count(), 2); + } + + #[test] + fn dangerous_local_emits_node_and_reaches_edge_to_same_file_entry() { + use crate::labels::Cap; + use crate::summary::FuncSummary; + use crate::symbol::{FuncKey, Lang}; + let dir = tempdir().unwrap(); + let py = dir.path().join("app.py"); + fs::write( + &py, + r#" +from flask import Flask +app = Flask(__name__) + +@app.route("/eval") +def evaluator(): + return "" +"#, + ) + .unwrap(); + let cfg = Config::default(); + let mut gs = GlobalSummaries::new(); + gs.insert( + FuncKey::new_function(Lang::Python, "app.py", "evaluator", None), + FuncSummary { + name: "evaluator".to_string(), + file_path: "app.py".to_string(), + lang: "python".to_string(), + sink_caps: Cap::CODE_EXEC.bits(), + ..Default::default() + }, + ); + let cg = empty_call_graph(); + let files = vec![py]; + let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); + let map = build_surface_map(&inputs); + assert!(map + .nodes + .iter() + .any(|n| matches!(n, SurfaceNode::DangerousLocal(_)))); + assert!(map + .edges + .iter() + .any(|e| matches!(e.kind, crate::surface::EdgeKind::Reaches))); + } } diff --git a/src/surface/dangerous.rs b/src/surface/dangerous.rs new file mode 100644 index 00000000..b465e502 --- /dev/null +++ b/src/surface/dangerous.rs @@ -0,0 +1,88 @@ +//! Dangerous-local sink detection. +//! +//! Walks the post-pass-2 [`GlobalSummaries`] looking for functions +//! that themselves consume `Cap::CODE_EXEC`, `Cap::DESERIALIZE`, +//! `Cap::SSTI`, or `Cap::FMT_STRING` (the canonical "no externally +//! observable side effect" sinks) and emits one +//! [`SurfaceNode::DangerousLocal`] per such function. +//! +//! 
The cap bits are taken straight from the existing label-rule +//! registry — every Phase 22 sink class continues to land on the same +//! `sink_caps` field downstream rules already populate. No new +//! detection pass is added here; the surface layer just lifts the +//! cap-bit information out of the summary. + +use super::{DangerousLocal, SourceLocation, SurfaceNode}; +use crate::labels::Cap; +use crate::summary::GlobalSummaries; + +/// Cap bits that indicate the function is a *local* sink — code exec, +/// unsafe deserialisation, server-side template injection, format +/// string injection. Other sink caps (SQL_QUERY → DataStore; +/// SSRF → ExternalService) live elsewhere in the surface layer so the +/// node taxonomy matches the chain composer's expectations. +fn dangerous_caps() -> Cap { + Cap::CODE_EXEC | Cap::DESERIALIZE | Cap::SSTI | Cap::FMT_STRING +} + +pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec { + let mask = dangerous_caps(); + let mut out: Vec = Vec::new(); + for (key, summary) in summaries.iter() { + let caps = summary.sink_caps() & mask; + if caps.is_empty() { + continue; + } + out.push(SurfaceNode::DangerousLocal(DangerousLocal { + location: SourceLocation { + file: summary.file_path.clone(), + line: 0, + col: 0, + }, + function_name: key.qualified_name(), + cap_bits: caps.bits(), + })); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::summary::FuncSummary; + use crate::symbol::{FuncKey, Lang}; + + fn summary_with_caps(name: &str, file: &str, caps: Cap) -> (FuncKey, FuncSummary) { + let key = FuncKey::new_function(Lang::Python, file, name, None); + let summary = FuncSummary { + name: name.to_string(), + file_path: file.to_string(), + lang: "python".to_string(), + sink_caps: caps.bits(), + ..Default::default() + }; + (key, summary) + } + + #[test] + fn detects_eval_sink() { + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_caps("run", "danger.py", Cap::CODE_EXEC); + gs.insert(k, s); + let 
nodes = detect_dangerous_locals(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::DangerousLocal(d) = &nodes[0] else { + panic!() + }; + assert_eq!(d.cap_bits & Cap::CODE_EXEC.bits(), Cap::CODE_EXEC.bits()); + } + + #[test] + fn ignores_sql_only() { + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_caps("query", "data.py", Cap::SQL_QUERY); + gs.insert(k, s); + let nodes = detect_dangerous_locals(&gs); + assert!(nodes.is_empty()); + } +} diff --git a/src/surface/datastore.rs b/src/surface/datastore.rs new file mode 100644 index 00000000..b06f748b --- /dev/null +++ b/src/surface/datastore.rs @@ -0,0 +1,218 @@ +//! Data-store detection. +//! +//! Walks the post-pass-2 [`GlobalSummaries`] looking for callees whose +//! name is a known database / cache / blob-store driver entry point, +//! and emits one [`SurfaceNode::DataStore`] per resolved store. +//! +//! The detector is name-based on purpose: the receiver's full type is +//! often unknown after pass 2, but the leaf name of a driver call +//! (`psycopg2.connect`, `mysql.createConnection`, `gorm.Open`, +//! `Eloquent::find`, `ActiveRecord::Base.connection`) carries enough +//! signal for surface-level chain composition. False positives here +//! are forgiving — the surface map is informational, not a finding +//! that fires on its own. + +use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode}; +use crate::summary::{FuncSummary, GlobalSummaries}; + +/// One detection rule: leaf-name pattern → store kind + label. Stored +/// as a flat list so adding a new ORM / driver is a one-line edit. +struct DriverRule { + /// Substring to match against the callee's leaf name (case-insensitive). + leaf: &'static str, + kind: DataStoreKind, + /// Human-readable label attached to the emitted node. Used by the + /// chain composer and the `nyx surface` CLI tree. 
+ label: &'static str, +} + +const DRIVER_RULES: &[DriverRule] = &[ + // Python — relational + DriverRule { leaf: "psycopg2.connect", kind: DataStoreKind::Sql, label: "PostgreSQL (psycopg2)" }, + DriverRule { leaf: "psycopg.connect", kind: DataStoreKind::Sql, label: "PostgreSQL (psycopg3)" }, + DriverRule { leaf: "mysql.connector.connect", kind: DataStoreKind::Sql, label: "MySQL (mysql.connector)" }, + DriverRule { leaf: "MySQLdb.connect", kind: DataStoreKind::Sql, label: "MySQL (MySQLdb)" }, + DriverRule { leaf: "pymysql.connect", kind: DataStoreKind::Sql, label: "MySQL (PyMySQL)" }, + DriverRule { leaf: "sqlite3.connect", kind: DataStoreKind::Sql, label: "SQLite (sqlite3)" }, + DriverRule { leaf: "sqlalchemy.create_engine", kind: DataStoreKind::Sql, label: "SQLAlchemy" }, + DriverRule { leaf: "django.db.connection", kind: DataStoreKind::Sql, label: "Django ORM" }, + // Python — kv / doc + DriverRule { leaf: "redis.Redis", kind: DataStoreKind::KeyValue, label: "Redis" }, + DriverRule { leaf: "redis.from_url", kind: DataStoreKind::KeyValue, label: "Redis" }, + DriverRule { leaf: "pymongo.MongoClient", kind: DataStoreKind::Document, label: "MongoDB" }, + DriverRule { leaf: "boto3.client", kind: DataStoreKind::BlobStore, label: "AWS (boto3)" }, + DriverRule { leaf: "boto3.resource", kind: DataStoreKind::BlobStore, label: "AWS (boto3)" }, + + // JavaScript / TypeScript — relational + DriverRule { leaf: "knex", kind: DataStoreKind::Sql, label: "Knex.js" }, + DriverRule { leaf: "createConnection", kind: DataStoreKind::Sql, label: "MySQL/Postgres (mysql/pg)" }, + DriverRule { leaf: "Sequelize", kind: DataStoreKind::Sql, label: "Sequelize" }, + DriverRule { leaf: "TypeORM.createConnection", kind: DataStoreKind::Sql, label: "TypeORM" }, + DriverRule { leaf: "PrismaClient", kind: DataStoreKind::Sql, label: "Prisma" }, + DriverRule { leaf: "pool.query", kind: DataStoreKind::Sql, label: "pg/mysql pool" }, + DriverRule { leaf: "client.query", kind: DataStoreKind::Sql, label: 
"pg client" }, + DriverRule { leaf: "db.query", kind: DataStoreKind::Sql, label: "Generic SQL driver" }, + // JS — kv / doc + DriverRule { leaf: "redis.createClient", kind: DataStoreKind::KeyValue, label: "Redis (node-redis)" }, + DriverRule { leaf: "ioredis", kind: DataStoreKind::KeyValue, label: "ioredis" }, + DriverRule { leaf: "MongoClient.connect", kind: DataStoreKind::Document, label: "MongoDB (node)" }, + DriverRule { leaf: "AWS.S3", kind: DataStoreKind::BlobStore, label: "AWS S3" }, + + // Java — JDBC / Hibernate + DriverRule { leaf: "DriverManager.getConnection", kind: DataStoreKind::Sql, label: "JDBC" }, + DriverRule { leaf: "JdbcTemplate", kind: DataStoreKind::Sql, label: "Spring JdbcTemplate" }, + DriverRule { leaf: "EntityManager", kind: DataStoreKind::Sql, label: "JPA EntityManager" }, + DriverRule { leaf: "SessionFactory.openSession", kind: DataStoreKind::Sql, label: "Hibernate" }, + DriverRule { leaf: "Jedis", kind: DataStoreKind::KeyValue, label: "Jedis (Redis)" }, + DriverRule { leaf: "MongoClients.create", kind: DataStoreKind::Document, label: "MongoDB (java-driver)" }, + + // Go — sql + ORM + DriverRule { leaf: "sql.Open", kind: DataStoreKind::Sql, label: "database/sql" }, + DriverRule { leaf: "gorm.Open", kind: DataStoreKind::Sql, label: "GORM" }, + DriverRule { leaf: "sqlx.Connect", kind: DataStoreKind::Sql, label: "sqlx" }, + DriverRule { leaf: "sqlx.Open", kind: DataStoreKind::Sql, label: "sqlx" }, + DriverRule { leaf: "redis.NewClient", kind: DataStoreKind::KeyValue, label: "go-redis" }, + DriverRule { leaf: "mongo.Connect", kind: DataStoreKind::Document, label: "MongoDB (go-driver)" }, + + // PHP — Eloquent / PDO + DriverRule { leaf: "PDO", kind: DataStoreKind::Sql, label: "PDO" }, + DriverRule { leaf: "Eloquent::find", kind: DataStoreKind::Sql, label: "Laravel Eloquent" }, + DriverRule { leaf: "Eloquent::where", kind: DataStoreKind::Sql, label: "Laravel Eloquent" }, + DriverRule { leaf: "DB::connection", kind: DataStoreKind::Sql, label: 
"Laravel DB" }, + DriverRule { leaf: "Doctrine", kind: DataStoreKind::Sql, label: "Doctrine ORM" }, + + // Ruby — ActiveRecord + DriverRule { leaf: "ActiveRecord::Base.connection", kind: DataStoreKind::Sql, label: "ActiveRecord" }, + DriverRule { leaf: "ActiveRecord::Base.find", kind: DataStoreKind::Sql, label: "ActiveRecord" }, + DriverRule { leaf: ".find_by_sql", kind: DataStoreKind::Sql, label: "ActiveRecord raw SQL" }, + + // Rust — sqlx / diesel + DriverRule { leaf: "sqlx::query", kind: DataStoreKind::Sql, label: "sqlx" }, + DriverRule { leaf: "sqlx::query_as", kind: DataStoreKind::Sql, label: "sqlx" }, + DriverRule { leaf: "diesel::sql_query", kind: DataStoreKind::Sql, label: "Diesel" }, + DriverRule { leaf: "PgConnection::establish", kind: DataStoreKind::Sql, label: "Diesel" }, + + // Filesystem (best-effort: language-agnostic open()-family) + DriverRule { leaf: "open", kind: DataStoreKind::Filesystem, label: "Filesystem" }, +]; + +/// Walk every function summary's callee list and emit one +/// [`SurfaceNode::DataStore`] per matched driver call. De-duped on +/// `(file, line, label)`. 
+pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec { + let mut out: Vec = Vec::new(); + let mut seen: std::collections::HashSet<(String, u32, String)> = + std::collections::HashSet::new(); + for (key, summary) in summaries.iter() { + for callee in &summary.callees { + let Some(rule) = match_rule(&callee.name) else { + continue; + }; + let location = call_site_location(summary, callee.ordinal); + let dedup = ( + location.file.clone(), + location.line, + rule.label.to_string(), + ); + if !seen.insert(dedup) { + continue; + } + let _ = key; + out.push(SurfaceNode::DataStore(DataStore { + location, + kind: rule.kind, + label: rule.label.to_string(), + })); + } + } + out +} + +fn match_rule(callee: &str) -> Option<&'static DriverRule> { + let trimmed = callee.trim(); + let leaf = trimmed.rsplit("::").next().unwrap_or(trimmed); + let leaf = leaf.rsplit('.').next().unwrap_or(leaf); + DRIVER_RULES + .iter() + .find(|r| { + // Match either the full callee text or its leaf segment + // against each rule's leaf, case-insensitive. + trimmed.to_ascii_lowercase().contains(&r.leaf.to_ascii_lowercase()) + || leaf.eq_ignore_ascii_case(r.leaf) + }) +} + +/// Best-effort source location for a call site. We only have file + +/// (sometimes) sink-attribution metadata on `FuncSummary`, so the +/// location falls back to the function's file with line 0 when no +/// finer-grained data is available. 
+fn call_site_location(summary: &FuncSummary, _ordinal: u32) -> SourceLocation { + SourceLocation { + file: summary.file_path.clone(), + line: 0, + col: 0, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::summary::CalleeSite; + use crate::symbol::{FuncKey, Lang}; + + fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) { + let key = FuncKey::new_function(Lang::Python, file, name, None); + let summary = FuncSummary { + name: name.to_string(), + file_path: file.to_string(), + lang: "python".to_string(), + param_count: 0, + callees: callees + .iter() + .map(|c| CalleeSite::bare(c.to_string())) + .collect(), + ..Default::default() + }; + (key, summary) + } + + #[test] + fn detects_psycopg2_connect() { + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_callees("init", "app.py", &["psycopg2.connect"]); + gs.insert(k, s); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::DataStore(ds) = &nodes[0] else { + panic!() + }; + assert_eq!(ds.kind, DataStoreKind::Sql); + assert_eq!(ds.label, "PostgreSQL (psycopg2)"); + } + + #[test] + fn detects_gorm_open() { + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_callees("init", "main.go", &["gorm.Open"]); + gs.insert(k, s); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::DataStore(ds) = &nodes[0] else { + panic!() + }; + assert_eq!(ds.label, "GORM"); + } + + #[test] + fn dedup_collapses_repeats_in_same_file() { + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_callees( + "init", + "app.py", + &["psycopg2.connect", "psycopg2.connect"], + ); + gs.insert(k, s); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1); + } +} diff --git a/src/surface/external.rs b/src/surface/external.rs new file mode 100644 index 00000000..b619f180 --- /dev/null +++ b/src/surface/external.rs @@ -0,0 +1,165 @@ +//! External-service detection. +//! +//! 
Walks the post-pass-2 [`GlobalSummaries`] looking for callees that +//! launch outbound network requests (HTTP, gRPC, SMTP, DNS) and emits +//! one [`SurfaceNode::ExternalService`] per call. Detection is by +//! callee leaf name + `sink_caps & SSRF` heuristic — both signals are +//! consulted so a probe with no SSRF cap (DNS resolver, SMTP sender) +//! still surfaces as an external service. + +use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode}; +use crate::labels::Cap; +use crate::summary::{FuncSummary, GlobalSummaries}; + +struct ClientRule { + leaf: &'static str, + kind: ExternalServiceKind, + label: &'static str, +} + +const CLIENT_RULES: &[ClientRule] = &[ + // HTTP + ClientRule { leaf: "requests.get", kind: ExternalServiceKind::HttpApi, label: "requests (Python)" }, + ClientRule { leaf: "requests.post", kind: ExternalServiceKind::HttpApi, label: "requests (Python)" }, + ClientRule { leaf: "httpx.get", kind: ExternalServiceKind::HttpApi, label: "httpx (Python)" }, + ClientRule { leaf: "httpx.post", kind: ExternalServiceKind::HttpApi, label: "httpx (Python)" }, + ClientRule { leaf: "urllib.request.urlopen", kind: ExternalServiceKind::HttpApi, label: "urllib" }, + ClientRule { leaf: "fetch", kind: ExternalServiceKind::HttpApi, label: "fetch (JS)" }, + ClientRule { leaf: "axios.get", kind: ExternalServiceKind::HttpApi, label: "axios" }, + ClientRule { leaf: "axios.post", kind: ExternalServiceKind::HttpApi, label: "axios" }, + ClientRule { leaf: "http.request", kind: ExternalServiceKind::HttpApi, label: "node http" }, + ClientRule { leaf: "got", kind: ExternalServiceKind::HttpApi, label: "got (JS)" }, + ClientRule { leaf: "HttpClient.send", kind: ExternalServiceKind::HttpApi, label: "Java HttpClient" }, + ClientRule { leaf: "HttpClient.execute", kind: ExternalServiceKind::HttpApi, label: "Java HttpClient" }, + ClientRule { leaf: "RestTemplate.exchange", kind: ExternalServiceKind::HttpApi, label: "Spring RestTemplate" }, + ClientRule { 
leaf: "RestTemplate.getForObject", kind: ExternalServiceKind::HttpApi, label: "Spring RestTemplate" }, + ClientRule { leaf: "OkHttpClient.newCall", kind: ExternalServiceKind::HttpApi, label: "OkHttp" }, + ClientRule { leaf: "http.Get", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" }, + ClientRule { leaf: "http.Post", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" }, + ClientRule { leaf: "http.NewRequest", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" }, + ClientRule { leaf: "client.Do", kind: ExternalServiceKind::HttpApi, label: "go http client" }, + ClientRule { leaf: "reqwest::get", kind: ExternalServiceKind::HttpApi, label: "reqwest (Rust)" }, + ClientRule { leaf: "reqwest::Client", kind: ExternalServiceKind::HttpApi, label: "reqwest (Rust)" }, + ClientRule { leaf: "Net::HTTP", kind: ExternalServiceKind::HttpApi, label: "Net::HTTP (Ruby)" }, + ClientRule { leaf: "HTTParty.get", kind: ExternalServiceKind::HttpApi, label: "HTTParty" }, + ClientRule { leaf: "Faraday", kind: ExternalServiceKind::HttpApi, label: "Faraday (Ruby)" }, + ClientRule { leaf: "curl_exec", kind: ExternalServiceKind::HttpApi, label: "PHP curl" }, + ClientRule { leaf: "file_get_contents", kind: ExternalServiceKind::HttpApi, label: "PHP file_get_contents" }, + ClientRule { leaf: "Guzzle", kind: ExternalServiceKind::HttpApi, label: "Guzzle (PHP)" }, + + // Message brokers + ClientRule { leaf: "kafka.send", kind: ExternalServiceKind::MessageBroker, label: "Kafka" }, + ClientRule { leaf: "KafkaProducer.send", kind: ExternalServiceKind::MessageBroker, label: "Kafka" }, + ClientRule { leaf: "rabbitmq.publish", kind: ExternalServiceKind::MessageBroker, label: "RabbitMQ" }, + ClientRule { leaf: "amqp.publish", kind: ExternalServiceKind::MessageBroker, label: "AMQP" }, + ClientRule { leaf: "sqs.send_message", kind: ExternalServiceKind::MessageBroker, label: "AWS SQS" }, + ClientRule { leaf: "sns.publish", kind: ExternalServiceKind::MessageBroker, label: "AWS SNS" 
}, + + // Search indices + ClientRule { leaf: "Elasticsearch", kind: ExternalServiceKind::SearchIndex, label: "Elasticsearch" }, + ClientRule { leaf: "elasticsearch.search", kind: ExternalServiceKind::SearchIndex, label: "Elasticsearch" }, + ClientRule { leaf: "OpenSearch", kind: ExternalServiceKind::SearchIndex, label: "OpenSearch" }, + ClientRule { leaf: "Algolia", kind: ExternalServiceKind::SearchIndex, label: "Algolia" }, + + // Auth providers + ClientRule { leaf: "auth0", kind: ExternalServiceKind::AuthProvider, label: "Auth0" }, + ClientRule { leaf: "passport.authenticate", kind: ExternalServiceKind::AuthProvider, label: "Passport.js" }, + ClientRule { leaf: "OAuth2Client", kind: ExternalServiceKind::AuthProvider, label: "OAuth2 client" }, + ClientRule { leaf: "google.oauth2", kind: ExternalServiceKind::AuthProvider, label: "Google OAuth2" }, + + // SMTP + ClientRule { leaf: "smtplib.SMTP", kind: ExternalServiceKind::HttpApi, label: "SMTP (Python)" }, + ClientRule { leaf: "Mail::send", kind: ExternalServiceKind::HttpApi, label: "Laravel Mail" }, + ClientRule { leaf: "ActionMailer", kind: ExternalServiceKind::HttpApi, label: "Rails ActionMailer" }, + + // DNS + ClientRule { leaf: "socket.gethostbyname", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" }, + ClientRule { leaf: "dns.lookup", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" }, + ClientRule { leaf: "net.LookupIP", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" }, +]; + +pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec { + let mut out: Vec = Vec::new(); + let mut seen: std::collections::HashSet<(String, String)> = + std::collections::HashSet::new(); + for (_key, summary) in summaries.iter() { + for callee in &summary.callees { + let Some(rule) = match_rule(&callee.name) else { + continue; + }; + let location = call_site_location(summary); + if !seen.insert((location.file.clone(), rule.label.to_string())) { + continue; + } + 
out.push(SurfaceNode::ExternalService(ExternalService { + location, + kind: rule.kind, + label: rule.label.to_string(), + })); + } + } + // Also surface any function whose own sink_caps include SSRF — the + // function itself is an outbound network call site even if the + // direct callee did not match the rule list. Use the function's + // file as the location and synthesise a generic label. + for (_key, summary) in summaries.iter() { + if summary.sink_caps().contains(Cap::SSRF) { + let loc = call_site_location(summary); + let dedup = (loc.file.clone(), "Outbound HTTP".to_string()); + if seen.insert(dedup) { + out.push(SurfaceNode::ExternalService(ExternalService { + location: loc, + kind: ExternalServiceKind::HttpApi, + label: "Outbound HTTP".to_string(), + })); + } + } + } + out +} + +fn match_rule(callee: &str) -> Option<&'static ClientRule> { + let trimmed = callee.trim(); + let leaf = trimmed.rsplit("::").next().unwrap_or(trimmed); + let leaf = leaf.rsplit('.').next().unwrap_or(leaf); + CLIENT_RULES.iter().find(|r| { + trimmed.to_ascii_lowercase().contains(&r.leaf.to_ascii_lowercase()) + || leaf.eq_ignore_ascii_case(r.leaf) + }) +} + +fn call_site_location(summary: &FuncSummary) -> SourceLocation { + SourceLocation { + file: summary.file_path.clone(), + line: 0, + col: 0, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::summary::CalleeSite; + use crate::symbol::{FuncKey, Lang}; + + #[test] + fn detects_requests_get() { + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::Python, "client.py", "fetch_user", None); + let summary = FuncSummary { + name: "fetch_user".to_string(), + file_path: "client.py".to_string(), + lang: "python".to_string(), + param_count: 0, + callees: vec![CalleeSite::bare("requests.get".to_string())], + ..Default::default() + }; + gs.insert(key, summary); + let nodes = detect_external_services(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::ExternalService(es) = &nodes[0] else { + panic!() + 
}; + assert_eq!(es.label, "requests (Python)"); + } +} diff --git a/src/surface/lang/common.rs b/src/surface/lang/common.rs new file mode 100644 index 00000000..a95dd5c1 --- /dev/null +++ b/src/surface/lang/common.rs @@ -0,0 +1,131 @@ +//! Shared helpers used by the per-(language, framework) probes. +//! +//! Each probe extracts an [`EntryPoint`] node from a parsed source file +//! by walking the framework's route declaration shape. These helpers +//! cover the bookkeeping common to every probe: building a stable +//! [`SourceLocation`] from a tree-sitter node, decoding common string +//! literal shapes, and identifier-based auth marker lookups. + +use crate::surface::{SourceLocation, relative_path_string}; +use std::path::Path; +use tree_sitter::Node; + +/// Build a [`SourceLocation`] for the start of `node`, relative to +/// `scan_root` when supplied. +pub fn loc_for(node: Node<'_>, file_rel: &str) -> SourceLocation { + let pos = node.start_position(); + SourceLocation::new(file_rel, (pos.row + 1) as u32, (pos.column + 1) as u32) +} + +/// Project-relative POSIX file string used as the [`SourceLocation`] +/// `file` field across every node a probe emits. +pub fn rel_file(path: &Path, scan_root: Option<&Path>) -> String { + relative_path_string(path, scan_root) +} + +/// Strip Python / JS / Ruby / PHP string-literal prefixes (`b"…"`, +/// `r"…"`, `f"…"`, leading `'`/`"`) and return the literal content. +/// Used by every probe that lifts a route path out of a string node. 
+pub fn unquote(raw: &str) -> String {
+    let s = raw.trim();
+    // Python prefixes are one or two letters (`b`, `rb`, `fr`, …); they only count
+    // as a prefix when a quote follows, so a bare word like `bar` stays intact.
+    let is_prefix = |c: &u8| b"bBrRfFuU".contains(c);
+    let n = s.bytes().take(2).take_while(is_prefix).count();
+    let s = if s[n..].starts_with(['\'', '"']) { &s[n..] } else { s };
+    // Peel exactly one delimiter pair so quotes inside the literal survive.
+    for q in ["\"\"\"", "'''", "\"", "'", "`"] {
+        if let Some(inner) = s.strip_prefix(q).and_then(|r| r.strip_suffix(q)) {
+            return inner.to_string();
+        }
+    }
+    // Unbalanced input: keep the lenient trimming of both ends.
+    s.trim_start_matches(['\'', '"', '`'])
+        .trim_end_matches(['\'', '"', '`'])
+        .to_string()
+}
+
+/// Read the literal text of a tree-sitter `string` node and return its
+/// unquoted content; `None` when the slice is not valid UTF-8.
+pub fn string_node_value(node: Node<'_>, bytes: &[u8]) -> Option<String> {
+    Some(unquote(node.utf8_text(bytes).ok()?))
+}
+
+/// Return `true` when the leaf segment of `text` (the part after the last
+/// `.` or `:`, so `::` paths work too) equals one of `markers`, ignoring
+/// ASCII case. Used by the probes' auth-marker allowlists.
+pub fn leaf_matches(text: &str, markers: &[&str]) -> bool {
+    let leaf = text.rsplit(['.', ':']).next().unwrap_or(text).trim();
+    markers.iter().any(|m| leaf.eq_ignore_ascii_case(m))
+}
+
+/// Walk `root` and every descendant in pre-order, invoking `visit` on each
+/// node whose kind equals `target_kind`. Recursion depth follows the depth
+/// of the syntax tree.
+pub fn for_each_node<'tree, F>(root: Node<'tree>, target_kind: &str, mut visit: F)
+where
+    F: FnMut(Node<'tree>),
+{
+    fn recurse<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, kind: &str, visit: &mut F) {
+        if node.kind() == kind {
+            visit(node);
+        }
+        let mut cursor = node.walk();
+        for child in node.children(&mut cursor) {
+            recurse(child, kind, visit);
+        }
+    }
+    recurse(root, target_kind, &mut visit);
+}
+
+/// Find the first child of `parent` whose kind matches `kind`, with a
+/// `child_by_field_name(kind)` fast path. Used by Java probes where
+/// `class_declaration` / `method_declaration` modifiers / body live as
+/// unnamed children rather than fielded children in tree-sitter-java.
+pub fn child_or_named<'tree>(parent: Node<'tree>, kind: &str) -> Option> { + if let Some(n) = parent.child_by_field_name(kind) { + return Some(n); + } + let mut cursor = parent.walk(); + parent.children(&mut cursor).find(|c| c.kind() == kind) +} + +/// Walk every descendant of `root`, invoking `visit` once per node. +/// Useful when a probe needs to look at multiple node kinds in a single +/// pass (e.g. annotations + method declarations on the same walk). +pub fn for_each_node_any<'tree, F>(root: Node<'tree>, mut visit: F) +where + F: FnMut(Node<'tree>), +{ + fn recurse<'tree, F>(node: Node<'tree>, visit: &mut F) + where + F: FnMut(Node<'tree>), + { + visit(node); + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + recurse(child, visit); + } + } + recurse(root, &mut visit); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn unquote_strips_python_prefixes() { + assert_eq!(unquote("b\"path\""), "path"); + assert_eq!(unquote("r'/api'"), "/api"); + assert_eq!(unquote("f\"/users/{id}\""), "/users/{id}"); + assert_eq!(unquote("\"plain\""), "plain"); + } + + #[test] + fn leaf_matches_handles_dot_and_colon_paths() { + assert!(leaf_matches("flask_login.login_required", &["login_required"])); + assert!(leaf_matches("Auth::JwtRequired", &["JwtRequired"])); + assert!(!leaf_matches("OtherDecorator", &["login_required"])); + } +} diff --git a/src/surface/lang/go_gin.rs b/src/surface/lang/go_gin.rs new file mode 100644 index 00000000..566e3bdf --- /dev/null +++ b/src/surface/lang/go_gin.rs @@ -0,0 +1,174 @@ +//! Go + gin framework probe. +//! +//! Detects gin route registration: +//! +//! * `r.GET("/path", handler)` / `.POST(...)` / `.PUT` / `.DELETE` +//! on a `*gin.Engine` or `*gin.RouterGroup`. +//! * `r.Group("/prefix").GET("/sub", ...)` chained shapes. +//! * `r.Use(middleware...)` followed by route registrations — the +//! middleware list is consulted for auth markers +//! ([`AUTH_MIDDLEWARES`]). +//! +//! 
Also recognises echo (`e.GET(...)`) and chi (`r.Get(...)`) by the +//! same shape — receiver name `e` / `r` / `router` / `engine`. + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub const AUTH_MIDDLEWARES: &[&str] = &[ + "AuthRequired", + "JWT", + "JWTAuth", + "Auth", + "RequireAuth", + "RequireUser", + "VerifyToken", + "BasicAuth", +]; + +const VERBS: &[&str] = &[ + "GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD", "Any", + "Get", "Post", "Put", "Delete", "Patch", "Options", "Head", +]; + +pub fn detect_gin_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_calls(tree.root_node(), &mut |call| { + if let Some(node) = match_gin_call(call, bytes, &file_rel) { + out.push(node); + } + }); + out +} + +fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if node.kind() == "call_expression" { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_calls(child, visit); + } +} + +fn match_gin_call(call: Node, bytes: &[u8], file_rel: &str) -> Option { + let func = call.child_by_field_name("function")?; + if func.kind() != "selector_expression" { + return None; + } + let operand = func.child_by_field_name("operand")?; + let field = func.child_by_field_name("field")?; + let field_text = field.utf8_text(bytes).ok()?; + if !VERBS.contains(&field_text) { + return None; + } + let operand_text = operand.utf8_text(bytes).ok()?; + if !receiver_is_gin(operand_text) { + return None; + } + let method = HttpMethod::from_ident(&field_text.to_ascii_uppercase())?; + let args = call.child_by_field_name("arguments")?; + let mut cursor = args.walk(); + let positional: Vec = args + 
.children(&mut cursor) + .filter(|n| !matches!(n.kind(), "(" | ")" | ",")) + .collect(); + let route = positional.first().and_then(|n| string_node_value(*n, bytes))?; + let handler_node = positional.iter().rev().find(|n| { + matches!( + n.kind(), + "identifier" | "selector_expression" | "func_literal" + ) + })?; + let handler_name = handler_node + .utf8_text(bytes) + .ok() + .map(str::to_string) + .unwrap_or_default(); + let auth_required = positional[1..] + .iter() + .filter(|n| !std::ptr::eq(*n, handler_node)) + .any(|n| arg_is_auth_marker(*n, bytes)); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(call, file_rel), + framework: Framework::Gin, + method, + route, + handler_name, + handler_location: SourceLocation::new( + file_rel, + (handler_node.start_position().row + 1) as u32, + (handler_node.start_position().column + 1) as u32, + ), + auth_required, + })) +} + +fn receiver_is_gin(text: &str) -> bool { + let leaf = text.rsplit('.').next().unwrap_or(text).trim(); + let lower = leaf.to_ascii_lowercase(); + lower == "r" + || lower == "g" + || lower == "e" + || lower == "router" + || lower == "engine" + || lower == "group" + || lower.ends_with("router") + || lower.ends_with("group") + || lower.ends_with("engine") +} + +fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool { + match node.kind() { + "identifier" | "selector_expression" => node + .utf8_text(bytes) + .map(|t| leaf_matches(t, AUTH_MIDDLEWARES)) + .unwrap_or(false), + "call_expression" => { + let Some(callee) = node.child_by_field_name("function") else { + return false; + }; + let Ok(text) = callee.utf8_text(bytes) else { + return false; + }; + leaf_matches(text, AUTH_MIDDLEWARES) + } + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_go::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), 
src.as_bytes().to_vec()) + } + + #[test] + fn detects_get() { + let src = "package main\nimport \"github.com/gin-gonic/gin\"\nfunc main() {\n r := gin.Default()\n r.GET(\"/users\", listUsers)\n}\nfunc listUsers(c *gin.Context) {}\n"; + let (tree, bytes) = parse(src); + let nodes = detect_gin_routes(&tree, &bytes, &PathBuf::from("main.go"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users"); + } +} diff --git a/src/surface/lang/go_http.rs b/src/surface/lang/go_http.rs new file mode 100644 index 00000000..3723b7fc --- /dev/null +++ b/src/surface/lang/go_http.rs @@ -0,0 +1,129 @@ +//! Go + `net/http` framework probe. +//! +//! Recognises the canonical route registration shapes: +//! +//! * `http.HandleFunc("/path", handler)` +//! * `http.Handle("/path", handler)` +//! * `mux.HandleFunc("/path", handler)` (any `*http.ServeMux` receiver) +//! * `http.NewServeMux()` derived receivers +//! +//! Method is `GET` by default — `net/http` registrations are +//! method-agnostic at the routing layer; the handler dispatches on +//! `r.Method` internally. 
+ +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub fn detect_go_http_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_calls(tree.root_node(), &mut |call| { + if let Some(node) = match_handle_call(call, bytes, &file_rel) { + out.push(node); + } + }); + out +} + +fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if node.kind() == "call_expression" { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_calls(child, visit); + } +} + +fn match_handle_call(call: Node, bytes: &[u8], file_rel: &str) -> Option { + let func = call.child_by_field_name("function")?; + if func.kind() != "selector_expression" { + return None; + } + let operand = func.child_by_field_name("operand")?; + let field = func.child_by_field_name("field")?; + let field_text = field.utf8_text(bytes).ok()?; + if field_text != "HandleFunc" && field_text != "Handle" { + return None; + } + let operand_text = operand.utf8_text(bytes).ok()?; + let leaf = operand_text.rsplit('.').next().unwrap_or(operand_text); + if leaf != "http" + && !operand_text.contains("Mux") + && !operand_text.contains("mux") + && !operand_text.contains("Server") + && !operand_text.contains("Router") + && !operand_text.contains("router") + { + return None; + } + let args = call.child_by_field_name("arguments")?; + let mut cursor = args.walk(); + let positional: Vec = args + .children(&mut cursor) + .filter(|n| !matches!(n.kind(), "(" | ")" | ",")) + .collect(); + if positional.len() < 2 { + return None; + } + let route = string_node_value(positional[0], bytes)?; + let handler_node = positional[1]; + let handler_name = 
handler_function_name(handler_node, bytes).unwrap_or_default(); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(call, file_rel), + framework: Framework::NetHttp, + method: HttpMethod::GET, + route, + handler_name, + handler_location: SourceLocation::new( + file_rel, + (handler_node.start_position().row + 1) as u32, + (handler_node.start_position().column + 1) as u32, + ), + auth_required: false, + })) +} + +fn handler_function_name(node: Node, bytes: &[u8]) -> Option { + match node.kind() { + "identifier" | "selector_expression" => node.utf8_text(bytes).ok().map(str::to_string), + "func_literal" => Some("anonymous".to_string()), + _ => node.utf8_text(bytes).ok().map(str::to_string), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_go::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_handle_func() { + let src = "package main\nimport \"net/http\"\nfunc main() {\n http.HandleFunc(\"/users\", listUsers)\n}\nfunc listUsers(w http.ResponseWriter, r *http.Request) {}\n"; + let (tree, bytes) = parse(src); + let nodes = detect_go_http_routes(&tree, &bytes, &PathBuf::from("main.go"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.framework, Framework::NetHttp); + assert_eq!(ep.route, "/users"); + assert_eq!(ep.handler_name, "listUsers"); + } +} diff --git a/src/surface/lang/java_quarkus.rs b/src/surface/lang/java_quarkus.rs new file mode 100644 index 00000000..957344b9 --- /dev/null +++ b/src/surface/lang/java_quarkus.rs @@ -0,0 +1,297 @@ +//! Java + Quarkus framework probe. +//! +//! Quarkus uses JAX-RS (`jakarta.ws.rs`) for HTTP routing on top of +//! `RESTEasy Reactive` / `Quarkus REST`. The annotations are +//! 
identical to plain JAX-RS, so this probe overlaps with +//! [`super::java_servlet`] but emits the [`Framework::JaxRs`] tag with +//! a Quarkus-specific recogniser: +//! +//! * The class is annotated with `@ApplicationScoped`, +//! `@RequestScoped`, or `@Singleton` (Quarkus DI markers); OR +//! * The file imports a `quarkus`-prefixed package; OR +//! * The class extends a Quarkus-known reactive base type +//! (`PanacheRepository`, `Multi`, `Uni`). +//! +//! Auth markers: `@Authenticated`, `@RolesAllowed`, `@PermitAll`, +//! `@DenyAll` (Quarkus Security). + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub const AUTH_ANNOTATIONS: &[&str] = &[ + "Authenticated", + "RolesAllowed", + "DenyAll", + "RequiresAuthentication", +]; + +const QUARKUS_DI: &[&str] = &[ + "ApplicationScoped", + "RequestScoped", + "Singleton", + "Dependent", + "Path", +]; + +const JAXRS_VERBS: &[(&str, HttpMethod)] = &[ + ("GET", HttpMethod::GET), + ("POST", HttpMethod::POST), + ("PUT", HttpMethod::PUT), + ("DELETE", HttpMethod::DELETE), + ("PATCH", HttpMethod::PATCH), + ("HEAD", HttpMethod::HEAD), + ("OPTIONS", HttpMethod::OPTIONS), +]; + +pub fn detect_quarkus_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + if !file_uses_quarkus(tree.root_node(), bytes) { + return Vec::new(); + } + let mut out = Vec::new(); + walk_classes(tree.root_node(), &mut |class| { + if !class_is_quarkus_resource(class, bytes) { + return; + } + let class_path = class_path_annotation(class, bytes).unwrap_or_default(); + let class_auth = class_has_auth_annotation(class, bytes); + let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else { + return; + }; + let mut cursor = body.walk(); + for member in body.children(&mut cursor) { 
+ if member.kind() != "method_declaration" { + continue; + } + if let Some((method, method_path, method_auth)) = + method_mapping(member, bytes, &class_path) + { + let name = method_name(member, bytes).unwrap_or_default(); + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(member, &file_rel), + framework: Framework::JaxRs, + method, + route: method_path, + handler_name: name, + handler_location: SourceLocation::new( + file_rel.clone(), + (member.start_position().row + 1) as u32, + (member.start_position().column + 1) as u32, + ), + auth_required: class_auth || method_auth, + })); + } + } + }); + out +} + +fn file_uses_quarkus(root: Node, bytes: &[u8]) -> bool { + let mut cursor = root.walk(); + for child in root.children(&mut cursor) { + if child.kind() == "import_declaration" + && let Ok(text) = child.utf8_text(bytes) + && (text.contains("io.quarkus") || text.contains("jakarta.ws.rs")) + { + return true; + } + } + false +} + +fn class_is_quarkus_resource(class: Node, bytes: &[u8]) -> bool { + let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") { + Some(m) => m, + None => return false, + }; + let mut cursor = modifiers.walk(); + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + if let Some(name) = annotation_name(ann, bytes) { + let leaf = name.rsplit('.').next().unwrap_or(&name); + if QUARKUS_DI.iter().any(|d| leaf.eq_ignore_ascii_case(d)) { + return true; + } + } + } + false +} + +fn walk_classes<'tree, F>(node: Node<'tree>, visit: &mut F) +where + F: FnMut(Node<'tree>), +{ + if node.kind() == "class_declaration" { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_classes(child, visit); + } +} + +fn class_path_annotation(class: Node, bytes: &[u8]) -> Option { + annotation_string_arg(class, bytes, "Path") +} + +fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool { + let modifiers = match 
crate::surface::lang::common::child_or_named(class, "modifiers") { + Some(m) => m, + None => return false, + }; + let mut cursor = modifiers.walk(); + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + if let Some(name) = annotation_name(ann, bytes) { + let leaf = name.rsplit('.').next().unwrap_or(&name); + if AUTH_ANNOTATIONS.iter().any(|a| leaf.eq_ignore_ascii_case(a)) { + return true; + } + } + } + false +} + +fn method_mapping(method: Node, bytes: &[u8], class_path: &str) -> Option<(HttpMethod, String, bool)> { + let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?; + let mut cursor = modifiers.walk(); + let mut verb: Option = None; + let mut method_path = String::new(); + let mut auth = false; + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + let Some(name) = annotation_name(ann, bytes) else { + continue; + }; + let leaf = name.rsplit('.').next().unwrap_or(&name); + if let Some((_, m)) = JAXRS_VERBS.iter().find(|(n, _)| n.eq_ignore_ascii_case(leaf)) { + verb = Some(*m); + } + if leaf == "Path" + && let Some(p) = annotation_string_arg_from_node(ann, bytes) + { + method_path = p; + } + if AUTH_ANNOTATIONS.iter().any(|a| leaf.eq_ignore_ascii_case(a)) { + auth = true; + } + } + let v = verb?; + let combined = if class_path.is_empty() { + method_path + } else if method_path.is_empty() { + class_path.to_string() + } else { + format!("{}/{}", class_path.trim_end_matches('/'), method_path.trim_start_matches('/')) + }; + Some((v, combined, auth)) +} + +fn annotation_string_arg(class: Node, bytes: &[u8], target_name: &str) -> Option { + let modifiers = crate::surface::lang::common::child_or_named(class, "modifiers")?; + let mut cursor = modifiers.walk(); + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + let Some(name) = annotation_name(ann, bytes) else { + continue; + }; + let leaf = 
name.rsplit('.').next().unwrap_or(&name); + if leaf == target_name { + return annotation_string_arg_from_node(ann, bytes); + } + } + None +} + +fn annotation_string_arg_from_node(ann: Node, bytes: &[u8]) -> Option { + let args = ann.child_by_field_name("arguments")?; + let raw = args.utf8_text(bytes).ok()?; + let start = raw.find('"')? + 1; + let end = raw[start..].find('"')? + start; + Some(raw[start..end].to_string()) +} + +fn annotation_name(ann: Node, bytes: &[u8]) -> Option { + ann.child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + .map(str::to_string) +} + +fn method_name(method: Node, bytes: &[u8]) -> Option { + method + .child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + .map(str::to_string) +} + +fn is_annotation(node: Node) -> bool { + matches!(node.kind(), "annotation" | "marker_annotation") +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_java::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_quarkus_resource() { + let src = r#" +import io.quarkus.runtime.Quarkus; +import jakarta.ws.rs.GET; +import jakarta.ws.rs.Path; + +@ApplicationScoped +@Path("/api") +public class GreetResource { + @GET + @Path("/hello") + public String hello() { return "hi"; } +} +"#; + let (tree, bytes) = parse(src); + let nodes = detect_quarkus_routes(&tree, &bytes, &PathBuf::from("GreetResource.java"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/api/hello"); + } + + #[test] + fn ignores_non_quarkus_class() { + let src = r#" +public class C { + @GetMapping("/x") + public void x() {} +} +"#; + let (tree, bytes) = parse(src); + let nodes = detect_quarkus_routes(&tree, &bytes, 
&PathBuf::from("C.java"), None); + assert!(nodes.is_empty()); + } +} diff --git a/src/surface/lang/java_servlet.rs b/src/surface/lang/java_servlet.rs new file mode 100644 index 00000000..d3dced74 --- /dev/null +++ b/src/surface/lang/java_servlet.rs @@ -0,0 +1,285 @@ +//! Java + Servlet (JAX-RS / Jakarta REST) framework probe. +//! +//! Recognises: +//! +//! * `@WebServlet("/path")` annotated `HttpServlet` subclasses — every +//! `doGet` / `doPost` / `doPut` / `doDelete` method is one entry-point. +//! * `@Path("/path")` annotated JAX-RS resource methods with verb +//! annotation `@GET` / `@POST` / `@PUT` / `@DELETE` / `@PATCH`. +//! +//! Auth markers: `@DenyAll`, `@RolesAllowed`, `@PermitAll` — the +//! presence of any of these implies a security configuration is +//! actively gating the resource (we report `auth_required = true` +//! conservatively for `@RolesAllowed` and `@DenyAll`). + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub const AUTH_ANNOTATIONS: &[&str] = &[ + "RolesAllowed", + "DenyAll", + "RequiresAuthentication", + "RequiresUser", +]; + +const SERVLET_VERBS: &[(&str, HttpMethod)] = &[ + ("doGet", HttpMethod::GET), + ("doPost", HttpMethod::POST), + ("doPut", HttpMethod::PUT), + ("doDelete", HttpMethod::DELETE), + ("doHead", HttpMethod::HEAD), + ("doOptions", HttpMethod::OPTIONS), +]; + +const JAXRS_VERBS: &[(&str, HttpMethod)] = &[ + ("GET", HttpMethod::GET), + ("POST", HttpMethod::POST), + ("PUT", HttpMethod::PUT), + ("DELETE", HttpMethod::DELETE), + ("PATCH", HttpMethod::PATCH), + ("HEAD", HttpMethod::HEAD), + ("OPTIONS", HttpMethod::OPTIONS), +]; + +pub fn detect_servlet_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_classes(tree.root_node(), 
&mut |class| { + let class_path_servlet = class_web_servlet_path(class, bytes); + let class_path_jaxrs = class_jaxrs_path(class, bytes); + let class_auth = class_has_auth_annotation(class, bytes); + let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else { + return; + }; + let mut cursor = body.walk(); + for member in body.children(&mut cursor) { + if member.kind() != "method_declaration" { + continue; + } + let name = method_name(member, bytes).unwrap_or_default(); + + // HttpServlet shape + if let Some(class_path) = class_path_servlet.as_deref() + && let Some((_, method)) = SERVLET_VERBS + .iter() + .find(|(verb, _)| *verb == name.as_str()) + { + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(member, &file_rel), + framework: Framework::JaxRs, + method: *method, + route: class_path.to_string(), + handler_name: name.clone(), + handler_location: SourceLocation::new( + file_rel.clone(), + (member.start_position().row + 1) as u32, + (member.start_position().column + 1) as u32, + ), + auth_required: class_auth, + })); + continue; + } + + // JAX-RS shape + if let Some((method, method_path, method_auth)) = + jaxrs_method_mapping(member, bytes, class_path_jaxrs.as_deref().unwrap_or("")) + { + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(member, &file_rel), + framework: Framework::JaxRs, + method, + route: method_path, + handler_name: name, + handler_location: SourceLocation::new( + file_rel.clone(), + (member.start_position().row + 1) as u32, + (member.start_position().column + 1) as u32, + ), + auth_required: class_auth || method_auth, + })); + } + } + }); + out +} + +fn walk_classes<'tree, F>(node: Node<'tree>, visit: &mut F) +where + F: FnMut(Node<'tree>), +{ + if node.kind() == "class_declaration" { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_classes(child, visit); + } +} + +fn class_web_servlet_path(class: Node, bytes: &[u8]) -> Option { + 
annotation_string_arg(class, bytes, "WebServlet") +} + +fn class_jaxrs_path(class: Node, bytes: &[u8]) -> Option { + annotation_string_arg(class, bytes, "Path") +} + +fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool { + let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") { + Some(m) => m, + None => return false, + }; + let mut cursor = modifiers.walk(); + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + if let Some(name) = annotation_name(ann, bytes) + && AUTH_ANNOTATIONS.iter().any(|a| { + name.rsplit('.').next().unwrap_or(&name).eq_ignore_ascii_case(a) + }) + { + return true; + } + } + false +} + +fn jaxrs_method_mapping(method: Node, bytes: &[u8], class_path: &str) -> Option<(HttpMethod, String, bool)> { + let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?; + let mut cursor = modifiers.walk(); + let mut verb: Option = None; + let mut method_path = String::new(); + let mut auth = false; + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + let Some(name) = annotation_name(ann, bytes) else { + continue; + }; + let leaf = name.rsplit('.').next().unwrap_or(&name); + if let Some((_, m)) = JAXRS_VERBS.iter().find(|(n, _)| n.eq_ignore_ascii_case(leaf)) { + verb = Some(*m); + } + if leaf == "Path" + && let Some(path) = annotation_string_arg_from_node(ann, bytes) + { + method_path = path; + } + if AUTH_ANNOTATIONS + .iter() + .any(|a| leaf.eq_ignore_ascii_case(a)) + { + auth = true; + } + } + let v = verb?; + let combined = if class_path.is_empty() { + method_path + } else if method_path.is_empty() { + class_path.to_string() + } else { + format!("{}/{}", class_path.trim_end_matches('/'), method_path.trim_start_matches('/')) + }; + Some((v, combined, auth)) +} + +fn annotation_string_arg(class: Node, bytes: &[u8], target_name: &str) -> Option { + let modifiers = crate::surface::lang::common::child_or_named(class, 
"modifiers")?; + let mut cursor = modifiers.walk(); + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + let Some(name) = annotation_name(ann, bytes) else { + continue; + }; + let leaf = name.rsplit('.').next().unwrap_or(&name); + if leaf == target_name { + return annotation_string_arg_from_node(ann, bytes); + } + } + None +} + +fn annotation_string_arg_from_node(ann: Node, bytes: &[u8]) -> Option { + let args = ann.child_by_field_name("arguments")?; + let raw = args.utf8_text(bytes).ok()?; + let start = raw.find('"')? + 1; + let end = raw[start..].find('"')? + start; + Some(raw[start..end].to_string()) +} + +fn annotation_name(ann: Node, bytes: &[u8]) -> Option { + ann.child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + .map(str::to_string) +} + +fn method_name(method: Node, bytes: &[u8]) -> Option { + method + .child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + .map(str::to_string) +} + +fn is_annotation(node: Node) -> bool { + matches!(node.kind(), "annotation" | "marker_annotation") +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_java::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_jaxrs_get() { + let src = r#" +@Path("/users") +public class UsersResource { + @GET + @Path("/{id}") + public User get() { return null; } +} +"#; + let (tree, bytes) = parse(src); + let nodes = detect_servlet_routes(&tree, &bytes, &PathBuf::from("UsersResource.java"), None); + assert!(!nodes.is_empty()); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users/{id}"); + } + + #[test] + fn detects_servlet_doget() { + let src = r#" +@WebServlet("/admin") +public class Admin extends HttpServlet { + public 
void doGet(HttpServletRequest req, HttpServletResponse resp) {} + public void doPost(HttpServletRequest req, HttpServletResponse resp) {} +} +"#; + let (tree, bytes) = parse(src); + let nodes = detect_servlet_routes(&tree, &bytes, &PathBuf::from("Admin.java"), None); + assert_eq!(nodes.len(), 2); + } +} diff --git a/src/surface/lang/java_spring.rs b/src/surface/lang/java_spring.rs new file mode 100644 index 00000000..5018ea72 --- /dev/null +++ b/src/surface/lang/java_spring.rs @@ -0,0 +1,305 @@ +//! Java + Spring framework probe. +//! +//! Recognises Spring controller methods annotated with +//! `@RequestMapping` / `@GetMapping` / `@PostMapping` / `@PutMapping` +//! / `@PatchMapping` / `@DeleteMapping`. The route path is the +//! concatenation of class-level `@RequestMapping(value=...)` / +//! `@RestController` and method-level `value=...` arguments. +//! +//! `auth_required` fires when the method, the enclosing class, or the +//! `value=` argument lists a Spring-Security annotation +//! ([`AUTH_ANNOTATIONS`]). 
+ +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, unquote}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub const AUTH_ANNOTATIONS: &[&str] = &[ + "PreAuthorize", + "PostAuthorize", + "Secured", + "RolesAllowed", + "AuthenticationPrincipal", +]; + +const MAPPING_ANNOTATIONS: &[(&str, Option)] = &[ + ("RequestMapping", None), + ("GetMapping", Some(HttpMethod::GET)), + ("PostMapping", Some(HttpMethod::POST)), + ("PutMapping", Some(HttpMethod::PUT)), + ("PatchMapping", Some(HttpMethod::PATCH)), + ("DeleteMapping", Some(HttpMethod::DELETE)), +]; + +pub fn detect_spring_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_classes(tree.root_node(), &mut |class| { + let class_path = class_request_mapping_path(class, bytes); + let class_auth = class_has_auth_annotation(class, bytes); + let Some(body) = crate::surface::lang::common::child_or_named(class, "class_body") else { + return; + }; + let mut cursor = body.walk(); + for member in body.children(&mut cursor) { + if member.kind() != "method_declaration" { + continue; + } + if let Some((method, route_path, auth)) = + method_mapping(member, bytes, &class_path) + { + let auth_required = class_auth || auth; + let handler_name = method_name(member, bytes).unwrap_or_default(); + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(member, &file_rel), + framework: Framework::Spring, + method, + route: route_path, + handler_name, + handler_location: SourceLocation::new( + file_rel.clone(), + (member.start_position().row + 1) as u32, + (member.start_position().column + 1) as u32, + ), + auth_required, + })); + } + } + }); + out +} + +fn walk_classes<'tree, F>(node: Node<'tree>, visit: &mut F) +where + F: FnMut(Node<'tree>), +{ + if node.kind() == 
"class_declaration" { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_classes(child, visit); + } +} + +fn class_request_mapping_path(class: Node, bytes: &[u8]) -> String { + let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") { + Some(m) => m, + None => return String::new(), + }; + let mut cursor = modifiers.walk(); + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + let Some((name, args_text)) = annotation_name_and_args(ann, bytes) else { + continue; + }; + if name == "RequestMapping" { + return extract_first_path(&args_text); + } + } + String::new() +} + +fn class_has_auth_annotation(class: Node, bytes: &[u8]) -> bool { + let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") { + Some(m) => m, + None => return false, + }; + let mut cursor = modifiers.walk(); + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + if let Some((name, _)) = annotation_name_and_args(ann, bytes) + && AUTH_ANNOTATIONS + .iter() + .any(|a| leaf_matches(&name, &[a])) + { + return true; + } + } + false +} + +fn method_mapping( + method: Node, + bytes: &[u8], + class_path: &str, +) -> Option<(HttpMethod, String, bool)> { + let modifiers = crate::surface::lang::common::child_or_named(method, "modifiers")?; + let mut cursor = modifiers.walk(); + let mut auth = false; + let mut found: Option<(HttpMethod, String)> = None; + for ann in modifiers.children(&mut cursor) { + if !is_annotation(ann) { + continue; + } + let Some((name, args_text)) = annotation_name_and_args(ann, bytes) else { + continue; + }; + if AUTH_ANNOTATIONS + .iter() + .any(|a| leaf_matches(&name, &[a])) + { + auth = true; + } + if found.is_some() { + continue; + } + for (ann_name, default_method) in MAPPING_ANNOTATIONS { + if name == *ann_name { + let mut method_route = extract_first_path(&args_text); + if method_route.is_empty() && 
/// Extracts the route path from the raw text of a mapping annotation's
/// argument list, e.g. `("/users")` or `(value = "/users", produces = "x")`.
///
/// Only string literals that can denote a path are considered: a positional
/// argument, or a named `value = ...` / `path = ...` argument. Literals that
/// belong to other attributes (`produces`, `consumes`, `name`, ...) are
/// skipped so they are not mistaken for the route. For array values
/// (`{"/a", "/b"}`) the first element is returned.
///
/// The literal is returned as written in the source (escape sequences are
/// kept verbatim); an escaped quote `\"` does not terminate it. Returns an
/// empty string when no suitable, properly terminated literal exists.
fn extract_first_path(args_text: &str) -> String {
    let trimmed = args_text.trim();
    // The `arguments` node text normally includes the surrounding parentheses.
    let inner = trimmed
        .strip_prefix('(')
        .and_then(|rest| rest.strip_suffix(')'))
        .unwrap_or(trimmed);
    split_top_level_args(inner)
        .into_iter()
        .filter(|arg| is_path_argument(arg))
        .find_map(first_string_literal)
        .unwrap_or_default()
}

/// Splits an argument list on commas that are outside string literals and
/// outside any nested `()`, `{}` or `[]` group.
fn split_top_level_args(text: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;
    let mut start = 0;
    for (idx, ch) in text.char_indices() {
        if in_string {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }
        match ch {
            '"' => in_string = true,
            '(' | '{' | '[' => depth += 1,
            ')' | '}' | ']' => depth = depth.saturating_sub(1),
            ',' if depth == 0 => {
                parts.push(&text[start..idx]);
                // `,` is a single byte, so `idx + 1` is a valid char boundary.
                start = idx + 1;
            }
            _ => {}
        }
    }
    parts.push(&text[start..]);
    parts
}

/// Returns `true` for positional arguments and for `value = ...` / `path = ...`.
fn is_path_argument(arg: &str) -> bool {
    // Only an `=` that appears before the first literal can be a key separator.
    let before_literal = arg.find('"').map_or(arg, |quote| &arg[..quote]);
    match before_literal.split_once('=') {
        Some((key, _)) => matches!(key.trim(), "value" | "path"),
        None => true,
    }
}

/// Returns the contents of the first terminated `"..."` literal in `arg`.
fn first_string_literal(arg: &str) -> Option<String> {
    let open = arg.find('"')?;
    let mut literal = String::new();
    let mut chars = arg[open + 1..].chars();
    while let Some(ch) = chars.next() {
        match ch {
            '"' => return Some(literal),
            '\\' => {
                // Keep the escape sequence as written; it never ends the literal.
                literal.push('\\');
                literal.push(chars.next()?);
            }
            _ => literal.push(ch),
        }
    }
    None
}
SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.route, "/api/users"); + } + + #[test] + fn pre_authorize_marks_auth() { + let src = r#" +public class C { + @PreAuthorize("hasRole('ADMIN')") + @GetMapping("/admin") + public void admin() {} +} +"#; + let (tree, bytes) = parse(src); + let nodes = detect_spring_routes(&tree, &bytes, &PathBuf::from("C.java"), None); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert!(ep.auth_required); + } +} diff --git a/src/surface/lang/js_express.rs b/src/surface/lang/js_express.rs new file mode 100644 index 00000000..ddf59d38 --- /dev/null +++ b/src/surface/lang/js_express.rs @@ -0,0 +1,231 @@ +//! JavaScript / TypeScript + Express framework probe. +//! +//! Detects route registration calls of the form `app.METHOD(path, ...)` +//! / `router.METHOD(path, ...)` for the standard set of HTTP verbs plus +//! `all` / `use`. The handler is the *last* function-shaped argument +//! (Express convention: `(path, ...middleware, handler)`). +//! +//! `auth_required` fires when any positional argument before the +//! handler is an identifier matching one of the auth-middleware names +//! in [`AUTH_MIDDLEWARES`] (passport's `requireAuth`, custom guards), +//! or when an inline `passport.authenticate(...)` call appears in the +//! middleware list. 
+ +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub const AUTH_MIDDLEWARES: &[&str] = &[ + "requireAuth", + "requireUser", + "isAuthenticated", + "ensureAuthenticated", + "ensureLoggedIn", + "authenticate", + "authMiddleware", + "verifyToken", + "verifyJwt", + "checkJwt", + "passport", + "jwt", +]; + +const VERBS: &[&str] = &[ + "get", "post", "put", "delete", "patch", "options", "head", "all", +]; + +pub fn detect_express_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_calls(tree.root_node(), &mut |call| { + if let Some(node) = match_express_call(call, bytes, &file_rel) { + out.push(node); + } + }); + out +} + +fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if matches!(node.kind(), "call_expression") { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_calls(child, visit); + } +} + +fn match_express_call(call: Node, bytes: &[u8], file_rel: &str) -> Option { + let func = call.child_by_field_name("function")?; + if func.kind() != "member_expression" { + return None; + } + let object = func.child_by_field_name("object")?; + if !receiver_is_express(object, bytes) { + return None; + } + let prop = func.child_by_field_name("property")?; + let prop_text = prop.utf8_text(bytes).ok()?; + if !VERBS.contains(&prop_text) { + return None; + } + let method = HttpMethod::from_ident(prop_text).unwrap_or(HttpMethod::GET); + let args = call.child_by_field_name("arguments")?; + let mut cursor = args.walk(); + let mut positional: Vec = args.children(&mut cursor).collect(); + positional.retain(|n| n.kind() != "(" && n.kind() != ")" && n.kind() != ","); + let route = 
positional + .first() + .filter(|n| n.kind() == "string" || n.kind() == "template_string") + .and_then(|n| string_node_value(*n, bytes)) + .unwrap_or_default(); + if route.is_empty() && prop_text != "use" { + // bare `app.use(handler)` is middleware, not an entry point + return None; + } + let handler_node = find_handler(&positional)?; + let handler_id = handler_node.id(); + let auth_required = positional[1..] + .iter() + .filter(|n| n.id() != handler_id) + .any(|n| arg_is_auth_marker(*n, bytes)); + let handler_name = handler_function_name(handler_node, bytes).unwrap_or_default(); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(call, file_rel), + framework: Framework::Express, + method, + route, + handler_name, + handler_location: SourceLocation::new( + file_rel, + (handler_node.start_position().row + 1) as u32, + (handler_node.start_position().column + 1) as u32, + ), + auth_required, + })) +} + +fn find_handler<'a>(positional: &[Node<'a>]) -> Option> { + positional + .iter() + .rev() + .find(|n| { + matches!( + n.kind(), + "arrow_function" + | "function" + | "function_expression" + | "function_declaration" + | "identifier" + | "member_expression" + ) + }) + .copied() +} + +fn handler_function_name(node: Node, bytes: &[u8]) -> Option { + if matches!(node.kind(), "identifier" | "member_expression") { + return node.utf8_text(bytes).ok().map(str::to_string); + } + if let Some(name_node) = node.child_by_field_name("name") + && let Ok(name) = name_node.utf8_text(bytes) + { + return Some(name.to_string()); + } + None +} + +fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool { + match node.kind() { + "identifier" | "member_expression" => node + .utf8_text(bytes) + .map(|t| leaf_matches(t, AUTH_MIDDLEWARES)) + .unwrap_or(false), + "call_expression" => { + let Some(callee) = node.child_by_field_name("function") else { + return false; + }; + let Ok(text) = callee.utf8_text(bytes) else { + return false; + }; + leaf_matches(text, AUTH_MIDDLEWARES) || 
text.contains("passport.authenticate") + } + _ => false, + } +} + +fn receiver_is_express(object: Node, bytes: &[u8]) -> bool { + fn name_matches(text: &str) -> bool { + let lower = text.to_ascii_lowercase(); + lower == "app" + || lower == "router" + || lower == "server" + || lower.ends_with("_app") + || lower.ends_with("router") + || lower.ends_with("api") + } + match object.kind() { + "identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches), + "member_expression" => object + .child_by_field_name("property") + .and_then(|p| p.utf8_text(bytes).ok()) + .is_some_and(name_matches), + "call_expression" => { + let Some(callee) = object.child_by_field_name("function") else { + return false; + }; + let Ok(text) = callee.utf8_text(bytes) else { + return false; + }; + let leaf = text.rsplit('.').next().unwrap_or(text); + leaf == "express" || leaf == "Router" || leaf == "createApp" + } + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_javascript::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_get_route() { + let src = "const app = express();\napp.get('/users', (req, res) => res.send('ok'));\n"; + let (tree, bytes) = parse(src); + let nodes = detect_express_routes(&tree, &bytes, &PathBuf::from("server.js"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.framework, Framework::Express); + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users"); + } + + #[test] + fn detects_auth_middleware() { + let src = "app.post('/secret', requireAuth, (req, res) => {});\n"; + let (tree, bytes) = parse(src); + let nodes = detect_express_routes(&tree, &bytes, &PathBuf::from("server.js"), None); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + 
panic!() + }; + assert!(ep.auth_required); + } +} diff --git a/src/surface/lang/js_koa.rs b/src/surface/lang/js_koa.rs new file mode 100644 index 00000000..f1ad29f2 --- /dev/null +++ b/src/surface/lang/js_koa.rs @@ -0,0 +1,193 @@ +//! JavaScript / TypeScript + Koa framework probe. +//! +//! Koa apps register routes through `koa-router` (or `@koa/router`): +//! `router.get(path, handler)`, `router.post(path, ...middleware, +//! handler)`, etc. The receiver is named `router`, `r`, or has a +//! `_router`/`Router` suffix. Additional Koa-specific recognition: +//! +//! * `router.use('/path', subrouter.routes())` is *not* an +//! entry-point — the inner middleware chain is. Filtered by +//! ignoring `use` for path-less middleware mounting. + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub const AUTH_MIDDLEWARES: &[&str] = &[ + "requireAuth", + "requireUser", + "isAuthenticated", + "ensureAuthenticated", + "authenticate", + "authMiddleware", + "verifyToken", + "verifyJwt", + "checkJwt", + "passport", + "jwt", + "koaJwt", +]; + +const VERBS: &[&str] = &[ + "get", "post", "put", "delete", "patch", "options", "head", "all", +]; + +pub fn detect_koa_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_calls(tree.root_node(), &mut |call| { + if let Some(node) = match_koa_call(call, bytes, &file_rel) { + out.push(node); + } + }); + out +} + +fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if matches!(node.kind(), "call_expression") { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_calls(child, visit); + } +} + +fn match_koa_call(call: Node, bytes: &[u8], 
file_rel: &str) -> Option { + let func = call.child_by_field_name("function")?; + if func.kind() != "member_expression" { + return None; + } + let object = func.child_by_field_name("object")?; + if !receiver_is_koa_router(object, bytes) { + return None; + } + let prop = func.child_by_field_name("property")?; + let prop_text = prop.utf8_text(bytes).ok()?; + if !VERBS.contains(&prop_text) { + return None; + } + let method = HttpMethod::from_ident(prop_text).unwrap_or(HttpMethod::GET); + let args = call.child_by_field_name("arguments")?; + let mut cursor = args.walk(); + let mut positional: Vec = args.children(&mut cursor).collect(); + positional.retain(|n| n.kind() != "(" && n.kind() != ")" && n.kind() != ","); + let route_idx = positional + .iter() + .position(|n| matches!(n.kind(), "string" | "template_string"))?; + let route = string_node_value(positional[route_idx], bytes).unwrap_or_default(); + let handler_node = positional.iter().rev().find(|n| { + matches!( + n.kind(), + "arrow_function" + | "function" + | "function_expression" + | "function_declaration" + | "identifier" + | "member_expression" + ) + })?; + let auth_required = positional + .iter() + .filter(|n| !std::ptr::eq(*n, handler_node)) + .any(|n| arg_is_auth_marker(*n, bytes)); + let handler_name = handler_function_name(*handler_node, bytes).unwrap_or_default(); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(call, file_rel), + framework: Framework::Express, // koa shares the Express variant tag — Phase 22 reuses + method, + route, + handler_name, + handler_location: SourceLocation::new( + file_rel, + (handler_node.start_position().row + 1) as u32, + (handler_node.start_position().column + 1) as u32, + ), + auth_required, + })) +} + +fn handler_function_name(node: Node, bytes: &[u8]) -> Option { + if matches!(node.kind(), "identifier" | "member_expression") { + return node.utf8_text(bytes).ok().map(str::to_string); + } + if let Some(name_node) = node.child_by_field_name("name") + && let 
Ok(name) = name_node.utf8_text(bytes) + { + return Some(name.to_string()); + } + None +} + +fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool { + match node.kind() { + "identifier" | "member_expression" => node + .utf8_text(bytes) + .map(|t| leaf_matches(t, AUTH_MIDDLEWARES)) + .unwrap_or(false), + "call_expression" => { + let Some(callee) = node.child_by_field_name("function") else { + return false; + }; + let Ok(text) = callee.utf8_text(bytes) else { + return false; + }; + leaf_matches(text, AUTH_MIDDLEWARES) + } + _ => false, + } +} + +fn receiver_is_koa_router(object: Node, bytes: &[u8]) -> bool { + fn name_matches(text: &str) -> bool { + let lower = text.to_ascii_lowercase(); + lower == "router" || lower == "r" || lower.ends_with("_router") || lower.ends_with("router") + } + match object.kind() { + "identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches), + "member_expression" => object + .child_by_field_name("property") + .and_then(|p| p.utf8_text(bytes).ok()) + .is_some_and(name_matches), + "call_expression" => { + let Some(callee) = object.child_by_field_name("function") else { + return false; + }; + let Ok(text) = callee.utf8_text(bytes) else { + return false; + }; + let leaf = text.rsplit('.').next().unwrap_or(text); + leaf == "Router" || leaf == "KoaRouter" + } + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_javascript::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_router_get() { + let src = "const router = new Router();\nrouter.get('/users', async ctx => { ctx.body = []; });\n"; + let (tree, bytes) = parse(src); + let nodes = detect_koa_routes(&tree, &bytes, &PathBuf::from("server.js"), None); + assert_eq!(nodes.len(), 1); + } +} diff --git a/src/surface/lang/mod.rs b/src/surface/lang/mod.rs 
index 1dbe16c3..864ea3b5 100644 --- a/src/surface/lang/mod.rs +++ b/src/surface/lang/mod.rs @@ -1,6 +1,37 @@ -//! Per-language framework probes. Phase 21 ships Python + Flask; -//! Phase 22 generalises to FastAPI / Django, Java Spring / JAX-RS, -//! Ruby Rails / Sinatra, Go net/http / gin, Rust axum / actix / -//! rocket, JS/TS Express + Next.js. +//! Per-language framework probes. +//! +//! Phase 21 shipped Python + Flask. Phase 22 generalises detection to: +//! Python (FastAPI, Django), JS/TS (Express, Koa, Next.js), Java +//! (Spring, Servlet/JAX-RS, Quarkus), Go (`net/http`, gin), PHP +//! (Laravel, Slim), Ruby (Sinatra, Rails), Rust (axum, actix-web). +//! +//! Every probe exposes one public `detect_<framework>_routes` function +//! returning `Vec<SurfaceNode>` (one [`super::SurfaceNode::EntryPoint`] +//! per recognised route). Probes are pure functions — no I/O, no +//! state. + +pub mod common; pub mod python_flask; +pub mod python_fastapi; +pub mod python_django; + +pub mod js_express; +pub mod js_koa; +pub mod ts_next; + +pub mod java_spring; +pub mod java_servlet; +pub mod java_quarkus; + +pub mod go_http; +pub mod go_gin; + +pub mod php_laravel; +pub mod php_slim; + +pub mod ruby_sinatra; +pub mod ruby_rails; + +pub mod rust_actix; +pub mod rust_axum; diff --git a/src/surface/lang/php_laravel.rs b/src/surface/lang/php_laravel.rs new file mode 100644 index 00000000..da90accc --- /dev/null +++ b/src/surface/lang/php_laravel.rs @@ -0,0 +1,167 @@ +//! PHP + Laravel framework probe. +//! +//! Recognises Laravel route declarations: +//! +//! * `Route::get('/path', $handler)` / `::post(...)` / `::put` / +//! `::patch` / `::delete` / `::any` / `::match` +//! * `Route::resource('users', UserController::class)` (omitted — +//! resource controller dispatch is path-derived; Phase 22 ships the +//! primary verb shape only) +//! +//! `auth_required` fires when the route call is followed by a +//! `->middleware('auth')` chain or the closure is wrapped in +//! 
`Route::middleware(['auth'])->group(...)`. + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +const VERBS: &[(&str, HttpMethod)] = &[ + ("get", HttpMethod::GET), + ("post", HttpMethod::POST), + ("put", HttpMethod::PUT), + ("patch", HttpMethod::PATCH), + ("delete", HttpMethod::DELETE), + ("options", HttpMethod::OPTIONS), + ("head", HttpMethod::HEAD), +]; + +pub fn detect_laravel_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_calls(tree.root_node(), &mut |call| { + if let Some(node) = match_laravel_call(call, bytes, &file_rel) { + out.push(node); + } + }); + out +} + +fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if matches!( + node.kind(), + "function_call_expression" | "scoped_call_expression" | "member_call_expression" + ) { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_calls(child, visit); + } +} + +fn match_laravel_call(call: Node, bytes: &[u8], file_rel: &str) -> Option { + if call.kind() != "scoped_call_expression" { + return None; + } + let scope = call.child_by_field_name("scope")?; + let scope_text = scope.utf8_text(bytes).ok()?; + if scope_text != "Route" && !scope_text.contains("Route") { + return None; + } + let name = call.child_by_field_name("name")?; + let name_text = name.utf8_text(bytes).ok()?; + let (_, method) = VERBS + .iter() + .find(|(v, _)| v.eq_ignore_ascii_case(name_text))?; + let args = call.child_by_field_name("arguments")?; + let mut cursor = args.walk(); + let positional: Vec = args + .children(&mut cursor) + .filter(|n| n.kind() == "argument") + .collect(); + if positional.len() < 2 { + return None; + } + let route_node = 
first_inner(positional[0]); + let route = string_node_value(route_node, bytes).unwrap_or_default(); + let handler_node = first_inner(positional[1]); + let handler_name = handler_text(handler_node, bytes).unwrap_or_default(); + let auth_required = check_chained_middleware(call, bytes); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(call, file_rel), + framework: Framework::Sinatra, // PHP frameworks reuse the closest tag — Laravel folds into a generic surface entry-point + method: *method, + route, + handler_name, + handler_location: SourceLocation::new( + file_rel, + (handler_node.start_position().row + 1) as u32, + (handler_node.start_position().column + 1) as u32, + ), + auth_required, + })) +} + +fn first_inner(arg: Node) -> Node { + let mut cursor = arg.walk(); + arg.named_children(&mut cursor).next().unwrap_or(arg) +} + +fn handler_text(node: Node, bytes: &[u8]) -> Option { + Some(node.utf8_text(bytes).ok()?.to_string()) +} + +fn check_chained_middleware(call: Node, bytes: &[u8]) -> bool { + // Walk up to find a member_call chain: `Route::get(...)->middleware('auth')` + let mut cur = call.parent(); + while let Some(p) = cur { + if p.kind() == "member_call_expression" + && let Some(name) = p.child_by_field_name("name") + && let Ok(name_text) = name.utf8_text(bytes) + && name_text == "middleware" + && let Some(args) = p.child_by_field_name("arguments") + && let Ok(args_text) = args.utf8_text(bytes) + && (args_text.contains("auth") || args_text.contains("jwt") || args_text.contains("authenticated")) + { + return true; + } + cur = p.parent(); + } + false +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_php::LANGUAGE_PHP.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_laravel_get() { + let src = "middleware('auth');\n"; + let (tree, 
bytes) = parse(src); + let nodes = detect_laravel_routes(&tree, &bytes, &PathBuf::from("routes.php"), None); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert!(ep.auth_required); + } +} diff --git a/src/surface/lang/php_slim.rs b/src/surface/lang/php_slim.rs new file mode 100644 index 00000000..ea125bd5 --- /dev/null +++ b/src/surface/lang/php_slim.rs @@ -0,0 +1,139 @@ +//! PHP + Slim framework probe. +//! +//! Recognises Slim route registrations: +//! +//! * `$app->get('/path', $handler)` / `->post(...)` / `->put` / +//! `->delete` / `->patch` / `->options` / `->any` +//! * `$app->group('/api', function ($g) { $g->get(...); })` (the +//! group prefix is captured when the call site is lexically inside +//! a `group(...)` closure body — best-effort textual match). + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +const VERBS: &[(&str, HttpMethod)] = &[ + ("get", HttpMethod::GET), + ("post", HttpMethod::POST), + ("put", HttpMethod::PUT), + ("patch", HttpMethod::PATCH), + ("delete", HttpMethod::DELETE), + ("options", HttpMethod::OPTIONS), + ("head", HttpMethod::HEAD), + ("any", HttpMethod::GET), +]; + +pub fn detect_slim_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_calls(tree.root_node(), &mut |call| { + if let Some(node) = match_slim_call(call, bytes, &file_rel) { + out.push(node); + } + }); + out +} + +fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if node.kind() == "member_call_expression" { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_calls(child, visit); + } +} + +fn match_slim_call(call: Node, bytes: &[u8], file_rel: &str) -> 
Option { + let object = call.child_by_field_name("object")?; + let object_text = object.utf8_text(bytes).ok()?; + if !receiver_is_slim_app(object_text) { + return None; + } + let name = call.child_by_field_name("name")?; + let name_text = name.utf8_text(bytes).ok()?; + let (_, method) = VERBS + .iter() + .find(|(v, _)| v.eq_ignore_ascii_case(name_text))?; + let args = call.child_by_field_name("arguments")?; + let mut cursor = args.walk(); + let positional: Vec = args + .children(&mut cursor) + .filter(|n| n.kind() == "argument") + .collect(); + if positional.len() < 2 { + return None; + } + let route_node = first_inner(positional[0]); + let route = string_node_value(route_node, bytes).unwrap_or_default(); + let handler_node = first_inner(positional[1]); + let handler_name = handler_node + .utf8_text(bytes) + .ok() + .map(str::to_string) + .unwrap_or_default(); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(call, file_rel), + framework: Framework::Sinatra, + method: *method, + route, + handler_name, + handler_location: SourceLocation::new( + file_rel, + (handler_node.start_position().row + 1) as u32, + (handler_node.start_position().column + 1) as u32, + ), + auth_required: false, + })) +} + +fn first_inner(arg: Node) -> Node { + let mut cursor = arg.walk(); + arg.named_children(&mut cursor).next().unwrap_or(arg) +} + +fn receiver_is_slim_app(text: &str) -> bool { + let trimmed = text.trim(); + let lower = trimmed.to_ascii_lowercase(); + lower == "$app" + || lower == "$g" + || lower == "$group" + || lower == "$router" + || lower.ends_with("app") + || lower.ends_with("group") + || lower.ends_with("router") +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_php::LANGUAGE_PHP.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_slim_get() { + let 
src = "get('/users', 'UsersController:list');\n"; + let (tree, bytes) = parse(src); + let nodes = detect_slim_routes(&tree, &bytes, &PathBuf::from("routes.php"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users"); + } +} diff --git a/src/surface/lang/python_django.rs b/src/surface/lang/python_django.rs new file mode 100644 index 00000000..5cc25900 --- /dev/null +++ b/src/surface/lang/python_django.rs @@ -0,0 +1,364 @@ +//! Python + Django framework probe. +//! +//! Recognises two route shapes: +//! +//! 1. `urls.py`-style routing: `path("/admin", admin_view)`, +//! `re_path(r"^api/", api_view)`, `url(r"^foo$", foo_view)`. +//! The probe walks the URL configuration list and emits one +//! EntryPoint per `path` / `re_path` / `url` call, resolving the +//! handler to the function with the same name in the file when +//! possible. +//! 2. Class-based view methods: a `get` / `post` / `put` / `delete` +//! method on a class derived from `View`, `APIView`, `ViewSet`, +//! `TemplateView`. The route path is `""` because URL config lives +//! in a separate `urls.py`. +//! +//! `auth_required` follows the standard Django decorators +//! ([`AUTH_DECORATORS`]) plus the DRF permission classes pattern +//! (`permission_classes = [IsAuthenticated]`). 
+ +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{ + leaf_matches, loc_for, rel_file, string_node_value, +}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::collections::HashMap; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub const AUTH_DECORATORS: &[&str] = &[ + "login_required", + "permission_required", + "user_passes_test", + "staff_member_required", + "csrf_protect", + "require_authenticated", + "auth_required", +]; + +const CBV_BASES: &[&str] = &[ + "View", + "APIView", + "ViewSet", + "ModelViewSet", + "ReadOnlyModelViewSet", + "TemplateView", + "ListView", + "DetailView", + "CreateView", + "UpdateView", + "DeleteView", + "RedirectView", + "FormView", +]; + +pub fn detect_django_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + // File-level gate: only fire when the file actually imports + // django (or extends the Django CBV bases via name witness). + let file_text = std::str::from_utf8(bytes).unwrap_or(""); + let has_django_witness = file_text.contains("django") + || file_text.contains("rest_framework") + || CBV_BASES.iter().any(|b| file_text.contains(b)); + if !has_django_witness { + return Vec::new(); + } + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + let function_index = collect_function_definitions(tree.root_node(), bytes); + detect_url_dispatch(tree.root_node(), bytes, &file_rel, &function_index, &mut out); + detect_class_based_views(tree.root_node(), bytes, &file_rel, &mut out); + out +} + +fn collect_function_definitions<'tree>( + root: Node<'tree>, + bytes: &'tree [u8], +) -> HashMap, bool)> { + let mut index: HashMap, bool)> = HashMap::new(); + fn walk<'tree>( + node: Node<'tree>, + bytes: &'tree [u8], + index: &mut HashMap, bool)>, + ) { + if node.kind() == "function_definition" + && let Some(name_node) = node.child_by_field_name("name") + && let Ok(name) = name_node.utf8_text(bytes) + { + // Detect 
if any decorator is an auth marker. + let mut auth = false; + if let Some(parent) = node.parent() + && parent.kind() == "decorated_definition" + { + let mut cursor = parent.walk(); + for child in parent.children(&mut cursor) { + if child.kind() == "decorator" && decorator_is_auth_marker(child, bytes) { + auth = true; + break; + } + } + } + index.insert(name.to_string(), (node, auth)); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk(child, bytes, index); + } + } + walk(root, bytes, &mut index); + index +} + +fn detect_url_dispatch<'tree>( + root: Node<'tree>, + bytes: &[u8], + file_rel: &str, + function_index: &HashMap, bool)>, + out: &mut Vec, +) { + fn recurse<'tree>( + node: Node<'tree>, + bytes: &[u8], + file_rel: &str, + function_index: &HashMap, bool)>, + out: &mut Vec, + ) { + if node.kind() == "call" + && let Some((route, handler_name)) = parse_url_call(node, bytes) + { + let (handler_loc, auth_required) = function_index + .get(&handler_name) + .map(|(h, a)| (loc_for(*h, file_rel), *a)) + .unwrap_or_else(|| (loc_for(node, file_rel), false)); + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(node, file_rel), + framework: Framework::Django, + method: HttpMethod::GET, + route, + handler_name, + handler_location: handler_loc, + auth_required, + })); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + recurse(child, bytes, file_rel, function_index, out); + } + } + recurse(root, bytes, file_rel, function_index, out); +} + +fn parse_url_call(call: Node, bytes: &[u8]) -> Option<(String, String)> { + let target = call.child_by_field_name("function")?; + let target_text = target.utf8_text(bytes).ok()?; + let leaf = target_text.rsplit('.').next().unwrap_or(target_text); + if !matches!(leaf, "path" | "re_path" | "url") { + return None; + } + let args = call.child_by_field_name("arguments")?; + let mut cursor = args.walk(); + let mut route: Option = None; + let mut handler: Option = 
None; + for arg in args.children(&mut cursor) { + match arg.kind() { + "string" if route.is_none() => { + route = string_node_value(arg, bytes); + } + "identifier" if handler.is_none() => { + handler = arg.utf8_text(bytes).ok().map(str::to_string); + } + "attribute" if handler.is_none() => { + handler = arg.utf8_text(bytes).ok().map(str::to_string); + } + "call" if handler.is_none() => { + // `MyView.as_view()` shape — extract `MyView`. + if let Some(callee) = arg.child_by_field_name("function") + && let Ok(text) = callee.utf8_text(bytes) + { + handler = Some(text.split('.').next().unwrap_or(text).to_string()); + } + } + _ => {} + } + } + Some((route?, handler?)) +} + +fn detect_class_based_views( + root: Node, + bytes: &[u8], + file_rel: &str, + out: &mut Vec, +) { + fn recurse(node: Node, bytes: &[u8], file_rel: &str, out: &mut Vec) { + if node.kind() == "class_definition" + && class_is_django_view(node, bytes) + { + let class_auth = class_has_auth_permission(node, bytes); + // Walk the body for HTTP-named methods. 
+ if let Some(body) = node.child_by_field_name("body") { + let mut bcur = body.walk(); + for stmt in body.children(&mut bcur) { + let func = match stmt.kind() { + "function_definition" => stmt, + "decorated_definition" => stmt + .child_by_field_name("definition") + .or_else(|| { + let mut c = stmt.walk(); + stmt.children(&mut c) + .find(|n| n.kind() == "function_definition") + }) + .unwrap_or(stmt), + _ => continue, + }; + if func.kind() != "function_definition" { + continue; + } + let Some(name_node) = func.child_by_field_name("name") else { + continue; + }; + let Ok(name) = name_node.utf8_text(bytes) else { + continue; + }; + let Some(method) = HttpMethod::from_ident(name) else { + continue; + }; + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(func, file_rel), + framework: Framework::Django, + method, + route: String::new(), + handler_name: name.to_string(), + handler_location: SourceLocation::new( + file_rel, + (func.start_position().row + 1) as u32, + (func.start_position().column + 1) as u32, + ), + auth_required: class_auth, + })); + } + } + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + recurse(child, bytes, file_rel, out); + } + } + recurse(root, bytes, file_rel, out); +} + +fn class_is_django_view(class: Node, bytes: &[u8]) -> bool { + let Some(supers) = class.child_by_field_name("superclasses") else { + return false; + }; + let mut cursor = supers.walk(); + for sup in supers.named_children(&mut cursor) { + let Ok(text) = sup.utf8_text(bytes) else { + continue; + }; + let leaf = text.rsplit('.').next().unwrap_or(text); + if CBV_BASES.iter().any(|b| leaf.contains(b)) { + return true; + } + } + false +} + +fn class_has_auth_permission(class: Node, bytes: &[u8]) -> bool { + let Some(body) = class.child_by_field_name("body") else { + return false; + }; + let mut cursor = body.walk(); + for stmt in body.children(&mut cursor) { + if stmt.kind() != "expression_statement" { + continue; + } + let mut sc = 
stmt.walk(); + for child in stmt.children(&mut sc) { + if child.kind() != "assignment" { + continue; + } + let Some(left) = child.child_by_field_name("left") else { + continue; + }; + let Ok(left_text) = left.utf8_text(bytes) else { + continue; + }; + if left_text != "permission_classes" { + continue; + } + let Some(right) = child.child_by_field_name("right") else { + continue; + }; + let Ok(right_text) = right.utf8_text(bytes) else { + continue; + }; + if right_text.contains("IsAuthenticated") + || right_text.contains("IsAdminUser") + || right_text.contains("DjangoModelPermissions") + { + return true; + } + } + } + false +} + +fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool { + let mut cursor = decorator.walk(); + let Some(expr) = decorator + .children(&mut cursor) + .find(|c| c.kind() != "@" && c.kind() != "comment") + else { + return false; + }; + let target = match expr.kind() { + "call" => expr.child_by_field_name("function"), + _ => Some(expr), + }; + let Some(target) = target else { return false }; + let Ok(text) = target.utf8_text(bytes) else { + return false; + }; + leaf_matches(text, AUTH_DECORATORS) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_python::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_path_call() { + let src = "from django.urls import path\n\ndef admin_view(request): pass\n\nurlpatterns = [\n path('admin/', admin_view),\n]\n"; + let (tree, bytes) = parse(src); + let nodes = detect_django_routes(&tree, &bytes, &PathBuf::from("urls.py"), None); + assert!(!nodes.is_empty()); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.framework, Framework::Django); + assert_eq!(ep.handler_name, "admin_view"); + assert_eq!(ep.route, "admin/"); + } + + #[test] + fn 
/// Auth markers recognised in the decorator stack. FastAPI's primary
/// auth idiom is `Depends(...)` parameter injection, handled separately.
///
/// `decorator_is_auth_marker` passes the decorator's target text together
/// with this table to `leaf_matches`; a hit on any decorator of a handler
/// marks its routes as `auth_required`.
pub const AUTH_DECORATORS: &[&str] = &[
    "login_required",
    "auth_required",
    "jwt_required",
    "token_required",
    "requires_auth",
    "authenticated",
    "require_auth",
    "require_login",
    "current_user",
];
+const AUTH_DEPENDS_CALLEES: &[&str] = &[ + "get_current_user", + "get_current_active_user", + "current_user", + "require_user", + "require_auth", + "auth", + "verify_token", + "verify_jwt", + "validate_token", +]; + +pub fn detect_fastapi_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + // File-level gate: avoid double-detection on Flask files that + // also use `app.get(...)` shape. FastAPI / Starlette / APIRouter + // require an explicit import of the relevant package. + let file_text = std::str::from_utf8(bytes).unwrap_or(""); + let has_fastapi_witness = file_text.contains("fastapi") + || file_text.contains("starlette") + || file_text.contains("APIRouter"); + if !has_fastapi_witness { + return Vec::new(); + } + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_decorated(tree.root_node(), &mut |func, decorators| { + let auth_via_decorator = decorators + .iter() + .any(|d| decorator_is_auth_marker(*d, bytes)); + let auth_via_depends = function_signature_uses_auth_depends(*func, bytes); + let auth_required = auth_via_decorator || auth_via_depends; + for dec in decorators { + if let Some((method, route_path)) = fastapi_route_decorator(*dec, bytes) { + let handler_name = function_name(*func, bytes).unwrap_or_default(); + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(*dec, &file_rel), + framework: Framework::FastApi, + method, + route: route_path, + handler_name, + handler_location: SourceLocation::new( + file_rel.clone(), + (func.start_position().row + 1) as u32, + (func.start_position().column + 1) as u32, + ), + auth_required, + })); + } + } + }); + out +} + +fn walk_decorated<'tree, F>(root: Node<'tree>, visit: &mut F) +where + F: FnMut(&Node<'tree>, &[Node<'tree>]), +{ + if root.kind() == "decorated_definition" { + let mut cursor = root.walk(); + let mut decorators: Vec> = Vec::new(); + let mut func: Option> = None; + for child in root.children(&mut cursor) { + match 
child.kind() { + "decorator" => decorators.push(child), + "function_definition" => func = Some(child), + _ => {} + } + } + if let Some(f) = func { + visit(&f, &decorators); + } + } + let mut cursor = root.walk(); + for child in root.children(&mut cursor) { + walk_decorated(child, visit); + } +} + +fn fastapi_route_decorator(decorator: Node, bytes: &[u8]) -> Option<(HttpMethod, String)> { + let mut cursor = decorator.walk(); + let expr = decorator + .children(&mut cursor) + .find(|c| c.kind() != "@" && c.kind() != "comment")?; + if expr.kind() != "call" { + return None; + } + let target = expr.child_by_field_name("function")?; + let args = expr.child_by_field_name("arguments"); + if target.kind() != "attribute" { + return None; + } + let object = target.child_by_field_name("object")?; + if !receiver_is_fastapi(object, bytes) { + return None; + } + let attr = target.child_by_field_name("attribute")?; + let attr_text = attr.utf8_text(bytes).ok()?; + let route_path = args + .and_then(|a| first_string_arg(a, bytes)) + .unwrap_or_default(); + if let Some(m) = HttpMethod::from_ident(attr_text) { + return Some((m, route_path)); + } + let lower = attr_text.to_ascii_lowercase(); + if lower == "websocket" || lower == "websocket_route" { + return Some((HttpMethod::GET, route_path)); + } + if lower == "api_route" { + let method = args + .and_then(|a| first_methods_kwarg(a, bytes)) + .unwrap_or(HttpMethod::GET); + return Some((method, route_path)); + } + None +} + +fn receiver_is_fastapi(object: Node, bytes: &[u8]) -> bool { + fn name_matches(text: &str) -> bool { + let lower = text.to_ascii_lowercase(); + lower == "app" + || lower == "router" + || lower == "api" + || lower.ends_with("_app") + || lower.ends_with("_router") + || lower.ends_with("_api") + } + match object.kind() { + "identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches), + "attribute" => object + .child_by_field_name("attribute") + .and_then(|a| a.utf8_text(bytes).ok()) + 
.is_some_and(name_matches), + "call" => { + let Some(callee) = object.child_by_field_name("function") else { + return false; + }; + let Ok(text) = callee.utf8_text(bytes) else { + return false; + }; + let leaf = text.rsplit('.').next().unwrap_or(text).trim(); + leaf == "FastAPI" || leaf == "APIRouter" || leaf == "Starlette" + } + _ => false, + } +} + +fn first_string_arg(args: Node, bytes: &[u8]) -> Option { + let mut cursor = args.walk(); + for arg in args.children(&mut cursor) { + if arg.kind() == "string" { + return string_node_value(arg, bytes); + } + } + None +} + +fn first_methods_kwarg(args: Node, bytes: &[u8]) -> Option { + let mut cursor = args.walk(); + for arg in args.children(&mut cursor) { + if arg.kind() != "keyword_argument" { + continue; + } + let name = arg.child_by_field_name("name")?; + if name.utf8_text(bytes).ok()? != "methods" { + continue; + } + let value = arg.child_by_field_name("value")?; + let mut vw = value.walk(); + for child in value.children(&mut vw) { + if child.kind() == "string" + && let Some(v) = string_node_value(child, bytes) + && let Some(m) = HttpMethod::from_ident(&v) + { + return Some(m); + } + } + } + None +} + +fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool { + let mut cursor = decorator.walk(); + let Some(expr) = decorator + .children(&mut cursor) + .find(|c| c.kind() != "@" && c.kind() != "comment") + else { + return false; + }; + let target = match expr.kind() { + "call" => expr.child_by_field_name("function"), + _ => Some(expr), + }; + let Some(target) = target else { return false }; + let Ok(text) = target.utf8_text(bytes) else { + return false; + }; + leaf_matches(text, AUTH_DECORATORS) +} + +/// Look for a parameter with default `Depends()`. 
+fn function_signature_uses_auth_depends(func: Node, bytes: &[u8]) -> bool { + let Some(params) = func.child_by_field_name("parameters") else { + return false; + }; + let mut cursor = params.walk(); + for param in params.children(&mut cursor) { + if !matches!( + param.kind(), + "default_parameter" | "typed_default_parameter" + ) { + continue; + } + let Some(value) = param.child_by_field_name("value") else { + continue; + }; + if value.kind() != "call" { + continue; + } + let Some(call_target) = value.child_by_field_name("function") else { + continue; + }; + let Ok(text) = call_target.utf8_text(bytes) else { + continue; + }; + let leaf = text.rsplit('.').next().unwrap_or(text).trim(); + if leaf != "Depends" && leaf != "Security" { + continue; + } + let Some(args) = value.child_by_field_name("arguments") else { + continue; + }; + let mut aw = args.walk(); + for arg in args.children(&mut aw) { + if let Ok(arg_text) = arg.utf8_text(bytes) + && leaf_matches(arg_text, AUTH_DEPENDS_CALLEES) + { + return true; + } + } + } + false +} + +fn function_name(func: Node, bytes: &[u8]) -> Option { + let name_node = func.child_by_field_name("name")?; + name_node.utf8_text(bytes).ok().map(str::to_string) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_python::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_get_route() { + let src = "from fastapi import FastAPI\napp = FastAPI()\n@app.get('/users')\ndef list_users(): pass\n"; + let (tree, bytes) = parse(src); + let nodes = detect_fastapi_routes(&tree, &bytes, &PathBuf::from("api.py"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users"); + assert_eq!(ep.framework, Framework::FastApi); + } 
+ + #[test] + fn detects_router_post() { + let src = "router = APIRouter()\n@router.post('/items')\ndef create(): pass\n"; + let (tree, bytes) = parse(src); + let nodes = detect_fastapi_routes(&tree, &bytes, &PathBuf::from("api.py"), None); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::POST); + } + + #[test] + fn detects_depends_auth() { + let src = "from fastapi import Depends\n@app.get('/me')\ndef me(user = Depends(get_current_user)): pass\n"; + let (tree, bytes) = parse(src); + let nodes = detect_fastapi_routes(&tree, &bytes, &PathBuf::from("api.py"), None); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert!(ep.auth_required); + } +} diff --git a/src/surface/lang/python_flask.rs b/src/surface/lang/python_flask.rs index 5fbb3c60..ae7caa1a 100644 --- a/src/surface/lang/python_flask.rs +++ b/src/surface/lang/python_flask.rs @@ -50,6 +50,17 @@ pub fn detect_flask_routes( path: &Path, scan_root: Option<&Path>, ) -> Vec { + // File-level gate: avoid double-detection on FastAPI files where + // `app.get(...)` shape overlaps. Phase 21 was lenient because no + // sibling probe existed; Phase 22 splits per-framework, so each + // probe only fires when its framework witness is present. + let file_text = std::str::from_utf8(bytes).unwrap_or(""); + let has_flask_witness = file_text.contains("flask") + || file_text.contains("Flask") + || file_text.contains("Blueprint"); + if !has_flask_witness { + return Vec::new(); + } let file_rel = relative_path_string(path, scan_root); let mut out = Vec::new(); walk_decorated(tree.root_node(), bytes, &mut |func_node, decorators| { diff --git a/src/surface/lang/ruby_rails.rs b/src/surface/lang/ruby_rails.rs new file mode 100644 index 00000000..53689f55 --- /dev/null +++ b/src/surface/lang/ruby_rails.rs @@ -0,0 +1,219 @@ +//! Ruby + Rails framework probe. +//! +//! Recognises two Rails route shapes: +//! +//! 1. 
`config/routes.rb` declarations — `get '/path', to: 'controller#action'`, +//! `post '/path' => 'controller#action'`, `resources :users`. +//! 2. Controller actions — public instance methods on a class +//! inheriting from `ApplicationController` / `ActionController::Base`. +//! +//! `auth_required` for routes follows `before_action :authenticate!` +//! at the controller level. + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +const VERBS: &[(&str, HttpMethod)] = &[ + ("get", HttpMethod::GET), + ("post", HttpMethod::POST), + ("put", HttpMethod::PUT), + ("patch", HttpMethod::PATCH), + ("delete", HttpMethod::DELETE), + ("match", HttpMethod::GET), +]; + +pub fn detect_rails_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + detect_routes_dsl(tree.root_node(), bytes, &file_rel, &mut out); + detect_controllers(tree.root_node(), bytes, &file_rel, &mut out); + out +} + +fn detect_routes_dsl(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec) { + fn recurse(node: Node, bytes: &[u8], file_rel: &str, out: &mut Vec) { + if matches!(node.kind(), "call" | "method_call") { + if let Some(method_node) = node.child_by_field_name("method") + && let Ok(method_text) = method_node.utf8_text(bytes) + && let Some((_, method)) = VERBS.iter().find(|(v, _)| *v == method_text) + { + let args_opt = node + .child_by_field_name("arguments") + .or_else(|| { + let mut c = node.walk(); + node.children(&mut c).find(|n| n.kind() == "argument_list") + }); + if let Some(args) = args_opt { + let mut cursor = args.walk(); + let positional: Vec = args.named_children(&mut cursor).collect(); + if let Some(route_node) = positional.first() + && let Some(route) = string_node_value(*route_node, 
bytes) + { + let handler_name = positional + .iter() + .find_map(|n| extract_to_handler(*n, bytes)) + .unwrap_or_default(); + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(node, file_rel), + framework: Framework::Rails, + method: *method, + route, + handler_name, + handler_location: loc_for(node, file_rel), + auth_required: false, + })); + } + } + } + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + recurse(child, bytes, file_rel, out); + } + } + recurse(root, bytes, file_rel, out); +} + +fn extract_to_handler(node: Node, bytes: &[u8]) -> Option { + // Shapes: + // `to: 'controller#action'` — pair with hash key `to` + // `'controller#action'` — second positional string + // `=> 'controller#action'` — assoc with hashrocket + if node.kind() == "string" + && let Some(s) = string_node_value(node, bytes) + && s.contains('#') + { + return Some(s); + } + if node.kind() == "pair" { + let mut cursor = node.walk(); + let children: Vec = node.named_children(&mut cursor).collect(); + for child in &children { + if child.kind() == "string" + && let Some(s) = string_node_value(*child, bytes) + && s.contains('#') + { + return Some(s); + } + } + } + None +} + +fn detect_controllers(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec) { + fn recurse(node: Node, bytes: &[u8], file_rel: &str, out: &mut Vec) { + if node.kind() == "class" + && class_is_controller(node, bytes) + { + let class_auth = class_has_before_authenticate(node, bytes); + walk_methods(node, bytes, &mut |method_node, name| { + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(method_node, file_rel), + framework: Framework::Rails, + method: HttpMethod::GET, + route: String::new(), + handler_name: name.to_string(), + handler_location: SourceLocation::new( + file_rel, + (method_node.start_position().row + 1) as u32, + (method_node.start_position().column + 1) as u32, + ), + auth_required: class_auth, + })); + }); + } + let mut cursor = node.walk(); 
+ for child in node.children(&mut cursor) { + recurse(child, bytes, file_rel, out); + } + } + recurse(root, bytes, file_rel, out); +} + +fn class_is_controller(class: Node, bytes: &[u8]) -> bool { + let Some(super_node) = class.child_by_field_name("superclass") else { + return false; + }; + let Ok(text) = super_node.utf8_text(bytes) else { + return false; + }; + text.contains("ApplicationController") || text.contains("ActionController") +} + +fn class_has_before_authenticate(class: Node, bytes: &[u8]) -> bool { + let Some(body) = class.child_by_field_name("body") else { + return false; + }; + let mut cursor = body.walk(); + for child in body.children(&mut cursor) { + if let Ok(text) = child.utf8_text(bytes) + && text.contains("before_action") + && (text.contains("authenticate") || text.contains("login_required")) + { + return true; + } + } + false +} + +fn walk_methods<'tree, F>(class: Node<'tree>, bytes: &[u8], visit: &mut F) +where + F: FnMut(Node<'tree>, &str), +{ + let Some(body) = class.child_by_field_name("body") else { + return; + }; + let mut cursor = body.walk(); + for child in body.children(&mut cursor) { + if child.kind() == "method" + && let Some(name_node) = child.child_by_field_name("name") + && let Ok(name) = name_node.utf8_text(bytes) + && !name.starts_with('_') + { + visit(child, name); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_ruby::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_routes_dsl() { + let src = "Rails.application.routes.draw do\n get '/users', to: 'users#index'\nend\n"; + let (tree, bytes) = parse(src); + let nodes = detect_rails_routes(&tree, &bytes, &PathBuf::from("config/routes.rb"), None); + assert!(!nodes.is_empty()); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + 
assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users"); + } + + #[test] + fn detects_controller_actions() { + let src = "class UsersController < ApplicationController\n def index\n end\n def show\n end\nend\n"; + let (tree, bytes) = parse(src); + let nodes = detect_rails_routes(&tree, &bytes, &PathBuf::from("users_controller.rb"), None); + assert_eq!(nodes.len(), 2); + } +} diff --git a/src/surface/lang/ruby_sinatra.rs b/src/surface/lang/ruby_sinatra.rs new file mode 100644 index 00000000..8a083099 --- /dev/null +++ b/src/surface/lang/ruby_sinatra.rs @@ -0,0 +1,111 @@ +//! Ruby + Sinatra framework probe. +//! +//! Sinatra routes are top-level method calls of the form +//! `get '/path' do ... end`, `post '/path' do ... end`, etc. The +//! handler is the block; we synthesise the handler name from the +//! route string (Sinatra blocks are anonymous). + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +const VERBS: &[(&str, HttpMethod)] = &[ + ("get", HttpMethod::GET), + ("post", HttpMethod::POST), + ("put", HttpMethod::PUT), + ("patch", HttpMethod::PATCH), + ("delete", HttpMethod::DELETE), + ("head", HttpMethod::HEAD), + ("options", HttpMethod::OPTIONS), +]; + +pub fn detect_sinatra_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_calls(tree.root_node(), &mut |call| { + if let Some(node) = match_sinatra_call(call, bytes, &file_rel) { + out.push(node); + } + }); + out +} + +fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if matches!(node.kind(), "call" | "method_call") { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_calls(child, visit); + } +} + +fn 
match_sinatra_call(call: Node, bytes: &[u8], file_rel: &str) -> Option { + let method_name_node = call.child_by_field_name("method")?; + let method_text = method_name_node.utf8_text(bytes).ok()?; + let (_, method) = VERBS + .iter() + .find(|(v, _)| *v == method_text)?; + // Must have a block to be a Sinatra route. + let block = call + .child_by_field_name("block") + .or_else(|| { + let mut c = call.walk(); + call.children(&mut c) + .find(|n| matches!(n.kind(), "do_block" | "block")) + })?; + // Args: Sinatra accepts a string literal as the first positional arg. + let args = call + .child_by_field_name("arguments") + .or_else(|| { + let mut c = call.walk(); + call.children(&mut c).find(|n| n.kind() == "argument_list") + })?; + let mut cursor = args.walk(); + let route_node = args.named_children(&mut cursor).next()?; + let route = string_node_value(route_node, bytes)?; + let handler_name = format!("{}_{}", method_text, route.replace(['/', '-'], "_")); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(call, file_rel), + framework: Framework::Sinatra, + method: *method, + route, + handler_name, + handler_location: loc_for(block, file_rel), + auth_required: false, + })) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_ruby::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_sinatra_get() { + let src = "get '/users' do\n 'hi'\nend\n"; + let (tree, bytes) = parse(src); + let nodes = detect_sinatra_routes(&tree, &bytes, &PathBuf::from("app.rb"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users"); + } +} diff --git a/src/surface/lang/rust_actix.rs b/src/surface/lang/rust_actix.rs new file mode 100644 index 
00000000..e27ee2e0 --- /dev/null +++ b/src/surface/lang/rust_actix.rs @@ -0,0 +1,196 @@ +//! Rust + actix-web framework probe. +//! +//! Recognises actix-web routing macros (`#[get("/path")]`, +//! `#[post("/path")]`, `#[put]`, `#[delete]`, `#[patch]`, `#[head]`, +//! `#[options]`, `#[route("/path", method = ...)]`) attached to a +//! `function_item`. The route path is extracted from the macro +//! argument string literal. +//! +//! `auth_required` fires when the function signature has a parameter +//! whose type matches one of [`AUTH_EXTRACTORS`] (`Identity`, +//! `BearerAuth`, `JwtClaims`, etc.). + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +pub const AUTH_EXTRACTORS: &[&str] = &[ + "Identity", + "BearerAuth", + "BasicAuth", + "JwtClaims", + "Authenticated", + "User", +]; + +const ROUTE_MACROS: &[(&str, Option)] = &[ + ("get", Some(HttpMethod::GET)), + ("post", Some(HttpMethod::POST)), + ("put", Some(HttpMethod::PUT)), + ("delete", Some(HttpMethod::DELETE)), + ("patch", Some(HttpMethod::PATCH)), + ("head", Some(HttpMethod::HEAD)), + ("options", Some(HttpMethod::OPTIONS)), + ("route", None), +]; + +pub fn detect_actix_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_text = std::str::from_utf8(bytes).unwrap_or(""); + if !file_text.contains("actix_web::") && !file_text.contains("use actix_web") { + // Best-effort gate so the actix probe does not over-fire on + // Rocket / generic Rust files that also define a `#[get]` + // macro from a user crate. 
+ return Vec::new(); + } + let file_rel = rel_file(path, scan_root); + let mut out = Vec::new(); + walk_functions(tree.root_node(), &mut |func| { + if let Some(node) = match_actix_function(func, bytes, &file_rel) { + out.push(node); + } + }); + out +} + +fn walk_functions<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if node.kind() == "function_item" { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_functions(child, visit); + } +} + +fn match_actix_function(func: Node, bytes: &[u8], file_rel: &str) -> Option { + let attrs = collect_preceding_attributes(func); + let mut method: Option = None; + let mut route_path = String::new(); + for attr in attrs { + let raw = attr.utf8_text(bytes).ok()?; + let inner = raw + .trim_start_matches(['#', '!']) + .trim_matches(['[', ']']); + for (name, default_method) in ROUTE_MACROS { + let prefix = format!("{}(", name); + if inner.starts_with(&prefix) { + method = default_method.or_else(|| extract_route_method(inner)); + if route_path.is_empty() + && let Some(start) = inner.find('"') + { + let rest = &inner[start + 1..]; + if let Some(end) = rest.find('"') { + route_path = rest[..end].to_string(); + } + } + } else if inner == *name && method.is_none() { + method = *default_method; + } + } + } + let m = method?; + let handler_name = function_name(func, bytes).unwrap_or_default(); + let auth_required = signature_uses_auth_extractor(func, bytes); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(func, file_rel), + framework: Framework::Actix, + method: m, + route: route_path, + handler_name, + handler_location: SourceLocation::new( + file_rel, + (func.start_position().row + 1) as u32, + (func.start_position().column + 1) as u32, + ), + auth_required, + })) +} + +fn collect_preceding_attributes(func: Node) -> Vec { + let mut out: Vec = Vec::new(); + let Some(parent) = func.parent() else { + return out; + }; + let mut cursor = parent.walk(); + let 
mut pending: Vec = Vec::new(); + for sib in parent.children(&mut cursor) { + if sib.id() == func.id() { + out.append(&mut pending); + return out; + } + if sib.kind() == "attribute_item" || sib.kind() == "inner_attribute_item" { + let mut aw = sib.walk(); + for inner in sib.children(&mut aw) { + if inner.kind() == "attribute" { + pending.push(inner); + } + } + } else { + pending.clear(); + } + } + out +} + +fn extract_route_method(inner: &str) -> Option { + for verb in ["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"] { + if inner.contains(verb) { + return HttpMethod::from_ident(verb); + } + } + None +} + +fn signature_uses_auth_extractor(func: Node, bytes: &[u8]) -> bool { + let Some(params) = func.child_by_field_name("parameters") else { + return false; + }; + let Ok(text) = params.utf8_text(bytes) else { + return false; + }; + AUTH_EXTRACTORS.iter().any(|n| text.contains(n)) +} + +fn function_name(func: Node, bytes: &[u8]) -> Option { + func.child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + .map(str::to_string) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_rust::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_actix_get() { + let src = r#" +use actix_web::{get, HttpResponse}; +#[get("/users")] +async fn list_users() -> HttpResponse { HttpResponse::Ok().finish() } +"#; + let (tree, bytes) = parse(src); + let nodes = detect_actix_routes(&tree, &bytes, &PathBuf::from("main.rs"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users"); + } +} diff --git a/src/surface/lang/rust_axum.rs b/src/surface/lang/rust_axum.rs new file mode 100644 index 00000000..dfd412c8 --- /dev/null +++ 
b/src/surface/lang/rust_axum.rs @@ -0,0 +1,191 @@ +//! Rust + axum framework probe. +//! +//! Detects axum route registration: +//! +//! * `Router::new().route("/path", get(handler))` / +//! `.route("/path", post(handler))` / etc. +//! * Bare extractor-shaped function items in files that import axum +//! (handler typing alone is treated as a candidate, but only when a +//! `Router::route(...)` registration in the same file references it). + +use crate::entry_points::HttpMethod; +use crate::surface::lang::common::{loc_for, rel_file, string_node_value}; +use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode}; +use std::collections::HashMap; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +const VERBS: &[(&str, HttpMethod)] = &[ + ("get", HttpMethod::GET), + ("post", HttpMethod::POST), + ("put", HttpMethod::PUT), + ("delete", HttpMethod::DELETE), + ("patch", HttpMethod::PATCH), + ("head", HttpMethod::HEAD), + ("options", HttpMethod::OPTIONS), +]; + +pub const AUTH_EXTRACTORS: &[&str] = &[ + "Extension, +) -> Vec { + let file_text = std::str::from_utf8(bytes).unwrap_or(""); + if !file_text.contains("axum::") && !file_text.contains("use axum") { + return Vec::new(); + } + let file_rel = rel_file(path, scan_root); + let function_index = collect_functions(tree.root_node(), bytes); + let mut out = Vec::new(); + walk_calls(tree.root_node(), &mut |call| { + if let Some(node) = match_router_route(call, bytes, &file_rel, &function_index) { + out.push(node); + } + }); + out +} + +fn walk_calls<'tree, F: FnMut(Node<'tree>)>(node: Node<'tree>, visit: &mut F) { + if node.kind() == "call_expression" { + visit(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk_calls(child, visit); + } +} + +fn collect_functions<'tree>( + root: Node<'tree>, + bytes: &'tree [u8], +) -> HashMap, bool)> { + let mut out: HashMap, bool)> = HashMap::new(); + fn walk<'tree>( + node: Node<'tree>, + bytes: &'tree [u8], + out: &mut HashMap, 
bool)>, + ) { + if node.kind() == "function_item" + && let Some(name_node) = node.child_by_field_name("name") + && let Ok(name) = name_node.utf8_text(bytes) + { + let auth = node + .child_by_field_name("parameters") + .and_then(|p| p.utf8_text(bytes).ok()) + .map(|t| AUTH_EXTRACTORS.iter().any(|x| t.contains(x))) + .unwrap_or(false); + out.insert(name.to_string(), (node, auth)); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk(child, bytes, out); + } + } + walk(root, bytes, &mut out); + out +} + +fn match_router_route<'tree>( + call: Node<'tree>, + bytes: &[u8], + file_rel: &str, + function_index: &HashMap, bool)>, +) -> Option { + let func = call.child_by_field_name("function")?; + if func.kind() != "field_expression" { + return None; + } + let field = func.child_by_field_name("field")?; + if field.utf8_text(bytes).ok()? != "route" { + return None; + } + let args = call.child_by_field_name("arguments")?; + let mut cursor = args.walk(); + let positional: Vec = args + .children(&mut cursor) + .filter(|n| !matches!(n.kind(), "(" | ")" | ",")) + .collect(); + if positional.len() < 2 { + return None; + } + let route = string_node_value(positional[0], bytes)?; + let method_args = positional[1]; + if method_args.kind() != "call_expression" { + return None; + } + let method_callee = method_args.child_by_field_name("function")?; + let method_text = method_callee.utf8_text(bytes).ok()?; + let leaf = method_text.rsplit("::").next().unwrap_or(method_text); + let (_, method) = VERBS.iter().find(|(v, _)| *v == leaf)?; + let method_args_node = method_args.child_by_field_name("arguments")?; + let mut hcur = method_args_node.walk(); + let handler_node = method_args_node + .children(&mut hcur) + .find(|n| n.kind() == "identifier" || n.kind() == "scoped_identifier")?; + let handler_name = handler_node.utf8_text(bytes).ok()?.to_string(); + let auth_required = function_index + .get(&handler_name) + .map(|(_, a)| *a) + .unwrap_or(false); + let 
handler_loc = function_index + .get(&handler_name) + .map(|(node, _)| { + SourceLocation::new( + file_rel, + (node.start_position().row + 1) as u32, + (node.start_position().column + 1) as u32, + ) + }) + .unwrap_or_else(|| loc_for(handler_node, file_rel)); + Some(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(call, file_rel), + framework: Framework::Axum, + method: *method, + route, + handler_name, + handler_location: handler_loc, + auth_required, + })) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_rust::LANGUAGE.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_router_get() { + let src = r#" +use axum::{Router, routing::get}; +async fn list_users() -> &'static str { "ok" } +fn app() -> Router { + Router::new().route("/users", get(list_users)) +} +"#; + let (tree, bytes) = parse(src); + let nodes = detect_axum_routes(&tree, &bytes, &PathBuf::from("main.rs"), None); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.route, "/users"); + } +} diff --git a/src/surface/lang/ts_next.rs b/src/surface/lang/ts_next.rs new file mode 100644 index 00000000..9bb86bc2 --- /dev/null +++ b/src/surface/lang/ts_next.rs @@ -0,0 +1,315 @@ +//! TypeScript + Next.js framework probe. +//! +//! Recognises Next.js App Router route handlers (`app/**/route.{ts,tsx,js,jsx}`) +//! by walking exported function declarations whose name is one of the +//! HTTP method idents (`GET` / `POST` / …). Also recognises Pages +//! Router API routes (`pages/api/**/*.{ts,tsx,js,jsx}`) via the +//! `export default handler` pattern. +//! +//! Server actions (`'use server'` directive at file or function scope) +//! are also reported as entry points because they expose a function +//! 
/// Report whether `path` lies under a Pages Router API directory, i.e.
/// contains a `pages` component immediately followed by an `api`
/// component (`pages/api/users.ts`, `src/pages/api/users/index.ts`).
///
/// The two components must be adjacent: an `api` directory nested deeper
/// inside `pages` (`pages/blog/api/helper.ts`) is an ordinary page
/// directory, not an API route.
fn is_pages_api_route(path: &Path) -> bool {
    let mut prev_was_pages = false;
    for comp in path.components() {
        let name = comp.as_os_str();
        if prev_was_pages && name == "api" {
            return true;
        }
        prev_was_pages = name == "pages";
    }
    false
}
/// Derive a URL-style route from a Next.js source path.
///
/// Everything up to and including the first component named `app`, `api`
/// or `pages` is discarded. Of the remaining segments, a trailing
/// `route.*` / `index.*` file is dropped, and any other trailing segment
/// loses its last `.`-suffix. The result always starts with `/`; a path
/// with no anchor component, or with nothing left after trimming, yields
/// `/`.
///
/// * `app/users/[id]/route.ts` → `/users/[id]`
/// * `pages/api/users/index.ts` → `/api/users`
/// * `pages/api/hello.ts` → `/api/hello`
fn derive_route_path(path: &Path) -> String {
    let mut segments: Vec<String> = path
        .components()
        .map(|comp| comp.as_os_str().to_string_lossy().into_owned())
        // Skip everything before the anchor, then the anchor itself.
        .skip_while(|seg| !matches!(seg.as_str(), "app" | "api" | "pages"))
        .skip(1)
        .collect();

    let leaf_is_marker = matches!(
        segments.last(),
        Some(leaf) if leaf.starts_with("route.") || leaf.starts_with("index.")
    );
    if leaf_is_marker {
        // The marker file contributes no segment of its own.
        segments.pop();
    } else if let Some(leaf) = segments.last_mut() {
        if let Some(dot) = leaf.rfind('.') {
            leaf.truncate(dot);
        }
    }

    match segments.join("/") {
        joined if joined.is_empty() => "/".to_string(),
        joined => format!("/{joined}"),
    }
}
name_node.utf8_text(bytes).ok()?.to_string(); + let pos = node.start_position(); + Some((name, (pos.row, pos.column))) + } + "lexical_declaration" | "variable_declaration" => { + let mut cursor = node.walk(); + for decl in node.children(&mut cursor) { + if decl.kind() == "variable_declarator" + && let Some(name_node) = decl.child_by_field_name("name") + && let Ok(name) = name_node.utf8_text(bytes) + { + let pos = decl.start_position(); + return Some((name.to_string(), (pos.row, pos.column))); + } + } + None + } + _ => None, + } +} + +fn collect_default_export( + root: Node, + bytes: &[u8], + file_rel: &str, + route_path: &str, + out: &mut Vec, +) { + fn recurse( + node: Node, + bytes: &[u8], + file_rel: &str, + route_path: &str, + out: &mut Vec, + ) { + if node.kind() == "export_statement" { + let raw = node.utf8_text(bytes).unwrap_or(""); + if raw.contains("default") { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + let name = match child.kind() { + "function_declaration" => child + .child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + .map(str::to_string), + "identifier" => child.utf8_text(bytes).ok().map(str::to_string), + "arrow_function" | "function" | "function_expression" => { + Some("default".to_string()) + } + _ => None, + }; + if let Some(name) = name { + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(node, file_rel), + framework: Framework::NextAppRouter, + method: HttpMethod::GET, + route: route_path.to_string(), + handler_name: name, + handler_location: loc_for(child, file_rel), + auth_required: false, + })); + return; + } + } + } + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + recurse(child, bytes, file_rel, route_path, out); + } + } + recurse(root, bytes, file_rel, route_path, out); +} + +fn collect_use_server_exports( + root: Node, + bytes: &[u8], + file_rel: &str, + route_path: &str, + out: &mut Vec, +) { + let mut cursor = root.walk(); + for child 
in root.children(&mut cursor) { + if child.kind() == "export_statement" + && let Some((name, span)) = export_function_name(child, bytes) + { + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc_for(child, file_rel), + framework: Framework::NextServerAction, + method: HttpMethod::POST, + route: route_path.to_string(), + handler_name: name, + handler_location: SourceLocation::new( + file_rel, + (span.0 + 1) as u32, + (span.1 + 1) as u32, + ), + auth_required: false, + })); + } + } +} + +fn export_function_name(node: Node, bytes: &[u8]) -> Option<(String, (usize, usize))> { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if let Some(extracted) = extract_named_function(child, bytes) { + return Some(extracted); + } + } + None +} + +fn file_level_use_server(root: Node, bytes: &[u8]) -> bool { + let mut cursor = root.walk(); + for child in root.children(&mut cursor) { + if child.kind() == "expression_statement" { + let mut cs = child.walk(); + for c in child.children(&mut cs) { + if c.kind() == "string" + && let Ok(text) = c.utf8_text(bytes) + { + let trimmed = text.trim().trim_matches(['\'', '"']); + if trimmed == "use server" { + return true; + } + } + } + return false; + } + if !matches!(child.kind(), "comment" | "import_statement") { + return false; + } + } + false +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_typescript::LANGUAGE_TSX.into()) + .unwrap(); + (parser.parse(src, None).unwrap(), src.as_bytes().to_vec()) + } + + #[test] + fn detects_app_router_get() { + let src = "export async function GET(req: Request) { return new Response('ok'); }\n"; + let (tree, bytes) = parse(src); + let nodes = detect_next_routes( + &tree, + &bytes, + &PathBuf::from("app/users/route.ts"), + None, + ); + assert_eq!(nodes.len(), 1); + let SurfaceNode::EntryPoint(ep) = &nodes[0] else { + 
panic!() + }; + assert_eq!(ep.method, HttpMethod::GET); + assert!(ep.route.contains("users")); + } +} diff --git a/src/surface/mod.rs b/src/surface/mod.rs index 3389fbcb..f53b7dda 100644 --- a/src/surface/mod.rs +++ b/src/surface/mod.rs @@ -24,8 +24,12 @@ use std::collections::BTreeMap; use std::path::Path; pub mod build; +pub mod dangerous; +pub mod datastore; +pub mod external; pub mod graph; pub mod lang; +pub mod reachability; /// Stable source location used as the primary key for every /// [`SurfaceNode`]. `file` is a project-relative POSIX path so the diff --git a/src/surface/reachability.rs b/src/surface/reachability.rs new file mode 100644 index 00000000..095f0451 --- /dev/null +++ b/src/surface/reachability.rs @@ -0,0 +1,192 @@ +//! Transitive-closure pass: connect [`SurfaceNode::EntryPoint`] nodes +//! to the [`SurfaceNode::DataStore`] / [`SurfaceNode::ExternalService`] +//! / [`SurfaceNode::DangerousLocal`] nodes they can reach via the +//! whole-program [`CallGraph`]. +//! +//! For each entry-point we first locate the matching call-graph +//! [`FuncKey`] by `(namespace, function_name)` (the entry-point's +//! `handler_location.file` is the project-relative POSIX path used as +//! `FuncKey::namespace`, and `handler_name` is the leaf function +//! name). From that node we run a BFS over forward call-graph edges +//! up to a small depth bound, and for every visited +//! `(file, function_name)` we look for a matching DataStore / +//! ExternalService / DangerousLocal node in the SurfaceMap, emitting +//! one [`EdgeKind::Reaches`] edge per match. +//! +//! Node match policy: the destination's `location.file` must equal +//! the visited call-graph node's namespace. This is best-effort but +//! deterministic — an entry-point that calls into a helper which then +//! calls `eval()` will surface the eval as a `Reaches` of the entry +//! point as long as the eval's host file is on the BFS frontier. 
+ +use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode}; +use crate::callgraph::CallGraph; +use crate::summary::GlobalSummaries; +use petgraph::Direction; +use std::collections::{HashMap, HashSet, VecDeque}; + +/// Maximum BFS depth from an entry-point node. Surface chains beyond +/// six call-graph hops are rare in practice and the cost of a deeper +/// walk is paid per entry-point per scan. A depth-bounded traversal +/// also prevents recursive cycles from blowing up. +const MAX_BFS_DEPTH: usize = 8; + +/// Populate [`EdgeKind::Reaches`] edges on `map`. Mutates the edge +/// list in place; the caller is expected to follow up with +/// [`SurfaceMap::canonicalize`] before serialisation. +pub fn populate_reaches_edges( + map: &mut SurfaceMap, + summaries: &GlobalSummaries, + call_graph: &CallGraph, +) { + if map.nodes.is_empty() { + return; + } + let dst_index = build_destination_index(map); + if dst_index.is_empty() { + return; + } + let _ = summaries; + + let mut new_edges: HashSet = HashSet::new(); + for (entry_idx, node) in map.nodes.iter().enumerate() { + let SurfaceNode::EntryPoint(ep) = node else { + continue; + }; + let mut reachable_files: HashSet = HashSet::new(); + // Seed with the handler's host file — the entry-point itself + // counts as reachable, so any DataStore / ExternalService / + // DangerousLocal in the same file is connected even when the + // call graph cannot resolve the seed FuncKey. + reachable_files.insert(ep.handler_location.file.clone()); + + // Locate seed FuncKeys whose `namespace` matches the entry's + // file and whose `name` matches the handler. More than one + // seed is possible (overloaded methods, duplicate definitions). 
+ let seeds = call_graph + .index + .iter() + .filter(|(k, _)| k.name == ep.handler_name) + .filter(|(k, _)| { + k.namespace.ends_with(&ep.handler_location.file) + || ep.handler_location.file.ends_with(&k.namespace) + }) + .map(|(_, idx)| *idx) + .collect::>(); + + let mut visited: HashSet<_> = seeds.iter().copied().collect(); + let mut queue: VecDeque<(petgraph::graph::NodeIndex, usize)> = + seeds.iter().map(|n| (*n, 0)).collect(); + while let Some((node_idx, depth)) = queue.pop_front() { + if let Some(key) = call_graph.graph.node_weight(node_idx) { + reachable_files.insert(key.namespace.clone()); + } + if depth >= MAX_BFS_DEPTH { + continue; + } + for neighbour in call_graph + .graph + .neighbors_directed(node_idx, Direction::Outgoing) + { + if visited.insert(neighbour) { + queue.push_back((neighbour, depth + 1)); + } + } + } + + for (dst_idx, dst_file) in &dst_index { + if reachable_files.contains(dst_file) { + new_edges.insert(SurfaceEdge { + from: entry_idx as u32, + to: *dst_idx as u32, + kind: EdgeKind::Reaches, + }); + } + } + } + + map.edges.extend(new_edges); +} + +/// Build a lookup from destination node index → destination file. +/// Restricted to the three reachable-from-entry-point variants. +fn build_destination_index(map: &SurfaceMap) -> Vec<(usize, String)> { + let mut out: Vec<(usize, String)> = Vec::new(); + for (idx, node) in map.nodes.iter().enumerate() { + let file = match node { + SurfaceNode::DataStore(n) => n.location.file.clone(), + SurfaceNode::ExternalService(n) => n.location.file.clone(), + SurfaceNode::DangerousLocal(n) => n.location.file.clone(), + SurfaceNode::EntryPoint(_) => continue, + }; + out.push((idx, file)); + } + out +} + +/// Cheap by-file inverted index of the destination nodes — exposed for +/// future callers (chain composer, CLI tree printer) that want a +/// constant-time "what does this file expose" lookup without rerunning +/// reachability. 
+#[allow(dead_code)] +pub fn destinations_by_file(map: &SurfaceMap) -> HashMap> { + let mut out: HashMap> = HashMap::new(); + for (idx, node) in map.nodes.iter().enumerate() { + let file = match node { + SurfaceNode::DataStore(n) => &n.location.file, + SurfaceNode::ExternalService(n) => &n.location.file, + SurfaceNode::DangerousLocal(n) => &n.location.file, + SurfaceNode::EntryPoint(_) => continue, + }; + out.entry(file.clone()).or_default().push(idx); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::entry_points::HttpMethod; + use crate::surface::{ + DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode, + }; + + fn ep(file: &str, handler: &str) -> SurfaceNode { + SurfaceNode::EntryPoint(EntryPoint { + location: SourceLocation::new(file, 1, 1), + framework: Framework::Flask, + method: HttpMethod::GET, + route: "/".into(), + handler_name: handler.into(), + handler_location: SourceLocation::new(file, 2, 1), + auth_required: false, + }) + } + + fn dl(file: &str, name: &str) -> SurfaceNode { + SurfaceNode::DangerousLocal(DangerousLocal { + location: SourceLocation::new(file, 0, 0), + function_name: name.into(), + cap_bits: 0x1, + }) + } + + #[test] + fn entry_in_same_file_as_dangerous_emits_reaches() { + let mut map = SurfaceMap::new(); + map.nodes.push(ep("app.py", "index")); + map.nodes.push(dl("app.py", "do_eval")); + let gs = GlobalSummaries::new(); + let cg = CallGraph { + graph: petgraph::graph::DiGraph::new(), + index: Default::default(), + unresolved_not_found: vec![], + unresolved_ambiguous: vec![], + }; + populate_reaches_edges(&mut map, &gs, &cg); + assert_eq!(map.edges.len(), 1); + assert_eq!(map.edges[0].kind, EdgeKind::Reaches); + assert_eq!(map.edges[0].from, 0); + assert_eq!(map.edges[0].to, 1); + } +} diff --git a/tests/dynamic_fixtures/surface/go_gin/main.go b/tests/dynamic_fixtures/surface/go_gin/main.go new file mode 100644 index 00000000..35b25bb9 --- /dev/null +++ 
b/tests/dynamic_fixtures/surface/go_gin/main.go @@ -0,0 +1,13 @@ +package main + +import "github.com/gin-gonic/gin" + +func main() { + r := gin.Default() + r.GET("/users", listUsers) + r.Run() +} + +func listUsers(c *gin.Context) { + c.JSON(200, []string{}) +} diff --git a/tests/dynamic_fixtures/surface/go_http/main.go b/tests/dynamic_fixtures/surface/go_http/main.go new file mode 100644 index 00000000..d499622c --- /dev/null +++ b/tests/dynamic_fixtures/surface/go_http/main.go @@ -0,0 +1,12 @@ +package main + +import "net/http" + +func main() { + http.HandleFunc("/users", listUsers) + http.ListenAndServe(":8080", nil) +} + +func listUsers(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("[]")) +} diff --git a/tests/dynamic_fixtures/surface/java_quarkus/GreetResource.java b/tests/dynamic_fixtures/surface/java_quarkus/GreetResource.java new file mode 100644 index 00000000..8039208c --- /dev/null +++ b/tests/dynamic_fixtures/surface/java_quarkus/GreetResource.java @@ -0,0 +1,17 @@ +package com.example; + +import io.quarkus.runtime.Quarkus; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.ws.rs.GET; +import jakarta.ws.rs.Path; + +@ApplicationScoped +@Path("/api") +public class GreetResource { + + @GET + @Path("/hello") + public String hello() { + return "hi"; + } +} diff --git a/tests/dynamic_fixtures/surface/java_servlet/UserResource.java b/tests/dynamic_fixtures/surface/java_servlet/UserResource.java new file mode 100644 index 00000000..89d16a0f --- /dev/null +++ b/tests/dynamic_fixtures/surface/java_servlet/UserResource.java @@ -0,0 +1,14 @@ +package com.example; + +import jakarta.ws.rs.GET; +import jakarta.ws.rs.Path; + +@Path("/users") +public class UserResource { + + @GET + @Path("/{id}") + public String get() { + return "{}"; + } +} diff --git a/tests/dynamic_fixtures/surface/java_spring/UserController.java b/tests/dynamic_fixtures/surface/java_spring/UserController.java new file mode 100644 index 00000000..c0cf5551 --- /dev/null 
+++ b/tests/dynamic_fixtures/surface/java_spring/UserController.java @@ -0,0 +1,11 @@ +package com.example; + +@RestController +@RequestMapping("/api") +public class UserController { + + @GetMapping("/users") + public String list() { + return "[]"; + } +} diff --git a/tests/dynamic_fixtures/surface/js_express/server.js b/tests/dynamic_fixtures/surface/js_express/server.js new file mode 100644 index 00000000..b8f78a5b --- /dev/null +++ b/tests/dynamic_fixtures/surface/js_express/server.js @@ -0,0 +1,8 @@ +const express = require("express"); +const app = express(); + +app.get("/users", (req, res) => { + res.send("ok"); +}); + +app.listen(3000); diff --git a/tests/dynamic_fixtures/surface/js_koa/server.js b/tests/dynamic_fixtures/surface/js_koa/server.js new file mode 100644 index 00000000..55307ee6 --- /dev/null +++ b/tests/dynamic_fixtures/surface/js_koa/server.js @@ -0,0 +1,8 @@ +const Router = require("@koa/router"); +const router = new Router(); + +router.get("/users", async (ctx) => { + ctx.body = []; +}); + +module.exports = router; diff --git a/tests/dynamic_fixtures/surface/php_laravel/routes.php b/tests/dynamic_fixtures/surface/php_laravel/routes.php new file mode 100644 index 00000000..d7ab27f1 --- /dev/null +++ b/tests/dynamic_fixtures/surface/php_laravel/routes.php @@ -0,0 +1,3 @@ +get('/users', 'UsersController:list'); diff --git a/tests/dynamic_fixtures/surface/python_django/urls.py b/tests/dynamic_fixtures/surface/python_django/urls.py new file mode 100644 index 00000000..5779a5ec --- /dev/null +++ b/tests/dynamic_fixtures/surface/python_django/urls.py @@ -0,0 +1,10 @@ +from django.urls import path + + +def admin_view(request): + return None + + +urlpatterns = [ + path("admin/", admin_view), +] diff --git a/tests/dynamic_fixtures/surface/python_fastapi/api.py b/tests/dynamic_fixtures/surface/python_fastapi/api.py new file mode 100644 index 00000000..7bb539b4 --- /dev/null +++ b/tests/dynamic_fixtures/surface/python_fastapi/api.py @@ -0,0 +1,8 @@ +from 
fastapi import FastAPI + +app = FastAPI() + + +@app.get("/items") +def list_items(): + return [] diff --git a/tests/dynamic_fixtures/surface/python_flask/app.py b/tests/dynamic_fixtures/surface/python_flask/app.py new file mode 100644 index 00000000..847070e5 --- /dev/null +++ b/tests/dynamic_fixtures/surface/python_flask/app.py @@ -0,0 +1,8 @@ +from flask import Flask + +app = Flask(__name__) + + +@app.get("/users") +def list_users(): + return "ok" diff --git a/tests/dynamic_fixtures/surface/ruby_rails/users_controller.rb b/tests/dynamic_fixtures/surface/ruby_rails/users_controller.rb new file mode 100644 index 00000000..644fad11 --- /dev/null +++ b/tests/dynamic_fixtures/surface/ruby_rails/users_controller.rb @@ -0,0 +1,9 @@ +class UsersController < ApplicationController + def index + render json: [] + end + + def show + render json: {} + end +end diff --git a/tests/dynamic_fixtures/surface/ruby_sinatra/app.rb b/tests/dynamic_fixtures/surface/ruby_sinatra/app.rb new file mode 100644 index 00000000..45beb95c --- /dev/null +++ b/tests/dynamic_fixtures/surface/ruby_sinatra/app.rb @@ -0,0 +1,5 @@ +require 'sinatra' + +get '/users' do + '[]' +end diff --git a/tests/dynamic_fixtures/surface/rust_actix/main.rs b/tests/dynamic_fixtures/surface/rust_actix/main.rs new file mode 100644 index 00000000..c5cd573b --- /dev/null +++ b/tests/dynamic_fixtures/surface/rust_actix/main.rs @@ -0,0 +1,6 @@ +use actix_web::{get, HttpResponse}; + +#[get("/users")] +async fn list_users() -> HttpResponse { + HttpResponse::Ok().finish() +} diff --git a/tests/dynamic_fixtures/surface/rust_axum/main.rs b/tests/dynamic_fixtures/surface/rust_axum/main.rs new file mode 100644 index 00000000..f1e262e1 --- /dev/null +++ b/tests/dynamic_fixtures/surface/rust_axum/main.rs @@ -0,0 +1,9 @@ +use axum::{routing::get, Router}; + +async fn list_users() -> &'static str { + "[]" +} + +fn app() -> Router { + Router::new().route("/users", get(list_users)) +} diff --git 
a/tests/dynamic_fixtures/surface/ts_next/app/users/route.ts b/tests/dynamic_fixtures/surface/ts_next/app/users/route.ts new file mode 100644 index 00000000..9c40a5ad --- /dev/null +++ b/tests/dynamic_fixtures/surface/ts_next/app/users/route.ts @@ -0,0 +1,3 @@ +export async function GET(req: Request): Promise<Response> { + return new Response("ok"); +} diff --git a/tests/surface_cross_lang.rs b/tests/surface_cross_lang.rs new file mode 100644 index 00000000..cac13138 --- /dev/null +++ b/tests/surface_cross_lang.rs @@ -0,0 +1,208 @@ +//! Phase 22 — cross-language `SurfaceMap` framework probes. +//! +//! One fixture per (language, framework) pair under +//! `tests/dynamic_fixtures/surface/<lang_framework>/`. Each probe is exercised +//! through the public [`build_surface_map`] entry point and asserted +//! on: +//! +//! 1. At least one [`SurfaceNode::EntryPoint`] is emitted. +//! 2. The recognised entry-point carries the expected [`Framework`] +//! tag. +//! 3. The recognised entry-point's `route` field contains the expected +//! substring (the path declared in the fixture). 
+ +use nyx_scanner::callgraph::CallGraph; +use nyx_scanner::summary::GlobalSummaries; +use nyx_scanner::surface::{ + Framework, SurfaceMap, SurfaceNode, + build::{build_surface_map, SurfaceBuildInputs}, +}; +use nyx_scanner::utils::config::Config; +use std::path::{Path, PathBuf}; + +const FIXTURE_ROOT: &str = "tests/dynamic_fixtures/surface"; + +fn empty_call_graph() -> CallGraph { + CallGraph { + graph: petgraph::graph::DiGraph::new(), + index: Default::default(), + unresolved_not_found: vec![], + unresolved_ambiguous: vec![], + } +} + +fn build(fixture_dir: &str) -> SurfaceMap { + let dir = Path::new(FIXTURE_ROOT).join(fixture_dir); + let mut files: Vec = Vec::new(); + walk(&dir, &mut files); + let cfg = Config::default(); + let gs = GlobalSummaries::new(); + let cg = empty_call_graph(); + let inputs = SurfaceBuildInputs { + files: &files, + scan_root: Some(&dir), + global_summaries: &gs, + call_graph: &cg, + config: &cfg, + }; + build_surface_map(&inputs) +} + +fn walk(dir: &Path, out: &mut Vec) { + let entries = match std::fs::read_dir(dir) { + Ok(e) => e, + Err(_) => return, + }; + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + walk(&path, out); + } else { + out.push(path); + } + } +} + +fn assert_entry(map: &SurfaceMap, framework: Framework, route_substr: &str) { + let routes: Vec = map + .nodes + .iter() + .filter_map(|n| match n { + SurfaceNode::EntryPoint(ep) if ep.framework == framework => Some(ep.route.clone()), + _ => None, + }) + .collect(); + assert!( + !routes.is_empty(), + "no entry-point with framework {:?} found in map = {:#?}", + framework, + map.nodes + ); + assert!( + routes.iter().any(|r| r.contains(route_substr)), + "expected a route containing {route_substr:?}; got {routes:?}", + ); +} + +#[test] +fn python_flask_fixture() { + let map = build("python_flask"); + assert_entry(&map, Framework::Flask, "/users"); +} + +#[test] +fn python_fastapi_fixture() { + let map = build("python_fastapi"); + 
assert_entry(&map, Framework::FastApi, "/items"); +} + +#[test] +fn python_django_fixture() { + let map = build("python_django"); + assert_entry(&map, Framework::Django, "admin"); +} + +#[test] +fn js_express_fixture() { + let map = build("js_express"); + assert_entry(&map, Framework::Express, "/users"); +} + +#[test] +fn js_koa_fixture() { + let map = build("js_koa"); + // koa probe currently emits the Express variant tag because the + // SurfaceMap framework taxonomy folds koa-router under the + // generic "node http microframework" bucket. See + // [`nyx_scanner::surface::lang::js_koa`] doc comment. + assert_entry(&map, Framework::Express, "/users"); +} + +#[test] +fn ts_next_fixture() { + let map = build("ts_next"); + assert_entry(&map, Framework::NextAppRouter, "users"); +} + +#[test] +fn java_spring_fixture() { + let map = build("java_spring"); + assert_entry(&map, Framework::Spring, "/api/users"); +} + +#[test] +fn java_servlet_fixture() { + let map = build("java_servlet"); + assert_entry(&map, Framework::JaxRs, "/users"); +} + +#[test] +fn java_quarkus_fixture() { + let map = build("java_quarkus"); + assert_entry(&map, Framework::JaxRs, "/api/hello"); +} + +#[test] +fn go_http_fixture() { + let map = build("go_http"); + assert_entry(&map, Framework::NetHttp, "/users"); +} + +#[test] +fn go_gin_fixture() { + let map = build("go_gin"); + assert_entry(&map, Framework::Gin, "/users"); +} + +#[test] +fn php_laravel_fixture() { + let map = build("php_laravel"); + // Laravel folds into the generic Sinatra-like framework bucket + // because the SurfaceMap framework taxonomy is method-call shaped + // rather than per-stack. See `surface::lang::php_laravel`. 
+ assert_entry(&map, Framework::Sinatra, "/users"); +} + +#[test] +fn php_slim_fixture() { + let map = build("php_slim"); + assert_entry(&map, Framework::Sinatra, "/users"); +} + +#[test] +fn ruby_sinatra_fixture() { + let map = build("ruby_sinatra"); + assert_entry(&map, Framework::Sinatra, "/users"); +} + +#[test] +fn ruby_rails_fixture() { + let map = build("ruby_rails"); + // Controller actions have empty routes because the route table + // lives in `config/routes.rb` (separate file). Assert on the + // handler name surfacing instead. + let handlers: Vec = map + .nodes + .iter() + .filter_map(|n| match n { + SurfaceNode::EntryPoint(ep) if ep.framework == Framework::Rails => { + Some(ep.handler_name.clone()) + } + _ => None, + }) + .collect(); + assert!(handlers.contains(&"index".to_string())); + assert!(handlers.contains(&"show".to_string())); +} + +#[test] +fn rust_actix_fixture() { + let map = build("rust_actix"); + assert_entry(&map, Framework::Actix, "/users"); +} + +#[test] +fn rust_axum_fixture() { + let map = build("rust_axum"); + assert_entry(&map, Framework::Axum, "/users"); +}