//! Top-level [`SurfaceMap`] builder.
//!
//! Phase 22 dispatch:
//!
//! 1. Per-file framework probes (one parser per language) emit
//!    [`SurfaceNode::EntryPoint`] nodes for every recognised route /
//!    handler.
//! 2. [`super::datastore::detect_data_stores`] walks
//!    [`GlobalSummaries`] and emits [`SurfaceNode::DataStore`] nodes
//!    for every recognised driver call.
//! 3. [`super::external::detect_external_services`] walks summaries +
//!    SSRF caps and emits [`SurfaceNode::ExternalService`] nodes.
//! 4. [`super::dangerous::detect_dangerous_locals`] walks summaries
//!    and emits [`SurfaceNode::DangerousLocal`] nodes for every
//!    function whose `sink_caps` include CODE_EXEC / DESERIALIZE /
//!    SSTI / FMT_STRING.
//! 5. [`super::reachability::populate_reaches_edges`] runs a BFS over
//!    the [`CallGraph`] from each entry-point handler, emitting
//!    [`super::EdgeKind::Reaches`] edges to every reachable
//!    DataStore / ExternalService / DangerousLocal.
//! 6. [`SurfaceMap::canonicalize`] sorts nodes + edges so the
//!    serialised JSON is byte-deterministic across rescans.
//!
//! Per-file errors (parse failure, unsupported language) are
//! swallowed so a single bad file does not kill the whole map.
use crate::callgraph::CallGraph; use crate::summary::GlobalSummaries; use crate::surface::{ SurfaceMap, dangerous, datastore, external, lang::{ go_gin, go_http, java_quarkus, java_servlet, java_spring, js_express, js_koa, php_laravel, php_slim, python_django, python_fastapi, python_flask, ruby_rails, ruby_sinatra, rust_actix, rust_axum, ts_next, }, reachability, }; use crate::utils::config::Config; use std::path::{Path, PathBuf}; use tree_sitter::Parser; pub struct SurfaceBuildInputs<'a> { pub files: &'a [PathBuf], pub scan_root: Option<&'a Path>, pub global_summaries: &'a GlobalSummaries, pub call_graph: &'a CallGraph, pub config: &'a Config, } pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { let mut map = SurfaceMap::new(); let _ = inputs.config; let mut parsers = Parsers::new(); for path in inputs.files { let Ok(bytes) = std::fs::read(path) else { continue; }; let kind = classify_file(path); let nodes = match kind { FileKind::Python => parsers .python .as_mut() .and_then(|p| p.parse(&bytes, None)) .map(|tree| { let mut all = python_flask::detect_flask_routes(&tree, &bytes, path, inputs.scan_root); all.extend(python_fastapi::detect_fastapi_routes( &tree, &bytes, path, inputs.scan_root, )); all.extend(python_django::detect_django_routes( &tree, &bytes, path, inputs.scan_root, )); all }) .unwrap_or_default(), FileKind::JavaScript => parsers .javascript .as_mut() .and_then(|p| p.parse(&bytes, None)) .map(|tree| { let mut all = js_express::detect_express_routes(&tree, &bytes, path, inputs.scan_root); all.extend(js_koa::detect_koa_routes( &tree, &bytes, path, inputs.scan_root, )); all }) .unwrap_or_default(), FileKind::TypeScript => parsers .typescript .as_mut() .and_then(|p| p.parse(&bytes, None)) .map(|tree| { let mut all = js_express::detect_express_routes(&tree, &bytes, path, inputs.scan_root); all.extend(js_koa::detect_koa_routes( &tree, &bytes, path, inputs.scan_root, )); all.extend(ts_next::detect_next_routes( &tree, &bytes, path, 
inputs.scan_root, )); all }) .unwrap_or_default(), FileKind::Java => parsers .java .as_mut() .and_then(|p| p.parse(&bytes, None)) .map(|tree| { let mut all = java_spring::detect_spring_routes(&tree, &bytes, path, inputs.scan_root); all.extend(java_servlet::detect_servlet_routes( &tree, &bytes, path, inputs.scan_root, )); all.extend(java_quarkus::detect_quarkus_routes( &tree, &bytes, path, inputs.scan_root, )); all }) .unwrap_or_default(), FileKind::Go => parsers .go .as_mut() .and_then(|p| p.parse(&bytes, None)) .map(|tree| { let mut all = go_http::detect_go_http_routes(&tree, &bytes, path, inputs.scan_root); all.extend(go_gin::detect_gin_routes( &tree, &bytes, path, inputs.scan_root, )); all }) .unwrap_or_default(), FileKind::Php => parsers .php .as_mut() .and_then(|p| p.parse(&bytes, None)) .map(|tree| { let mut all = php_laravel::detect_laravel_routes(&tree, &bytes, path, inputs.scan_root); all.extend(php_slim::detect_slim_routes( &tree, &bytes, path, inputs.scan_root, )); all }) .unwrap_or_default(), FileKind::Ruby => parsers .ruby .as_mut() .and_then(|p| p.parse(&bytes, None)) .map(|tree| { let mut all = ruby_sinatra::detect_sinatra_routes(&tree, &bytes, path, inputs.scan_root); all.extend(ruby_rails::detect_rails_routes( &tree, &bytes, path, inputs.scan_root, )); all }) .unwrap_or_default(), FileKind::Rust => parsers .rust .as_mut() .and_then(|p| p.parse(&bytes, None)) .map(|tree| { let mut all = rust_actix::detect_actix_routes(&tree, &bytes, path, inputs.scan_root); all.extend(rust_axum::detect_axum_routes( &tree, &bytes, path, inputs.scan_root, )); all }) .unwrap_or_default(), FileKind::Other => Vec::new(), }; for n in nodes { map.nodes.push(n); } } // Phase 22 — Track F.3: data-store / external-service / // dangerous-local detection from summaries. 
map.nodes .extend(datastore::detect_data_stores(inputs.global_summaries)); map.nodes .extend(external::detect_external_services(inputs.global_summaries)); map.nodes .extend(dangerous::detect_dangerous_locals(inputs.global_summaries)); // Canonicalise so node indices are stable before reachability // builds edges referring to those indices. map.canonicalize(); // Phase 22 — Track F.3: transitive closure over the call graph. reachability::populate_reaches_edges(&mut map, inputs.global_summaries, inputs.call_graph); // Re-canonicalise: edges added by reachability need to be sorted // so the serialised JSON stays byte-deterministic. map.canonicalize(); map } #[derive(Copy, Clone, PartialEq, Eq)] enum FileKind { Python, JavaScript, TypeScript, Java, Go, Php, Ruby, Rust, Other, } fn classify_file(path: &Path) -> FileKind { match path.extension().and_then(|s| s.to_str()) { Some("py") | Some("pyi") => FileKind::Python, Some("js") | Some("jsx") | Some("mjs") | Some("cjs") => FileKind::JavaScript, Some("ts") | Some("tsx") | Some("mts") | Some("cts") => FileKind::TypeScript, Some("java") => FileKind::Java, Some("go") => FileKind::Go, Some("php") => FileKind::Php, Some("rb") => FileKind::Ruby, Some("rs") => FileKind::Rust, _ => FileKind::Other, } } struct Parsers { python: Option, javascript: Option, typescript: Option, java: Option, go: Option, php: Option, ruby: Option, rust: Option, } impl Parsers { fn new() -> Self { Self { python: parser_for(tree_sitter_python::LANGUAGE.into()), javascript: parser_for(tree_sitter_javascript::LANGUAGE.into()), typescript: parser_for(tree_sitter_typescript::LANGUAGE_TSX.into()), java: parser_for(tree_sitter_java::LANGUAGE.into()), go: parser_for(tree_sitter_go::LANGUAGE.into()), php: parser_for(tree_sitter_php::LANGUAGE_PHP.into()), ruby: parser_for(tree_sitter_ruby::LANGUAGE.into()), rust: parser_for(tree_sitter_rust::LANGUAGE.into()), } } } fn parser_for(language: tree_sitter::Language) -> Option { let mut parser = Parser::new(); 
parser.set_language(&language).ok()?; Some(parser) } #[cfg(test)] mod tests { use super::*; use crate::entry_points::HttpMethod; use crate::surface::SurfaceNode; use std::fs; use tempfile::tempdir; fn empty_inputs<'a>( files: &'a [PathBuf], scan_root: Option<&'a Path>, gs: &'a GlobalSummaries, cg: &'a CallGraph, cfg: &'a Config, ) -> SurfaceBuildInputs<'a> { SurfaceBuildInputs { files, scan_root, global_summaries: gs, call_graph: cg, config: cfg, } } fn empty_call_graph() -> CallGraph { CallGraph { graph: petgraph::graph::DiGraph::new(), index: Default::default(), unresolved_not_found: vec![], unresolved_ambiguous: vec![], } } #[test] fn empty_inputs_produce_empty_map() { let dir = tempdir().unwrap(); let cfg = Config::default(); let gs = GlobalSummaries::new(); let cg = empty_call_graph(); let files: Vec = vec![]; let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); let map = build_surface_map(&inputs); assert_eq!(map.node_count(), 0); assert_eq!(map.edge_count(), 0); } #[test] fn flask_file_produces_entry_points() { let dir = tempdir().unwrap(); let py = dir.path().join("app.py"); fs::write( &py, r#" from flask import Flask app = Flask(__name__) @app.route("/") def index(): return "hi" @app.post("/submit") def submit(): return "ok" "#, ) .unwrap(); let cfg = Config::default(); let gs = GlobalSummaries::new(); let cg = empty_call_graph(); let files = vec![py]; let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); let map = build_surface_map(&inputs); assert_eq!(map.node_count(), 2); let methods: Vec = map.entry_points().map(|ep| ep.method).collect(); assert!(methods.contains(&HttpMethod::GET)); assert!(methods.contains(&HttpMethod::POST)); } #[test] fn fastapi_file_produces_entry_points() { let dir = tempdir().unwrap(); let py = dir.path().join("api.py"); fs::write( &py, "from fastapi import FastAPI\napp = FastAPI()\n@app.get('/users')\ndef list_users(): pass\n@app.post('/items')\ndef create(): pass\n", ) .unwrap(); let cfg = 
Config::default(); let gs = GlobalSummaries::new(); let cg = empty_call_graph(); let files = vec![py]; let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); let map = build_surface_map(&inputs); assert_eq!(map.node_count(), 2); } #[test] fn dangerous_local_emits_node_and_reaches_edge_to_same_file_entry() { use crate::labels::Cap; use crate::summary::FuncSummary; use crate::symbol::{FuncKey, Lang}; let dir = tempdir().unwrap(); let py = dir.path().join("app.py"); fs::write( &py, r#" from flask import Flask app = Flask(__name__) @app.route("/eval") def evaluator(): return "" "#, ) .unwrap(); let cfg = Config::default(); let mut gs = GlobalSummaries::new(); gs.insert( FuncKey::new_function(Lang::Python, "app.py", "evaluator", None), FuncSummary { name: "evaluator".to_string(), file_path: "app.py".to_string(), lang: "python".to_string(), sink_caps: Cap::CODE_EXEC.bits(), ..Default::default() }, ); let cg = empty_call_graph(); let files = vec![py]; let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); let map = build_surface_map(&inputs); assert!( map.nodes .iter() .any(|n| matches!(n, SurfaceNode::DangerousLocal(_))) ); assert!( map.edges .iter() .any(|e| matches!(e.kind, crate::surface::EdgeKind::Reaches)) ); } }