diff --git a/src/commands/scan.rs b/src/commands/scan.rs index 8086af4c..a52771f5 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -2126,6 +2126,7 @@ pub(crate) fn scan_filesystem_with_observer( ); } let pass2_start = std::time::Instant::now(); + let mut gs = global_summaries; let mut diags: Vec = { let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered(); let pb = make_progress_bar( @@ -2156,7 +2157,6 @@ pub(crate) fn scan_filesystem_with_observer( ); } - let mut gs = global_summaries; let total_batches = batches.len() as u64 + u64::from(!orphans.is_empty()); if let Some(p) = progress { p.set_batches_total(total_batches); @@ -2177,6 +2177,20 @@ pub(crate) fn scan_filesystem_with_observer( result }; tracing::info!(diags = diags.len(), "pass 2 complete"); + + // Phase 21: build the SurfaceMap from the post-pass-2 view. + // No persistence here; the index-backed path persists into the + // `surface_map` SQLite table. Errors here are swallowed: the + // surface map is an additive Phase F deliverable, not a gate. + let _surface_map = crate::surface::build::build_surface_map( + &crate::surface::build::SurfaceBuildInputs { + files: &all_paths, + scan_root: Some(root), + global_summaries: &gs, + call_graph: &call_graph, + config: cfg, + }, + ); if let Some(p) = progress { p.record_pass2_ms(pass2_start.elapsed().as_millis() as u64); } @@ -2987,6 +3001,34 @@ pub fn scan_with_index_parallel_observer( let mut diags = topo_diags; + // Phase 21: build + persist the SurfaceMap from the post-pass-2 + // view. Errors here are logged but not propagated — the surface + // map is an additive Phase F deliverable, not a scan gate. 
+ { + let surface_map = crate::surface::build::build_surface_map( + &crate::surface::build::SurfaceBuildInputs { + files: &files, + scan_root: Some(scan_root), + global_summaries: &global_summaries, + call_graph: &call_graph, + config: cfg, + }, + ); + let mut idx = Indexer::from_pool(project, &pool)?; + if let Err(e) = idx.replace_surface_map(&surface_map) { + tracing::warn!("failed to persist surface_map: {e}"); + } else if let Some(l) = logs { + l.info( + format!( + "Surface map: {} nodes, {} edges", + surface_map.node_count(), + surface_map.edge_count() + ), + None, + ); + } + } + // NOTE: Taint-mode output is *not* filtered here. `run_rules_on_bytes` // already gates AST queries and auth analyses behind `mode == Full`, so // Taint-mode raw output is exactly the set of diagnostics the analysis diff --git a/src/database.rs b/src/database.rs index 176ac788..90db6642 100644 --- a/src/database.rs +++ b/src/database.rs @@ -228,6 +228,15 @@ pub mod index { CREATE INDEX IF NOT EXISTS idx_dynamic_verdict_cache_spec_hash ON dynamic_verdict_cache(spec_hash); + -- Phase 21: persisted attack-surface map. One row per project. + -- Stored as canonical JSON so the round-trip is byte-identical + -- across rescans (see `SurfaceMap::to_json`). + CREATE TABLE IF NOT EXISTS surface_map ( + project TEXT PRIMARY KEY, + map_json BLOB NOT NULL, + updated_at INTEGER NOT NULL + ); + -- Indexes on (project, file_path) for the per-file replace_* paths. -- Without these, every DELETE WHERE project=? AND file_path=? does a -- full table scan, which dominates indexing time as the cache grows. @@ -547,6 +556,22 @@ pub mod index { conn.execute_batch(SCHEMA)?; } + // Phase 21: ensure the `surface_map` table exists on + // DBs created before this column set was introduced. + let surface_exists: bool = conn + .query_row( + "SELECT 1 FROM sqlite_master + WHERE type = 'table' AND name = 'surface_map'", + [], + |_| Ok(true), + ) + .optional()? 
+ .unwrap_or(false); + if !surface_exists { + tracing::info!("creating surface_map table"); + conn.execute_batch(SCHEMA)?; + } + // Schema version check: invalidate cached summary tables // when the on-disk artefact layout has changed in an // incompatible way, independently of the engine version. @@ -1882,6 +1907,63 @@ pub mod index { Ok(out) } + /// Persist a [`crate::surface::SurfaceMap`] for this project. + /// + /// Replaces any previously-persisted map; the table holds one row + /// per project. The map is canonicalised before serialisation so + /// `replace_surface_map` + `load_surface_map` round-trip is + /// byte-identical for structurally identical maps. + pub fn replace_surface_map( + &mut self, + map: &crate::surface::SurfaceMap, + ) -> NyxResult<()> { + let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; + let mut canon = map.clone(); + let bytes = canon + .to_json() + .map_err(|e| NyxError::Msg(format!("surface map serialise: {e}")))?; + self.c().execute( + "INSERT OR REPLACE INTO surface_map (project, map_json, updated_at) + VALUES (?1, ?2, ?3)", + params![self.project, bytes, now], + )?; + Ok(()) + } + + /// Load the persisted [`crate::surface::SurfaceMap`] for this + /// project, or `None` when no map has been written. + pub fn load_surface_map(&self) -> NyxResult> { + let row: Option> = self + .c() + .query_row( + "SELECT map_json FROM surface_map WHERE project = ?1", + params![self.project], + |r| r.get::<_, Vec>(0), + ) + .optional()?; + let Some(bytes) = row else { + return Ok(None); + }; + let map = crate::surface::SurfaceMap::from_json(&bytes) + .map_err(|e| NyxError::Msg(format!("surface map deserialise: {e}")))?; + Ok(Some(map)) + } + + /// Return the raw JSON bytes stored for the surface map without + /// deserialising. Used by the round-trip parity tests so they + /// can compare on-disk bytes across rescans. 
+ pub fn load_surface_map_bytes(&self) -> NyxResult>> { + let row: Option> = self + .c() + .query_row( + "SELECT map_json FROM surface_map WHERE project = ?1", + params![self.project], + |r| r.get::<_, Vec>(0), + ) + .optional()?; + Ok(row) + } + /// Remove a file and all derived persisted state for this project. /// /// This deletes the file row, issues, and all persisted summary rows so diff --git a/src/lib.rs b/src/lib.rs index 4a5065f1..c4528394 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -121,6 +121,7 @@ pub mod ssa; pub mod state; pub mod summary; pub mod suppress; +pub mod surface; pub mod symbol; pub mod symex; pub mod taint; diff --git a/src/surface/build.rs b/src/surface/build.rs new file mode 100644 index 00000000..ec2a3c26 --- /dev/null +++ b/src/surface/build.rs @@ -0,0 +1,163 @@ +//! Top-level [`SurfaceMap`] builder. +//! +//! Consumes the post-pass-2 [`GlobalSummaries`] + [`CallGraph`] for +//! call-graph reachability and the project's file list for the +//! per-language framework probes. Phase 21 only invokes the Python + +//! Flask probe; Phase 22 wires the remaining language probes through +//! [`crate::surface::lang`]. +//! +//! Build steps (Phase 21): +//! +//! 1. For every Python file, parse it once and invoke +//! [`crate::surface::lang::python_flask::detect_flask_routes`]. +//! 2. Collect the resulting [`SurfaceNode::EntryPoint`] nodes. +//! 3. Canonicalise the map (sort nodes + edges, dedup edges) so two +//! runs over the same source produce byte-identical JSON. + +use crate::callgraph::CallGraph; +use crate::summary::GlobalSummaries; +use crate::surface::{SurfaceMap, lang::python_flask}; +use crate::utils::config::Config; +use std::path::{Path, PathBuf}; + +/// Inputs to [`build_surface_map`]. Wrapped in a struct so the +/// downstream Phase 22 work (additional probes, call-graph-derived +/// `Reaches` edges, label-rule data-source nodes) can extend the +/// signature without touching every caller. 
+pub struct SurfaceBuildInputs<'a> { + pub files: &'a [PathBuf], + pub scan_root: Option<&'a Path>, + pub global_summaries: &'a GlobalSummaries, + pub call_graph: &'a CallGraph, + pub config: &'a Config, +} + +/// Build a [`SurfaceMap`] for the project under analysis. +/// +/// Best-effort: parse failures on individual files are swallowed so +/// the surface map of a 10k-file project is not killed by one bad +/// Python file. Returns an empty map when the inputs contain no +/// recognised entry-points. +pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { + let mut map = SurfaceMap::new(); + + // Phase 21: only Python / Flask. The downstream Phase 22 probes + // will dispatch on file extension here. + let mut python_parser = tree_sitter::Parser::new(); + if python_parser + .set_language(&tree_sitter_python::LANGUAGE.into()) + .is_err() + { + return map; + } + + for path in inputs.files { + if !is_python_file(path) { + continue; + } + let Ok(bytes) = std::fs::read(path) else { + continue; + }; + let Some(tree) = python_parser.parse(&bytes, None) else { + continue; + }; + let nodes = + python_flask::detect_flask_routes(&tree, &bytes, path, inputs.scan_root); + for n in nodes { + map.nodes.push(n); + } + } + + // GlobalSummaries / CallGraph are reserved for Phase 22's + // `DangerousLocal` + `Reaches`-edge fill-in. Phase 21 records + // them in the inputs so callers do not need to be touched again + // when Phase 22 wires them up. 
+ let _ = inputs.global_summaries; + let _ = inputs.call_graph; + let _ = inputs.config; + + map.canonicalize(); + map +} + +fn is_python_file(path: &Path) -> bool { + matches!( + path.extension().and_then(|s| s.to_str()), + Some("py") | Some("pyi") + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::entry_points::HttpMethod; + use std::fs; + use tempfile::tempdir; + + #[test] + fn empty_inputs_produce_empty_map() { + let dir = tempdir().unwrap(); + let cfg = Config::default(); + let gs = GlobalSummaries::new(); + let cg = CallGraph { + graph: petgraph::graph::DiGraph::new(), + index: Default::default(), + unresolved_not_found: vec![], + unresolved_ambiguous: vec![], + }; + let files: Vec = vec![]; + let inputs = SurfaceBuildInputs { + files: &files, + scan_root: Some(dir.path()), + global_summaries: &gs, + call_graph: &cg, + config: &cfg, + }; + let map = build_surface_map(&inputs); + assert_eq!(map.node_count(), 0); + assert_eq!(map.edge_count(), 0); + } + + #[test] + fn flask_file_produces_entry_points() { + let dir = tempdir().unwrap(); + let py = dir.path().join("app.py"); + fs::write( + &py, + r#" +from flask import Flask +app = Flask(__name__) + +@app.route("/") +def index(): + return "hi" + +@app.post("/submit") +def submit(): + return "ok" +"#, + ) + .unwrap(); + let cfg = Config::default(); + let gs = GlobalSummaries::new(); + let cg = CallGraph { + graph: petgraph::graph::DiGraph::new(), + index: Default::default(), + unresolved_not_found: vec![], + unresolved_ambiguous: vec![], + }; + let files = vec![py.clone()]; + let inputs = SurfaceBuildInputs { + files: &files, + scan_root: Some(dir.path()), + global_summaries: &gs, + call_graph: &cg, + config: &cfg, + }; + let map = build_surface_map(&inputs); + assert_eq!(map.node_count(), 2); + let methods: Vec = map.entry_points().map(|ep| ep.method).collect(); + assert!(methods.contains(&HttpMethod::GET)); + assert!(methods.contains(&HttpMethod::POST)); + } +} diff --git a/src/surface/graph.rs 
b/src/surface/graph.rs new file mode 100644 index 00000000..1d7d9b54 --- /dev/null +++ b/src/surface/graph.rs @@ -0,0 +1,107 @@ +//! petgraph-backed read-only view over a [`SurfaceMap`]. +//! +//! The on-disk shape is two parallel `Vec`s (deterministic ordering, +//! byte-identical JSON), but downstream consumers — the Track G chain +//! composer, the `nyx surface` CLI walker — want graph queries: +//! neighbours, reachability, topological order. [`petgraph_view`] +//! constructs a `DiGraph, EdgeRef<'_>>` on demand without +//! cloning the underlying nodes or edges. + +use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode}; +use petgraph::graph::{DiGraph, NodeIndex}; +use std::collections::HashMap; + +/// Borrowed handle to one [`SurfaceNode`] inside the petgraph view. +#[derive(Debug, Clone, Copy)] +pub struct NodeRef<'a> { + pub idx: u32, + pub node: &'a SurfaceNode, +} + +/// Borrowed handle to one [`SurfaceEdge`] inside the petgraph view. +#[derive(Debug, Clone, Copy)] +pub struct EdgeRef<'a> { + pub edge: &'a SurfaceEdge, +} + +impl<'a> EdgeRef<'a> { + pub fn kind(&self) -> EdgeKind { + self.edge.kind + } +} + +/// Materialise a petgraph view of `map`. Node indices in the returned +/// graph match `map.nodes` ordering 1:1, and the `lookup` map lets +/// callers translate from the surface index (`u32`) to the petgraph +/// [`NodeIndex`]. Walking edges respects `map.edges` order. 
+pub fn petgraph_view(map: &SurfaceMap) -> SurfaceGraphView<'_> { + let mut graph: DiGraph, EdgeRef<'_>> = DiGraph::new(); + let mut lookup: HashMap = HashMap::with_capacity(map.nodes.len()); + for (i, node) in map.nodes.iter().enumerate() { + let nx = graph.add_node(NodeRef { + idx: i as u32, + node, + }); + lookup.insert(i as u32, nx); + } + for edge in &map.edges { + if let (Some(&from), Some(&to)) = (lookup.get(&edge.from), lookup.get(&edge.to)) { + graph.add_edge(from, to, EdgeRef { edge }); + } + } + SurfaceGraphView { graph, lookup } +} + +/// petgraph view returned by [`petgraph_view`]. +pub struct SurfaceGraphView<'a> { + pub graph: DiGraph, EdgeRef<'a>>, + pub lookup: HashMap, +} + +impl<'a> SurfaceGraphView<'a> { + /// Resolve a surface index back to its petgraph [`NodeIndex`]. + pub fn node_index(&self, surface_idx: u32) -> Option { + self.lookup.get(&surface_idx).copied() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::entry_points::HttpMethod; + use crate::surface::{EntryPoint, Framework, SourceLocation}; + + #[test] + fn petgraph_view_preserves_indices() { + let mut m = SurfaceMap::new(); + m.nodes.push(SurfaceNode::EntryPoint(EntryPoint { + location: SourceLocation::new("a.py", 1, 1), + framework: Framework::Flask, + method: HttpMethod::GET, + route: "/a".into(), + handler_name: "h".into(), + handler_location: SourceLocation::new("a.py", 2, 1), + auth_required: false, + })); + m.nodes.push(SurfaceNode::EntryPoint(EntryPoint { + location: SourceLocation::new("b.py", 1, 1), + framework: Framework::Flask, + method: HttpMethod::POST, + route: "/b".into(), + handler_name: "h".into(), + handler_location: SourceLocation::new("b.py", 2, 1), + auth_required: false, + })); + m.edges.push(SurfaceEdge { + from: 0, + to: 1, + kind: EdgeKind::Calls, + }); + let view = petgraph_view(&m); + assert_eq!(view.graph.node_count(), 2); + assert_eq!(view.graph.edge_count(), 1); + let n0 = view.node_index(0).unwrap(); + let n1 = 
view.node_index(1).unwrap(); + assert!(view.graph.find_edge(n0, n1).is_some()); + } +} diff --git a/src/surface/lang/mod.rs b/src/surface/lang/mod.rs new file mode 100644 index 00000000..1dbe16c3 --- /dev/null +++ b/src/surface/lang/mod.rs @@ -0,0 +1,6 @@ +//! Per-language framework probes. Phase 21 ships Python + Flask; +//! Phase 22 generalises to FastAPI / Django, Java Spring / JAX-RS, +//! Ruby Rails / Sinatra, Go net/http / gin, Rust axum / actix / +//! rocket, JS/TS Express + Next.js. + +pub mod python_flask; diff --git a/src/surface/lang/python_flask.rs b/src/surface/lang/python_flask.rs new file mode 100644 index 00000000..5fbb3c60 --- /dev/null +++ b/src/surface/lang/python_flask.rs @@ -0,0 +1,413 @@ +//! Python + Flask framework probe. +//! +//! Walks a parsed Python file looking for the four canonical Flask +//! route shapes: +//! +//! * `@app.route("/path", methods=[...])` +//! * `@app.get("/path")` / `.post(...)` / etc. (Flask ≥ 2.0) +//! * `@bp.route("/path", methods=[...])` on a `Blueprint` +//! * `@bp.get("/path")` / `.post(...)` / etc. +//! +//! `auth_required` is inferred from the decorator stack: any decorator +//! whose textual representation matches one of [`AUTH_DECORATORS`] is +//! treated as an auth boundary on the following route. This catches +//! the canonical `@login_required` (Flask-Login), `@auth_required` +//! (custom guards), and `@jwt_required` / `@jwt_required()` (Flask-JWT +//! and -JWT-Extended). + +use crate::entry_points::HttpMethod; +use crate::surface::{ + EntryPoint, Framework, SourceLocation, SurfaceNode, relative_path_string, +}; +use std::path::Path; +use tree_sitter::{Node, Tree}; + +/// Decorator names that mark a route as requiring authentication. +/// Matched against the *leaf* of the decorator expression — i.e. the +/// last `attribute` / `identifier` segment — so `@login_required`, +/// `@auth.login_required`, and `@flask_login.login_required` all +/// match. Match is case-insensitive on the underscored form. 
+pub const AUTH_DECORATORS: &[&str] = &[ + "login_required", + "auth_required", + "jwt_required", + "token_required", + "requires_auth", + "authenticated", + "require_login", +]; + +/// Detect every Flask route in a parsed Python file. +/// +/// `scan_root` is used to convert the file path to a project-relative +/// POSIX path; pass `None` to record absolute paths. Returns one +/// [`SurfaceNode::EntryPoint`] per `@route` / `@get` / `@post` / … +/// decorator that targets a Flask-shaped receiver (`app`, `bp`, +/// `blueprint`, or anything ending in `_bp` / `Blueprint`). +pub fn detect_flask_routes( + tree: &Tree, + bytes: &[u8], + path: &Path, + scan_root: Option<&Path>, +) -> Vec { + let file_rel = relative_path_string(path, scan_root); + let mut out = Vec::new(); + walk_decorated(tree.root_node(), bytes, &mut |func_node, decorators| { + // Reverse pass: find Flask-route decorators and collect auth + // markers seen at *any* position in the decorator stack — + // Flask honours decorators in stacked order regardless of + // sequence relative to the route. + let auth_required = decorators + .iter() + .any(|d| decorator_is_auth_marker(*d, bytes)); + for dec in decorators { + if let Some((method, route_path)) = flask_route_decorator(*dec, bytes) { + let dec_pos = dec.start_position(); + let handler_pos = func_node.start_position(); + let handler_name = function_name(*func_node, bytes).unwrap_or_default(); + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: SourceLocation::new( + file_rel.clone(), + (dec_pos.row + 1) as u32, + (dec_pos.column + 1) as u32, + ), + framework: Framework::Flask, + method, + route: route_path, + handler_name, + handler_location: SourceLocation::new( + file_rel.clone(), + (handler_pos.row + 1) as u32, + (handler_pos.column + 1) as u32, + ), + auth_required, + })); + } + } + }); + out +} + +/// Walk every `function_definition` in `root` and invoke `visit` with +/// the function node plus the list of decorator nodes wrapping it. 
+/// Handles both `decorated_definition` (one or more decorators) and +/// bare `function_definition` (zero decorators, visit skipped). +fn walk_decorated<'tree, F>(root: Node<'tree>, bytes: &[u8], visit: &mut F) +where + F: FnMut(&Node<'tree>, &[Node<'tree>]), +{ + if root.kind() == "decorated_definition" { + let mut cursor = root.walk(); + let mut decorators: Vec> = Vec::new(); + let mut func: Option> = None; + for child in root.children(&mut cursor) { + match child.kind() { + "decorator" => decorators.push(child), + "function_definition" => func = Some(child), + _ => {} + } + } + if let Some(func_node) = func { + visit(&func_node, &decorators); + } + let _ = bytes; + } + let mut cursor = root.walk(); + for child in root.children(&mut cursor) { + walk_decorated(child, bytes, visit); + } +} + +/// Classify a `decorator` node as a Flask route, returning the +/// `(method, path)` pair. Recognises both the `@app.route(...)` and +/// `@app.(...)` shapes and the Blueprint equivalents. +fn flask_route_decorator(decorator: Node, bytes: &[u8]) -> Option<(HttpMethod, String)> { + let mut walker = decorator.walk(); + let expr = decorator + .children(&mut walker) + .find(|c| c.kind() != "@" && c.kind() != "comment")?; + let (call_target, args) = match expr.kind() { + "call" => ( + expr.child_by_field_name("function")?, + expr.child_by_field_name("arguments"), + ), + _ => return None, + }; + if call_target.kind() != "attribute" { + return None; + } + let object = call_target.child_by_field_name("object")?; + if !receiver_is_flask(object, bytes) { + return None; + } + let attr = call_target.child_by_field_name("attribute")?; + let attr_text = attr.utf8_text(bytes).ok()?; + let route_path = args + .and_then(|a| first_string_arg(a, bytes)) + .unwrap_or_default(); + if attr_text == "route" { + let method = args + .and_then(|a| extract_first_method(a, bytes)) + .unwrap_or(HttpMethod::GET); + return Some((method, route_path)); + } + if let Some(method) = 
HttpMethod::from_ident(attr_text) { + return Some((method, route_path)); + } + None +} + +/// `true` when the decorator receiver looks like a Flask app or +/// Blueprint binding. Allowlist over identifier names + a structural +/// match on call expressions like `Blueprint("name", __name__)`. +fn receiver_is_flask(object: Node, bytes: &[u8]) -> bool { + fn name_matches(text: &str) -> bool { + let lower = text.to_ascii_lowercase(); + lower == "app" + || lower == "bp" + || lower == "blueprint" + || lower.ends_with("_app") + || lower.ends_with("_bp") + || lower.ends_with("blueprint") + || lower.ends_with("api") + } + match object.kind() { + "identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches), + "attribute" => object + .child_by_field_name("attribute") + .and_then(|a| a.utf8_text(bytes).ok()) + .is_some_and(name_matches), + "call" => { + let Some(callee) = object.child_by_field_name("function") else { + return false; + }; + let Ok(text) = callee.utf8_text(bytes) else { + return false; + }; + let leaf = text.rsplit('.').next().unwrap_or(text).trim(); + leaf == "Flask" || leaf == "Blueprint" + } + _ => false, + } +} + +/// Pull the first string literal positional argument out of a +/// `argument_list` node. Used to extract the route path from +/// `@app.route("/path", ...)`. +fn first_string_arg(args: Node, bytes: &[u8]) -> Option { + let mut cursor = args.walk(); + for arg in args.children(&mut cursor) { + if arg.kind() == "string" { + return Some(string_literal_text(arg, bytes)); + } + } + None +} + +/// Strip Python quotes / prefix bytes (`b"..."`, `r"..."`) and return +/// the literal content. Falls back to the raw slice when the literal +/// has an unfamiliar shape. 
+fn string_literal_text(node: Node, bytes: &[u8]) -> String { + let raw = node.utf8_text(bytes).unwrap_or(""); + let trimmed = raw.trim(); + let mut s = trimmed; + while let Some(rest) = s.strip_prefix(['b', 'r', 'B', 'R', 'f', 'F']) { + s = rest; + } + let stripped = s + .trim_start_matches(['\'', '"']) + .trim_end_matches(['\'', '"']); + stripped.to_string() +} + +/// Extract the first HTTP method named in a `methods=[...]` kwarg, or +/// `None` when the decorator omits the kwarg. The first method in +/// the list wins; multi-method routes are recorded as the first +/// (Flask itself runs the same handler for every listed method). +fn extract_first_method(args: Node, bytes: &[u8]) -> Option { + let mut cursor = args.walk(); + for arg in args.children(&mut cursor) { + if arg.kind() != "keyword_argument" { + continue; + } + let name_node = arg.child_by_field_name("name")?; + let Ok(name) = name_node.utf8_text(bytes) else { + continue; + }; + if name != "methods" { + continue; + } + let value = arg.child_by_field_name("value")?; + let mut cur = value.walk(); + for child in value.children(&mut cur) { + if child.kind() == "string" { + let text = string_literal_text(child, bytes); + if let Some(m) = HttpMethod::from_ident(&text) { + return Some(m); + } + } + } + } + None +} + +/// `true` when the decorator is an auth-guard marker. Matches the +/// last segment of the decorator expression against +/// [`AUTH_DECORATORS`]. 
+fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool { + let mut walker = decorator.walk(); + let Some(expr) = decorator + .children(&mut walker) + .find(|c| c.kind() != "@" && c.kind() != "comment") + else { + return false; + }; + let target = match expr.kind() { + "call" => expr.child_by_field_name("function"), + _ => Some(expr), + }; + let Some(target) = target else { return false }; + let Ok(text) = target.utf8_text(bytes) else { + return false; + }; + let leaf = text.rsplit('.').next().unwrap_or(text).trim(); + AUTH_DECORATORS + .iter() + .any(|d| leaf.eq_ignore_ascii_case(d)) +} + +/// Read the function name from a `function_definition` node. +fn function_name(func: Node, bytes: &[u8]) -> Option { + let name_node = func.child_by_field_name("name")?; + name_node.utf8_text(bytes).ok().map(str::to_string) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn parse(src: &str) -> (Tree, Vec) { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_python::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(src, None).unwrap(); + (tree, src.as_bytes().to_vec()) + } + + fn detect(src: &str) -> Vec { + let (tree, bytes) = parse(src); + detect_flask_routes(&tree, &bytes, &PathBuf::from("app.py"), None) + } + + #[test] + fn detects_basic_route() { + let src = r#" +from flask import Flask +app = Flask(__name__) + +@app.route("/hello") +def hello(): + return "hi" +"#; + let nodes = detect(src); + assert_eq!(nodes.len(), 1); + if let SurfaceNode::EntryPoint(ep) = &nodes[0] { + assert_eq!(ep.route, "/hello"); + assert_eq!(ep.method, HttpMethod::GET); + assert_eq!(ep.handler_name, "hello"); + assert!(!ep.auth_required); + } else { + panic!("not an EntryPoint"); + } + } + + #[test] + fn detects_methods_kwarg() { + let src = r#" +from flask import Flask +app = Flask(__name__) + +@app.route("/submit", methods=["POST"]) +def submit(): + return "ok" +"#; + let nodes = detect(src); + let ep = match 
&nodes[0] { + SurfaceNode::EntryPoint(ep) => ep, + _ => panic!("not an EntryPoint"), + }; + assert_eq!(ep.method, HttpMethod::POST); + } + + #[test] + fn detects_verb_decorator() { + let src = r#" +from flask import Flask +app = Flask(__name__) + +@app.post("/users") +def create(): + return "ok" +"#; + let nodes = detect(src); + let ep = match &nodes[0] { + SurfaceNode::EntryPoint(ep) => ep, + _ => panic!("not an EntryPoint"), + }; + assert_eq!(ep.method, HttpMethod::POST); + } + + #[test] + fn detects_blueprint() { + let src = r#" +from flask import Blueprint +bp = Blueprint("admin", __name__) + +@bp.get("/admin") +def admin(): + return "secret" +"#; + let nodes = detect(src); + let ep = match &nodes[0] { + SurfaceNode::EntryPoint(ep) => ep, + _ => panic!("not an EntryPoint"), + }; + assert_eq!(ep.route, "/admin"); + } + + #[test] + fn detects_auth_decorator() { + let src = r#" +from flask import Flask +from flask_login import login_required +app = Flask(__name__) + +@app.route("/secret") +@login_required +def secret(): + return "shh" +"#; + let nodes = detect(src); + let ep = match &nodes[0] { + SurfaceNode::EntryPoint(ep) => ep, + _ => panic!("not an EntryPoint"), + }; + assert!(ep.auth_required); + } + + #[test] + fn rejects_non_flask_receiver() { + let src = r#" +client = requests.Session() + +@client.get("/whatever") +def x(): + pass +"#; + let nodes = detect(src); + // `client` does not match the Flask receiver allowlist. + assert!(nodes.is_empty()); + } +} diff --git a/src/surface/mod.rs b/src/surface/mod.rs new file mode 100644 index 00000000..3389fbcb --- /dev/null +++ b/src/surface/mod.rs @@ -0,0 +1,398 @@ +//! Phase 21 — attack-surface map. +//! +//! The `SurfaceMap` graph names the externally-reachable shape of the +//! project under analysis: HTTP route entry-points (Flask, FastAPI, +//! Spring, Express, …), the data stores they read/write, the external +//! services they talk to, and the local sinks they ultimately reach. +//! +//! 
Track G's chain composer walks this graph to translate findings into +//! cross-feature attack chains, and the `nyx surface` CLI prints a +//! human-readable tree from it. Phase 21 ships the graph types plus +//! the first framework probe (Python + Flask); Phase 22 generalises the +//! probe to the remaining languages and Phase 23 wires the CLI. +//! +//! Storage shape: a flat `Vec` sorted by [`SourceLocation`] +//! and a flat `Vec` sorted by `(from_idx, to_idx, kind)`. +//! Both vectors are byte-deterministic, so two scans of the same source +//! produce byte-identical JSON when round-tripped through SQLite. See +//! [`graph::petgraph_view`] for a petgraph-backed view used by the +//! chain composer. + +use crate::entry_points::HttpMethod; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use std::path::Path; + +pub mod build; +pub mod graph; +pub mod lang; + +/// Stable source location used as the primary key for every +/// [`SurfaceNode`]. `file` is a project-relative POSIX path so the +/// SurfaceMap is portable across machines; `line` and `col` are +/// 1-indexed. Ordering is `(file, line, col)` lexicographic, matching +/// the determinism the rest of the analyser uses for spans. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] +pub struct SourceLocation { + pub file: String, + pub line: u32, + pub col: u32, +} + +impl SourceLocation { + pub fn new(file: impl Into, line: u32, col: u32) -> Self { + Self { + file: file.into(), + line, + col, + } + } +} + +/// Web-framework tag attached to every [`EntryPoint`]. The set is +/// fixed in Phase 21 + 22 and matches the set of framework probes +/// behind [`lang`]. New frameworks land as new variants. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Framework { + Flask, + FastApi, + Django, + Express, + Spring, + JaxRs, + Rails, + Sinatra, + Axum, + Actix, + Rocket, + NetHttp, + Gin, + NextAppRouter, + NextServerAction, +} + +/// HTTP-handler entry-point recognised by a framework probe. +/// +/// Every node carries the route's declared path string, HTTP method, +/// and a resolved handler [`SourceLocation`] pointing at the function +/// definition. `auth_required` is `true` when the decorator stack +/// (or framework equivalent) contains an auth guard the probe was +/// able to identify; Phase 21 recognises Flask's `@login_required`, +/// `@auth_required`, and `@jwt_required` decorators. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct EntryPoint { + pub location: SourceLocation, + pub framework: Framework, + pub method: HttpMethod, + pub route: String, + pub handler_name: String, + pub handler_location: SourceLocation, + pub auth_required: bool, +} + +/// Persistent data store reachable from the surface — SQL database, +/// key-value store, document DB, blob store. Phase 22 populates this +/// from label-rule data-source matches and ORM-receiver type facts; +/// Phase 21 ships the type for forward-compat only and emits no +/// `DataStore` nodes. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct DataStore { + pub location: SourceLocation, + pub kind: DataStoreKind, + pub label: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DataStoreKind { + Sql, + KeyValue, + Document, + BlobStore, + Filesystem, + Unknown, +} + +/// External service the surface talks to over a network — third-party +/// HTTP API, message broker, search index. Phase 22 fills this in; +/// Phase 21 ships the type. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct ExternalService {
+    pub location: SourceLocation,
+    pub kind: ExternalServiceKind,
+    pub label: String,
+}
+
+/// Category of an [`ExternalService`] node. Serialised in snake_case.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ExternalServiceKind {
+    HttpApi,
+    MessageBroker,
+    SearchIndex,
+    AuthProvider,
+    Unknown,
+}
+
+/// Local sink with no externally observable side-effect — `eval`,
+/// `pickle.loads`, `subprocess.Popen`, raw SQL execute, etc. Phase 22
+/// fills this in from the existing label-rule registry; Phase 21
+/// ships the type.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct DangerousLocal {
+    pub location: SourceLocation,
+    pub function_name: String,
+    /// NOTE(review): presumably a capability bitmask from the label-rule
+    /// registry — confirm. Rendered in hex by `SurfaceNode::dedup_tag`.
+    pub cap_bits: u32,
+}
+
+/// A node in the [`SurfaceMap`]. Every variant carries a
+/// [`SourceLocation`] so the surface ordering is total and stable.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "node", rename_all = "snake_case")]
+pub enum SurfaceNode {
+    EntryPoint(EntryPoint),
+    DataStore(DataStore),
+    ExternalService(ExternalService),
+    DangerousLocal(DangerousLocal),
+}
+
+impl SurfaceNode {
+    /// The node's own location, whatever its variant. This is the
+    /// primary sort key used by [`SurfaceMap::canonicalize`].
+    pub fn location(&self) -> &SourceLocation {
+        match self {
+            SurfaceNode::EntryPoint(n) => &n.location,
+            SurfaceNode::DataStore(n) => &n.location,
+            SurfaceNode::ExternalService(n) => &n.location,
+            SurfaceNode::DangerousLocal(n) => &n.location,
+        }
+    }
+
+    /// Discriminator used as a secondary sort key so two nodes that
+    /// happen to share a [`SourceLocation`] (e.g. multiple route
+    /// decorators on one function) keep a deterministic relative
+    /// order. Returns the variant index in the enum declaration.
+    fn kind_ordinal(&self) -> u8 {
+        match self {
+            SurfaceNode::EntryPoint(_) => 0,
+            SurfaceNode::DataStore(_) => 1,
+            SurfaceNode::ExternalService(_) => 2,
+            SurfaceNode::DangerousLocal(_) => 3,
+        }
+    }
+
+    /// Tertiary sort key used to disambiguate nodes that share both
+    /// [`SourceLocation`] and kind — e.g. a single Flask function with
+    /// two `@app.route(...)` decorators ending up at the same handler
+    /// location.
+    fn dedup_tag(&self) -> String {
+        match self {
+            SurfaceNode::EntryPoint(n) => format!("{:?}:{:?}:{}", n.framework, n.method, n.route),
+            SurfaceNode::DataStore(n) => format!("{:?}:{}", n.kind, n.label),
+            SurfaceNode::ExternalService(n) => format!("{:?}:{}", n.kind, n.label),
+            SurfaceNode::DangerousLocal(n) => format!("{}:{:#x}", n.function_name, n.cap_bits),
+        }
+    }
+}
+
+/// Semantic kind of an edge in the [`SurfaceMap`]. Encodes the
+/// seven edge classes the chain composer walks; persistence is via
+/// JSON so adding a variant is a non-breaking schema change as long
+/// as the SQLite-level migration drops the old surface_map rows.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum EdgeKind {
+    /// Caller → callee. Wraps the call-graph edge so consumers do
+    /// not have to consult [`crate::callgraph::CallGraph`] directly.
+    Calls,
+    /// Function or entry-point reads from a data store / external
+    /// service.
+    ReadsFrom,
+    /// Function or entry-point writes to a data store.
+    WritesTo,
+    /// Function or entry-point sends a request to an external
+    /// service.
+    TalksTo,
+    /// Entry-point reaches a dangerous-local sink through some
+    /// transitive call chain.
+    Reaches,
+    /// Entry-point triggers a side-effecting action (job, email,
+    /// webhook) other than a direct call.
+    Triggers,
+    /// Entry-point gates downstream access on a successful auth
+    /// check. The `from` is the auth-check node, the `to` is the
+    /// entry-point.
+    AuthRequiredOn,
+}
+
+/// A single edge in the [`SurfaceMap`]. `from` and `to` are indices
+/// into [`SurfaceMap::nodes`]; the surface ordering keeps these
+/// stable across rescans.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
+pub struct SurfaceEdge {
+    pub from: u32,
+    pub to: u32,
+    pub kind: EdgeKind,
+}
+
+/// The attack-surface graph for a project. Stored as parallel
+/// `Vec`s keyed on [`SourceLocation`] so JSON serialisation is
+/// byte-deterministic and SQLite round-trips are stable.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct SurfaceMap {
+    pub nodes: Vec<SurfaceNode>,
+    pub edges: Vec<SurfaceEdge>,
+}
+
+impl SurfaceMap {
+    /// Construct an empty map.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Total node count. Cheap.
+    pub fn node_count(&self) -> usize {
+        self.nodes.len()
+    }
+
+    /// Total edge count. Cheap.
+    pub fn edge_count(&self) -> usize {
+        self.edges.len()
+    }
+
+    /// Return the first entry-point node matching `(method, route)`.
+    /// Linear scan; the SurfaceMap is small (one node per route +
+    /// store + service + sink) so this is fine in practice.
+    pub fn entry_for_route(&self, method: HttpMethod, route: &str) -> Option<&EntryPoint> {
+        self.nodes.iter().find_map(|n| match n {
+            SurfaceNode::EntryPoint(ep) if ep.method == method && ep.route == route => Some(ep),
+            _ => None,
+        })
+    }
+
+    /// Iterate over every entry-point node in surface order.
+    pub fn entry_points(&self) -> impl Iterator<Item = &EntryPoint> {
+        self.nodes.iter().filter_map(|n| match n {
+            SurfaceNode::EntryPoint(ep) => Some(ep),
+            _ => None,
+        })
+    }
+
+    /// Sort nodes by `(SourceLocation, kind_ordinal, dedup_tag)` and
+    /// rewrite every edge's `from`/`to` accordingly. Two structurally
+    /// identical maps are byte-identical after [`canonicalize`] +
+    /// `serde_json::to_vec` regardless of insertion order.
+    ///
+    /// Edges are sorted and de-duplicated as well. An edge endpoint
+    /// that does not name an existing node is left untouched.
+    ///
+    /// [`canonicalize`]: SurfaceMap::canonicalize
+    pub fn canonicalize(&mut self) {
+        // Take ownership of the nodes so the reordered vector can be
+        // moved back into place instead of cloning every node. An empty
+        // node list needs no special case: nothing is remapped and the
+        // edges are still sorted and de-duplicated below.
+        let mut indexed: Vec<(usize, SurfaceNode)> = std::mem::take(&mut self.nodes)
+            .into_iter()
+            .enumerate()
+            .collect();
+        // The sort is stable, so nodes with fully equal keys keep their
+        // insertion order.
+        indexed.sort_by(|(_, a), (_, b)| {
+            let key_a = (a.location(), a.kind_ordinal(), a.dedup_tag());
+            let key_b = (b.location(), b.kind_ordinal(), b.dedup_tag());
+            key_a.cmp(&key_b)
+        });
+        // Old node index -> new node index. Edge endpoints are `u32`, so
+        // the positions are narrowed to match.
+        let remap: BTreeMap<u32, u32> = indexed
+            .iter()
+            .enumerate()
+            .map(|(new_idx, (old_idx, _))| (*old_idx as u32, new_idx as u32))
+            .collect();
+        for edge in &mut self.edges {
+            if let Some(&new_from) = remap.get(&edge.from) {
+                edge.from = new_from;
+            }
+            if let Some(&new_to) = remap.get(&edge.to) {
+                edge.to = new_to;
+            }
+        }
+        self.nodes = indexed.into_iter().map(|(_, node)| node).collect();
+        self.edges.sort();
+        self.edges.dedup();
+    }
+
+    /// Serialize to deterministic JSON. The map is canonicalised
+    /// first (in place, hence `&mut self`); structurally identical
+    /// maps emit byte-identical JSON.
+    pub fn to_json(&mut self) -> serde_json::Result<Vec<u8>> {
+        self.canonicalize();
+        serde_json::to_vec(self)
+    }
+
+    /// Deserialize from JSON. Does not canonicalise; the producer is
+    /// responsible for emitting a canonicalised payload.
+    pub fn from_json(bytes: &[u8]) -> serde_json::Result<Self> {
+        serde_json::from_slice(bytes)
+    }
+}
+
+/// Convert an absolute path to a project-relative POSIX path string.
+/// Returns the full path (with `\` normalised to `/`) when the file
+/// is outside the scan root or when no scan root is given.
+pub fn relative_path_string(path: &Path, scan_root: Option<&Path>) -> String {
+    // Prefer the root-relative remainder; fall back to the path as given
+    // when there is no root or `path` does not live under it.
+    let shown = scan_root
+        .and_then(|root| path.strip_prefix(root).ok())
+        .unwrap_or(path);
+    // Normalise Windows separators so the result is always POSIX-style.
+    shown.to_string_lossy().replace('\\', "/")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Shorthand for building a [`SourceLocation`].
+    fn loc(file: &str, line: u32, col: u32) -> SourceLocation {
+        SourceLocation::new(file, line, col)
+    }
+
+    /// Flask entry-point node at `file:line`, with its handler placed
+    /// on the following line.
+    fn ep(file: &str, line: u32, route: &str, method: HttpMethod) -> SurfaceNode {
+        let entry = EntryPoint {
+            location: loc(file, line, 1),
+            framework: Framework::Flask,
+            method,
+            route: route.into(),
+            handler_name: "h".into(),
+            handler_location: loc(file, line + 1, 1),
+            auth_required: false,
+        };
+        SurfaceNode::EntryPoint(entry)
+    }
+
+    #[test]
+    fn canonicalize_sorts_nodes_and_remaps_edges() {
+        let mut map = SurfaceMap::new();
+        map.nodes.push(ep("b.py", 10, "/b", HttpMethod::GET));
+        map.nodes.push(ep("a.py", 5, "/a", HttpMethod::GET));
+        map.edges.push(SurfaceEdge {
+            from: 0,
+            to: 1,
+            kind: EdgeKind::Calls,
+        });
+        map.canonicalize();
+        assert_eq!(map.nodes[0].location().file, "a.py");
+        assert_eq!(map.nodes[1].location().file, "b.py");
+        // The edge ran b.py (old 0, new 1) -> a.py (old 1, new 0).
+        assert_eq!(map.edges[0].from, 1);
+        assert_eq!(map.edges[0].to, 0);
+    }
+
+    #[test]
+    fn json_round_trip_byte_identical() {
+        let mut original = SurfaceMap::new();
+        original.nodes.push(ep("a.py", 1, "/a", HttpMethod::GET));
+        original.nodes.push(ep("b.py", 2, "/b", HttpMethod::POST));
+        original.edges.push(SurfaceEdge {
+            from: 0,
+            to: 1,
+            kind: EdgeKind::Calls,
+        });
+        let bytes_a = original.to_json().unwrap();
+        let mut reloaded = SurfaceMap::from_json(&bytes_a).unwrap();
+        let bytes_b = reloaded.to_json().unwrap();
+        assert_eq!(bytes_a, bytes_b);
+    }
+}
diff --git a/tests/surface_flask.rs b/tests/surface_flask.rs
new file mode 100644
index 00000000..d71a9774
--- /dev/null
+++ b/tests/surface_flask.rs
@@ -0,0 +1,183 @@
+//! Phase 21 — `SurfaceMap` Python + Flask vertical.
+//!
+//! Five-route Flask fixture exercising:
+//!
+//! * `@app.route("/", methods=["GET"])` – explicit GET via methods kwarg
+//! * `@app.route("/submit", methods=["POST"])` – POST via methods kwarg
+//! * `@app.get("/users")` – verb decorator
+//! * `@bp.post("/admin")` – Blueprint receiver
+//! * `@app.route("/secret")` + `@login_required` – auth-guarded, default GET
+//!
+//! Asserts every route node appears with the correct `method`, `route`,
+//! `auth_required`, and `handler_name`. Round-trips the surface map
+//! through SQLite and confirms the byte representation is identical to
+//! the in-memory canonical JSON.
+
+use nyx_scanner::commands::index::build_index;
+use nyx_scanner::commands::scan::scan_with_index_parallel;
+use nyx_scanner::database::index::Indexer;
+use nyx_scanner::entry_points::HttpMethod;
+use nyx_scanner::surface::{Framework, SurfaceMap, SurfaceNode};
+use nyx_scanner::utils::config::{AnalysisMode, Config};
+use std::path::Path;
+use std::sync::Arc;
+
+fn test_cfg() -> Config {
+    let mut cfg = Config::default();
+    cfg.scanner.mode = AnalysisMode::Full;
+    cfg.scanner.read_vcsignore = false;
+    cfg.scanner.require_git_to_read_vcsignore = false;
+    cfg.performance.worker_threads = Some(1);
+    cfg.performance.batch_size = 8;
+    cfg.performance.channel_multiplier = 1;
+    cfg
+}
+
+const FIVE_ROUTE_FIXTURE: &str = r#"
+from flask import Flask, Blueprint
+from flask_login import login_required
+
+app = Flask(__name__)
+bp = Blueprint("admin", __name__)
+
+@app.route("/", methods=["GET"])
+def index():
+    return "home"
+
+@app.route("/submit", methods=["POST"])
+def submit():
+    return "ok"
+
+@app.get("/users")
+def list_users():
+    return "users"
+
+@bp.post("/admin")
+def admin_create():
+    return "created"
+
+@login_required
+@app.route("/secret")
+def secret():
+    return "shh"
+"#;
+
+fn seed_flask_fixture(root: &Path) {
+    std::fs::write(root.join("app.py"), FIVE_ROUTE_FIXTURE.as_bytes()).unwrap();
+}
+
+#[test]
+fn surface_map_captures_five_flask_routes() {
+    let project = tempfile::tempdir().unwrap();
+    seed_flask_fixture(project.path());
+    let db_dir = tempfile::tempdir().unwrap();
+    let db_path = db_dir.path().join("surface.sqlite");
+    build_index("surface", project.path(), &db_path, &test_cfg(), false)
+        .expect("build_index on flask fixture should succeed");
+    let pool = Indexer::init(&db_path).expect("re-init pool");
+    let _ = scan_with_index_parallel(
+        "surface",
+        Arc::clone(&pool),
+        &test_cfg(),
+        false,
+        project.path(),
+    )
+    .expect("indexed scan should succeed");
+
+    let idx = Indexer::from_pool("surface", &pool).expect("from_pool");
+    let map = idx
+        .load_surface_map()
+        .expect("load_surface_map ok")
+        .expect("surface map persisted after scan");
+
+    let entries: Vec<_> = map.entry_points().collect();
+    assert_eq!(
+        entries.len(),
+        5,
+        "expected five Flask routes, got {entries:#?}",
+    );
+
+    let assert_route = |method: HttpMethod, route: &str, handler: &str, auth: bool| {
+        let ep = map.entry_for_route(method, route).unwrap_or_else(|| {
+            panic!("missing route {method:?} {route}; map = {entries:#?}");
+        });
+        assert_eq!(ep.framework, Framework::Flask, "framework mismatch on {route}");
+        assert_eq!(ep.handler_name, handler, "handler mismatch on {route}");
+        assert_eq!(
+            ep.auth_required, auth,
+            "auth mismatch on {route} (got {})",
+            ep.auth_required
+        );
+        // The handler must resolve to the fixture file, app.py.
+        assert!(
+            ep.handler_location.file.ends_with("app.py"),
+            "handler location not in app.py: {:?}",
+            ep.handler_location.file
+        );
+    };
+    assert_route(HttpMethod::GET, "/", "index", false);
+    assert_route(HttpMethod::POST, "/submit", "submit", false);
+    assert_route(HttpMethod::GET, "/users", "list_users", false);
+    assert_route(HttpMethod::POST, "/admin", "admin_create", false);
+    assert_route(HttpMethod::GET, "/secret", "secret", true);
+}
+
+#[test]
+fn surface_map_round_trips_byte_identical_through_sqlite() {
+    let project = tempfile::tempdir().unwrap();
+    seed_flask_fixture(project.path());
+    let db_dir = tempfile::tempdir().unwrap();
+    let db_path = db_dir.path().join("rt.sqlite");
+
+    build_index("rt", project.path(), &db_path, &test_cfg(), false).expect("first build_index");
+    let pool = Indexer::init(&db_path).expect("first pool");
+    let _ = scan_with_index_parallel("rt", Arc::clone(&pool), &test_cfg(), false, project.path())
+        .expect("first scan");
+    let idx = Indexer::from_pool("rt", &pool).expect("first from_pool");
+    let bytes_first = idx
+        .load_surface_map_bytes()
+        .expect("load bytes 1")
+        .expect("surface map persisted 1");
+    drop(idx);
+
+    // Scan again over the same DB. The sources are unchanged, so the
+    // persisted canonical surface map must come out byte-identical.
+    let _ = scan_with_index_parallel("rt", Arc::clone(&pool), &test_cfg(), false, project.path())
+        .expect("second scan");
+    let idx2 = Indexer::from_pool("rt", &pool).expect("second from_pool");
+    let bytes_second = idx2
+        .load_surface_map_bytes()
+        .expect("load bytes 2")
+        .expect("surface map persisted 2");
+
+    assert_eq!(
+        bytes_first, bytes_second,
+        "surface_map JSON must be byte-identical across rescans"
+    );
+
+    // In-memory round trip: deserialise, canonicalise and serialise
+    // again; the result must equal the persisted bytes exactly.
+    let mut map = SurfaceMap::from_json(&bytes_first).expect("from_json");
+    let bytes_after_round_trip = map.to_json().expect("to_json");
+    assert_eq!(
+        bytes_first, bytes_after_round_trip,
+        "canonical JSON must match round-tripped JSON"
+    );
+
+    // Light sanity check: the deserialised map still names all five
+    // fixture routes (i.e. persistence does not lose nodes).
+    let entries: Vec<&str> = map
+        .nodes
+        .iter()
+        .filter_map(|n| match n {
+            SurfaceNode::EntryPoint(ep) => Some(ep.route.as_str()),
+            _ => None,
+        })
+        .collect();
+    for route in ["/", "/submit", "/users", "/admin", "/secret"] {
+        assert!(
+            entries.contains(&route),
+            "route {route} missing after round trip; got {entries:?}",
+        );
+    }
+}