mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss] phase 21: Track F.1 — SurfaceMap module + Python/Flask vertical
This commit is contained in:
parent
f8bff38217
commit
c03326a658
9 changed files with 1396 additions and 1 deletions
|
|
@ -2126,6 +2126,7 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
);
|
||||
}
|
||||
let pass2_start = std::time::Instant::now();
|
||||
let mut gs = global_summaries;
|
||||
let mut diags: Vec<Diag> = {
|
||||
let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered();
|
||||
let pb = make_progress_bar(
|
||||
|
|
@ -2156,7 +2157,6 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
);
|
||||
}
|
||||
|
||||
let mut gs = global_summaries;
|
||||
let total_batches = batches.len() as u64 + u64::from(!orphans.is_empty());
|
||||
if let Some(p) = progress {
|
||||
p.set_batches_total(total_batches);
|
||||
|
|
@ -2177,6 +2177,20 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
result
|
||||
};
|
||||
tracing::info!(diags = diags.len(), "pass 2 complete");
|
||||
|
||||
// Phase 21: build the SurfaceMap from the post-pass-2 view.
|
||||
// No persistence here; the index-backed path persists into the
|
||||
// `surface_map` SQLite table. The builder returns no error (files it
|
||||
// cannot read or parse are skipped): the surface map is an additive Phase F deliverable, not a gate.
|
||||
let _surface_map = crate::surface::build::build_surface_map(
|
||||
&crate::surface::build::SurfaceBuildInputs {
|
||||
files: &all_paths,
|
||||
scan_root: Some(root),
|
||||
global_summaries: &gs,
|
||||
call_graph: &call_graph,
|
||||
config: cfg,
|
||||
},
|
||||
);
|
||||
if let Some(p) = progress {
|
||||
p.record_pass2_ms(pass2_start.elapsed().as_millis() as u64);
|
||||
}
|
||||
|
|
@ -2987,6 +3001,34 @@ pub fn scan_with_index_parallel_observer(
|
|||
|
||||
let mut diags = topo_diags;
|
||||
|
||||
// Phase 21: build + persist the SurfaceMap from the post-pass-2
|
||||
// view. Errors here are logged but not propagated — the surface
|
||||
// map is an additive Phase F deliverable, not a scan gate.
|
||||
{
    // Build the map from the post-pass-2 view. The builder returns a map
    // unconditionally, so only persistence can fail below.
    let surface_map = crate::surface::build::build_surface_map(
        &crate::surface::build::SurfaceBuildInputs {
            files: &files,
            scan_root: Some(scan_root),
            global_summaries: &global_summaries,
            call_graph: &call_graph,
            config: cfg,
        },
    );
    // Persistence is best-effort: neither a failure to obtain an indexer
    // nor a failed write may abort the scan, so both are logged rather
    // than propagated with `?`.
    match Indexer::from_pool(project, &pool) {
        Ok(mut idx) => match idx.replace_surface_map(&surface_map) {
            Ok(()) => {
                if let Some(l) = logs {
                    l.info(
                        format!(
                            "Surface map: {} nodes, {} edges",
                            surface_map.node_count(),
                            surface_map.edge_count()
                        ),
                        None,
                    );
                }
            }
            Err(e) => tracing::warn!("failed to persist surface_map: {e}"),
        },
        Err(e) => tracing::warn!("failed to open index for surface_map: {e}"),
    }
}
|
||||
|
||||
// NOTE: Taint-mode output is *not* filtered here. `run_rules_on_bytes`
|
||||
// already gates AST queries and auth analyses behind `mode == Full`, so
|
||||
// Taint-mode raw output is exactly the set of diagnostics the analysis
|
||||
|
|
|
|||
|
|
@ -228,6 +228,15 @@ pub mod index {
|
|||
CREATE INDEX IF NOT EXISTS idx_dynamic_verdict_cache_spec_hash
|
||||
ON dynamic_verdict_cache(spec_hash);
|
||||
|
||||
-- Phase 21: persisted attack-surface map. One row per project.
|
||||
-- Stored as canonical JSON so the round-trip is byte-identical
|
||||
-- across rescans (see `SurfaceMap::to_json`).
|
||||
CREATE TABLE IF NOT EXISTS surface_map (
|
||||
project TEXT PRIMARY KEY,
|
||||
map_json BLOB NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
-- Indexes on (project, file_path) for the per-file replace_* paths.
|
||||
-- Without these, every DELETE WHERE project=? AND file_path=? does a
|
||||
-- full table scan, which dominates indexing time as the cache grows.
|
||||
|
|
@ -547,6 +556,22 @@ pub mod index {
|
|||
conn.execute_batch(SCHEMA)?;
|
||||
}
|
||||
|
||||
// Phase 21: ensure the `surface_map` table exists on
|
||||
// DBs created before this table was introduced.
|
||||
let surface_exists: bool = conn
|
||||
.query_row(
|
||||
"SELECT 1 FROM sqlite_master
|
||||
WHERE type = 'table' AND name = 'surface_map'",
|
||||
[],
|
||||
|_| Ok(true),
|
||||
)
|
||||
.optional()?
|
||||
.unwrap_or(false);
|
||||
if !surface_exists {
|
||||
tracing::info!("creating surface_map table");
|
||||
conn.execute_batch(SCHEMA)?;
|
||||
}
|
||||
|
||||
// Schema version check: invalidate cached summary tables
|
||||
// when the on-disk artefact layout has changed in an
|
||||
// incompatible way, independently of the engine version.
|
||||
|
|
@ -1882,6 +1907,63 @@ pub mod index {
|
|||
Ok(out)
|
||||
}
|
||||
|
||||
/// Persist a [`crate::surface::SurfaceMap`] for this project.
|
||||
///
|
||||
/// Replaces any previously-persisted map; the table holds one row
|
||||
/// per project. The map is canonicalised before serialisation so
|
||||
/// `replace_surface_map` + `load_surface_map` round-trip is
|
||||
/// byte-identical for structurally identical maps.
|
||||
pub fn replace_surface_map(
|
||||
&mut self,
|
||||
map: &crate::surface::SurfaceMap,
|
||||
) -> NyxResult<()> {
|
||||
let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let mut canon = map.clone();
|
||||
let bytes = canon
|
||||
.to_json()
|
||||
.map_err(|e| NyxError::Msg(format!("surface map serialise: {e}")))?;
|
||||
self.c().execute(
|
||||
"INSERT OR REPLACE INTO surface_map (project, map_json, updated_at)
|
||||
VALUES (?1, ?2, ?3)",
|
||||
params![self.project, bytes, now],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load the persisted [`crate::surface::SurfaceMap`] for this
|
||||
/// project, or `None` when no map has been written.
|
||||
pub fn load_surface_map(&self) -> NyxResult<Option<crate::surface::SurfaceMap>> {
|
||||
let row: Option<Vec<u8>> = self
|
||||
.c()
|
||||
.query_row(
|
||||
"SELECT map_json FROM surface_map WHERE project = ?1",
|
||||
params![self.project],
|
||||
|r| r.get::<_, Vec<u8>>(0),
|
||||
)
|
||||
.optional()?;
|
||||
let Some(bytes) = row else {
|
||||
return Ok(None);
|
||||
};
|
||||
let map = crate::surface::SurfaceMap::from_json(&bytes)
|
||||
.map_err(|e| NyxError::Msg(format!("surface map deserialise: {e}")))?;
|
||||
Ok(Some(map))
|
||||
}
|
||||
|
||||
/// Return the raw JSON bytes stored for the surface map without
|
||||
/// deserialising. Used by the round-trip parity tests so they
|
||||
/// can compare on-disk bytes across rescans.
|
||||
pub fn load_surface_map_bytes(&self) -> NyxResult<Option<Vec<u8>>> {
|
||||
let row: Option<Vec<u8>> = self
|
||||
.c()
|
||||
.query_row(
|
||||
"SELECT map_json FROM surface_map WHERE project = ?1",
|
||||
params![self.project],
|
||||
|r| r.get::<_, Vec<u8>>(0),
|
||||
)
|
||||
.optional()?;
|
||||
Ok(row)
|
||||
}
|
||||
|
||||
/// Remove a file and all derived persisted state for this project.
|
||||
///
|
||||
/// This deletes the file row, issues, and all persisted summary rows so
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@ pub mod ssa;
|
|||
pub mod state;
|
||||
pub mod summary;
|
||||
pub mod suppress;
|
||||
pub mod surface;
|
||||
pub mod symbol;
|
||||
pub mod symex;
|
||||
pub mod taint;
|
||||
|
|
|
|||
163
src/surface/build.rs
Normal file
163
src/surface/build.rs
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
//! Top-level [`SurfaceMap`] builder.
|
||||
//!
|
||||
//! Consumes the post-pass-2 [`GlobalSummaries`] + [`CallGraph`] for
|
||||
//! call-graph reachability and the project's file list for the
|
||||
//! per-language framework probes. Phase 21 only invokes the Python +
|
||||
//! Flask probe; Phase 22 wires the remaining language probes through
|
||||
//! [`crate::surface::lang`].
|
||||
//!
|
||||
//! Build steps (Phase 21):
|
||||
//!
|
||||
//! 1. For every Python file, parse it once and invoke
|
||||
//! [`crate::surface::lang::python_flask::detect_flask_routes`].
|
||||
//! 2. Collect the resulting [`SurfaceNode::EntryPoint`] nodes.
|
||||
//! 3. Canonicalise the map (sort nodes + edges, dedup edges) so two
|
||||
//! runs over the same source produce byte-identical JSON.
|
||||
|
||||
use crate::callgraph::CallGraph;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::surface::{SurfaceMap, lang::python_flask};
|
||||
use crate::utils::config::Config;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Inputs to [`build_surface_map`]. Wrapped in a struct so the
|
||||
/// downstream Phase 22 work (additional probes, call-graph-derived
|
||||
/// `Reaches` edges, label-rule data-source nodes) can extend the
|
||||
/// signature without touching every caller.
|
||||
pub struct SurfaceBuildInputs<'a> {
|
||||
pub files: &'a [PathBuf],
|
||||
pub scan_root: Option<&'a Path>,
|
||||
pub global_summaries: &'a GlobalSummaries,
|
||||
pub call_graph: &'a CallGraph,
|
||||
pub config: &'a Config,
|
||||
}
|
||||
|
||||
/// Build a [`SurfaceMap`] for the project under analysis.
|
||||
///
|
||||
/// Best-effort: parse failures on individual files are swallowed so
|
||||
/// the surface map of a 10k-file project is not killed by one bad
|
||||
/// Python file. Returns an empty map when the inputs contain no
|
||||
/// recognised entry-points.
|
||||
pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
||||
let mut map = SurfaceMap::new();
|
||||
|
||||
// Phase 21: only Python / Flask. The downstream Phase 22 probes
|
||||
// will dispatch on file extension here.
|
||||
let mut python_parser = tree_sitter::Parser::new();
|
||||
if python_parser
|
||||
.set_language(&tree_sitter_python::LANGUAGE.into())
|
||||
.is_err()
|
||||
{
|
||||
return map;
|
||||
}
|
||||
|
||||
for path in inputs.files {
|
||||
if !is_python_file(path) {
|
||||
continue;
|
||||
}
|
||||
let Ok(bytes) = std::fs::read(path) else {
|
||||
continue;
|
||||
};
|
||||
let Some(tree) = python_parser.parse(&bytes, None) else {
|
||||
continue;
|
||||
};
|
||||
let nodes =
|
||||
python_flask::detect_flask_routes(&tree, &bytes, path, inputs.scan_root);
|
||||
for n in nodes {
|
||||
map.nodes.push(n);
|
||||
}
|
||||
}
|
||||
|
||||
// GlobalSummaries / CallGraph are reserved for Phase 22's
|
||||
// `DangerousLocal` + `Reaches`-edge fill-in. Phase 21 records
|
||||
// them in the inputs so callers do not need to be touched again
|
||||
// when Phase 22 wires them up.
|
||||
let _ = inputs.global_summaries;
|
||||
let _ = inputs.call_graph;
|
||||
let _ = inputs.config;
|
||||
|
||||
map.canonicalize();
|
||||
map
|
||||
}
|
||||
|
||||
/// `true` when `path` has the extension `py` or `pyi`.
///
/// The comparison is case-sensitive, and an extension that is not valid
/// UTF-8 never matches.
fn is_python_file(path: &Path) -> bool {
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| ext == "py" || ext == "pyi")
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::entry_points::HttpMethod;
|
||||
use std::fs;
|
||||
use tempfile::tempdir;
|
||||
|
||||
/// With no files to probe the builder must return a map with neither
/// nodes nor edges.
#[test]
fn empty_inputs_produce_empty_map() {
    let scratch = tempdir().unwrap();
    let no_files: Vec<PathBuf> = Vec::new();
    let summaries = GlobalSummaries::new();
    let call_graph = CallGraph {
        graph: petgraph::graph::DiGraph::new(),
        index: Default::default(),
        unresolved_not_found: Vec::new(),
        unresolved_ambiguous: Vec::new(),
    };
    let config = Config::default();

    let map = build_surface_map(&SurfaceBuildInputs {
        files: &no_files,
        scan_root: Some(scratch.path()),
        global_summaries: &summaries,
        call_graph: &call_graph,
        config: &config,
    });

    assert_eq!(map.node_count(), 0);
    assert_eq!(map.edge_count(), 0);
}
|
||||
|
||||
#[test]
|
||||
fn flask_file_produces_entry_points() {
|
||||
let dir = tempdir().unwrap();
|
||||
let py = dir.path().join("app.py");
|
||||
fs::write(
|
||||
&py,
|
||||
r#"
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
return "hi"
|
||||
|
||||
@app.post("/submit")
|
||||
def submit():
|
||||
return "ok"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let cfg = Config::default();
|
||||
let gs = GlobalSummaries::new();
|
||||
let cg = CallGraph {
|
||||
graph: petgraph::graph::DiGraph::new(),
|
||||
index: Default::default(),
|
||||
unresolved_not_found: vec![],
|
||||
unresolved_ambiguous: vec![],
|
||||
};
|
||||
let files = vec![py.clone()];
|
||||
let inputs = SurfaceBuildInputs {
|
||||
files: &files,
|
||||
scan_root: Some(dir.path()),
|
||||
global_summaries: &gs,
|
||||
call_graph: &cg,
|
||||
config: &cfg,
|
||||
};
|
||||
let map = build_surface_map(&inputs);
|
||||
assert_eq!(map.node_count(), 2);
|
||||
let methods: Vec<HttpMethod> = map.entry_points().map(|ep| ep.method).collect();
|
||||
assert!(methods.contains(&HttpMethod::GET));
|
||||
assert!(methods.contains(&HttpMethod::POST));
|
||||
}
|
||||
}
|
||||
107
src/surface/graph.rs
Normal file
107
src/surface/graph.rs
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
//! petgraph-backed read-only view over a [`SurfaceMap`].
|
||||
//!
|
||||
//! The on-disk shape is two parallel `Vec`s (deterministic ordering,
|
||||
//! byte-identical JSON), but downstream consumers — the Track G chain
|
||||
//! composer, the `nyx surface` CLI walker — want graph queries:
|
||||
//! neighbours, reachability, topological order. [`petgraph_view`]
|
||||
//! constructs a `DiGraph<NodeRef<'_>, EdgeRef<'_>>` on demand without
|
||||
//! cloning the underlying nodes or edges.
|
||||
|
||||
use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode};
|
||||
use petgraph::graph::{DiGraph, NodeIndex};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Borrowed handle to one [`SurfaceNode`] inside the petgraph view.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct NodeRef<'a> {
|
||||
pub idx: u32,
|
||||
pub node: &'a SurfaceNode,
|
||||
}
|
||||
|
||||
/// Borrowed handle to one [`SurfaceEdge`] inside the petgraph view.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct EdgeRef<'a> {
|
||||
pub edge: &'a SurfaceEdge,
|
||||
}
|
||||
|
||||
impl<'a> EdgeRef<'a> {
|
||||
pub fn kind(&self) -> EdgeKind {
|
||||
self.edge.kind
|
||||
}
|
||||
}
|
||||
|
||||
/// Materialise a petgraph view of `map`. Node indices in the returned
|
||||
/// graph match `map.nodes` ordering 1:1, and the `lookup` map lets
|
||||
/// callers translate from the surface index (`u32`) to the petgraph
|
||||
/// [`NodeIndex`]. Walking edges respects `map.edges` order.
|
||||
pub fn petgraph_view(map: &SurfaceMap) -> SurfaceGraphView<'_> {
|
||||
let mut graph: DiGraph<NodeRef<'_>, EdgeRef<'_>> = DiGraph::new();
|
||||
let mut lookup: HashMap<u32, NodeIndex> = HashMap::with_capacity(map.nodes.len());
|
||||
for (i, node) in map.nodes.iter().enumerate() {
|
||||
let nx = graph.add_node(NodeRef {
|
||||
idx: i as u32,
|
||||
node,
|
||||
});
|
||||
lookup.insert(i as u32, nx);
|
||||
}
|
||||
for edge in &map.edges {
|
||||
if let (Some(&from), Some(&to)) = (lookup.get(&edge.from), lookup.get(&edge.to)) {
|
||||
graph.add_edge(from, to, EdgeRef { edge });
|
||||
}
|
||||
}
|
||||
SurfaceGraphView { graph, lookup }
|
||||
}
|
||||
|
||||
/// petgraph view returned by [`petgraph_view`].
|
||||
pub struct SurfaceGraphView<'a> {
|
||||
pub graph: DiGraph<NodeRef<'a>, EdgeRef<'a>>,
|
||||
pub lookup: HashMap<u32, NodeIndex>,
|
||||
}
|
||||
|
||||
impl<'a> SurfaceGraphView<'a> {
|
||||
/// Resolve a surface index back to its petgraph [`NodeIndex`].
|
||||
pub fn node_index(&self, surface_idx: u32) -> Option<NodeIndex> {
|
||||
self.lookup.get(&surface_idx).copied()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entry_points::HttpMethod;
    use crate::surface::{EntryPoint, Framework, SourceLocation};

    /// Unauthenticated Flask entry point declared at `file:1:1` whose
    /// handler `h` sits at `file:2:1`.
    fn flask_entry(file: &str, method: HttpMethod, route: &str) -> SurfaceNode {
        SurfaceNode::EntryPoint(EntryPoint {
            location: SourceLocation::new(file, 1, 1),
            framework: Framework::Flask,
            method,
            route: route.into(),
            handler_name: "h".into(),
            handler_location: SourceLocation::new(file, 2, 1),
            auth_required: false,
        })
    }

    /// Two nodes joined by one edge must surface as two petgraph nodes
    /// joined by one edge, addressable through the surface indices.
    #[test]
    fn petgraph_view_preserves_indices() {
        let mut m = SurfaceMap::new();
        m.nodes.push(flask_entry("a.py", HttpMethod::GET, "/a"));
        m.nodes.push(flask_entry("b.py", HttpMethod::POST, "/b"));
        m.edges.push(SurfaceEdge {
            from: 0,
            to: 1,
            kind: EdgeKind::Calls,
        });

        let view = petgraph_view(&m);
        assert_eq!(view.graph.node_count(), 2);
        assert_eq!(view.graph.edge_count(), 1);

        let first = view.node_index(0).unwrap();
        let second = view.node_index(1).unwrap();
        assert!(view.graph.find_edge(first, second).is_some());
    }
}
|
||||
6
src/surface/lang/mod.rs
Normal file
6
src/surface/lang/mod.rs
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
//! Per-language framework probes. Phase 21 ships Python + Flask;
|
||||
//! Phase 22 generalises to FastAPI / Django, Java Spring / JAX-RS,
|
||||
//! Ruby Rails / Sinatra, Go net/http / gin, Rust axum / actix /
|
||||
//! rocket, JS/TS Express + Next.js.
|
||||
|
||||
pub mod python_flask;
|
||||
413
src/surface/lang/python_flask.rs
Normal file
413
src/surface/lang/python_flask.rs
Normal file
|
|
@ -0,0 +1,413 @@
|
|||
//! Python + Flask framework probe.
|
||||
//!
|
||||
//! Walks a parsed Python file looking for the four canonical Flask
|
||||
//! route shapes:
|
||||
//!
|
||||
//! * `@app.route("/path", methods=[...])`
|
||||
//! * `@app.get("/path")` / `.post(...)` / etc. (Flask ≥ 2.0)
|
||||
//! * `@bp.route("/path", methods=[...])` on a `Blueprint`
|
||||
//! * `@bp.get("/path")` / `.post(...)` / etc.
|
||||
//!
|
||||
//! `auth_required` is inferred from the decorator stack: any decorator
|
||||
//! whose textual representation matches one of [`AUTH_DECORATORS`] is
|
||||
//! treated as an auth boundary on the following route. This catches
|
||||
//! the canonical `@login_required` (Flask-Login), `@auth_required`
|
||||
//! (custom guards), and `@jwt_required` / `@jwt_required()` (Flask-JWT
|
||||
//! and -JWT-Extended).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::{
|
||||
EntryPoint, Framework, SourceLocation, SurfaceNode, relative_path_string,
|
||||
};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
/// Names of decorators treated as authentication guards.
///
/// Only the final dotted segment of a decorator expression is compared,
/// ignoring ASCII case, so `@login_required`, `@auth.login_required` and
/// `@flask_login.login_required` are all recognised.
pub const AUTH_DECORATORS: &[&str] = &[
    "login_required",
    "auth_required",
    "jwt_required",
    "token_required",
    "requires_auth",
    "authenticated",
    "require_login",
];
|
||||
|
||||
/// Detect every Flask route in a parsed Python file.
|
||||
///
|
||||
/// `scan_root` is used to convert the file path to a project-relative
|
||||
/// POSIX path; pass `None` to record absolute paths. Returns one
|
||||
/// [`SurfaceNode::EntryPoint`] per `@route` / `@get` / `@post` / …
|
||||
/// decorator that targets a Flask-shaped receiver (`app`, `bp`,
|
||||
/// `blueprint`, or anything ending in `_bp` / `Blueprint`).
|
||||
pub fn detect_flask_routes(
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = relative_path_string(path, scan_root);
|
||||
let mut out = Vec::new();
|
||||
walk_decorated(tree.root_node(), bytes, &mut |func_node, decorators| {
|
||||
// Reverse pass: find Flask-route decorators and collect auth
|
||||
// markers seen at *any* position in the decorator stack —
|
||||
// Flask honours decorators in stacked order regardless of
|
||||
// sequence relative to the route.
|
||||
let auth_required = decorators
|
||||
.iter()
|
||||
.any(|d| decorator_is_auth_marker(*d, bytes));
|
||||
for dec in decorators {
|
||||
if let Some((method, route_path)) = flask_route_decorator(*dec, bytes) {
|
||||
let dec_pos = dec.start_position();
|
||||
let handler_pos = func_node.start_position();
|
||||
let handler_name = function_name(*func_node, bytes).unwrap_or_default();
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(dec_pos.row + 1) as u32,
|
||||
(dec_pos.column + 1) as u32,
|
||||
),
|
||||
framework: Framework::Flask,
|
||||
method,
|
||||
route: route_path,
|
||||
handler_name,
|
||||
handler_location: SourceLocation::new(
|
||||
file_rel.clone(),
|
||||
(handler_pos.row + 1) as u32,
|
||||
(handler_pos.column + 1) as u32,
|
||||
),
|
||||
auth_required,
|
||||
}));
|
||||
}
|
||||
}
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
/// Walk every `function_definition` in `root` and invoke `visit` with
|
||||
/// the function node plus the list of decorator nodes wrapping it.
|
||||
/// Handles both `decorated_definition` (one or more decorators) and
|
||||
/// bare `function_definition` (zero decorators, visit skipped).
|
||||
fn walk_decorated<'tree, F>(root: Node<'tree>, bytes: &[u8], visit: &mut F)
|
||||
where
|
||||
F: FnMut(&Node<'tree>, &[Node<'tree>]),
|
||||
{
|
||||
if root.kind() == "decorated_definition" {
|
||||
let mut cursor = root.walk();
|
||||
let mut decorators: Vec<Node<'tree>> = Vec::new();
|
||||
let mut func: Option<Node<'tree>> = None;
|
||||
for child in root.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"decorator" => decorators.push(child),
|
||||
"function_definition" => func = Some(child),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if let Some(func_node) = func {
|
||||
visit(&func_node, &decorators);
|
||||
}
|
||||
let _ = bytes;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.children(&mut cursor) {
|
||||
walk_decorated(child, bytes, visit);
|
||||
}
|
||||
}
|
||||
|
||||
/// Classify a `decorator` node as a Flask route, returning the
|
||||
/// `(method, path)` pair. Recognises both the `@app.route(...)` and
|
||||
/// `@app.<verb>(...)` shapes and the Blueprint equivalents.
|
||||
fn flask_route_decorator(decorator: Node, bytes: &[u8]) -> Option<(HttpMethod, String)> {
|
||||
let mut walker = decorator.walk();
|
||||
let expr = decorator
|
||||
.children(&mut walker)
|
||||
.find(|c| c.kind() != "@" && c.kind() != "comment")?;
|
||||
let (call_target, args) = match expr.kind() {
|
||||
"call" => (
|
||||
expr.child_by_field_name("function")?,
|
||||
expr.child_by_field_name("arguments"),
|
||||
),
|
||||
_ => return None,
|
||||
};
|
||||
if call_target.kind() != "attribute" {
|
||||
return None;
|
||||
}
|
||||
let object = call_target.child_by_field_name("object")?;
|
||||
if !receiver_is_flask(object, bytes) {
|
||||
return None;
|
||||
}
|
||||
let attr = call_target.child_by_field_name("attribute")?;
|
||||
let attr_text = attr.utf8_text(bytes).ok()?;
|
||||
let route_path = args
|
||||
.and_then(|a| first_string_arg(a, bytes))
|
||||
.unwrap_or_default();
|
||||
if attr_text == "route" {
|
||||
let method = args
|
||||
.and_then(|a| extract_first_method(a, bytes))
|
||||
.unwrap_or(HttpMethod::GET);
|
||||
return Some((method, route_path));
|
||||
}
|
||||
if let Some(method) = HttpMethod::from_ident(attr_text) {
|
||||
return Some((method, route_path));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// `true` when the decorator receiver looks like a Flask app or
|
||||
/// Blueprint binding. Allowlist over identifier names + a structural
|
||||
/// match on call expressions like `Blueprint("name", __name__)`.
|
||||
fn receiver_is_flask(object: Node, bytes: &[u8]) -> bool {
|
||||
fn name_matches(text: &str) -> bool {
|
||||
let lower = text.to_ascii_lowercase();
|
||||
lower == "app"
|
||||
|| lower == "bp"
|
||||
|| lower == "blueprint"
|
||||
|| lower.ends_with("_app")
|
||||
|| lower.ends_with("_bp")
|
||||
|| lower.ends_with("blueprint")
|
||||
|| lower.ends_with("api")
|
||||
}
|
||||
match object.kind() {
|
||||
"identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches),
|
||||
"attribute" => object
|
||||
.child_by_field_name("attribute")
|
||||
.and_then(|a| a.utf8_text(bytes).ok())
|
||||
.is_some_and(name_matches),
|
||||
"call" => {
|
||||
let Some(callee) = object.child_by_field_name("function") else {
|
||||
return false;
|
||||
};
|
||||
let Ok(text) = callee.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text).trim();
|
||||
leaf == "Flask" || leaf == "Blueprint"
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Pull the first string literal positional argument out of a
|
||||
/// `argument_list` node. Used to extract the route path from
|
||||
/// `@app.route("/path", ...)`.
|
||||
fn first_string_arg(args: Node, bytes: &[u8]) -> Option<String> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if arg.kind() == "string" {
|
||||
return Some(string_literal_text(arg, bytes));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Strip Python quotes / prefix bytes (`b"..."`, `r"..."`) and return
|
||||
/// the literal content. Falls back to the raw slice when the literal
|
||||
/// has an unfamiliar shape.
|
||||
fn string_literal_text(node: Node, bytes: &[u8]) -> String {
|
||||
let raw = node.utf8_text(bytes).unwrap_or("");
|
||||
let trimmed = raw.trim();
|
||||
let mut s = trimmed;
|
||||
while let Some(rest) = s.strip_prefix(['b', 'r', 'B', 'R', 'f', 'F']) {
|
||||
s = rest;
|
||||
}
|
||||
let stripped = s
|
||||
.trim_start_matches(['\'', '"'])
|
||||
.trim_end_matches(['\'', '"']);
|
||||
stripped.to_string()
|
||||
}
|
||||
|
||||
/// Extract the first HTTP method named in a `methods=[...]` kwarg, or
|
||||
/// `None` when the decorator omits the kwarg. The first method in
|
||||
/// the list wins; multi-method routes are recorded as the first
|
||||
/// (Flask itself runs the same handler for every listed method).
|
||||
fn extract_first_method(args: Node, bytes: &[u8]) -> Option<HttpMethod> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if arg.kind() != "keyword_argument" {
|
||||
continue;
|
||||
}
|
||||
let name_node = arg.child_by_field_name("name")?;
|
||||
let Ok(name) = name_node.utf8_text(bytes) else {
|
||||
continue;
|
||||
};
|
||||
if name != "methods" {
|
||||
continue;
|
||||
}
|
||||
let value = arg.child_by_field_name("value")?;
|
||||
let mut cur = value.walk();
|
||||
for child in value.children(&mut cur) {
|
||||
if child.kind() == "string" {
|
||||
let text = string_literal_text(child, bytes);
|
||||
if let Some(m) = HttpMethod::from_ident(&text) {
|
||||
return Some(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// `true` when the decorator is an auth-guard marker. Matches the
|
||||
/// last segment of the decorator expression against
|
||||
/// [`AUTH_DECORATORS`].
|
||||
fn decorator_is_auth_marker(decorator: Node, bytes: &[u8]) -> bool {
|
||||
let mut walker = decorator.walk();
|
||||
let Some(expr) = decorator
|
||||
.children(&mut walker)
|
||||
.find(|c| c.kind() != "@" && c.kind() != "comment")
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let target = match expr.kind() {
|
||||
"call" => expr.child_by_field_name("function"),
|
||||
_ => Some(expr),
|
||||
};
|
||||
let Some(target) = target else { return false };
|
||||
let Ok(text) = target.utf8_text(bytes) else {
|
||||
return false;
|
||||
};
|
||||
let leaf = text.rsplit('.').next().unwrap_or(text).trim();
|
||||
AUTH_DECORATORS
|
||||
.iter()
|
||||
.any(|d| leaf.eq_ignore_ascii_case(d))
|
||||
}
|
||||
|
||||
/// Read the function name from a `function_definition` node.
|
||||
fn function_name(func: Node, bytes: &[u8]) -> Option<String> {
|
||||
let name_node = func.child_by_field_name("name")?;
|
||||
name_node.utf8_text(bytes).ok().map(str::to_string)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn parse(src: &str) -> (Tree, Vec<u8>) {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter_python::LANGUAGE.into())
|
||||
.unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
(tree, src.as_bytes().to_vec())
|
||||
}
|
||||
|
||||
fn detect(src: &str) -> Vec<SurfaceNode> {
|
||||
let (tree, bytes) = parse(src);
|
||||
detect_flask_routes(&tree, &bytes, &PathBuf::from("app.py"), None)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_basic_route() {
|
||||
let src = r#"
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/hello")
|
||||
def hello():
|
||||
return "hi"
|
||||
"#;
|
||||
let nodes = detect(src);
|
||||
assert_eq!(nodes.len(), 1);
|
||||
if let SurfaceNode::EntryPoint(ep) = &nodes[0] {
|
||||
assert_eq!(ep.route, "/hello");
|
||||
assert_eq!(ep.method, HttpMethod::GET);
|
||||
assert_eq!(ep.handler_name, "hello");
|
||||
assert!(!ep.auth_required);
|
||||
} else {
|
||||
panic!("not an EntryPoint");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_methods_kwarg() {
|
||||
let src = r#"
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/submit", methods=["POST"])
|
||||
def submit():
|
||||
return "ok"
|
||||
"#;
|
||||
let nodes = detect(src);
|
||||
let ep = match &nodes[0] {
|
||||
SurfaceNode::EntryPoint(ep) => ep,
|
||||
_ => panic!("not an EntryPoint"),
|
||||
};
|
||||
assert_eq!(ep.method, HttpMethod::POST);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_verb_decorator() {
|
||||
let src = r#"
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.post("/users")
|
||||
def create():
|
||||
return "ok"
|
||||
"#;
|
||||
let nodes = detect(src);
|
||||
let ep = match &nodes[0] {
|
||||
SurfaceNode::EntryPoint(ep) => ep,
|
||||
_ => panic!("not an EntryPoint"),
|
||||
};
|
||||
assert_eq!(ep.method, HttpMethod::POST);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_blueprint() {
|
||||
let src = r#"
|
||||
from flask import Blueprint
|
||||
bp = Blueprint("admin", __name__)
|
||||
|
||||
@bp.get("/admin")
|
||||
def admin():
|
||||
return "secret"
|
||||
"#;
|
||||
let nodes = detect(src);
|
||||
let ep = match &nodes[0] {
|
||||
SurfaceNode::EntryPoint(ep) => ep,
|
||||
_ => panic!("not an EntryPoint"),
|
||||
};
|
||||
assert_eq!(ep.route, "/admin");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_auth_decorator() {
|
||||
let src = r#"
|
||||
from flask import Flask
|
||||
from flask_login import login_required
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/secret")
|
||||
@login_required
|
||||
def secret():
|
||||
return "shh"
|
||||
"#;
|
||||
let nodes = detect(src);
|
||||
let ep = match &nodes[0] {
|
||||
SurfaceNode::EntryPoint(ep) => ep,
|
||||
_ => panic!("not an EntryPoint"),
|
||||
};
|
||||
assert!(ep.auth_required);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_non_flask_receiver() {
|
||||
let src = r#"
|
||||
client = requests.Session()
|
||||
|
||||
@client.get("/whatever")
|
||||
def x():
|
||||
pass
|
||||
"#;
|
||||
let nodes = detect(src);
|
||||
// `client` does not match the Flask receiver allowlist.
|
||||
assert!(nodes.is_empty());
|
||||
}
|
||||
}
|
||||
398
src/surface/mod.rs
Normal file
398
src/surface/mod.rs
Normal file
|
|
@ -0,0 +1,398 @@
|
|||
//! Phase 21 — attack-surface map.
|
||||
//!
|
||||
//! The `SurfaceMap` graph names the externally-reachable shape of the
|
||||
//! project under analysis: HTTP route entry-points (Flask, FastAPI,
|
||||
//! Spring, Express, …), the data stores they read/write, the external
|
||||
//! services they talk to, and the local sinks they ultimately reach.
|
||||
//!
|
||||
//! Track G's chain composer walks this graph to translate findings into
|
||||
//! cross-feature attack chains, and the `nyx surface` CLI prints a
|
||||
//! human-readable tree from it. Phase 21 ships the graph types plus
|
||||
//! the first framework probe (Python + Flask); Phase 22 generalises the
|
||||
//! probe to the remaining languages and Phase 23 wires the CLI.
|
||||
//!
|
||||
//! Storage shape: a flat `Vec<SurfaceNode>` sorted by [`SourceLocation`]
|
||||
//! and a flat `Vec<SurfaceEdge>` sorted by `(from_idx, to_idx, kind)`.
|
||||
//! Both vectors are byte-deterministic, so two scans of the same source
|
||||
//! produce byte-identical JSON when round-tripped through SQLite. See
|
||||
//! [`graph::petgraph_view`] for a petgraph-backed view used by the
|
||||
//! chain composer.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub mod build;
|
||||
pub mod graph;
|
||||
pub mod lang;
|
||||
|
||||
/// Stable source location used as the primary key for every
|
||||
/// [`SurfaceNode`]. `file` is a project-relative POSIX path so the
|
||||
/// SurfaceMap is portable across machines; `line` and `col` are
|
||||
/// 1-indexed. Ordering is `(file, line, col)` lexicographic, matching
|
||||
/// the determinism the rest of the analyser uses for spans.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct SourceLocation {
|
||||
pub file: String,
|
||||
pub line: u32,
|
||||
pub col: u32,
|
||||
}
|
||||
|
||||
impl SourceLocation {
|
||||
pub fn new(file: impl Into<String>, line: u32, col: u32) -> Self {
|
||||
Self {
|
||||
file: file.into(),
|
||||
line,
|
||||
col,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Web-framework tag attached to every [`EntryPoint`]. The set is
|
||||
/// fixed in Phase 21 + 22 and matches the set of framework probes
|
||||
/// behind [`lang`]. New frameworks land as new variants.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum Framework {
|
||||
Flask,
|
||||
FastApi,
|
||||
Django,
|
||||
Express,
|
||||
Spring,
|
||||
JaxRs,
|
||||
Rails,
|
||||
Sinatra,
|
||||
Axum,
|
||||
Actix,
|
||||
Rocket,
|
||||
NetHttp,
|
||||
Gin,
|
||||
NextAppRouter,
|
||||
NextServerAction,
|
||||
}
|
||||
|
||||
/// HTTP-handler entry-point recognised by a framework probe.
|
||||
///
|
||||
/// Every node carries the route's declared path string, HTTP method,
|
||||
/// and a resolved handler [`SourceLocation`] pointing at the function
|
||||
/// definition. `auth_required` is `true` when the decorator stack
|
||||
/// (or framework equivalent) contains an auth guard the probe was
|
||||
/// able to identify; Phase 21 recognises Flask's `@login_required`,
|
||||
/// `@auth_required`, and `@jwt_required` decorators.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct EntryPoint {
|
||||
pub location: SourceLocation,
|
||||
pub framework: Framework,
|
||||
pub method: HttpMethod,
|
||||
pub route: String,
|
||||
pub handler_name: String,
|
||||
pub handler_location: SourceLocation,
|
||||
pub auth_required: bool,
|
||||
}
|
||||
|
||||
/// Persistent data store reachable from the surface — SQL database,
|
||||
/// key-value store, document DB, blob store. Phase 22 populates this
|
||||
/// from label-rule data-source matches and ORM-receiver type facts;
|
||||
/// Phase 21 ships the type for forward-compat only and emits no
|
||||
/// `DataStore` nodes.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DataStore {
|
||||
pub location: SourceLocation,
|
||||
pub kind: DataStoreKind,
|
||||
pub label: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum DataStoreKind {
|
||||
Sql,
|
||||
KeyValue,
|
||||
Document,
|
||||
BlobStore,
|
||||
Filesystem,
|
||||
Unknown,
|
||||
}
|
||||
|
||||
/// External service the surface talks to over a network — third-party
|
||||
/// HTTP API, message broker, search index. Phase 22 fills this in;
|
||||
/// Phase 21 ships the type.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ExternalService {
|
||||
pub location: SourceLocation,
|
||||
pub kind: ExternalServiceKind,
|
||||
pub label: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ExternalServiceKind {
|
||||
HttpApi,
|
||||
MessageBroker,
|
||||
SearchIndex,
|
||||
AuthProvider,
|
||||
Unknown,
|
||||
}
|
||||
|
||||
/// Local sink with no externally observable side-effect — `eval`,
|
||||
/// `pickle.loads`, `subprocess.Popen`, raw SQL execute, etc. Phase 22
|
||||
/// fills this in from the existing label-rule registry; Phase 21
|
||||
/// ships the type.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DangerousLocal {
|
||||
pub location: SourceLocation,
|
||||
pub function_name: String,
|
||||
pub cap_bits: u32,
|
||||
}
|
||||
|
||||
/// A node in the [`SurfaceMap`]. Every variant carries a
|
||||
/// [`SourceLocation`] so the surface ordering is total and stable.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(tag = "node", rename_all = "snake_case")]
|
||||
pub enum SurfaceNode {
|
||||
EntryPoint(EntryPoint),
|
||||
DataStore(DataStore),
|
||||
ExternalService(ExternalService),
|
||||
DangerousLocal(DangerousLocal),
|
||||
}
|
||||
|
||||
impl SurfaceNode {
|
||||
pub fn location(&self) -> &SourceLocation {
|
||||
match self {
|
||||
SurfaceNode::EntryPoint(n) => &n.location,
|
||||
SurfaceNode::DataStore(n) => &n.location,
|
||||
SurfaceNode::ExternalService(n) => &n.location,
|
||||
SurfaceNode::DangerousLocal(n) => &n.location,
|
||||
}
|
||||
}
|
||||
|
||||
/// Discriminator used as a secondary sort key so two nodes that
|
||||
/// happen to share a [`SourceLocation`] (e.g. multiple route
|
||||
/// decorators on one function) keep a deterministic relative
|
||||
/// order. Returns the variant index in the enum declaration.
|
||||
fn kind_ordinal(&self) -> u8 {
|
||||
match self {
|
||||
SurfaceNode::EntryPoint(_) => 0,
|
||||
SurfaceNode::DataStore(_) => 1,
|
||||
SurfaceNode::ExternalService(_) => 2,
|
||||
SurfaceNode::DangerousLocal(_) => 3,
|
||||
}
|
||||
}
|
||||
|
||||
/// Tertiary sort key used to disambiguate nodes that share both
|
||||
/// [`SourceLocation`] and kind — e.g. a single Flask function with
|
||||
/// two `@app.route(...)` decorators ending up at the same handler
|
||||
/// location.
|
||||
fn dedup_tag(&self) -> String {
|
||||
match self {
|
||||
SurfaceNode::EntryPoint(n) => format!("{:?}:{:?}:{}", n.framework, n.method, n.route),
|
||||
SurfaceNode::DataStore(n) => format!("{:?}:{}", n.kind, n.label),
|
||||
SurfaceNode::ExternalService(n) => format!("{:?}:{}", n.kind, n.label),
|
||||
SurfaceNode::DangerousLocal(n) => format!("{}:{:#x}", n.function_name, n.cap_bits),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Semantic kind of an edge in the [`SurfaceMap`]. Encodes the
|
||||
/// seven edge classes the chain composer walks; persistence is via
|
||||
/// JSON so adding a variant is a non-breaking schema change as long
|
||||
/// as the SQLite-level migration drops the old surface_map rows.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum EdgeKind {
|
||||
/// Caller → callee. Wraps the call-graph edge so consumers do
|
||||
/// not have to consult [`crate::callgraph::CallGraph`] directly.
|
||||
Calls,
|
||||
/// Function or entry-point reads from a data store / external
|
||||
/// service.
|
||||
ReadsFrom,
|
||||
/// Function or entry-point writes to a data store.
|
||||
WritesTo,
|
||||
/// Function or entry-point sends a request to an external
|
||||
/// service.
|
||||
TalksTo,
|
||||
/// Entry-point reaches a dangerous-local sink through some
|
||||
/// transitive call chain.
|
||||
Reaches,
|
||||
/// Entry-point triggers a side-effecting action (job, email,
|
||||
/// webhook) other than a direct call.
|
||||
Triggers,
|
||||
/// Entry-point gates downstream access on a successful auth
|
||||
/// check. The `from` is the auth-check node, the `to` is the
|
||||
/// entry-point.
|
||||
AuthRequiredOn,
|
||||
}
|
||||
|
||||
/// A single edge in the [`SurfaceMap`]. `from` and `to` are indices
|
||||
/// into [`SurfaceMap::nodes`]; the surface ordering keeps these
|
||||
/// stable across rescans.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
|
||||
pub struct SurfaceEdge {
|
||||
pub from: u32,
|
||||
pub to: u32,
|
||||
pub kind: EdgeKind,
|
||||
}
|
||||
|
||||
/// The attack-surface graph for a project. Stored as parallel
|
||||
/// `Vec`s keyed on [`SourceLocation`] so JSON serialisation is
|
||||
/// byte-deterministic and SQLite round-trips are stable.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SurfaceMap {
|
||||
pub nodes: Vec<SurfaceNode>,
|
||||
pub edges: Vec<SurfaceEdge>,
|
||||
}
|
||||
|
||||
impl SurfaceMap {
|
||||
/// Construct an empty map.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Total node count. Cheap.
|
||||
pub fn node_count(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
|
||||
/// Total edge count. Cheap.
|
||||
pub fn edge_count(&self) -> usize {
|
||||
self.edges.len()
|
||||
}
|
||||
|
||||
/// Return the first entry-point node matching `(method, route)`.
|
||||
/// Linear scan; the SurfaceMap is small (one node per route +
|
||||
/// store + service + sink) so this is fine in practice.
|
||||
pub fn entry_for_route(&self, method: HttpMethod, route: &str) -> Option<&EntryPoint> {
|
||||
self.nodes.iter().find_map(|n| match n {
|
||||
SurfaceNode::EntryPoint(ep) if ep.method == method && ep.route == route => Some(ep),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Iterate over every entry-point node in surface order.
|
||||
pub fn entry_points(&self) -> impl Iterator<Item = &EntryPoint> {
|
||||
self.nodes.iter().filter_map(|n| match n {
|
||||
SurfaceNode::EntryPoint(ep) => Some(ep),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Sort nodes by `(SourceLocation, kind_ordinal, dedup_tag)` and
|
||||
/// rewrite every edge's `from`/`to` accordingly. Two structurally
|
||||
/// identical maps are byte-identical after [`canonicalize`] +
|
||||
/// `serde_json::to_vec` regardless of insertion order.
|
||||
///
|
||||
/// [`canonicalize`]: SurfaceMap::canonicalize
|
||||
pub fn canonicalize(&mut self) {
|
||||
if self.nodes.is_empty() {
|
||||
self.edges.sort();
|
||||
self.edges.dedup();
|
||||
return;
|
||||
}
|
||||
let mut indexed: Vec<(usize, &SurfaceNode)> = self.nodes.iter().enumerate().collect();
|
||||
indexed.sort_by(|(_, a), (_, b)| {
|
||||
let key_a = (a.location(), a.kind_ordinal(), a.dedup_tag());
|
||||
let key_b = (b.location(), b.kind_ordinal(), b.dedup_tag());
|
||||
key_a.cmp(&key_b)
|
||||
});
|
||||
let mut remap: BTreeMap<u32, u32> = BTreeMap::new();
|
||||
let mut new_nodes: Vec<SurfaceNode> = Vec::with_capacity(self.nodes.len());
|
||||
for (new_idx, (old_idx, _)) in indexed.iter().enumerate() {
|
||||
remap.insert(*old_idx as u32, new_idx as u32);
|
||||
}
|
||||
for (_, node) in indexed {
|
||||
new_nodes.push(node.clone());
|
||||
}
|
||||
for edge in &mut self.edges {
|
||||
if let Some(&new_from) = remap.get(&edge.from) {
|
||||
edge.from = new_from;
|
||||
}
|
||||
if let Some(&new_to) = remap.get(&edge.to) {
|
||||
edge.to = new_to;
|
||||
}
|
||||
}
|
||||
self.nodes = new_nodes;
|
||||
self.edges.sort();
|
||||
self.edges.dedup();
|
||||
}
|
||||
|
||||
/// Serialize to deterministic JSON. The map is canonicalised
|
||||
/// first; structurally identical maps emit byte-identical JSON.
|
||||
pub fn to_json(&mut self) -> serde_json::Result<Vec<u8>> {
|
||||
self.canonicalize();
|
||||
serde_json::to_vec(self)
|
||||
}
|
||||
|
||||
/// Deserialize from JSON. Does not canonicalise; the producer is
|
||||
/// responsible for emitting a canonicalised payload.
|
||||
pub fn from_json(bytes: &[u8]) -> serde_json::Result<Self> {
|
||||
serde_json::from_slice(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Render `path` as a forward-slash string, relative to `scan_root`
/// when possible.
///
/// If `scan_root` is given and is a prefix of `path`, the prefix is
/// stripped; otherwise (no root, or `path` lies outside it) the whole
/// of `path` is used. In both cases non-UTF-8 bytes are replaced
/// lossily and every backslash becomes `/`.
pub fn relative_path_string(path: &Path, scan_root: Option<&Path>) -> String {
    let shown = scan_root
        .and_then(|root| path.strip_prefix(root).ok())
        .unwrap_or(path);
    shown.to_string_lossy().replace('\\', "/")
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for [`SourceLocation::new`].
    fn loc(file: &str, line: u32, col: u32) -> SourceLocation {
        SourceLocation::new(file, line, col)
    }

    /// Unauthenticated Flask entry-point node at `file:line:1` whose
    /// handler `h` sits on the following line.
    fn ep(file: &str, line: u32, route: &str, method: HttpMethod) -> SurfaceNode {
        let entry = EntryPoint {
            location: loc(file, line, 1),
            framework: Framework::Flask,
            method,
            route: route.into(),
            handler_name: "h".into(),
            handler_location: loc(file, line + 1, 1),
            auth_required: false,
        };
        SurfaceNode::EntryPoint(entry)
    }

    /// Nodes inserted out of order get sorted, and the edge endpoints
    /// follow their nodes to the new indices.
    #[test]
    fn canonicalize_sorts_nodes_and_remaps_edges() {
        let mut m = SurfaceMap::new();
        m.nodes.extend([
            ep("b.py", 10, "/b", HttpMethod::GET),
            ep("a.py", 5, "/a", HttpMethod::GET),
        ]);
        m.edges.push(SurfaceEdge {
            from: 0,
            to: 1,
            kind: EdgeKind::Calls,
        });

        m.canonicalize();

        assert_eq!(m.nodes[0].location().file, "a.py");
        assert_eq!(m.nodes[1].location().file, "b.py");
        // The edge started at b.py (old index 0, now 1) and ended at
        // a.py (old index 1, now 0).
        let edge = &m.edges[0];
        assert_eq!(edge.from, 1);
        assert_eq!(edge.to, 0);
    }

    /// Serialise → deserialise → serialise must reproduce the exact
    /// same bytes.
    #[test]
    fn json_round_trip_byte_identical() {
        let mut a = SurfaceMap::new();
        a.nodes.push(ep("a.py", 1, "/a", HttpMethod::GET));
        a.nodes.push(ep("b.py", 2, "/b", HttpMethod::POST));
        a.edges.push(SurfaceEdge {
            from: 0,
            to: 1,
            kind: EdgeKind::Calls,
        });

        let bytes_a = a.to_json().unwrap();
        let mut b = SurfaceMap::from_json(&bytes_a).unwrap();
        let bytes_b = b.to_json().unwrap();

        assert_eq!(bytes_a, bytes_b);
    }
}
|
||||
183
tests/surface_flask.rs
Normal file
183
tests/surface_flask.rs
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
//! Phase 21 — `SurfaceMap` Python + Flask vertical.
|
||||
//!
|
||||
//! Five-route Flask fixture exercising:
|
||||
//!
|
||||
//! * `@app.route("/", methods=["GET"])` – default GET
|
||||
//! * `@app.route("/submit", methods=["POST"])` – POST via methods kwarg
|
||||
//! * `@app.get("/users")` – verb decorator
|
||||
//! * `@bp.post("/admin")` – Blueprint receiver
|
||||
//! * `@app.route("/secret")` + `@login_required` – auth-guarded
|
||||
//!
|
||||
//! Asserts every route node appears with the correct `method`, `route`,
|
||||
//! `auth_required`, and `handler_name`. Round-trips the surface map
|
||||
//! through SQLite and confirms the byte representation is identical to
|
||||
//! the in-memory canonical JSON.
|
||||
|
||||
use nyx_scanner::commands::index::build_index;
|
||||
use nyx_scanner::commands::scan::scan_with_index_parallel;
|
||||
use nyx_scanner::database::index::Indexer;
|
||||
use nyx_scanner::entry_points::HttpMethod;
|
||||
use nyx_scanner::surface::{Framework, SurfaceMap, SurfaceNode};
|
||||
use nyx_scanner::utils::config::{AnalysisMode, Config};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
fn test_cfg() -> Config {
|
||||
let mut cfg = Config::default();
|
||||
cfg.scanner.mode = AnalysisMode::Full;
|
||||
cfg.scanner.read_vcsignore = false;
|
||||
cfg.scanner.require_git_to_read_vcsignore = false;
|
||||
cfg.performance.worker_threads = Some(1);
|
||||
cfg.performance.batch_size = 8;
|
||||
cfg.performance.channel_multiplier = 1;
|
||||
cfg
|
||||
}
|
||||
|
||||
/// Five-route Flask application written to `app.py` by the tests below.
///
/// The auth-guarded route keeps `@app.route` as the outermost decorator
/// with `@login_required` beneath it. With the opposite order Flask
/// would register the unguarded function, so the fixture would not
/// describe a route that is actually auth-protected.
const FIVE_ROUTE_FIXTURE: &str = r#"
from flask import Flask, Blueprint
from flask_login import login_required

app = Flask(__name__)
bp = Blueprint("admin", __name__)

@app.route("/", methods=["GET"])
def index():
    return "home"

@app.route("/submit", methods=["POST"])
def submit():
    return "ok"

@app.get("/users")
def list_users():
    return "users"

@bp.post("/admin")
def admin_create():
    return "created"

@app.route("/secret")
@login_required
def secret():
    return "shh"
"#;
|
||||
|
||||
fn seed_flask_fixture(root: &Path) {
|
||||
std::fs::write(root.join("app.py"), FIVE_ROUTE_FIXTURE.as_bytes()).unwrap();
|
||||
}
|
||||
|
||||
/// Indexes and scans the Flask fixture, loads the persisted surface
/// map, and checks each of the five routes for framework, handler name,
/// auth flag, and handler file.
#[test]
fn surface_map_captures_five_flask_routes() {
    let project = tempfile::tempdir().unwrap();
    seed_flask_fixture(project.path());
    let db_dir = tempfile::tempdir().unwrap();
    let db_path = db_dir.path().join("surface.sqlite");

    build_index("surface", project.path(), &db_path, &test_cfg(), false)
        .expect("build_index on flask fixture should succeed");
    let pool = Indexer::init(&db_path).expect("re-init pool");
    let _ = scan_with_index_parallel(
        "surface",
        Arc::clone(&pool),
        &test_cfg(),
        false,
        project.path(),
    )
    .expect("indexed scan should succeed");

    let map = Indexer::from_pool("surface", &pool)
        .expect("from_pool")
        .load_surface_map()
        .expect("load_surface_map ok")
        .expect("surface map persisted after scan");

    let entries: Vec<_> = map.entry_points().collect();
    assert_eq!(
        entries.len(),
        5,
        "expected five Flask routes, got {entries:#?}",
    );

    // (method, route, handler name, auth required), checked in this order.
    let expectations = [
        (HttpMethod::GET, "/", "index", false),
        (HttpMethod::POST, "/submit", "submit", false),
        (HttpMethod::GET, "/users", "list_users", false),
        (HttpMethod::POST, "/admin", "admin_create", false),
        (HttpMethod::GET, "/secret", "secret", true),
    ];
    for (method, route, handler, auth) in expectations {
        let ep = match map.entry_for_route(method, route) {
            Some(ep) => ep,
            None => panic!("missing route {method:?} {route}; map = {entries:#?}"),
        };
        assert_eq!(ep.framework, Framework::Flask, "framework mismatch on {route}");
        assert_eq!(ep.handler_name, handler, "handler mismatch on {route}");
        assert_eq!(
            ep.auth_required, auth,
            "auth mismatch on {route} (got {})",
            ep.auth_required
        );
        // Handler location must point inside the project file.
        assert!(
            ep.handler_location.file.ends_with("app.py"),
            "handler location not in app.py: {:?}",
            ep.handler_location.file
        );
    }
}
|
||||
|
||||
/// Scans the same fixture twice against one database and checks that
/// the persisted surface-map bytes are identical across rescans and
/// survive an in-memory deserialise → serialise round trip.
#[test]
fn surface_map_round_trips_byte_identical_through_sqlite() {
    let project = tempfile::tempdir().unwrap();
    seed_flask_fixture(project.path());
    let db_dir = tempfile::tempdir().unwrap();
    let db_path = db_dir.path().join("rt.sqlite");

    build_index("rt", project.path(), &db_path, &test_cfg(), false).expect("first build_index");
    let pool = Indexer::init(&db_path).expect("first pool");

    let _ = scan_with_index_parallel("rt", Arc::clone(&pool), &test_cfg(), false, project.path())
        .expect("first scan");
    let bytes_first = {
        // The indexer handle is scoped so it is gone before the rescan.
        let idx = Indexer::from_pool("rt", &pool).expect("first from_pool");
        idx.load_surface_map_bytes()
            .expect("load bytes 1")
            .expect("surface map persisted 1")
    };

    // Second scan of unchanged sources against the same DB: the
    // canonical surface map must come out byte-for-byte the same.
    let _ = scan_with_index_parallel("rt", Arc::clone(&pool), &test_cfg(), false, project.path())
        .expect("second scan");
    let idx2 = Indexer::from_pool("rt", &pool).expect("second from_pool");
    let bytes_second = idx2
        .load_surface_map_bytes()
        .expect("load bytes 2")
        .expect("surface map persisted 2");

    assert_eq!(
        bytes_first, bytes_second,
        "surface_map JSON must be byte-identical across rescans"
    );

    // In-memory round trip: deserialise, canonicalise + serialise, and
    // expect exactly the on-disk bytes back.
    let mut map = SurfaceMap::from_json(&bytes_first).expect("from_json");
    let bytes_after_round_trip = map.to_json().expect("to_json");
    assert_eq!(
        bytes_first, bytes_after_round_trip,
        "canonical JSON must match round-tripped JSON"
    );

    // Sanity check that persistence did not lose nodes: all five
    // fixture routes are still present.
    let mut entries: Vec<&str> = Vec::new();
    for node in &map.nodes {
        if let SurfaceNode::EntryPoint(ep) = node {
            entries.push(ep.route.as_str());
        }
    }
    for route in ["/", "/submit", "/users", "/admin", "/secret"] {
        assert!(
            entries.contains(&route),
            "route {route} missing after round trip; got {entries:?}",
        );
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue