mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
[pitboss] sweep after phase 23: 4 deferred items resolved
This commit is contained in:
parent
655ec45b21
commit
a6d88def1a
13 changed files with 328 additions and 67 deletions
|
|
@ -90,6 +90,119 @@ pub fn child_or_named<'tree>(parent: Node<'tree>, kind: &str) -> Option<Node<'tr
|
|||
parent.children(&mut cursor).find(|c| c.kind() == kind)
|
||||
}
|
||||
|
||||
/// Return `true` when `bytes` contains a top-level Python `import` /
|
||||
/// `from … import …` statement whose leading package segment starts
|
||||
/// with one of `modules` (case-insensitive prefix match). This means
|
||||
/// `["flask"]` matches `flask`, `flask_login`, and `flask_jwt_extended`
|
||||
/// — the canonical Flask framework family — but does not match
|
||||
/// `os.flask_helper` or a comment that mentions flask.
|
||||
pub fn python_imports_any(bytes: &[u8], modules: &[&str]) -> bool {
|
||||
let text = match std::str::from_utf8(bytes) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return false,
|
||||
};
|
||||
for line in text.lines() {
|
||||
let line = line.trim_start();
|
||||
let pkg = if let Some(rest) = line.strip_prefix("from ") {
|
||||
rest.split_whitespace().next().unwrap_or("")
|
||||
} else if let Some(rest) = line.strip_prefix("import ") {
|
||||
rest.split([',', ' ', ';'])
|
||||
.next()
|
||||
.unwrap_or("")
|
||||
.trim()
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
if pkg.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let head = pkg.split('.').next().unwrap_or(pkg);
|
||||
if matches_prefix_ci(head, modules) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// ASCII case-insensitive prefix test: `true` when `head` begins with
/// at least one entry of `prefixes`. An empty `prefixes` slice never
/// matches; an empty prefix string matches every `head`.
fn matches_prefix_ci(head: &str, prefixes: &[&str]) -> bool {
    let head_bytes = head.as_bytes();
    for candidate in prefixes {
        // Compare only the leading `candidate.len()` bytes; `get`
        // yields `None` when `head` is shorter than the candidate.
        let leading = head_bytes.get(..candidate.len());
        if leading.is_some_and(|lead| lead.eq_ignore_ascii_case(candidate.as_bytes())) {
            return true;
        }
    }
    false
}
|
||||
|
||||
/// Return `true` when `bytes` contains a top-level Rust `use` (or
|
||||
/// `extern crate`) statement whose leading path segment matches one of
|
||||
/// `crates` (case-insensitive). Optional `pub` / `pub(crate)` /
|
||||
/// `pub(super)` visibility prefixes are stripped before the `use`
|
||||
/// keyword check.
|
||||
pub fn rust_uses_any(bytes: &[u8], crates: &[&str]) -> bool {
|
||||
let text = match std::str::from_utf8(bytes) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return false,
|
||||
};
|
||||
for line in text.lines() {
|
||||
let mut line = line.trim_start();
|
||||
if let Some(rest) = line.strip_prefix("pub") {
|
||||
let rest = rest.trim_start();
|
||||
line = if let Some(r) = rest.strip_prefix("(crate)") {
|
||||
r.trim_start()
|
||||
} else if let Some(r) = rest.strip_prefix("(super)") {
|
||||
r.trim_start()
|
||||
} else if let Some(r) = rest.strip_prefix("(self)") {
|
||||
r.trim_start()
|
||||
} else {
|
||||
rest
|
||||
};
|
||||
}
|
||||
let rest = if let Some(r) = line.strip_prefix("use ") {
|
||||
r
|
||||
} else if let Some(r) = line.strip_prefix("extern crate ") {
|
||||
r
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
let head = rest
|
||||
.split(['{', ';', ' ', ':', '/'])
|
||||
.next()
|
||||
.unwrap_or("")
|
||||
.trim();
|
||||
if head.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if matches_prefix_ci(head, crates) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Return `true` when `bytes` contains a Java `import` statement
/// (including `import static`) whose package path begins with one of
/// `prefixes`. Comment-only mentions do *not* match.
///
/// The scan is line-based: leading indentation is ignored and the line
/// must start with the `import` keyword followed by whitespace. Any
/// amount of spaces or tabs may separate `import`, the optional
/// `static`, and the path. The comparison is a plain, case-sensitive
/// `starts_with` on the path text. Input that is not valid UTF-8 never
/// matches.
pub fn java_imports_any(bytes: &[u8], prefixes: &[&str]) -> bool {
    /// Strip `keyword` plus the whitespace after it, but only when the
    /// keyword is a whole word (so `importer.run()` is not an import).
    fn after_keyword<'a>(text: &'a str, keyword: &str) -> Option<&'a str> {
        text.strip_prefix(keyword)
            .filter(|rest| rest.starts_with(char::is_whitespace))
            .map(str::trim_start)
    }

    let Ok(text) = std::str::from_utf8(bytes) else {
        return false;
    };

    text.lines().any(|line| {
        let Some(rest) = after_keyword(line.trim_start(), "import") else {
            return false;
        };
        // `import static pkg.Type.member;` — the package path follows
        // the optional `static` modifier.
        let path = after_keyword(rest, "static")
            .unwrap_or(rest)
            .trim_end()
            .trim_end_matches(';')
            .trim_end();
        prefixes.iter().any(|prefix| path.starts_with(*prefix))
    })
}
|
||||
|
||||
/// Walk every descendant of `root`, invoking `visit` once per node.
|
||||
/// Useful when a probe needs to look at multiple node kinds in a single
|
||||
/// pass (e.g. annotations + method declarations on the same walk).
|
||||
|
|
@ -128,4 +241,63 @@ mod tests {
|
|||
assert!(leaf_matches("Auth::JwtRequired", &["JwtRequired"]));
|
||||
assert!(!leaf_matches("OtherDecorator", &["login_required"]));
|
||||
}
|
||||
|
||||
#[test]
fn python_imports_any_matches_actual_imports() {
    // (source, module to look for, expected verdict)
    let cases = [
        ("from flask import Flask\n", "flask", true),
        ("import flask\n", "flask", true),
        ("from flask.app import Flask\n", "flask", true),
        ("import django.urls\n", "django", true),
        // Comment-only mention must not match.
        ("# flask is great\n", "flask", false),
        // String-only mention must not match.
        ("x = 'flask'\n", "flask", false),
        // Wrong module.
        ("import os\n", "flask", false),
    ];
    for (source, module, expected) in cases {
        let verdict = python_imports_any(source.as_bytes(), &[module]);
        assert_eq!(verdict, expected);
    }
}
|
||||
|
||||
#[test]
fn rust_uses_any_matches_use_statements() {
    // (source, crate to look for, expected verdict)
    let cases = [
        ("use actix_web::web;\n", "actix_web", true),
        ("use actix_web;\n", "actix_web", true),
        ("pub use axum::Router;\n", "axum", true),
        ("pub(crate) use axum::extract::Path;\n", "axum", true),
        ("extern crate axum;\n", "axum", true),
        // Comment-only mention must not match.
        ("// use actix_web::web;\n", "actix_web", false),
        // Wrong crate.
        ("use serde::Deserialize;\n", "actix_web", false),
    ];
    for (source, krate, expected) in cases {
        let verdict = rust_uses_any(source.as_bytes(), &[krate]);
        assert_eq!(verdict, expected);
    }
}
|
||||
|
||||
#[test]
fn java_imports_any_matches_package_prefix() {
    // (source, package prefix to look for, expected verdict)
    let cases = [
        ("import io.quarkus.runtime.Quarkus;\n", "io.quarkus", true),
        ("import jakarta.ws.rs.GET;\n", "jakarta.ws.rs", true),
        (
            "import static io.quarkus.runtime.Quarkus.run;\n",
            "io.quarkus",
            true,
        ),
        // Comment-only mention must not match.
        ("// import io.quarkus.runtime.Quarkus;\n", "io.quarkus", false),
        // Wrong prefix.
        (
            "import org.springframework.web.bind.annotation.GetMapping;\n",
            "io.quarkus",
            false,
        ),
    ];
    for (source, prefix, expected) in cases {
        let verdict = java_imports_any(source.as_bytes(), &[prefix]);
        assert_eq!(verdict, expected);
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@
|
|||
//! `@DenyAll` (Quarkus Security).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file};
|
||||
use crate::surface::lang::common::{java_imports_any, loc_for, rel_file};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
|
@ -53,7 +53,10 @@ pub fn detect_quarkus_routes(
|
|||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
if !file_uses_quarkus(tree.root_node(), bytes) {
|
||||
// Phase 23 follow-up: tighten witness to top-level `import`
|
||||
// statements with the strict package prefix, replacing the
|
||||
// previous AST `import_declaration.contains(...)` substring scan.
|
||||
if !java_imports_any(bytes, &["io.quarkus", "jakarta.ws.rs"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let mut out = Vec::new();
|
||||
|
|
@ -94,19 +97,6 @@ pub fn detect_quarkus_routes(
|
|||
out
|
||||
}
|
||||
|
||||
fn file_uses_quarkus(root: Node, bytes: &[u8]) -> bool {
|
||||
let mut cursor = root.walk();
|
||||
for child in root.children(&mut cursor) {
|
||||
if child.kind() == "import_declaration"
|
||||
&& let Ok(text) = child.utf8_text(bytes)
|
||||
&& (text.contains("io.quarkus") || text.contains("jakarta.ws.rs"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn class_is_quarkus_resource(class: Node, bytes: &[u8]) -> bool {
|
||||
let modifiers = match crate::surface::lang::common::child_or_named(class, "modifiers") {
|
||||
Some(m) => m,
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{
|
||||
leaf_matches, loc_for, rel_file, string_node_value,
|
||||
leaf_matches, loc_for, python_imports_any, rel_file, string_node_value,
|
||||
};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::collections::HashMap;
|
||||
|
|
@ -59,12 +59,10 @@ pub fn detect_django_routes(
|
|||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
// File-level gate: only fire when the file actually imports
|
||||
// django (or extends the Django CBV bases via name witness).
|
||||
let file_text = std::str::from_utf8(bytes).unwrap_or("");
|
||||
let has_django_witness = file_text.contains("django")
|
||||
|| file_text.contains("rest_framework")
|
||||
|| CBV_BASES.iter().any(|b| file_text.contains(b));
|
||||
if !has_django_witness {
|
||||
// django or DRF. Phase 23 follow-up tightens the witness to
|
||||
// top-level `import` / `from` statements so a comment or string
|
||||
// mention of "django" / "rest_framework" cannot trigger detection.
|
||||
if !python_imports_any(bytes, &["django", "rest_framework"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
|
|
@ -356,7 +354,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn detects_class_based_view() {
|
||||
let src = "class UserList(APIView):\n def get(self, request): pass\n def post(self, request): pass\n";
|
||||
let src = "from rest_framework.views import APIView\n\nclass UserList(APIView):\n def get(self, request): pass\n def post(self, request): pass\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let nodes = detect_django_routes(&tree, &bytes, &PathBuf::from("views.py"), None);
|
||||
assert_eq!(nodes.len(), 2);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,9 @@
|
|||
//! decorator-stack guards drawn from [`AUTH_DECORATORS`].
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, string_node_value};
|
||||
use crate::surface::lang::common::{
|
||||
leaf_matches, loc_for, python_imports_any, rel_file, string_node_value,
|
||||
};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
|
@ -51,13 +53,10 @@ pub fn detect_fastapi_routes(
|
|||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
// File-level gate: avoid double-detection on Flask files that
|
||||
// also use `app.get(...)` shape. FastAPI / Starlette / APIRouter
|
||||
// require an explicit import of the relevant package.
|
||||
let file_text = std::str::from_utf8(bytes).unwrap_or("");
|
||||
let has_fastapi_witness = file_text.contains("fastapi")
|
||||
|| file_text.contains("starlette")
|
||||
|| file_text.contains("APIRouter");
|
||||
if !has_fastapi_witness {
|
||||
// also use `app.get(...)` shape. Phase 23 follow-up tightens the
|
||||
// witness to actual top-level `import` / `from` statements so a
|
||||
// comment or string mention of "fastapi" cannot trigger detection.
|
||||
if !python_imports_any(bytes, &["fastapi", "starlette"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
|
|
@ -314,7 +313,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn detects_router_post() {
|
||||
let src = "router = APIRouter()\n@router.post('/items')\ndef create(): pass\n";
|
||||
let src = "from fastapi import APIRouter\nrouter = APIRouter()\n@router.post('/items')\ndef create(): pass\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let nodes = detect_fastapi_routes(&tree, &bytes, &PathBuf::from("api.py"), None);
|
||||
let SurfaceNode::EntryPoint(ep) = &nodes[0] else {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
//! and -JWT-Extended).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::python_imports_any;
|
||||
use crate::surface::{
|
||||
EntryPoint, Framework, SourceLocation, SurfaceNode, relative_path_string,
|
||||
};
|
||||
|
|
@ -52,13 +53,11 @@ pub fn detect_flask_routes(
|
|||
) -> Vec<SurfaceNode> {
|
||||
// File-level gate: avoid double-detection on FastAPI files where
|
||||
// `app.get(...)` shape overlaps. Phase 21 was lenient because no
|
||||
// sibling probe existed; Phase 22 splits per-framework, so each
|
||||
// probe only fires when its framework witness is present.
|
||||
let file_text = std::str::from_utf8(bytes).unwrap_or("");
|
||||
let has_flask_witness = file_text.contains("flask")
|
||||
|| file_text.contains("Flask")
|
||||
|| file_text.contains("Blueprint");
|
||||
if !has_flask_witness {
|
||||
// sibling probe existed; Phase 22 split per-framework via free
|
||||
// text witness; Phase 23 follow-up tightens the witness to actual
|
||||
// top-level `import` / `from` statements so a comment or vendored
|
||||
// license header that names "flask" cannot trigger detection.
|
||||
if !python_imports_any(bytes, &["flask"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = relative_path_string(path, scan_root);
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
//! `BearerAuth`, `JwtClaims`, etc.).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file};
|
||||
use crate::surface::lang::common::{loc_for, rel_file, rust_uses_any};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
|
@ -42,11 +42,11 @@ pub fn detect_actix_routes(
|
|||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_text = std::str::from_utf8(bytes).unwrap_or("");
|
||||
if !file_text.contains("actix_web::") && !file_text.contains("use actix_web") {
|
||||
// Best-effort gate so the actix probe does not over-fire on
|
||||
// Rocket / generic Rust files that also define a `#[get]`
|
||||
// macro from a user crate.
|
||||
// Phase 23 follow-up: gate on a real top-level `use actix_web…` /
|
||||
// `extern crate actix_web` so a comment or string literal
|
||||
// mentioning actix_web cannot trigger detection on a Rocket /
|
||||
// generic Rust file that also defines a `#[get]` user macro.
|
||||
if !rust_uses_any(bytes, &["actix_web"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
//! `Router::route(...)` registration in the same file references it).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{loc_for, rel_file, string_node_value};
|
||||
use crate::surface::lang::common::{loc_for, rel_file, rust_uses_any, string_node_value};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
|
@ -39,8 +39,10 @@ pub fn detect_axum_routes(
|
|||
path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let file_text = std::str::from_utf8(bytes).unwrap_or("");
|
||||
if !file_text.contains("axum::") && !file_text.contains("use axum") {
|
||||
// Phase 23 follow-up: gate on a real top-level `use axum…` /
|
||||
// `extern crate axum` so a comment / string literal mentioning
|
||||
// axum cannot trigger detection.
|
||||
if !rust_uses_any(bytes, &["axum"]) {
|
||||
return Vec::new();
|
||||
}
|
||||
let file_rel = rel_file(path, scan_root);
|
||||
|
|
|
|||
|
|
@ -60,16 +60,25 @@ pub fn populate_reaches_edges(
|
|||
// call graph cannot resolve the seed FuncKey.
|
||||
reachable_files.insert(ep.handler_location.file.clone());
|
||||
|
||||
// Locate seed FuncKeys whose `namespace` matches the entry's
|
||||
// file and whose `name` matches the handler. More than one
|
||||
// seed is possible (overloaded methods, duplicate definitions).
|
||||
// Locate seed FuncKeys whose `namespace` (project-relative
|
||||
// POSIX path, optionally prefixed with `@pkg/name::`) matches
|
||||
// the entry's file and whose `name` matches the handler. More
|
||||
// than one seed is possible (overloaded methods, duplicate
|
||||
// definitions).
|
||||
//
|
||||
// Phase 23 follow-up: this used to be an `ends_with` substring
|
||||
// check on both sides, which silently aliased same-basename
|
||||
// files in sibling directories — `subdir/app.py` and
|
||||
// `other/app.py` would both seed when the entry-point pointed
|
||||
// at `app.py`. We now compare the file part exactly so a
|
||||
// handler in `subdir/app.py` only seeds the FuncKey whose
|
||||
// namespace strips to `subdir/app.py`.
|
||||
let seeds = call_graph
|
||||
.index
|
||||
.iter()
|
||||
.filter(|(k, _)| k.name == ep.handler_name)
|
||||
.filter(|(k, _)| {
|
||||
k.namespace.ends_with(&ep.handler_location.file)
|
||||
|| ep.handler_location.file.ends_with(&k.namespace)
|
||||
file_part_of_namespace(&k.namespace) == ep.handler_location.file
|
||||
})
|
||||
.map(|(_, idx)| *idx)
|
||||
.collect::<Vec<_>>();
|
||||
|
|
@ -108,6 +117,15 @@ pub fn populate_reaches_edges(
|
|||
map.edges.extend(new_edges);
|
||||
}
|
||||
|
||||
/// Return the file portion of a `FuncKey` namespace: everything after
/// the last `::`, or the whole string when no `::` is present.
/// Namespaces shaped like `"@scope/name::src/file.ts"` therefore yield
/// `"src/file.ts"`, which is the form compared against an entry-point's
/// `handler_location.file`.
fn file_part_of_namespace(ns: &str) -> &str {
    match ns.rsplit_once("::") {
        Some((_package, file)) => file,
        None => ns,
    }
}
|
||||
|
||||
/// Build a lookup from destination node index → destination file.
|
||||
/// Restricted to the three reachable-from-entry-point variants.
|
||||
fn build_destination_index(map: &SurfaceMap) -> Vec<(usize, String)> {
|
||||
|
|
@ -189,4 +207,19 @@ mod tests {
|
|||
assert_eq!(map.edges[0].from, 0);
|
||||
assert_eq!(map.edges[0].to, 1);
|
||||
}
|
||||
|
||||
#[test]
fn file_part_of_namespace_strips_package_prefix() {
    // (namespace, expected file part)
    let cases = [
        ("app.py", "app.py"),
        ("src/main.rs", "src/main.rs"),
        ("@scope/name::src/file.ts", "src/file.ts"),
        // Last `::` wins, matching `namespace_with_package`'s shape.
        ("@a/b::@c/d::lib/x.ts", "lib/x.ts"),
    ];
    for (namespace, expected) in cases {
        assert_eq!(file_part_of_namespace(namespace), expected);
    }
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue