diff --git a/src/dynamic/framework/adapters/kafka_java.rs b/src/dynamic/framework/adapters/kafka_java.rs index 299d37a6..5c0bfbb3 100644 --- a/src/dynamic/framework/adapters/kafka_java.rs +++ b/src/dynamic/framework/adapters/kafka_java.rs @@ -84,7 +84,7 @@ impl FrameworkAdapter for KafkaJavaAdapter { fn detect_kafka_java( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_kafka); @@ -109,7 +109,7 @@ fn detect_kafka_java( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Java, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/kafka_python.rs b/src/dynamic/framework/adapters/kafka_python.rs index 4d57dc26..54343fd0 100644 --- a/src/dynamic/framework/adapters/kafka_python.rs +++ b/src/dynamic/framework/adapters/kafka_python.rs @@ -88,7 +88,7 @@ impl FrameworkAdapter for KafkaPythonAdapter { fn detect_kafka_python( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_kafka_consumer); @@ -113,7 +113,7 @@ fn detect_kafka_python( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Python, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/middleware_express.rs b/src/dynamic/framework/adapters/middleware_express.rs index 988c8ec4..5d0f1b6a 100644 --- a/src/dynamic/framework/adapters/middleware_express.rs +++ b/src/dynamic/framework/adapters/middleware_express.rs @@ -11,6 +11,7 @@ use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; +use crate::summary::ssa_summary::SsaFuncSummary; use crate::symbol::Lang; pub struct MiddlewareExpressAdapter; @@ -61,32 +62,63 @@ impl FrameworkAdapter for MiddlewareExpressAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let mounted_by_name = function_is_mounted_as_middleware(file_bytes, &summary.name); - let has_mw_signature = function_has_middleware_signature(summary); - let body_mounts = super::any_callee_matches(summary, callee_is_express_mount); - let binds = mounted_by_name || has_mw_signature || body_mounts; - if !binds { - return None; - } - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Middleware { - name: summary.name.clone(), - }, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) + detect_express_middleware(summary, None, ast, file_bytes) } + + fn detect_with_context( + &self, + summary: &FuncSummary, + ssa_summary: Option<&SsaFuncSummary>, + ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + detect_express_middleware(summary, ssa_summary, ast, file_bytes) + } +} + +fn detect_express_middleware( + summary: &FuncSummary, + ssa_summary: Option<&SsaFuncSummary>, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], +) -> Option { + let mounted_by_name = function_is_mounted_as_middleware(file_bytes, &summary.name); + let has_mw_signature = function_has_middleware_signature(summary); + let body_mounts = super::any_callee_matches(summary, callee_is_express_mount) + && super::typed_receiver_facts_allow( + summary, + ssa_summary, + callee_is_express_mount, + typed_container_allows_express, + ); + let binds = mounted_by_name || has_mw_signature || body_mounts; + if !binds { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Middleware { + name: summary.name.clone(), + }, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) +} + +fn typed_container_allows_express(container: &str) -> bool { + let lc = container.to_ascii_lowercase(); + lc.contains("express") || lc.contains("router") } #[cfg(test)] mod tests { use super::*; + use crate::summary::CalleeSite; fn parse_js(src: &[u8]) -> tree_sitter::Tree { let mut parser = tree_sitter::Parser::new(); @@ -136,4 +168,53 @@ mod tests { "unrelated helper in an Express setup file must not bind as middleware", ); } + + #[test] + fn ssa_receiver_type_rejects_non_express_use_collision() { + let src: &[u8] = b"const express = require('express');\n\ + function helper() { cache.use('audit'); }\n"; + let tree = parse_js(src); + let mut summary = FuncSummary { + name: "helper".into(), + ..Default::default() + }; + summary.callees.push(CalleeSite { + name: "cache.use".to_owned(), + receiver: Some("cache".to_owned()), + ordinal: 0, + ..Default::default() + }); + let mut ssa = SsaFuncSummary::default(); + ssa.typed_call_receivers.push((0, "Map".to_owned())); + assert!( + MiddlewareExpressAdapter + .detect_with_context(&summary, Some(&ssa), tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn ssa_receiver_type_keeps_express_use_receiver() { + let src: &[u8] = b"const express = require('express');\n\ + function helper() { app.use(audit); }\n"; + let tree = parse_js(src); + let mut summary = FuncSummary { + name: "helper".into(), + ..Default::default() + }; + summary.callees.push(CalleeSite { + name: "app.use".to_owned(), + receiver: Some("app".to_owned()), + ordinal: 0, + ..Default::default() + }); + let mut ssa = SsaFuncSummary::default(); + ssa.typed_call_receivers + .push((0, "express.Application".to_owned())); + assert!( + MiddlewareExpressAdapter + .detect_with_context(&summary, Some(&ssa), tree.root_node(), src) + .is_some() + ); + } } diff --git a/src/dynamic/framework/adapters/mod.rs b/src/dynamic/framework/adapters/mod.rs index 323efe52..82b8de2f 100644 --- a/src/dynamic/framework/adapters/mod.rs +++ b/src/dynamic/framework/adapters/mod.rs @@ -254,6 +254,9 @@ pub use xxe_php::XxePhpAdapter; pub use xxe_python::XxePythonAdapter; pub use xxe_ruby::XxeRubyAdapter; +use crate::dynamic::framework::{MiddlewareShape, auth_markers}; +use crate::symbol::Lang; + /// True when any callee in `summary.callees` matches `predicate`. fn any_callee_matches( summary: &crate::summary::FuncSummary, @@ -296,6 +299,202 @@ fn typed_receiver_facts_allow( true } +/// Walk a broker consumer source file and collect validator / +/// middleware names attached around the consumer setup. +/// +/// The Phase 20 broker adapters all stamp [`EntryKind::MessageHandler`] +/// bindings, but the protective layer vocabulary is language-wide: JSON +/// schema validators, Spring AMQP interceptors, SQS middleware stacks, and +/// Go payload validators should be reported uniformly regardless of broker. +/// This helper keeps that matching in one place and intentionally returns +/// only names recognised by the verifier-side auth marker registry. +fn collect_message_middleware( + lang: Lang, + root: tree_sitter::Node<'_>, + bytes: &[u8], +) -> Vec { + let mut out = Vec::new(); + walk_message_middleware(lang, root, bytes, &mut out); + out +} + +fn walk_message_middleware( + lang: Lang, + node: tree_sitter::Node<'_>, + bytes: &[u8], + out: &mut Vec, +) { + match node.kind() { + "call" + | "call_expression" + | "method_call" + | "method_invocation" + | "object_creation_expression" + | "decorator" + | "annotation" + | "marker_annotation" => { + inspect_message_middleware_node(lang, node, bytes, out); + } + _ => {} + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk_message_middleware(lang, child, bytes, out); + } +} + +fn inspect_message_middleware_node( + lang: Lang, + node: tree_sitter::Node<'_>, + bytes: &[u8], + out: &mut Vec, +) { + let text = node.utf8_text(bytes).unwrap_or(""); + if matches!( + node.kind(), + "decorator" | "annotation" | "marker_annotation" + ) { + push_annotation_candidates(lang, text, out); + return; + } + + let callee = message_call_callee(node, bytes).unwrap_or_default(); + push_candidate_if_protective(lang, &callee, out); + if !is_message_middleware_site(&callee, text) { + return; + } + push_tokens_if_protective(lang, text, out); +} + +fn message_call_callee(node: tree_sitter::Node<'_>, bytes: &[u8]) -> Option { + if let Some(function) = node.child_by_field_name("function") { + return function.utf8_text(bytes).ok().map(|s| s.trim().to_owned()); + } + if let Some(name) = node.child_by_field_name("name") { + return name.utf8_text(bytes).ok().map(|s| s.trim().to_owned()); + } + if let Some(ty) = node.child_by_field_name("type") { + return ty.utf8_text(bytes).ok().map(|s| s.trim().to_owned()); + } + None +} + +fn is_message_middleware_site(callee: &str, text: &str) -> bool { + let last = last_message_segment(callee); + let text_lc = text.to_ascii_lowercase(); + let callee_lc = callee.to_ascii_lowercase(); + + matches!( + last, + "batch_processor" + | "sqs_batch_processor" + | "middleware" + | "middlewareStack" + | "setErrorHandler" + | "setCommonErrorHandler" + | "setRecordInterceptor" + | "setBatchInterceptor" + | "setAdviceChain" + | "setAfterReceivePostProcessors" + | "setMessageConverter" + | "setValidator" + | "withValidator" + | "withMessageValidator" + | "UseMiddleware" + | "QueueSubscribe" + ) || ((last == "add" || last == "use") && callee_lc.contains("middlewarestack")) + || text_lc.contains("validationrules") + || text_lc.contains("validator") + || text_lc.contains("interceptor") + || text_lc.contains("middlewarestack") +} + +fn push_annotation_candidates(lang: Lang, text: &str, out: &mut Vec) { + let trimmed = text.trim(); + if let Some(rest) = trimmed.strip_prefix('@') + && let Some(name) = rest + .split(|ch: char| !is_message_name_char(ch)) + .find(|part| !part.is_empty()) + { + if lang == Lang::Java { + push_candidate_if_protective(lang, &format!("@{name}"), out); + } + push_candidate_if_protective(lang, name, out); + } + push_tokens_if_protective(lang, trimmed, out); +} + +fn push_tokens_if_protective(lang: Lang, text: &str, out: &mut Vec) { + let mut token = String::new(); + for ch in text.chars() { + if is_message_name_char(ch) { + token.push(ch); + } else if !token.is_empty() { + push_candidate_if_protective(lang, &token, out); + token.clear(); + } + } + if !token.is_empty() { + push_candidate_if_protective(lang, &token, out); + } +} + +fn is_message_name_char(ch: char) -> bool { + ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.' | ':' | '!') +} + +fn push_candidate_if_protective(lang: Lang, candidate: &str, out: &mut Vec) { + for name in candidate_variants(candidate) { + if is_message_setup_method(&name) { + continue; + } + if auth_markers::is_protective(lang, &name) && !out.iter().any(|m| m.name == name) { + out.push(MiddlewareShape { name }); + } + } +} + +fn is_message_setup_method(name: &str) -> bool { + matches!( + last_message_segment(name), + "add" + | "use" + | "setErrorHandler" + | "setCommonErrorHandler" + | "setRecordInterceptor" + | "setBatchInterceptor" + | "setAdviceChain" + | "setAfterReceivePostProcessors" + | "setMessageConverter" + | "setValidator" + | "withValidator" + | "withMessageValidator" + | "UseMiddleware" + | "QueueSubscribe" + ) +} + +fn candidate_variants(candidate: &str) -> Vec { + let trimmed = candidate + .trim() + .trim_matches(|ch| matches!(ch, '"' | '\'' | '`' | '(' | ')' | '[' | ']' | '{' | '}')); + if trimmed.is_empty() { + return Vec::new(); + } + let mut out = vec![trimmed.to_owned()]; + let last = last_message_segment(trimmed); + if last != trimmed { + out.push(last.to_owned()); + } + out +} + +fn last_message_segment(name: &str) -> &str { + name.rsplit(['.', ':', '/', '\\', '#']) + .find(|segment| !segment.is_empty()) + .unwrap_or(name) +} + /// True when any callee in `summary.callees` matches `name_pred` AND /// (its receiver matches `receiver_pred` OR its receiver is `None`). /// diff --git a/src/dynamic/framework/adapters/nats_go.rs b/src/dynamic/framework/adapters/nats_go.rs index bf15c9af..91a7864d 100644 --- a/src/dynamic/framework/adapters/nats_go.rs +++ b/src/dynamic/framework/adapters/nats_go.rs @@ -70,7 +70,7 @@ impl FrameworkAdapter for NatsGoAdapter { fn detect_nats_go( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_nats); @@ -95,7 +95,7 @@ fn detect_nats_go( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Go, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/pubsub_go.rs b/src/dynamic/framework/adapters/pubsub_go.rs index b6d9eff9..63cc314f 100644 --- a/src/dynamic/framework/adapters/pubsub_go.rs +++ b/src/dynamic/framework/adapters/pubsub_go.rs @@ -75,7 +75,7 @@ impl FrameworkAdapter for PubsubGoAdapter { fn detect_pubsub_go( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_pubsub); @@ -100,7 +100,7 @@ fn detect_pubsub_go( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Go, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/pubsub_python.rs b/src/dynamic/framework/adapters/pubsub_python.rs index 87d0fc3e..eb96241e 100644 --- a/src/dynamic/framework/adapters/pubsub_python.rs +++ b/src/dynamic/framework/adapters/pubsub_python.rs @@ -78,7 +78,7 @@ impl FrameworkAdapter for PubsubPythonAdapter { fn detect_pubsub_python( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_pubsub); @@ -103,7 +103,7 @@ fn detect_pubsub_python( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Python, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/rabbit_java.rs b/src/dynamic/framework/adapters/rabbit_java.rs index c2f07abe..008757db 100644 --- a/src/dynamic/framework/adapters/rabbit_java.rs +++ b/src/dynamic/framework/adapters/rabbit_java.rs @@ -81,7 +81,7 @@ impl FrameworkAdapter for RabbitJavaAdapter { fn detect_rabbit_java( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_rabbit); @@ -106,7 +106,7 @@ fn detect_rabbit_java( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Java, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/rabbit_python.rs b/src/dynamic/framework/adapters/rabbit_python.rs index 54f50575..635be4f9 100644 --- a/src/dynamic/framework/adapters/rabbit_python.rs +++ b/src/dynamic/framework/adapters/rabbit_python.rs @@ -77,7 +77,7 @@ impl FrameworkAdapter for RabbitPythonAdapter { fn detect_rabbit_python( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_rabbit); @@ -102,7 +102,7 @@ fn detect_rabbit_python( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Python, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/scheduled_sidekiq.rs b/src/dynamic/framework/adapters/scheduled_sidekiq.rs index 81492fed..2b59178f 100644 --- a/src/dynamic/framework/adapters/scheduled_sidekiq.rs +++ b/src/dynamic/framework/adapters/scheduled_sidekiq.rs @@ -17,6 +17,7 @@ use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; +use crate::summary::ssa_summary::SsaFuncSummary; use crate::symbol::Lang; pub struct ScheduledSidekiqAdapter; @@ -86,34 +87,65 @@ impl FrameworkAdapter for ScheduledSidekiqAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let has_shape = source_has_sidekiq_shape(file_bytes); - if !has_shape { - return None; - } - let name_matches = name_is_sidekiq_entry(&summary.name); - let body_schedules = super::any_callee_matches(summary, callee_schedules_sidekiq); - if !(name_matches || body_schedules) { - return None; - } - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::ScheduledJob { - schedule: extract_schedule(file_bytes), - }, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) + detect_sidekiq(summary, None, ast, file_bytes) } + + fn detect_with_context( + &self, + summary: &FuncSummary, + ssa_summary: Option<&SsaFuncSummary>, + ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + detect_sidekiq(summary, ssa_summary, ast, file_bytes) + } +} + +fn detect_sidekiq( + summary: &FuncSummary, + ssa_summary: Option<&SsaFuncSummary>, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], +) -> Option { + let has_shape = source_has_sidekiq_shape(file_bytes); + if !has_shape { + return None; + } + let name_matches = name_is_sidekiq_entry(&summary.name); + let body_schedules = super::any_callee_matches(summary, callee_schedules_sidekiq) + && super::typed_receiver_facts_allow( + summary, + ssa_summary, + callee_schedules_sidekiq, + typed_container_allows_sidekiq, + ); + if !(name_matches || body_schedules) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::ScheduledJob { + schedule: extract_schedule(file_bytes), + }, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) +} + +fn typed_container_allows_sidekiq(container: &str) -> bool { + let lc = container.to_ascii_lowercase(); + lc.contains("sidekiq") || lc.ends_with("worker") || lc.ends_with("job") } #[cfg(test)] mod tests { use super::*; + use crate::summary::CalleeSite; fn parse_ruby(src: &[u8]) -> tree_sitter::Tree { let mut parser = tree_sitter::Parser::new(); @@ -174,4 +206,50 @@ mod tests { "non-worker helper in a Sidekiq file must not bind", ); } + + #[test] + fn ssa_receiver_type_rejects_non_sidekiq_scheduler_collision() { + let src: &[u8] = b"# include Sidekiq::Worker\nclass Enqueuer\n def enqueue(payload)\n mailer.perform_async(payload)\n end\nend\n"; + let tree = parse_ruby(src); + let mut summary = FuncSummary { + name: "enqueue".into(), + ..Default::default() + }; + summary.callees.push(CalleeSite { + name: "mailer.perform_async".to_owned(), + receiver: Some("mailer".to_owned()), + ordinal: 0, + ..Default::default() + }); + let mut ssa = SsaFuncSummary::default(); + ssa.typed_call_receivers.push((0, "Mailer".to_owned())); + assert!( + ScheduledSidekiqAdapter + .detect_with_context(&summary, Some(&ssa), tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn ssa_receiver_type_keeps_sidekiq_scheduler_receiver() { + let src: &[u8] = b"class TickWorker\n include Sidekiq::Worker\n def enqueue(payload)\n TickWorker.perform_async(payload)\n end\nend\n"; + let tree = parse_ruby(src); + let mut summary = FuncSummary { + name: "enqueue".into(), + ..Default::default() + }; + summary.callees.push(CalleeSite { + name: "TickWorker.perform_async".to_owned(), + receiver: Some("TickWorker".to_owned()), + ordinal: 0, + ..Default::default() + }); + let mut ssa = SsaFuncSummary::default(); + ssa.typed_call_receivers.push((0, "TickWorker".to_owned())); + assert!( + ScheduledSidekiqAdapter + .detect_with_context(&summary, Some(&ssa), tree.root_node(), src) + .is_some() + ); + } } diff --git a/src/dynamic/framework/adapters/sqs_java.rs b/src/dynamic/framework/adapters/sqs_java.rs index 8969ce06..4065f432 100644 --- a/src/dynamic/framework/adapters/sqs_java.rs +++ b/src/dynamic/framework/adapters/sqs_java.rs @@ -75,7 +75,7 @@ impl FrameworkAdapter for SqsJavaAdapter { fn detect_sqs_java( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_sqs); @@ -100,7 +100,7 @@ fn detect_sqs_java( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Java, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/sqs_node.rs b/src/dynamic/framework/adapters/sqs_node.rs index 477d5c1b..6c2417bd 100644 --- a/src/dynamic/framework/adapters/sqs_node.rs +++ b/src/dynamic/framework/adapters/sqs_node.rs @@ -79,7 +79,7 @@ impl FrameworkAdapter for SqsNodeAdapter { fn detect_sqs_node( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_sqs); @@ -99,7 +99,7 @@ fn detect_sqs_node( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::JavaScript, ast, file_bytes), }) } diff --git a/src/dynamic/framework/adapters/sqs_python.rs b/src/dynamic/framework/adapters/sqs_python.rs index b6a0a686..0d524bc3 100644 --- a/src/dynamic/framework/adapters/sqs_python.rs +++ b/src/dynamic/framework/adapters/sqs_python.rs @@ -78,7 +78,7 @@ impl FrameworkAdapter for SqsPythonAdapter { fn detect_sqs_python( summary: &FuncSummary, ssa_summary: Option<&SsaFuncSummary>, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { let matches_call = super::any_callee_matches(summary, callee_is_sqs); @@ -103,7 +103,7 @@ fn detect_sqs_python( route: None, request_params: Vec::new(), response_writer: None, - middleware: Vec::new(), + middleware: super::collect_message_middleware(Lang::Python, ast, file_bytes), }) } diff --git a/src/dynamic/framework/auth_markers.rs b/src/dynamic/framework/auth_markers.rs index 407294d0..a22ba216 100644 --- a/src/dynamic/framework/auth_markers.rs +++ b/src/dynamic/framework/auth_markers.rs @@ -85,7 +85,11 @@ const JS_EXACT: &[ExactRow] = &[ ("validateBody", AuthMarkerKind::InputValidation), ("validateRequest", AuthMarkerKind::InputValidation), ("validateSchema", AuthMarkerKind::InputValidation), + ("validateMessage", AuthMarkerKind::InputValidation), + ("validateEvent", AuthMarkerKind::InputValidation), ("schemaValidator", AuthMarkerKind::InputValidation), + ("jsonSchemaValidator", AuthMarkerKind::InputValidation), + ("ajvValidate", AuthMarkerKind::InputValidation), ("celebrate", AuthMarkerKind::InputValidation), ("joiValidate", AuthMarkerKind::InputValidation), ("zodValidate", AuthMarkerKind::InputValidation), @@ -125,6 +129,7 @@ const PYTHON_EXACT: &[ExactRow] = &[ ("CSRFProtect", AuthMarkerKind::Csrf), ("validate", AuthMarkerKind::InputValidation), ("validate_request", AuthMarkerKind::InputValidation), + ("validate_schema", AuthMarkerKind::InputValidation), ("ValidationMiddleware", AuthMarkerKind::InputValidation), ("pydantic_validate", AuthMarkerKind::InputValidation), ("SecurityMiddleware", AuthMarkerKind::OutputSanitization), @@ -159,6 +164,10 @@ const JAVA_EXACT: &[ExactRow] = &[ ("@Valid", AuthMarkerKind::InputValidation), ("@Validated", AuthMarkerKind::InputValidation), ("ValidationFilter", AuthMarkerKind::InputValidation), + ( + "ValidatingMessageConverter", + AuthMarkerKind::InputValidation, + ), ("@RateLimited", AuthMarkerKind::RateLimit), ]; @@ -277,6 +286,15 @@ fn classify_by_suffix(name: &str) -> Option { return Some(AuthMarkerKind::Authentication); } if name.ends_with("Interceptor") { + if name.contains("Validation") || name.contains("Validator") { + return Some(AuthMarkerKind::InputValidation); + } + if name.contains("Role") || name.contains("Permission") { + return Some(AuthMarkerKind::Authorization); + } + if name.contains("Auth") { + return Some(AuthMarkerKind::Authentication); + } return Some(AuthMarkerKind::Authentication); } if name.ends_with("Authenticator") { diff --git a/tests/message_handler_corpus.rs b/tests/message_handler_corpus.rs index 5899b06f..9f4cd9f7 100644 --- a/tests/message_handler_corpus.rs +++ b/tests/message_handler_corpus.rs @@ -196,15 +196,31 @@ fn ts_language_for(lang: Lang) -> tree_sitter::Language { fn detect_for(lang: Lang, fixture: &str, handler: &str) -> Option { let bytes = std::fs::read(fixture).expect("fixture exists"); + detect_from_bytes(lang, &bytes, handler) +} + +fn detect_inline(lang: Lang, src: &[u8], handler: &str) -> FrameworkBinding { + detect_from_bytes(lang, src, handler).expect("inline source binds") +} + +fn detect_from_bytes(lang: Lang, bytes: &[u8], handler: &str) -> Option { let ts_lang = ts_language_for(lang); let mut parser = tree_sitter::Parser::new(); parser.set_language(&ts_lang).unwrap(); - let tree = parser.parse(&bytes, None).unwrap(); + let tree = parser.parse(bytes, None).unwrap(); let summary = FuncSummary { name: handler.into(), ..Default::default() }; - detect_binding(&summary, tree.root_node(), &bytes, lang) + detect_binding(&summary, tree.root_node(), bytes, lang) +} + +fn middleware_names(binding: &FrameworkBinding) -> Vec { + binding + .middleware + .iter() + .map(|mw| mw.name.clone()) + .collect() } #[test] @@ -275,6 +291,111 @@ fn nats_go_adapter_binds_message_handler_kind() { assert!(matches!(b.kind, EntryKind::MessageHandler { .. })); } +#[test] +fn phase20_broker_adapters_collect_guard_middleware() { + let cases: &[(Lang, &[u8], &str, &[&str])] = &[ + ( + Lang::Python, + b"from kafka import KafkaConsumer\n\ +def handler(msg):\n validate_schema(msg)\n\ +consumer = KafkaConsumer('orders')\n", + "handler", + &["validate_schema"], + ), + ( + Lang::Java, + b"import org.springframework.kafka.annotation.KafkaListener;\n\ + public class Vuln {\n\ + @KafkaListener(topics = \"orders\")\n\ + public void onMessage(String body) {}\n\ + public void configure(Factory factory) {\n\ + factory.setRecordInterceptor(new ValidationInterceptor());\n\ + }\n\ + }\n", + "onMessage", + &["ValidationInterceptor"], + ), + ( + Lang::Python, + b"import boto3\n\ +sq = boto3.client('sqs')\n\ +def handler(envelope):\n validate_request(envelope)\n", + "handler", + &["validate_request"], + ), + ( + Lang::Java, + b"import io.awspring.cloud.sqs.annotation.SqsListener;\n\ + import javax.validation.Valid;\n\ + public class Vuln {\n\ + @SqsListener(\"jobs\")\n\ + public void handleMessage(@Valid String env) {}\n\ + }\n", + "handleMessage", + &["@Valid"], + ), + ( + Lang::JavaScript, + b"const { SQSClient } = require('@aws-sdk/client-sqs');\n\ + const client = new SQSClient({});\n\ + client.middlewareStack.add(validateMessage);\n\ + function handler(env) {}\n", + "handler", + &["validateMessage"], + ), + ( + Lang::Python, + b"from google.cloud import pubsub_v1\n\ +def callback(message):\n validate_schema(message)\n\ +subscriber = pubsub_v1.SubscriberClient()\n", + "callback", + &["validate_schema"], + ), + ( + Lang::Go, + b"package entry\n\ + import \"cloud.google.com/go/pubsub\"\n\ + func OnMessage(msg *pubsub.Message) { ValidatePayload(msg.Data) }\n", + "OnMessage", + &["ValidatePayload"], + ), + ( + Lang::Python, + b"import pika\n\ +def on_message(ch, method, properties, body):\n validate_request(body)\n", + "on_message", + &["validate_request"], + ), + ( + Lang::Java, + b"import org.springframework.amqp.rabbit.annotation.RabbitListener;\n\ + public class Vuln {\n\ + @RabbitListener(queues = \"work\")\n\ + public void onMessage(String body) {}\n\ + public void configure(Factory factory) {\n\ + factory.setMessageConverter(new ValidatingMessageConverter());\n\ + }\n\ + }\n", + "onMessage", + &["ValidatingMessageConverter"], + ), + ( + Lang::Go, + b"package entry\n\ + import \"github.com/nats-io/nats.go\"\n\ + func OnMessage(msg *nats.Msg) { ValidatePayload(msg.Data) }\n\ + func init() { nc.QueueSubscribe(\"events\", \"workers\", OnMessage) }\n", + "OnMessage", + &["ValidatePayload"], + ), + ]; + + for (lang, src, handler, expected) in cases { + let binding = detect_inline(*lang, src, handler); + assert_eq!(middleware_names(&binding), *expected); + } +} + #[test] fn registry_slices_include_phase_20_adapters() { let java_names: Vec<&'static str> = adapters_for(Lang::Java).iter().map(|a| a.name()).collect();