mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
220 lines
6.9 KiB
Rust
220 lines
6.9 KiB
Rust
//! Phase 20 (Track M.2) — Python Kafka consumer adapter.
|
|
//!
|
|
//! Fires when the surrounding source imports the canonical Python
|
|
//! Kafka clients (`kafka-python` or `confluent-kafka`) and the function
|
|
//! body invokes a consumer-shaped callee. The binding's
|
|
//! [`EntryKind::MessageHandler`] is stamped with a best-effort `queue`
|
|
//! extracted from the source (a `KafkaConsumer('topic', ...)` /
|
|
//! `Consumer({"group.id": ..., "topics": ["t"]}).subscribe([...])`
|
|
//! literal); a missing topic falls back to the empty string.
|
|
|
|
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
|
use crate::evidence::EntryKind;
|
|
use crate::summary::FuncSummary;
|
|
use crate::summary::ssa_summary::SsaFuncSummary;
|
|
use crate::symbol::Lang;
|
|
|
|
pub struct KafkaPythonAdapter;
|
|
|
|
const ADAPTER_NAME: &str = "kafka-python";
|
|
|
|
fn callee_is_kafka_consumer(name: &str) -> bool {
|
|
let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name);
|
|
matches!(
|
|
last,
|
|
"KafkaConsumer" | "Consumer" | "subscribe" | "poll" | "consume" | "process_message"
|
|
)
|
|
}
|
|
|
|
fn source_imports_kafka(file_bytes: &[u8]) -> bool {
|
|
const NEEDLES: &[&[u8]] = &[
|
|
b"from kafka",
|
|
b"import kafka",
|
|
b"from confluent_kafka",
|
|
b"import confluent_kafka",
|
|
];
|
|
NEEDLES
|
|
.iter()
|
|
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
|
|
}
|
|
|
|
fn extract_topic_literal(file_bytes: &[u8]) -> String {
|
|
let text = std::str::from_utf8(file_bytes).unwrap_or("");
|
|
for needle in ["KafkaConsumer(", ".subscribe(", "topic="] {
|
|
if let Some(idx) = text.find(needle) {
|
|
let after = &text[idx + needle.len()..];
|
|
for (open, close) in [('"', '"'), ('\'', '\'')] {
|
|
if let Some(o) = after.find(open) {
|
|
let rest = &after[o + 1..];
|
|
if let Some(c) = rest.find(close) {
|
|
return rest[..c].to_owned();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
String::new()
|
|
}
|
|
|
|
impl FrameworkAdapter for KafkaPythonAdapter {
|
|
fn name(&self) -> &'static str {
|
|
ADAPTER_NAME
|
|
}
|
|
|
|
fn lang(&self) -> Lang {
|
|
Lang::Python
|
|
}
|
|
|
|
fn detect(
|
|
&self,
|
|
summary: &FuncSummary,
|
|
ast: tree_sitter::Node<'_>,
|
|
file_bytes: &[u8],
|
|
) -> Option<FrameworkBinding> {
|
|
detect_kafka_python(summary, None, ast, file_bytes)
|
|
}
|
|
|
|
fn detect_with_context(
|
|
&self,
|
|
summary: &FuncSummary,
|
|
ssa_summary: Option<&SsaFuncSummary>,
|
|
ast: tree_sitter::Node<'_>,
|
|
file_bytes: &[u8],
|
|
) -> Option<FrameworkBinding> {
|
|
detect_kafka_python(summary, ssa_summary, ast, file_bytes)
|
|
}
|
|
}
|
|
|
|
fn detect_kafka_python(
|
|
summary: &FuncSummary,
|
|
ssa_summary: Option<&SsaFuncSummary>,
|
|
ast: tree_sitter::Node<'_>,
|
|
file_bytes: &[u8],
|
|
) -> Option<FrameworkBinding> {
|
|
let matches_call = super::any_callee_matches(summary, callee_is_kafka_consumer);
|
|
let matches_source = source_imports_kafka(file_bytes);
|
|
if !(matches_call || matches_source) {
|
|
return None;
|
|
}
|
|
if !super::typed_receiver_facts_allow(
|
|
summary,
|
|
ssa_summary,
|
|
callee_is_kafka_consumer,
|
|
typed_container_allows_kafka,
|
|
) {
|
|
return None;
|
|
}
|
|
Some(FrameworkBinding {
|
|
adapter: ADAPTER_NAME.to_owned(),
|
|
kind: EntryKind::MessageHandler {
|
|
queue: extract_topic_literal(file_bytes),
|
|
message_schema: None,
|
|
},
|
|
route: None,
|
|
request_params: Vec::new(),
|
|
response_writer: None,
|
|
middleware: super::collect_message_middleware(Lang::Python, ast, file_bytes),
|
|
})
|
|
}
|
|
|
|
fn typed_container_allows_kafka(container: &str) -> bool {
|
|
let lc = container.to_ascii_lowercase();
|
|
lc.contains("kafka") || lc.contains("consumer")
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::summary::CalleeSite;
|
|
|
|
fn parse_python(src: &[u8]) -> tree_sitter::Tree {
|
|
let mut parser = tree_sitter::Parser::new();
|
|
let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
|
|
parser.set_language(&lang).unwrap();
|
|
parser.parse(src, None).unwrap()
|
|
}
|
|
|
|
#[test]
|
|
fn fires_on_kafka_python_consumer() {
|
|
let src: &[u8] = b"from kafka import KafkaConsumer\n\n\
|
|
def handler(msg):\n print(msg)\n\n\
|
|
consumer = KafkaConsumer('orders', bootstrap_servers='broker:9092')\n";
|
|
let tree = parse_python(src);
|
|
let summary = FuncSummary {
|
|
name: "handler".into(),
|
|
..Default::default()
|
|
};
|
|
let binding = KafkaPythonAdapter
|
|
.detect(&summary, tree.root_node(), src)
|
|
.expect("kafka import binds");
|
|
assert_eq!(binding.adapter, "kafka-python");
|
|
assert!(matches!(binding.kind, EntryKind::MessageHandler { .. }));
|
|
if let EntryKind::MessageHandler { queue, .. } = binding.kind {
|
|
assert_eq!(queue, "orders");
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn skips_plain_function() {
|
|
let src: &[u8] = b"def add(a, b):\n return a + b\n";
|
|
let tree = parse_python(src);
|
|
let summary = FuncSummary {
|
|
name: "add".into(),
|
|
..Default::default()
|
|
};
|
|
assert!(
|
|
KafkaPythonAdapter
|
|
.detect(&summary, tree.root_node(), src)
|
|
.is_none()
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn ssa_receiver_type_rejects_non_kafka_poll_collision() {
|
|
let src: &[u8] = b"from kafka import KafkaConsumer\n\
|
|
def handler(msg):\n cache.poll(msg)\n";
|
|
let tree = parse_python(src);
|
|
let mut summary = FuncSummary {
|
|
name: "handler".into(),
|
|
..Default::default()
|
|
};
|
|
summary.callees.push(CalleeSite {
|
|
name: "cache.poll".to_owned(),
|
|
receiver: Some("cache".to_owned()),
|
|
ordinal: 0,
|
|
..Default::default()
|
|
});
|
|
let mut ssa = SsaFuncSummary::default();
|
|
ssa.typed_call_receivers.push((0, "Cache".to_owned()));
|
|
assert!(
|
|
KafkaPythonAdapter
|
|
.detect_with_context(&summary, Some(&ssa), tree.root_node(), src)
|
|
.is_none()
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn ssa_receiver_type_keeps_kafka_consumer() {
|
|
let src: &[u8] = b"from kafka import KafkaConsumer\n\
|
|
def handler(msg):\n consumer.poll()\n";
|
|
let tree = parse_python(src);
|
|
let mut summary = FuncSummary {
|
|
name: "handler".into(),
|
|
..Default::default()
|
|
};
|
|
summary.callees.push(CalleeSite {
|
|
name: "consumer.poll".to_owned(),
|
|
receiver: Some("consumer".to_owned()),
|
|
ordinal: 0,
|
|
..Default::default()
|
|
});
|
|
let mut ssa = SsaFuncSummary::default();
|
|
ssa.typed_call_receivers
|
|
.push((0, "KafkaConsumer".to_owned()));
|
|
assert!(
|
|
KafkaPythonAdapter
|
|
.detect_with_context(&summary, Some(&ssa), tree.root_node(), src)
|
|
.is_some()
|
|
);
|
|
}
|
|
}
|