use super::*; use smallvec::smallvec; /// Test helper: build a [`SmallVec`] of one cap-only [`SinkSite`] for a /// parameter, matching the pre-`SinkSite` shape `(idx, Cap)`. Source /// coordinates stay default (`line=0, col=0`) since tests do not /// exercise the primary-location attribution path at this layer. fn cap_sites(cap: Cap) -> SmallVec<[SinkSite; 1]> { smallvec![SinkSite::cap_only(cap)] } fn make(name: &str, src: u32, san: u32, sink: u32) -> FuncSummary { FuncSummary { name: name.into(), file_path: "test.rs".into(), lang: "rust".into(), param_count: 0, param_names: vec![], source_caps: src, sanitizer_caps: san, sink_caps: sink, propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() } } #[test] fn merge_unions_conservatively() { let a = make("foo", 0x01, 0x00, 0x00); let b = FuncSummary { sink_caps: 0x04, propagating_params: vec![0], tainted_sink_params: vec![0], callees: vec!["bar".into()], ..make("foo", 0x00, 0x02, 0x00) }; let merged = merge_summaries(vec![a, b], None); let key = FuncKey { lang: Lang::Rust, namespace: "test.rs".into(), name: "foo".into(), arity: Some(0), ..Default::default() }; let foo = merged.get(&key).unwrap(); assert_eq!(foo.source_caps, 0x01); assert_eq!(foo.sanitizer_caps, 0x02); assert_eq!(foo.sink_caps, 0x04); assert!(foo.propagates_any()); assert_eq!(foo.propagating_params, vec![0]); assert_eq!(foo.tainted_sink_params, vec![0]); assert_eq!(foo.callees.len(), 1); assert_eq!(foo.callees[0].name, "bar"); } #[test] fn same_lang_different_namespace_no_merge() { let a = FuncSummary { name: "helper".into(), file_path: "file_a.rs".into(), lang: "rust".into(), param_count: 0, param_names: vec![], source_caps: Cap::all().bits(), sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; let b = FuncSummary { name: "helper".into(), file_path: "file_b.rs".into(), lang: 
"rust".into(), param_count: 0, param_names: vec![], source_caps: 0, sanitizer_caps: 0, sink_caps: Cap::SHELL_ESCAPE.bits(), propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; let global = merge_summaries(vec![a, b], None); // They should be stored under different FuncKeys let key_a = FuncKey { lang: Lang::Rust, namespace: "file_a.rs".into(), name: "helper".into(), arity: Some(0), ..Default::default() }; let key_b = FuncKey { lang: Lang::Rust, namespace: "file_b.rs".into(), name: "helper".into(), arity: Some(0), ..Default::default() }; assert!(global.get(&key_a).is_some()); assert!(global.get(&key_b).is_some()); // source_caps NOT merged assert_eq!(global.get(&key_a).unwrap().source_caps, Cap::all().bits()); assert_eq!(global.get(&key_b).unwrap().source_caps, 0); } #[test] fn same_lang_same_namespace_merges() { let a = FuncSummary { name: "helper".into(), file_path: "lib.rs".into(), lang: "rust".into(), param_count: 0, param_names: vec![], source_caps: 0x01, sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; let b = FuncSummary { name: "helper".into(), file_path: "lib.rs".into(), lang: "rust".into(), param_count: 0, param_names: vec![], source_caps: 0, sanitizer_caps: 0x02, sink_caps: 0, propagating_params: vec![0], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; let global = merge_summaries(vec![a, b], None); let key = FuncKey { lang: Lang::Rust, namespace: "lib.rs".into(), name: "helper".into(), arity: Some(0), ..Default::default() }; let merged = global.get(&key).unwrap(); assert_eq!(merged.source_caps, 0x01); assert_eq!(merged.sanitizer_caps, 0x02); assert!(merged.propagates_any()); assert_eq!(merged.propagating_params, vec![0]); } #[test] fn cross_lang_name_collision_stays_separate() { let py = FuncSummary { name: "process_data".into(), 
file_path: "handler.py".into(), lang: "python".into(), param_count: 0, param_names: vec![], source_caps: Cap::all().bits(), sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; let c = FuncSummary { name: "process_data".into(), file_path: "handler.c".into(), lang: "c".into(), param_count: 1, param_names: vec!["s".into()], source_caps: 0, sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![0], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; let global = merge_summaries(vec![py, c], None); let py_key = FuncKey { lang: Lang::Python, namespace: "handler.py".into(), name: "process_data".into(), arity: Some(0), ..Default::default() }; let c_key = FuncKey { lang: Lang::C, namespace: "handler.c".into(), name: "process_data".into(), arity: Some(1), ..Default::default() }; assert!(global.get(&py_key).is_some()); assert!(global.get(&c_key).is_some()); // Python's source_caps NOT merged into C assert_eq!(global.get(&c_key).unwrap().source_caps, 0); assert_eq!(global.get(&py_key).unwrap().source_caps, Cap::all().bits()); } #[test] fn lookup_same_lang_returns_all_matches() { let a = FuncSummary { name: "helper".into(), file_path: "a.rs".into(), lang: "rust".into(), param_count: 0, param_names: vec![], source_caps: 1, sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; let b = FuncSummary { name: "helper".into(), file_path: "b.rs".into(), lang: "rust".into(), param_count: 0, param_names: vec![], source_caps: 2, sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; let global = merge_summaries(vec![a, b], None); let matches = global.lookup_same_lang(Lang::Rust, "helper"); assert_eq!(matches.len(), 2); // No cross-language 
matches let py_matches = global.lookup_same_lang(Lang::Python, "helper"); assert!(py_matches.is_empty()); } #[test] fn cap_bits_round_trip_serde() { let summary = FuncSummary { name: "dangerous".into(), file_path: "test.rs".into(), lang: "rust".into(), param_count: 1, param_names: vec!["input".into()], source_caps: (Cap::SQL_QUERY | Cap::CODE_EXEC).bits(), sanitizer_caps: Cap::CRYPTO.bits(), sink_caps: (Cap::SSRF | Cap::DESERIALIZE).bits(), propagating_params: vec![0], propagates_taint: false, tainted_sink_params: vec![0], callees: vec!["query".into()], ..Default::default() }; let json = serde_json::to_string(&summary).unwrap(); let back: FuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(back.source_caps, (Cap::SQL_QUERY | Cap::CODE_EXEC).bits()); assert_eq!(back.sanitizer_caps, Cap::CRYPTO.bits()); assert_eq!(back.sink_caps, (Cap::SSRF | Cap::DESERIALIZE).bits()); assert!(back.propagates_any()); assert_eq!(back.propagating_params, vec![0]); // propagates_taint should NOT appear in serialized output assert!(!json.contains("propagates_taint")); } /// Every new cap class persists across the serde JSON round-trip used /// for SQLite blob storage and the `/debug` endpoint. Catches a /// width-mismatch (cap bits truncated to u16) as a hard fail rather than /// silent zeroing of the upper bits. #[test] fn new_cap_classes_round_trip_serde() { let new_caps = Cap::LDAP_INJECTION | Cap::XPATH_INJECTION | Cap::HEADER_INJECTION | Cap::OPEN_REDIRECT | Cap::SSTI | Cap::XXE | Cap::PROTOTYPE_POLLUTION; // Sanity: bit-width must accommodate every new cap. assert_ne!( new_caps.bits(), 0, "every new cap must carry a non-zero bit" ); assert_eq!( new_caps.bits().count_ones(), 7, "exactly seven bits must be set across the new caps" ); // Bit collisions with existing caps would mask a finding. 
let existing = Cap::ENV_VAR | Cap::HTML_ESCAPE | Cap::SHELL_ESCAPE | Cap::URL_ENCODE | Cap::JSON_PARSE | Cap::FILE_IO | Cap::FMT_STRING | Cap::SQL_QUERY | Cap::DESERIALIZE | Cap::SSRF | Cap::CODE_EXEC | Cap::CRYPTO | Cap::UNAUTHORIZED_ID | Cap::DATA_EXFIL; assert!( (existing & new_caps).is_empty(), "new caps must not collide" ); let summary = FuncSummary { name: "all_new_classes".into(), file_path: "fixture.rs".into(), lang: "rust".into(), param_count: 0, param_names: vec![], source_caps: 0, sanitizer_caps: 0, sink_caps: new_caps.bits(), propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; // serde JSON round-trip (the on-disk SQLite format). let json = serde_json::to_string(&summary).unwrap(); let back: FuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(back.sink_caps, new_caps.bits()); assert!(back.sink_caps().contains(Cap::LDAP_INJECTION)); assert!(back.sink_caps().contains(Cap::PROTOTYPE_POLLUTION)); // Cap registry must surface a rule id for each new cap. 
for cap in [ Cap::LDAP_INJECTION, Cap::XPATH_INJECTION, Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, Cap::SSTI, Cap::XXE, Cap::PROTOTYPE_POLLUTION, ] { let meta = crate::labels::cap_rule_meta(cap) .unwrap_or_else(|| panic!("missing CAP_RULE_REGISTRY entry for {cap:?}")); assert!(meta.rule_id.starts_with("taint-")); assert!(!meta.title.is_empty()); assert!(!meta.description.is_empty()); } } #[test] fn backward_compat_u8_json_deserializes() { // Old u8-range values still deserialize correctly into u32 fields let json = r#"{ "name": "old_func", "file_path": "legacy.py", "lang": "python", "param_count": 0, "param_names": [], "source_caps": 127, "sanitizer_caps": 2, "sink_caps": 4, "propagates_taint": false, "tainted_sink_params": [], "callees": [] }"#; let summary: FuncSummary = serde_json::from_str(json).unwrap(); assert_eq!(summary.source_caps, 127); assert_eq!(summary.sanitizer_caps, 2); assert_eq!(summary.sink_caps, 4); } #[test] fn merge_propagating_params_union() { let a = FuncSummary { propagating_params: vec![0], ..make("foo", 0, 0, 0) }; let b = FuncSummary { propagating_params: vec![1], ..make("foo", 0, 0, 0) }; let merged = merge_summaries(vec![a, b], None); let key = FuncKey { lang: Lang::Rust, namespace: "test.rs".into(), name: "foo".into(), arity: Some(0), ..Default::default() }; let foo = merged.get(&key).unwrap(); assert_eq!(foo.propagating_params, vec![0, 1]); assert!(foo.propagates_any()); } #[test] fn backward_compat_legacy_propagates_taint_json() { // Old JSON with propagates_taint: true but no propagating_params let json = r#"{ "name": "old_func", "file_path": "legacy.py", "lang": "python", "param_count": 1, "param_names": ["x"], "source_caps": 0, "sanitizer_caps": 0, "sink_caps": 0, "propagates_taint": true, "tainted_sink_params": [], "callees": [] }"#; let summary: FuncSummary = serde_json::from_str(json).unwrap(); assert!(summary.propagates_taint); assert!(summary.propagating_params.is_empty()); assert!(summary.propagates_any()); } #[test] fn 
propagating_params_round_trip_serde() { let summary = FuncSummary { propagating_params: vec![0, 2], ..make("foo", 0, 0, 0) }; let json = serde_json::to_string(&summary).unwrap(); let back: FuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(back.propagating_params, vec![0, 2]); assert!(back.propagates_any()); // propagates_taint must NOT appear in serialized output assert!(!json.contains("propagates_taint")); } #[test] fn snapshot_caps_detects_change() { let a = FuncSummary { source_caps: 0x01, propagating_params: vec![0], ..make("foo", 0, 0, 0) }; let b = make("bar", 0, 0, 0x04); let mut gs = merge_summaries(vec![a, b], None); let snap1 = gs.snapshot_caps(); // Mutate one summary by inserting a changed version. let key = FuncKey { lang: Lang::Rust, namespace: "test.rs".into(), name: "bar".into(), arity: Some(0), ..Default::default() }; let updated = FuncSummary { sink_caps: 0x08, ..make("bar", 0, 0, 0) }; gs.insert(key, updated); let snap2 = gs.snapshot_caps(); assert_ne!(snap1, snap2, "snapshot should detect changed caps"); // Without further changes, snapshot should be stable. 
let snap3 = gs.snapshot_caps(); assert_eq!(snap2, snap3, "snapshot should be stable without changes"); } // ── SSA summary tests ─────────────────────────────────────────────────── use super::ssa_summary::{SsaFuncSummary, TaintTransform}; #[test] fn ssa_summary_serde_round_trip_identity() { let summary = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(summary, back); } #[test] fn ssa_summary_serde_round_trip_strip_bits() { let summary = SsaFuncSummary { param_to_return: vec![( 0, TaintTransform::StripBits(Cap::HTML_ESCAPE | Cap::URL_ENCODE), )], param_to_sink: vec![(1, cap_sites(Cap::SQL_QUERY))], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = 
serde_json::from_str(&json).unwrap(); assert_eq!(summary, back); } #[test] fn ssa_summary_serde_round_trip_add_bits() { let summary = SsaFuncSummary { param_to_return: vec![(2, TaintTransform::AddBits(Cap::CODE_EXEC))], param_to_sink: vec![], source_caps: Cap::ENV_VAR | Cap::FILE_IO, param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(summary, back); } #[test] fn ssa_summary_serde_round_trip_all_variants() { let summary = SsaFuncSummary { param_to_return: vec![ (0, TaintTransform::Identity), (1, TaintTransform::StripBits(Cap::SHELL_ESCAPE)), (2, TaintTransform::AddBits(Cap::SSRF)), ], param_to_sink: vec![ (0, cap_sites(Cap::SQL_QUERY)), (1, cap_sites(Cap::CODE_EXEC | Cap::CRYPTO)), ], source_caps: Cap::all(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(summary, 
back); } #[test] fn global_summaries_insert_ssa_exact_key_replacement() { let mut gs = GlobalSummaries::new(); let key = FuncKey { lang: Lang::Python, namespace: "app.py".into(), name: "process".into(), arity: Some(1), ..Default::default() }; let v1 = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; gs.insert_ssa(key.clone(), v1.clone()); assert_eq!(gs.get_ssa(&key), Some(&v1)); // Replace with a different summary, exact replacement, not union let v2 = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))], param_to_sink: vec![(0, cap_sites(Cap::SQL_QUERY))], source_caps: Cap::ENV_VAR, param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; gs.insert_ssa(key.clone(), v2.clone()); assert_eq!(gs.get_ssa(&key), Some(&v2)); } #[test] fn global_summaries_merge_with_ssa_entries() { let mut gs1 = GlobalSummaries::new(); let mut gs2 = GlobalSummaries::new(); let key_a = FuncKey { 
lang: Lang::Python, namespace: "a.py".into(), name: "foo".into(), arity: Some(1), ..Default::default() }; let key_b = FuncKey { lang: Lang::Python, namespace: "b.py".into(), name: "bar".into(), arity: Some(2), ..Default::default() }; let sum_a = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let sum_b = SsaFuncSummary { param_to_return: vec![], param_to_sink: vec![(0, cap_sites(Cap::CODE_EXEC))], source_caps: Cap::ENV_VAR, param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; gs1.insert_ssa(key_a.clone(), sum_a.clone()); gs2.insert_ssa(key_b.clone(), sum_b.clone()); gs1.merge(gs2); assert_eq!(gs1.get_ssa(&key_a), Some(&sum_a)); assert_eq!(gs1.get_ssa(&key_b), Some(&sum_b)); } #[test] fn global_summaries_is_empty_considers_ssa() { let mut gs = GlobalSummaries::new(); assert!(gs.is_empty()); let key = FuncKey { lang: Lang::Rust, namespace: "lib.rs".into(), name: "f".into(), arity: Some(1), 
..Default::default() }; gs.insert_ssa( key, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); assert!(!gs.is_empty()); } #[test] fn ssa_summary_serde_round_trip_param_to_sink_param() { let summary = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![(0, cap_sites(Cap::SQL_QUERY))], source_caps: Cap::empty(), param_to_sink_param: vec![(0, 0, Cap::SQL_QUERY), (1, 0, Cap::CODE_EXEC)], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(summary, back); assert_eq!(back.param_to_sink_param.len(), 2); assert_eq!(back.param_to_sink_param[0], (0, 0, Cap::SQL_QUERY)); assert_eq!(back.param_to_sink_param[1], (1, 0, Cap::CODE_EXEC)); } #[test] fn ssa_summary_backward_compat_missing_param_to_sink_param() { // Old JSON without param_to_sink_param should deserialize with empty 
vec let json = r#"{ "param_to_return": [[0, "Identity"]], "param_to_sink": [], "source_caps": 0 }"#; let summary: SsaFuncSummary = serde_json::from_str(json).unwrap(); assert!(summary.param_to_sink_param.is_empty()); } #[test] fn ssa_summary_serde_round_trip_container_fields() { let summary = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![0], param_to_container_store: vec![(1, 0)], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(summary, back); assert_eq!(back.param_container_to_return, vec![0]); assert_eq!(back.param_to_container_store, vec![(1, 0)]); } #[test] fn ssa_summary_backward_compat_missing_container_fields() { // Old JSON without container fields should deserialize with empty vecs let json = r#"{ "param_to_return": [[0, "Identity"]], "param_to_sink": [], "source_caps": 0 }"#; let summary: SsaFuncSummary = serde_json::from_str(json).unwrap(); assert!(summary.param_container_to_return.is_empty()); assert!(summary.param_to_container_store.is_empty()); } #[test] fn ssa_summary_serde_round_trip_return_abstract() { use crate::abstract_interp::{AbstractValue, BitFact, IntervalFact, PathFact, StringFact}; let summary = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], 
param_to_container_store: vec![], return_type: None, return_abstract: Some(AbstractValue { interval: IntervalFact { lo: Some(-2_147_483_648), hi: Some(2_147_483_647), }, string: StringFact::top(), bits: BitFact::top(), path: PathFact::top(), }), source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(summary, back); assert!(back.return_abstract.is_some()); let abs = back.return_abstract.unwrap(); assert_eq!(abs.interval.lo, Some(-2_147_483_648)); assert_eq!(abs.interval.hi, Some(2_147_483_647)); assert!(abs.string.is_top()); } #[test] fn ssa_summary_backward_compat_missing_return_abstract() { // Old JSON without return_abstract should deserialize with None let json = r#"{ "param_to_return": [], "param_to_sink": [], "source_caps": 0 }"#; let summary: SsaFuncSummary = serde_json::from_str(json).unwrap(); assert_eq!(summary.return_abstract, None); } // ── CalleeSsaBody serde + GlobalSummaries body resolution ─────────────── /// Helper: build a minimal CalleeSsaBody with a given number of blocks. 
#[allow(dead_code)] // used by tests below fn make_callee_body( num_blocks: usize, param_count: usize, ) -> crate::taint::ssa_transfer::CalleeSsaBody { use crate::ssa::ir::*; use smallvec::smallvec; let mut blocks = Vec::new(); for i in 0..num_blocks { blocks.push(SsaBlock { id: BlockId(i as u32), phis: vec![], body: vec![SsaInst { value: SsaValue(i as u32), op: SsaOp::Const(Some("0".into())), cfg_node: petgraph::graph::NodeIndex::new(0), var_name: None, span: (0, 0), }], terminator: if i + 1 < num_blocks { Terminator::Goto(BlockId((i + 1) as u32)) } else { Terminator::Return(Some(SsaValue(0))) }, preds: smallvec![], succs: smallvec![], }); } let value_defs: Vec = (0..num_blocks) .map(|i| ValueDef { var_name: None, cfg_node: petgraph::graph::NodeIndex::new(0), block: BlockId(i as u32), }) .collect(); crate::taint::ssa_transfer::CalleeSsaBody { ssa: SsaBody { blocks, entry: BlockId(0), value_defs, cfg_node_map: std::collections::HashMap::new(), exception_edges: vec![], field_interner: crate::ssa::ir::FieldInterner::default(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), slot_scoped_assigns: std::collections::HashSet::new(), }, opt: crate::ssa::OptimizeResult { const_values: std::collections::HashMap::new(), type_facts: crate::ssa::type_facts::TypeFactResult { facts: std::collections::HashMap::new(), }, xml_parser_config: crate::ssa::xml_config::XmlParserConfigResult::default(), xpath_config: crate::ssa::xpath_config::XPathConfigResult::default(), alias_result: crate::ssa::alias::BaseAliasResult::empty(), points_to: crate::ssa::heap::PointsToResult::empty(), module_aliases: std::collections::HashMap::new(), branches_pruned: 0, copies_eliminated: 0, dead_defs_removed: 0, }, param_count, node_meta: std::collections::HashMap::new(), body_graph: None, cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()), } } #[test] fn callee_body_serde_round_trip_empty() { let body = make_callee_body(1, 
0); let json = serde_json::to_string(&body).unwrap(); let back: crate::taint::ssa_transfer::CalleeSsaBody = serde_json::from_str(&json).unwrap(); assert_eq!(back.param_count, 0); assert_eq!(back.ssa.blocks.len(), 1); assert!(back.node_meta.is_empty()); } #[test] fn callee_body_serde_round_trip_multi_block() { let body = make_callee_body(5, 2); let json = serde_json::to_string(&body).unwrap(); let back: crate::taint::ssa_transfer::CalleeSsaBody = serde_json::from_str(&json).unwrap(); assert_eq!(back.param_count, 2); assert_eq!(back.ssa.blocks.len(), 5); // Verify block structure survived round-trip assert_eq!(back.ssa.entry, crate::ssa::ir::BlockId(0)); assert_eq!(back.ssa.value_defs.len(), 5); } #[test] fn callee_body_serde_round_trip_with_node_meta() { use crate::cfg::{NodeInfo, TaintMeta}; use crate::labels::{Cap, DataLabel}; use crate::taint::ssa_transfer::CrossFileNodeMeta; let mut body = make_callee_body(2, 1); body.node_meta.insert( 0, CrossFileNodeMeta { info: NodeInfo { bin_op: Some(crate::cfg::BinOp::Add), taint: TaintMeta { labels: smallvec::smallvec![DataLabel::Sink(Cap::HTML_ESCAPE)], ..Default::default() }, ..Default::default() }, }, ); body.node_meta.insert( 1, CrossFileNodeMeta { info: NodeInfo::default(), }, ); let json = serde_json::to_string(&body).unwrap(); let back: crate::taint::ssa_transfer::CalleeSsaBody = serde_json::from_str(&json).unwrap(); assert_eq!(back.node_meta.len(), 2); let meta0 = &back.node_meta[&0]; assert_eq!(meta0.info.bin_op, Some(crate::cfg::BinOp::Add)); assert_eq!(meta0.info.taint.labels.len(), 1); assert!(matches!(meta0.info.taint.labels[0], DataLabel::Sink(cap) if cap == Cap::HTML_ESCAPE)); assert!(back.node_meta[&1].info.taint.labels.is_empty()); } #[test] fn callee_body_serde_node_meta_skipped_when_empty() { // Verify #[serde(skip_serializing_if)] works: empty node_meta not in JSON let body = make_callee_body(1, 0); let json = serde_json::to_string(&body).unwrap(); assert!( !json.contains("node_meta"), "empty node_meta 
should be omitted from JSON" ); // But it should deserialize fine from JSON without node_meta field let back: crate::taint::ssa_transfer::CalleeSsaBody = serde_json::from_str(&json).unwrap(); assert!(back.node_meta.is_empty()); } #[test] fn callee_body_serde_with_all_ssa_op_variants() { use crate::ssa::ir::*; use smallvec::smallvec; let mut body = make_callee_body(1, 0); // Replace the single block's body with all SsaOp variants let node = petgraph::graph::NodeIndex::new(0); body.ssa.blocks[0].body = vec![ SsaInst { value: SsaValue(0), op: SsaOp::Const(Some("hello".into())), cfg_node: node, var_name: None, span: (0, 5), }, SsaInst { value: SsaValue(1), op: SsaOp::Const(None), cfg_node: node, var_name: None, span: (0, 0), }, SsaInst { value: SsaValue(2), op: SsaOp::Source, cfg_node: node, var_name: Some("src".into()), span: (6, 10), }, SsaInst { value: SsaValue(3), op: SsaOp::Param { index: 0 }, cfg_node: node, var_name: Some("p0".into()), span: (0, 0), }, SsaInst { value: SsaValue(4), op: SsaOp::CatchParam, cfg_node: node, var_name: None, span: (0, 0), }, SsaInst { value: SsaValue(5), op: SsaOp::Nop, cfg_node: node, var_name: None, span: (0, 0), }, SsaInst { value: SsaValue(6), op: SsaOp::Assign(smallvec![SsaValue(0), SsaValue(1)]), cfg_node: node, var_name: None, span: (0, 0), }, SsaInst { value: SsaValue(7), op: SsaOp::Call { callee: "foo".into(), callee_text: None, args: vec![smallvec![SsaValue(0)], smallvec![SsaValue(1)]], receiver: Some(SsaValue(2)), }, cfg_node: node, var_name: None, span: (11, 20), }, ]; body.ssa.blocks[0].phis = vec![SsaInst { value: SsaValue(8), op: SsaOp::Phi(smallvec![ (BlockId(0), SsaValue(0)), (BlockId(1), SsaValue(1)) ]), cfg_node: node, var_name: None, span: (0, 0), }]; let json = serde_json::to_string(&body).unwrap(); let back: crate::taint::ssa_transfer::CalleeSsaBody = serde_json::from_str(&json).unwrap(); assert_eq!(back.ssa.blocks[0].body.len(), 8); assert_eq!(back.ssa.blocks[0].phis.len(), 1); // Spot check: Call op preserved 
match &back.ssa.blocks[0].body[7].op { SsaOp::Call { callee, args, receiver, .. } => { assert_eq!(callee, "foo"); assert_eq!(args.len(), 2); assert_eq!(*receiver, Some(SsaValue(2))); } other => panic!("expected Call, got {:?}", other), } // Spot check: Phi op preserved match &back.ssa.blocks[0].phis[0].op { SsaOp::Phi(ops) => { assert_eq!(ops.len(), 2); assert_eq!(ops[0], (BlockId(0), SsaValue(0))); } other => panic!("expected Phi, got {:?}", other), } } #[test] fn callee_body_serde_with_branch_terminator() { use crate::constraint::lower::ConditionExpr; use crate::ssa::ir::*; let mut body = make_callee_body(3, 0); // Set a Branch terminator with a condition body.ssa.blocks[0].terminator = Terminator::Branch { cond: petgraph::graph::NodeIndex::new(0), true_blk: BlockId(1), false_blk: BlockId(2), condition: Some(Box::new(ConditionExpr::BoolTest { var: SsaValue(0) })), }; let json = serde_json::to_string(&body).unwrap(); let back: crate::taint::ssa_transfer::CalleeSsaBody = serde_json::from_str(&json).unwrap(); match &back.ssa.blocks[0].terminator { Terminator::Branch { true_blk, false_blk, condition, .. 
} => {
            assert_eq!(*true_blk, BlockId(1));
            assert_eq!(*false_blk, BlockId(2));
            assert!(condition.is_some());
            match condition.as_deref() {
                Some(ConditionExpr::BoolTest { var }) => {
                    assert_eq!(*var, SsaValue(0));
                }
                other => panic!("expected BoolTest, got {:?}", other),
            }
        }
        other => panic!("expected Branch, got {:?}", other),
    }
}

// ── GlobalSummaries body resolution ──────────────────────────────────────

/// Inserting a body twice under the same key leaves only the second body.
#[test]
fn global_summaries_insert_body_exact_key_replacement() {
    let key = FuncKey {
        lang: crate::symbol::Lang::Python,
        namespace: "helper.py".into(),
        name: "transform".into(),
        arity: Some(2),
        ..Default::default()
    };
    let first = make_callee_body(3, 2);
    let second = make_callee_body(5, 2);

    let mut summaries = GlobalSummaries::new();
    summaries.insert_body(key.clone(), first);
    assert_eq!(summaries.get_body(&key).unwrap().ssa.blocks.len(), 3);

    // Second insert replaces (exact-key, no union)
    summaries.insert_body(key.clone(), second);
    assert_eq!(summaries.get_body(&key).unwrap().ssa.blocks.len(), 5);
}

/// Looking up a body in an empty store yields `None`.
#[test]
fn global_summaries_get_body_not_found() {
    let absent = FuncKey {
        lang: crate::symbol::Lang::Python,
        namespace: "missing.py".into(),
        name: "nope".into(),
        arity: Some(0),
        ..Default::default()
    };
    let summaries = GlobalSummaries::new();
    assert!(summaries.get_body(&absent).is_none());
}

/// After `merge`, bodies from both stores are reachable from the receiver.
#[test]
fn global_summaries_merge_includes_bodies() {
    let key_a = FuncKey {
        lang: crate::symbol::Lang::Python,
        namespace: "a.py".into(),
        name: "func_a".into(),
        arity: Some(1),
        ..Default::default()
    };
    let key_b = FuncKey {
        lang: crate::symbol::Lang::Python,
        namespace: "b.py".into(),
        name: "func_b".into(),
        arity: Some(2),
        ..Default::default()
    };

    // Need to also insert regular summaries so the by_lang_name index is populated
    let mut target = GlobalSummaries::new();
    target.insert(key_a.clone(), make("func_a", 0, 0, 0));
    target.insert_body(key_a.clone(), make_callee_body(2, 1));

    let mut incoming = GlobalSummaries::new();
    incoming.insert(key_b.clone(), make("func_b", 0, 0, 0));
    incoming.insert_body(key_b.clone(), make_callee_body(4, 2));

    target.merge(incoming);

    assert!(target.get_body(&key_a).is_some());
    assert!(target.get_body(&key_b).is_some());
    assert_eq!(target.get_body(&key_a).unwrap().ssa.blocks.len(), 2);
    assert_eq!(target.get_body(&key_b).unwrap().ssa.blocks.len(), 4);
}

/// A single candidate with a stored body resolves from another namespace.
#[test]
fn global_summaries_resolve_callee_body_exact_match() {
    let key = FuncKey {
        lang: crate::symbol::Lang::Python,
        namespace: "util.py".into(),
        name: "helper".into(),
        arity: Some(1),
        ..Default::default()
    };
    let mut summaries = GlobalSummaries::new();
    summaries.insert(key.clone(), make("helper", 0, 0, 0));
    summaries.insert_body(key.clone(), make_callee_body(3, 1));

    // Resolve with matching lang/name/arity
    let block_count = summaries
        .resolve_callee_body(crate::symbol::Lang::Python, "helper", Some(1), "app.py")
        .map(|body| body.ssa.blocks.len());
    assert_eq!(block_count, Some(3));
}

/// Resolving an unknown callee against an empty store yields `None`.
#[test]
fn global_summaries_resolve_callee_body_not_found() {
    let summaries = GlobalSummaries::new();
    let outcome =
        summaries.resolve_callee_body(crate::symbol::Lang::Python, "missing", Some(1), "app.py");
    assert!(outcome.is_none());
}

/// Two same-name candidates in different namespaces, queried from a third
/// namespace, must not resolve to either body.
#[test]
fn global_summaries_resolve_callee_body_ambiguous_returns_none() {
    let mut summaries = GlobalSummaries::new();

    // Two functions with same name but different namespaces
    let in_a = FuncKey {
        lang: crate::symbol::Lang::Python,
        namespace: "a.py".into(),
        name: "helper".into(),
        arity: Some(1),
        ..Default::default()
    };
    let in_b = FuncKey {
        lang: crate::symbol::Lang::Python,
        namespace: "b.py".into(),
        name: "helper".into(),
        arity: Some(1),
        ..Default::default()
    };

    summaries.insert(in_a.clone(), make("helper", 0, 0, 0));
    summaries.insert_body(in_a.clone(), make_callee_body(2, 1));
    summaries.insert(in_b.clone(), make("helper", 0, 0, 0));
    summaries.insert_body(in_b.clone(), make_callee_body(4, 1));

    // Resolution from a third namespace → ambiguous → None
    let outcome =
        summaries.resolve_callee_body(crate::symbol::Lang::Python, "helper", Some(1), "c.py");
    assert!(
        outcome.is_none(),
        "ambiguous resolution should return None"
    );
}

#[test]
fn global_summaries_resolve_callee_body_namespace_disambiguates() {
    let mut gs =
GlobalSummaries::new(); let key1 = FuncKey { lang: crate::symbol::Lang::Python, namespace: "a.py".into(), name: "helper".into(), arity: Some(1), ..Default::default() }; let key2 = FuncKey { lang: crate::symbol::Lang::Python, namespace: "b.py".into(), name: "helper".into(), arity: Some(1), ..Default::default() }; gs.insert(key1.clone(), make("helper", 0, 0, 0)); gs.insert_body(key1.clone(), make_callee_body(2, 1)); gs.insert(key2.clone(), make("helper", 0, 0, 0)); gs.insert_body(key2.clone(), make_callee_body(4, 1)); // Resolution from a.py → namespace match → key1 (2 blocks) let resolved = gs.resolve_callee_body(crate::symbol::Lang::Python, "helper", Some(1), "a.py"); assert!(resolved.is_some()); assert_eq!(resolved.unwrap().ssa.blocks.len(), 2); } #[test] fn global_summaries_resolve_body_requires_body_present() { let mut gs = GlobalSummaries::new(); // Insert summary but no body let key = FuncKey { lang: crate::symbol::Lang::Python, namespace: "util.py".into(), name: "helper".into(), arity: Some(1), ..Default::default() }; gs.insert(key.clone(), make("helper", 0, 0, 0)); gs.insert_ssa( key.clone(), SsaFuncSummary { param_to_return: vec![], param_to_sink: vec![], source_caps: crate::labels::Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); // Don't insert body // Resolution finds the key but no body let resolved = gs.resolve_callee_body(crate::symbol::Lang::Python, "helper", Some(1), "app.py"); assert!( resolved.is_none(), "should return None when key resolves but no body 
stored" ); } // ── Identity-model regression tests ───────────────────────────────────── // Each test below encodes one ambiguity the old `(file, name, arity)` key // couldn't express. They guard the new `(lang, namespace, container, name, // arity, disambig, kind)` model and the container-aware resolver. fn fs_with( namespace: &str, container: &str, name: &str, arity: usize, kind: FuncKind, disambig: Option, sink_bits: u32, ) -> (FuncKey, FuncSummary) { let key = FuncKey { lang: Lang::Java, namespace: namespace.into(), container: container.into(), name: name.into(), arity: Some(arity), disambig, kind, }; let summary = FuncSummary { name: name.into(), file_path: namespace.into(), lang: "java".into(), param_count: arity, sink_caps: sink_bits, container: container.into(), disambig, kind, ..Default::default() }; (key, summary) } #[test] fn same_name_methods_on_different_classes_stay_distinct() { let mut gs = GlobalSummaries::new(); let (k1, s1) = fs_with( "src/svc.java", "OrderService", "process", 1, FuncKind::Method, Some(100), 0x01, ); let (k2, s2) = fs_with( "src/svc.java", "UserService", "process", 1, FuncKind::Method, Some(500), 0x02, ); gs.insert(k1.clone(), s1); gs.insert(k2.clone(), s2); assert_eq!(gs.get(&k1).unwrap().sink_caps, 0x01); assert_eq!(gs.get(&k2).unwrap().sink_caps, 0x02); let order = gs.resolve_callee_key_with_container( "process", Lang::Java, "src/other.java", Some("OrderService"), Some(1), ); assert_eq!(order, CalleeResolution::Resolved(k1)); let user = gs.resolve_callee_key_with_container( "process", Lang::Java, "src/other.java", Some("UserService"), Some(1), ); assert_eq!(user, CalleeResolution::Resolved(k2)); } #[test] fn free_function_and_method_with_same_name_resolve_separately() { let mut gs = GlobalSummaries::new(); let (kf, sf) = fs_with( "src/app.java", "", "process", 1, FuncKind::Function, Some(10), 0x10, ); let (km, sm) = fs_with( "src/app.java", "Worker", "process", 1, FuncKind::Method, Some(200), 0x20, ); gs.insert(kf.clone(), sf); 
gs.insert(km.clone(), sm); let free = gs.resolve_callee_key_with_container("process", Lang::Java, "src/app.java", None, Some(1)); let method = gs.resolve_callee_key_with_container( "process", Lang::Java, "src/app.java", Some("Worker"), Some(1), ); assert_eq!(method, CalleeResolution::Resolved(km)); // Without any qualifier, receiver, or receiver_type, a bare // `process()` call is syntactically a free-function invocation, a // method cannot be invoked that way from outside its class. The // resolver's bare-call preference (step 5.5) picks the sole // empty-container candidate deterministically. assert_eq!(free, CalleeResolution::Resolved(kf)); } #[test] fn disambig_separates_same_name_closures_in_same_container() { let mut gs = GlobalSummaries::new(); let (k1, s1) = fs_with( "src/f.js", "outer", "", 0, FuncKind::Closure, Some(123), 0x01, ); let (k2, s2) = fs_with( "src/f.js", "outer", "", 0, FuncKind::Closure, Some(456), 0x02, ); gs.insert(k1.clone(), s1); gs.insert(k2.clone(), s2); assert_ne!(k1, k2); assert_eq!(gs.get(&k1).unwrap().sink_caps, 0x01); assert_eq!(gs.get(&k2).unwrap().sink_caps, 0x02); } #[test] fn interop_lookup_tolerates_missing_disambig() { // Interop edges written by external configuration don't know byte offsets. // `get_for_interop` should still find a single matching key when disambig // is None and the rest of the identity uniquely identifies a summary. let mut gs = GlobalSummaries::new(); let (k, s) = fs_with( "lib.go", "", "fetch_env", 0, FuncKind::Function, Some(7777), 0x04, ); // Go summaries are actually keyed with Lang::Go; use a distinct key here. 
let go_key = FuncKey { lang: Lang::Go, namespace: "lib.go".into(), container: String::new(), name: "fetch_env".into(), arity: Some(0), disambig: Some(7777), kind: FuncKind::Function, }; let go_sum = FuncSummary { name: "fetch_env".into(), file_path: "lib.go".into(), lang: "go".into(), ..s }; gs.insert(go_key, go_sum); let _ = k; // unused: only needed for symmetry with fs_with signature let interop_query = FuncKey { lang: Lang::Go, namespace: "lib.go".into(), container: String::new(), name: "fetch_env".into(), arity: Some(0), disambig: None, kind: FuncKind::Function, }; let hit = gs .get_for_interop(&interop_query) .expect("interop lookup should tolerate missing disambig"); assert_eq!(hit.sink_caps, 0x04); } #[test] fn interop_lookup_returns_none_when_disambig_none_matches_many() { // If multiple summaries share (lang, ns, container, name, arity, kind) // and only disambig distinguishes them, the relaxed interop lookup must // return None rather than picking arbitrarily. let mut gs = GlobalSummaries::new(); let mk = |disambig: u32, bits: u32| { let k = FuncKey { lang: Lang::Go, namespace: "lib.go".into(), container: String::new(), name: "dup".into(), arity: Some(0), disambig: Some(disambig), kind: FuncKind::Function, }; let s = FuncSummary { name: "dup".into(), file_path: "lib.go".into(), lang: "go".into(), sink_caps: bits, disambig: Some(disambig), ..Default::default() }; (k, s) }; let (k1, s1) = mk(1, 0x01); let (k2, s2) = mk(2, 0x02); gs.insert(k1, s1); gs.insert(k2, s2); let ambiguous_query = FuncKey { lang: Lang::Go, namespace: "lib.go".into(), container: String::new(), name: "dup".into(), arity: Some(0), disambig: None, kind: FuncKind::Function, }; assert!( gs.get_for_interop(&ambiguous_query).is_none(), "disambig=None must not pick arbitrarily when multiple keys match" ); } // ── CalleeSite metadata ───────────────────────────────────────────────── #[test] fn callee_site_bare_constructor() { let site = CalleeSite::bare("helper"); assert_eq!(site.name, 
"helper"); assert_eq!(site.arity, None); assert_eq!(site.receiver, None); assert_eq!(site.qualifier, None); assert_eq!(site.ordinal, 0); } #[test] fn callee_site_str_into_coercion() { // Tests that `"name".into()` still works for building callee lists in // test code, despite the field now being `Vec`. let v: Vec = vec!["foo".into(), "bar".into()]; assert_eq!(v.len(), 2); assert_eq!(v[0].name, "foo"); assert_eq!(v[1].name, "bar"); } #[test] fn callee_site_structured_roundtrip() { let summary = FuncSummary { name: "parent".into(), file_path: "x.rs".into(), lang: "rust".into(), param_count: 0, callees: vec![ CalleeSite { name: "obj.method".into(), arity: Some(2), receiver: Some("obj".into()), qualifier: None, ordinal: 1, }, CalleeSite { name: "env::var".into(), arity: Some(1), receiver: None, qualifier: Some("env".into()), ordinal: 2, }, ], ..Default::default() }; let json = serde_json::to_string(&summary).unwrap(); let back: FuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(back.callees.len(), 2); assert_eq!(back.callees[0].name, "obj.method"); assert_eq!(back.callees[0].arity, Some(2)); assert_eq!(back.callees[0].receiver.as_deref(), Some("obj")); assert_eq!(back.callees[0].ordinal, 1); assert_eq!(back.callees[1].qualifier.as_deref(), Some("env")); } #[test] fn legacy_callees_string_array_deserializes() { // Old on-disk rows stored callees as a plain Vec. // The custom deserializer must lift those into CalleeSite { name, .. } // without other metadata so persisted indexes keep working. 
let json = r#"{ "name": "legacy", "file_path": "legacy.rs", "lang": "rust", "param_count": 0, "param_names": [], "source_caps": 0, "sanitizer_caps": 0, "sink_caps": 0, "propagating_params": [], "tainted_sink_params": [], "callees": ["foo", "bar::baz"] }"#; let s: FuncSummary = serde_json::from_str(json).unwrap(); assert_eq!(s.callees.len(), 2); assert_eq!(s.callees[0].name, "foo"); assert_eq!(s.callees[0].arity, None); assert_eq!(s.callees[1].name, "bar::baz"); assert_eq!(s.callees[1].receiver, None); } #[test] fn mixed_callee_form_deserializes() { // Interop / partial-migration rows may mix legacy strings with // structured entries in the same array, deserializer accepts both. let json = r#"{ "name": "mixed", "file_path": "m.rs", "lang": "rust", "param_count": 0, "param_names": [], "source_caps": 0, "sanitizer_caps": 0, "sink_caps": 0, "propagating_params": [], "tainted_sink_params": [], "callees": [ "legacy_fn", {"name": "new_fn", "arity": 3, "receiver": "obj"} ] }"#; let s: FuncSummary = serde_json::from_str(json).unwrap(); assert_eq!(s.callees.len(), 2); assert_eq!(s.callees[0].name, "legacy_fn"); assert_eq!(s.callees[0].arity, None); assert_eq!(s.callees[1].name, "new_fn"); assert_eq!(s.callees[1].arity, Some(3)); assert_eq!(s.callees[1].receiver.as_deref(), Some("obj")); } // ── Rust module-path resolution (qualified Rust paths) ────────────────── /// Helper: build a Rust summary populated with module-path + use-map fields. 
///
/// `use_map` is a list of `(alias, target)` pairs and `wildcards` a list of
/// glob-imported paths. An empty slice for either is stored as `None` on
/// the summary rather than as an empty collection. Capability bits and the
/// propagation lists are all zero / empty.
fn rust_summary_with_mod(
    name: &str,
    file_path: &str,
    param_count: usize,
    module_path: Option<&str>,
    use_map: &[(&str, &str)],
    wildcards: &[&str],
    callees: Vec<CalleeSite>,
) -> FuncSummary {
    let aliases: BTreeMap<String, String> = use_map
        .iter()
        .map(|(k, v)| ((*k).to_string(), (*v).to_string()))
        .collect();
    FuncSummary {
        name: name.into(),
        file_path: file_path.into(),
        lang: "rust".into(),
        param_count,
        param_names: vec![],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees,
        module_path: module_path.map(str::to_string),
        rust_use_map: if aliases.is_empty() {
            None
        } else {
            Some(aliases)
        },
        rust_wildcards: if wildcards.is_empty() {
            None
        } else {
            Some(wildcards.iter().map(|s| (*s).to_string()).collect())
        },
        ..Default::default()
    }
}

#[test]
fn rust_use_map_disambiguates_same_name_across_modules() {
    // Two `validate` functions in different modules.
    let token = rust_summary_with_mod(
        "validate",
        "/proj/src/auth/token.rs",
        1,
        Some("auth::token"),
        &[],
        &[],
        vec![],
    );
    let session = rust_summary_with_mod(
        "validate",
        "/proj/src/auth/session.rs",
        1,
        Some("auth::session"),
        &[],
        &[],
        vec![],
    );
    // Caller imports crate::auth::token::validate and calls `validate(x)`.
    let caller = rust_summary_with_mod(
        "handler",
        "/proj/src/main.rs",
        0,
        Some(""),
        &[("validate", "crate::auth::token::validate")],
        &[],
        vec![CalleeSite {
            name: "validate".into(),
            arity: Some(1),
            ..Default::default()
        }],
    );

    let gs = merge_summaries(vec![token, session, caller], Some("/proj"));

    // Fetch the caller's merged summary, rebuild its use-map from the stored
    // fields, and verify exact-one resolution.
    let caller_key = FuncKey {
        lang: Lang::Rust,
        namespace: "src/main.rs".into(),
        name: "handler".into(),
        arity: Some(0),
        ..Default::default()
    };
    let caller_sum = gs.get(&caller_key).expect("caller summary");
    let use_map = crate::rust_resolve::RustUseMap {
        aliases: caller_sum.rust_use_map.clone().unwrap_or_default(),
        wildcards: caller_sum.rust_wildcards.clone().unwrap_or_default(),
    };

    let resolution = gs.resolve_callee_key_rust(
        "validate",
        None,
        Some(1),
        &caller_key.namespace,
        Some(&use_map),
    );
    match resolution {
        CalleeResolution::Resolved(k) => {
            assert_eq!(k.namespace, "src/auth/token.rs");
            assert_eq!(k.name, "validate");
        }
        other => panic!(
            "expected token::validate to resolve uniquely, got {:?}",
            other
        ),
    }
}

#[test]
fn rust_use_map_qualified_call_via_module_alias() {
    // `use crate::auth::token; token::validate(x);`
    let token = rust_summary_with_mod(
        "validate",
        "/proj/src/auth/token.rs",
        1,
        Some("auth::token"),
        &[],
        &[],
        vec![],
    );
    let caller = rust_summary_with_mod(
        "handler",
        "/proj/src/main.rs",
        0,
        Some(""),
        &[("token", "crate::auth::token")],
        &[],
        vec![CalleeSite {
            name: "token::validate".into(),
            arity: Some(1),
            qualifier: Some("crate::auth::token".into()),
            ..Default::default()
        }],
    );

    let gs = merge_summaries(vec![token, caller], Some("/proj"));

    let um = crate::rust_resolve::RustUseMap {
        aliases: [("token".to_string(), "crate::auth::token".to_string())]
            .into_iter()
            .collect(),
        wildcards: Vec::new(),
    };
    // The site's structured qualifier is the full `crate::auth::token`; the
    // resolver's alias map matches the first segment.
    let resolution =
        gs.resolve_callee_key_rust("validate", Some("token"), Some(1), "src/main.rs", Some(&um));
    match resolution {
        CalleeResolution::Resolved(k) => {
            assert_eq!(k.namespace, "src/auth/token.rs");
        }
        other => panic!("expected unique resolution, got {:?}", other),
    }
}

/// A wildcard import of the defining module is enough to resolve a bare call.
#[test]
fn rust_wildcard_import_resolves_uniquely() {
    let token = rust_summary_with_mod(
        "validate",
        "/proj/src/auth/token.rs",
        1,
        Some("auth::token"),
        &[],
        &[],
        vec![],
    );
    let caller = rust_summary_with_mod(
        "handler",
        "/proj/src/main.rs",
        0,
        Some(""),
        &[],
        &["crate::auth::token"],
        vec![CalleeSite {
            name: "validate".into(),
            arity: Some(1),
            ..Default::default()
        }],
    );
    let gs = merge_summaries(vec![token, caller], Some("/proj"));

    let um = crate::rust_resolve::RustUseMap {
        aliases: BTreeMap::new(),
        wildcards: vec!["crate::auth::token".to_string()],
    };
    let resolution =
        gs.resolve_callee_key_rust("validate", None, Some(1), "src/main.rs", Some(&um));
    match resolution {
        CalleeResolution::Resolved(k) => {
            assert_eq!(k.namespace, "src/auth/token.rs");
        }
        other => panic!("wildcard should resolve uniquely, got {:?}", other),
    }
}

#[test]
fn rust_use_map_fallback_when_absent() {
    // No use_map entry: falls through to generic same-language resolution,
    // which for an unqualified caller in the same namespace still works.
    let helper =
        rust_summary_with_mod("helper", "/proj/src/lib.rs", 0, Some(""), &[], &[], vec![]);
    let caller = rust_summary_with_mod(
        "caller",
        "/proj/src/lib.rs",
        0,
        Some(""),
        &[],
        &[],
        vec![CalleeSite {
            name: "helper".into(),
            arity: Some(0),
            ..Default::default()
        }],
    );
    let gs = merge_summaries(vec![helper, caller], Some("/proj"));

    let resolution = gs.resolve_callee_key_rust("helper", None, Some(0), "src/lib.rs", None);
    assert!(matches!(resolution, CalleeResolution::Resolved(_)));
}

#[test]
fn rust_use_map_ambiguous_stays_ambiguous_without_hint() {
    // Two modules define `validate`; no use-map on the caller, resolution
    // should remain Ambiguous rather than silently picking one.
let token = rust_summary_with_mod(
        "validate",
        "/proj/src/auth/token.rs",
        1,
        Some("auth::token"),
        &[],
        &[],
        vec![],
    );
    let session = rust_summary_with_mod(
        "validate",
        "/proj/src/auth/session.rs",
        1,
        Some("auth::session"),
        &[],
        &[],
        vec![],
    );
    let caller = rust_summary_with_mod(
        "handler",
        "/proj/src/main.rs",
        0,
        Some(""),
        &[],
        &[],
        vec![CalleeSite {
            name: "validate".into(),
            arity: Some(1),
            ..Default::default()
        }],
    );
    let gs = merge_summaries(vec![token, session, caller], Some("/proj"));

    let resolution = gs.resolve_callee_key_rust("validate", None, Some(1), "src/main.rs", None);
    assert!(matches!(resolution, CalleeResolution::Ambiguous(_)));
}

// ── Serde round-trip / backward compatibility ────────────────────────────

#[test]
fn pre_rust_fields_json_deserializes_with_defaults() {
    // A summary JSON written before the Rust `module_path`/`rust_use_map`/
    // `rust_wildcards` fields existed must still deserialise cleanly with
    // all three defaulting to `None`.
    let legacy_json = r#"{ "name": "old", "file_path": "src/lib.rs", "lang": "rust", "param_count": 1, "param_names": ["x"], "source_caps": 0, "sanitizer_caps": 0, "sink_caps": 0, "propagating_params": [0], "tainted_sink_params": [], "callees": [] }"#;
    let parsed: FuncSummary = serde_json::from_str(legacy_json).unwrap();
    assert_eq!(parsed.name, "old");
    assert!(parsed.module_path.is_none());
    assert!(parsed.rust_use_map.is_none());
    assert!(parsed.rust_wildcards.is_none());
}

/// `module_path`, `rust_use_map` and `rust_wildcards` come back unchanged
/// after serialising a summary to JSON and parsing it again.
#[test]
fn rust_fields_roundtrip_through_json() {
    let mut aliases = BTreeMap::new();
    aliases.insert(
        "validate".to_string(),
        "crate::auth::token::validate".to_string(),
    );
    let wildcard_paths = vec!["crate::auth::session".to_string()];

    let original = FuncSummary {
        name: "handler".into(),
        file_path: "src/main.rs".into(),
        lang: "rust".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        module_path: Some(String::new()),
        rust_use_map: Some(aliases.clone()),
        rust_wildcards: Some(wildcard_paths.clone()),
        ..Default::default()
    };

    let json = serde_json::to_string(&original).unwrap();
    let back: FuncSummary = serde_json::from_str(&json).unwrap();

    assert_eq!(back.module_path.as_deref(), Some(""));
    assert_eq!(back.rust_use_map.unwrap(), aliases);
    assert_eq!(back.rust_wildcards.unwrap(), wildcard_paths);
}

// ── Qualified-first callee resolution (adversarial) ─────────────────────
//
// Each test here stages a same-leaf-name collision that the old leaf-only
// resolver would either silently pick wrong or flag as ambiguous. Under
// the new `CalleeQuery` path, qualified identity (receiver type /
// namespace qualifier / caller container) must win before any bare-leaf
// fallback kicks in.

/// `fs_with` for a `FuncKind::Method`, with `disambig` derived from the
/// combined byte length of namespace, container and name.
fn method_summary(
    namespace: &str,
    container: &str,
    name: &str,
    arity: usize,
    sink_bits: u32,
) -> (FuncKey, FuncSummary) {
    let disambig = (namespace.len() + container.len() + name.len()) as u32;
    fs_with(
        namespace,
        container,
        name,
        arity,
        FuncKind::Method,
        Some(disambig),
        sink_bits,
    )
}

/// `fs_with` for a container-less `FuncKind::Function`, with `disambig`
/// derived from the combined byte length of namespace and name.
fn free_summary(
    namespace: &str,
    name: &str,
    arity: usize,
    sink_bits: u32,
) -> (FuncKey, FuncSummary) {
    let disambig = (namespace.len() + name.len()) as u32;
    fs_with(
        namespace,
        "",
        name,
        arity,
        FuncKind::Function,
        Some(disambig),
        sink_bits,
    )
}

#[test]
fn query_prefers_receiver_type_over_leaf_collision() {
    // Two classes in different files both expose `send/1`. A free
    // function also named `send/1` sits in yet another file to make the
    // leaf-name index ambiguous. The caller lives outside all three.
    let (k_http, s_http) = method_summary("src/http.java", "HttpClient", "send", 1, 0x01);
    let (k_queue, s_queue) = method_summary("src/queue.java", "MessageQueue", "send", 1, 0x02);
    let (k_free, s_free) = free_summary("src/util.java", "send", 1, 0x04);

    let mut gs = GlobalSummaries::new();
    gs.insert(k_http.clone(), s_http);
    gs.insert(k_queue.clone(), s_queue);
    gs.insert(k_free.clone(), s_free);

    // Same query each time; only the receiver type changes.
    let send_on = |receiver_type| {
        gs.resolve_callee(&CalleeQuery {
            name: "send",
            caller_lang: Lang::Java,
            caller_namespace: "src/app.java",
            caller_container: None,
            receiver_type: Some(receiver_type),
            namespace_qualifier: None,
            receiver_var: None,
            arity: Some(1),
        })
    };

    // With `receiver_type = HttpClient`, resolution MUST land on the
    // HttpClient method even though `MessageQueue::send/1` and the free
    // function `send/1` would both match the leaf name.
    assert_eq!(
        send_on("HttpClient"),
        CalleeResolution::Resolved(k_http.clone())
    );

    // Old behaviour-parity regression: `resolve_callee_key_with_container`
    // (now a thin wrapper) used to treat `MessageQueue` as an authoritative
    // qualifier that *only* picked on exact match. The new resolver must
    // still do that: swap to `MessageQueue` and we get its method back.
    assert_eq!(
        send_on("MessageQueue"),
        CalleeResolution::Resolved(k_queue)
    );

    // And the leaf-name index *does* know about the free function: no
    // hint → ambiguous (not a silent mis-resolve).
    match gs.resolve_callee_key("send", Lang::Java, "src/app.java", Some(1)) {
        CalleeResolution::Ambiguous(cands) => {
            assert_eq!(cands.len(), 3);
            assert!(cands.contains(&k_http));
            assert!(cands.contains(&k_free));
        }
        other => panic!("bare leaf lookup with 3 candidates must be Ambiguous, got {other:?}"),
    }
}

#[test]
fn query_authoritative_receiver_miss_does_not_fall_through_to_leaf() {
    // When `receiver_type = HttpClient` is supplied but no
    // `HttpClient::send` exists, the resolver MUST NOT silently pick a
    // same-leaf collision in another container; that would be the
    // classic "resolved by leaf name" bug the refactor aims to prevent.
    let (k_queue, s_queue) = method_summary("src/queue.java", "MessageQueue", "send", 1, 0x02);
    let (k_free, s_free) = free_summary("src/util.java", "send", 1, 0x04);

    let mut gs = GlobalSummaries::new();
    gs.insert(k_queue.clone(), s_queue);
    gs.insert(k_free.clone(), s_free);

    let outcome = gs.resolve_callee(&CalleeQuery {
        name: "send",
        caller_lang: Lang::Java,
        caller_namespace: "src/app.java",
        caller_container: None,
        receiver_type: Some("HttpClient"),
        namespace_qualifier: None,
        receiver_var: None,
        arity: Some(1),
    });
    match outcome {
        CalleeResolution::Ambiguous(cands) => {
            // Candidates list reports the leaf-name matches so callers
            // can diagnose, but we refused to pick one of them.
            assert!(cands.contains(&k_queue));
            assert!(cands.contains(&k_free));
        }
        other => panic!(
            "authoritative receiver_type miss must return Ambiguous (never silently resolve to a \
             different container), got {other:?}"
        ),
    }
}

#[test]
fn query_namespace_qualifier_resolves_env_var_style_call() {
    // Rust / C++-style namespace qualifiers should land on the module
    // that exposes the leaf, even when same-leaf functions live in
    // unrelated modules.
    let mut gs = GlobalSummaries::new();
    let (k_env, s_env) = fs_with(
        "src/env.rs",
        "env",
        "var",
        1,
        FuncKind::Function,
        Some(1),
        0x01,
    );
    // Force the insertion to use Rust lang by shadowing fs_with's Java default.
let k_env = FuncKey { lang: Lang::Rust, ..k_env }; let s_env = FuncSummary { lang: "rust".into(), ..s_env }; let (k_other, s_other) = fs_with( "src/other.rs", "config", "var", 1, FuncKind::Function, Some(2), 0x02, ); let k_other = FuncKey { lang: Lang::Rust, ..k_other }; let s_other = FuncSummary { lang: "rust".into(), ..s_other }; gs.insert(k_env.clone(), s_env); gs.insert(k_other.clone(), s_other); // `env::var` → namespace_qualifier = "env" → env::var wins. let resolved = gs.resolve_callee(&CalleeQuery { name: "var", caller_lang: Lang::Rust, caller_namespace: "src/consumer.rs", caller_container: None, receiver_type: None, namespace_qualifier: Some("env"), receiver_var: None, arity: Some(1), }); assert_eq!(resolved, CalleeResolution::Resolved(k_env.clone())); // `config::var` → namespace_qualifier = "config" → config::var wins. let resolved_cfg = gs.resolve_callee(&CalleeQuery { name: "var", caller_lang: Lang::Rust, caller_namespace: "src/consumer.rs", caller_container: None, receiver_type: None, namespace_qualifier: Some("config"), receiver_var: None, arity: Some(1), }); assert_eq!(resolved_cfg, CalleeResolution::Resolved(k_other)); // Bare `var(...)` call (no qualifier) across namespaces → Ambiguous. let bare = gs.resolve_callee_key("var", Lang::Rust, "src/consumer.rs", Some(1)); assert!(matches!(bare, CalleeResolution::Ambiguous(_))); } #[test] fn query_caller_container_resolves_self_call() { // Bare `helper()` from inside `OrderService::place` must resolve to // the `OrderService::helper` method rather than a same-name helper // exposed by an unrelated class in a different file. 
let mut gs = GlobalSummaries::new(); let (k_order, s_order) = method_summary("src/order.java", "OrderService", "helper", 0, 0xA); let (k_user, s_user) = method_summary("src/user.java", "UserService", "helper", 0, 0xB); gs.insert(k_order.clone(), s_order); gs.insert(k_user.clone(), s_user); let resolved = gs.resolve_callee(&CalleeQuery { name: "helper", caller_lang: Lang::Java, caller_namespace: "src/order.java", caller_container: Some("OrderService"), receiver_type: None, namespace_qualifier: None, receiver_var: None, arity: Some(0), }); assert_eq!(resolved, CalleeResolution::Resolved(k_order.clone())); // Swap the caller to `UserService` and we should land on its helper. let resolved_user = gs.resolve_callee(&CalleeQuery { name: "helper", caller_lang: Lang::Java, caller_namespace: "src/user.java", caller_container: Some("UserService"), receiver_type: None, namespace_qualifier: None, receiver_var: None, arity: Some(0), }); assert_eq!(resolved_user, CalleeResolution::Resolved(k_user)); // With no caller-container hint (free call from module-level code), // the resolver must not pick either class's helper blindly. let no_hint = gs.resolve_callee(&CalleeQuery { name: "helper", caller_lang: Lang::Java, caller_namespace: "src/main.java", caller_container: None, receiver_type: None, namespace_qualifier: None, receiver_var: None, arity: Some(0), }); assert!(matches!(no_hint, CalleeResolution::Ambiguous(_))); } #[test] fn query_leaf_same_namespace_still_resolves_intra_file_calls() { // Two definitions share a leaf name but live in different files. // A same-namespace call (intra-file) must resolve to the local one // without requiring any structured hint, this is the common case // for bare top-level function calls. 
let mut gs = GlobalSummaries::new(); let (k_a, s_a) = free_summary("src/a.js", "helper", 1, 0x01); let (k_b, s_b) = free_summary("src/b.js", "helper", 1, 0x02); gs.insert(k_a.clone(), s_a); gs.insert(k_b.clone(), s_b); // Caller in a.js → resolves to a.js::helper. let resolved = gs.resolve_callee(&CalleeQuery { name: "helper", caller_lang: Lang::Java, caller_namespace: "src/a.js", caller_container: None, receiver_type: None, namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!(resolved, CalleeResolution::Resolved(k_a)); // Caller in a third file → ambiguous (we refuse to guess). let cross = gs.resolve_callee(&CalleeQuery { name: "helper", caller_lang: Lang::Java, caller_namespace: "src/c.js", caller_container: None, receiver_type: None, namespace_qualifier: None, receiver_var: None, arity: Some(1), }); match cross { CalleeResolution::Ambiguous(cands) => { assert_eq!(cands.len(), 2); assert!(cands.contains(&k_b)); } other => panic!("cross-file bare leaf should be Ambiguous, got {other:?}"), } } #[test] fn query_arity_filter_is_hard() { // Same container and leaf, different arities, resolution must // honour the arity filter before any qualifier-based tie-break. 
let mut gs = GlobalSummaries::new(); let (k_1arg, s_1arg) = method_summary("src/svc.py", "Svc", "render", 1, 0x01); let (k_2arg, s_2arg) = method_summary("src/svc.py", "Svc", "render", 2, 0x02); gs.insert(k_1arg.clone(), s_1arg); gs.insert(k_2arg.clone(), s_2arg); let one = gs.resolve_callee(&CalleeQuery { name: "render", caller_lang: Lang::Java, caller_namespace: "src/caller.py", caller_container: None, receiver_type: Some("Svc"), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!(one, CalleeResolution::Resolved(k_1arg)); let two = gs.resolve_callee(&CalleeQuery { name: "render", caller_lang: Lang::Java, caller_namespace: "src/caller.py", caller_container: None, receiver_type: Some("Svc"), namespace_qualifier: None, receiver_var: None, arity: Some(2), }); assert_eq!(two, CalleeResolution::Resolved(k_2arg)); // With a non-existent arity, arity filter prunes everything and we // get NotFound, not a "closest match" guess. let mismatched = gs.resolve_callee(&CalleeQuery { name: "render", caller_lang: Lang::Java, caller_namespace: "src/caller.py", caller_container: None, receiver_type: Some("Svc"), namespace_qualifier: None, receiver_var: None, arity: Some(5), }); match mismatched { CalleeResolution::NotFound | CalleeResolution::Ambiguous(_) => {} CalleeResolution::Resolved(k) => { panic!("arity mismatch must not resolve; got {k:?}") } } } #[test] fn query_receiver_var_is_soft_tiebreak_not_primary() { // Adversarial case: a variable named "obj" exists and a class // happens to also be called "obj". The old resolver used the // variable name as container_hint #1, which could mis-pick when // the qualified index had a coincidental hit. The new resolver // treats `receiver_var` as a *soft* tie-break, it only fires // after same-namespace unique-leaf resolution fails. 
let mut gs = GlobalSummaries::new(); let (k_same_ns, s_same_ns) = free_summary("src/app.js", "method", 1, 0xAA); let (k_other_class, s_other_class) = method_summary("src/other.js", "obj", "method", 1, 0xBB); gs.insert(k_same_ns.clone(), s_same_ns); gs.insert(k_other_class.clone(), s_other_class); // Caller lives in app.js → intra-file unique-leaf wins, regardless // of the variable-named `obj` coincidence in other.js. let intra = gs.resolve_callee(&CalleeQuery { name: "method", caller_lang: Lang::Java, caller_namespace: "src/app.js", caller_container: None, receiver_type: None, namespace_qualifier: None, receiver_var: Some("obj"), arity: Some(1), }); assert_eq!(intra, CalleeResolution::Resolved(k_same_ns)); // Caller in a third file where no same-namespace match exists → // receiver_var tie-break fires and picks `obj::method`. let cross = gs.resolve_callee(&CalleeQuery { name: "method", caller_lang: Lang::Java, caller_namespace: "src/elsewhere.js", caller_container: None, receiver_type: None, namespace_qualifier: None, receiver_var: Some("obj"), arity: Some(1), }); assert_eq!(cross, CalleeResolution::Resolved(k_other_class)); } #[test] fn query_qualifier_miss_refuses_to_guess_leaf() { // `namespace_qualifier = "Missing"` does not exist as a container. // We have two leaf candidates. The resolver must NOT fall back // and pick one of them silently. (It may return the leaf set // as Ambiguous for diagnostics.) 
let mut gs = GlobalSummaries::new();
    let (k_a, s_a) = free_summary("src/a.go", "run", 1, 0x1);
    let (k_b, s_b) = free_summary("src/b.go", "run", 1, 0x2);
    gs.insert(k_a.clone(), s_a);
    gs.insert(k_b.clone(), s_b);

    let resolved = gs.resolve_callee(&CalleeQuery {
        name: "run",
        caller_lang: Lang::Java,
        caller_namespace: "src/caller.go",
        caller_container: None,
        receiver_type: None,
        namespace_qualifier: Some("Missing"),
        receiver_var: None,
        arity: Some(1),
    });
    match resolved {
        CalleeResolution::Ambiguous(cands) => {
            assert_eq!(cands.len(), 2);
            assert!(cands.contains(&k_a));
            assert!(cands.contains(&k_b));
        }
        CalleeResolution::Resolved(k) => {
            panic!("unresolved qualifier must not silently pick a leaf-only match; got {k:?}")
        }
        CalleeResolution::NotFound => panic!("candidates exist — should be Ambiguous not NotFound"),
    }
}

#[test]
fn legacy_wrapper_preserves_test_contract() {
    // The old `resolve_callee_key_with_container` entry point is kept as
    // a thin wrapper. The pre-refactor tests
    // (`same_name_methods_on_different_classes_stay_distinct` and
    // `free_function_and_method_with_same_name_resolve_separately`)
    // already cover the happy paths; this test pins the *contract* of
    // the wrapper itself so we do not drift: a container hint is
    // treated as a non-authoritative namespace qualifier and falls
    // through to leaf lookup when it misses.
    let mut gs = GlobalSummaries::new();
    let (k_a, s_a) = free_summary("src/a.java", "only", 1, 0x1);
    gs.insert(k_a.clone(), s_a);

    // container_hint doesn't match any container, but the leaf name has
    // exactly one candidate, so the wrapper should still resolve.
let resolved = gs.resolve_callee_key_with_container(
        "only",
        Lang::Java,
        "src/caller.java",
        Some("NonExistent"),
        Some(1),
    );
    assert_eq!(resolved, CalleeResolution::Resolved(k_a));
}

// ── Adversarial: same-name identity collisions in the SAME file ─────────
//
// These tests target the most error-prone identity cases: two or more
// definitions that share `(lang, namespace, name, arity)` but differ in
// `container`. The resolver must either resolve to the exact container
// target or refuse to guess. Silently falling back to a same-leaf
// collision in a different container is a correctness bug, and mis-
// ordering the resolution steps can cause either false positives (wrong
// summary picked) or false negatives (missed flow because Ambiguous
// took a confident hint off the table).

#[test]
fn same_file_two_classes_same_method_typed_receiver_picks_exact() {
    // Two classes in the SAME file, both defining `run/1` with
    // incompatible security behaviour: `Safe::run` is a sanitizer-ish
    // passthrough (no sink bits) while `Unsafe::run` is a shell sink.
    // When the caller has a typed receiver (via type inference), the
    // resolver must pick the exact class; the wrong pick would either
    // miss the Unsafe sink or wrongly flag the Safe path.
let mut gs = GlobalSummaries::new(); let (k_safe, s_safe) = method_summary("src/app.java", "Safe", "run", 1, 0x00); let (k_unsafe, s_unsafe) = method_summary("src/app.java", "Unsafe", "run", 1, Cap::SHELL_ESCAPE.bits()); gs.insert(k_safe.clone(), s_safe); gs.insert(k_unsafe.clone(), s_unsafe); let unsafe_call = gs.resolve_callee(&CalleeQuery { name: "run", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("Unsafe"), namespace_qualifier: None, receiver_var: Some("u"), arity: Some(1), }); assert_eq!( unsafe_call, CalleeResolution::Resolved(k_unsafe.clone()), "typed receiver `Unsafe` MUST land on Unsafe::run, not Safe::run" ); let safe_call = gs.resolve_callee(&CalleeQuery { name: "run", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("Safe"), namespace_qualifier: None, receiver_var: Some("s"), arity: Some(1), }); assert_eq!( safe_call, CalleeResolution::Resolved(k_safe.clone()), "typed receiver `Safe` MUST land on Safe::run, not Unsafe::run" ); // Sink-cap sanity: if the resolver ever silently swapped them the // cap mismatch would show up here. assert_eq!(gs.get(&k_safe).unwrap().sink_caps, 0x00); assert_eq!( gs.get(&k_unsafe).unwrap().sink_caps, Cap::SHELL_ESCAPE.bits() ); } #[test] fn same_file_two_classes_same_method_untyped_receiver_is_ambiguous_not_wrong() { // Same setup as above, but the caller only has a variable-name // receiver (no type facts). `receiver_var` is a SOFT hint, and in // the common case `s`/`u` don't match any container. The resolver // MUST refuse to pick one arbitrarily; returning `Safe::run` when // the call was `u.run(...)` would be a silent false negative of the // worst kind (wrong summary pickup). 
let mut gs = GlobalSummaries::new(); let (k_safe, s_safe) = method_summary("src/app.java", "Safe", "run", 1, 0x00); let (k_unsafe, s_unsafe) = method_summary("src/app.java", "Unsafe", "run", 1, Cap::SHELL_ESCAPE.bits()); gs.insert(k_safe.clone(), s_safe); gs.insert(k_unsafe.clone(), s_unsafe); let resolved = gs.resolve_callee(&CalleeQuery { name: "run", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: None, namespace_qualifier: None, receiver_var: Some("u"), arity: Some(1), }); match resolved { CalleeResolution::Ambiguous(cands) => { assert!(cands.contains(&k_safe)); assert!(cands.contains(&k_unsafe)); } CalleeResolution::Resolved(k) => panic!( "same-file same-name two-class collision with only a soft `receiver_var` MUST NOT \ pick a specific summary — got {k:?}" ), CalleeResolution::NotFound => { panic!("candidates exist in the same file — must be Ambiguous, not NotFound") } } } #[test] fn same_file_free_function_and_method_bare_call_prefers_free_function() { // Classic "I wrote a top-level helper AND a method with the same // name in the same file" trap. A bare `process()` call, no // receiver, no qualifier, caller outside any container, is // syntactically a FREE function call; the method cannot be invoked // this way. The resolver MUST resolve to the free function, not // return Ambiguous. // // NOTE: this test was FAILING under the pre-fix resolver, which // returned Ambiguous because step 4's same-namespace narrowing // still saw two candidates and step 6 had no qualified hint to // tie-break on. let mut gs = GlobalSummaries::new(); let (k_free, s_free) = free_summary("src/app.java", "process", 1, 0x0F); let (k_method, s_method) = method_summary("src/app.java", "Worker", "process", 1, 0xF0); gs.insert(k_free.clone(), s_free); gs.insert(k_method.clone(), s_method); // Caller is a top-level free function (caller_container = None). 
let bare = gs.resolve_callee(&CalleeQuery { name: "process", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: None, namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!( bare, CalleeResolution::Resolved(k_free.clone()), "bare `process()` from a top-level caller must resolve to the FREE function \ in the same file, not get lost in Ambiguous" ); // Cap sanity: if we accidentally resolved to `Worker::process` the // sink caps would leak and downstream taint would flag the wrong // flow. Pin the exact resolution, not just Resolved-vs-Ambiguous. if let CalleeResolution::Resolved(k) = bare { assert_eq!(gs.get(&k).unwrap().sink_caps, 0x0F); } } #[test] fn same_file_method_calling_sibling_free_function_resolves_to_free() { // Variant of the previous test with the caller LIVING INSIDE a // class whose own container does NOT define `process`. Bare // `process()` inside `Runner::kick()` must still resolve to the // file-local free function, not get lost in Ambiguous because the // caller_container hint (`Runner`) misses both candidates. let mut gs = GlobalSummaries::new(); let (k_free, s_free) = free_summary("src/app.java", "process", 1, 0x0F); let (k_method, s_method) = method_summary("src/app.java", "Worker", "process", 1, 0xF0); // Runner::kick exists only so caller_container("Runner") is a real // container name in the global summaries. It is NOT a candidate. 
let (k_kick, s_kick) = method_summary("src/app.java", "Runner", "kick", 0, 0x00); gs.insert(k_free.clone(), s_free); gs.insert(k_method.clone(), s_method); gs.insert(k_kick, s_kick); let resolved = gs.resolve_callee(&CalleeQuery { name: "process", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: Some("Runner"), receiver_type: None, namespace_qualifier: None, receiver_var: None, arity: Some(1), }); match resolved { CalleeResolution::Resolved(k) => { assert_eq!( k, k_free, "bare `process()` inside Runner::kick must land on the free function; \ picking Worker::process would be wrong-summary pickup" ); } // Ambiguous is also wrong: syntactically this CANNOT be // Worker::process (no receiver, no this in the caller's // container), so the resolver has enough information to pick // the free function. other => panic!( "bare `process()` from Runner::kick should resolve to the free function; got {other:?}" ), } } #[test] fn same_file_method_calling_own_container_sibling_prefers_self_class() { // Inverse of the previous: caller is INSIDE `Worker::other()` and // calls bare `process()`. Both a free `process` AND `Worker::process` // exist in the file. The caller's own container resolution (step 3) // must prefer `Worker::process`, otherwise intra-class self calls // would get misresolved to a free function with possibly different // security behaviour. 
let mut gs = GlobalSummaries::new();
    let (k_free, s_free) = free_summary("src/app.java", "process", 1, 0x0F);
    let (k_method, s_method) = method_summary("src/app.java", "Worker", "process", 1, 0xF0);
    gs.insert(k_free.clone(), s_free);
    gs.insert(k_method.clone(), s_method);

    let resolved = gs.resolve_callee(&CalleeQuery {
        name: "process",
        caller_lang: Lang::Java,
        caller_namespace: "src/app.java",
        caller_container: Some("Worker"),
        receiver_type: None,
        namespace_qualifier: None,
        receiver_var: None,
        arity: Some(1),
    });
    assert_eq!(
        resolved,
        CalleeResolution::Resolved(k_method),
        "self-call from Worker::other() must resolve to Worker::process, not the free function"
    );
}

#[test]
fn same_file_nested_container_same_method_disambiguates_by_container() {
    // Two nested definitions: `Outer::foo/1` and `Outer::Inner::foo/1`
    // both live in the same file. The fully qualified container names
    // must be distinct keys and the resolver must pick each one exactly
    // when the container hint is given. A bug that stripped nested
    // container suffixes or used only the outermost name would collapse
    // them and mis-resolve.
    let mut gs = GlobalSummaries::new();
    let (k_outer, s_outer) = method_summary("src/nested.java", "Outer", "foo", 1, 0x01);
    let (k_inner, s_inner) = method_summary("src/nested.java", "Outer::Inner", "foo", 1, 0x02);
    gs.insert(k_outer.clone(), s_outer);
    gs.insert(k_inner.clone(), s_inner);

    // Exact qualified hint "Outer::Inner" must land on the inner one.
let inner = gs.resolve_callee(&CalleeQuery { name: "foo", caller_lang: Lang::Java, caller_namespace: "src/nested.java", caller_container: None, receiver_type: Some("Outer::Inner"), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!( inner, CalleeResolution::Resolved(k_inner.clone()), "`Outer::Inner` receiver_type must pick the NESTED foo — picking `Outer::foo` would be \ wrong-summary pickup driven by prefix collapse" ); // Exact qualified hint "Outer" must land on the outer one, not the // nested one (nested container starts with "Outer::" but is not // equal to "Outer"). let outer = gs.resolve_callee(&CalleeQuery { name: "foo", caller_lang: Lang::Java, caller_namespace: "src/nested.java", caller_container: None, receiver_type: Some("Outer"), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!( outer, CalleeResolution::Resolved(k_outer), "`Outer` receiver_type must pick only Outer::foo — not Outer::Inner::foo via prefix match" ); // Exact cap pinning, guards against merge_summaries accidentally // unioning caps across the two nested keys. assert_eq!(gs.get(&k_inner).unwrap().sink_caps, 0x02); } #[test] fn same_file_same_name_different_security_behaviour_no_cap_leak() { // Three `validate/1` entries in the same file: a sanitizer // passthrough (free function), an HTML-escape sanitizer in one // class, and a shell-exec sink in another class. These must end // up as three distinct keys with their caps preserved exactly , // no merge of sink caps into the sanitizer entry, no cross-leak // via `by_lang_name` fallback. 
let mut gs = GlobalSummaries::new(); let (k_free, mut s_free) = free_summary("src/val.py", "validate", 1, 0x00); s_free.sanitizer_caps = Cap::all().bits(); let (k_html, mut s_html) = method_summary("src/val.py", "HtmlGuard", "validate", 1, 0x00); s_html.sanitizer_caps = Cap::HTML_ESCAPE.bits(); let (k_shell, s_shell) = method_summary( "src/val.py", "ShellRunner", "validate", 1, Cap::SHELL_ESCAPE.bits(), ); gs.insert(k_free.clone(), s_free); gs.insert(k_html.clone(), s_html); gs.insert(k_shell.clone(), s_shell); // Each key retrieved independently must yield exactly its own caps. assert_eq!(gs.get(&k_free).unwrap().sink_caps, 0x00); assert_eq!(gs.get(&k_free).unwrap().sanitizer_caps, Cap::all().bits()); assert_eq!(gs.get(&k_html).unwrap().sink_caps, 0x00); assert_eq!( gs.get(&k_html).unwrap().sanitizer_caps, Cap::HTML_ESCAPE.bits() ); assert_eq!( gs.get(&k_shell).unwrap().sink_caps, Cap::SHELL_ESCAPE.bits() ); assert_eq!(gs.get(&k_shell).unwrap().sanitizer_caps, 0x00); // Each `receiver_type` hint must land on its OWN container. for (hint, expected) in [("HtmlGuard", &k_html), ("ShellRunner", &k_shell)] { let r = gs.resolve_callee(&CalleeQuery { name: "validate", caller_lang: Lang::Java, caller_namespace: "src/val.py", caller_container: None, receiver_type: Some(hint), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!( r, CalleeResolution::Resolved(expected.clone()), "receiver_type `{hint}` must pick its own container's validate" ); } } // ── Tightened-merge regression tests ──────────────────────────────────── // These guard the identity-collision split added to `insert`, `insert_ssa`, // and `insert_body`. Each scenario encodes an "underspecified identity" // (typically `disambig: None` from legacy/interop/DB-loaded summaries) where // the old code silently collapsed structurally distinct functions. 
/// Build a minimal `FuncSummary` with `disambig: None`, mirrors the shape /// produced by legacy JSON rows / interop configs that don't know byte /// offsets. `file_path` is left blank so namespace normalisation doesn't /// separate the two otherwise-identical keys. fn legacy_summary( file_path: &str, name: &str, param_count: usize, param_names: Vec, kind: FuncKind, container: &str, sink: u32, ) -> FuncSummary { FuncSummary { name: name.into(), file_path: file_path.into(), lang: "java".into(), param_count, param_names, sink_caps: sink, container: container.into(), disambig: None, kind, ..Default::default() } } #[test] fn insert_mismatched_module_path_does_not_silently_merge() { // Two Rust summaries with the same leaf key but different // `module_path` (e.g. produced by loading a stale DB alongside a // freshly-scanned file, or by two different scan_root anchors). // `module_path` is not part of `FuncKey` but identifies the defining // crate module; two distinct modules with the same file path relative // to different scan roots must stay separate. 
let mut gs = GlobalSummaries::new(); let a = FuncSummary { name: "validate".into(), file_path: "src/lib.rs".into(), lang: "rust".into(), param_count: 1, param_names: vec!["x".into()], sink_caps: Cap::SHELL_ESCAPE.bits(), disambig: None, module_path: Some("auth::token".into()), ..Default::default() }; let b = FuncSummary { name: "validate".into(), file_path: "src/lib.rs".into(), lang: "rust".into(), param_count: 1, param_names: vec!["x".into()], sink_caps: Cap::SQL_QUERY.bits(), disambig: None, module_path: Some("billing::invoice".into()), ..Default::default() }; let k = a.func_key(None); assert_eq!( k, b.func_key(None), "pre-fix: both summaries would land on the same key" ); gs.insert(k.clone(), a); gs.insert(k.clone(), b); let hits = gs.lookup_same_lang(Lang::Rust, "validate"); assert_eq!( hits.len(), 2, "different module_path summaries must stay distinct — got {hits:?}" ); let auth = hits .iter() .find(|(_, s)| s.module_path.as_deref() == Some("auth::token")) .expect("auth::token summary preserved"); let billing = hits .iter() .find(|(_, s)| s.module_path.as_deref() == Some("billing::invoice")) .expect("billing::invoice summary preserved"); // Cross-contamination guard: the two crates must not have their // caps unioned, that's the observable failure mode of a silent // merge. assert_eq!(auth.1.sink_caps, Cap::SHELL_ESCAPE.bits()); assert_eq!(billing.1.sink_caps, Cap::SQL_QUERY.bits()); assert_eq!(auth.1.sink_caps & Cap::SQL_QUERY.bits(), 0); assert_eq!(billing.1.sink_caps & Cap::SHELL_ESCAPE.bits(), 0); } #[test] fn insert_mismatched_kind_does_not_silently_merge() { // A free function and a method with the same name, arity, namespace, // and container ("" vs "") can't actually occur, but kind alone // mismatching does happen in interop configs where a getter is // described as a function. Make sure the two end up distinct. 
let mut gs = GlobalSummaries::new(); let f = legacy_summary( "src/a.java", "size", 0, vec![], FuncKind::Function, "Widget", 0, ); let g = legacy_summary( "src/a.java", "size", 0, vec![], FuncKind::Getter, "Widget", Cap::SHELL_ESCAPE.bits(), ); gs.insert(f.func_key(None), f); gs.insert(g.func_key(None), g); // Two distinct keys in the same-lang index. let hits = gs.lookup_same_lang(Lang::Java, "size"); assert_eq!(hits.len(), 2); // The getter's sink caps must not have been unioned into the // function, that would be a security-relevant leak. let func_hit = hits .iter() .find(|(k, _)| k.kind == FuncKind::Function) .expect("function summary kept separate"); assert_eq!( func_hit.1.sink_caps, 0, "function's sink caps must not absorb the getter's SHELL_ESCAPE" ); } #[test] fn insert_mismatched_param_names_does_not_silently_merge() { // Two overloads in Java/C++ with the same arity but different // parameter types/names, a classic case where arity-only identity // collapses distinct functions. Neither summary ships a disambig // because it was loaded from legacy JSON. let mut gs = GlobalSummaries::new(); let a = legacy_summary( "src/app.java", "handle", 1, vec!["msg".into()], FuncKind::Function, "", 0, ); let b = legacy_summary( "src/app.java", "handle", 1, vec!["event".into()], FuncKind::Function, "", Cap::SHELL_ESCAPE.bits(), ); gs.insert(a.func_key(None), a); gs.insert(b.func_key(None), b); let hits = gs.lookup_same_lang(Lang::Java, "handle"); assert_eq!( hits.len(), 2, "same name + arity + kind but different param_names → distinct functions" ); // Exactly one carries the sink cap. let sinky: Vec<_> = hits .iter() .filter(|(_, s)| s.sink_caps == Cap::SHELL_ESCAPE.bits()) .collect(); assert_eq!(sinky.len(), 1); } #[test] fn insert_synthetic_disambig_bit_set_only_for_collisions() { // A single legacy-style insert with `disambig: None` must NOT gain a // synthetic disambig, we only rekey to resolve collisions, never // speculatively. 
This prevents downstream lookups keyed with // `disambig: None` from spuriously missing legitimately-single // summaries. let mut gs = GlobalSummaries::new(); let sole = legacy_summary( "src/only.java", "alone", 0, vec![], FuncKind::Function, "", Cap::SHELL_ESCAPE.bits(), ); let key = sole.func_key(None); gs.insert(key.clone(), sole); assert_eq!(key.disambig, None); assert!(gs.get(&key).is_some(), "unique legacy insert keeps its key"); } #[test] fn insert_compatible_refinement_still_unions() { // Two summaries describing the same function (structurally identical // head, differing only on behaviour fields) must still union, the // tightened check doesn't regress the classic parallel-fold merge. let mut gs = GlobalSummaries::new(); let a = FuncSummary { name: "f".into(), file_path: "src/x.rs".into(), lang: "rust".into(), param_count: 1, param_names: vec!["x".into()], source_caps: Cap::ENV_VAR.bits(), container: "".into(), disambig: None, kind: FuncKind::Function, ..Default::default() }; let b = FuncSummary { name: "f".into(), file_path: "src/x.rs".into(), lang: "rust".into(), param_count: 1, param_names: vec!["x".into()], sink_caps: Cap::SHELL_ESCAPE.bits(), container: "".into(), disambig: None, kind: FuncKind::Function, ..Default::default() }; let k = a.func_key(None); gs.insert(k.clone(), a); gs.insert(k.clone(), b); let merged = gs.get(&k).expect("compatible summaries still merge"); assert_eq!(merged.source_caps, Cap::ENV_VAR.bits()); assert_eq!(merged.sink_caps, Cap::SHELL_ESCAPE.bits()); // Single entry, no accidental split for the compatible case. let hits = gs.lookup_same_lang(Lang::Rust, "f"); assert_eq!(hits.len(), 1); } #[test] fn insert_body_param_count_mismatch_rekeys() { // Two CalleeSsaBody instances arrive at the same FuncKey (both // `disambig: None`) but claim different `param_count`. Silently // replacing would lose the first body and mis-route future cross- // file symex resolutions to the second. 
let mut gs = GlobalSummaries::new(); let key = FuncKey { lang: Lang::Python, namespace: "mod.py".into(), name: "run".into(), arity: Some(2), ..Default::default() }; gs.insert_body(key.clone(), make_callee_body(2, 2)); // Incoming body with a different param_count, must not overwrite. gs.insert_body(key.clone(), make_callee_body(5, 4)); // Invariant 1: the original body stays at the original key (not // silently replaced by the param_count=4 body). let head = gs.get_body(&key).expect("original 2-param body kept"); assert_eq!(head.param_count, 2); // Invariant 2: the conflicting body is preserved under a synthetic // disambig, not dropped. Reconstruct the expected synth disambig // using the same formula as `reconcile_body_key`. let mut found_conflicting = false; let base = (4u32).wrapping_mul(0x9E37_79B9); for probe in 0u32..1024 { let synth = base.wrapping_add(probe); let synth_key = FuncKey { disambig: Some(0x8000_0000 | (synth & 0x7FFF_FFFF)), ..key.clone() }; if let Some(body) = gs.get_body(&synth_key) && body.param_count == 4 { found_conflicting = true; break; } } assert!( found_conflicting, "the 4-param body must be preserved under a synthetic disambig key" ); } #[test] fn insert_ssa_arity_overflow_rekeys() { // Key claims arity 1, but the incoming SSA summary references // param index 3, structurally impossible for the same function. // The fix must split so the key arity invariant is preserved. let mut gs = GlobalSummaries::new(); let key = FuncKey { lang: Lang::Python, namespace: "mod.py".into(), name: "f".into(), arity: Some(1), ..Default::default() }; let legit = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], ..Default::default() }; gs.insert_ssa(key.clone(), legit.clone()); assert_eq!( gs.get_ssa(&key).unwrap().param_to_return, vec![(0, TaintTransform::Identity)] ); // Bad-arity incoming summary, must not overwrite the legitimate one. 
let overflowing = SsaFuncSummary { param_to_return: vec![(3, TaintTransform::Identity)], param_to_sink: vec![(2, cap_sites(Cap::SQL_QUERY))], ..Default::default() }; gs.insert_ssa(key.clone(), overflowing); // Original summary still exactly intact at the original key. let kept = gs.get_ssa(&key).expect("legit summary not overwritten"); assert_eq!(kept.param_to_return, vec![(0, TaintTransform::Identity)]); assert!(kept.param_to_sink.is_empty()); } /// Audit gap A.2.1.G1 reproducer: a summary whose only param-index /// references come from synthetic SSA `Param` ops for external /// captures (free identifiers, module imports, unresolved method /// names) lands at the original key when no existing entry occupies /// it. /// /// This is the case `lower_to_ssa` produces for Java instance/static /// methods that reference free identifiers (e.g. `f.close()` where /// `close` is treated as an external capture, the synthetic Param 0 /// then leaks into `param_to_return`/`param_to_sink`). Without the /// audit-gap fix, `reconcile_ssa_summary_key` would synthesise a /// disambig and the analysis's `summaries.get_ssa(caller_key)` lookup /// (consuming `typed_call_receivers` at the FuncSummary-aligned key) /// would miss. #[test] fn insert_ssa_arity_overflow_keeps_original_key_when_no_collision() { // Single-file fresh insert: no prior entry at `key` to protect, so // the synthetic-Param overflow is treated as the function's own // signal and lands at the original FuncKey. let mut gs = GlobalSummaries::new(); let key = FuncKey { lang: Lang::Java, namespace: "Reader.java".into(), container: "Reader".into(), name: "read".into(), arity: Some(0), ..Default::default() }; let summary = SsaFuncSummary { // Synthetic Param-0 for the external `close` identifier inside // the static `read()` body, `param_count == 0` per the source- // level signature. 
param_to_return: vec![(0, TaintTransform::Identity)],
        typed_call_receivers: vec![(1, "FileHandle".to_string())],
        ..Default::default()
    };
    gs.insert_ssa(key.clone(), summary.clone());

    let kept = gs
        .get_ssa(&key)
        .expect("Reader::read SSA must be reachable at the FuncSummary-aligned key");
    assert_eq!(kept.typed_call_receivers, summary.typed_call_receivers);
    // The synthetic Param-0 reference is preserved verbatim; pass-2
    // analysis still aligns it with the caller's implicit-uses
    // argument group at the same index.
    assert_eq!(kept.param_to_return, summary.param_to_return);
}

/// Companion to `insert_ssa_arity_overflow_keeps_original_key_when_no_collision`:
/// when both rounds of an iterative scan produce summaries whose
/// param-index references overflow the FuncKey arity (the same
/// synthetic-Param signal each round), the second-round insert must
/// land at the original key (last-writer-wins for the same function),
/// not split off into a synthetic disambig.
#[test]
fn insert_ssa_arity_overflow_iterative_rescan_stays_at_original_key() {
    let mut gs = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Java,
        namespace: "Reader.java".into(),
        container: "Reader".into(),
        name: "read".into(),
        arity: Some(0),
        ..Default::default()
    };
    let round1 = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::Identity)],
        typed_call_receivers: vec![(1, "FileHandle".to_string())],
        ..Default::default()
    };
    gs.insert_ssa(key.clone(), round1);

    // Iteration 2 of the scan loop produces the same shape with
    // refined typed_call_receivers (e.g. a new constructor type
    // discovered cross-file).
let round2 = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::Identity)],
        typed_call_receivers: vec![(1, "FileHandle".to_string()), (2, "Cache".to_string())],
        ..Default::default()
    };
    gs.insert_ssa(key.clone(), round2.clone());

    let kept = gs
        .get_ssa(&key)
        .expect("iterative-rescan summary must stay at the original key");
    assert_eq!(kept.typed_call_receivers, round2.typed_call_receivers);
    assert_eq!(kept.param_to_return, round2.param_to_return);
}

// ── Primary sink-location attribution, SinkSite round-trips ────────────

#[test]
fn sink_site_serde_round_trip_solo() {
    // A fully populated site must survive a JSON round trip unchanged.
    let site = SinkSite {
        file_rel: "src/auth/token.rs".into(),
        line: 42,
        col: 9,
        snippet: "Command::new(\"sh\").arg(cmd).status()".into(),
        cap: Cap::CODE_EXEC | Cap::SHELL_ESCAPE,
        from_chain: false,
    };
    let json = serde_json::to_string(&site).unwrap();
    let back: SinkSite = serde_json::from_str(&json).unwrap();
    assert_eq!(site, back);
}

#[test]
fn sink_site_serde_round_trip_cap_only_defaults() {
    // Extraction paths without tree access produce cap-only sites. The
    // `skip_serializing_if` default attributes let the JSON drop empty
    // fields, and deserialisation must recover the same value.
    let site = SinkSite::cap_only(Cap::SQL_QUERY);
    let json = serde_json::to_string(&site).unwrap();
    // Zero/empty fields are dropped by `skip_serializing_if`.
assert!(!json.contains("\"line\""));
    assert!(!json.contains("\"col\""));
    assert!(!json.contains("\"file_rel\""));
    assert!(!json.contains("\"snippet\""));
    let back: SinkSite = serde_json::from_str(&json).unwrap();
    assert_eq!(site, back);
}

#[test]
fn ssa_summary_serde_round_trip_with_sink_sites() {
    // A summary carrying per-parameter sink sites must round-trip through
    // JSON, and the cap-derivation helpers must agree with the sites.
    use smallvec::smallvec;
    let site_a = SinkSite {
        file_rel: "db.py".into(),
        line: 10,
        col: 4,
        snippet: "cursor.execute(sql)".into(),
        cap: Cap::SQL_QUERY,
        from_chain: false,
    };
    let site_b = SinkSite {
        file_rel: "exec.py".into(),
        line: 33,
        col: 12,
        snippet: "subprocess.call(cmd, shell=True)".into(),
        cap: Cap::CODE_EXEC | Cap::SHELL_ESCAPE,
        from_chain: false,
    };
    let summary = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::Identity)],
        param_to_sink: vec![
            (0, smallvec![site_a.clone(), site_b.clone()]),
            (1, smallvec![site_b.clone()]),
        ],
        source_caps: Cap::empty(),
        ..Default::default()
    };
    let json = serde_json::to_string(&summary).unwrap();
    let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
    assert_eq!(summary, back);

    // Cap-derivation helpers still produce the expected unions.
    let caps = back.param_to_sink_caps();
    assert_eq!(caps.len(), 2);
    assert!(
        caps.iter()
            .any(|&(i, c)| i == 0 && c == (site_a.cap | site_b.cap))
    );
    assert!(caps.iter().any(|&(i, c)| i == 1 && c == site_b.cap));
    assert_eq!(back.total_param_sink_caps(), site_a.cap | site_b.cap);
}

#[test]
fn ssa_summary_deserialize_legacy_param_to_sink_missing_defaults_empty() {
    // Legacy summaries omitted the new field entirely. The
    // `#[serde(default)]` attribute must carry the missing field through as
    // an empty vec rather than erroring out.
let json = r#"{ "param_to_return": [], "source_caps": 0 }"#; let back: SsaFuncSummary = serde_json::from_str(json).unwrap(); assert!(back.param_to_sink.is_empty()); assert_eq!(back.total_param_sink_caps(), Cap::empty()); } #[test] fn func_summary_deserialize_legacy_param_to_sink_missing_defaults_empty() { let json = r#"{ "name": "legacy", "file_path": "app.py", "lang": "python", "param_count": 1, "param_names": ["data"], "source_caps": 0, "sanitizer_caps": 0, "sink_caps": 0, "tainted_sink_params": [] }"#; let back: FuncSummary = serde_json::from_str(json).unwrap(); assert!(back.param_to_sink.is_empty()); } #[test] fn merge_unions_sink_sites_with_dedup() { use smallvec::smallvec; let key = FuncKey { lang: Lang::Python, namespace: "svc.py".into(), name: "run".into(), arity: Some(1), ..Default::default() }; let site_a = SinkSite { file_rel: "svc.py".into(), line: 10, col: 1, snippet: "execute(sql)".into(), cap: Cap::SQL_QUERY, from_chain: false, }; let site_b = SinkSite { file_rel: "svc.py".into(), line: 20, col: 4, snippet: "os.system(cmd)".into(), cap: Cap::CODE_EXEC, from_chain: false, }; let mut left = FuncSummary { name: "run".into(), file_path: "svc.py".into(), lang: "python".into(), param_count: 1, param_names: vec!["x".into()], param_to_sink: vec![(0, smallvec![site_a.clone()])], ..Default::default() }; let right = FuncSummary { name: "run".into(), file_path: "svc.py".into(), lang: "python".into(), param_count: 1, param_names: vec!["x".into()], // Mix a duplicate of site_a (same file/line/col/cap) with a new site. param_to_sink: vec![(0, smallvec![site_a.clone(), site_b.clone()])], ..Default::default() }; let mut gs = GlobalSummaries::new(); gs.insert(key.clone(), left.clone()); gs.insert(key.clone(), right); let merged = gs.get(&key).unwrap(); assert_eq!(merged.param_to_sink.len(), 1); let (_, sites) = &merged.param_to_sink[0]; // Exactly two distinct sites survive; the duplicate was deduped. 
assert_eq!(sites.len(), 2); assert!(sites.iter().any(|s| s.dedup_key() == site_a.dedup_key())); assert!(sites.iter().any(|s| s.dedup_key() == site_b.dedup_key())); // Idempotent: re-inserting the original left introduces no new sites. left.param_to_sink = vec![(0, smallvec![site_a.clone()])]; gs.insert(key.clone(), left); let merged = gs.get(&key).unwrap(); assert_eq!(merged.param_to_sink[0].1.len(), 2); } // ── Per-return-path decomposition ─────────────────────────────────────── use super::ssa_summary::{ MAX_RETURN_PATHS, ReturnPathTransform, merge_return_paths, union_param_return_paths, }; fn rpt(transform: TaintTransform, hash: u64, kt: u8, kf: u8) -> ReturnPathTransform { ReturnPathTransform { transform, path_predicate_hash: hash, known_true: kt, known_false: kf, abstract_contribution: None, } } #[test] fn cf4_return_path_transform_serde_round_trip() { let summary = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![( 0, smallvec![ rpt(TaintTransform::Identity, 0x1234, 0b001, 0), rpt( TaintTransform::StripBits(Cap::HTML_ESCAPE), 0x5678, 0, 0b010 ), ], )], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(summary, back); // Missing-field backwards compat: older JSON without `param_return_paths` // deserialises cleanly with an empty vector. 
let legacy = r#"{"param_to_return":[],"source_caps":0}"#; let legacy_back: SsaFuncSummary = serde_json::from_str(legacy).unwrap(); assert!(legacy_back.param_return_paths.is_empty()); } #[test] fn cf4_merge_return_paths_dedup_by_key() { let mut existing: SmallVec<[ReturnPathTransform; 2]> = SmallVec::new(); let incoming = [ rpt(TaintTransform::Identity, 1, 0, 0), rpt(TaintTransform::StripBits(Cap::HTML_ESCAPE), 2, 0, 0), rpt(TaintTransform::Identity, 1, 0, 0), // dup of first ]; merge_return_paths(&mut existing, &incoming); assert_eq!(existing.len(), 2, "duplicate path hash+transform collapsed"); assert!( existing .iter() .any(|e| matches!(e.transform, TaintTransform::Identity) && e.path_predicate_hash == 1) ); assert!(existing.iter().any( |e| matches!(&e.transform, TaintTransform::StripBits(b) if *b == Cap::HTML_ESCAPE) && e.path_predicate_hash == 2 )); } #[test] fn cf4_merge_return_paths_caps_at_max() { let mut existing: SmallVec<[ReturnPathTransform; 2]> = SmallVec::new(); let many: Vec = (0..(MAX_RETURN_PATHS as u64 + 3)) .map(|i| rpt(TaintTransform::StripBits(Cap::HTML_ESCAPE), i + 10, 0, 0)) .collect(); merge_return_paths(&mut existing, &many); assert_eq!( existing.len(), 1, "overflow collapses to a single Top-predicate entry" ); // Joined entry has no predicate gate (hash=0) and conservatively takes // the intersection of all strip bits, which here is HTML_ESCAPE. let joined = &existing[0]; assert_eq!(joined.path_predicate_hash, 0); assert!(matches!( &joined.transform, TaintTransform::StripBits(b) if *b == Cap::HTML_ESCAPE )); } #[test] fn cf4_merge_return_paths_overflow_with_mixed_kinds() { let mut existing: SmallVec<[ReturnPathTransform; 2]> = SmallVec::new(); let mut many: Vec = (0..(MAX_RETURN_PATHS as u64 + 1)) .map(|i| rpt(TaintTransform::StripBits(Cap::HTML_ESCAPE), i + 10, 0, 0)) .collect(); // One identity path forces the join to degrade to Identity (nothing // stripped on every path). 
many.push(rpt(TaintTransform::Identity, 99, 0, 0)); merge_return_paths(&mut existing, &many); assert_eq!(existing.len(), 1); assert!(matches!(existing[0].transform, TaintTransform::Identity)); } #[test] fn cf4_merge_return_paths_joins_abstract_contribution_on_collision() { use crate::abstract_interp::{AbstractValue, BitFact, IntervalFact, PathFact, StringFact}; let av_a = AbstractValue { interval: IntervalFact::exact(0), string: StringFact::top(), bits: BitFact::top(), path: PathFact::top(), }; let av_b = AbstractValue { interval: IntervalFact::exact(10), string: StringFact::top(), bits: BitFact::top(), path: PathFact::top(), }; let mut first = rpt(TaintTransform::Identity, 42, 0, 0); first.abstract_contribution = Some(av_a.clone()); let mut second = rpt(TaintTransform::Identity, 42, 0, 0); second.abstract_contribution = Some(av_b.clone()); let mut existing: SmallVec<[ReturnPathTransform; 2]> = SmallVec::new(); merge_return_paths(&mut existing, &[first]); merge_return_paths(&mut existing, &[second]); assert_eq!(existing.len(), 1, "same key, merged"); // The abstract_contribution is the join of the two inputs. 
let joined = existing[0].abstract_contribution.as_ref().unwrap(); let expected = av_a.join(&av_b); assert_eq!(joined, &expected); } #[test] fn cf4_union_param_return_paths_by_index() { let mut existing: Vec<(usize, SmallVec<[ReturnPathTransform; 2]>)> = vec![(0, smallvec![rpt(TaintTransform::Identity, 1, 0, 0)])]; let incoming: Vec<(usize, SmallVec<[ReturnPathTransform; 2]>)> = vec![ ( 0, smallvec![rpt(TaintTransform::StripBits(Cap::HTML_ESCAPE), 2, 0, 0)], ), (1, smallvec![rpt(TaintTransform::Identity, 3, 0, 0)]), ]; union_param_return_paths(&mut existing, &incoming); assert_eq!(existing.len(), 2); let (_, p0) = existing.iter().find(|(i, _)| *i == 0).unwrap(); assert_eq!(p0.len(), 2, "per-param merge preserves both predicates"); let (_, p1) = existing.iter().find(|(i, _)| *i == 1).unwrap(); assert_eq!(p1.len(), 1); } #[test] fn cf4_ssa_summary_fits_arity_keeps_out_of_range_path_idx_at_original_key() { // A path whose param index exceeds the key's arity is treated as a // synthetic external-capture artefact (audit gap A.2.1.G1, see // `project_typed_callgraph_audit_gap_ssa_disambig.md`). When no // existing entry sits at the key, `insert_ssa` keeps the (untrimmed) // summary at the original key so the SSA FuncKey stays aligned with // the matching FuncSummary FuncKey, the analysis's // `summaries.get_ssa(caller_key)` lookup (consuming // `typed_call_receivers`) depends on this alignment. 
let bad = SsaFuncSummary { param_return_paths: vec![(5, smallvec![rpt(TaintTransform::Identity, 1, 0, 0)])], ..Default::default() }; let key = FuncKey { lang: Lang::Rust, namespace: "test.rs".into(), name: "helper".into(), arity: Some(2), // too small for idx 5, synthetic-Param marker ..Default::default() }; let mut gs = GlobalSummaries::new(); gs.insert_ssa(key.clone(), bad); let kept = gs .get_ssa(&key) .expect("synthetic-Param summary inserted at original key"); assert_eq!(kept.param_return_paths.len(), 1); assert_eq!(kept.param_return_paths[0].0, 5); } // ── Parameter-granularity points-to summary ───────────────────────────── #[test] fn cf6_ssa_summary_serde_round_trip_with_points_to() { use crate::summary::points_to::{AliasKind, AliasPosition, PointsToSummary}; let mut pts = PointsToSummary::empty(); pts.insert( AliasPosition::Param(0), AliasPosition::Param(1), AliasKind::MayAlias, ); pts.insert( AliasPosition::Param(0), AliasPosition::Return, AliasKind::MayAlias, ); let summary = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], points_to: pts.clone(), ..Default::default() }; let json = serde_json::to_string(&summary).unwrap(); let back: SsaFuncSummary = serde_json::from_str(&json).unwrap(); assert_eq!(summary, back); assert_eq!(back.points_to, pts); } #[test] fn cf6_ssa_summary_legacy_json_without_points_to_deserialises() { // Older on-disk JSON predates points-to tracking. The serde(default) on // `points_to` must let those rows load cleanly with an empty // alias graph. 
let legacy = r#"{ "param_to_return": [[0, "Identity"]], "source_caps": 0, "param_to_sink": [] }"#; let back: SsaFuncSummary = serde_json::from_str(legacy).unwrap(); assert!(back.points_to.edges.is_empty()); assert!(!back.points_to.overflow); } #[test] fn cf6_ssa_summary_fits_arity_keeps_out_of_range_points_to_idx_at_original_key() { // Same arity-overflow handling as `cf4_ssa_summary_fits_arity_*` // for the points-to channel: when the summary references a // synthetic-Param index beyond `key.arity` and no existing entry // occupies the key, `insert_ssa` preserves the FuncKey-aligned // identity by inserting at the original key (audit gap A.2.1.G1). use crate::summary::points_to::{AliasKind, AliasPosition, PointsToSummary}; let mut pts = PointsToSummary::empty(); pts.insert( AliasPosition::Param(7), AliasPosition::Return, AliasKind::MayAlias, ); let bad = SsaFuncSummary { points_to: pts, ..Default::default() }; let key = FuncKey { lang: Lang::Rust, namespace: "test.rs".into(), name: "helper".into(), arity: Some(2), ..Default::default() }; let mut gs = GlobalSummaries::new(); gs.insert_ssa(key.clone(), bad); let kept = gs .get_ssa(&key) .expect("synthetic-Param points_to summary inserted at original key"); assert_eq!(kept.points_to.max_param_index(), Some(7)); } /// two `findById` /// definitions on different containers must remain structurally /// disjoint after [`merge_summaries`], no cap union may leak /// across them. The FuncKey identity model already keys on /// `(lang, namespace, container, name, arity, ...)` so this is /// supposed to be true today; the test pins it down so a future /// refactor can't silently widen the merge granularity. /// /// Concretely: `Repository::findById` is parameterised (no /// `SQL_QUERY` sink cap), `UnsafeCache::findById` runs a string- /// concatenated query (carries `Cap::SQL_QUERY`). After merge, /// each FuncKey must own only its own caps, Repository must NOT /// inherit Cache's `SQL_QUERY` bit. 
#[test] fn cross_file_devirt_does_not_union_unrelated_findbyids() { use crate::labels::Cap; use crate::symbol::FuncKey; fn method_summary(name: &str, container: &str, file: &str, sink_caps: u32) -> FuncSummary { FuncSummary { name: name.into(), file_path: file.into(), lang: "rust".into(), param_count: 1, param_names: vec!["id".into()], source_caps: 0, sanitizer_caps: 0, sink_caps, propagating_params: vec![], propagates_taint: false, tainted_sink_params: if sink_caps != 0 { vec![0] } else { vec![] }, callees: vec![], container: container.into(), ..Default::default() } } let safe_repo = method_summary("findById", "Repository", "src/repo.rs", 0); let unsafe_cache = method_summary( "findById", "UnsafeCache", "src/cache.rs", Cap::SQL_QUERY.bits(), ); let gs = merge_summaries(vec![safe_repo, unsafe_cache], None); // Two distinct keys must coexist, no merge collision. let repo_key = FuncKey { lang: Lang::Rust, namespace: "src/repo.rs".into(), container: "Repository".into(), name: "findById".into(), arity: Some(1), ..Default::default() }; let cache_key = FuncKey { lang: Lang::Rust, namespace: "src/cache.rs".into(), container: "UnsafeCache".into(), name: "findById".into(), arity: Some(1), ..Default::default() }; let repo_sum = gs.get(&repo_key).expect("Repository::findById missing"); let cache_sum = gs.get(&cache_key).expect("UnsafeCache::findById missing"); // Sink caps stay on their own owner, the whole point of // devirtualisation. Repository must not have inherited the // SQL_QUERY bit from UnsafeCache. assert_eq!( repo_sum.sink_caps, 0, "Repository::findById inherited a sink cap from UnsafeCache::findById — \ the per-FuncKey identity model has been broken (sink_caps bits = {:#x})", repo_sum.sink_caps, ); assert_eq!( cache_sum.sink_caps, Cap::SQL_QUERY.bits(), "UnsafeCache::findById lost its own sink cap during merge" ); // Same invariant on tainted_sink_params, must not bleed across. 
assert!( repo_sum.tainted_sink_params.is_empty(), "Repository::findById inherited tainted_sink_params from UnsafeCache: {:?}", repo_sum.tainted_sink_params, ); assert_eq!(cache_sum.tainted_sink_params, vec![0]); } /// Cross-file router-dep resolution: parent `__init__.py` declares /// `Security(...)` deps on a router and lifts them onto a child via /// `.include_router(., ...)`. The /// resolution must produce a ` → Vec<(CallSite, scoped)>` /// map for the child file's `module_id`, and absent edges must yield /// empty. #[test] fn resolve_cross_file_router_deps_lifts_parent_security_dep_onto_child_router() { use crate::auth_analysis::model::CallSite; use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge}; let mut gs = GlobalSummaries::new(); // Parent (__init__.py) declares scoped Security on `authenticated_router` // and emits two include_router edges (task_instances + dag_runs). let parent_callsite = CallSite { name: "require_auth".into(), args: Vec::new(), span: (0, 0), args_value_refs: Vec::new(), }; let mut parent_facts = PerFileRouterFacts::default(); parent_facts.local_router_deps.insert( "authenticated_router".into(), vec![(parent_callsite.clone(), true)], ); parent_facts.include_router_edges.push(RouterIncludeEdge { parent_var: "authenticated_router".into(), child_module_id: "task_instances".into(), child_var: "router".into(), }); parent_facts.include_router_edges.push(RouterIncludeEdge { parent_var: "authenticated_router".into(), child_module_id: "dag_runs".into(), child_var: "router".into(), }); gs.insert_router_facts("routes::__init__".into(), parent_facts); // Child (task_instances.py) declares a bare router → expects to // inherit the parent's deps via the cross-file resolution. gs.insert_router_facts("task_instances".into(), PerFileRouterFacts::default()); // Resolve for task_instances → should get one entry under `router` // carrying the require_auth (scoped=true) dep. 
let resolved = gs.resolve_cross_file_router_deps("task_instances"); let deps = resolved.get("router").expect("router child resolved"); assert_eq!(deps.len(), 1); assert_eq!(deps[0].0.name, "require_auth"); assert!(deps[0].1, "scoped flag preserved"); // dag_runs has the same parent → same lift. let resolved_dag = gs.resolve_cross_file_router_deps("dag_runs"); assert_eq!(resolved_dag.get("router").map(|v| v.len()), Some(1)); // Unrelated module → no lift. let resolved_other = gs.resolve_cross_file_router_deps("nonexistent"); assert!(resolved_other.is_empty()); } /// Edge: parent without local deps for the named var emits nothing — /// the resolver requires both an edge AND a non-empty parent dep list. #[test] fn resolve_cross_file_router_deps_skips_edges_with_no_parent_deps() { use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge}; let mut gs = GlobalSummaries::new(); let mut parent = PerFileRouterFacts::default(); parent.include_router_edges.push(RouterIncludeEdge { parent_var: "ghost_router".into(), child_module_id: "child".into(), child_var: "router".into(), }); gs.insert_router_facts("parent".into(), parent); let resolved = gs.resolve_cross_file_router_deps("child"); assert!(resolved.is_empty()); } /// Multiple parents declaring different deps for the same child /// accumulate without duplication. Same dep declared twice (one /// from each parent) must dedup by (callee.name, scoped). #[test] fn resolve_cross_file_router_deps_dedups_duplicate_parent_deps() { use crate::auth_analysis::model::CallSite; use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge}; let cs = CallSite { name: "require_auth".into(), args: Vec::new(), span: (0, 0), args_value_refs: Vec::new(), }; let mut gs = GlobalSummaries::new(); // Parent A: include_router(child.router) with `require_auth` dep. 
let mut p_a = PerFileRouterFacts::default(); p_a.local_router_deps .insert("router_a".into(), vec![(cs.clone(), true)]); p_a.include_router_edges.push(RouterIncludeEdge { parent_var: "router_a".into(), child_module_id: "child".into(), child_var: "router".into(), }); gs.insert_router_facts("parent_a".into(), p_a); // Parent B: SAME dep, different parent file. let mut p_b = PerFileRouterFacts::default(); p_b.local_router_deps .insert("router_b".into(), vec![(cs, true)]); p_b.include_router_edges.push(RouterIncludeEdge { parent_var: "router_b".into(), child_module_id: "child".into(), child_var: "router".into(), }); gs.insert_router_facts("parent_b".into(), p_b); let resolved = gs.resolve_cross_file_router_deps("child"); let deps = resolved.get("router").expect("router resolved"); assert_eq!(deps.len(), 1, "duplicate (callee, scoped) deduplicated"); } // ── the analysis ──────────────────── // // `GlobalSummaries::resolve_callee_widened` is the runtime counterpart of // the call-graph builder's `TypeHierarchyIndex::resolve_with_hierarchy`. // These tests pin the contract that *every* concrete implementer is // reachable when the receiver type is statically a super-class / trait / // interface, with the explicit fall-throughs that preserve today's // behaviour when no fan-out applies. mod hierarchy_widened_tests { use super::*; /// Build a minimal `(FuncKey, FuncSummary)` for a method on the /// given container with optional `hierarchy_edges` carried through. fn java_method( namespace: &str, container: &str, name: &str, arity: usize, sink_bits: u32, hierarchy_edges: Vec<(String, String)>, ) -> (FuncKey, FuncSummary) { let (key, mut summary) = fs_with( namespace, container, name, arity, FuncKind::Method, Some((namespace.len() + container.len() + name.len()) as u32), sink_bits, ); summary.hierarchy_edges = hierarchy_edges; (key, summary) } /// A1, no hierarchy installed. Widening collapses to today's /// single-result behaviour: one key in / one key out. 
#[test] fn widened_without_hierarchy_returns_single_resolved() { let mut gs = GlobalSummaries::new(); let (k, s) = java_method("src/http.java", "HttpClient", "send", 1, 0x01, vec![]); gs.insert(k.clone(), s); // Hierarchy is intentionally NOT installed. let widened = gs.resolve_callee_widened(&CalleeQuery { name: "send", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("HttpClient"), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!(widened, vec![k]); } /// A2, hierarchy installed but the receiver type has no recorded /// sub-types. Falls through to today's single-result behaviour. #[test] fn widened_no_subtypes_returns_single() { let mut gs = GlobalSummaries::new(); let (k, s) = java_method("src/http.java", "HttpClient", "send", 1, 0x01, vec![]); gs.insert(k.clone(), s); gs.install_hierarchy(); let widened = gs.resolve_callee_widened(&CalleeQuery { name: "send", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("HttpClient"), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!(widened, vec![k]); } /// A3, hierarchy with one sub-type implementer. Widening returns /// both the direct receiver match and the sub-type's match. #[test] fn widened_one_subtype_returns_two_keys() { let mut gs = GlobalSummaries::new(); // Carrier: ILogger -> ConsoleLogger edge. 
let (k_iface, s_iface) = java_method( "src/logger.java", "ILogger", "log", 1, 0x00, vec![("ConsoleLogger".to_string(), "ILogger".to_string())], ); let (k_impl, s_impl) = java_method("src/logger.java", "ConsoleLogger", "log", 1, 0x01, vec![]); gs.insert(k_iface.clone(), s_iface); gs.insert(k_impl.clone(), s_impl); gs.install_hierarchy(); let widened = gs.resolve_callee_widened(&CalleeQuery { name: "log", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("ILogger"), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!( widened.len(), 2, "expected ILogger + ConsoleLogger fan-out, got {widened:?}" ); assert!(widened.contains(&k_iface)); assert!(widened.contains(&k_impl)); } /// A4, hierarchy with multiple sub-types: every implementer's /// matching method is in the result, deduplicated. #[test] fn widened_multiple_subtypes_returns_all() { let mut gs = GlobalSummaries::new(); // Three impls + one interface. The interface itself has no // body so we omit a method on it (that is the more common // shape, a pure interface plus concrete classes). 
let edges = vec![ ("FileLogger".to_string(), "ILogger".to_string()), ("NetLogger".to_string(), "ILogger".to_string()), ("StdLogger".to_string(), "ILogger".to_string()), ]; let (k_file, s_file) = java_method( "src/file_logger.java", "FileLogger", "log", 1, 0x01, edges.clone(), ); let (k_net, s_net) = java_method("src/net_logger.java", "NetLogger", "log", 1, 0x02, vec![]); let (k_std, s_std) = java_method("src/std_logger.java", "StdLogger", "log", 1, 0x04, vec![]); gs.insert(k_file.clone(), s_file); gs.insert(k_net.clone(), s_net); gs.insert(k_std.clone(), s_std); gs.install_hierarchy(); let widened = gs.resolve_callee_widened(&CalleeQuery { name: "log", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("ILogger"), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!(widened.len(), 3, "expected three impls, got {widened:?}"); assert!(widened.contains(&k_file)); assert!(widened.contains(&k_net)); assert!(widened.contains(&k_std)); } /// A5, the arity filter must apply across the whole fan-out, not /// just the direct-receiver leg. An implementer with a different /// arity must not leak into the result. 
#[test] fn widened_arity_filter_applies_across_fanout() { let mut gs = GlobalSummaries::new(); let edges = vec![ ("OneArg".to_string(), "IBase".to_string()), ("TwoArg".to_string(), "IBase".to_string()), ]; let (k_one, s_one) = java_method("src/one.java", "OneArg", "do_it", 1, 0x01, edges.clone()); let (k_two, s_two) = java_method("src/two.java", "TwoArg", "do_it", 2, 0x02, vec![]); gs.insert(k_one.clone(), s_one); gs.insert(k_two.clone(), s_two); gs.install_hierarchy(); let widened = gs.resolve_callee_widened(&CalleeQuery { name: "do_it", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("IBase"), namespace_qualifier: None, receiver_var: None, arity: Some(1), }); assert_eq!(widened, vec![k_one], "arity-2 impl must be filtered out"); } /// A6, fan-out is bounded at `MAX_HIERARCHY_FANOUT`. Build a /// hierarchy with more impls than the cap allows and assert the /// result is exactly capped (and that early impls are preserved ///, the cap drops the *tail*, not the head). #[test] fn widened_caps_at_max_hierarchy_fanout() { let cap = GlobalSummaries::MAX_HIERARCHY_FANOUT; let mut gs = GlobalSummaries::new(); // Build cap+3 impls so we can assert the tail truncates and a // deterministic prefix remains. let extra = 3; let total = cap + extra; let edges: Vec<(String, String)> = (0..total) .map(|i| (format!("Impl{i:02}"), "IBase".to_string())) .collect(); // Carrier, first impl carries every edge so the index is // populated in one shot. 
let (k0, s0) = java_method("src/impl00.java", "Impl00", "run", 0, 0x01, edges); gs.insert(k0.clone(), s0); for i in 1..total { let (k, s) = java_method( &format!("src/impl{i:02}.java"), &format!("Impl{i:02}"), "run", 0, 0x01, vec![], ); gs.insert(k, s); } gs.install_hierarchy(); let widened = gs.resolve_callee_widened(&CalleeQuery { name: "run", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("IBase"), namespace_qualifier: None, receiver_var: None, arity: Some(0), }); assert_eq!( widened.len(), cap, "fan-out must cap at MAX_HIERARCHY_FANOUT={cap}, got {}", widened.len() ); } /// A7, when hierarchy widening produces no candidates AND the /// receiver_type lookup is authoritative (Step 1), the secondary /// fall-through goes through `resolve_callee` which returns /// Ambiguous/NotFound rather than silently picking an unrelated /// leaf, exactly the "subset of today's targets, never a /// superset" rule. Test asserts the empty result is preserved. #[test] fn widened_empty_does_not_silently_pick_unrelated_leaf() { let mut gs = GlobalSummaries::new(); // Edge: IUnused has a sub Used, but neither declares // `something`. An unrelated free function `something` exists // in the same namespace, under today's authoritative // receiver_type rules, that function MUST NOT be picked when // the call is annotated with receiver_type "IUnused". 
let edges = vec![("Used".to_string(), "IUnused".to_string())]; let (k_carrier, s_carrier) = java_method("src/util.java", "Used", "carrier", 0, 0x00, edges); let (k_free, s_free) = free_summary("src/app.java", "something", 0, 0x01); gs.insert(k_carrier, s_carrier); gs.insert(k_free, s_free); gs.install_hierarchy(); let widened = gs.resolve_callee_widened(&CalleeQuery { name: "something", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: Some("IUnused"), namespace_qualifier: None, receiver_var: None, arity: Some(0), }); assert!( widened.is_empty(), "receiver_type IUnused with no matching method must NOT silently \ pick an unrelated free function — got {widened:?}" ); } /// A7b, when hierarchy widening produces nothing AND today's /// `resolve_callee` *does* resolve (no receiver_type, just bare /// leaf or qualifier hint), the fallback returns the single key. /// This pins the secondary-fallback contract on the path where it /// actually matters (no authoritative receiver_type). #[test] fn widened_falls_through_when_resolve_callee_resolves() { let mut gs = GlobalSummaries::new(); let (k_free, s_free) = free_summary("src/app.java", "helper", 0, 0x01); gs.insert(k_free.clone(), s_free); gs.install_hierarchy(); // No receiver_type → first branch of `resolve_callee_widened` // is the single-result fallback path. let widened = gs.resolve_callee_widened(&CalleeQuery { name: "helper", caller_lang: Lang::Java, caller_namespace: "src/app.java", caller_container: None, receiver_type: None, namespace_qualifier: None, receiver_var: None, arity: Some(0), }); assert_eq!(widened, vec![k_free]); } /// A8, receiver_type is None → no widening; behaves identically /// to `resolve_callee` (single-result wrap). 
#[test]
    fn widened_no_receiver_type_collapses_to_resolve_callee() {
        let mut gs = GlobalSummaries::new();
        let (k_free, s_free) = free_summary("src/app.java", "helper", 0, 0x01);
        gs.insert(k_free.clone(), s_free);
        gs.install_hierarchy();

        let query = CalleeQuery {
            name: "helper",
            caller_lang: Lang::Java,
            caller_namespace: "src/app.java",
            caller_container: None,
            receiver_type: None,
            namespace_qualifier: None,
            receiver_var: None,
            arity: Some(0),
        };
        let widened = gs.resolve_callee_widened(&query);
        assert_eq!(widened, vec![k_free]);
    }

    /// A9: `merge()` must invalidate the cached hierarchy index so a
    /// post-merge call to `resolve_callee_widened` doesn't look up a
    /// stale view. Since `install_hierarchy` is required after merges,
    /// the test asserts: post-merge, before reinstall, fan-out must
    /// fall through to single-result behaviour.
    #[test]
    fn merge_invalidates_hierarchy_cache() {
        let mut gs_a = GlobalSummaries::new();
        let edges = vec![("Sub".to_string(), "Super".to_string())];
        let (k_super, s_super) = java_method("src/super.java", "Super", "m", 0, 0x00, edges);
        let (k_sub, s_sub) = java_method("src/sub.java", "Sub", "m", 0, 0x01, vec![]);
        gs_a.insert(k_super.clone(), s_super);
        gs_a.insert(k_sub.clone(), s_sub);
        gs_a.install_hierarchy();

        // The same query is issued at each of the three checkpoints.
        let query = CalleeQuery {
            name: "m",
            caller_lang: Lang::Java,
            caller_namespace: "src/app.java",
            caller_container: None,
            receiver_type: Some("Super"),
            namespace_qualifier: None,
            receiver_var: None,
            arity: Some(0),
        };

        // Before merge: fan-out works.
        let pre_merge = gs_a.resolve_callee_widened(&query);
        assert_eq!(pre_merge.len(), 2);

        // Merge in an empty `gs_b`, should invalidate the cached
        // hierarchy.
        gs_a.merge(GlobalSummaries::new());
        assert!(
            gs_a.hierarchy().is_none(),
            "merge() must clear the cached hierarchy"
        );

        // After merge, before reinstall: the resolver must fall back
        // to single-result behaviour (no fan-out).
        let post_merge_no_install = gs_a.resolve_callee_widened(&query);
        assert_eq!(post_merge_no_install.len(), 1);
        assert_eq!(post_merge_no_install[0], k_super);

        // After reinstall: fan-out is restored.
        gs_a.install_hierarchy();
        let post_merge_reinstalled = gs_a.resolve_callee_widened(&query);
        assert_eq!(post_merge_reinstalled.len(), 2);
        assert!(post_merge_reinstalled.contains(&k_super));
        assert!(post_merge_reinstalled.contains(&k_sub));
    }
}

/// A cross-package import map inserted for a namespace can be looked up
/// again by that namespace; unknown namespaces miss, and inserting an empty
/// map leaves the index size unchanged.
#[test]
fn cross_package_imports_round_trip_via_global_summaries() {
    use crate::symbol::{FuncKey, FuncKind, Lang};

    let mut gs = GlobalSummaries::new();
    let mut map: std::collections::HashMap<String, FuncKey> = std::collections::HashMap::new();
    map.insert(
        "escape".to_string(),
        FuncKey {
            lang: Lang::TypeScript,
            namespace: "packages/util/src/escape.ts".to_string(),
            container: String::new(),
            name: "escape".to_string(),
            arity: None,
            disambig: None,
            kind: FuncKind::Function,
        },
    );
    let arc = std::sync::Arc::new(map);
    gs.insert_cross_package_imports("apps/api/handler.ts".to_string(), arc);
    assert_eq!(gs.cross_package_imports_len(), 1);

    let looked_up = gs
        .get_cross_package_imports("apps/api/handler.ts")
        .expect("namespace lookup must hit");
    assert_eq!(looked_up.len(), 1);
    assert!(looked_up.contains_key("escape"));
    assert!(gs.get_cross_package_imports("missing").is_none());

    // Inserting an empty map is a no-op so the index does not get
    // polluted with bookkeeping rows when a file's resolver produces
    // no resolved bindings.
    gs.insert_cross_package_imports(
        "apps/api/no_imports.ts".to_string(),
        std::sync::Arc::new(std::collections::HashMap::new()),
    );
    assert_eq!(gs.cross_package_imports_len(), 1);
}

/// Merging two `GlobalSummaries` keeps the cross-package import maps of
/// both sides, each under its own namespace.
#[test]
fn cross_package_imports_merged_across_thread_local_summaries() {
    use crate::symbol::{FuncKey, FuncKind, Lang};

    let mut gs_a = GlobalSummaries::new();
    let mut map_a: std::collections::HashMap<String, FuncKey> = std::collections::HashMap::new();
    map_a.insert(
        "escape".to_string(),
        FuncKey {
            lang: Lang::TypeScript,
            namespace: "packages/util/src/escape.ts".to_string(),
            container: String::new(),
            name: "escape".to_string(),
            arity: None,
            disambig: None,
            kind: FuncKind::Function,
        },
    );
    gs_a.insert_cross_package_imports(
        "apps/api/handler_a.ts".to_string(),
        std::sync::Arc::new(map_a),
    );

    let mut gs_b = GlobalSummaries::new();
    let mut map_b: std::collections::HashMap<String, FuncKey> = std::collections::HashMap::new();
    map_b.insert(
        "format".to_string(),
        FuncKey {
            lang: Lang::TypeScript,
            namespace: "packages/util/src/format.ts".to_string(),
            container: String::new(),
            name: "format".to_string(),
            arity: None,
            disambig: None,
            kind: FuncKind::Function,
        },
    );
    gs_b.insert_cross_package_imports(
        "apps/api/handler_b.ts".to_string(),
        std::sync::Arc::new(map_b),
    );

    gs_a.merge(gs_b);
    assert_eq!(gs_a.cross_package_imports_len(), 2);
    assert!(
        gs_a.get_cross_package_imports("apps/api/handler_a.ts")
            .is_some()
    );
    assert!(
        gs_a.get_cross_package_imports("apps/api/handler_b.ts")
            .is_some()
    );
}