diff --git a/CHANGELOG.md b/CHANGELOG.md
index 05ea48fd..e59a0f8a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,32 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.0] - 2026-02-24
+
+### Added
+- **Cross-file taint analysis** -- two-pass architecture: Pass 1 extracts `FuncSummary` per function (source/sanitizer/sink capabilities, taint propagation, callees), Pass 2 runs BFS taint propagation with cross-file callee resolution.
+- **CFG analysis engine** with five detectors: unguarded sinks (`cfg-unguarded-sink`), auth gaps in web handlers (`cfg-auth-gap`), unreachable security code (`cfg-unreachable-*`), error fallthrough (`cfg-error-fallthrough`), and resource leaks (`cfg-resource-leak`).
+- **Cross-language interop** -- taint flows across language boundaries via explicit `InteropEdge` structs without false-positive name collisions.
+- **Function summaries** persisted to SQLite (`function_summaries` table) with arity, parameter names, capability bitflags, and callee lists.
+- **Multi-language CFG + taint support** -- all 10 languages (Rust, C, C++, Java, Go, PHP, Python, Ruby, TypeScript, JavaScript) now have `KINDS` maps, `RULES`, and `PARAM_CONFIG` for full CFG construction and taint analysis.
+- **Resource leak detection** for C/C++ (malloc/free, fopen/fclose), Go (os.Open/Close, Lock/Unlock), Rust (alloc/dealloc), and Java (streams, connections).
+- **Finding scoring system** -- numeric scores based on severity, proximity to entry point, path complexity, taint confirmation, and confidence multiplier.
+- **Analysis modes** -- `Full` (default), `Ast` (`--ast-only`), and `Taint` (`--cfg-only`) selectable via CLI flags or `scanner.mode` config.
+- **`GlobalSummaries`** with a conservative merge on name collisions across files: capabilities are unioned, booleans are OR-ed, and param/callee lists are unioned.
+- **Performance optimizations** -- `_from_bytes` variants that read and hash each file only once, lock-free rayon parallelism, SQLite WAL + 8 MB cache + 256 MB mmap.
+- **Tracing instrumentation** -- `tracing` spans on all pipeline phases (walk, pass1, merge, pass2, per-file ops, db_init).
+- **Benchmark suite** -- criterion benchmarks in `benches/scan_bench.rs` with fixtures.
+- 107 unit tests covering taint propagation, cross-file resolution, cross-language interop, CFG analysis, and summaries.
+
+### Changed
+- Bumped all dependencies to latest compatible versions.
+- `Cap` bitflags expanded: `ENV_VAR`, `HTML_ESCAPE`, `SHELL_ESCAPE`, `URL_ENCODE`, `JSON_PARSE`, `FILE_IO`.
+- `classify()` in labels uses zero-allocation byte-level case-insensitive comparisons.
+- Indexed scans now always re-analyze all files in Pass 2 when taint is enabled (conservative: global summaries may have changed even if the file itself did not).
+
+### Fixed
+- Clippy `ptr_arg` lint in perf tests (`&PathBuf` -> `&Path`).
+
 ## [0.2.0-alpha] - 2025-06-28
 
 ### Added
diff --git a/Cargo.lock b/Cargo.lock
index e570effc..453111b9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,24 +4,21 @@ version = 4
 
 [[package]]
 name = "aho-corasick"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
 dependencies = [
  "memchr",
 ]
 
 [[package]]
-name = "allocator-api2"
-version = "0.2.21"
+name = "alloca"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
-
-[[package]]
-name = "android-tzdata"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
+checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4"
+dependencies = [
+ "cc",
+]
 
 [[package]]
 name = "android_system_properties"
@@ -33,10 +30,16 @@ dependencies = [
 ]
 
 [[package]]
-name = "anstream"
-version = "0.6.19"
+name = "anes"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "anstream"
+version = "0.6.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -49,9 +52,9 @@ dependencies = [
 
 [[package]]
 name = "anstyle"
-version = "1.0.11"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
+checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
 
 [[package]]
 name = "anstyle-parse"
@@ -64,11 +67,11 @@ dependencies = [
 
 [[package]]
 name = "anstyle-query"
-version = "1.1.3"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -95,84 +98,134 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
-name = "autocfg"
-version = "1.4.0"
+name = "assert_cmd"
+version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
+checksum = "9c5bcfa8749ac45dd12cb11055aeeb6b27a3895560d60d71e3c23bf979e60514"
+dependencies = [
+ "anstyle",
+ "bstr",
+ "libc",
+ "predicates",
+ "predicates-core",
+ "predicates-tree",
+ "wait-timeout",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
 [[package]]
 name = "bitflags"
-version = "2.9.1"
+version = "2.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
+checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
 
 [[package]]
 name = "blake3"
-version = "1.8.2"
+version = "1.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0"
+checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
 dependencies = [
  "arrayref",
  "arrayvec",
  "cc",
  "cfg-if",
  "constant_time_eq",
+ "cpufeatures",
 ]
 
 [[package]]
 name = "bstr"
-version = "1.12.0"
+version = "1.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
+checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
 dependencies = [
  "memchr",
+ "regex-automata",
  "serde",
 ]
 
 [[package]]
 name = "bumpalo"
-version = "3.18.1"
+version = "3.19.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee"
+checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
 
 [[package]]
 name = "bytesize"
-version = "2.0.1"
+version = "2.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3c8f83209414aacf0eeae3cf730b18d6981697fba62f200fcfb92b9f082acba"
+checksum = "6bd91ee7b2422bcb158d90ef4d14f75ef67f340943fc4149891dcce8f8b972a3"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.2.27"
+version = "1.2.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc"
+checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2"
 dependencies = [
+ "find-msvc-tools",
  "shlex",
 ]
 
 [[package]]
 name = "cfg-if"
-version = "1.0.1"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
 [[package]]
 name = "chrono"
-version = "0.4.41"
+version = "0.4.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
+checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
 dependencies = [
- "android-tzdata",
  "iana-time-zone",
  "num-traits",
- "windows-link",
+ "windows-link 0.2.1",
+]
+
+[[package]]
+name = "ciborium"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
+dependencies = [
+ "ciborium-io",
+ "half",
 ]
 
 [[package]]
 name = "clap"
-version = "4.5.40"
+version = "4.5.60"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
+checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -180,9 +233,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.40"
+version = "4.5.60"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
+checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876"
 dependencies = [
  "anstream",
  "anstyle",
@@ -192,9 +245,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.40"
+version = "4.5.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
+checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -204,9 +257,9 @@ dependencies = [
 
 [[package]]
 name = "clap_lex"
-version = "0.7.5"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
+checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
 
 [[package]]
 name = "colorchoice"
@@ -216,22 +269,22 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
 
 [[package]]
 name = "console"
-version = "0.16.0"
+version = "0.16.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e09ced7ebbccb63b4c65413d821f2e00ce54c5ca4514ddc6b3c892fdbcbc69d"
+checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4"
 dependencies = [
  "encode_unicode",
  "libc",
  "once_cell",
  "unicode-width",
- "windows-sys 0.60.2",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
 name = "constant_time_eq"
-version = "0.3.1"
+version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
+checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
 
 [[package]]
 name = "core-foundation-sys"
@@ -239,6 +292,50 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "criterion"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3"
+dependencies = [
+ "alloca",
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "itertools",
+ "num-traits",
+ "oorandom",
+ "page_size",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.15"
@@ -273,6 +370,12 @@ version = "0.8.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
 
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
 [[package]]
 name = "dashmap"
 version = "7.0.0-rc2"
@@ -282,20 +385,26 @@ dependencies = [
  "cfg-if",
  "crossbeam-utils",
  "equivalent",
- "hashbrown",
+ "hashbrown 0.15.4",
  "lock_api",
  "parking_lot_core",
 ]
 
 [[package]]
 name = "deranged"
-version = "0.4.0"
+version = "0.5.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e"
+checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4"
 dependencies = [
  "powerfmt",
 ]
 
+[[package]]
+name = "difflib"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
+
 [[package]]
 name = "directories"
 version = "6.0.0"
@@ -314,7 +423,7 @@ dependencies = [
  "libc",
  "option-ext",
  "redox_users",
- "windows-sys 0.60.2",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -337,12 +446,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
 [[package]]
 name = "errno"
-version = "0.3.13"
+version = "0.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.60.2",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -363,18 +472,39 @@ version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
+
 [[package]]
 name = "fixedbitset"
 version = "0.5.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
 
+[[package]]
+name = "float-cmp"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "foldhash"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
+
 [[package]]
 name = "getrandom"
 version = "0.2.16"
@@ -383,32 +513,49 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
 dependencies = [
  "cfg-if",
  "libc",
- "wasi 0.11.1+wasi-snapshot-preview1",
+ "wasi",
 ]
 
 [[package]]
 name = "getrandom"
-version = "0.3.3"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
 dependencies = [
  "cfg-if",
  "libc",
  "r-efi",
- "wasi 0.14.2+wasi-0.2.4",
+ "wasip2",
 ]
 
 [[package]]
-name = "globset"
-version = "0.4.16"
+name = "glob"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5"
+checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
+
+[[package]]
+name = "globset"
+version = "0.4.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3"
 dependencies = [
  "aho-corasick",
  "bstr",
  "log",
- "regex-automata 0.4.9",
- "regex-syntax 0.8.5",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "half"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+ "zerocopy",
 ]
 
 [[package]]
@@ -417,18 +564,25 @@ version = "0.15.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
 dependencies = [
- "allocator-api2",
- "equivalent",
- "foldhash",
+ "foldhash 0.1.5",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+dependencies = [
+ "foldhash 0.2.0",
 ]
 
 [[package]]
 name = "hashlink"
-version = "0.10.0"
+version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
+checksum = "ea0b22561a9c04a7cb1a302c013e0259cd3b4bb619f145b32f72b8b4bcbed230"
 dependencies = [
- "hashbrown",
+ "hashbrown 0.16.1",
 ]
 
 [[package]]
@@ -469,15 +623,15 @@ dependencies = [
 
 [[package]]
 name = "ignore"
-version = "0.4.23"
+version = "0.4.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b"
+checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a"
 dependencies = [
  "crossbeam-deque",
  "globset",
  "log",
  "memchr",
- "regex-automata 0.4.9",
+ "regex-automata",
  "same-file",
  "walkdir",
  "winapi-util",
@@ -485,31 +639,40 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.9.0"
+version = "2.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
 dependencies = [
  "equivalent",
- "hashbrown",
+ "hashbrown 0.16.1",
 ]
 
 [[package]]
 name = "is_terminal_polyfill"
-version = "1.70.1"
+version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+
+[[package]]
+name = "itertools"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
+]
 
 [[package]]
 name = "itoa"
-version = "1.0.15"
+version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
 
 [[package]]
 name = "js-sys"
-version = "0.3.77"
+version = "0.3.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
+checksum = "14dc6f6450b3f6d4ed5b16327f38fed626d375a886159ca555bd7822c0c3a5a6"
 dependencies = [
  "once_cell",
  "wasm-bindgen",
@@ -523,9 +686,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 
 [[package]]
 name = "libc"
-version = "0.2.173"
+version = "0.2.182"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8cfeafaffdbc32176b64fb251369d52ea9f0a8fbc6f8759edffef7b525d64bb"
+checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
 
 [[package]]
 name = "libredox"
@@ -539,9 +702,9 @@ dependencies = [
 
 [[package]]
 name = "libsqlite3-sys"
-version = "0.34.0"
+version = "0.36.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91632f3b4fb6bd1d72aa3d78f41ffecfcf2b1a6648d8c241dbe7dbfaf4875e15"
+checksum = "95b4103cffefa72eb8428cb6b47d6627161e51c2739fc5e3b734584157bc642a"
 dependencies = [
  "cc",
  "pkg-config",
@@ -550,56 +713,60 @@ dependencies = [
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.9.4"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
 
 [[package]]
 name = "lock_api"
-version = "0.4.13"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
 dependencies = [
- "autocfg",
  "scopeguard",
 ]
 
 [[package]]
 name = "log"
-version = "0.4.27"
+version = "0.4.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
 
 [[package]]
 name = "matchers"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
+checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
 dependencies = [
- "regex-automata 0.1.10",
+ "regex-automata",
 ]
 
 [[package]]
 name = "memchr"
-version = "2.7.5"
+version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+
+[[package]]
+name = "normalize-line-endings"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
 
 [[package]]
 name = "nu-ansi-term"
-version = "0.46.0"
+version = "0.50.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
+checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399"
 dependencies = [
- "overload",
- "winapi",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
 name = "num-conv"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
+checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
 
 [[package]]
 name = "num-traits"
@@ -622,27 +789,32 @@ dependencies = [
 
 [[package]]
 name = "nyx-scanner"
-version = "0.2.0-alpha"
+version = "0.2.0"
 dependencies = [
+ "assert_cmd",
  "bitflags",
  "blake3",
  "bytesize",
  "chrono",
  "clap",
  "console",
+ "criterion",
  "crossbeam-channel",
  "dashmap",
  "directories",
+ "glob",
  "ignore",
  "num_cpus",
  "once_cell",
  "petgraph",
  "phf",
+ "predicates",
  "r2d2",
  "r2d2_sqlite",
  "rayon",
  "rusqlite",
  "serde",
+ "serde_json",
  "tempfile",
  "thiserror",
  "toml",
@@ -673,6 +845,12 @@ version = "1.70.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
 
+[[package]]
+name = "oorandom"
+version = "11.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
+
 [[package]]
 name = "option-ext"
 version = "0.2.0"
@@ -680,16 +858,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
 
 [[package]]
-name = "overload"
-version = "0.1.1"
+name = "page_size"
+version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
+checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da"
+dependencies = [
+ "libc",
+ "winapi",
+]
 
 [[package]]
 name = "parking_lot"
-version = "0.12.4"
+version = "0.12.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
 dependencies = [
  "lock_api",
  "parking_lot_core",
@@ -697,34 +879,34 @@ dependencies = [
 
 [[package]]
 name = "parking_lot_core"
-version = "0.9.11"
+version = "0.9.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
 dependencies = [
  "cfg-if",
  "libc",
  "redox_syscall",
  "smallvec",
- "windows-targets 0.52.6",
+ "windows-link 0.2.1",
 ]
 
 [[package]]
 name = "petgraph"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca"
+checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
 dependencies = [
  "fixedbitset",
- "hashbrown",
+ "hashbrown 0.15.4",
  "indexmap",
  "serde",
 ]
 
 [[package]]
 name = "phf"
-version = "0.12.1"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
+checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
 dependencies = [
  "phf_macros",
  "phf_shared",
@@ -733,9 +915,9 @@ dependencies = [
 
 [[package]]
 name = "phf_generator"
-version = "0.12.1"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2cbb1126afed61dd6368748dae63b1ee7dc480191c6262a3b4ff1e29d86a6c5b"
+checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
 dependencies = [
  "fastrand",
  "phf_shared",
@@ -743,9 +925,9 @@ dependencies = [
 
 [[package]]
 name = "phf_macros"
-version = "0.12.1"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d713258393a82f091ead52047ca779d37e5766226d009de21696c4e667044368"
+checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
 dependencies = [
  "phf_generator",
  "phf_shared",
@@ -756,9 +938,9 @@ dependencies = [
 
 [[package]]
 name = "phf_shared"
-version = "0.12.1"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981"
+checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
 dependencies = [
  "siphasher",
 ]
@@ -775,6 +957,34 @@ version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
 
+[[package]]
+name = "plotters"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
+dependencies = [
+ "plotters-backend",
+]
+
 [[package]]
 name = "powerfmt"
 version = "0.2.0"
@@ -791,19 +1001,49 @@ dependencies = [
 ]
 
 [[package]]
-name = "proc-macro2"
-version = "1.0.95"
+name = "predicates"
+version = "3.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
+checksum = "ada8f2932f28a27ee7b70dd6c1c39ea0675c55a36879ab92f3a715eaa1e63cfe"
+dependencies = [
+ "anstyle",
+ "difflib",
+ "float-cmp",
+ "normalize-line-endings",
+ "predicates-core",
+ "regex",
+]
+
+[[package]]
+name = "predicates-core"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cad38746f3166b4031b1a0d39ad9f954dd291e7854fcc0eed52ee41a0b50d144"
+
+[[package]]
+name = "predicates-tree"
+version = "1.0.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0de1b847b39c8131db0467e9df1ff60e6d0562ab8e9a16e568ad0fdb372e2f2"
+dependencies = [
+ "predicates-core",
+ "termtree",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
 dependencies = [
  "unicode-ident",
 ]
 
 [[package]]
 name = "quote"
-version = "1.0.40"
+version = "1.0.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
+checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
 dependencies = [
  "proc-macro2",
 ]
@@ -827,9 +1067,9 @@ dependencies = [
 
 [[package]]
 name = "r2d2_sqlite"
-version = "0.30.0"
+version = "0.32.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06cc23a61faf4643d8b59ed52c27ed434476dd7aa6f39e1eff7d6bbd35985093"
+checksum = "a2ebd03c29250cdf191da93a35118b4567c2ef0eacab54f65e058d6f4c9965f6"
 dependencies = [
  "r2d2",
  "rusqlite",
@@ -838,9 +1078,9 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
 dependencies = [
  "rand_chacha",
  "rand_core",
@@ -858,18 +1098,18 @@ dependencies = [
 
 [[package]]
 name = "rand_core"
-version = "0.9.3"
+version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
 dependencies = [
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
 ]
 
 [[package]]
 name = "rayon"
-version = "1.10.0"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
 dependencies = [
  "either",
  "rayon-core",
@@ -877,9 +1117,9 @@ dependencies = [
 
 [[package]]
 name = "rayon-core"
-version = "1.12.1"
+version = "1.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
 dependencies = [
  "crossbeam-deque",
  "crossbeam-utils",
@@ -907,53 +1147,48 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.11.1"
+version = "1.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-automata 0.4.9",
- "regex-syntax 0.8.5",
+ "regex-automata",
+ "regex-syntax",
 ]
 
 [[package]]
 name = "regex-automata"
-version = "0.1.10"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
-dependencies = [
- "regex-syntax 0.6.29",
-]
-
-[[package]]
-name = "regex-automata"
-version = "0.4.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-syntax 0.8.5",
+ "regex-syntax",
 ]
 
 [[package]]
 name = "regex-syntax"
-version = "0.6.29"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
 
 [[package]]
-name = "regex-syntax"
-version = "0.8.5"
+name = "rsqlite-vfs"
+version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+checksum = "a8a1f2315036ef6b1fbacd1972e8ee7688030b0a2121edfc2a6550febd41574d"
+dependencies = [
+ "hashbrown 0.16.1",
+ "thiserror",
+]
 
 [[package]]
 name = "rusqlite"
-version = "0.36.0"
+version = "0.38.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3de23c3319433716cf134eed225fe9986bc24f63bed9be9f20c329029e672dc7"
+checksum = "f1c93dd1c9683b438c392c492109cb702b8090b2bfc8fed6f6e4eb4523f17af3"
 dependencies = [
  "bitflags",
  "fallible-iterator",
@@ -961,32 +1196,27 @@ dependencies = [
  "hashlink",
  "libsqlite3-sys",
  "smallvec",
+ "sqlite-wasm-rs",
 ]
 
 [[package]]
 name = "rustix"
-version = "1.0.7"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
 dependencies = [
  "bitflags",
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
 name = "rustversion"
-version = "1.0.21"
+version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
-
-[[package]]
-name = "ryu"
-version = "1.0.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
 
 [[package]]
 name = "same-file"
@@ -1014,18 +1244,28 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
 [[package]]
 name = "serde"
-version = "1.0.219"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.219"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1034,24 +1274,25 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.140"
+version = "1.0.149"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
 dependencies = [
  "indexmap",
  "itoa",
  "memchr",
- "ryu",
  "serde",
+ "serde_core",
+ "zmij",
 ]
 
 [[package]]
 name = "serde_spanned"
-version = "0.6.9"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
+checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
 dependencies = [
- "serde",
+ "serde_core",
 ]
 
 [[package]]
@@ -1081,6 +1322,18 @@ version = "1.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
 
+[[package]]
+name = "sqlite-wasm-rs"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f4206ed3a67690b9c29b77d728f6acc3ce78f16bf846d83c94f76400320181b"
+dependencies = [
+ "cc",
+ "js-sys",
+ "rsqlite-vfs",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "streaming-iterator"
 version = "0.1.9"
@@ -1095,9 +1348,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
 [[package]]
 name = "syn"
-version = "2.0.103"
+version = "2.0.117"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1106,31 +1359,37 @@ dependencies = [
 
 [[package]]
 name = "tempfile"
-version = "3.20.0"
+version = "3.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
+checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0"
 dependencies = [
  "fastrand",
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "once_cell",
  "rustix",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
-name = "thiserror"
-version = "2.0.12"
+name = "termtree"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
+checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "2.0.12"
+version = "2.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1148,81 +1407,89 @@ dependencies = [
 
 [[package]]
 name = "time"
-version = "0.3.41"
+version = "0.3.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40"
+checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
 dependencies = [
  "deranged",
  "itoa",
  "num-conv",
  "powerfmt",
- "serde",
+ "serde_core",
  "time-core",
  "time-macros",
 ]
 
 [[package]]
 name = "time-core"
-version = "0.1.4"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c"
+checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
 
 [[package]]
 name = "time-macros"
-version = "0.2.22"
+version = "0.2.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49"
+checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
 dependencies = [
  "num-conv",
  "time-core",
 ]
 
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "toml"
-version = "0.8.23"
+version = "1.0.3+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
-dependencies = [
- "serde",
- "serde_spanned",
- "toml_datetime",
- "toml_edit",
-]
-
-[[package]]
-name = "toml_datetime"
-version = "0.6.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
-dependencies = [
- "serde",
-]
-
-[[package]]
-name = "toml_edit"
-version = "0.22.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
+checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c"
 dependencies = [
  "indexmap",
- "serde",
+ "serde_core",
  "serde_spanned",
  "toml_datetime",
- "toml_write",
+ "toml_parser",
+ "toml_writer",
  "winnow",
 ]
 
 [[package]]
-name = "toml_write"
-version = "0.1.2"
+name = "toml_datetime"
+version = "1.0.0+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
+checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "toml_parser"
+version = "1.0.9+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4"
+dependencies = [
+ "winnow",
+]
+
+[[package]]
+name = "toml_writer"
+version = "1.0.6+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
 
 [[package]]
 name = "tracing"
-version = "0.1.41"
+version = "0.1.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
 dependencies = [
  "pin-project-lite",
  "tracing-attributes",
@@ -1231,9 +1498,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-attributes"
-version = "0.1.29"
+version = "0.1.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b1ffbcf9c6f6b99d386e7444eb608ba646ae452a36b39737deb9663b610f662"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1242,9 +1509,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-core"
-version = "0.1.34"
+version = "0.1.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
 dependencies = [
  "once_cell",
  "valuable",
@@ -1273,14 +1540,14 @@ dependencies = [
 
 [[package]]
 name = "tracing-subscriber"
-version = "0.3.19"
+version = "0.3.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
+checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
 dependencies = [
  "matchers",
  "nu-ansi-term",
  "once_cell",
- "regex",
+ "regex-automata",
  "serde",
  "serde_json",
  "sharded-slab",
@@ -1295,13 +1562,13 @@ dependencies = [
 
 [[package]]
 name = "tree-sitter"
-version = "0.25.6"
+version = "0.26.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7cf18d43cbf0bfca51f657132cc616a5097edc4424d538bae6fa60142eaf9f0"
+checksum = "12987371f54efc9b9306a20dc87ed5aaee9f320c8a8b115e28515c412b2efe39"
 dependencies = [
  "cc",
  "regex",
- "regex-syntax 0.8.5",
+ "regex-syntax",
  "serde_json",
  "streaming-iterator",
  "tree-sitter-language",
@@ -1329,9 +1596,9 @@ dependencies = [
 
 [[package]]
 name = "tree-sitter-go"
-version = "0.23.4"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431"
+checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea"
 dependencies = [
  "cc",
  "tree-sitter-language",
@@ -1349,9 +1616,9 @@ dependencies = [
 
 [[package]]
 name = "tree-sitter-javascript"
-version = "0.23.1"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1"
+checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5"
 dependencies = [
  "cc",
  "tree-sitter-language",
@@ -1365,9 +1632,9 @@ checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8"
 
 [[package]]
 name = "tree-sitter-php"
-version = "0.23.11"
+version = "0.24.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f066e94e9272cfe4f1dcb07a1c50c66097eca648f2d7233d299c8ae9ed8c130c"
+checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592"
 dependencies = [
  "cc",
  "tree-sitter-language",
@@ -1375,9 +1642,9 @@ dependencies = [
 
 [[package]]
 name = "tree-sitter-python"
-version = "0.23.6"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04"
+checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c"
 dependencies = [
  "cc",
  "tree-sitter-language",
@@ -1415,15 +1682,15 @@ dependencies = [
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.18"
+version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
 
 [[package]]
 name = "unicode-width"
-version = "0.2.1"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
 
 [[package]]
 name = "utf8parse"
@@ -1437,7 +1704,7 @@ version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d"
 dependencies = [
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "js-sys",
  "rand",
  "wasm-bindgen",
@@ -1455,6 +1722,15 @@ version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
 
+[[package]]
+name = "wait-timeout"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -1472,45 +1748,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
 [[package]]
-name = "wasi"
-version = "0.14.2+wasi-0.2.4"
+name = "wasip2"
+version = "1.0.2+wasi-0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
+checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
 dependencies = [
- "wit-bindgen-rt",
+ "wit-bindgen",
 ]
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.100"
+version = "0.2.113"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
+checksum = "60722a937f594b7fde9adb894d7c092fc1bb6612897c46368d18e7a20208eff2"
 dependencies = [
  "cfg-if",
  "once_cell",
  "rustversion",
  "wasm-bindgen-macro",
-]
-
-[[package]]
-name = "wasm-bindgen-backend"
-version = "0.2.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
-dependencies = [
- "bumpalo",
- "log",
- "proc-macro2",
- "quote",
- "syn",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.100"
+version = "0.2.113"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
+checksum = "0fac8c6395094b6b91c4af293f4c79371c163f9a6f56184d2c9a85f5a95f3950"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -1518,26 +1781,36 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.100"
+version = "0.2.113"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
+checksum = "ab3fabce6159dc20728033842636887e4877688ae94382766e00b180abac9d60"
 dependencies = [
+ "bumpalo",
  "proc-macro2",
  "quote",
  "syn",
- "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.100"
+version = "0.2.113"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
+checksum = "de0e091bdb824da87dc01d967388880d017a0a9bc4f3bdc0d86ee9f9336e3bb5"
 dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "web-sys"
+version = "0.3.90"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "705eceb4ce901230f8625bd1d665128056ccbe4b7408faa625eec1ba80f59a97"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "winapi"
 version = "0.3.9"
@@ -1560,7 +1833,7 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -1577,7 +1850,7 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
 dependencies = [
  "windows-implement",
  "windows-interface",
- "windows-link",
+ "windows-link 0.1.3",
  "windows-result",
  "windows-strings",
 ]
@@ -1610,13 +1883,19 @@ version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
 
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
 [[package]]
 name = "windows-result"
 version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
 dependencies = [
- "windows-link",
+ "windows-link 0.1.3",
 ]
 
 [[package]]
@@ -1625,7 +1904,16 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
 dependencies = [
- "windows-link",
+ "windows-link 0.1.3",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets",
 ]
 
 [[package]]
@@ -1634,16 +1922,16 @@ version = "0.59.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
 dependencies = [
- "windows-targets 0.52.6",
+ "windows-targets",
 ]
 
 [[package]]
 name = "windows-sys"
-version = "0.60.2"
+version = "0.61.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
 dependencies = [
- "windows-targets 0.53.2",
+ "windows-link 0.2.1",
 ]
 
 [[package]]
@@ -1652,30 +1940,14 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
 dependencies = [
- "windows_aarch64_gnullvm 0.52.6",
- "windows_aarch64_msvc 0.52.6",
- "windows_i686_gnu 0.52.6",
- "windows_i686_gnullvm 0.52.6",
- "windows_i686_msvc 0.52.6",
- "windows_x86_64_gnu 0.52.6",
- "windows_x86_64_gnullvm 0.52.6",
- "windows_x86_64_msvc 0.52.6",
-]
-
-[[package]]
-name = "windows-targets"
-version = "0.53.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
-dependencies = [
- "windows_aarch64_gnullvm 0.53.0",
- "windows_aarch64_msvc 0.53.0",
- "windows_i686_gnu 0.53.0",
- "windows_i686_gnullvm 0.53.0",
- "windows_i686_msvc 0.53.0",
- "windows_x86_64_gnu 0.53.0",
- "windows_x86_64_gnullvm 0.53.0",
- "windows_x86_64_msvc 0.53.0",
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
 ]
 
 [[package]]
@@ -1684,130 +1956,82 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
 
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
-
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
 
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
-
 [[package]]
 name = "windows_i686_gnu"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
 
-[[package]]
-name = "windows_i686_gnu"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
-
 [[package]]
 name = "windows_i686_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
 
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
-
 [[package]]
 name = "windows_i686_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
 
-[[package]]
-name = "windows_i686_msvc"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
-
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
 
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
-
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
 
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
-
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
-
 [[package]]
 name = "winnow"
-version = "0.7.11"
+version = "0.7.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
-dependencies = [
- "memchr",
-]
+checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
 
 [[package]]
-name = "wit-bindgen-rt"
-version = "0.39.0"
+name = "wit-bindgen"
+version = "0.51.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
-dependencies = [
- "bitflags",
-]
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
 
 [[package]]
 name = "zerocopy"
-version = "0.8.25"
+version = "0.8.39"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
+checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.8.25"
+version = "0.8.39"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
+checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517"
 dependencies = [
  "proc-macro2",
  "quote",
  "syn",
 ]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
diff --git a/Cargo.toml b/Cargo.toml
index 6a4d1e22..573d8032 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,61 +1,81 @@
 [package]
 name = "nyx-scanner"
-version = "0.2.0-alpha"
+version = "0.2.0"
 edition = "2024"
 description = "A CLI security scanner for automating vulnerability checks"
 license = "GPL-3.0"
-authors = ["Eli Peter <ecpeter23@exmaple.com>"]
+authors = ["Eli Peter <elicpeter@example.com>"]
 homepage = "https://github.com/ecpeter23/nyx"
 repository = "https://github.com/ecpeter23/nyx"
 documentation = "https://github.com/ecpeter23/nyx#readme"
-keywords = ["security", "vulnerability", "scanner", "cli", "automation"]
-categories = ["command-line-utilities", "development-tools" ]
+keywords = ["security", "vulnerability", "scanner", "static-analysis", "cli"]
+categories = ["command-line-utilities", "development-tools", "security"]
 readme = "README.md"
 default-run = "nyx"
 exclude = [
     "assets/",
     ".github/",
+    ".claude/",
+    ".idea/",
+    "tests/",
+    "benches/",
+    "examples/",
 ]
 
+autoexamples = false
+
+[lib]
+name = "nyx_scanner"
+path = "src/lib.rs"
+
 [[bin]]
 name = "nyx"
 path = "src/main.rs"
 
+[[bench]]
+name = "scan_bench"
+harness = false
+
 [dev-dependencies]
-tempfile = "3"
+tempfile = "3.26.0"
+criterion = { version = "0.8", features = ["html_reports"] }
+assert_cmd = "2"
+predicates = "3"
+glob = "0.3"
 
 [dependencies]
 directories = "6.0.0"
-clap = { version = "4.5.40", features = ["derive"] }
-serde = { version = "1.0.219", features = ["derive"] }
-toml = "0.8.23"
-tracing-subscriber = { version = "0.3.19", features = ["env-filter", "json", "ansi","time"] }
-tracing = "0.1.41"
+clap = { version = "4.5.60", features = ["derive"] }
+serde = { version = "1.0.228", features = ["derive"] }
+serde_json = "1.0"
+toml = "1.0.3"
+tracing-subscriber = { version = "0.3.22", features = ["env-filter", "json", "ansi","time"] }
+tracing = "0.1.44"
 num_cpus = "1.17.0"
-rusqlite = { version = "0.36.0", features = ["bundled"] }
-r2d2_sqlite = { version = "0.30.0", features = ["bundled"] }
-ignore = "0.4.23"
-tree-sitter = "0.25.6"
+rusqlite = { version = "0.38.0", features = ["bundled"] }
+r2d2_sqlite = { version = "0.32.0", features = ["bundled"] }
+ignore = "0.4.25"
+tree-sitter = "0.26.5"
 tree-sitter-rust = "0.24.0"
 tree-sitter-c = "0.24.1"
 tree-sitter-cpp = "0.23.4"
 tree-sitter-java = "0.23.5"
 tree-sitter-typescript = "0.23.2"
-tree-sitter-javascript = "0.23.1"
-tree-sitter-go = "0.23.4"
-tree-sitter-php = "0.23.11"
-tree-sitter-python = "0.23.6"
+tree-sitter-javascript = "0.25.0"
+tree-sitter-go = "0.25.0"
+tree-sitter-php = "0.24.2"
+tree-sitter-python = "0.25.0"
 tree-sitter-ruby = "0.23.1"
 crossbeam-channel = "0.5.15"
-blake3 = "1.8.2"
+blake3 = "1.8.3"
 once_cell = "1.21.3"
-console = "0.16.0"
-rayon = "1.10.0"
+console = "0.16.2"
+rayon = "1.11.0"
 r2d2 = "0.8.10"
-bytesize  = "2.0.1"
-chrono    = { version = "0.4.41", default-features = false, features = ["std", "clock"] }
-thiserror = "2.0.12"
+bytesize  = "2.3.1"
+chrono    = { version = "0.4.44", default-features = false, features = ["std", "clock"] }
+thiserror = "2.0.18"
 dashmap = "7.0.0-rc2"
-petgraph = "0.8.2"
-bitflags = "2.9.1"
-phf = { version = "0.12.1", features = ["macros"] }
+petgraph = "0.8.3"
+bitflags = "2.11.0"
+phf = { version = "0.13.1", features = ["macros"] }
diff --git a/README.md b/README.md
index 7531f5ea..264e8af9 100644
--- a/README.md
+++ b/README.md
@@ -13,37 +13,38 @@
 
 ## What is Nyx?
 
-**Nyx** is a lightweight lightning-fast Rust‑native command‑line tool that detects potentially dangerous code patterns across several programming languages. It combines the accuracy of [`tree‑sitter`](https://tree-sitter.github.io/) parsing with a curated rule set and an optional SQLite‑backed index to deliver fast, repeatable scans on projects of any size.
-
->[!IMPORTANT]
-> **Project status – Alpha**   
-> Nyx is under active development. The public interface, rule set, and output formats may change without notice while we stabilise the core. The new CFG + taint engine is experimental and Rust-only for now – please report any crashes or false-positives. Pin exact versions in production environments
+**Nyx** is a lightweight, lightning-fast Rust-native command-line tool that detects security vulnerabilities across 10 programming languages. It combines [`tree-sitter`](https://tree-sitter.github.io/) parsing, intra-procedural control-flow graphs, and cross-file taint analysis with an optional SQLite-backed index to deliver deep, repeatable scans on projects of any size.
 
 ---
 
 ## Key Capabilities
 
-| Capability                   | Description                                                                               |
-|------------------------------|-------------------------------------------------------------------------------------------|
-| Multi‑language support       | Rust, C, C++, Java, Go, PHP, Python, Ruby, TypeScript, JavaScript                         |
-| AST‑level pattern matching   | Language‑specific queries written against precise parse trees                             |
-| Incremental indexing         | SQLite database stores file hashes and previous findings to skip unchanged files          |
-| Parallel execution           | File walking and rule execution run concurrently; defaults scale with available CPU cores |
-| Configurable scan parameters | Exclude directories, set maximum file size, tune worker threads, limit output, and more   |
-| Multiple output formats      | Human‑readable console view (default) and machine‑readable JSON / CSV / SARIF (roadmap)   |
+| Capability | Description |
+|---|---|
+| Multi-language support | Rust, C, C++, Java, Go, PHP, Python, Ruby, TypeScript, JavaScript |
+| AST-level pattern matching | Language-specific queries written against precise parse trees |
+| Control-flow graph analysis | Auth gaps, unguarded sinks, unreachable security code, resource leaks, error fallthrough |
+| Cross-file taint tracking | BFS taint propagation from sources through sanitizers to sinks with function summaries |
+| Cross-language interop | Taint flows across language boundaries via explicit interop edges |
+| Two-pass architecture | Pass 1 extracts function summaries; Pass 2 runs taint with full cross-file context |
+| Incremental indexing | SQLite database stores file hashes, summaries, and findings to skip unchanged files |
+| Parallel execution | File walking and analysis run concurrently via Rayon; scales with available CPU cores |
+| Configurable scan parameters | Exclude directories, set maximum file size, tune worker threads, limit output, and more |
+| Multiple output formats | Human-readable console view (default) and machine-readable JSON |
 
 ---
 
 ## Why choose Nyx?
 
-| Advantage                      | What it means for you                                                                                                                                                        |
-|--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| **Pure-Rust, single binary**   | No JVM, Python, or server to install; drop the `nyx` executable into your `$PATH` and go.                                                                                    |
-| **Massively parallel**         | Uses Rayon and a thread-pool walker; scales to all CPU cores. Example: scanning the entire **rust-lang/rust** codebase (~53,000 files) on an M2 MacBook Pro takes **≈ 1 s**. |
-| **Index-aware**                | An optional SQLite index stores file hashes and findings, subsequent scans touch *only* changed files, slashing CI times.                                                    |
-| **Offline & privacy-friendly** | Requires no login, cloud account, or telemetry. Perfect for air-gapped environments and strict compliance policies.                                                          |
-| **Tree-sitter precision**      | Parses real language grammars, not regexes, giving far fewer false positives than line-based scanners.                                                                       |
-| **Extensible**                 | Add new patterns with concise `tree-sitter` queries; no SaaS lock-in.                                                                                                        |
+| Advantage | What it means for you |
+|---|---|
+| **Pure-Rust, single binary** | No JVM, Python, or server to install; drop the `nyx` executable into your `$PATH` and go. |
+| **Massively parallel** | Uses Rayon and a thread-pool walker; scales to all CPU cores. Scanning the entire **rust-lang/rust** codebase (~53,000 files) on an M2 MacBook Pro takes **~1 s**. |
+| **Deep analysis** | Real CFG construction and taint propagation, not just regex matching. Cross-file function summaries, capability-based sanitizer tracking, and scored findings. |
+| **Index-aware** | An optional SQLite index stores file hashes and findings; subsequent scans touch *only* changed files, slashing CI times. |
+| **Offline & privacy-friendly** | Requires no login, cloud account, or telemetry. Perfect for air-gapped environments and strict compliance policies. |
+| **Tree-sitter precision** | Parses real language grammars, not regexes, giving far fewer false positives than line-based scanners. |
+| **Extensible** | Add new patterns with concise `tree-sitter` queries; no SaaS lock-in. |
 
 ---
 
@@ -76,7 +77,7 @@ $ cargo install nyx-scanner
     Expand-Archive -Path nyx-x86_64-pc-windows-msvc.zip -DestinationPath .
     Move-Item -Path .\nyx.exe -Destination "C:\Program Files\Nyx\"  # Add to PATH manually if needed
     ```
-   
+
 4. Verify the installation:
      ```bash
     nyx --version
@@ -104,11 +105,17 @@ $ nyx scan
 # Scan a specific path and emit JSON
 $ nyx scan ./server --format json
 
-# Perform an ad‑hoc scan without touching the index
+# Perform an ad-hoc scan without touching the index
 $ nyx scan --no-index
 
-# Restrict results to high‑severity findings
+# Restrict results to high-severity findings
 $ nyx scan --high-only
+
+# AST pattern matching only (fastest, no CFG/taint)
+$ nyx scan --ast-only
+
+# CFG + taint analysis only (skip AST pattern rules)
+$ nyx scan --cfg-only
 ```
 
 ### Index Management
@@ -130,20 +137,65 @@ $ nyx clean --all
 
 ---
 
+## Analysis Modes
+
+Nyx supports three analysis modes, selectable via the `scanner.mode` config option or CLI flags:
+
+| Mode | CLI flag | What runs |
+|---|---|---|
+| **Full** (default) | — | AST pattern matching + CFG construction + taint analysis |
+| **AST-only** | `--ast-only` | AST pattern matching only; skips CFG and taint entirely |
+| **Taint-only** | `--cfg-only` | CFG + taint analysis only; filters out AST pattern findings |
+
+### What the CFG + taint engine detects
+
+| Finding | Rule ID | Description |
+|---|---|---|
+| Tainted data flow | `taint-*` | Untrusted data (env vars, user input, file reads) flowing to dangerous sinks (shell exec, SQL, file write) without matching sanitization |
+| Unguarded sink | `cfg-unguarded-sink` | Sink calls not dominated by a guard or sanitizer on the control-flow path |
+| Auth gap | `cfg-auth-gap` | Web handler functions that reach privileged sinks without an auth check |
+| Unreachable security code | `cfg-unreachable-*` | Sanitizers, guards, or sinks in dead code branches |
+| Error fallthrough | `cfg-error-fallthrough` | Error-handling branches that don't terminate, allowing execution to fall through to dangerous operations |
+| Resource leak | `cfg-resource-leak` | Resources acquired but not released on all exit paths (malloc/free, fopen/fclose, Lock/Unlock) |
+
+Findings are scored and ranked by severity, proximity to entry point, path complexity, and taint confirmation.
+
+---
+
+## Supported Languages
+
+All 10 languages have full AST pattern matching and CFG/taint analysis. Resource leak detection is available where language-specific acquire/release pairs are defined.
+
+| Language | AST Patterns | CFG + Taint | Resource Leaks |
+|---|---|---|---|
+| Rust | Yes | Yes | Yes |
+| C | Yes | Yes | Yes |
+| C++ | Yes | Yes | Yes |
+| Java | Yes | Yes | Yes |
+| Go | Yes | Yes | Yes |
+| PHP | Yes | Yes | — |
+| Python | Yes | Yes | — |
+| Ruby | Yes | Yes | — |
+| TypeScript | Yes | Yes | — |
+| JavaScript | Yes | Yes | — |
+
+---
+
 ## Configuration Overview
 
-Nyx merges a default configuration file (`nyx.conf`) with user overrides (`nyx.local`). Both live in the platform‑specific configuration directory shown below.
+Nyx merges a default configuration file (`nyx.conf`) with user overrides (`nyx.local`). Both live in the platform-specific configuration directory shown below.
 
-| Platform      | Directory                                          |
-|---------------|----------------------------------------------------|
-| Linux         | `~/.config/nyx/`                                   |
-| macOS         | `~/Library/Application Support/dev.ecpeter23.nyx/` |
-| Windows       | `%APPDATA%\ecpeter23\nyx\config\`                  |
+| Platform | Directory |
+|---|---|
+| Linux | `~/.config/nyx/` |
+| macOS | `~/Library/Application Support/dev.ecpeter23.nyx/` |
+| Windows | `%APPDATA%\ecpeter23\nyx\config\` |
 
 Minimal example (`nyx.local`):
 
 ```toml
 [scanner]
+mode                = "full"       # full | ast | taint
 min_severity        = "Medium"
 follow_symlinks     = true
 excluded_extensions = ["mp3", "mp4"]
@@ -153,7 +205,7 @@ default_format = "json"
 max_results    = 200
 
 [performance]
-worker_threads     = 8  # 0 = auto‑detect
+worker_threads     = 8  # 0 = auto-detect
 batch_size         = 200
 channel_multiplier = 2
 ```
@@ -164,36 +216,54 @@ A fully documented `nyx.conf` is generated automatically on first run.
 
 ## Architecture in Brief
 
-1. **File enumeration** – A highly parallel walker applies ignore rules, size limits, and user exclusions.
-2. **Parsing** – Supported files are parsed into ASTs via the appropriate `tree‑sitter` grammar.
-3. **Rule execution** – Each language ships with a dedicated rule set expressed as `tree‑sitter` queries. Matches are classified into three severity levels (`High`, `Medium`, `Low`).
-4. **Indexing (optional)** – File digests and findings are stored in SQLite. Later scans skip files whose content and modification time are unchanged.
-5. **Reporting** – Results are grouped by file and emitted to the console or serialized in the requested format.
+Nyx uses a **two-pass architecture** to enable cross-file analysis without sacrificing parallelism:
+
+1. **File enumeration** -- A parallel walker (Rayon + `ignore` crate) applies gitignore rules, size limits, and user exclusions.
+2. **Pass 1 -- Summary extraction** -- Each file is parsed via tree-sitter, an intra-procedural CFG is built (petgraph), and a `FuncSummary` is exported per function capturing source/sanitizer/sink capabilities (bitflags), taint propagation behavior, and callee lists. Summaries are persisted to SQLite.
+3. **Summary merge** -- All per-file summaries are merged into a `GlobalSummaries` map with conservative conflict resolution (union caps, OR booleans).
+4. **Pass 2 -- Analysis** -- Files are re-parsed and analyzed with the full cross-file context: BFS taint propagation resolves callees against local and global summaries, and CFG analysis checks for auth gaps, unguarded sinks, resource leaks, and more.
+5. **Reporting** -- Findings are scored, ranked, deduplicated, and emitted to the console or serialized as JSON.
+
+With indexing enabled, Pass 1 skips files whose blake3 content hash is unchanged, and cached findings are served directly for AST-only results.
 
 ---
 
 ## Roadmap
 
-| Area                  | Planned Improvements                                                                                  |
-|-----------------------|-------------------------------------------------------------------------------------------------------|
-| More language support | Plans to create rule sets for over 100 languages for maximum coverage                                 |
-| Control‑flow analysis | Inter‑procedural function summaries. Cap label propagation & bit‑flag checks. Loop/branch sensitivity |
-| Taint tracking        | Intra‑ / inter‑procedural tracing of untrusted data from sources to sinks                             |
-| Output formats        | Full SARIF 2.1.0, JUnit XML, HTML report generator                                                    |
-| Rule updates          | Remote rule feed with signature verification                                                          |
-| Performance & UX      | Incremental CFG cache, progress‑bar UX, smart file‑watch re‑scan                                      |
+### Phase 1 -- Deep Static Engine
 
-Community feedback will help shape priorities; please open an issue to discuss proposed changes.
+| Feature | Description |
+|---|---|
+| Interprocedural call graph | Precise symbol resolution via `FuncKey`, language-scoped namespaces, cross-module linking. No name-collision merging -- full call graph with topological analysis. |
+| Path-sensitive analysis | Track path predicates and conditional constraints. Detect infeasible paths and validation-only-in-one-branch patterns. Dramatically reduces false positives. |
+| Dataflow & state modeling | Resource state machines (init -> use -> close), auth state transitions, privilege level tracking. Semantic analysis beyond pattern matching. |
+| Attack surface ranking | Score entry points by distance-to-sink, guard strength, path complexity, and privilege escalation potential. Deterministic attack surface scoring. |
 
----
+### Phase 2 -- Dynamic Capability
 
-## Experimental Features & Feedback
+| Feature | Description |
+|---|---|
+| Controlled dynamic execution | Local sandbox: identify entry points, spin up test harnesses, inject payloads, detect runtime crashes and command execution. Deterministic automated exploit validation -- static finds `exec(user_input)`, dynamic confirms it with `; id`. |
+| Fuzzing integration | libFuzzer (C/C++), cargo-fuzz (Rust), go-fuzz, HTTP fuzzing harness. Static engine identifies interesting functions, fuzzer targets only those. |
 
-The new Rust intra‑procedural CFG + taint engine is not enabled.
+### Phase 3 -- Intelligent Reasoning Layer
 
-Expect rough edges: slightly slower scans, occasional false positives, limited language coverage.
+| Feature | Description |
+|---|---|
+| Semantic similarity | Embeddings for finding similar vulnerability patterns across codebases. |
+| LLM reasoning | AI-assisted detection of non-obvious logic bugs. |
+| Exploit refinement | Automated loops to refine and validate exploit chains. |
 
-Please open an issue for every crash, panic, or suspicious result – attach the minimal code snippet and mention the Nyx version.
+### Other planned improvements
+
+| Area | Details |
+|---|---|
+| Output formats | SARIF 2.1.0, JUnit XML, HTML report generator |
+| Language coverage | Expanded taint rules per language, resource leak pairs for Python/Ruby/PHP/JS/TS |
+| Rule updates | Remote rule feed with signature verification |
+| UX | Progress bar, smart file-watch re-scan |
+
+Community feedback shapes priorities -- please [open an issue](https://github.com/ecpeter23/nyx/issues) to discuss proposed changes.
 
 ---
 
@@ -204,7 +274,9 @@ Pull requests are welcome. To contribute:
 1. Fork the repository and create a feature branch.
 2. Adhere to `rustfmt` and ensure `cargo clippy --all -- -D warnings` passes.
 3. Add unit and/or integration tests where applicable (`cargo test` should remain green).
-4. Submit a concise, well‑documented pull request.
+4. Submit a concise, well-documented pull request.
+
+Please open an issue for any crash, panic, or suspicious result -- attach a minimal code snippet and mention the Nyx version.
 
 See `CONTRIBUTING.md` for full guidelines.
 
@@ -212,7 +284,7 @@ See `CONTRIBUTING.md` for full guidelines.
 
 ## License
 
-Nyx is licensed under the **GNU General Public License v3.0 (GPL‑3.0)**.
+Nyx is licensed under the **GNU General Public License v3.0 (GPL-3.0)**.
 
 This ensures that all modified versions of the scanner remain free and open-source, protecting the integrity and transparency of security tools.
 
diff --git a/SECURITY.md b/SECURITY.md
index dfb3594c..77139904 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -4,7 +4,7 @@
 
 | Version | Supported | Notes                |
 |---------|-----------|----------------------|
-| 0.2.x   | ✅        | Latest *alpha* line  |
+| 0.2.x   | ✅        | Latest stable line   |
 | 0.1.x   | ✅        | Critical fixes only  |
 | < 0.1   | ❌        | End-of-life          |
 
diff --git a/benches/fixtures/sample.c b/benches/fixtures/sample.c
new file mode 100644
index 00000000..bac1257d
--- /dev/null
+++ b/benches/fixtures/sample.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+char* get_env_value(void) {
+    return getenv("SECRET");
+}
+
+void execute_command(const char* cmd) {
+    system(cmd);
+}
+
+void safe_flow(void) {
+    char* val = get_env_value();
+    if (val != NULL) {
+        printf("Value: %s\n", val);
+    }
+}
+
+void unsafe_flow(void) {
+    char* val = get_env_value();
+    if (val != NULL) {
+        execute_command(val);
+    }
+}
+
+int main(void) {
+    safe_flow();
+    unsafe_flow();
+    return 0;
+}
diff --git a/benches/fixtures/sample.cpp b/benches/fixtures/sample.cpp
new file mode 100644
index 00000000..9a1c16e8
--- /dev/null
+++ b/benches/fixtures/sample.cpp
@@ -0,0 +1,28 @@
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+std::string get_env_value() {
+    const char* val = std::getenv("APP_SECRET");
+    return val ? std::string(val) : "";
+}
+
+void execute_command(const std::string& cmd) {
+    std::system(cmd.c_str());
+}
+
+void safe_flow() {
+    std::string val = get_env_value();
+    std::cout << "Value: " << val << std::endl;
+}
+
+void unsafe_flow() {
+    std::string val = get_env_value();
+    execute_command(val);
+}
+
+int main() {
+    safe_flow();
+    unsafe_flow();
+    return 0;
+}
diff --git a/benches/fixtures/sample.go b/benches/fixtures/sample.go
new file mode 100644
index 00000000..26cebc3b
--- /dev/null
+++ b/benches/fixtures/sample.go
@@ -0,0 +1,36 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"html"
+)
+
+func getEnv() string {
+	return os.Getenv("APP_SECRET")
+}
+
+func sanitizeHTML(input string) string {
+	return html.EscapeString(input)
+}
+
+func runCommand(cmd string) {
+	exec.Command("sh", "-c", cmd).Run()
+}
+
+func safeFlow() {
+	val := getEnv()
+	clean := sanitizeHTML(val)
+	fmt.Println(clean)
+}
+
+func unsafeFlow() {
+	val := getEnv()
+	runCommand(val)
+}
+
+func main() {
+	safeFlow()
+	unsafeFlow()
+}
diff --git a/benches/fixtures/sample.java b/benches/fixtures/sample.java
new file mode 100644
index 00000000..60b1e65c
--- /dev/null
+++ b/benches/fixtures/sample.java
@@ -0,0 +1,31 @@
+import java.io.IOException;
+
+public class Sample {
+    public static String getEnv() {
+        return System.getenv("DB_PASSWORD");
+    }
+
+    public static String sanitize(String input) {
+        return input.replaceAll("[<>&]", "");
+    }
+
+    public static void executeCommand(String cmd) throws IOException {
+        Runtime.getRuntime().exec(cmd);
+    }
+
+    public static void safeFlow() throws IOException {
+        String val = getEnv();
+        String clean = sanitize(val);
+        System.out.println(clean);
+    }
+
+    public static void unsafeFlow() throws IOException {
+        String val = getEnv();
+        executeCommand(val);
+    }
+
+    public static void main(String[] args) throws IOException {
+        safeFlow();
+        unsafeFlow();
+    }
+}
diff --git a/benches/fixtures/sample.js b/benches/fixtures/sample.js
new file mode 100644
index 00000000..6f81836e
--- /dev/null
+++ b/benches/fixtures/sample.js
@@ -0,0 +1,35 @@
+const { execSync } = require("child_process");
+
+function getUserInput() {
+  return process.env.USER_INPUT || "";
+}
+
+function sanitizeHtml(input) {
+  return input.replace(/[<>&"']/g, "");
+}
+
+function renderPage(data) {
+  document.body.innerHTML = data; // HTML sink on a real DOM element, as in sample.ts
+}
+
+function safeRender() {
+  const input = getUserInput();
+  const clean = sanitizeHtml(input);
+  renderPage(clean);
+}
+
+function unsafeRender() {
+  const input = getUserInput();
+  renderPage(input);
+}
+
+function runShell(cmd) {
+  execSync(cmd);
+}
+
+function unsafeExec() {
+  const input = getUserInput();
+  runShell(input);
+}
+
+module.exports = { safeRender, unsafeRender, unsafeExec };
diff --git a/benches/fixtures/sample.php b/benches/fixtures/sample.php
new file mode 100644
index 00000000..99774b8e
--- /dev/null
+++ b/benches/fixtures/sample.php
@@ -0,0 +1,27 @@
+<?php
+
+function getEnvValue(): string {
+    return getenv('APP_SECRET') ?: '';
+}
+
+function sanitizeHtml(string $input): string {
+    return htmlspecialchars($input, ENT_QUOTES, 'UTF-8');
+}
+
+function executeCommand(string $cmd): void {
+    exec($cmd);
+}
+
+function safeFlow(): void {
+    $val = getEnvValue();
+    $clean = sanitizeHtml($val);
+    echo $clean;
+}
+
+function unsafeFlow(): void {
+    $val = getEnvValue();
+    executeCommand($val);
+}
+
+safeFlow();
+unsafeFlow();
diff --git a/benches/fixtures/sample.py b/benches/fixtures/sample.py
new file mode 100644
index 00000000..88ee128d
--- /dev/null
+++ b/benches/fixtures/sample.py
@@ -0,0 +1,25 @@
+import os
+import subprocess
+import html
+
+def get_env_value():
+    return os.environ.get("SECRET_KEY", "")
+
+def sanitize_input(val):
+    return html.escape(val)
+
+def execute_command(cmd):
+    subprocess.run(cmd, shell=True)
+
+def safe_flow():
+    val = get_env_value()
+    clean = sanitize_input(val)
+    print(clean)
+
+def unsafe_flow():
+    val = get_env_value()
+    execute_command(val)
+
+if __name__ == "__main__":
+    safe_flow()
+    unsafe_flow()
diff --git a/benches/fixtures/sample.rb b/benches/fixtures/sample.rb
new file mode 100644
index 00000000..5376a947
--- /dev/null
+++ b/benches/fixtures/sample.rb
@@ -0,0 +1,27 @@
+require 'cgi'
+
+def get_env_value
+  ENV['APP_SECRET'] || ''
+end
+
+def sanitize_html(input)
+  CGI.escapeHTML(input)
+end
+
+def execute_command(cmd)
+  system(cmd)
+end
+
+def safe_flow
+  val = get_env_value
+  clean = sanitize_html(val)
+  puts clean
+end
+
+def unsafe_flow
+  val = get_env_value
+  execute_command(val)
+end
+
+safe_flow
+unsafe_flow
diff --git a/benches/fixtures/sample.rs b/benches/fixtures/sample.rs
new file mode 100644
index 00000000..742c54a3
--- /dev/null
+++ b/benches/fixtures/sample.rs
@@ -0,0 +1,34 @@
+use std::env;
+use std::process::Command;
+
+fn get_config() -> String {
+    env::var("APP_CONFIG").unwrap_or_default()
+}
+
+fn sanitize_shell(input: &str) -> String {
+    shell_escape::unix::escape(input.into()).to_string()
+}
+
+fn run_command(cmd: &str) {
+    Command::new("sh")
+        .arg("-c")
+        .arg(cmd)
+        .status()
+        .expect("failed to execute");
+}
+
+fn safe_run() {
+    let config = get_config();
+    let clean = sanitize_shell(&config);
+    run_command(&clean);
+}
+
+fn unsafe_run() {
+    let config = get_config();
+    run_command(&config);
+}
+
+fn main() {
+    safe_run();
+    unsafe_run();
+}
diff --git a/benches/fixtures/sample.ts b/benches/fixtures/sample.ts
new file mode 100644
index 00000000..7ab5891f
--- /dev/null
+++ b/benches/fixtures/sample.ts
@@ -0,0 +1,30 @@
+import { execSync } from "child_process";
+
+function getUserInput(): string {
+  return process.env.USER_INPUT || "";
+}
+
+function sanitizeHtml(input: string): string {
+  return input.replace(/[<>&"']/g, "");
+}
+
+function renderPage(data: string): void {
+  document.body.innerHTML = data;
+}
+
+function runCommand(cmd: string): void {
+  execSync(cmd);
+}
+
+function safeRender(): void {
+  const input = getUserInput();
+  const clean = sanitizeHtml(input);
+  renderPage(clean);
+}
+
+function unsafeExec(): void {
+  const input = getUserInput();
+  runCommand(input);
+}
+
+export { safeRender, unsafeExec };
diff --git a/benches/scan_bench.rs b/benches/scan_bench.rs
new file mode 100644
index 00000000..a0260fa3
--- /dev/null
+++ b/benches/scan_bench.rs
@@ -0,0 +1,106 @@
+use criterion::{Criterion, criterion_group, criterion_main};
+use nyx_scanner::utils::Config;
+use nyx_scanner::utils::config::AnalysisMode;
+use std::path::Path;
+
+const FIXTURES: &str = "benches/fixtures";
+
+/// Benchmarks an AST-only scan (no global summaries) of the fixtures on one worker thread.
+fn bench_ast_only_scan(c: &mut Criterion) {
+    let fixtures = Path::new(FIXTURES).canonicalize().expect("fixtures dir");
+    let mut cfg = Config::default();
+    cfg.scanner.mode = AnalysisMode::Ast;
+    cfg.performance.worker_threads = Some(1);
+    cfg.performance.channel_multiplier = 1;
+    cfg.performance.batch_size = 64;
+
+    c.bench_function("ast_only_scan", |b| {
+        b.iter(|| {
+            let (rx, handle) = nyx_scanner::walk::spawn_file_walker(&fixtures, &cfg);
+            // Drain first: joining with unread batches could hang if the channel is bounded.
+            let paths: Vec<_> = rx.into_iter().flatten().collect();
+            if let Err(err) = handle.join() {
+                panic!("walker panicked: {err:#?}");
+            }
+            let mut diags = Vec::new();
+            for path in &paths {
+                // A file that fails to scan simply contributes no diagnostics.
+                let scan = nyx_scanner::ast::run_rules_on_file(path, &cfg, None, Some(&fixtures));
+                diags.extend(scan.unwrap_or_default());
+            }
+            diags
+        });
+    });
+}
+
+/// Benchmarks the full two-pass scan (summaries, then analysis) of the fixtures on one worker.
+fn bench_full_scan(c: &mut Criterion) {
+    let fixtures = Path::new(FIXTURES).canonicalize().expect("fixtures dir");
+    let mut cfg = Config::default();
+    cfg.scanner.mode = AnalysisMode::Full;
+    cfg.performance.worker_threads = Some(1);
+    cfg.performance.channel_multiplier = 1;
+    cfg.performance.batch_size = 64;
+
+    c.bench_function("full_scan", |b| {
+        b.iter(|| {
+            let (rx, handle) = nyx_scanner::walk::spawn_file_walker(&fixtures, &cfg);
+            // Drain first: joining with unread batches could hang if the channel is bounded.
+            let paths: Vec<_> = rx.into_iter().flatten().collect();
+            if let Err(err) = handle.join() {
+                panic!("walker panicked: {err:#?}");
+            }
+
+            // Pass 1: extract summaries
+            let mut all_sums = Vec::new();
+            for path in &paths {
+                if let Ok(sums) = nyx_scanner::ast::extract_summaries_from_file(path, &cfg) {
+                    all_sums.extend(sums);
+                }
+            }
+            let root_str = fixtures.to_string_lossy();
+            let global = nyx_scanner::summary::merge_summaries(all_sums, Some(&root_str));
+
+            // Pass 2: full analysis (files that fail to scan contribute no diagnostics)
+            let mut diags = Vec::new();
+            for p in &paths {
+                let scan =
+                    nyx_scanner::ast::run_rules_on_file(p, &cfg, Some(&global), Some(&fixtures));
+                diags.extend(scan.unwrap_or_default());
+            }
+            diags
+        });
+    });
+}
+
+fn bench_single_file_parse_and_cfg(c: &mut Criterion) {
+    let fixture = Path::new(FIXTURES).join("sample.rs");
+    let fixture = fixture.canonicalize().expect("sample.rs fixture");
+    let cfg = Config::default();
+
+    c.bench_function("single_file_parse_cfg", |b| {
+        b.iter(|| {
+            nyx_scanner::ast::extract_summaries_from_file(&fixture, &cfg)
+                .expect("extract summaries")
+        });
+    });
+}
+
+fn bench_classify(c: &mut Criterion) {
+    c.bench_function("classify_hit", |b| {
+        b.iter(|| nyx_scanner::labels::classify("rust", "std::env::var"));
+    });
+
+    c.bench_function("classify_miss", |b| {
+        b.iter(|| nyx_scanner::labels::classify("rust", "some_random_function"));
+    });
+}
+
+criterion_group!(
+    benches,
+    bench_ast_only_scan,
+    bench_full_scan,
+    bench_single_file_parse_and_cfg,
+    bench_classify,
+);
+criterion_main!(benches);
diff --git a/examples/cfg_analysis/example.js b/examples/cfg_analysis/example.js
new file mode 100644
index 00000000..a65d4dd4
--- /dev/null
+++ b/examples/cfg_analysis/example.js
@@ -0,0 +1,74 @@
+/**
+ EXPECTED OUTPUT (high-level):
+
+ 1) cfg-unguarded-sink (High / High confidence)
+ - handler(req,res): source req.body.cmd flows to child_process.exec(cmd) without sanitizer/guard.
+ - Should rank high (entry-point-ish function name 'handler', close to entry).
+
+ 2) cfg-auth-gap (High / Medium)
+ - handler is entry-point-ish (name matches handler/route/api conventions).
+ - No auth guard dominates sink (require_auth / is_authenticated / is_admin / authorize).
+
+ 3) cfg-error-fallthrough (Medium / Medium)
+ - Example: if (err) { console.log(err); } then exec(...) still runs.
+ - This is the JS analogue of your Go heuristic. If your implementation only targets Go, this should produce NO finding.
+ If you later generalize, this file includes a pattern you can test against.
+
+ 4) cfg-unguarded-sink (HTML) (Medium/High)
+ - req.query.html is written into innerHTML without DOMPurify.sanitize
+
+ 5) No findings for safe paths:
+ - safeHandler (not defined in this file yet) would use encodeURIComponent before exec (URL_ENCODE sanitizer) OR a dedicated sanitizer you map to SHELL_ESCAPE.
+ NOTE: encodeURIComponent is URL_ENCODE, not SHELL_ESCAPE — so for SHELL_ESCAPE sinks, it may still be flagged depending on your caps logic.
+ The “definitely safe” case would use a dummy sanitize_shell() wrapper (also not in this file yet) to match your Rust-style naming if you add it for JS later.
+ - safeHtml uses DOMPurify.sanitize before innerHTML (HTML_ESCAPE).
+
+ Taint / dataflow:
+ - should find taint from req.body / req.query / process.env sources to exec/eval/innerHTML sinks.
+ */
+
+const child_process = require("child_process");
+
+// ─── Entry-point-ish + unguarded shell sink + auth gap ────────────────────────────
+function handler(req, res) {
+    // Source (Cap::all): req.body
+    const cmd = req.body.cmd;
+
+    // Vulnerable sink (Cap::SHELL_ESCAPE): child_process.exec
+    child_process.exec(cmd);
+
+    res.end("ok");
+}
+
+// ─── Guarded HTML sink (should NOT be flagged) ────────────────────────────────────
+function safeHtml(req, res, DOMPurify) {
+    const html = req.query.html; // Source
+    const cleaned = DOMPurify.sanitize(html); // Sanitizer(HTML_ESCAPE)
+    document.getElementById("app").innerHTML = cleaned; // Sink(HTML_ESCAPE)
+    res.end("ok");
+}
+
+// ─── Unguarded HTML sink (should be flagged) ─────────────────────────────────────
+function unsafeHtml(req, res) {
+    const html = req.query.html; // Source
+    document.getElementById("app").innerHTML = html; // Sink(HTML_ESCAPE) without sanitizer
+    res.end("ok");
+}
+
+// ─── Heuristic error fallthrough pattern (JS analogue) ───────────────────────────
+// If your error-handling analysis is Go-only, ignore this for now.
+// If generalized later, it should be flagged.
+function errFallthrough(req, res) {
+    const err = req.query.err;
+    if (err) {
+        console.log(err);
+    }
+    child_process.exec(req.body.cmd);
+    res.end("ok");
+}
+
+// ─── Optional: eval sink (should be flagged) ─────────────────────────────────────
+function evalSink(req) {
+    const payload = process.env.PAYLOAD; // Source
+    eval(payload); // Sink(SHELL_ESCAPE) per your rules
+}
\ No newline at end of file
diff --git a/examples/cfg_analysis/example.rs b/examples/cfg_analysis/example.rs
new file mode 100644
index 00000000..4e420800
--- /dev/null
+++ b/examples/cfg_analysis/example.rs
@@ -0,0 +1,99 @@
+/*!
+EXPECTED OUTPUT (high-level):
+
+1) cfg-unguarded-sink (High / High confidence)
+   - In handle_request(): user input from std::env::var("INPUT") flows to std::process::Command::new("sh").arg(&input)
+   - No dominating SHELL_ESCAPE sanitizer or validation guard for that value.
+   - This should rank very high in scoring (entry-point-ish name + close to entry + shell sink).
+
+2) cfg-auth-gap (High / Medium confidence)
+   - handle_request() looks like an entry-point (name matches handle_*)
+   - Contains a shell sink without an auth guard (require_auth / is_authenticated / is_admin etc.)
+
+3) cfg-resource-leak (Medium / High or Medium confidence)
+   - alloc_then_return_leak(): malloc without free on an early return path.
+
+4) cfg-unreachable-sanitizer or cfg-unreachable-guard (Medium/Low)
+   - unreachable_sanitizer(): sanitizer call in unreachable block.
+
+5) taint / dataflow (existing BFS taint engine):
+   - should detect at least one taint finding for:
+       env::var source -> Command sink
+   - should NOT flag safe_shell() because it uses shell_escape::unix::escape(&input) and passes `safe`.
+
+Notes:
+- This fixture intentionally contains both vulnerable and safe patterns, plus unreachable code and resource misuse,
+  to exercise cfg_analysis::{unreachable, guards, auth, resources, scoring}.
+*/
+
+use std::process::Command;
+
+// ─── CFG: Entry-point-ish + unguarded sink + auth gap ─────────────────────────────
+
+pub fn handle_request() {
+  // Source (Cap::all)
+  let input = std::env::var("INPUT").unwrap();
+
+  // Vulnerable sink (Cap::SHELL_ESCAPE)
+  Command::new("sh").arg(&input).status().unwrap();
+}
+
+// ─── CFG: Guarded sink (should NOT produce cfg-unguarded-sink) ────────────────────
+
+pub fn safe_shell() {
+  let input = std::env::var("INPUT").unwrap();
+
+  // Sanitizer (Cap::SHELL_ESCAPE)
+  let safe = shell_escape::unix::escape(&input);
+
+  // Sink, but guarded by dominating sanitizer
+  Command::new("sh").arg(&safe).status().unwrap();
+}
+
+// ─── CFG: Unreachable sanitizer (should report unreachable sanitizer/guard) ───────
+
+pub fn unreachable_sanitizer() {
+  let input = std::env::var("INPUT").unwrap();
+
+  return;
+
+  // This block is unreachable; should produce an unreachable finding for sanitizer call.
+  let _safe = shell_escape::unix::escape(&input);
+}
+
+// ─── CFG: Resource misuse (malloc without free on some exit path) ─────────────────
+
+extern "C" {
+  fn malloc(size: usize) -> *mut u8;
+  fn free(ptr: *mut u8);
+}
+
+pub fn alloc_then_return_leak(flag: bool) {
+  unsafe {
+    let p = malloc(128);
+
+    // Early return leaks `p` on this path.
+    if flag {
+      return;
+    }
+
+    free(p);
+  }
+}
+
+// ─── Extra: HTML sink labeling sanity (optional) ──────────────────────────────────
+
+// `sink_html` is a test marker recognized as Sink(HTML_ESCAPE) by the label rules.
+// In real code this would be something like response.body(), template.render(), etc.
+fn sink_html(_s: &str) {}
+
+pub fn html_print() {
+  let raw = std::env::var("HTML").unwrap();
+  sink_html(&raw);
+}
+
+pub fn html_print_sanitized() {
+  let raw = std::env::var("HTML").unwrap();
+  let safe = html_escape::encode_safe(&raw);
+  sink_html(&safe);
+}
\ No newline at end of file
diff --git a/examples/cross-file/config.rs b/examples/cross-file/config.rs
new file mode 100644
index 00000000..ead5abd8
--- /dev/null
+++ b/examples/cross-file/config.rs
@@ -0,0 +1,36 @@
+// ─────────────────────────────────────────────────────────────────────────────
+// examples/cross-file/config.rs — Sources
+//
+// This module reads untrusted data from the environment and filesystem.
+// Every public function here acts as a **source** — its return value
+// carries taint.
+//
+// ┌─────────────────────────────────────────────────────────────────────────┐
+// │  FuncSummary produced by pass 1:                                       │
+// │                                                                        │
+// │  get_user_command  → source_caps: ALL, sink: 0, sanitizer: 0           │
+// │  get_config_path   → source_caps: ALL, sink: 0, sanitizer: 0           │
+// │  load_template     → source_caps: ALL, sink: 0, sanitizer: 0           │
+// └─────────────────────────────────────────────────────────────────────────┘
+// ─────────────────────────────────────────────────────────────────────────────
+
+use std::env;
+use std::fs;
+
+/// Reads a user-supplied command from the environment.
+/// Taint: SOURCE(ALL) — caller must sanitise before passing to any sink.
+pub fn get_user_command() -> String {
+    env::var("USER_CMD").unwrap_or_default()
+}
+
+/// Reads a path from the environment.
+/// Taint: SOURCE(ALL)
+pub fn get_config_path() -> String {
+    env::var("CONFIG_PATH").unwrap_or_default()
+}
+
+/// Reads an HTML template from disk (path is trusted, *content* is not).
+/// Taint: SOURCE(ALL)
+pub fn load_template(path: &str) -> String {
+    fs::read_to_string(path).unwrap_or_default()
+}
diff --git a/examples/cross-file/exec.rs b/examples/cross-file/exec.rs
new file mode 100644
index 00000000..d35d6e9b
--- /dev/null
+++ b/examples/cross-file/exec.rs
@@ -0,0 +1,41 @@
+// ─────────────────────────────────────────────────────────────────────────────
+// examples/cross-file/exec.rs — Sinks
+//
+// Functions that perform dangerous operations.  Passing tainted data to
+// these without the matching sanitiser is a vulnerability.
+//
+// ┌─────────────────────────────────────────────────────────────────────────┐
+// │  FuncSummary produced by pass 1:                                       │
+// │                                                                        │
+// │  run_command      → sink_caps: SHELL_ESCAPE, tainted_sink_params: [0]  │
+// │  render_page      → sink_caps: HTML_ESCAPE,  tainted_sink_params: [0]  │
+// │  log_and_execute  → sink_caps: SHELL_ESCAPE, source_caps: ALL          │
+// │                     (both a source AND a sink!)                         │
+// └─────────────────────────────────────────────────────────────────────────┘
+// ─────────────────────────────────────────────────────────────────────────────
+
+use std::env;
+use std::process::Command;
+
+/// Executes a shell command.
+/// Taint: SINK(SHELL_ESCAPE) on `cmd` (param 0).
+pub fn run_command(cmd: &str) {
+    Command::new("sh").arg(cmd).status().unwrap();
+}
+
+/// Renders user content into an HTML page.
+/// Taint: SINK(HTML_ESCAPE) on `body` (param 0).
+pub fn render_page(body: &str) {
+    println!("<html><body>{body}</body></html>");
+}
+
+/// Reads an env var *and* shells out — a function that is simultaneously
+/// a source (return value) and a sink (cmd parameter).
+///
+/// This exercises the "independent caps" design: source_caps and sink_caps
+/// are both non-zero on the same summary.
+pub fn log_and_execute(cmd: &str) -> String {
+    let log_path = env::var("LOG_PATH").unwrap_or_default();
+    Command::new("sh").arg(cmd).status().unwrap();
+    log_path
+}
diff --git a/examples/cross-file/main.rs b/examples/cross-file/main.rs
new file mode 100644
index 00000000..abe49134
--- /dev/null
+++ b/examples/cross-file/main.rs
@@ -0,0 +1,148 @@
+// ─────────────────────────────────────────────────────────────────────────────
+// examples/cross-file/main.rs — The caller
+//
+// This file calls functions from config.rs, sanitize.rs, and exec.rs.
+// It never directly touches std::env, std::fs, or std::process — every
+// source, sanitiser, and sink lives in another file.
+//
+// Nyx's two-pass cross-file taint analysis should:
+//   • Pass 1: summarise config.rs, sanitize.rs, exec.rs
+//   • Pass 2: resolve calls in main.rs against those summaries
+//
+// ─────────────────────────────────────────────────────────────────────────────
+//
+//  EXPECTED NYX OUTPUT
+//  ===================
+//
+//  examples/cross-file/main.rs
+//    42:5   [High]  taint-unsanitised-flow       ← case_1_direct_source_to_sink
+//    66:5   [High]  taint-unsanitised-flow       ← case_3_wrong_sanitiser
+//    90:5   [High]  taint-unsanitised-flow       ← case_5_passthrough_preserves_taint
+//    101:9  [High]  taint-unsanitised-flow       ← case_6_taint_through_branch
+//    134:5  [High]  taint-unsanitised-flow       ← case_8_source_and_sink_same_fn
+//
+//  examples/cross-file/exec.rs
+//    39:5   [High]  taint-unsanitised-flow       ← log_and_execute internal vuln
+//
+//  NO findings expected for:
+//    case_2  (correct sanitiser applied)
+//    case_4  (correct html sanitiser applied)
+//    case_7  (sanitised before branch)
+//
+// ─────────────────────────────────────────────────────────────────────────────
+
+// ─── Case 1: Direct source → sink (UNSAFE) ──────────────────────────────────
+//
+//   get_user_command() returns tainted(ALL)
+//   run_command() is a sink(SHELL_ESCAPE)
+//   No sanitiser in between → FINDING
+//
+fn case_1_direct_source_to_sink() {
+    let cmd = get_user_command();           // tainted(ALL) via cross-file source
+    run_command(&cmd);                      // FINDING: taint reaches shell sink
+}
+
+// ─── Case 2: Correctly sanitised (SAFE) ─────────────────────────────────────
+//
+//   get_user_command() returns tainted(ALL)
+//   sanitize_shell() strips SHELL_ESCAPE
+//   run_command() sinks SHELL_ESCAPE → bit is gone → no finding
+//
+fn case_2_sanitised_before_sink() {
+    let cmd = get_user_command();           // tainted(ALL)
+    let safe = sanitize_shell(&cmd);        // SHELL_ESCAPE bit stripped
+    run_command(&safe);                     // SAFE — no finding
+}
+
+// ─── Case 3: Wrong sanitiser for the sink (UNSAFE) ──────────────────────────
+//
+//   get_user_command() returns tainted(ALL)
+//   sanitize_html() strips HTML_ESCAPE — but NOT SHELL_ESCAPE
+//   run_command() sinks SHELL_ESCAPE → bit still set → FINDING
+//
+fn case_3_wrong_sanitiser() {
+    let cmd = get_user_command();           // tainted(ALL)
+    let wrong = sanitize_html(&cmd);        // strips HTML_ESCAPE only
+    run_command(&wrong);                    // FINDING: SHELL_ESCAPE still set
+}
+
+// ─── Case 4: Correct HTML sanitiser (SAFE) ──────────────────────────────────
+//
+//   load_template() returns tainted(ALL) from file read
+//   sanitize_html() strips HTML_ESCAPE
+//   render_page() sinks HTML_ESCAPE → bit is gone → no finding
+//
+fn case_4_html_sanitised() {
+    let tpl = load_template("page.html");   // tainted(ALL) via cross-file source
+    let safe = sanitize_html(&tpl);         // HTML_ESCAPE bit stripped
+    render_page(&safe);                     // SAFE — no finding
+}
+
+// ─── Case 5: Passthrough preserves taint (UNSAFE) ───────────────────────────
+//
+//   get_user_command() returns tainted(ALL)
+//   passthrough() propagates taint unchanged (propagates_taint = true)
+//   run_command() sinks SHELL_ESCAPE → still tainted → FINDING
+//
+fn case_5_passthrough_preserves_taint() {
+    let cmd = get_user_command();           // tainted(ALL)
+    let same = passthrough(&cmd);           // taint flows through
+    run_command(&same);                     // FINDING: still tainted
+}
+
+// ─── Case 6: Taint flows through only one branch (UNSAFE) ───────────────────
+//
+//   One branch sanitises, the other does not.
+//   The unsanitised branch reaches the sink → FINDING on that path.
+//
+fn case_6_taint_through_branch() {
+    let cmd = get_user_command();           // tainted(ALL)
+    if cmd.len() > 10 {
+        run_command(&cmd);                  // FINDING: unsanitised path
+    } else {
+        let safe = sanitize_shell(&cmd);
+        run_command(&safe);                 // SAFE path
+    }
+}
+
+// ─── Case 7: Sanitised before branch (SAFE) ─────────────────────────────────
+//
+//   Sanitisation happens before the branch → both paths are clean.
+//
+fn case_7_sanitised_before_branch() {
+    let cmd = get_user_command();           // tainted(ALL)
+    let safe = sanitize_shell(&cmd);        // SHELL_ESCAPE stripped
+    if safe.len() > 10 {
+        run_command(&safe);                 // SAFE
+    } else {
+        run_command(&safe);                 // SAFE
+    }
+}
+
+// ─── Case 8: Source-and-sink function (UNSAFE) ──────────────────────────────
+//
+//   log_and_execute() is both:
+//     • a SINK(SHELL_ESCAPE) on its cmd parameter
+//     • a SOURCE(ALL) in its return value (reads env var)
+//
+//   Passing tainted data to it → FINDING for the sink.
+//   Its return value is freshly tainted, but we don't pass it anywhere
+//   dangerous here — so only one finding.
+//
+fn case_8_source_and_sink_same_fn() {
+    let cmd = get_user_command();           // tainted(ALL)
+    let _log = log_and_execute(&cmd);       // FINDING: tainted arg hits shell sink
+    // _log is now tainted(ALL) from log_and_execute's source behaviour,
+    // but we don't use it — no second finding.
+}
+
+fn main() {
+    case_1_direct_source_to_sink();
+    case_2_sanitised_before_sink();
+    case_3_wrong_sanitiser();
+    case_4_html_sanitised();
+    case_5_passthrough_preserves_taint();
+    case_6_taint_through_branch();
+    case_7_sanitised_before_branch();
+    case_8_source_and_sink_same_fn();
+}
diff --git a/examples/cross-file/sanitize.rs b/examples/cross-file/sanitize.rs
new file mode 100644
index 00000000..c64b1006
--- /dev/null
+++ b/examples/cross-file/sanitize.rs
@@ -0,0 +1,30 @@
+// ─────────────────────────────────────────────────────────────────────────────
+// examples/cross-file/sanitize.rs — Sanitizers
+//
+// Functions that clean specific taint capabilities.  After passing through
+// one of these, the corresponding Cap bit is stripped.
+//
+// ┌─────────────────────────────────────────────────────────────────────────┐
+// │  FuncSummary produced by pass 1:                                       │
+// │                                                                        │
+// │  sanitize_shell  → sanitizer_caps: SHELL_ESCAPE, propagates: true      │
+// │  sanitize_html   → sanitizer_caps: HTML_ESCAPE,  propagates: true      │
+// │  passthrough     → sanitizer: 0, source: 0, sink: 0, propagates: true  │
+// └─────────────────────────────────────────────────────────────────────────┘
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Escapes shell metacharacters.  Strips the SHELL_ESCAPE cap bit.
+pub fn sanitize_shell(input: &str) -> String {
+    shell_escape::unix::escape(input.into()).to_string()
+}
+
+/// Escapes HTML entities.  Strips the HTML_ESCAPE cap bit.
+pub fn sanitize_html(input: &str) -> String {
+    html_escape::encode_safe(input).to_string()
+}
+
+/// Does nothing security-relevant — just returns a copy.
+/// Taint passes straight through (propagates_taint = true).
+pub fn passthrough(input: &str) -> String {
+    input.to_string()
+}
diff --git a/examples/single-func/example.rs b/examples/single-func/example.rs
new file mode 100644
index 00000000..ca0642c9
--- /dev/null
+++ b/examples/single-func/example.rs
@@ -0,0 +1,8 @@
+fn source_env(var: &str) -> String {
+    env::var(var).unwrap_or_default()                          // Source(env-var)
+}
+
+fn main() {
+    let raw = source_env("USER_CMD");
+    Command::new("sh").arg(raw).status().unwrap();
+}
\ No newline at end of file
diff --git a/examples/standard/test.rs b/examples/standard/test.rs
index ff89b18e..170b6f5c 100644
--- a/examples/standard/test.rs
+++ b/examples/standard/test.rs
@@ -1,9 +1,30 @@
-use std::{env, process::Command};
-fn main() {
-  let y = env::var("SAFE").unwrap();
+fn source_env(var: &str) -> String {
+    env::var(var).unwrap_or_default()                          // Source(env-var)
+}
 
-  let x = env::var("DANGEROUS").unwrap();
-  let clean = html_escape::encode_safe(&y);
-  Command::new("sh").arg(x).status().unwrap();
-  Command::new("sh").arg(clean).status().unwrap();
+fn source_file(path: &str) -> String {
+    fs::read_to_string(path).unwrap_or_default()               // Source(file-io)
+}
+
+fn sink_shell(arg: &str) {
+    Command::new("sh").arg(arg).status().unwrap();             // Sink(process-spawn)
+}
+
+fn sink_html(out: &str) {
+    println!("{out}");                                         // Sink(html-out)
+}
+
+fn main() {
+    let raw = source_env("USER_CMD");
+    let raw2 = source_file("ANOTHER");
+    let x = source_env("ANOTHER");
+    if x.len() > 5 {
+        sink_shell(&x);                     // EXPECT: UNSAFE
+        return;
+    } else {
+        let escaped = sanitize_shell(&x);
+        sink_shell(&escaped);               // safe
+    }
+    sink_shell(raw);                       // EXPECT: UNSAFE
+    sink_html(raw2);
 }
\ No newline at end of file
diff --git a/src/ast.rs b/src/ast.rs
index 6fdadb12..429ad865 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -1,7 +1,11 @@
-use crate::cfg::{analyse_function, build_cfg};
+use crate::cfg::{build_cfg, export_summaries};
+use crate::cfg_analysis;
 use crate::commands::scan::Diag;
 use crate::errors::{NyxError, NyxResult};
 use crate::patterns::Severity;
+use crate::summary::{FuncSummary, GlobalSummaries};
+use crate::symbol::{Lang, normalize_namespace};
+use crate::taint::analyse_file;
 use crate::utils::config::AnalysisMode;
 use crate::utils::ext::lowercase_ext;
 use crate::utils::{Config, query_cache};
@@ -15,67 +19,189 @@ thread_local! {
 
 /// Convenience alias for node indices.
 fn byte_offset_to_point(tree: &tree_sitter::Tree, byte: usize) -> tree_sitter::Point {
-    // `descendant_for_byte_range` gives us *some* node that starts at `byte`,
-    // `start_position` turns that into rows & columns (both 0-based)
     tree.root_node()
         .descendant_for_byte_range(byte, byte)
         .map(|n| n.start_position())
         .unwrap_or_else(|| tree_sitter::Point { row: 0, column: 0 })
 }
 
-pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
-    tracing::debug!("Running rules on: {}", path.display());
-    let bytes = std::fs::read(path)?;
+/// Resolve a file extension to a (tree‑sitter Language, slug) pair.
+fn lang_for_path(path: &Path) -> Option<(Language, &'static str)> {
+    match lowercase_ext(path) {
+        Some("rs") => Some((Language::from(tree_sitter_rust::LANGUAGE), "rust")),
+        Some("c") => Some((Language::from(tree_sitter_c::LANGUAGE), "c")),
+        Some("cpp") => Some((Language::from(tree_sitter_cpp::LANGUAGE), "cpp")),
+        Some("java") => Some((Language::from(tree_sitter_java::LANGUAGE), "java")),
+        Some("go") => Some((Language::from(tree_sitter_go::LANGUAGE), "go")),
+        Some("php") => Some((Language::from(tree_sitter_php::LANGUAGE_PHP), "php")),
+        Some("py") => Some((Language::from(tree_sitter_python::LANGUAGE), "python")),
+        Some("ts") => Some((
+            Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
+            "typescript",
+        )),
+        Some("js") => Some((
+            Language::from(tree_sitter_javascript::LANGUAGE),
+            "javascript",
+        )),
+        Some("rb") => Some((Language::from(tree_sitter_ruby::LANGUAGE), "ruby")),
+        _ => None,
+    }
+}
 
-    // Fast binary-file guard (skip if >1% NULs)
-    if bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1 {
+/// Fast binary-file guard: true when at least 2% of the bytes are NUL (integer percentage > 1).
+fn is_binary(bytes: &[u8]) -> bool {
+    bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+//  Pass 1: Extract function summaries (no taint analysis)
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Extract function summaries from pre-read bytes.
+///
+/// This is the core **pass 1** implementation. Callers that already hold the
+/// file contents should use this variant to avoid a redundant `fs::read`.
+pub fn extract_summaries_from_bytes(
+    bytes: &[u8],
+    path: &Path,
+    _cfg: &Config,
+) -> NyxResult<Vec<FuncSummary>> {
+    let _span = tracing::debug_span!("extract_summaries", file = %path.display()).entered();
+    if is_binary(bytes) {
         return Ok(vec![]);
     }
 
-    let (ts_lang, lang_slug) = match lowercase_ext(path) {
-        Some("rs") => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
-        Some("c") => (Language::from(tree_sitter_c::LANGUAGE), "c"),
-        Some("cpp") => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
-        Some("java") => (Language::from(tree_sitter_java::LANGUAGE), "java"),
-        Some("go") => (Language::from(tree_sitter_go::LANGUAGE), "go"),
-        Some("php") => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
-        Some("py") => (Language::from(tree_sitter_python::LANGUAGE), "python"),
-        Some("ts") => (
-            Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
-            "typescript",
-        ),
-        Some("js") => (
-            Language::from(tree_sitter_javascript::LANGUAGE),
-            "javascript",
-        ),
-        Some("rb") => (Language::from(tree_sitter_ruby::LANGUAGE), "ruby"),
-        _ => return Ok(vec![]),
+    let Some((ts_lang, lang_slug)) = lang_for_path(path) else {
+        return Ok(vec![]);
+    };
+
+    let tree = PARSER.with(|cell| {
+        let mut parser = cell.borrow_mut();
+        parser.set_language(&ts_lang)?;
+        parser
+            .parse(bytes, None)
+            .ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
+    })?;
+
+    let file_path_str = path.to_string_lossy();
+    let (_cfg_graph, _entry, local_summaries) = build_cfg(&tree, bytes, lang_slug, &file_path_str);
+
+    Ok(export_summaries(
+        &local_summaries,
+        &file_path_str,
+        lang_slug,
+    ))
+}
+
+/// Convenience wrapper that reads the file then delegates to
+/// [`extract_summaries_from_bytes`].
+pub fn extract_summaries_from_file(path: &Path, cfg: &Config) -> NyxResult<Vec<FuncSummary>> {
+    let bytes = std::fs::read(path)?;
+    extract_summaries_from_bytes(&bytes, path, cfg)
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+//  Pass 2 / single‑file: Full rule execution (AST queries + taint)
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Run all enabled analyses on pre-read bytes and return diagnostics.
+///
+/// This is the core **pass 2** implementation. Callers that already hold the
+/// file contents should use this variant to avoid a redundant `fs::read`.
+pub fn run_rules_on_bytes(
+    bytes: &[u8],
+    path: &Path,
+    cfg: &Config,
+    global_summaries: Option<&GlobalSummaries>,
+    scan_root: Option<&Path>,
+) -> NyxResult<Vec<Diag>> {
+    let _span = tracing::debug_span!("run_rules", file = %path.display()).entered();
+
+    if is_binary(bytes) {
+        return Ok(vec![]);
+    }
+
+    let Some((ts_lang, lang_slug)) = lang_for_path(path) else {
+        return Ok(vec![]);
     };
 
     let _tree = PARSER.with(|cell| {
         let mut parser = cell.borrow_mut();
         parser.set_language(&ts_lang)?;
         parser
-            .parse(&*bytes, None)
+            .parse(bytes, None)
             .ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
     })?;
 
     let mut out = Vec::new();
+    let file_path_str = path.to_string_lossy();
 
-    if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Taint {
+    // CFG construction + taint + cfg_analysis only needed for Full/Taint modes.
+    let needs_cfg =
+        cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Taint;
+
+    if needs_cfg {
+        // Build CFG — needed for both taint analysis and CFG structural analyses.
+        let (cfg_graph, entry, summaries) = build_cfg(&_tree, bytes, lang_slug, &file_path_str);
+        let caller_lang = Lang::from_slug(lang_slug).unwrap_or(Lang::Rust);
+
+        // ── Taint analysis ──────────────────────────────────────────────
         tracing::debug!("Running taint analysis on: {}", path.display());
-        let (cfg_graph, entry) = build_cfg(&_tree, &bytes, lang_slug);
+        tracing::debug!("Func summaries: {:?}", summaries);
+        let scan_root_str = scan_root.map(|p| p.to_string_lossy());
+        let namespace = normalize_namespace(&file_path_str, scan_root_str.as_deref());
+        let taint_results = analyse_file(
+            &cfg_graph,
+            entry,
+            &summaries,
+            global_summaries,
+            caller_lang,
+            &namespace,
+            &[],
+        );
+        for finding in &taint_results {
+            // Report the SINK location — where the vulnerability manifests.
+            let sink_byte = cfg_graph[finding.sink].span.0;
+            let sink_point = byte_offset_to_point(&_tree, sink_byte);
 
-        for p in analyse_function(&cfg_graph, entry) {
-            let src_byte = cfg_graph[p.first().copied().unwrap()].span.0;
-            let point = byte_offset_to_point(&_tree, src_byte);
+            // Include source location in the ID so distinct flows through
+            // the same sink (or different sinks at the same line) don't
+            // get collapsed by dedup.
+            let source_byte = cfg_graph[finding.source].span.0;
+            let source_point = byte_offset_to_point(&_tree, source_byte);
 
+            out.push(Diag {
+                path: path.to_string_lossy().into_owned(),
+                line: sink_point.row + 1,
+                col: sink_point.column + 1,
+                severity: Severity::High,
+                id: format!(
+                    "taint-unsanitised-flow (source {}:{})",
+                    source_point.row + 1,
+                    source_point.column + 1
+                ),
+            });
+        }
+
+        // ── CFG structural analyses ─────────────────────────────────────
+        let cfg_ctx = cfg_analysis::AnalysisContext {
+            cfg: &cfg_graph,
+            entry,
+            lang: caller_lang,
+            file_path: &file_path_str,
+            source_bytes: bytes,
+            func_summaries: &summaries,
+            global_summaries,
+            taint_findings: &taint_results,
+        };
+        for cf in cfg_analysis::run_all(&cfg_ctx) {
+            let point = byte_offset_to_point(&_tree, cf.span.0);
             out.push(Diag {
                 path: path.to_string_lossy().into_owned(),
                 line: point.row + 1,
                 col: point.column + 1,
-                severity: Severity::High,
-                id: "taint-unsanitised-flow".into(),
+                severity: cf.severity,
+                id: cf.rule_id,
             });
         }
     }
@@ -90,7 +216,7 @@ pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult<Vec<Diag
             if cfg.scanner.min_severity <= cq.meta.severity {
                 continue;
             }
-            let mut matches = cursor.matches(&cq.query, root, &*bytes);
+            let mut matches = cursor.matches(&cq.query, root, bytes);
             while let Some(m) = matches.next() {
                 if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
                     let point = cap.node.start_position();
@@ -106,7 +232,7 @@ pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult<Vec<Diag
         }
     }
 
-    // Check to ensure no duplicates (DOUBLE-CHECK EFFICIENCY)
+    // Check to ensure no duplicates
     out.sort_by(|a, b| (a.line, a.col, &a.id, a.severity).cmp(&(b.line, b.col, &b.id, b.severity)));
     out.dedup_by(|a, b| {
         a.line == b.line && a.col == b.col && a.id == b.id && a.severity == b.severity
@@ -115,13 +241,25 @@ pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult<Vec<Diag
     Ok(out)
 }
 
+/// Convenience wrapper that reads the file then delegates to
+/// [`run_rules_on_bytes`].
+pub fn run_rules_on_file(
+    path: &Path,
+    cfg: &Config,
+    global_summaries: Option<&GlobalSummaries>,
+    scan_root: Option<&Path>,
+) -> NyxResult<Vec<Diag>> {
+    let bytes = std::fs::read(path)?;
+    run_rules_on_bytes(&bytes, path, cfg, global_summaries, scan_root)
+}
+
 #[test]
 fn unknown_extension_returns_empty() {
     let dir = tempfile::tempdir().unwrap();
     let txt = dir.path().join("notes.txt");
     std::fs::write(&txt, "just some text").unwrap();
 
-    let diags = run_rules_on_file(&txt, &Config::default())
+    let diags = run_rules_on_file(&txt, &Config::default(), None, None)
         .expect("function should never error on plain text");
 
     assert!(diags.is_empty());
@@ -138,6 +276,6 @@ fn binary_file_guard_triggers() {
     }
     std::fs::write(&bin, &data).unwrap();
 
-    let diags = run_rules_on_file(&bin, &Config::default()).unwrap();
+    let diags = run_rules_on_file(&bin, &Config::default(), None, None).unwrap();
     assert!(diags.is_empty(), "binary files are skipped");
 }
diff --git a/src/cfg.rs b/src/cfg.rs
index ba6809d3..86e47507 100644
--- a/src/cfg.rs
+++ b/src/cfg.rs
@@ -3,23 +3,10 @@ use petgraph::prelude::*;
 use tracing::debug;
 use tree_sitter::{Node, Tree};
 
-use crate::labels::{DataLabel, Kind, classify, lookup};
-use std::collections::HashSet;
-use std::hash::{DefaultHasher, Hash, Hasher};
-
-// WHAT WE STILL NEED TO DO:
-// todo: add the cap labels and remove the bit flags after each sanitizer, checking the bit flags with the sink
-//
-//
-// 1.
-// We need to analyze the CFG and add function details to the nodes.
-// And upload each functions status to a cache with the specific status of the function, for example what source it has, what sink it has, what sanitizer it has, and what taint it has.
-//
-// 2.
-// For each taint from a function we will see if it gets tainted in a function if not, we will add it to a list of potentially tainted functions
-// then, after we analyze all the functions, we will see if any of the potentially tainted functions are actually tainted
-//
-// 3.
+use crate::labels::{Cap, DataLabel, Kind, classify, lookup, param_config};
+use crate::summary::FuncSummary;
+use crate::symbol::{FuncKey, Lang};
+use std::collections::{HashMap, HashSet};
 
 /// -------------------------------------------------------------------------
 ///  Public AST‑to‑CFG data structures
@@ -52,9 +39,40 @@ pub struct NodeInfo {
     pub label: Option<DataLabel>, // taint classification if any
     pub defines: Option<String>,  // variable written by this stmt
     pub uses: Vec<String>,        // variables read
+    pub callee: Option<String>,
+    /// Name of the enclosing function (set during CFG construction).
+    pub enclosing_func: Option<String>,
+    /// Per-function call ordinal (0-based, only meaningful for Call nodes).
+    pub call_ordinal: u32,
+}
+
+/// Intra‑file function summary with graph‑local node indices.
+///
+/// Keeps all three cap dimensions independently so that a function that is
+/// *both* a source and a sink (e.g. reads env then shells out) does not
+/// lose information.
+#[derive(Debug, Clone)]
+pub struct LocalFuncSummary {
+    #[allow(dead_code)] // used for future intra-file graph traversal
+    pub entry: NodeIndex,
+    #[allow(dead_code)] // used for future intra-file graph traversal
+    pub exit: NodeIndex,
+    pub source_caps: Cap,
+    pub sanitizer_caps: Cap,
+    pub sink_caps: Cap,
+    pub param_count: usize,
+    pub param_names: Vec<String>,
+    /// Conservative: `true` if *any* parameter variable reaches the return
+    /// value on *any* code path.
+    pub propagates_taint: bool,
+    /// Which parameter indices flow to internal sinks.
+    pub tainted_sink_params: Vec<usize>,
+    /// Callee identifiers found inside this function body.
+    pub callees: Vec<String>,
 }
 
 pub type Cfg = Graph<NodeInfo, EdgeKind>;
+pub type FuncSummaries = HashMap<FuncKey, LocalFuncSummary>;
 
 // -------------------------------------------------------------------------
 //                      Utility helpers
@@ -62,22 +80,48 @@ pub type Cfg = Graph<NodeInfo, EdgeKind>;
 
 /// Return the text of a node.
 #[inline]
-fn text_of<'a>(n: Node<'a>, code: &'a [u8]) -> Option<String> {
+pub(crate) fn text_of<'a>(n: Node<'a>, code: &'a [u8]) -> Option<String> {
     std::str::from_utf8(&code[n.start_byte()..n.end_byte()])
         .ok()
         .map(|s| s.to_string())
 }
 
+/// Walk through chained calls to find the text of the root receiver.
+///
+/// Only nodes classified as `Kind::CallFn` / `Kind::CallMethod` are drilled
+/// through; any other node (e.g. a plain member access) is returned verbatim.
+/// For `Runtime.getRuntime().exec(cmd)` the receiver of `exec` is meant to
+/// resolve to `"Runtime"`, so that labels like `"Runtime.exec"` can match.
+fn root_receiver_text(n: Node, lang: &str, code: &[u8]) -> Option<String> {
+    match lookup(lang, n.kind()) {
+        // The receiver is itself a call — drill into ITS receiver, trying the
+        // `object`, `receiver` and `function` fields in that order.
+        Kind::CallFn | Kind::CallMethod => {
+            let inner = n
+                .child_by_field_name("object")
+                .or_else(|| n.child_by_field_name("receiver"))
+                .or_else(|| n.child_by_field_name("function"));
+            match inner {
+                Some(child) => root_receiver_text(child, lang, code),
+                None => text_of(n, code), // no receiver-like field: use the call's own text
+            }
+        }
+        _ => text_of(n, code), // not a call: this node is the root receiver
+    }
+}
+
 /// Return the callee identifier for the first call / method / macro inside `n`.
+/// Searches recursively through all descendants.
 fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<String> {
     let mut cursor = n.walk();
     for c in n.children(&mut cursor) {
         match lookup(lang, c.kind()) {
             Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
-                // Re-use the same logic we have in `push_node`
                 return match lookup(lang, c.kind()) {
                     Kind::CallFn => c
                         .child_by_field_name("function")
+                        .or_else(|| c.child_by_field_name("method"))
+                        .or_else(|| c.child_by_field_name("name"))
                         .and_then(|f| text_of(f, code)),
                     Kind::CallMethod => {
                         let func = c
@@ -86,9 +130,10 @@ fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<Strin
                             .and_then(|f| text_of(f, code));
                         let recv = c
                             .child_by_field_name("object")
-                            .and_then(|f| text_of(f, code));
+                            .or_else(|| c.child_by_field_name("receiver"))
+                            .and_then(|f| root_receiver_text(f, lang, code));
                         match (recv, func) {
-                            (Some(r), Some(f)) => Some(format!("{r}::{f}")),
+                            (Some(r), Some(f)) => Some(format!("{r}.{f}")),
                             (_, Some(f)) => Some(f.to_string()),
                             _ => None,
                         }
@@ -99,12 +144,227 @@ fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<Strin
                     _ => None,
                 };
             }
-            _ => {}
+            _ => {
+                // Recurse into children (handles nested declarators)
+                if let Some(found) = first_call_ident(c, lang, code) {
+                    return Some(found);
+                }
+            }
         }
     }
     None
 }
 
+/// Build the dot-joined text of a member_expression / attribute / selector_expression.
+/// E.g. `process.env.CMD` → `"process.env.CMD"`; other node kinds yield their own text.
+fn member_expr_text(n: Node, code: &[u8]) -> Option<String> {
+    match n.kind() {
+        "member_expression" | "attribute" | "selector_expression" => {
+            // Object side: `object` (JS/TS member_expression, Python attribute),
+            // `value`, or `operand` (Go selector_expression).  Without `operand`
+            // a Go selector such as `os.Args` lost its package/receiver prefix.
+            let obj = n
+                .child_by_field_name("object")
+                .or_else(|| n.child_by_field_name("value"))
+                .or_else(|| n.child_by_field_name("operand"))
+                // Recurse for nested chains; fall back to the raw node text.
+                .and_then(|o| member_expr_text(o, code).or_else(|| text_of(o, code)));
+            let prop = n
+                .child_by_field_name("property")
+                .or_else(|| n.child_by_field_name("attribute"))
+                .or_else(|| n.child_by_field_name("field"))
+                .and_then(|p| text_of(p, code));
+            match (obj, prop) {
+                (Some(o), Some(p)) => Some(format!("{o}.{p}")),
+                (_, Some(p)) => Some(p),
+                (Some(o), _) => Some(o),
+                _ => text_of(n, code),
+            }
+        }
+        _ => text_of(n, code),
+    }
+}
+
+/// Search `n` (pre-order) for a member expression whose dotted text, or a prefix of it, has a label.
+fn first_member_label(n: Node, lang: &str, code: &[u8]) -> Option<DataLabel> {
+    match n.kind() {
+        "member_expression" | "attribute" | "selector_expression" => {
+            if let Some(full) = member_expr_text(n, code) {
+                // Try the full text first, then progressively strip the last segment
+                // to match rules like "process.env" from "process.env.CMD".
+                let mut candidate = full.as_str();
+                loop {
+                    if let Some(lbl) = classify(lang, candidate) {
+                        return Some(lbl);
+                    }
+                    match candidate.rsplit_once('.') {
+                        Some((prefix, _)) => candidate = prefix,
+                        None => break, // single segment left and still unlabelled
+                    }
+                }
+            }
+        }
+        _ => {} // not a member expression: only the children can match
+    }
+    let mut cursor = n.walk(); // nothing matched at this node: try the children in order
+    for child in n.children(&mut cursor) {
+        if let Some(lbl) = first_member_label(child, lang, code) {
+            return Some(lbl);
+        }
+    }
+    None
+}
+
+/// Return the dot-joined text of the first member expression in `n` (pre-order, `n` included).
+fn first_member_text(n: Node, code: &[u8]) -> Option<String> {
+    match n.kind() {
+        "member_expression" | "attribute" | "selector_expression" => member_expr_text(n, code),
+        _ => { // descend; the first member expression wins even if it carries no label
+            let mut cursor = n.walk();
+            for child in n.children(&mut cursor) {
+                if let Some(t) = first_member_text(child, code) {
+                    return Some(t);
+                }
+            }
+            None
+        }
+    }
+}
+
+/// Return `true` if any strict descendant of `n` is a `CallFn`, `CallMethod` or `CallMacro` node.
+fn has_call_descendant(n: Node, lang: &str) -> bool {
+    let mut cursor = n.walk();
+    for c in n.children(&mut cursor) {
+        match lookup(lang, c.kind()) {
+            Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return true,
+            _ => { // not a call itself: keep looking further down this subtree
+                if has_call_descendant(c, lang) {
+                    return true;
+                }
+            }
+        }
+    }
+    false
+}
+
+/// Append to `out`, in traversal order, every identifier inside `n` (`n` included).
+///
+/// Recognises `identifier` (most languages), `variable_name` (PHP),
+/// `field_identifier` (Go), and `property_identifier` (JS/TS); duplicates are kept.
+fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
+    match n.kind() {
+        "identifier" | "field_identifier" | "property_identifier" => {
+            if let Some(txt) = text_of(n, code) {
+                out.push(txt);
+            }
+        }
+        // PHP: $x is `variable_name` → `$` + `name`. Every leading `$` is stripped.
+        "variable_name" => {
+            if let Some(txt) = text_of(n, code) {
+                out.push(txt.trim_start_matches('$').to_string());
+            }
+        }
+        _ => { // any other node kind: recurse into its children
+            let mut c = n.walk();
+            for ch in n.children(&mut c) {
+                collect_idents(ch, code, out);
+            }
+        }
+    }
+}
+
+/// Return `(defines, uses)` for `ast`: at most one written name, plus the identifiers it reads.
+fn def_use(ast: Node, lang: &str, code: &[u8]) -> (Option<String>, Vec<String>) {
+    match lookup(lang, ast.kind()) {
+        // Declaration wrappers (let, var, short_var_declaration, etc.)
+        Kind::CallWrapper => {
+            let mut defs = None;
+            let mut uses = Vec::new();
+
+            // Try direct field names first (Rust `let_declaration`, Go `short_var_declaration`)
+            let def_node = ast
+                .child_by_field_name("pattern")
+                .or_else(|| ast.child_by_field_name("name"))
+                .or_else(|| ast.child_by_field_name("left"));
+
+            let val_node = ast
+                .child_by_field_name("value")
+                .or_else(|| ast.child_by_field_name("right"));
+
+            if def_node.is_some() || val_node.is_some() {
+                if let Some(pat) = def_node {
+                    let mut tmp = Vec::<String>::new();
+                    collect_idents(pat, code, &mut tmp);
+                    defs = tmp.into_iter().next(); // first identifier of the pattern only
+                }
+                if let Some(val) = val_node {
+                    collect_idents(val, code, &mut uses);
+                }
+            } else {
+                // Try nested declarator pattern (JS/TS `lexical_declaration` → `variable_declarator`,
+                // Java `local_variable_declaration` → `variable_declarator`,
+                // C/C++ `declaration` → `init_declarator`,
+                // Python/Ruby `expression_statement` → `assignment`)
+                let mut cursor = ast.walk();
+                for child in ast.children(&mut cursor) {
+                    let child_name = child
+                        .child_by_field_name("name")
+                        .or_else(|| child.child_by_field_name("declarator"))
+                        .or_else(|| child.child_by_field_name("left"));
+                    let child_value = child
+                        .child_by_field_name("value")
+                        .or_else(|| child.child_by_field_name("right"));
+
+                    // Only treat this child as a declarator if it has a value
+                    // (a name alone is not enough). This prevents method_invocation
+                    // nodes (which have a `name` field) from being misinterpreted.
+                    if child_value.is_some() {
+                        if let Some(name_node) = child_name
+                            && defs.is_none()
+                        {
+                            let mut tmp = Vec::<String>::new();
+                            collect_idents(name_node, code, &mut tmp);
+                            defs = tmp.into_iter().next(); // only the first declarator defines
+                        }
+                        if let Some(val_node) = child_value {
+                            collect_idents(val_node, code, &mut uses);
+                        }
+                    }
+                }
+
+                // Fallback: if still nothing found, collect all idents as uses.
+                // This handles expression_statement wrappers.
+                if defs.is_none() && uses.is_empty() {
+                    collect_idents(ast, code, &mut uses);
+                }
+            }
+            (defs, uses)
+        }
+
+        // Plain assignment `x = y`
+        Kind::Assignment => {
+            let mut defs = None;
+            let mut uses = Vec::new();
+            if let Some(lhs) = ast.child_by_field_name("left") {
+                let mut tmp = Vec::<String>::new();
+                collect_idents(lhs, code, &mut tmp);
+                defs = tmp.pop(); // LAST identifier on the LHS (the declaration arm takes the first)
+            }
+            if let Some(rhs) = ast.child_by_field_name("right") {
+                collect_idents(rhs, code, &mut uses);
+            }
+            (defs, uses)
+        }
+
+        // everything else – no definition, but may read vars
+        _ => {
+            let mut uses = Vec::new();
+            collect_idents(ast, code, &mut uses);
+            (None, uses)
+        }
+    }
+}
+
 /// Create a node in one short borrow and optionally attach a taint label.
 fn push_node<'a>(
     g: &mut Cfg,
@@ -112,6 +372,8 @@ fn push_node<'a>(
     ast: Node<'a>,
     lang: &str,
     code: &'a [u8],
+    enclosing_func: Option<&str>,
+    call_ordinal: u32,
 ) -> NodeIndex {
     /* ── 1.  IDENTIFIER EXTRACTION ─────────────────────────────────────── */
 
@@ -120,6 +382,8 @@ fn push_node<'a>(
         // plain `foo(bar)` style call
         Kind::CallFn => ast
             .child_by_field_name("function")
+            .or_else(|| ast.child_by_field_name("method"))
+            .or_else(|| ast.child_by_field_name("name"))
             .and_then(|n| text_of(n, code))
             .unwrap_or_default(),
 
@@ -131,9 +395,10 @@ fn push_node<'a>(
                 .and_then(|n| text_of(n, code));
             let recv = ast
                 .child_by_field_name("object")
-                .and_then(|n| text_of(n, code));
+                .or_else(|| ast.child_by_field_name("receiver"))
+                .and_then(|n| root_receiver_text(n, lang, code));
             match (recv, func) {
-                (Some(r), Some(f)) => format!("{r}::{f}"),
+                (Some(r), Some(f)) => format!("{r}.{f}"),
                 (_, Some(f)) => f,
                 _ => String::new(),
             }
@@ -149,22 +414,78 @@ fn push_node<'a>(
         _ => text_of(ast, code).unwrap_or_default(),
     };
 
-    // If this is a `let` or `expression_statement` that *contains* a call,
-    // prefer the first inner call identifier instead of the whole line.
-    if matches!(lookup(lang, ast.kind()), Kind::CallWrapper) {
-        if let Some(inner) = first_call_ident(ast, lang, code) {
-            text = inner;
-        }
+    // If this is a declaration/expression wrapper or an assignment that
+    // *contains* a call, prefer the first inner call identifier instead of
+    // the whole line.
+    if matches!(
+        lookup(lang, ast.kind()),
+        Kind::CallWrapper | Kind::Assignment
+    ) && let Some(inner) = first_call_ident(ast, lang, code)
+    {
+        text = inner;
     }
 
     /* ── 2.  LABEL LOOK-UP  ───────────────────────────────────────────── */
 
-    let label = classify(lang, &text);
+    let mut label = classify(lang, &text);
+
+    // For assignments like `element.innerHTML = value`, the inner-call heuristic
+    // above may have overridden `text` with a call on the RHS (e.g. getElementById).
+    // If that didn't produce a label, check the LHS property name — it may be a
+    // sink like `innerHTML`.
+    //
+    // This covers both direct `Kind::Assignment` nodes and `Kind::CallWrapper`
+    // nodes (expression_statement) that wrap an assignment.
+    if label.is_none() {
+        let assign_node = if matches!(lookup(lang, ast.kind()), Kind::Assignment) {
+            Some(ast)
+        } else if matches!(lookup(lang, ast.kind()), Kind::CallWrapper) {
+            // Walk children to find a nested assignment_expression
+            let mut cursor = ast.walk();
+            ast.children(&mut cursor)
+                .find(|c| matches!(lookup(lang, c.kind()), Kind::Assignment))
+        } else {
+            None
+        };
+
+        if let Some(assign) = assign_node
+            && let Some(lhs) = assign.child_by_field_name("left")
+            && let Some(prop) = lhs.child_by_field_name("property")
+            && let Some(prop_text) = text_of(prop, code)
+        {
+            label = classify(lang, &prop_text);
+        }
+    }
+
+    // For declarations/assignments whose RHS is a member expression (not a call),
+    // try to classify the member expression text as a source.
+    // This handles `var x = process.env.CMD` (JS), `os.environ["KEY"]` (Python),
+    // and similar property-access-based source patterns.
+    if label.is_none()
+        && matches!(
+            lookup(lang, ast.kind()),
+            Kind::CallWrapper | Kind::Assignment
+        )
+        && let Some(found) = first_member_label(ast, lang, code)
+    {
+        label = Some(found);
+        // Update text so the callee name reflects the source
+        if let Some(member_text) = first_member_text(ast, code) {
+            text = member_text;
+        }
+    }
+
     let span = (ast.start_byte(), ast.end_byte());
 
     /* ── 3.  GRAPH INSERTION + DEBUG ──────────────────────────────────── */
 
-    let (defines, uses) = def_use(ast, code);
+    let (defines, uses) = def_use(ast, lang, code);
+
+    let callee = if kind == StmtKind::Call {
+        Some(text.clone())
+    } else {
+        None
+    };
 
     let idx = g.add_node(NodeInfo {
         kind,
@@ -172,6 +493,9 @@ fn push_node<'a>(
         label,
         defines,
         uses,
+        callee,
+        enclosing_func: enclosing_func.map(|s| s.to_string()),
+        call_ordinal,
     });
 
     debug!(
@@ -186,6 +510,60 @@ fn push_node<'a>(
     idx
 }
 
+/// Extract parameter names from a function AST node, in parameter-list order.
+///
+/// Uses the language's `ParamConfig` to find the parameter list field and the
+/// identifier of each parameter; parameters yielding no identifier are skipped.
+fn extract_param_names<'a>(func_node: Node<'a>, lang: &str, code: &'a [u8]) -> Vec<String> {
+    let cfg = param_config(lang);
+    let mut names = Vec::new();
+    let Some(params) = func_node.child_by_field_name(cfg.params_field) else {
+        return names;
+    };
+    let mut cursor = params.walk();
+    for child in params.children(&mut cursor) {
+        // Self/this parameter (e.g. Rust's `self_parameter`) is always recorded as "self"
+        if cfg.self_param_kinds.contains(&child.kind()) {
+            names.push("self".into());
+            continue;
+        }
+
+        // Regular parameter
+        if cfg.param_node_kinds.contains(&child.kind()) {
+            // Try each ident field in order; the first field yielding an identifier wins
+            let mut found = false;
+            for &field in cfg.ident_fields {
+                if let Some(node) = child.child_by_field_name(field) {
+                    let mut tmp = Vec::new();
+                    collect_idents(node, code, &mut tmp);
+                    if let Some(first) = tmp.into_iter().next() {
+                        names.push(first);
+                        found = true;
+                        break;
+                    }
+                }
+            }
+            // Fallback: if the param node itself is an identifier (e.g. JS/Python)
+            if !found
+                && child.kind() == "identifier"
+                && let Some(txt) = text_of(child, code)
+            {
+                names.push(txt);
+            }
+            // Fallback for C/C++: look for nested declarator → identifier
+            if !found && child.kind() == "parameter_declaration" {
+                let mut tmp = Vec::new();
+                collect_idents(child, code, &mut tmp);
+                if let Some(last) = tmp.pop() { // last identifier found in the declaration
+                    names.push(last);
+                }
+            }
+            continue; // redundant (last statement of the loop body) but harmless
+        }
+    }
+    names
+}
+
 /// Add the same edge (of the same kind) from every node in `froms` to `to`.
 #[inline]
 fn connect_all(g: &mut Cfg, froms: &[NodeIndex], to: NodeIndex, kind: EdgeKind) {
@@ -199,12 +577,17 @@ fn connect_all(g: &mut Cfg, froms: &[NodeIndex], to: NodeIndex, kind: EdgeKind)
 //    The recursive *work‑horse* that converts an AST node into a CFG slice.
 //    Returns the set of *exit* nodes that need to be wired further.
 // -------------------------------------------------------------------------
+#[allow(clippy::too_many_arguments)]
 fn build_sub<'a>(
     ast: Node<'a>,
     preds: &[NodeIndex], // predecessor frontier
     g: &mut Cfg,
     lang: &str,
     code: &'a [u8],
+    summaries: &mut FuncSummaries,
+    file_path: &str,
+    enclosing_func: Option<&str>,
+    call_ordinal: &mut u32,
 ) -> Vec<NodeIndex> {
     match lookup(lang, ast.kind()) {
         // ─────────────────────────────────────────────────────────────────
@@ -212,22 +595,43 @@ fn build_sub<'a>(
         // ─────────────────────────────────────────────────────────────────
         Kind::If => {
             // Condition node
-            let cond = push_node(g, StmtKind::If, ast, lang, code);
+            let cond = push_node(g, StmtKind::If, ast, lang, code, enclosing_func, 0);
             connect_all(g, preds, cond, EdgeKind::Seq);
 
-            // Locate then & else blocks
+            // Locate then & else blocks using field-based lookup first,
+            // then positional fallback (Rust uses positional blocks).
             let (then_block, else_block) = {
-                let mut cursor = ast.walk();
-                let blocks: Vec<_> = ast
-                    .children(&mut cursor)
-                    .filter(|n| n.kind() == "block")
-                    .collect();
-                (blocks.first().copied(), blocks.get(1).copied())
+                let field_then = ast
+                    .child_by_field_name("consequence")
+                    .or_else(|| ast.child_by_field_name("body"));
+                let field_else = ast.child_by_field_name("alternative");
+
+                if field_then.is_some() || field_else.is_some() {
+                    (field_then, field_else)
+                } else {
+                    // Fallback: positional block children (Rust `if_expression`)
+                    let mut cursor = ast.walk();
+                    let blocks: Vec<_> = ast
+                        .children(&mut cursor)
+                        .filter(|n| lookup(lang, n.kind()) == Kind::Block)
+                        .collect();
+                    (blocks.first().copied(), blocks.get(1).copied())
+                }
             };
 
             // THEN branch
             let then_exits = if let Some(b) = then_block {
-                let exits = build_sub(b, &[cond], g, lang, code);
+                let exits = build_sub(
+                    b,
+                    &[cond],
+                    g,
+                    lang,
+                    code,
+                    summaries,
+                    file_path,
+                    enclosing_func,
+                    call_ordinal,
+                );
                 // True edges leave the condition
                 if let Some(&first) = exits.first() {
                     connect_all(g, &[cond], first, EdgeKind::True);
@@ -239,7 +643,17 @@ fn build_sub<'a>(
 
             // ELSE branch
             let else_exits = if let Some(b) = else_block {
-                let exits = build_sub(b, &[cond], g, lang, code);
+                let exits = build_sub(
+                    b,
+                    &[cond],
+                    g,
+                    lang,
+                    code,
+                    summaries,
+                    file_path,
+                    enclosing_func,
+                    call_ordinal,
+                );
                 if let Some(&first) = exits.first() {
                     connect_all(g, &[cond], first, EdgeKind::False);
                 }
@@ -258,12 +672,22 @@ fn build_sub<'a>(
 
         Kind::InfiniteLoop => {
             // Synthetic header node
-            let header = push_node(g, StmtKind::Loop, ast, lang, code);
+            let header = push_node(g, StmtKind::Loop, ast, lang, code, enclosing_func, 0);
             connect_all(g, preds, header, EdgeKind::Seq);
 
             // The body is the single `block` child
             let body = ast.child_by_field_name("body").expect("loop without body");
-            let body_exits = build_sub(body, &[header], g, lang, code);
+            let body_exits = build_sub(
+                body,
+                &[header],
+                g,
+                lang,
+                code,
+                summaries,
+                file_path,
+                enclosing_func,
+                call_ordinal,
+            );
 
             // Back-edge from every linear exit to header
             for &e in &body_exits {
@@ -277,7 +701,7 @@ fn build_sub<'a>(
         //  WHILE / FOR: classic loop with a back edge.
         // ─────────────────────────────────────────────────────────────────
         Kind::While | Kind::For => {
-            let header = push_node(g, StmtKind::Loop, ast, lang, code);
+            let header = push_node(g, StmtKind::Loop, ast, lang, code, enclosing_func, 0);
             connect_all(g, preds, header, EdgeKind::Seq);
 
             // Body = first (and usually only) block child.
@@ -285,11 +709,22 @@ fn build_sub<'a>(
                 .child_by_field_name("body")
                 .or_else(|| {
                     let mut c = ast.walk();
-                    ast.children(&mut c).find(|n| n.kind() == "block")
+                    ast.children(&mut c)
+                        .find(|n| lookup(lang, n.kind()) == Kind::Block)
                 })
                 .expect("loop without body");
 
-            let body_exits = build_sub(body, &[header], g, lang, code);
+            let body_exits = build_sub(
+                body,
+                &[header],
+                g,
+                lang,
+                code,
+                summaries,
+                file_path,
+                enclosing_func,
+                call_ordinal,
+            );
 
             // Back‑edge for every linear exit → header.
             for &e in &body_exits {
@@ -303,17 +738,29 @@ fn build_sub<'a>(
         //  Control-flow sinks (return / break / continue).
         // ─────────────────────────────────────────────────────────────────
         Kind::Return => {
-            let ret = push_node(g, StmtKind::Return, ast, lang, code);
-            connect_all(g, preds, ret, EdgeKind::Seq);
-            Vec::new() // terminates this path
+            if has_call_descendant(ast, lang) {
+                // Return-call bug fix: emit a Call node BEFORE the Return so
+                // that callee labels (source/sanitizer/sink) are applied.
+                let ord = *call_ordinal;
+                *call_ordinal += 1;
+                let call_idx = push_node(g, StmtKind::Call, ast, lang, code, enclosing_func, ord);
+                connect_all(g, preds, call_idx, EdgeKind::Seq);
+                let ret = push_node(g, StmtKind::Return, ast, lang, code, enclosing_func, 0);
+                connect_all(g, &[call_idx], ret, EdgeKind::Seq);
+                Vec::new()
+            } else {
+                let ret = push_node(g, StmtKind::Return, ast, lang, code, enclosing_func, 0);
+                connect_all(g, preds, ret, EdgeKind::Seq);
+                Vec::new() // terminates this path
+            }
         }
         Kind::Break => {
-            let brk = push_node(g, StmtKind::Break, ast, lang, code);
+            let brk = push_node(g, StmtKind::Break, ast, lang, code, enclosing_func, 0);
             connect_all(g, preds, brk, EdgeKind::Seq);
             Vec::new()
         }
         Kind::Continue => {
-            let cont = push_node(g, StmtKind::Continue, ast, lang, code);
+            let cont = push_node(g, StmtKind::Continue, ast, lang, code, enclosing_func, 0);
             connect_all(g, preds, cont, EdgeKind::Seq);
             Vec::new()
         }
@@ -324,22 +771,281 @@ fn build_sub<'a>(
         Kind::SourceFile | Kind::Block => {
             let mut cursor = ast.walk();
             let mut frontier = preds.to_vec();
+            // Track the last frontier before a function emptied it — used to
+            // keep subsequent functions reachable.
+            let mut last_live_frontier = preds.to_vec();
             for child in ast.children(&mut cursor) {
-                frontier = build_sub(child, &frontier, g, lang, code);
+                let child_is_fn = lookup(lang, child.kind()) == Kind::Function;
+
+                // At module / source-file level, each function definition is an
+                // independent entry point — it must always be reachable from the
+                // file-level predecessors.  Without this, a preceding function
+                // that ends with `return` (frontier = []) would leave subsequent
+                // functions disconnected from the graph.
+                let child_preds = if child_is_fn && frontier.is_empty() {
+                    last_live_frontier.clone()
+                } else {
+                    frontier.clone()
+                };
+
+                let child_exits = build_sub(
+                    child,
+                    &child_preds,
+                    g,
+                    lang,
+                    code,
+                    summaries,
+                    file_path,
+                    enclosing_func,
+                    call_ordinal,
+                );
+
+                if !child_exits.is_empty() {
+                    last_live_frontier = child_exits.clone();
+                }
+                frontier = child_exits;
             }
             frontier
         }
 
         // Function item – create a header and dive into its body
         Kind::Function => {
-            let header = push_node(g, StmtKind::Seq, ast, lang, code);
-            connect_all(g, preds, header, EdgeKind::Seq);
+            // 1) create a header node for this fn
+            // Try "name" first (most languages), then "declarator" (C/C++)
+            let fn_name = ast
+                .child_by_field_name("name")
+                .or_else(|| ast.child_by_field_name("declarator"))
+                .and_then(|n| {
+                    // For C/C++ function_declarator, extract just the identifier
+                    let mut tmp = Vec::new();
+                    collect_idents(n, code, &mut tmp);
+                    tmp.into_iter().next()
+                })
+                .unwrap_or_else(|| "<anon>".to_string());
+            let entry_idx = push_node(g, StmtKind::Seq, ast, lang, code, Some(&fn_name), 0);
+            connect_all(g, preds, entry_idx, EdgeKind::Seq);
 
-            if let Some(body) = ast.child_by_field_name("body") {
-                build_sub(body, &[header], g, lang, code)
-            } else {
-                vec![header] // declaration w/o body
+            // 1b) extract parameter names
+            let param_names = extract_param_names(ast, lang, code);
+            let param_count = param_names.len();
+
+            // 2) build its body with a fresh call ordinal counter for this function scope
+            let body = ast.child_by_field_name("body").expect("fn w/o body");
+            let mut fn_call_ordinal: u32 = 0;
+            let body_exits = build_sub(
+                body,
+                &[entry_idx],
+                g,
+                lang,
+                code,
+                summaries,
+                file_path,
+                Some(&fn_name),
+                &mut fn_call_ordinal,
+            );
+
+            // ───── 3) light-weight dataflow ──────────────────────────────────────
+            //
+            // Sweep every node inside this function’s span.  Track:
+            //  • which cap bits each variable carries (var_taint)
+            //  • independent source / sanitizer / sink caps for the function
+            //  • which params flow to sinks (tainted_sink_params)
+            //  • whether any param reaches a return value (propagates_taint)
+            //  • all callees
+            let mut var_taint = HashMap::<String, Cap>::new();
+            let mut node_bits = HashMap::<NodeIndex, Cap>::new();
+            let mut fn_src_bits = Cap::empty();
+            let mut fn_sani_bits = Cap::empty();
+            let mut fn_sink_bits = Cap::empty();
+            let mut callees = Vec::<String>::new();
+            let mut tainted_sink_params: Vec<usize> = Vec::new();
+
+            let param_set: HashSet<&str> = param_names.iter().map(|s| s.as_str()).collect();
+
+            for idx in g.node_indices() {
+                let info = &g[idx];
+                if info.span.0 < ast.start_byte() || info.span.1 > ast.end_byte() {
+                    continue;
+                }
+
+                // collect callee names
+                if let Some(callee) = &info.callee
+                    && !callees.contains(callee)
+                {
+                    callees.push(callee.clone());
+                }
+
+                // record explicit label caps (all three independently)
+                if let Some(DataLabel::Source(bits)) = info.label {
+                    fn_src_bits |= bits;
+                }
+                if let Some(DataLabel::Sanitizer(bits)) = info.label {
+                    fn_sani_bits |= bits;
+                }
+                if let Some(DataLabel::Sink(bits)) = info.label {
+                    fn_sink_bits |= bits;
+
+                    // check whether any param flows to this sink
+                    for u in &info.uses {
+                        if let Some(pos) = param_names.iter().position(|p| p == u)
+                            && !tainted_sink_params.contains(&pos)
+                        {
+                            tainted_sink_params.push(pos);
+                        }
+                    }
+                }
+
+                //  a) incoming taint from any vars we read
+                let mut in_bits = Cap::empty();
+                for u in &info.uses {
+                    if let Some(b) = var_taint.get(u) {
+                        in_bits |= *b;
+                    }
+                }
+
+                //  b) apply this node’s own label
+                let mut out_bits = in_bits;
+                if let Some(lab) = &info.label {
+                    match *lab {
+                        DataLabel::Source(bits) => out_bits |= bits,
+                        DataLabel::Sanitizer(bits) => out_bits &= !bits,
+                        DataLabel::Sink(_) => { /* no-op */ }
+                    }
+                }
+
+                //  c) write it back to the var we define (if any)
+                if let Some(def) = &info.defines {
+                    if out_bits.is_empty() {
+                        var_taint.remove(def);
+                    } else {
+                        var_taint.insert(def.clone(), out_bits);
+                    }
+                }
+
+                //  d) stash it for later
+                node_bits.insert(idx, out_bits);
             }
+
+            // fold in explicit returns
+            for (&idx, &bits) in &node_bits {
+                if g[idx].kind == StmtKind::Return {
+                    fn_src_bits |= bits;
+                }
+            }
+
+            // implicit returns via fall-through exits
+            for &pred in &body_exits {
+                if let Some(&bits) = node_bits.get(&pred) {
+                    fn_src_bits |= bits;
+                }
+            }
+
+            // ───── propagates_taint ──────────────────────────────────────────────
+            //
+            // A function propagates taint when a parameter variable reaches a
+            // return value (explicit or implicit) while still carrying taint bits.
+            //
+            // We approximate this conservatively: any return/exit node that names a
+            // param, or a return reading a tainted var while a param is tainted, counts.
+            let propagates = {
+                let mut prop = false;
+
+                // check explicit returns
+                for &idx in node_bits.keys() {
+                    if g[idx].kind == StmtKind::Return {
+                        for u in &g[idx].uses {
+                            if param_set.contains(u.as_str()) {
+                                prop = true;
+                            }
+                            // also check if the var was derived from a param
+                            if let Some(bits) = var_taint.get(u)
+                                && !bits.is_empty()
+                                && param_names.iter().any(|p| var_taint.contains_key(p))
+                            {
+                                prop = true;
+                            }
+                        }
+                    }
+                }
+
+                // check implicit returns (fall-through body exits)
+                for &exit_pred in &body_exits {
+                    let info = &g[exit_pred];
+                    for u in &info.uses {
+                        if param_set.contains(u.as_str()) {
+                            prop = true;
+                        }
+                    }
+                    if let Some(def) = &info.defines
+                        && param_set.contains(def.as_str())
+                    {
+                        prop = true;
+                    }
+                }
+
+                prop
+            };
+
+            tainted_sink_params.sort_unstable();
+            tainted_sink_params.dedup();
+
+            /* ───── 4) synthesise an explicit exit-node and wire it up ──────────── */
+            let exit_idx = g.add_node(NodeInfo {
+                kind: StmtKind::Return,
+                span: (ast.start_byte(), ast.end_byte()),
+                label: None,
+                defines: None,
+                uses: Vec::new(),
+                callee: None,
+                enclosing_func: Some(fn_name.clone()),
+                call_ordinal: 0,
+            });
+            // Wire body exits (fall-through) to the exit node.
+            for &b in &body_exits {
+                connect_all(g, &[b], exit_idx, EdgeKind::Seq);
+            }
+            // Also wire any Return nodes inside the function to the exit
+            // node.  `build_sub` for Kind::Return returns Vec::new() (no
+            // exits), so those nodes are dead-ends in the graph.  Without
+            // this edge, the synthetic exit node is unreachable whenever
+            // the function body ends with a `return` statement, which
+            // disconnects all subsequent functions at the module level.
+            for idx in g.node_indices() {
+                let info = &g[idx];
+                if info.kind == StmtKind::Return
+                    && info.span.0 >= ast.start_byte()
+                    && info.span.1 <= ast.end_byte()
+                    && idx != exit_idx
+                    && !g.contains_edge(idx, exit_idx)
+                {
+                    connect_all(g, &[idx], exit_idx, EdgeKind::Seq);
+                }
+            }
+
+            /* ───── 5) store the rich summary ──────────────────────────────────── */
+            let key = FuncKey {
+                lang: Lang::from_slug(lang).unwrap_or(Lang::Rust),
+                namespace: file_path.to_owned(),
+                name: fn_name.clone(),
+                arity: Some(param_count),
+            };
+            summaries.insert(
+                key,
+                LocalFuncSummary {
+                    entry: entry_idx,
+                    exit: exit_idx,
+                    source_caps: fn_src_bits,
+                    sanitizer_caps: fn_sani_bits,
+                    sink_caps: fn_sink_bits,
+                    param_count,
+                    param_names,
+                    propagates_taint: propagates,
+                    tainted_sink_params,
+                    callees,
+                },
+            );
+
+            vec![exit_idx]
         }
 
         // Statements that **may** contain a call ---------------------------------
@@ -352,39 +1058,76 @@ fn build_sub<'a>(
                     Kind::InfiniteLoop | Kind::While | Kind::For | Kind::If
                 )
             }) {
-                return build_sub(inner, preds, g, lang, code);
+                return build_sub(
+                    inner,
+                    preds,
+                    g,
+                    lang,
+                    code,
+                    summaries,
+                    file_path,
+                    enclosing_func,
+                    call_ordinal,
+                );
             }
 
-            let has_call = ast.children(&mut cursor).any(|c| {
-                matches!(
-                    lookup(lang, c.kind()),
-                    Kind::CallFn | Kind::CallMethod | Kind::CallMacro
-                )
-            });
+            let has_call = has_call_descendant(ast, lang);
 
             let kind = if has_call {
                 StmtKind::Call
             } else {
                 StmtKind::Seq
             };
-            let node = push_node(g, kind, ast, lang, code);
+            let ord = if kind == StmtKind::Call {
+                let o = *call_ordinal;
+                *call_ordinal += 1;
+                o
+            } else {
+                0
+            };
+            let node = push_node(g, kind, ast, lang, code, enclosing_func, ord);
             connect_all(g, preds, node, EdgeKind::Seq);
             vec![node]
         }
 
+        // Direct call nodes (Ruby `call`, Python `call`, etc. when they appear
+        // as direct children of a block rather than wrapped in expression_statement)
+        Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
+            let ord = *call_ordinal;
+            *call_ordinal += 1;
+            let n = push_node(g, StmtKind::Call, ast, lang, code, enclosing_func, ord);
+            connect_all(g, preds, n, EdgeKind::Seq);
+            vec![n]
+        }
+
+        // Assignment that may contain a call (Python `x = os.getenv(...)`, Ruby `x = gets()`)
+        Kind::Assignment => {
+            let has_call = has_call_descendant(ast, lang);
+            let kind = if has_call {
+                StmtKind::Call
+            } else {
+                StmtKind::Seq
+            };
+            let ord = if kind == StmtKind::Call {
+                let o = *call_ordinal;
+                *call_ordinal += 1;
+                o
+            } else {
+                0
+            };
+            let n = push_node(g, kind, ast, lang, code, enclosing_func, ord);
+            connect_all(g, preds, n, EdgeKind::Seq);
+            vec![n]
+        }
+
         // Trivia we drop completely ---------------------------------------------
-        // "line_comment" | "block_comment"
-        // | ";" | "," | "(" | ")" | "{" | "}" | "\n"
-        // | "use_declaration"
-        // | "attribute_item"
-        // | "mod_item" | "type_item"
         Kind::Trivia => preds.to_vec(),
 
         // ─────────────────────────────────────────────────────────────────
         //  Every other node = simple sequential statement
         // ─────────────────────────────────────────────────────────────────
         _ => {
-            let n = push_node(g, StmtKind::Seq, ast, lang, code);
+            let n = push_node(g, StmtKind::Seq, ast, lang, code, enclosing_func, 0);
             connect_all(g, preds, n, EdgeKind::Seq);
             vec![n]
         }
@@ -402,16 +1145,25 @@ fn build_sub<'a>(
 ///   the graph compact.
 /// * Wires a synthetic `Entry` node in front and a synthetic `Exit` node after
 ///   all real sinks.
-pub(crate) fn build_cfg<'a>(tree: &'a Tree, code: &'a [u8], lang: &str) -> (Cfg, NodeIndex) {
+pub(crate) fn build_cfg<'a>(
+    tree: &'a Tree,
+    code: &'a [u8],
+    lang: &str,
+    file_path: &str,
+) -> (Cfg, NodeIndex, FuncSummaries) {
     debug!(target: "cfg", "Building CFG for {:?}", tree.root_node());
 
     let mut g: Cfg = Graph::with_capacity(128, 256);
+    let mut summaries = FuncSummaries::new();
     let entry = g.add_node(NodeInfo {
         kind: StmtKind::Entry,
         span: (0, 0),
         label: None,
         defines: None,
         uses: Vec::new(),
+        callee: None,
+        enclosing_func: None,
+        call_ordinal: 0,
     });
     let exit = g.add_node(NodeInfo {
         kind: StmtKind::Exit,
@@ -419,11 +1171,25 @@ pub(crate) fn build_cfg<'a>(tree: &'a Tree, code: &'a [u8], lang: &str) -> (Cfg,
         label: None,
         defines: None,
         uses: Vec::new(),
+        callee: None,
+        enclosing_func: None,
+        call_ordinal: 0,
     });
 
     // Build the body below the synthetic ENTRY.
-    let exits = build_sub(tree.root_node(), &[entry], &mut g, lang, code);
-
+    let mut top_ordinal: u32 = 0;
+    let exits = build_sub(
+        tree.root_node(),
+        &[entry],
+        &mut g,
+        lang,
+        code,
+        &mut summaries,
+        file_path,
+        None,
+        &mut top_ordinal,
+    );
+    debug!(target: "cfg", "exits: {:?}", exits);
     // Wire every real exit to our synthetic EXIT node.
     for e in exits {
         connect_all(&mut g, &[e], exit, EdgeKind::Seq);
@@ -472,358 +1238,46 @@ pub(crate) fn build_cfg<'a>(tree: &'a Tree, code: &'a [u8], lang: &str) -> (Cfg,
         debug!(target: "cfg", "dominator tree computed (len = {:?})", doms);
     }
 
-    (g, entry)
+    (g, entry, summaries)
 }
 
-/* ---------- TAINT-ANALYSIS PASSES ---------- */
-/// Recursively collect every identifier that occurs inside `n`.
-fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
-    if n.kind() == "identifier" {
-        if let Some(txt) = text_of(n, code) {
-            out.push(txt);
-        }
-    } else {
-        let mut c = n.walk();
-        for ch in n.children(&mut c) {
-            collect_idents(ch, code, out);
-        }
-    }
+/// Convert the graph-local `FuncSummaries` into [`FuncSummary`] values for cross-file
+/// persistence: one per entry, without the graph-local `entry`/`exit` node indices.
+pub(crate) fn export_summaries(
+    summaries: &FuncSummaries,
+    file_path: &str,
+    lang: &str,
+) -> Vec<FuncSummary> {
+    summaries
+        .iter()
+        .map(|(key, local)| FuncSummary {
+            name: key.name.clone(), // only the name is taken from the key
+            file_path: file_path.to_owned(), // from the argument; `key.namespace` is not read
+            lang: lang.to_owned(), // from the argument; `key.lang` is not read
+            param_count: local.param_count,
+            param_names: local.param_names.clone(),
+            source_caps: local.source_caps.bits(), // `Cap` sets are exported through `.bits()`
+            sanitizer_caps: local.sanitizer_caps.bits(),
+            sink_caps: local.sink_caps.bits(),
+            propagates_taint: local.propagates_taint,
+            tainted_sink_params: local.tainted_sink_params.clone(),
+            callees: local.callees.clone(),
+        })
+        .collect()
 }
 
-/// Return `(defines, uses)` for the AST fragment `ast`.
-fn def_use(ast: Node, code: &[u8]) -> (Option<String>, Vec<String>) {
-    match ast.kind() {
-        // `let <pat> = <val>;`
-        "let_declaration" => {
-            let mut defs = None;
-            let mut uses = Vec::new();
-
-            if let Some(pat) = ast.child_by_field_name("pattern") {
-                // first identifier inside the pattern = variable name
-                let mut tmp = Vec::<String>::new();
-                collect_idents(pat, code, &mut tmp);
-                defs = tmp.into_iter().next();
-            }
-            if let Some(val) = ast.child_by_field_name("value") {
-                collect_idents(val, code, &mut uses);
-            }
-            (defs, uses)
-        }
-
-        // Plain assignment `x = y + z`
-        "assignment_expression" => {
-            let mut defs = None;
-            let mut uses = Vec::new();
-            if let Some(lhs) = ast.child_by_field_name("left") {
-                let mut tmp = Vec::<String>::new();
-                collect_idents(lhs, code, &mut tmp);
-                defs = tmp.pop();
-            }
-            if let Some(rhs) = ast.child_by_field_name("right") {
-                collect_idents(rhs, code, &mut uses);
-            }
-            (defs, uses)
-        }
-
-        // everything else – no definition, but may read vars
-        _ => {
-            let mut uses = Vec::new();
-            collect_idents(ast, code, &mut uses);
-            (None, uses)
-        }
-    }
-}
-
-fn set_hash(s: &HashSet<String>) -> u64 {
-    let mut v: Vec<_> = s.iter().collect();
-    v.sort(); // deterministic
-    let mut h = DefaultHasher::new();
-    v.hash(&mut h);
-    h.finish()
-}
-
-fn apply_taint(node: &NodeInfo, taint: &HashSet<String>) -> HashSet<String> {
-    let mut out = taint.clone();
-
-    match node.label {
-        // A new untrusted value enters the program
-        Some(DataLabel::Source(_)) => {
-            if let Some(d) = &node.defines {
-                out.insert(d.clone());
-            }
-        }
-        // Anything written by a sanitizer becomes clean – whatever its
-        // arguments were is irrelevant here.
-        Some(DataLabel::Sanitizer(_)) => {
-            if let Some(d) = &node.defines {
-                out.remove(d);
-            }
-        }
-
-        // A function call *returning* tainted/clean data ----------------------
-        // (`let v = source_*()` or `let v = sanitize_*(x)`)
-        _ if node.kind == StmtKind::Call => {
-            if let Some(d) = &node.defines {
-                match node.label {
-                    Some(DataLabel::Source(_)) => {
-                        out.insert(d.clone());
-                    } // gen
-                    Some(DataLabel::Sanitizer(_)) => {
-                        out.remove(d);
-                    } // kill
-                    _ => { /* normal flow handled below */ }
-                }
-            }
-        }
-
-        // All other statements: classic gen/kill for assignments
-        _ => {
-            if let Some(d) = &node.defines {
-                let rhs_tainted = node.uses.iter().any(|u| out.contains(u));
-                if rhs_tainted {
-                    out.insert(d.clone());
-                } else {
-                    out.remove(d);
-                }
-            }
-        }
-    }
-
-    out
-}
-
-pub fn analyse_function(cfg: &Cfg, entry: NodeIndex) -> Vec<Vec<NodeIndex>> {
-    use std::collections::{HashMap, HashSet, VecDeque};
-
-    /// Queue item: current CFG node + taint map that holds here
-    #[derive(Clone)]
-    struct Item {
-        node: NodeIndex,
-        taint: HashSet<String>,
-    }
-
-    // (node, taint_hash)  →  predecessor key   (for path rebuild)
-    type Key = (NodeIndex, u64);
-    let mut pred: HashMap<Key, Key> = HashMap::new();
-
-    // Seen states so we do not revisit them infinitely
-    let mut seen: HashSet<Key> = HashSet::new();
-
-    // Resulting Source→Sink paths
-    let mut findings: Vec<Vec<NodeIndex>> = Vec::new();
-
-    let mut q = VecDeque::new();
-    q.push_back(Item {
-        node: entry,
-        taint: HashSet::new(),
-    });
-    seen.insert((entry, 0));
-
-    while let Some(Item { node, taint }) = q.pop_front() {
-        let updated = apply_taint(&cfg[node], &taint); // step effect
-
-        /* ----------     SINK CHECK     ---------- */
-        if let Some(DataLabel::Sink(_)) = cfg[node].label {
-            if cfg[node].uses.iter().any(|u| updated.contains(u)) {
-                // reconstruct path back to *any* Source
-                let mut p: Vec<NodeIndex> = vec![node];
-                let mut k = (node, set_hash(&taint)); // predecessor key
-
-                while let Some(&(prev, _)) = pred.get(&k) {
-                    p.push(prev);
-                    if matches!(cfg[prev].label, Some(DataLabel::Source(_))) {
-                        break;
-                    }
-                    // climb further
-                    let prev_hash = pred.get(&k).map(|(_, h)| *h).unwrap_or(0);
-                    k = (prev, prev_hash);
-                }
-                p.reverse();
-                findings.push(p);
-            }
-        }
-
-        /* ----------   BFS successor step   ---------- */
-        for succ in cfg.neighbors(node) {
-            let key = (succ, set_hash(&updated));
-            if !seen.contains(&key) {
-                seen.insert(key);
-                pred.insert(key, (node, set_hash(&taint)));
-                q.push_back(Item {
-                    node: succ,
-                    taint: updated.clone(),
-                });
-            }
-        }
-    }
-
-    findings
-}
-
-#[test]
-fn env_to_arg_is_flagged() {
-    use tree_sitter::Language;
-    let src = br#"
-        use std::env; use std::process::Command;
-        fn main() {
-            let x = env::var("DANGEROUS_ARG").unwrap();
-            Command::new("sh").arg(x).status().unwrap();
-        }"#;
-
-    let mut parser = tree_sitter::Parser::new();
-    parser
-        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
-        .unwrap();
-    let tree = parser.parse(src as &[u8], None).unwrap();
-
-    let (cfg, entry) = build_cfg(&tree, src, "rust");
-    let findings = analyse_function(&cfg, entry);
-
-    assert_eq!(findings.len(), 1); // exactly one unsanitised Source→Sink
-}
-
-#[test]
-fn taint_through_if_else() {
-    use tree_sitter::Language;
-    let src = br#"
-        use std::env; use std::process::Command;
-        fn main() {
-            let x = env::var("DANGEROUS").unwrap();
-            let safe = html_escape::encode_safe(&x);
-
-            if x.len() > 5 {
-                Command::new("sh").arg(&x).status().unwrap();   // UNSAFE
-            } else {
-                Command::new("sh").arg(&safe).status().unwrap(); // SAFE
-            }
-        }"#;
-
-    let mut parser = tree_sitter::Parser::new();
-    parser
-        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
-        .unwrap();
-    let tree = parser.parse(src as &[u8], None).unwrap();
-
-    let (cfg, entry) = build_cfg(&tree, src, "rust");
-    let findings = analyse_function(&cfg, entry);
-
-    // exactly one path (via the True branch) should be flagged
-    assert_eq!(findings.len(), 1);
-}
-
-#[test]
-fn taint_through_while_loop() {
-    use tree_sitter::Language;
-    let src = br#"
-        use std::{env, process::Command};
-        fn main() {
-            let mut x = env::var("DANGEROUS").unwrap();
-            while x.len() < 100 {                       // Loop header (Loop)
-                x.push_str("a");
-            }
-            Command::new("sh").arg(x).status().unwrap(); // Should be flagged
-        }"#;
-
-    let mut parser = tree_sitter::Parser::new();
-    parser
-        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
-        .unwrap();
-    let tree = parser.parse(src as &[u8], None).unwrap();
-
-    let (cfg, entry) = build_cfg(&tree, src, "rust");
-    let findings = analyse_function(&cfg, entry);
-    assert_eq!(findings.len(), 1);
-}
-
-#[test]
-fn taint_killed_by_sanitizer() {
-    use tree_sitter::Language;
-    let src = br#"
-        use std::{env, process::Command};
-        fn main() {
-            let x = env::var("DANGEROUS").unwrap();
-            let clean = html_escape::encode_safe(&x);    // sanitizer node
-            Command::new("sh").arg(clean).status().unwrap();  // SAFE
-        }"#;
-
-    let mut parser = tree_sitter::Parser::new();
-    parser
-        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
-        .unwrap();
-    let tree = parser.parse(src as &[u8], None).unwrap();
-
-    let (cfg, entry) = build_cfg(&tree, src, "rust");
-    let findings = analyse_function(&cfg, entry);
-    assert!(findings.is_empty());
-}
-
-#[test]
-fn taint_breaks_out_of_loop() {
-    use tree_sitter::Language;
-    let src = br#"
-        use std::{env, process::Command};
-        fn main() {
-            loop {
-                let x = env::var("DANGEROUS").unwrap();
-                Command::new("sh").arg(&x).status().unwrap(); // vulnerable
-                break;
-            }
-        }"#;
-
-    let mut parser = tree_sitter::Parser::new();
-    parser
-        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
-        .unwrap();
-    let tree = parser.parse(src as &[u8], None).unwrap();
-
-    let (cfg, entry) = build_cfg(&tree, src, "rust");
-    let findings = analyse_function(&cfg, entry);
-    assert_eq!(findings.len(), 1);
-}
-
-#[test]
-fn test_two_sources() {
-    use tree_sitter::Language;
-    let src = br#"
-        use std::{env, process::Command};
-        fn main() {
-            let x = env::var("DANGEROUS").unwrap();
-            let y = env::var("SAFE").unwrap();
-            let clean = html_escape::encode_safe(&y);
-            Command::new("sh").arg(x).status().unwrap();
-            Command::new("sh").arg(clean).status().unwrap();
-        }"#;
-
-    let mut parser = tree_sitter::Parser::new();
-    parser
-        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
-        .unwrap();
-    let tree = parser.parse(src as &[u8], None).unwrap();
-
-    let (cfg, entry) = build_cfg(&tree, src, "rust");
-    let findings = analyse_function(&cfg, entry);
-    assert_eq!(findings.len(), 1);
-}
-
-#[test]
-fn test_should_not_panic_on_empty_function() {
-    use tree_sitter::Language;
-    let src = br#"
-        use std::{env, process::Command};
-        fn f() {
-            if cond() {
-                return;
-            }
-            do_something();
-        }"#;
-
-    let mut parser = tree_sitter::Parser::new();
-    parser
-        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
-        .unwrap();
-    let tree = parser.parse(src as &[u8], None).unwrap();
-
-    let (cfg, entry) = build_cfg(&tree, src, "rust");
-    let findings = analyse_function(&cfg, entry);
-    assert!(findings.is_empty());
-}
+// pub(crate) fn dump_cfg(g: &Cfg) {
+//     debug!(target: "taint", "CFG DUMP: nodes = {}, edges = {}", g.node_count(), g.edge_count());
+//     for idx in g.node_indices() {
+//         debug!(target: "taint", "  node {:>3}: {:?}", idx.index(), g[idx]);
+//     }
+//     for e in g.edge_references() {
+//         debug!(
+//             target: "taint",
+//             "  edge {:>3} → {:<3} ({:?})",
+//             e.source().index(),
+//             e.target().index(),
+//             e.weight()
+//         );
+//     }
+// }
diff --git a/src/cfg_analysis/auth.rs b/src/cfg_analysis/auth.rs
new file mode 100644
index 00000000..3a622f0e
--- /dev/null
+++ b/src/cfg_analysis/auth.rs
@@ -0,0 +1,225 @@
+use super::dominators::{self, dominates};
+use super::{
+    AnalysisContext, CfgAnalysis, CfgFinding, Confidence, is_auth_call, is_entry_point_func,
+    is_sink,
+};
+use crate::cfg::StmtKind;
+use crate::labels::DataLabel;
+use crate::patterns::Severity;
+use crate::symbol::Lang;
+use petgraph::graph::NodeIndex;
+
+/// CFG analysis that reports privileged sinks inside web handlers which are
+/// not preceded by a dominating authentication call (rule id `cfg-auth-gap`).
+pub struct AuthGap;
+
+/// Returns `true` when `info` is labelled as a sink whose capabilities include
+/// shell execution (`Cap::SHELL_ESCAPE`) or file I/O (`Cap::FILE_IO`).
+///
+/// These are the only sinks the auth-gap check cares about; a node with any
+/// other label, or with no label at all, yields `false`.
+fn is_privileged_sink(info: &crate::cfg::NodeInfo) -> bool {
+    use crate::labels::Cap;
+
+    matches!(
+        info.label,
+        Some(DataLabel::Sink(caps)) if caps.intersects(Cap::SHELL_ESCAPE | Cap::FILE_IO)
+    )
+}
+
+/// Heuristically decides whether `func_name` takes HTTP-handler-like parameters.
+///
+/// Parameter names are gathered from every summary in `ctx.func_summaries`
+/// whose entry node belongs to `func_name`, lowercased (ASCII), and compared
+/// against a per-language list of conventional names. Only parameter *names*
+/// are inspected, never types. Languages without a list always yield `false`.
+fn has_web_handler_params(ctx: &AnalysisContext, func_name: &str) -> bool {
+    /// True when at least one entry of `names` equals one of `candidates`.
+    fn any_named(names: &[String], candidates: &[&str]) -> bool {
+        names
+            .iter()
+            .any(|name| candidates.contains(&name.as_str()))
+    }
+
+    // Lowercase once up front; every language arm compares case-insensitively.
+    let params: Vec<String> = ctx
+        .func_summaries
+        .values()
+        .filter(|summary| ctx.cfg[summary.entry].enclosing_func.as_deref() == Some(func_name))
+        .flat_map(|summary| summary.param_names.iter())
+        .map(|param| param.as_str().to_ascii_lowercase())
+        .collect();
+
+    match ctx.lang {
+        // Rust web frameworks: actix-web, axum, rocket, warp.
+        Lang::Rust => any_named(
+            &params,
+            &[
+                "request",
+                "req",
+                "http_request",
+                "httprequest",
+                "json",
+                "query",
+                "form",
+                "payload",
+                "body",
+                "web",
+            ],
+        ),
+        // Express.js / Node.js: (req, res), (request, response) or a lone ctx.
+        Lang::JavaScript | Lang::TypeScript => {
+            let request_like = any_named(&params, &["req", "request", "ctx"]);
+            let response_like = any_named(&params, &["res", "response"]);
+            (request_like && response_like) || any_named(&params, &["ctx"])
+        }
+        // Django/Flask: request, self+request.
+        Lang::Python => any_named(&params, &["request", "req"]),
+        // net/http: (w http.ResponseWriter, r *http.Request). Both a writer-like
+        // and a request-like name must be present.
+        Lang::Go => {
+            any_named(&params, &["w", "writer", "rw"])
+                && any_named(&params, &["r", "req", "request"])
+        }
+        // Servlet / Spring: exact request names, or anything containing "httpservlet".
+        Lang::Java => params
+            .iter()
+            .any(|p| p == "request" || p == "req" || p.contains("httpservlet")),
+        // Rails controllers use params implicitly; Sinatra uses request.
+        Lang::Ruby => any_named(&params, &["request", "req", "params"]),
+        Lang::Php => any_named(&params, &["$request", "request", "$req"]),
+        _ => false,
+    }
+}
+
+/// Decides whether `func_name` should be treated as a *web* entry point.
+///
+/// * `main` qualifies only when it has web-like parameters (otherwise it is
+///   treated as a CLI entry point).
+/// * Any other function must first match the language's entry-point naming
+///   rules, and then either have web-like parameters or carry a strong handler
+///   name (`handle_*`, `route_*`, `api_*`, or exactly `handler`,
+///   case-insensitive). The name-only fallback covers functions for which no
+///   parameter information is available.
+fn is_web_entrypoint(ctx: &AnalysisContext, func_name: &str) -> bool {
+    if func_name == "main" {
+        return has_web_handler_params(ctx, func_name);
+    }
+    if !is_entry_point_func(func_name, ctx.lang) {
+        return false;
+    }
+    if has_web_handler_params(ctx, func_name) {
+        return true;
+    }
+
+    // No web-like parameters: accept only unmistakable handler names.
+    let lowered = func_name.to_ascii_lowercase();
+    lowered == "handler"
+        || ["handle_", "route_", "api_"]
+            .iter()
+            .any(|prefix| lowered.starts_with(*prefix))
+}
+
+/// Collects the names of all functions in the CFG that qualify as web entry
+/// points, each listed once, in order of first appearance among the nodes.
+fn find_web_entry_point_functions(ctx: &AnalysisContext) -> Vec<String> {
+    let mut found: Vec<String> = Vec::new();
+
+    let enclosing_names = ctx
+        .cfg
+        .node_indices()
+        .filter_map(|idx| ctx.cfg[idx].enclosing_func.as_ref());
+
+    for name in enclosing_names {
+        // Skip names already recorded before running the entry-point heuristic.
+        if !found.contains(name) && is_web_entrypoint(ctx, name) {
+            found.push(name.clone());
+        }
+    }
+
+    found
+}
+
+/// Returns the indices of every CFG node that `is_auth_call` recognises as an
+/// authentication check for the context's language.
+fn find_auth_nodes(ctx: &AnalysisContext) -> Vec<NodeIndex> {
+    let mut auth_nodes = Vec::new();
+    for idx in ctx.cfg.node_indices() {
+        if is_auth_call(&ctx.cfg[idx], ctx.lang) {
+            auth_nodes.push(idx);
+        }
+    }
+    auth_nodes
+}
+
+impl CfgAnalysis for AuthGap {
+    /// Short identifier of this analysis.
+    fn name(&self) -> &'static str {
+        "auth-gap"
+    }
+
+    /// Reports every privileged sink inside a web entry-point function that is
+    /// not dominated by an authentication call located in the same function.
+    ///
+    /// Returns one `cfg-auth-gap` finding (high severity, medium confidence,
+    /// `score: None`) per offending sink, or an empty vector when the CFG
+    /// contains no web entry points.
+    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
+        // Bail out before the dominator computation when there is nothing to check.
+        let entry_funcs = find_web_entry_point_functions(ctx);
+        if entry_funcs.is_empty() {
+            return Vec::new();
+        }
+
+        let doms = dominators::compute_dominators(ctx.cfg, ctx.entry);
+        let auth_nodes = find_auth_nodes(ctx);
+        let mut findings = Vec::new();
+
+        for idx in ctx.cfg.node_indices() {
+            let info = &ctx.cfg[idx];
+
+            // Cheap pre-filter: only sinks and calls can be relevant.
+            if !is_sink(info) && info.kind != StmtKind::Call {
+                continue;
+            }
+
+            // Only privileged sinks (shell, file I/O) are flagged; this check
+            // also rejects every node that is not a sink at all.
+            if !is_privileged_sink(info) {
+                continue;
+            }
+
+            // Only nodes inside web entry-point functions are checked.
+            let Some(func_name) = info
+                .enclosing_func
+                .as_deref()
+                .filter(|name| entry_funcs.iter().any(|f| f.as_str() == *name))
+            else {
+                continue;
+            };
+
+            // An auth call only protects the sink when it lives in the same
+            // function and dominates it. Without the same-function test, an
+            // auth call in an unrelated function could silence the finding,
+            // the very case the guard analysis explicitly rules out.
+            let has_auth = auth_nodes.iter().any(|&auth_idx| {
+                ctx.cfg[auth_idx].enclosing_func.as_deref() == Some(func_name)
+                    && dominates(&doms, auth_idx, idx)
+            });
+            if has_auth {
+                continue;
+            }
+
+            let callee_desc = info.callee.as_deref().unwrap_or("(sensitive op)");
+
+            findings.push(CfgFinding {
+                rule_id: "cfg-auth-gap".to_string(),
+                title: "Missing auth check".to_string(),
+                severity: Severity::High,
+                confidence: Confidence::Medium,
+                span: info.span,
+                message: format!(
+                    "Sensitive operation `{callee_desc}` in web handler `{func_name}` \
+                     has no dominating authentication check"
+                ),
+                evidence: vec![idx],
+                score: None,
+            });
+        }
+
+        findings
+    }
+}
diff --git a/src/cfg_analysis/dominators.rs b/src/cfg_analysis/dominators.rs
new file mode 100644
index 00000000..a4bab838
--- /dev/null
+++ b/src/cfg_analysis/dominators.rs
@@ -0,0 +1,154 @@
+use crate::cfg::{Cfg, EdgeKind, NodeInfo, StmtKind};
+use crate::labels::DataLabel;
+use petgraph::algo::dominators::{Dominators, simple_fast};
+use petgraph::graph::NodeIndex;
+use petgraph::prelude::*;
+use petgraph::visit::Bfs;
+use std::collections::HashSet;
+
+/// Compute forward dominators from entry.
+///
+/// Thin wrapper around petgraph's `simple_fast`, with `entry` as the root of
+/// the dominator tree. The result is meant to be queried with [`dominates`].
+pub fn compute_dominators(cfg: &Cfg, entry: NodeIndex) -> Dominators<NodeIndex> {
+    simple_fast(cfg, entry)
+}
+
+/// Computes post-dominators: dominators of the edge-reversed graph, rooted at
+/// the Exit node.
+///
+/// Returns `None` when the CFG has no Exit node; in that case the reversed
+/// graph is never built.
+pub fn compute_post_dominators(cfg: &Cfg) -> Option<Dominators<NodeIndex>> {
+    find_exit_node(cfg).map(|exit| {
+        // NOTE(review): `exit` is an index into `cfg`; this relies on the
+        // reversed copy assigning the same indices to its nodes.
+        let reversed = build_reversed_graph(cfg);
+        simple_fast(&reversed, exit)
+    })
+}
+
+/// Returns every node visited by a breadth-first traversal starting at
+/// `entry` (the start node itself included).
+pub fn reachable_set(cfg: &Cfg, entry: NodeIndex) -> HashSet<NodeIndex> {
+    let mut walker = Bfs::new(cfg, entry);
+    // Drain the traversal straight into the set.
+    std::iter::from_fn(|| walker.next(cfg)).collect()
+}
+
+/// Returns the first node (in index order) whose kind is `StmtKind::Exit`,
+/// or `None` when the graph has no such node.
+pub fn find_exit_node(cfg: &Cfg) -> Option<NodeIndex> {
+    for idx in cfg.node_indices() {
+        if cfg[idx].kind == StmtKind::Exit {
+            return Some(idx);
+        }
+    }
+    None
+}
+
+/// Returns, in index order, every node labelled `DataLabel::Sink`.
+pub fn find_sink_nodes(cfg: &Cfg) -> Vec<NodeIndex> {
+    let mut sinks = Vec::new();
+    for idx in cfg.node_indices() {
+        if let Some(DataLabel::Sink(_)) = cfg[idx].label {
+            sinks.push(idx);
+        }
+    }
+    sinks
+}
+
+/// Returns `true` when `dominator` dominates `target` according to `doms`.
+///
+/// A node always dominates itself. Otherwise the chain of immediate dominators
+/// is followed upwards from `target`; the answer is `true` as soon as
+/// `dominator` shows up on that chain and `false` once the chain ends (no
+/// immediate dominator, or a node reported as its own immediate dominator).
+pub fn dominates(doms: &Dominators<NodeIndex>, dominator: NodeIndex, target: NodeIndex) -> bool {
+    if dominator == target {
+        return true;
+    }
+
+    // Lazily enumerate target, idom(target), idom(idom(target)), ...
+    let chain = std::iter::successors(Some(target), |&current| {
+        doms.immediate_dominator(current)
+            // A self-referential entry marks the root: stop there.
+            .filter(|&idom| idom != current)
+    });
+
+    // The first element is `target` itself, which was handled above.
+    chain.skip(1).any(|ancestor| ancestor == dominator)
+}
+
+/// Build a reversed copy of the graph (swap edge directions).
+///
+/// Every node of `cfg` is cloned into a fresh graph in `node_indices()` order,
+/// then every edge `a -> b` is re-added as `b -> a` with the same weight.
+///
+/// Old-to-new index translation goes through a hash map, so the whole build
+/// is O(nodes + edges) instead of one linear scan per edge endpoint.
+fn build_reversed_graph(cfg: &Cfg) -> Graph<NodeInfo, EdgeKind> {
+    let mut rev = Graph::<NodeInfo, EdgeKind>::with_capacity(cfg.node_count(), cfg.edge_count());
+
+    // Clone nodes, remembering which new index each original node received.
+    let mut index_map: std::collections::HashMap<NodeIndex, NodeIndex> =
+        std::collections::HashMap::with_capacity(cfg.node_count());
+    for idx in cfg.node_indices() {
+        let new_idx = rev.add_node(cfg[idx].clone());
+        index_map.insert(idx, new_idx);
+    }
+
+    // Add edges in reverse direction: the old target becomes the new source.
+    for edge in cfg.edge_references() {
+        // Lookups cannot fail: every edge endpoint is a node of `cfg`, and
+        // all nodes were inserted into the map above.
+        let new_src = index_map[&edge.target()];
+        let new_tgt = index_map[&edge.source()];
+        rev.add_edge(new_src, new_tgt, *edge.weight());
+    }
+
+    rev
+}
+
+/// Returns every `Call` node whose callee name matches one of `matchers`.
+///
+/// Matching is ASCII case-insensitive. A matcher ending in `_` is treated as a
+/// prefix of the callee name; any other matcher is treated as a suffix.
+/// Call nodes without a callee name never match.
+#[allow(dead_code)]
+pub fn find_call_nodes_matching(cfg: &Cfg, matchers: &[&str]) -> Vec<NodeIndex> {
+    // Normalise the matchers once rather than once per node.
+    let patterns: Vec<String> = matchers.iter().map(|m| m.to_ascii_lowercase()).collect();
+
+    let mut hits = Vec::new();
+    for idx in cfg.node_indices() {
+        let node = &cfg[idx];
+        if node.kind != StmtKind::Call {
+            continue;
+        }
+        let Some(callee) = &node.callee else {
+            continue;
+        };
+
+        let callee = callee.to_ascii_lowercase();
+        let is_match = patterns.iter().any(|pattern| {
+            if pattern.ends_with('_') {
+                callee.starts_with(pattern.as_str())
+            } else {
+                callee.ends_with(pattern.as_str())
+            }
+        });
+        if is_match {
+            hits.push(idx);
+        }
+    }
+    hits
+}
+
+/// Returns `true` when `to` is reachable from `from` by following CFG edges
+/// (a node is always reachable from itself).
+#[allow(dead_code)]
+pub fn has_path(cfg: &Cfg, from: NodeIndex, to: NodeIndex) -> bool {
+    reachable_set(cfg, from).contains(&to)
+}
+
+/// Returns the minimum number of edges on any path from `from` to `to`.
+///
+/// Yields `Some(0)` when both nodes are the same and `None` when `to` cannot
+/// be reached from `from`.
+pub fn shortest_distance(cfg: &Cfg, from: NodeIndex, to: NodeIndex) -> Option<usize> {
+    if from == to {
+        return Some(0);
+    }
+
+    // Breadth-first search, one frontier (= one hop) at a time.
+    let mut seen: HashSet<NodeIndex> = HashSet::new();
+    seen.insert(from);
+    let mut frontier = vec![from];
+    let mut hops = 0usize;
+
+    while !frontier.is_empty() {
+        hops += 1;
+        let mut next_frontier = Vec::new();
+        for node in frontier {
+            for succ in cfg.neighbors(node) {
+                if succ == to {
+                    return Some(hops);
+                }
+                if seen.insert(succ) {
+                    next_frontier.push(succ);
+                }
+            }
+        }
+        frontier = next_frontier;
+    }
+
+    None
+}
diff --git a/src/cfg_analysis/error_handling.rs b/src/cfg_analysis/error_handling.rs
new file mode 100644
index 00000000..0c70e2ef
--- /dev/null
+++ b/src/cfg_analysis/error_handling.rs
@@ -0,0 +1,161 @@
+use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence, is_sink};
+use crate::cfg::{EdgeKind, StmtKind};
+use crate::patterns::Severity;
+use petgraph::graph::NodeIndex;
+use petgraph::visit::EdgeRef;
+
+/// CFG analysis that reports error checks whose error branch does not
+/// terminate while sinks remain reachable afterwards (rule id
+/// `cfg-error-fallthrough`).
+pub struct IncompleteErrorHandling;
+
+/// Returns `true` when the true branch of `if_node` terminates.
+///
+/// Each target of a `True` edge leaving `if_node` is examined; the branch
+/// counts as terminating as soon as one of them terminates on all of its
+/// paths. An `If` node without any `True` edge yields `false`.
+fn branch_terminates(cfg: &crate::cfg::Cfg, if_node: NodeIndex) -> bool {
+    cfg.edges(if_node)
+        .filter(|edge| matches!(edge.weight(), EdgeKind::True))
+        .any(|edge| terminates_on_all_paths(cfg, edge.target(), if_node))
+}
+
+/// Returns `true` when no path starting at `node` runs into a dead end before
+/// hitting a `Return`, `Break` or `Continue` node.
+///
+/// The graph is explored depth-first. Terminator nodes end their path, back
+/// edges are not followed, and reaching a non-terminator node without any
+/// successors makes the whole answer `false`. `_scope_entry` is currently
+/// unused.
+fn terminates_on_all_paths(
+    cfg: &crate::cfg::Cfg,
+    node: NodeIndex,
+    _scope_entry: NodeIndex,
+) -> bool {
+    use std::collections::HashSet;
+
+    let mut seen: HashSet<NodeIndex> = HashSet::new();
+    let mut pending = vec![node];
+
+    while let Some(current) = pending.pop() {
+        if !seen.insert(current) {
+            continue;
+        }
+
+        // A terminator closes this path; nothing beyond it matters.
+        if matches!(
+            cfg[current].kind,
+            StmtKind::Return | StmtKind::Break | StmtKind::Continue
+        ) {
+            continue;
+        }
+
+        let mut has_successor = false;
+        for succ in cfg.neighbors(current) {
+            has_successor = true;
+            // Don't follow back edges (loops).
+            let via_back_edge = cfg
+                .edges(current)
+                .any(|e| e.target() == succ && matches!(e.weight(), EdgeKind::Back));
+            if !via_back_edge {
+                pending.push(succ);
+            }
+        }
+
+        // Dead end reached without a terminator.
+        if !has_successor {
+            return false;
+        }
+    }
+
+    true
+}
+
+/// Collects interesting nodes reachable from `if_node`.
+///
+/// Starting from all direct successors of `if_node`, the graph is walked
+/// depth-first without following back edges. Every visited node that is a
+/// sink, or a `Call` with a known callee, is returned once, in visit order.
+fn find_post_if_sinks(cfg: &crate::cfg::Cfg, if_node: NodeIndex) -> Vec<NodeIndex> {
+    let mut seen = std::collections::HashSet::new();
+    let mut collected = Vec::new();
+    let mut worklist: Vec<NodeIndex> = cfg.neighbors(if_node).collect();
+
+    while let Some(current) = worklist.pop() {
+        if !seen.insert(current) {
+            continue;
+        }
+
+        let node = &cfg[current];
+        let is_named_call = node.kind == StmtKind::Call && node.callee.is_some();
+        if is_sink(node) || is_named_call {
+            collected.push(current);
+        }
+
+        // Queue every successor that is not reached through a back edge.
+        worklist.extend(cfg.neighbors(current).filter(|&succ| {
+            !cfg.edges(current)
+                .any(|e| e.target() == succ && matches!(e.weight(), EdgeKind::Back))
+        }));
+    }
+
+    collected
+}
+
+impl CfgAnalysis for IncompleteErrorHandling {
+    /// Short identifier of this analysis.
+    fn name(&self) -> &'static str {
+        "incomplete-error-handling"
+    }
+
+    /// Emits one `cfg-error-fallthrough` finding for every `If` node that
+    ///
+    /// 1. reads a variable whose lowercased name contains `err`,
+    /// 2. has a true branch that does not terminate, and
+    /// 3. can still reach at least one sink afterwards.
+    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
+        ctx.cfg
+            .node_indices()
+            .filter_map(|idx| {
+                let info = &ctx.cfg[idx];
+                if info.kind != StmtKind::If {
+                    return None;
+                }
+
+                // "err" and "error" both contain "err", so one substring
+                // test covers every accepted spelling.
+                let mentions_err = info
+                    .uses
+                    .iter()
+                    .any(|used| used.to_ascii_lowercase().contains("err"));
+                if !mentions_err || branch_terminates(ctx.cfg, idx) {
+                    return None;
+                }
+
+                // Only genuine sinks make the fallthrough dangerous.
+                let reaches_sink = find_post_if_sinks(ctx.cfg, idx)
+                    .into_iter()
+                    .any(|node| is_sink(&ctx.cfg[node]));
+
+                reaches_sink.then(|| CfgFinding {
+                    rule_id: "cfg-error-fallthrough".to_string(),
+                    title: "Error check without return".to_string(),
+                    severity: Severity::Medium,
+                    confidence: Confidence::Medium,
+                    span: info.span,
+                    message: "Error check does not terminate on error; \
+                              execution falls through to dangerous operations"
+                        .to_string(),
+                    evidence: vec![idx],
+                    score: None,
+                })
+            })
+            .collect()
+    }
+}
diff --git a/src/cfg_analysis/guards.rs b/src/cfg_analysis/guards.rs
new file mode 100644
index 00000000..1b6baf18
--- /dev/null
+++ b/src/cfg_analysis/guards.rs
@@ -0,0 +1,208 @@
+use super::dominators::{self, dominates};
+use super::rules;
+use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence, is_entry_point_func};
+use crate::cfg::StmtKind;
+use crate::labels::{Cap, DataLabel};
+use crate::patterns::Severity;
+use petgraph::graph::NodeIndex;
+
+/// CFG analysis that reports sinks not dominated by an applicable guard call
+/// or sanitizer in the same function (rule id `cfg-unguarded-sink`).
+pub struct UnguardedSink;
+
+/// Finds every `Call` node whose callee matches a guard rule for the
+/// context's language.
+///
+/// Each hit is paired with the sink capabilities of the first rule that
+/// matched. Matching is ASCII case-insensitive: a matcher ending in `_` is a
+/// prefix test on the callee name, any other matcher is a suffix test.
+fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
+    let guard_rules = rules::guard_rules(ctx.lang);
+    let mut guards = Vec::new();
+
+    for idx in ctx.cfg.node_indices() {
+        let node = &ctx.cfg[idx];
+        if node.kind != StmtKind::Call {
+            continue;
+        }
+        let Some(callee) = node.callee.as_ref() else {
+            continue;
+        };
+        let callee = callee.to_ascii_lowercase();
+
+        // Only the first matching rule contributes its capabilities.
+        let first_match = guard_rules.iter().find(|rule| {
+            rule.matchers.iter().any(|m| {
+                let pattern = m.to_ascii_lowercase();
+                if pattern.ends_with('_') {
+                    callee.starts_with(&pattern)
+                } else {
+                    callee.ends_with(&pattern)
+                }
+            })
+        });
+
+        if let Some(rule) = first_match {
+            guards.push((idx, rule.applies_to_sink_caps));
+        }
+    }
+
+    guards
+}
+
+/// Returns `true` when at least one taint finding in the context names `sink`
+/// as its sink node.
+fn taint_confirms_sink(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
+    for finding in ctx.taint_findings {
+        if finding.sink == sink {
+            return true;
+        }
+    }
+    false
+}
+
+/// Returns `true` when a `Source`-labelled node in the sink's own function
+/// defines a variable that the sink reads.
+///
+/// This is a single-step def-use check: only direct source definitions are
+/// considered. A sink that reads no variables always yields `false`.
+fn sink_arg_is_source_derived(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
+    let sink_node = &ctx.cfg[sink];
+    if sink_node.uses.is_empty() {
+        return false;
+    }
+    let sink_func = sink_node.enclosing_func.as_deref();
+
+    ctx.cfg
+        .node_indices()
+        .map(|idx| &ctx.cfg[idx])
+        // Same function as the sink...
+        .filter(|node| node.enclosing_func.as_deref() == sink_func)
+        // ...labelled as a taint source...
+        .filter(|node| matches!(node.label, Some(DataLabel::Source(_))))
+        // ...that defines a variable...
+        .filter_map(|node| node.defines.as_ref())
+        // ...which the sink reads.
+        .any(|def| sink_node.uses.iter().any(|used| used == def))
+}
+
+/// Returns `true` when every variable the sink reads is a parameter of the
+/// enclosing function, i.e. the function looks like a thin wrapper around the
+/// sink.
+///
+/// * A sink that reads no variables yields `true` (treated as a call with
+///   constant arguments, e.g. `Command::new("ls")`).
+/// * When no parameter names are known for the enclosing function the answer
+///   is `false`, because nothing can be concluded.
+fn sink_arg_is_parameter_only(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
+    let sink_node = &ctx.cfg[sink];
+    if sink_node.uses.is_empty() {
+        return true;
+    }
+    let sink_func = sink_node.enclosing_func.as_deref();
+
+    // Parameter names come from every summary whose entry node sits in the
+    // same function as the sink.
+    let params: Vec<&str> = ctx
+        .func_summaries
+        .values()
+        .filter(|summary| ctx.cfg[summary.entry].enclosing_func.as_deref() == sink_func)
+        .flat_map(|summary| summary.param_names.iter())
+        .map(|name| name.as_str())
+        .collect();
+
+    !params.is_empty()
+        && sink_node
+            .uses
+            .iter()
+            .all(|used| params.contains(&used.as_str()))
+}
+
+/// Returns `true` when the sink has an enclosing function whose name matches
+/// the entry-point naming rules of the context's language.
+fn sink_in_entrypoint(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
+    ctx.cfg[sink]
+        .enclosing_func
+        .as_deref()
+        .is_some_and(|name| is_entry_point_func(name, ctx.lang))
+}
+
+impl CfgAnalysis for UnguardedSink {
+    /// Short identifier of this analysis.
+    fn name(&self) -> &'static str {
+        "unguarded-sink"
+    }
+
+    /// Emits one `cfg-unguarded-sink` finding for every sink that is not
+    /// protected by a guard call or sanitizer.
+    ///
+    /// A node protects a sink when its capabilities overlap the sink's, it
+    /// lives in the same function, and it dominates the sink.
+    ///
+    /// Severity / confidence of a finding:
+    /// * HIGH / HIGH: taint analysis confirmed the sink, or a source directly
+    ///   defines a variable the sink reads;
+    /// * LOW / LOW: the sink only consumes parameters of a non-entry-point
+    ///   function (wrapper);
+    /// * MEDIUM / MEDIUM: every other structural finding.
+    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
+        let sink_nodes = dominators::find_sink_nodes(ctx.cfg);
+        if sink_nodes.is_empty() {
+            // Nothing to report; skip the dominator computation entirely.
+            return Vec::new();
+        }
+
+        let doms = dominators::compute_dominators(ctx.cfg, ctx.entry);
+        let guard_nodes = find_guard_nodes(ctx);
+
+        // The set of sanitizer nodes does not depend on the sink, so gather it
+        // once instead of rescanning the whole graph for every sink.
+        let sanitizer_nodes: Vec<(NodeIndex, Cap)> = ctx
+            .cfg
+            .node_indices()
+            .filter_map(|idx| match ctx.cfg[idx].label {
+                Some(DataLabel::Sanitizer(caps)) => Some((idx, caps)),
+                _ => None,
+            })
+            .collect();
+
+        let mut findings = Vec::new();
+
+        for &sink in &sink_nodes {
+            let sink_info = &ctx.cfg[sink];
+            let sink_caps = match sink_info.label {
+                Some(DataLabel::Sink(caps)) => caps,
+                _ => continue,
+            };
+            let sink_func = sink_info.enclosing_func.as_deref();
+
+            // Guards and inline sanitizers are judged by the same rule:
+            // overlapping capabilities, same function, dominates the sink.
+            let is_protected = guard_nodes
+                .iter()
+                .chain(sanitizer_nodes.iter())
+                .any(|&(node, caps)| {
+                    caps.intersects(sink_caps)
+                        && ctx.cfg[node].enclosing_func.as_deref() == sink_func
+                        && dominates(&doms, node, sink)
+                });
+            if is_protected {
+                continue;
+            }
+
+            let callee_desc = sink_info.callee.as_deref().unwrap_or("(unknown sink)");
+
+            // ── Severity classification ───────────────────────────────
+            let has_taint = taint_confirms_sink(ctx, sink);
+            let source_derived = sink_arg_is_source_derived(ctx, sink);
+            let param_only = sink_arg_is_parameter_only(ctx, sink);
+            let in_entrypoint = sink_in_entrypoint(ctx, sink);
+
+            let (severity, confidence) = if has_taint || source_derived {
+                // Taint-confirmed or directly source-derived → HIGH
+                (Severity::High, Confidence::High)
+            } else if param_only && !in_entrypoint {
+                // Wrapper function consuming only parameters → LOW
+                (Severity::Low, Confidence::Low)
+            } else {
+                // Structural finding without taint confirmation → MEDIUM
+                (Severity::Medium, Confidence::Medium)
+            };
+
+            findings.push(CfgFinding {
+                rule_id: "cfg-unguarded-sink".to_string(),
+                title: "Unguarded sink".to_string(),
+                severity,
+                confidence,
+                span: sink_info.span,
+                message: format!("Sink `{callee_desc}` has no dominating guard or sanitizer"),
+                evidence: vec![sink],
+                score: None,
+            });
+        }
+
+        findings
+    }
+}
diff --git a/src/cfg_analysis/mod.rs b/src/cfg_analysis/mod.rs
new file mode 100644
index 00000000..946792f7
--- /dev/null
+++ b/src/cfg_analysis/mod.rs
@@ -0,0 +1,170 @@
+pub mod auth;
+pub mod dominators;
+pub mod error_handling;
+pub mod guards;
+pub mod resources;
+pub mod rules;
+pub mod scoring;
+#[cfg(test)]
+mod tests;
+pub mod unreachable;
+
+use crate::cfg::{FuncSummaries, NodeInfo, StmtKind};
+use crate::labels::DataLabel;
+use crate::patterns::Severity;
+use crate::summary::GlobalSummaries;
+use crate::symbol::Lang;
+use crate::taint;
+use petgraph::graph::NodeIndex;
+use std::collections::HashSet;
+
+/// How certain an analysis is about a finding.
+///
+/// The derived ordering follows declaration order, so
+/// `Low < Medium < High`; do not reorder the variants.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Confidence {
+    Low,
+    Medium,
+    High,
+}
+
+/// A single result produced by a CFG analysis.
+#[derive(Debug, Clone)]
+pub struct CfgFinding {
+    /// Stable rule identifier, e.g. `cfg-unguarded-sink` or `cfg-auth-gap`.
+    pub rule_id: String,
+    /// Short human-readable title.
+    #[allow(dead_code)]
+    pub title: String,
+    pub severity: Severity,
+    pub confidence: Confidence,
+    /// Span copied from the CFG node the finding is reported on.
+    /// NOTE(review): presumably (start, end) byte offsets; confirm against `NodeInfo`.
+    pub span: (usize, usize),
+    /// Longer human-readable explanation.
+    #[allow(dead_code)]
+    pub message: String,
+    /// CFG nodes that support the finding.
+    pub evidence: Vec<NodeIndex>,
+    /// Analyses emit `None`; `run_all` passes the findings through
+    /// `scoring::score_findings` and then sorts by this value, highest first.
+    pub score: Option<f64>,
+}
+
+/// Read-only inputs shared by every CFG analysis.
+pub struct AnalysisContext<'a> {
+    /// Control-flow graph under analysis.
+    pub cfg: &'a crate::cfg::Cfg,
+    /// Node used as the root when computing dominators.
+    pub entry: NodeIndex,
+    /// Source language; selects the guard, auth and entry-point rule sets.
+    pub lang: Lang,
+    #[allow(dead_code)]
+    pub file_path: &'a str,
+    #[allow(dead_code)]
+    pub source_bytes: &'a [u8],
+    /// Per-function summaries; the analyses read `entry` and `param_names` from them.
+    pub func_summaries: &'a FuncSummaries,
+    #[allow(dead_code)]
+    pub global_summaries: Option<&'a GlobalSummaries>,
+    /// Findings of the taint pass; each one names the sink node it reached.
+    pub taint_findings: &'a [taint::Finding],
+}
+
+/// Interface implemented by every CFG-based detector run by `run_all`.
+pub trait CfgAnalysis {
+    /// Short, static identifier of the analysis.
+    #[allow(dead_code)]
+    fn name(&self) -> &'static str;
+    /// Runs the analysis over `ctx` and returns its findings (possibly none).
+    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding>;
+}
+
+/// Run all registered analyses and return merged findings.
+///
+/// Steps:
+/// 1. run every detector and concatenate the results;
+/// 2. drop `cfg-unguarded-sink` findings whose span coincides with the sink
+///    span of a taint finding (taint is the primary signal there);
+/// 3. score the remaining findings;
+/// 4. sort by score, highest first, with unscored findings last.
+pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
+    use std::cmp::Ordering;
+
+    let analyses: Vec<Box<dyn CfgAnalysis>> = vec![
+        Box::new(unreachable::UnreachableCode),
+        Box::new(guards::UnguardedSink),
+        Box::new(auth::AuthGap),
+        Box::new(error_handling::IncompleteErrorHandling),
+        Box::new(resources::ResourceMisuse),
+    ];
+    let mut findings: Vec<CfgFinding> = analyses.iter().flat_map(|a| a.run(ctx)).collect();
+
+    // ── Dedup: suppress cfg-unguarded-sink when taint already covers the span ──
+    let taint_spans: HashSet<(usize, usize)> = ctx
+        .taint_findings
+        .iter()
+        .map(|f| ctx.cfg[f.sink].span)
+        .collect();
+
+    findings.retain(|f| !(f.rule_id == "cfg-unguarded-sink" && taint_spans.contains(&f.span)));
+
+    scoring::score_findings(&mut findings, ctx);
+
+    // `sort_by` needs a total order. `partial_cmp` on floats is not one when a
+    // score is NaN (mapping "incomparable" to `Equal` makes the comparator
+    // inconsistent), so compare with `f64::total_cmp` instead. The sort is
+    // stable, so equally scored findings keep their detector order.
+    findings.sort_by(|a, b| match (a.score, b.score) {
+        (Some(x), Some(y)) => y.total_cmp(&x),
+        (Some(_), None) => Ordering::Less,
+        (None, Some(_)) => Ordering::Greater,
+        (None, None) => Ordering::Equal,
+    });
+    findings
+}
+
+/// Returns `true` when `info` is a `Call` node whose callee matches any guard
+/// rule of `lang` (validate, sanitize, check, etc.).
+///
+/// Matching is ASCII case-insensitive: a matcher ending in `_` is a prefix
+/// test on the callee name, any other matcher is a suffix test.
+pub(crate) fn is_guard_call(info: &NodeInfo, lang: Lang) -> bool {
+    let callee = match info.callee.as_deref() {
+        Some(name) if info.kind == StmtKind::Call => name.to_ascii_lowercase(),
+        _ => return false,
+    };
+
+    for rule in rules::guard_rules(lang) {
+        let hit = rule.matchers.iter().any(|m| {
+            let pattern = m.to_ascii_lowercase();
+            if pattern.ends_with('_') {
+                callee.starts_with(&pattern)
+            } else {
+                callee.ends_with(&pattern)
+            }
+        });
+        if hit {
+            return true;
+        }
+    }
+    false
+}
+
+/// Returns `true` when `info` is a `Call` node whose callee matches any
+/// authentication rule of `lang`.
+///
+/// Matching is ASCII case-insensitive: a matcher ending in `_` is a prefix
+/// test on the callee name, any other matcher is a suffix test.
+pub(crate) fn is_auth_call(info: &NodeInfo, lang: Lang) -> bool {
+    let callee = match info.callee.as_deref() {
+        Some(name) if info.kind == StmtKind::Call => name.to_ascii_lowercase(),
+        _ => return false,
+    };
+
+    for rule in rules::auth_rules(lang) {
+        let hit = rule.matchers.iter().any(|m| {
+            let pattern = m.to_ascii_lowercase();
+            if pattern.ends_with('_') {
+                callee.starts_with(&pattern)
+            } else {
+                callee.ends_with(&pattern)
+            }
+        });
+        if hit {
+            return true;
+        }
+    }
+    false
+}
+
+/// Returns `true` when `func_name` matches an entry-point rule of `lang`
+/// (HTTP handler, main, etc.).
+///
+/// Matching is ASCII case-insensitive. A matcher ending in `*` matches any
+/// name starting with the text before the `*`; every other matcher must equal
+/// the whole name.
+pub(crate) fn is_entry_point_func(func_name: &str, lang: Lang) -> bool {
+    let name = func_name.to_ascii_lowercase();
+
+    for rule in rules::entry_point_rules(lang) {
+        let hit = rule.matchers.iter().any(|m| {
+            let pattern = m.to_ascii_lowercase();
+            match pattern.strip_suffix('*') {
+                // Trailing wildcard: prefix match.
+                Some(prefix) => name.starts_with(prefix),
+                // No wildcard: exact match.
+                None => name == pattern,
+            }
+        });
+        if hit {
+            return true;
+        }
+    }
+    false
+}
+
+/// Helper: check if a node is a sink.
+///
+/// A node counts as a sink when its label is `DataLabel::Sink`, regardless of
+/// which capabilities the label carries.
+pub(crate) fn is_sink(info: &NodeInfo) -> bool {
+    matches!(info.label, Some(DataLabel::Sink(_)))
+}
diff --git a/src/cfg_analysis/resources.rs b/src/cfg_analysis/resources.rs
new file mode 100644
index 00000000..abbf87d3
--- /dev/null
+++ b/src/cfg_analysis/resources.rs
@@ -0,0 +1,163 @@
+use super::dominators;
+use super::rules;
+use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence};
+use crate::cfg::StmtKind;
+use crate::patterns::Severity;
+use petgraph::graph::NodeIndex;
+use std::collections::HashSet;
+
+/// CFG analysis pass that reports acquire calls lacking a release on every exit path.
+pub struct ResourceMisuse;
+/// Collect every call node whose callee matches one of `acquire_patterns`.
+///
+/// A pattern matches when the ASCII-lowercased callee ends with the ASCII-lowercased
+/// pattern; an exact match is simply a full-length suffix.
+// NOTE(review): plain suffix matching lets a pattern such as `open` also accept a
+// callee such as `fopen`; confirm that this overlap between pairs is intended.
+fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<NodeIndex> {
+    // Lowercase the patterns once up front rather than once per visited node.
+    let wanted: Vec<String> = acquire_patterns.iter().map(|p| p.to_ascii_lowercase()).collect();
+    let is_acquire = |idx: &NodeIndex| {
+        let node = &ctx.cfg[*idx];
+        match &node.callee {
+            Some(callee) if node.kind == StmtKind::Call => {
+                let name = callee.to_ascii_lowercase();
+                wanted.iter().any(|w| name.ends_with(w.as_str()))
+            }
+            _ => false,
+        }
+    };
+    ctx.cfg.node_indices().filter(is_acquire).collect()
+}
+
+/// Collect every call node whose callee matches one of `release_patterns`.
+///
+/// Both strings are ASCII-lowercased and the callee must end with the pattern,
+/// which includes being equal to it.
+// NOTE(review): plain suffix matching lets a pattern such as `close` also accept a
+// callee such as `fclose`; confirm that this overlap between pairs is intended.
+fn find_release_nodes(ctx: &AnalysisContext, release_patterns: &[&str]) -> Vec<NodeIndex> {
+    // Lowercase the patterns once up front rather than once per visited node.
+    let wanted: Vec<String> = release_patterns.iter().map(|p| p.to_ascii_lowercase()).collect();
+    let is_release = |idx: &NodeIndex| {
+        let node = &ctx.cfg[*idx];
+        match &node.callee {
+            Some(callee) if node.kind == StmtKind::Call => {
+                let name = callee.to_ascii_lowercase();
+                wanted.iter().any(|w| name.ends_with(w.as_str()))
+            }
+            _ => false,
+        }
+    };
+    ctx.cfg.node_indices().filter(is_release).collect()
+}
+
+/// Decide whether every path from `acquire` to `exit` runs through one of `release_nodes`.
+fn release_on_all_exit_paths(
+    ctx: &AnalysisContext,
+    acquire: NodeIndex,
+    release_nodes: &[NodeIndex],
+    exit: NodeIndex,
+) -> bool {
+    // Fast path: a release that post-dominates the acquire settles the question
+    // without exploring individual paths.
+    let post_dominated = dominators::compute_post_dominators(ctx.cfg).is_some_and(|post_doms| {
+        release_nodes
+            .iter()
+            .any(|&release| dominators::dominates(&post_doms, release, acquire))
+    });
+    if post_dominated {
+        return true;
+    }
+    // Slow path: search the graph for an exit path that avoids every release node.
+    let releases: HashSet<NodeIndex> = release_nodes.iter().copied().collect();
+    all_paths_pass_through(ctx, acquire, exit, &releases)
+}
+
+/// Check if all paths from `from` to `to` pass through at least one node in `through`.
+///
+/// The check is a reachability search over `ctx.cfg.neighbors` that refuses to step
+/// onto any node in `through`: if `to` is still reachable, some path avoids every
+/// required node and the answer is `false`.
+///
+/// Returns `true` when `from` itself is in `through`, and also when `to` cannot be
+/// reached from `from` at all (there is then no path that could violate the property).
+fn all_paths_pass_through(
+    ctx: &AnalysisContext,
+    from: NodeIndex,
+    to: NodeIndex,
+    through: &HashSet<NodeIndex>,
+) -> bool {
+    use std::collections::VecDeque;
+
+    if through.contains(&from) {
+        return true;
+    }
+
+    // Breadth-first search restricted to nodes outside `through`. A path that has
+    // already crossed a required node can never become a counterexample, so such
+    // paths are pruned instead of being tracked as separate states.
+    let mut seen: HashSet<NodeIndex> = HashSet::from([from]);
+    let mut frontier = VecDeque::from([from]);
+    while let Some(node) = frontier.pop_front() {
+        if node == to {
+            return false; // Reached `to` without touching any node in `through`.
+        }
+        for succ in ctx.cfg.neighbors(node) {
+            if !through.contains(&succ) && seen.insert(succ) {
+                frontier.push_back(succ);
+            }
+        }
+    }
+
+    true
+}
+
+impl CfgAnalysis for ResourceMisuse {
+    /// Identifier of this analysis pass.
+    fn name(&self) -> &'static str {
+        "resource-misuse"
+    }
+
+    /// Report each acquire call (per `rules::resource_pairs(ctx.lang)`) that is not
+    /// released on every path to the exit node; empty when the CFG has no exit node.
+    /// The `mutex` pair yields `cfg-lock-not-released`, all others `cfg-resource-leak`.
+    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
+        let pairs = rules::resource_pairs(ctx.lang);
+        let Some(exit) = dominators::find_exit_node(ctx.cfg) else {
+            return Vec::new();
+        };
+
+        let mut findings = Vec::new();
+        for pair in pairs {
+            let rule_id = match pair.resource_name {
+                "mutex" => "cfg-lock-not-released",
+                _ => "cfg-resource-leak",
+            };
+            let releases = find_release_nodes(ctx, pair.release);
+            let leaking = find_acquire_nodes(ctx, pair.acquire)
+                .into_iter()
+                .filter(|&acquire| !release_on_all_exit_paths(ctx, acquire, &releases, exit));
+            findings.extend(leaking.map(|acquire| {
+                let info = &ctx.cfg[acquire];
+                let callee_desc = info.callee.as_deref().unwrap_or("(acquire)");
+                CfgFinding {
+                    rule_id: rule_id.to_string(),
+                    title: format!("{} may leak", pair.resource_name),
+                    severity: Severity::Medium,
+                    confidence: Confidence::Medium,
+                    span: info.span,
+                    message: format!(
+                        "`{callee_desc}` acquires {} but not all exit paths \
+                         release it",
+                        pair.resource_name
+                    ),
+                    evidence: vec![acquire],
+                    score: None,
+                }
+            }));
+        }
+
+        findings
+    }
+}
diff --git a/src/cfg_analysis/rules.rs b/src/cfg_analysis/rules.rs
new file mode 100644
index 00000000..a52ef294
--- /dev/null
+++ b/src/cfg_analysis/rules.rs
@@ -0,0 +1,234 @@
+use crate::labels::Cap;
+use crate::symbol::Lang;
+
+/// A guard rule: functions that must dominate sinks to ensure safety.
+pub struct GuardRule {
+    pub matchers: &'static [&'static str], // callee-name patterns that identify a guard
+    pub applies_to_sink_caps: Cap, // presumably the sink capabilities this guard covers
+}
+
+/// An auth rule: functions that perform authentication/authorization checks.
+pub struct AuthRule {
+    pub matchers: &'static [&'static str], // trailing `_` = prefix match, otherwise suffix match
+}
+
+/// An entry point rule: functions that serve as external-facing entry points.
+pub struct EntryPointRule {
+    pub matchers: &'static [&'static str], // trailing `*` = prefix match, otherwise exact match
+}
+
+/// A resource acquire/release pair.
+pub struct ResourcePair {
+    pub acquire: &'static [&'static str], // callee suffixes treated as acquiring the resource
+    pub release: &'static [&'static str], // callee suffixes treated as releasing the resource
+    pub resource_name: &'static str, // label used in finding titles and messages
+}
+
+// ── Guard rules ─────────────────────────────────────────────────────────
+
+static COMMON_GUARDS: &[GuardRule] = &[
+    GuardRule {
+        matchers: &["validate", "sanitize"],
+        applies_to_sink_caps: Cap::all(),
+    },
+    GuardRule {
+        matchers: &["check_", "verify_", "assert_"], // trailing `_`: prefix pattern? TODO confirm
+        applies_to_sink_caps: Cap::all(),
+    },
+    GuardRule {
+        matchers: &["shell_escape", "quote", "escape_shell"],
+        applies_to_sink_caps: Cap::SHELL_ESCAPE,
+    },
+    GuardRule {
+        matchers: &["html_escape", "encode_safe", "escape_html", "sanitize_html"],
+        applies_to_sink_caps: Cap::HTML_ESCAPE,
+    },
+    GuardRule {
+        matchers: &["url_encode", "encode_uri", "urlencode"],
+        applies_to_sink_caps: Cap::URL_ENCODE,
+    },
+];
+
+pub fn guard_rules(_lang: Lang) -> &'static [GuardRule] {
+    // `_lang` is currently ignored: every language gets the common set. Per-language
+    // overrides can be added via match arms when needed.
+    COMMON_GUARDS
+}
+
+// ── Auth rules ──────────────────────────────────────────────────────────
+
+static COMMON_AUTH: &[AuthRule] = &[AuthRule {
+    matchers: &[
+        "is_authenticated", // fallback list: used for every language except Go and Java
+        "require_auth",
+        "check_permission",
+        "is_admin",
+        "authorize",
+        "authenticate",
+        "require_login",
+        "check_auth",
+        "verify_token",
+        "validate_token",
+    ],
+}];
+
+static GO_AUTH: &[AuthRule] = &[AuthRule {
+    matchers: &[
+        "is_authenticated", // the first ten entries repeat `COMMON_AUTH`
+        "require_auth",
+        "check_permission",
+        "is_admin",
+        "authorize",
+        "authenticate",
+        "require_login",
+        "check_auth",
+        "verify_token",
+        "validate_token",
+        "middleware.auth", // Go-specific addition
+        "auth.required", // Go-specific addition
+    ],
+}];
+
+static JAVA_AUTH: &[AuthRule] = &[AuthRule {
+    matchers: &[
+        "is_authenticated", // the first ten entries repeat `COMMON_AUTH`
+        "require_auth",
+        "check_permission",
+        "is_admin",
+        "authorize",
+        "authenticate",
+        "require_login",
+        "check_auth",
+        "verify_token",
+        "validate_token",
+        "isAuthenticated", // Java-specific camelCase additions start here
+        "checkPermission",
+        "hasAuthority",
+        "hasRole",
+    ],
+}];
+
+pub fn auth_rules(lang: Lang) -> &'static [AuthRule] {
+    match lang {
+        Lang::Go => GO_AUTH,
+        Lang::Java => JAVA_AUTH,
+        _ => COMMON_AUTH, // every other language shares the common list
+    }
+}
+
+// ── Entry point rules ───────────────────────────────────────────────────
+
+static COMMON_ENTRY_POINTS: &[EntryPointRule] = &[EntryPointRule {
+    matchers: &[
+        "main", // no `*`: must equal the whole function name
+        "handle_*", // trailing `*`: prefix pattern
+        "route_*",
+        "api_*",
+        "serve_*",
+        "process_*",
+    ],
+}];
+
+static GO_ENTRY_POINTS: &[EntryPointRule] = &[EntryPointRule {
+    matchers: &[
+        "main",
+        "handle_*",
+        "handler_*", // Go-specific addition
+        "route_*",
+        "api_*",
+        "serve_*",
+        "process_*",
+        "ServeHTTP", // Go-specific addition; names are compared case-insensitively
+    ],
+}];
+
+static PYTHON_ENTRY_POINTS: &[EntryPointRule] = &[EntryPointRule {
+    matchers: &[
+        "main",
+        "handle_*",
+        "route_*",
+        "api_*",
+        "serve_*",
+        "process_*",
+        "view_*", // Python-specific addition
+    ],
+}];
+
+pub fn entry_point_rules(lang: Lang) -> &'static [EntryPointRule] {
+    match lang {
+        Lang::Go => GO_ENTRY_POINTS,
+        Lang::Python => PYTHON_ENTRY_POINTS,
+        _ => COMMON_ENTRY_POINTS, // every other language shares the common list
+    }
+}
+
+// ── Resource pairs ──────────────────────────────────────────────────────
+
+static C_RESOURCES: &[ResourcePair] = &[
+    ResourcePair {
+        acquire: &["malloc", "calloc", "realloc"],
+        release: &["free"],
+        resource_name: "memory",
+    },
+    ResourcePair {
+        acquire: &["fopen"],
+        release: &["fclose"],
+        resource_name: "file handle",
+    },
+    ResourcePair {
+        acquire: &["open"], // NOTE(review): suffix-matched, so a `fopen` callee also matches
+        release: &["close"], // NOTE(review): suffix-matched, so a `fclose` callee also matches
+        resource_name: "file descriptor",
+    },
+    ResourcePair {
+        acquire: &["pthread_mutex_lock"],
+        release: &["pthread_mutex_unlock"],
+        resource_name: "mutex", // this name selects the `cfg-lock-not-released` rule id
+    },
+];
+
+static GO_RESOURCES: &[ResourcePair] = &[
+    ResourcePair {
+        acquire: &["os.Open", "os.Create", "os.OpenFile"],
+        release: &[".Close"], // leading `.`: presumably any receiver's `Close` — TODO confirm
+        resource_name: "file handle",
+    },
+    ResourcePair {
+        acquire: &[".Lock"],
+        release: &[".Unlock"],
+        resource_name: "mutex", // this name selects the `cfg-lock-not-released` rule id
+    },
+];
+
+static RUST_RESOURCES: &[ResourcePair] = &[
+    // Rust uses RAII, but a manual alloc/dealloc pairing is still worth checking.
+    ResourcePair {
+        acquire: &["alloc"], // NOTE(review): suffix-matched, so a `dealloc` callee also matches
+        release: &["dealloc"],
+        resource_name: "raw memory",
+    },
+];
+
+static JAVA_RESOURCES: &[ResourcePair] = &[ResourcePair {
+    acquire: &[
+        "new FileInputStream", // assumes the callee text includes `new ` — TODO confirm
+        "new FileOutputStream",
+        "new BufferedReader",
+        "openConnection",
+    ],
+    release: &[".close"],
+    resource_name: "stream/connection",
+}];
+
+static EMPTY_RESOURCES: &[ResourcePair] = &[];
+
+/// Acquire/release pairs for `lang`; languages without a table get an empty slice.
+pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
+    match lang {
+        Lang::C | Lang::Cpp => C_RESOURCES,
+        Lang::Go => GO_RESOURCES,
+        Lang::Rust => RUST_RESOURCES,
+        Lang::Java => JAVA_RESOURCES,
+        _ => EMPTY_RESOURCES,
+    }
+}
diff --git a/src/cfg_analysis/scoring.rs b/src/cfg_analysis/scoring.rs
new file mode 100644
index 00000000..52d72641
--- /dev/null
+++ b/src/cfg_analysis/scoring.rs
@@ -0,0 +1,67 @@
+use super::dominators;
+use super::{AnalysisContext, CfgFinding, Confidence};
+use crate::cfg::StmtKind;
+use crate::patterns::Severity;
+
+/// Enrich all findings with a numeric score for ranking.
+///
+/// For each finding the score is the sum of a severity base, an entry-proximity
+/// bonus, a capped branch count and rule-specific boosts, multiplied by the
+/// confidence multiplier. The result is stored in the finding's `score` field,
+/// overwriting whatever was there before.
+pub fn score_findings(findings: &mut [CfgFinding], ctx: &AnalysisContext) {
+    for finding in findings {
+        // Base severity.
+        let base = severity_base(finding.severity);
+
+        // Distance from entry (fewer hops = more exposed = higher risk). Only the
+        // first evidence node is measured; without a node or a distance the bonus
+        // is zero.
+        let proximity = finding
+            .evidence
+            .first()
+            .and_then(|&node| dominators::shortest_distance(ctx.cfg, ctx.entry, node))
+            .map_or(0.0, |dist| 20.0 / (1.0 + dist as f64));
+
+        // Branch nodes among the evidence (more branches = more likely to miss a
+        // case), capped at 10 points.
+        let branching = (count_branches_on_evidence(&finding.evidence, ctx) as f64).min(10.0);
+
+        // High-severity unguarded sinks are boosted so that they sort above
+        // structural-only findings; auth gaps get a smaller boost.
+        let is_high_sink =
+            finding.rule_id == "cfg-unguarded-sink" && finding.severity == Severity::High;
+        let sink_boost = if is_high_sink { 10.0 } else { 0.0 };
+        let auth_boost = if finding.rule_id == "cfg-auth-gap" { 5.0 } else { 0.0 };
+
+        // Confidence multiplier scales the summed components.
+        let raw = base + proximity + branching + sink_boost + auth_boost;
+        finding.score = Some(raw * confidence_multiplier(finding.confidence));
+    }
+}
+
+fn severity_base(severity: Severity) -> f64 {
+    match severity {
+        Severity::High => 80.0, // starting points of a score, before bonuses are added
+        Severity::Medium => 50.0,
+        Severity::Low => 20.0,
+    }
+}
+
+fn confidence_multiplier(confidence: Confidence) -> f64 {
+    match confidence {
+        Confidence::High => 1.0, // factor applied to the summed score components
+        Confidence::Medium => 0.8,
+        Confidence::Low => 0.6,
+    }
+}
+
+/// Count the evidence nodes whose statement kind is `StmtKind::If`.
+/// Only the nodes listed in `evidence` are inspected, not the paths between them.
+fn count_branches_on_evidence(
+    evidence: &[petgraph::graph::NodeIndex],
+    ctx: &AnalysisContext,
+) -> usize {
+    let is_branch = |node: &petgraph::graph::NodeIndex| ctx.cfg[*node].kind == StmtKind::If;
+    evidence.iter().copied().filter(is_branch).count()
+}
diff --git a/src/cfg_analysis/tests.rs b/src/cfg_analysis/tests.rs
new file mode 100644
index 00000000..12ba7e0f
--- /dev/null
+++ b/src/cfg_analysis/tests.rs
@@ -0,0 +1,721 @@
+use super::*;
+use crate::cfg::build_cfg;
+use crate::symbol::Lang;
+use crate::taint;
+use tree_sitter::Language;
+
+/// Test helper: parse `src` with the `ts_lang` grammar, build its CFG, and run one analysis.
+fn parse_and_analyse<A: CfgAnalysis>(
+    analysis: &A,
+    src: &[u8],
+    lang_str: &str,
+    ts_lang: Language,
+) -> Vec<CfgFinding> {
+    let mut ts_parser = tree_sitter::Parser::new();
+    ts_parser.set_language(&ts_lang).unwrap();
+    let syntax_tree = ts_parser.parse(src, None).unwrap();
+    let (graph, entry, summaries) = build_cfg(&syntax_tree, src, lang_str, "test.rs");
+    let lang = Lang::from_slug(lang_str).unwrap();
+    let context = AnalysisContext {
+        cfg: &graph,
+        entry,
+        lang,
+        file_path: "test.rs",
+        source_bytes: src,
+        func_summaries: &summaries,
+        global_summaries: None, // single-file run: no cross-file summaries
+        taint_findings: &[], // and no taint results to cross-reference
+    };
+    analysis.run(&context)
+}
+
+/// Test helper: parse `src`, build its CFG, and run every analysis via `run_all`.
+fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFinding> {
+    let mut ts_parser = tree_sitter::Parser::new();
+    ts_parser.set_language(&ts_lang).unwrap();
+    let syntax_tree = ts_parser.parse(src, None).unwrap();
+    let (graph, entry, summaries) = build_cfg(&syntax_tree, src, lang_str, "test.rs");
+    let lang = Lang::from_slug(lang_str).unwrap();
+    let context = AnalysisContext {
+        cfg: &graph,
+        entry,
+        lang,
+        file_path: "test.rs",
+        source_bytes: src,
+        func_summaries: &summaries,
+        global_summaries: None, // single-file run: no cross-file summaries
+        taint_findings: &[], // and no taint results to cross-reference
+    };
+    run_all(&context)
+}
+
+/// Test helper: like a plain `run_all` over `src`, but with caller-supplied taint findings.
+fn parse_and_run_all_with_taint(
+    src: &[u8],
+    lang_str: &str,
+    ts_lang: Language,
+    taint_findings: &[taint::Finding],
+) -> Vec<CfgFinding> {
+    let mut ts_parser = tree_sitter::Parser::new();
+    ts_parser.set_language(&ts_lang).unwrap();
+    let syntax_tree = ts_parser.parse(src, None).unwrap();
+    let (graph, entry, summaries) = build_cfg(&syntax_tree, src, lang_str, "test.rs");
+    let lang = Lang::from_slug(lang_str).unwrap();
+    let context = AnalysisContext {
+        cfg: &graph,
+        entry,
+        lang,
+        file_path: "test.rs",
+        source_bytes: src,
+        func_summaries: &summaries,
+        global_summaries: None, // single-file run: no cross-file summaries
+        taint_findings,
+    };
+    run_all(&context)
+}
+
+// ─── Unreachable code tests ────────────────────────────────────────────
+
+#[test]
+fn unreachable_code_detection_runs_without_panic() {
+    // Smoke test: the unreachable-code analysis must cope with a statement that
+    // follows `return`. Depending on the grammar, tree-sitter may or may not emit
+    // AST nodes for such trailing statements.
+    let source = br#"
+        use std::process::Command;
+        fn main() {
+            return;
+            Command::new("sh").arg("x").status().unwrap();
+        }"#;
+
+    let reported = parse_and_analyse(
+        &unreachable::UnreachableCode,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    // Completing without a panic is the whole assertion here; the set of findings
+    // is deliberately left unchecked because it depends on how tree-sitter
+    // structures the AST after `return;`.
+    let _ = reported;
+}
+
+#[test]
+fn all_branches_reachable_no_findings() {
+    // Both arms of the `if` can execute, so nothing may be reported as unreachable.
+    let source = br#"
+        use std::process::Command;
+        fn main() {
+            let x = 1;
+            if x > 0 {
+                Command::new("a").status().unwrap();
+            } else {
+                Command::new("b").status().unwrap();
+            }
+        }"#;
+
+    let reported = parse_and_analyse(
+        &unreachable::UnreachableCode,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    assert!(
+        reported.is_empty(),
+        "Should have no unreachable findings when all branches are reachable"
+    );
+}
+
+#[test]
+fn unreachable_detects_orphaned_nodes() {
+    // Despite the name, this covers the no-orphan case only: for straight-line
+    // code the set of nodes reachable from the entry must span the entire CFG.
+    // No orphaned node is constructed and no analysis findings are inspected.
+    let source = br#"
+        fn main() {
+            let x = 1;
+            let y = 2;
+        }"#;
+
+    let mut ts_parser = tree_sitter::Parser::new();
+    ts_parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let syntax_tree = ts_parser.parse(source as &[u8], None).unwrap();
+    let (graph, entry, _) = build_cfg(&syntax_tree, source, "rust", "test.rs");
+
+    // Compare the size of the reachable set with the total node count.
+    let reached = dominators::reachable_set(&graph, entry);
+    assert_eq!(
+        reached.len(),
+        graph.node_count(),
+        "All nodes should be reachable in linear code — no unreachable findings expected"
+    );
+}
+
+// ─── Guard validation tests ───────────────────────────────────────────
+
+#[test]
+fn unguarded_sink_detected() {
+    // An env var flows into `Command` with no validation in between: must be flagged.
+    let source = br#"
+        use std::process::Command;
+        fn main() {
+            let x = std::env::var("INPUT").unwrap();
+            Command::new("sh").arg(&x).status().unwrap();
+        }"#;
+
+    let reported = parse_and_analyse(
+        &guards::UnguardedSink,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    let unguarded: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
+        .collect();
+    assert!(!unguarded.is_empty(), "Should flag unguarded sink");
+}
+
+#[test]
+fn guarded_sink_with_sanitizer_not_flagged() {
+    // The input passes through `shell_escape::unix::escape` before reaching the
+    // sink. The Rust label rules are expected to treat that call as a
+    // Sanitizer(SHELL_ESCAPE), so no "unguarded sink" finding may be reported.
+    let source = br#"
+        use std::process::Command;
+        fn main() {
+            let x = std::env::var("INPUT").unwrap();
+            let safe = shell_escape::unix::escape(&x);
+            Command::new("sh").arg(&safe).status().unwrap();
+        }"#;
+
+    let reported = parse_and_analyse(
+        &guards::UnguardedSink,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    let unguarded: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
+        .collect();
+    assert!(
+        unguarded.is_empty(),
+        "Guarded sink should not be flagged; got {:?}",
+        unguarded
+    );
+}
+
+// ─── Auth gap tests ────────────────────────────────────────────────────
+
+#[test]
+fn auth_gap_in_handler_detected() {
+    // A `handle_*` function reaches a sink without calling any auth check first.
+    let source = br#"
+        use std::process::Command;
+        fn handle_request() {
+            let data = std::env::var("INPUT").unwrap();
+            Command::new("sh").arg(&data).status().unwrap();
+        }"#;
+
+    let reported = parse_and_analyse(
+        &auth::AuthGap,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    let gaps: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-auth-gap")
+        .collect();
+    assert!(
+        !gaps.is_empty(),
+        "Should detect auth gap in handler function"
+    );
+}
+
+#[test]
+fn auth_check_before_sink_no_finding() {
+    // Same handler shape, but `require_auth()` runs before the sink is reached.
+    let source = br#"
+        fn handle_request() {
+            require_auth();
+            let data = std::env::var("INPUT").unwrap();
+            std::process::Command::new("sh").arg(&data).status().unwrap();
+        }"#;
+
+    let reported = parse_and_analyse(
+        &auth::AuthGap,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    let gaps: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-auth-gap")
+        .collect();
+    assert!(
+        gaps.is_empty(),
+        "Auth check before sink should not be flagged; got {:?}",
+        gaps
+    );
+}
+
+// ─── Error handling tests ──────────────────────────────────────────────
+
+#[test]
+fn error_fallthrough_analysis_runs_on_go() {
+    // Go shape: the `err != nil` branch logs but does not return, and a dangerous
+    // call follows. The analysis is heuristic, so this is a smoke test only.
+    let source = br#"
+        package main
+        import "os/exec"
+        func main() {
+            err := doSomething()
+            if err != nil {
+                log(err)
+            }
+            exec.Command("sh", input).Run()
+        }"#;
+
+    let reported = parse_and_analyse(
+        &error_handling::IncompleteErrorHandling,
+        source,
+        "go",
+        Language::from(tree_sitter_go::LANGUAGE),
+    );
+
+    // Completing without a panic is the whole assertion; findings are not checked.
+    let _ = reported;
+}
+
+#[test]
+fn proper_error_return_no_finding_go() {
+    // Go shape: the `err != nil` branch returns, so no fallthrough may be reported.
+    let source = br#"
+        package main
+        import "os/exec"
+        func main() {
+            err := doSomething()
+            if err != nil {
+                return
+            }
+            exec.Command("sh", input).Run()
+        }"#;
+
+    let reported = parse_and_analyse(
+        &error_handling::IncompleteErrorHandling,
+        source,
+        "go",
+        Language::from(tree_sitter_go::LANGUAGE),
+    );
+
+    let fallthroughs: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-error-fallthrough")
+        .collect();
+    assert!(
+        fallthroughs.is_empty(),
+        "Proper error return should not be flagged; got {:?}",
+        fallthroughs
+    );
+}
+
+// ─── Resource misuse tests ────────────────────────────────────────────
+
+#[test]
+fn resource_leak_c_system_call() {
+    // C code that calls `malloc` and never calls `free`. The calls are simple
+    // standalone statements so that callee extraction is unambiguous.
+    let source = br#"
+        void main() {
+            char *p = malloc(100);
+            system(p);
+        }"#;
+
+    let reported = parse_and_analyse(
+        &resources::ResourceMisuse,
+        source,
+        "c",
+        Language::from(tree_sitter_c::LANGUAGE),
+    );
+
+    let leaks: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-resource-leak")
+        .collect();
+    assert!(
+        !leaks.is_empty(),
+        "Should detect malloc without free"
+    );
+}
+
+#[test]
+fn resource_properly_freed_c() {
+    // C code where `free` follows `malloc` on the only path: no leak expected.
+    let source = br#"
+        void main() {
+            char *p = malloc(100);
+            free(p);
+        }"#;
+
+    let reported = parse_and_analyse(
+        &resources::ResourceMisuse,
+        source,
+        "c",
+        Language::from(tree_sitter_c::LANGUAGE),
+    );
+
+    let leaks: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-resource-leak")
+        .collect();
+    assert!(
+        leaks.is_empty(),
+        "Properly freed resource should not be flagged; got {:?}",
+        leaks
+    );
+}
+
+// ─── Scoring tests ─────────────────────────────────────────────────────
+
+#[test]
+fn high_severity_scores_higher() {
+    let source = br#"
+        use std::process::Command;
+        fn handle_request() {
+            let x = std::env::var("INPUT").unwrap();
+            Command::new("sh").arg(&x).status().unwrap();
+        }"#;
+
+    let ranked = parse_and_run_all(source, "rust", Language::from(tree_sitter_rust::LANGUAGE));
+
+    // Every finding returned by `run_all` must carry a positive score.
+    for finding in &ranked {
+        assert!(finding.score.is_some(), "All findings should have a score");
+        assert!(finding.score.unwrap() > 0.0, "All scores should be positive");
+    }
+
+    // Adjacent pairs must be in non-increasing score order (vacuous for < 2 findings).
+    for pair in ranked.windows(2) {
+        assert!(
+            pair[0].score.unwrap() >= pair[1].score.unwrap(),
+            "Findings should be sorted by score descending"
+        );
+    }
+}
+
+// ─── Integration: run_all ──────────────────────────────────────────────
+
+#[test]
+fn run_all_produces_findings() {
+    let source = br#"
+        use std::process::Command;
+        fn handle_request() {
+            let x = std::env::var("DANGEROUS").unwrap();
+            Command::new("sh").arg(&x).status().unwrap();
+        }"#;
+
+    let reported = parse_and_run_all(source, "rust", Language::from(tree_sitter_rust::LANGUAGE));
+
+    // Any finding at all satisfies the test (unguarded sink and/or auth gap expected).
+    assert!(
+        !reported.is_empty(),
+        "run_all should produce findings for vulnerable code"
+    );
+}
+
+#[test]
+fn run_all_safe_code_fewer_findings() {
+    let source = br#"
+        fn safe_function() {
+            let x = 42;
+            let y = x + 1;
+        }"#;
+
+    let reported = parse_and_run_all(source, "rust", Language::from(tree_sitter_rust::LANGUAGE));
+
+    // Only high-severity findings are ruled out; lower severities are not checked.
+    let high: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.severity == crate::patterns::Severity::High)
+        .collect();
+    assert!(
+        high.is_empty(),
+        "Safe code should have no high-severity findings"
+    );
+}
+
+// ─── Dominator utility tests ──────────────────────────────────────────
+
+#[test]
+fn reachable_set_contains_all_connected_nodes() {
+    let source = br#"
+        fn main() {
+            let x = 1;
+            let y = 2;
+        }"#;
+
+    let mut ts_parser = tree_sitter::Parser::new();
+    ts_parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let syntax_tree = ts_parser.parse(source as &[u8], None).unwrap();
+    let (graph, entry, _) = build_cfg(&syntax_tree, source, "rust", "test.rs");
+
+    let reached = dominators::reachable_set(&graph, entry);
+
+    // Straight-line code: the reachable set must be as large as the whole graph.
+    assert_eq!(
+        reached.len(),
+        graph.node_count(),
+        "All nodes should be reachable in a simple function"
+    );
+}
+
+#[test]
+fn find_exit_node_exists() {
+    let source = br#"
+        fn main() {
+            let x = 1;
+        }"#;
+
+    let mut ts_parser = tree_sitter::Parser::new();
+    ts_parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let syntax_tree = ts_parser.parse(source as &[u8], None).unwrap();
+    let (graph, _, _) = build_cfg(&syntax_tree, source, "rust", "test.rs");
+
+    let exit_node = dominators::find_exit_node(&graph);
+    assert!(exit_node.is_some(), "Should find an exit node");
+}
+
+#[test]
+fn shortest_distance_basic() {
+    let source = br#"
+        fn main() {
+            let x = 1;
+            let y = 2;
+        }"#;
+
+    let mut ts_parser = tree_sitter::Parser::new();
+    ts_parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let syntax_tree = ts_parser.parse(source as &[u8], None).unwrap();
+    let (graph, entry, _) = build_cfg(&syntax_tree, source, "rust", "test.rs");
+
+    let exit_node = dominators::find_exit_node(&graph).unwrap();
+    let hops = dominators::shortest_distance(&graph, entry, exit_node);
+    assert!(hops.is_some(), "Should find a path from entry to exit");
+    assert!(hops.unwrap() > 0, "Distance should be positive");
+}
+
+// ─── Severity refinement tests ──────────────────────────────────────
+
+#[test]
+fn unguarded_sink_source_derived_is_high() {
+    // The sink argument comes from a source (env var → Command) inside `main`: HIGH.
+    let source = br#"
+        use std::process::Command;
+        fn main() {
+            let x = std::env::var("INPUT").unwrap();
+            Command::new("sh").arg(&x).status().unwrap();
+        }"#;
+
+    let reported = parse_and_analyse(
+        &guards::UnguardedSink,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    let high_sinks: Vec<_> = reported
+        .iter()
+        .filter(|r| {
+            r.rule_id == "cfg-unguarded-sink" && r.severity == crate::patterns::Severity::High
+        })
+        .collect();
+    assert!(
+        !high_sinks.is_empty(),
+        "Source-derived unguarded sink should be HIGH severity"
+    );
+}
+
+#[test]
+fn unguarded_sink_wrapper_param_only_is_low() {
+    // A helper that merely forwards its parameter to a sink: no source and no
+    // entry-point name. The assertion only requires that it is not rated HIGH.
+    let source = br#"
+        use std::process::Command;
+        fn run_command(cmd: &str) {
+            Command::new("sh").arg(cmd).status().unwrap();
+        }"#;
+
+    let reported = parse_and_analyse(
+        &guards::UnguardedSink,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    let high_sinks: Vec<_> = reported
+        .iter()
+        .filter(|r| {
+            r.rule_id == "cfg-unguarded-sink" && r.severity == crate::patterns::Severity::High
+        })
+        .collect();
+    assert!(
+        high_sinks.is_empty(),
+        "Wrapper function with param-only sink should NOT be HIGH; got {:?}",
+        high_sinks
+    );
+}
+
+// ─── Auth gap refinement tests ──────────────────────────────────────
+
+#[test]
+fn cli_main_no_auth_gap() {
+    // A CLI `main()` that runs `Command::new` with constant arguments: no auth-gap.
+    let source = br#"
+        use std::process::Command;
+        fn main() {
+            Command::new("ls").arg("-la").status().unwrap();
+        }"#;
+
+    let reported = parse_and_analyse(
+        &auth::AuthGap,
+        source,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+    );
+
+    let gaps: Vec<_> = reported
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-auth-gap")
+        .collect();
+    assert!(
+        gaps.is_empty(),
+        "CLI main() should NOT trigger auth-gap; got {:?}",
+        gaps
+    );
+}
+
+#[test]
+fn handler_with_source_still_gets_auth_gap() {
+    // The `handle_*` name alone is a strong handler signal, so a sink inside
+    // it must still be flagged even though there are no explicit web params.
+    let src = br#"
+        use std::process::Command;
+        fn handle_request() {
+            let data = std::env::var("INPUT").unwrap();
+            Command::new("sh").arg(&data).status().unwrap();
+        }"#;
+
+    let rust = Language::from(tree_sitter_rust::LANGUAGE);
+    let findings = parse_and_analyse(&auth::AuthGap, src, "rust", rust);
+
+    // One auth-gap finding is enough for the test to pass.
+    let mut flagged = false;
+    for finding in findings.iter() {
+        if finding.rule_id == "cfg-auth-gap" {
+            flagged = true;
+            break;
+        }
+    }
+    assert!(
+        flagged,
+        "handler-style function should still trigger auth-gap"
+    );
+}
+
+// ─── Dedup tests ────────────────────────────────────────────────────
+
+#[test]
+fn taint_and_unguarded_sink_deduped() {
+    // Smoke test for the dedup pass: hand a synthetic taint finding that ends
+    // at a real sink node to the combined analysis run.
+    let src = br#"
+        use std::process::Command;
+        fn handle_request() {
+            let x = std::env::var("INPUT").unwrap();
+            Command::new("sh").arg(&x).status().unwrap();
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let _lang = Lang::from_slug("rust").unwrap();
+
+    // Use the first sink-labelled node as the end of the synthetic flow.
+    let mut sink_node = None;
+    for idx in cfg_graph.node_indices() {
+        if matches!(cfg_graph[idx].label, Some(crate::labels::DataLabel::Sink(_))) {
+            sink_node = Some(idx);
+            break;
+        }
+    }
+    let sink_node = sink_node.expect("test code should have a sink node");
+
+    let synthetic = taint::Finding {
+        sink: sink_node,
+        source: entry,
+        path: vec![entry, sink_node],
+    };
+    let fake_taint = vec![synthetic];
+
+    let findings = parse_and_run_all_with_taint(
+        src,
+        "rust",
+        Language::from(tree_sitter_rust::LANGUAGE),
+        &fake_taint,
+    );
+
+    // No suppression is asserted here.  `parse_and_run_all_with_taint` builds
+    // a fresh CFG, so the `NodeIndex` values taken from `cfg_graph` above may
+    // not match it, and dedup only fires on an exact span match.  Until the
+    // helper can share one CFG with the synthetic finding, this test only
+    // verifies that the whole pipeline runs on this input without panicking.
+    // TODO: assert that no HIGH `cfg-unguarded-sink` finding remains for the
+    // sink span once that is possible.
+    let _ = findings;
+}
+
+#[test]
+fn process_star_without_web_params_no_auth_gap() {
+    // A `process_*` function that takes no web params must not be reported
+    // by the auth-gap detector.
+    let src = br#"
+        use std::process::Command;
+        fn process_data() {
+            Command::new("ls").status().unwrap();
+        }"#;
+
+    let rust = Language::from(tree_sitter_rust::LANGUAGE);
+    let findings = parse_and_analyse(&auth::AuthGap, src, "rust", rust);
+
+    // Select the findings reported under the auth-gap rule id; the list is
+    // kept (rather than a bool) so a failure can print the offenders.
+    let auth_hits = findings
+        .iter()
+        .filter(|finding| finding.rule_id == "cfg-auth-gap")
+        .collect::<Vec<_>>();
+
+    assert!(
+        auth_hits.is_empty(),
+        "process_* without web params should NOT trigger auth-gap; got {:?}",
+        auth_hits
+    );
+}
diff --git a/src/cfg_analysis/unreachable.rs b/src/cfg_analysis/unreachable.rs
new file mode 100644
index 00000000..6bc221ca
--- /dev/null
+++ b/src/cfg_analysis/unreachable.rs
@@ -0,0 +1,75 @@
+use super::dominators;
+use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence};
+use crate::cfg::StmtKind;
+use crate::labels::DataLabel;
+use crate::patterns::Severity;
+
+/// CFG analysis that flags security-relevant nodes outside `dominators::reachable_set`.
+pub struct UnreachableCode;
+
+impl CfgAnalysis for UnreachableCode {
+    fn name(&self) -> &'static str {
+        "unreachable-code"
+    }
+
+    /// Emits one finding per unreachable sanitizer, sink, source or guard/auth
+    /// call; synthetic entry/exit nodes and all other nodes yield no finding.
+    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
+        let live = dominators::reachable_set(ctx.cfg, ctx.entry);
+
+        ctx.cfg
+            .node_indices()
+            .filter(|idx| !live.contains(idx))
+            .filter_map(|idx| {
+                let node = &ctx.cfg[idx];
+                if matches!(node.kind, StmtKind::Entry | StmtKind::Exit) {
+                    return None;
+                }
+
+                // Classify by label first; anything else only counts as a guard/auth call.
+                let (rule_id, title, severity) = match node.label {
+                    Some(DataLabel::Sanitizer(_)) => (
+                        "cfg-unreachable-sanitizer",
+                        "Unreachable sanitizer",
+                        Severity::Medium,
+                    ),
+                    Some(DataLabel::Sink(_)) => {
+                        ("cfg-unreachable-sink", "Unreachable sink", Severity::Medium)
+                    }
+                    Some(DataLabel::Source(_)) => (
+                        "cfg-unreachable-source",
+                        "Unreachable source",
+                        Severity::Low,
+                    ),
+                    _ => {
+                        let guards_or_auths = super::is_guard_call(node, ctx.lang)
+                            || super::is_auth_call(node, ctx.lang);
+                        if !guards_or_auths {
+                            return None; // plain unreachable code is not reported
+                        }
+                        (
+                            "cfg-unreachable-guard",
+                            "Unreachable guard/auth check",
+                            Severity::Medium,
+                        )
+                    }
+                };
+
+                let callee_desc = node.callee.as_deref().unwrap_or("(unknown)");
+                let message =
+                    format!("{title}: `{callee_desc}` is unreachable and will never execute");
+
+                Some(CfgFinding {
+                    rule_id: rule_id.to_string(),
+                    title: title.to_string(),
+                    severity,
+                    confidence: Confidence::High,
+                    span: node.span,
+                    message,
+                    evidence: vec![idx],
+                    score: None,
+                })
+            })
+            .collect()
+    }
+}
diff --git a/src/commands/index.rs b/src/commands/index.rs
index f2ece876..ac57c707 100644
--- a/src/commands/index.rs
+++ b/src/commands/index.rs
@@ -4,12 +4,14 @@ use crate::errors::NyxResult;
 use crate::patterns::Severity;
 use crate::utils::Config;
 use crate::utils::project::get_project_info;
-use crate::walk::spawn_senders;
+use crate::walk::spawn_file_walker;
+use blake3;
 use bytesize::ByteSize;
 use chrono::{DateTime, Local};
 use console::style;
 use rayon::prelude::*;
 use std::fs;
+use std::path::PathBuf;
 use std::process::exit;
 
 pub fn handle(
@@ -94,13 +96,29 @@ pub fn build_index(
 
     tracing::debug!("Cleaned index for: {}", project_name);
 
-    let rx = spawn_senders(project_path, config);
-    let paths: Vec<_> = rx.into_iter().flatten().collect();
+    let (rx, handle) = spawn_file_walker(project_path, config);
+    let paths: Vec<PathBuf> = rx.into_iter().flatten().collect(); // drain before join
+    if let Err(err) = handle.join() {
+        tracing::error!("walker thread panicked: {:#?}", err);
+    }
 
-    paths.into_par_iter().try_for_each(
-        |path| -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-            let issues = crate::commands::scan::run_rules_on_file(&path, config)?;
+    paths
+        .into_par_iter()
+        .try_for_each(|path| -> NyxResult<()> {
             let mut idx = Indexer::from_pool(project_name, &pool)?;
+
+            // Read once, hash once — pass bytes to both rule execution and
+            // summary extraction.
+            let bytes = std::fs::read(&path)?;
+            let hash = {
+                let mut hasher = blake3::Hasher::new();
+                hasher.update(&bytes);
+                hasher.finalize().as_bytes().to_vec()
+            };
+
+            // Run AST-only rules (no taint yet — summaries come later in scan)
+            let issues =
+                crate::commands::scan::run_rules_on_bytes(&bytes, &path, config, None, None)?;
             let file_id = idx.upsert_file(&path)?;
 
             let rows: Vec<IssueRow> = issues
@@ -118,9 +136,16 @@ pub fn build_index(
                 .collect();
 
             idx.replace_issues(file_id, rows)?;
+
+            // Extract and persist function summaries for cross-file taint
+            let sums = crate::commands::scan::extract_summaries_from_bytes(&bytes, &path, config)
+                .unwrap_or_default();
+            if !sums.is_empty() {
+                idx.replace_summaries_for_file(&path, &hash, &sums)?;
+            }
+
             Ok(())
-        },
-    )?;
+        })?;
 
     {
         let idx = Indexer::from_pool(project_name, &pool)?;
diff --git a/src/commands/scan.rs b/src/commands/scan.rs
index 098bf3d3..cee533ec 100644
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@@ -1,28 +1,30 @@
-pub(crate) use crate::ast::run_rules_on_file;
+pub(crate) use crate::ast::{
+    extract_summaries_from_bytes, extract_summaries_from_file, run_rules_on_bytes,
+    run_rules_on_file,
+};
 use crate::database::index::{Indexer, IssueRow};
 use crate::errors::NyxResult;
 use crate::patterns::Severity;
+use crate::summary::{self, FuncSummary, GlobalSummaries};
 use crate::utils::config::Config;
 use crate::utils::project::get_project_info;
-use crate::walk::spawn_senders;
+use crate::walk::spawn_file_walker;
 use console::style;
 use dashmap::DashMap;
 use r2d2::Pool;
 use r2d2_sqlite::SqliteConnectionManager;
 use rayon::prelude::*;
 use std::collections::BTreeMap;
-use std::path::Path;
-use std::sync::{Arc, Mutex};
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
 
-type DynError = Box<dyn std::error::Error + Send + Sync>;
-
-#[derive(Debug)]
+#[derive(Debug, Clone, serde::Serialize)] // Serialize backs the JSON output format
 pub struct Diag {
-    pub(crate) path: String,
-    pub(crate) line: usize,
-    pub(crate) col: usize,
-    pub(crate) severity: Severity,
-    pub(crate) id: String,
+    pub path: String, // presumably the scanned file's path — confirm against producers
+    pub line: usize, // line of the finding
+    pub col: usize, // column of the finding
+    pub severity: Severity, // severity level attached to the finding
+    pub id: String, // rule id; the "taint"/"cfg-" prefixes drive mode filtering
 }
 
 /// Entry point called by the CLI.
@@ -57,6 +59,13 @@ pub fn handle(
 
     tracing::debug!("Found {:?} issues.", diags.len());
 
+    if format == "json" {
+        let json = serde_json::to_string(&diags)
+            .map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
+        println!("{json}");
+        return Ok(());
+    }
+
     if format == "console" || (format.is_empty() && config.output.default_format == "console") {
         tracing::debug!("Printing to console");
         let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new();
@@ -84,26 +93,74 @@ pub fn handle(
             style(project_name).white().bold(),
             style(diags.len()).bold()
         );
-        println!("\t"); // TODO: Add individual counts for different warning levels
+        println!("\t");
     }
     Ok(())
 }
 
 // --------------------------------------------------------------------------------------------
-// Scanning helpers
+// Two‑pass scanning (no index)
 // --------------------------------------------------------------------------------------------
 
-fn scan_filesystem(root: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
-    let rx = spawn_senders(root, cfg);
-    let acc = Mutex::new(Vec::new());
+/// Walk the filesystem and perform a two‑pass scan:
+///
+///  **Pass 1** – Parse every file and extract function summaries (`Full` and
+///               `Taint` modes only; files that fail are logged and skipped).
+///  **Pass 2** – Re‑parse every file with the merged cross‑file summaries.
+///
+/// AST pattern queries are run during pass 2 (they don't depend on summaries).
+pub(crate) fn scan_filesystem(root: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
+    // ── Collect file list ────────────────────────────────────────────────
+    let all_paths: Vec<PathBuf> = {
+        let _span = tracing::info_span!("walk_files").entered();
+        let (rx, handle) = spawn_file_walker(root, cfg);
+        // Drain before joining so a walker blocked on a full channel cannot deadlock us.
+        let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
+        if let Err(err) = handle.join() {
+            tracing::error!("walker thread panicked: {:#?}", err);
+        }
+        paths
+    };
+    tracing::info!(file_count = all_paths.len(), "file walk complete");
 
-    rx.into_iter().flatten().par_bridge().try_for_each(|path| {
-        let mut local = run_rules_on_file(&path, cfg)?;
-        acc.lock().unwrap().append(&mut local);
-        Ok::<(), DynError>(())
-    })?;
+    // ── Pass 1: extract summaries ────────────────────────────────────────
+    let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
+        || cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;
+
+    let global_summaries: Option<GlobalSummaries> = if needs_taint {
+        let _span = tracing::info_span!("pass1_summaries", files = all_paths.len()).entered();
+        let collected: Vec<FuncSummary> = all_paths
+            .par_iter()
+            .flat_map_iter(|path| match extract_summaries_from_file(path, cfg) {
+                Ok(sums) => sums,
+                Err(e) => {
+                    tracing::warn!("pass 1: failed to summarise {}: {e}", path.display());
+                    vec![]
+                }
+            })
+            .collect();
+
+        tracing::info!(summaries = collected.len(), "pass 1 complete");
+        let _merge_span = tracing::info_span!("merge_summaries").entered();
+        let root_str = root.to_string_lossy();
+        Some(summary::merge_summaries(collected, Some(&root_str)))
+    } else {
+        None
+    };
+
+    // ── Pass 2: full analysis with cross‑file context ────────────────────
+    let mut diags: Vec<Diag> = {
+        let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered();
+        all_paths
+            .par_iter()
+            .map(|path| run_rules_on_file(path, cfg, global_summaries.as_ref(), Some(root)))
+            .try_reduce(Vec::new, |mut a, mut b| {
+                a.append(&mut b);
+                Ok(a)
+            })?
+    };
+    tracing::info!(diags = diags.len(), "pass 2 complete");
 
-    let mut diags = acc.into_inner()?;
     if let Some(max) = cfg.output.max_results {
         diags.truncate(max as usize);
     }
@@ -111,6 +168,21 @@ fn scan_filesystem(root: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
     Ok(diags)
 }
 
+// --------------------------------------------------------------------------------------------
+// Two‑pass scanning (with index)
+// --------------------------------------------------------------------------------------------
+
+/// Indexed two‑pass scan:
+///
+///  **Pass 1** – For every file that needs scanning, extract summaries and
+///               persist them to the database.  Unchanged files keep their
+///               existing summaries.
+///  **Pass 2** – Load *all* summaries from the DB, merge them, and re‑run
+///               taint analysis on every file with the full cross‑file view.
+///               Files whose *own* code has not changed AND whose
+///               dependencies have not changed can serve cached issues
+///               instead.  (Today we conservatively re‑analyse every file in
+///               pass 2; caching will be refined in approach 2 / 3.)
 pub fn scan_with_index_parallel(
     project: &str,
     pool: Arc<Pool<SqliteConnectionManager>>,
@@ -121,15 +193,79 @@ pub fn scan_with_index_parallel(
         idx.get_files(project)?
     };
 
+    let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
+        || cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;
+
+    // ── Pass 1: ensure summaries are up‑to‑date ──────────────────────────
+    if needs_taint {
+        let _span = tracing::info_span!("pass1_indexed", files = files.len()).entered();
+
+        files.par_iter().for_each_init(
+            || Indexer::from_pool(project, &pool).expect("db pool"),
+            |idx, path| {
+                let needs_scan = idx.should_scan(path).unwrap_or(true);
+                if !needs_scan {
+                    return; // summaries in DB are still valid
+                }
+
+                // Read once, hash once, extract summaries from bytes.
+                let bytes = match std::fs::read(path) {
+                    Ok(b) => b,
+                    Err(e) => {
+                        tracing::warn!("pass 1: cannot read {}: {e}", path.display());
+                        return;
+                    }
+                };
+                let hash = {
+                    let mut h = blake3::Hasher::new();
+                    h.update(&bytes);
+                    h.finalize().as_bytes().to_vec()
+                };
+
+                match extract_summaries_from_bytes(&bytes, path, cfg) {
+                    Ok(sums) => {
+                        idx.replace_summaries_for_file(path, &hash, &sums).ok();
+                    }
+                    Err(e) => {
+                        tracing::warn!("pass 1: {}: {e}", path.display());
+                    }
+                }
+            },
+        );
+    }
+
+    // ── Load global summaries ────────────────────────────────────────────
+    let global_summaries: Option<GlobalSummaries> = if needs_taint {
+        let _span = tracing::info_span!("load_summaries_db").entered();
+        let idx = Indexer::from_pool(project, &pool)?;
+        let all = idx.load_all_summaries()?;
+        tracing::info!(summaries = all.len(), "loaded cross-file summaries from DB");
+        Some(summary::merge_summaries(all, None))
+    } else {
+        None
+    };
+
+    // ── Pass 2: full analysis ────────────────────────────────────────────
+    let _span = tracing::info_span!("pass2_indexed").entered();
     let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
 
     files.into_par_iter().for_each_init(
         || Indexer::from_pool(project, &pool).expect("db pool"),
         |idx, path| {
-            let needs_scan = idx.should_scan(&path).unwrap_or(true);
+            // In pass 2 we always re-analyse when taint is enabled because
+            // global summaries may have changed even if this file didn't.
+            // For AST-only mode, we can still use the cached issues.
+            let needs_scan = if needs_taint {
+                true // conservative: always re-analyse in taint mode
+            } else {
+                idx.should_scan(&path).unwrap_or(true)
+            };
 
             let mut diags = if needs_scan {
-                let d = run_rules_on_file(&path, cfg).unwrap_or_default();
+                let d = run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
+                    .unwrap_or_default();
+
+                // Persist issues + update file record
                 let file_id = idx.upsert_file(&path).unwrap_or_default();
                 idx.replace_issues(
                     file_id,
@@ -148,10 +284,10 @@ pub fn scan_with_index_parallel(
 
             match cfg.scanner.mode {
                 crate::utils::config::AnalysisMode::Ast => {
-                    diags.retain(|d| !d.id.starts_with("taint"));
+                    diags.retain(|d| !d.id.starts_with("taint") && !d.id.starts_with("cfg-"));
                 }
                 crate::utils::config::AnalysisMode::Taint => {
-                    diags.retain(|d| d.id.starts_with("taint"));
+                    diags.retain(|d| d.id.starts_with("taint") || d.id.starts_with("cfg-"));
                 }
                 crate::utils::config::AnalysisMode::Full => {}
             }
@@ -165,9 +301,6 @@ pub fn scan_with_index_parallel(
         },
     );
 
-    // Optional, heavy: only vacuum on --rebuild-index
-    // if rebuild { idx.vacuum()?; }
-
     let mut diags: Vec<Diag> = diag_map.into_iter().flat_map(|(_, v)| v).collect();
 
     if let Some(max) = cfg.output.max_results {
diff --git a/src/database.rs b/src/database.rs
index c647669d..edd8e3a1 100644
--- a/src/database.rs
+++ b/src/database.rs
@@ -1,6 +1,6 @@
 pub mod index {
     use crate::commands::scan::Diag;
-    use crate::errors::NyxResult;
+    use crate::errors::{NyxError, NyxResult};
     use crate::patterns::Severity;
     use r2d2::{Pool, PooledConnection};
     use r2d2_sqlite::SqliteConnectionManager;
@@ -34,12 +34,18 @@ pub mod index {
             col INTEGER NOT NULL,
             PRIMARY KEY (file_id, rule_id, line, col));
 
-        CREATE TABLE IF NOT EXISTS function_summaries (hash TEXT PRIMARY KEY,
+        CREATE TABLE IF NOT EXISTS function_summaries (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
             project TEXT NOT NULL,
+            file_path TEXT NOT NULL,
+            file_hash BLOB NOT NULL,
             name TEXT NOT NULL,
+            arity INTEGER NOT NULL DEFAULT -1,
             lang TEXT NOT NULL,
             summary TEXT NOT NULL,
-            updated_at INTEGER NOT NULL);
+            updated_at INTEGER NOT NULL,
+            UNIQUE(project, file_path, name, arity)
+        );
     "#;
 
     // TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
@@ -61,6 +67,7 @@ pub mod index {
 
     impl Indexer {
         pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
+            let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
             let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
                 | OpenFlags::SQLITE_OPEN_CREATE
                 | OpenFlags::SQLITE_OPEN_FULL_MUTEX;
@@ -70,7 +77,43 @@ pub mod index {
             {
                 let conn = pool.get()?;
                 conn.pragma_update(None, "journal_mode", "WAL")?;
+                conn.pragma_update(None, "synchronous", "NORMAL")?;
+                conn.pragma_update(None, "cache_size", "-8000")?; // 8 MB
+                conn.pragma_update(None, "temp_store", "MEMORY")?;
+                conn.pragma_update(None, "mmap_size", "268435456")?; // 256 MB
                 conn.execute_batch(SCHEMA)?;
+
+                // Migrate: if the function_summaries table has the old schema
+                // (missing `arity` column), drop and recreate it.
+                let has_arity: bool = conn
+                    .prepare("PRAGMA table_info(function_summaries)")
+                    .and_then(|mut s| {
+                        let cols: Vec<String> = s
+                            .query_map([], |r| r.get::<_, String>(1))?
+                            .filter_map(Result::ok)
+                            .collect();
+                        Ok(cols.iter().any(|c| c == "arity"))
+                    })
+                    .unwrap_or(true);
+
+                if !has_arity {
+                    tracing::info!("migrating function_summaries: adding arity column");
+                    conn.execute_batch("DROP TABLE IF EXISTS function_summaries;")?;
+                    conn.execute_batch(
+                        "CREATE TABLE IF NOT EXISTS function_summaries (
+                            id INTEGER PRIMARY KEY AUTOINCREMENT,
+                            project TEXT NOT NULL,
+                            file_path TEXT NOT NULL,
+                            file_hash BLOB NOT NULL,
+                            name TEXT NOT NULL,
+                            arity INTEGER NOT NULL DEFAULT -1,
+                            lang TEXT NOT NULL,
+                            summary TEXT NOT NULL,
+                            updated_at INTEGER NOT NULL,
+                            UNIQUE(project, file_path, name, arity)
+                        );",
+                    )?;
+                }
             }
             Ok(pool)
         }
@@ -196,49 +239,73 @@ pub mod index {
             Ok(issue_iter.filter_map(Result::ok).collect())
         }
 
-        // pub fn upsert_summary(
-        //     &mut self,
-        //     project: &str,
-        //     path: &Path,
-        //     hash: &str,
-        //     s: &crate::summary::FuncSummary,
-        // ) -> NyxResult<()> {
-        //     let conn = self.c();
-        //     let now  = chrono::Utc::now().timestamp_millis(); // i64
-        //
-        //     conn.execute(
-        //         "INSERT INTO function_summaries (hash, project, name, lang, summary, updated_at)
-        //              VALUES (?1, ?2, ?3, ?4, ?5, ?6)
-        //              ON CONFLICT(hash) DO UPDATE SET summary = excluded.summary,
-        //                                              updated_at = excluded.updated_at",
-        //         (
-        //             hash,
-        //             project,
-        //             &s.name,
-        //             path.extension().and_then(|e| e.to_str()).unwrap_or_default(),
-        //             serde_json::to_string(s).unwrap(), //TODO REPLACE UNWRAP
-        //             now,
-        //         ),
-        //     )?;
-        //     Ok(())
-        // }
-        //
-        // pub fn load_all_summaries(&self, project: &str) -> NyxResult<Vec<crate::summary::FuncSummary<'static>>> {
-        //     let mut stmt = self
-        //         .c()
-        //         .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
-        //
-        //     let iter = stmt.query_map([project], |row| {
-        //         let json: String = row.get(0)?;
-        //         Ok(serde_json::from_str::<crate::summary::FuncSummary>(json.as_str()).unwrap()) // TODO: REPLACE UNWRAP
-        //     })?;
-        //
-        //     Ok(iter
-        //         .collect::<Result<Vec<_>, _>>()?
-        //         .into_iter()
-        //         .map(|s| unsafe { std::mem::transmute::<_, crate::summary::FuncSummary<'static>>(s) })
-        //         .collect())
-        // }
+        /// Swap in a fresh set of function summaries for one file.
+        ///
+        /// Within a single transaction, every row stored for
+        /// `(project, file_path)` is deleted and one row is inserted per entry
+        /// of `summaries`, each stamped with `file_hash` and the current time.
+        pub fn replace_summaries_for_file(
+            &mut self,
+            file_path: &Path,
+            file_hash: &[u8],
+            summaries: &[crate::summary::FuncSummary],
+        ) -> NyxResult<()> {
+            let tx = self.conn.transaction()?;
+            let path_str = file_path.to_string_lossy();
+            let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
+
+            // Clear out whatever was recorded for this file before.
+            tx.execute(
+                "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2",
+                params![self.project, path_str],
+            )?;
+
+            let mut insert = tx.prepare(
+                "INSERT OR REPLACE INTO function_summaries
+                        (project, file_path, file_hash, name, arity, lang, summary, updated_at)
+                     VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
+            )?;
+            for summary in summaries {
+                let arity = summary.param_count as i64;
+                let json = serde_json::to_string(summary)
+                    .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
+                insert.execute(params![
+                    self.project,
+                    path_str,
+                    file_hash,
+                    summary.name,
+                    arity,
+                    summary.lang,
+                    json,
+                    now
+                ])?;
+            }
+            // The prepared statement borrows `tx`, so drop it before committing.
+            drop(insert);
+            tx.commit()?;
+            Ok(())
+        }
+
+        /// Load every function summary stored for this project.
+        pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
+            let mut stmt = self
+                .c()
+                .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
+
+            // Each row holds one JSON-encoded `FuncSummary`.
+            let rows = stmt.query_map([&self.project], |row| row.get::<_, String>(0))?;
+
+            let mut out = Vec::new();
+            for row in rows {
+                let json = row?;
+                // A bad row is a JSON decode failure, not a to-SQL conversion
+                // failure, so report it the same way the serialise path does.
+                let s: crate::summary::FuncSummary = serde_json::from_str(&json)
+                    .map_err(|e| NyxError::Msg(format!("summary deserialise: {e}")))?;
+                out.push(s);
+            }
+            Ok(out)
+        }
 
         /// gets files from the database
         pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
diff --git a/src/interop.rs b/src/interop.rs
new file mode 100644
index 00000000..0a21839b
--- /dev/null
+++ b/src/interop.rs
@@ -0,0 +1,33 @@
+use crate::symbol::{FuncKey, Lang};
+
+/// Identifies a specific call site within a caller function.
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+pub struct CallSiteKey {
+    pub caller_lang: Lang, // language of the calling code
+    /// Project-relative file path of the caller.
+    pub caller_namespace: String,
+    /// Name of the function that encloses the call site.
+    pub caller_func: String,
+    /// The identifier at the call site (callee name as written).
+    pub callee_symbol: String,
+    /// Per-function call ordinal (0-based); `0` doubles as a match-any wildcard.
+    /// NOTE(review): a real first call (ordinal 0) cannot be told apart from it.
+    pub ordinal: u32,
+}
+
+/// An explicit cross-language bridge edge.
+///
+/// Connects a call site in one language to a function definition in another.
+/// Without an `InteropEdge`, cross-language resolution is never attempted —
+/// this prevents false positives from name collisions across languages.
+#[derive(Clone, Debug)]
+pub struct InteropEdge {
+    pub from: CallSiteKey, // call site where the edge starts
+    pub to: FuncKey, // function definition the call resolves to
+    /// Pairs of (caller argument position, callee parameter position).
+    #[allow(dead_code)] // not read yet; reserved for per-argument taint mapping
+    pub arg_map: Vec<(usize, usize)>,
+    /// Whether the callee's return value carries taint.
+    #[allow(dead_code)] // not read yet; reserved for interop return taint control
+    pub ret_taints: bool,
+}
diff --git a/src/labels/c.rs b/src/labels/c.rs
new file mode 100644
index 00000000..39f3d093
--- /dev/null
+++ b/src/labels/c.rs
@@ -0,0 +1,69 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
+
+pub static RULES: &[LabelRule] = &[
+    // ─── Sources: environment and input-reading calls, labelled `Cap::all()` ───
+    LabelRule {
+        matchers: &["getenv"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &["fgets", "scanf", "fscanf", "gets", "read"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ─── Sanitizers: `sanitize_` looks like a name prefix — TODO confirm matching ───
+    LabelRule {
+        matchers: &["sanitize_"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    // ─── Sinks: process execution (SHELL_ESCAPE), then output/copy calls (HTML_ESCAPE) ───
+    LabelRule {
+        matchers: &[
+            "system", "popen", "exec", "execl", "execlp", "execle", "execve", "execvp",
+        ],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["printf", "fprintf", "sprintf", "strcpy", "strcat"],
+        label: DataLabel::Sink(Cap::HTML_ESCAPE),
+    },
+];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow — `do_statement` shares `Kind::While` with `while_statement`
+    "if_statement"          => Kind::If,
+    "while_statement"       => Kind::While,
+    "for_statement"         => Kind::For,
+    "do_statement"          => Kind::While,
+
+    "return_statement"      => Kind::Return,
+    "break_statement"       => Kind::Break,
+    "continue_statement"    => Kind::Continue,
+
+    // structure — file root, blocks and function definitions
+    "translation_unit"      => Kind::SourceFile,
+    "compound_statement"    => Kind::Block,
+    "function_definition"   => Kind::Function,
+
+    // data-flow — declarations and expression statements are `CallWrapper`s
+    "call_expression"       => Kind::CallFn,
+    "assignment_expression" => Kind::Assignment,
+    "declaration"           => Kind::CallWrapper,
+    "expression_statement"  => Kind::CallWrapper,
+
+    // trivia — comments, punctuation and preprocessor include/define lines
+    "comment"               => Kind::Trivia,
+    ";"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "{"  => Kind::Trivia, "}"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+    "preproc_include"       => Kind::Trivia,
+    "preproc_def"           => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters", // presumably the node field holding the parameter list
+    param_node_kinds: &["parameter_declaration"], // node kind treated as one parameter
+    self_param_kinds: &[], // left empty for C
+    ident_fields: &["declarator", "name"], // presumably fields searched for the identifier
+};
diff --git a/src/labels/cpp.rs b/src/labels/cpp.rs
new file mode 100644
index 00000000..ad526bb4
--- /dev/null
+++ b/src/labels/cpp.rs
@@ -0,0 +1,77 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
+
+pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
+    LabelRule {
+        matchers: &["getenv"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &["std::cin", "std::getline", "fgets", "scanf", "gets"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ───────── Sanitizers ──────────
+    LabelRule {
+        matchers: &["sanitize_"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    // ─────────── Sinks ─────────────
+    LabelRule {
+        matchers: &["system", "popen", "execve", "execvp"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &[
+            "printf",
+            "fprintf",
+            "sprintf",
+            "strcpy",
+            "strcat",
+            "std::cout",
+        ],
+        label: DataLabel::Sink(Cap::HTML_ESCAPE),
+    },
+];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if_statement"          => Kind::If,
+    "while_statement"       => Kind::While,
+    "for_statement"         => Kind::For,
+    "for_range_loop"        => Kind::For,
+    "do_statement"          => Kind::While,
+
+    "return_statement"      => Kind::Return,
+    "break_statement"       => Kind::Break,
+    "continue_statement"    => Kind::Continue,
+
+    // structure
+    "translation_unit"      => Kind::SourceFile,
+    "compound_statement"    => Kind::Block,
+    "function_definition"   => Kind::Function,
+
+    // data-flow
+    "call_expression"       => Kind::CallFn,
+    "assignment_expression" => Kind::Assignment,
+    "declaration"           => Kind::CallWrapper,
+    "expression_statement"  => Kind::CallWrapper,
+
+    // trivia
+    "comment"               => Kind::Trivia,
+    ";"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "{"  => Kind::Trivia, "}"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+    "preproc_include"       => Kind::Trivia,
+    "preproc_def"           => Kind::Trivia,
+    "using_declaration"     => Kind::Trivia,
+    "namespace_definition"  => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["parameter_declaration"],
+    self_param_kinds: &[],
+    ident_fields: &["declarator", "name"],
+};
diff --git a/src/labels/go.rs b/src/labels/go.rs
new file mode 100644
index 00000000..d70cdf8e
--- /dev/null
+++ b/src/labels/go.rs
@@ -0,0 +1,72 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
+
+pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
+    LabelRule {
+        matchers: &["os.Getenv"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &["http.Request", "r.FormValue", "r.URL"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ───────── Sanitizers ──────────
+    LabelRule {
+        matchers: &["html.EscapeString", "template.HTMLEscapeString"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["url.QueryEscape"],
+        label: DataLabel::Sanitizer(Cap::URL_ENCODE),
+    },
+    // ─────────── Sinks ─────────────
+    LabelRule {
+        matchers: &["exec.Command"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["db.Query", "db.Exec"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if_statement"             => Kind::If,
+    "for_statement"            => Kind::For,
+
+    "return_statement"         => Kind::Return,
+    "break_statement"          => Kind::Break,
+    "continue_statement"       => Kind::Continue,
+
+    // structure
+    "source_file"              => Kind::SourceFile,
+    "block"                    => Kind::Block,
+    "statement_list"           => Kind::Block,
+    "function_declaration"     => Kind::Function,
+    "method_declaration"       => Kind::Function,
+
+    // data-flow
+    "call_expression"          => Kind::CallFn,
+    "assignment_statement"     => Kind::Assignment,
+    "short_var_declaration"    => Kind::CallWrapper,
+    "expression_statement"     => Kind::CallWrapper,
+    "var_declaration"          => Kind::CallWrapper,
+
+    // trivia
+    "comment"                  => Kind::Trivia,
+    ";"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "{"  => Kind::Trivia, "}"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+    "import_declaration"       => Kind::Trivia,
+    "package_clause"           => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["parameter_declaration"],
+    self_param_kinds: &[],
+    ident_fields: &["name"],
+};
diff --git a/src/labels/java.rs b/src/labels/java.rs
new file mode 100644
index 00000000..02a36ee1
--- /dev/null
+++ b/src/labels/java.rs
@@ -0,0 +1,73 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
+
+pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
+    LabelRule {
+        matchers: &["System.getenv"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &["getParameter", "getInputStream", "getHeader", "getCookies"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ───────── Sanitizers ──────────
+    LabelRule {
+        matchers: &["HtmlUtils.htmlEscape", "StringEscapeUtils.escapeHtml4"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    // ─────────── Sinks ─────────────
+    LabelRule {
+        matchers: &["Runtime.exec"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["executeQuery", "executeUpdate", "prepareStatement"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if_statement"                 => Kind::If,
+    "while_statement"              => Kind::While,
+    "for_statement"                => Kind::For,
+    "enhanced_for_statement"       => Kind::For,
+
+    "return_statement"             => Kind::Return,
+    "break_statement"              => Kind::Break,
+    "continue_statement"           => Kind::Continue,
+
+    // structure
+    "program"                      => Kind::SourceFile,
+    "block"                        => Kind::Block,
+    "class_declaration"            => Kind::Block,
+    "class_body"                   => Kind::Block,
+    "interface_body"               => Kind::Block,
+    "method_declaration"           => Kind::Function,
+    "constructor_declaration"      => Kind::Function,
+
+    // data-flow
+    "method_invocation"            => Kind::CallMethod,
+    "object_creation_expression"   => Kind::CallFn,
+    "assignment_expression"        => Kind::Assignment,
+    "local_variable_declaration"   => Kind::CallWrapper,
+    "expression_statement"         => Kind::CallWrapper,
+
+    // trivia
+    "line_comment"                 => Kind::Trivia,
+    "block_comment"                => Kind::Trivia,
+    ";"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "{"  => Kind::Trivia, "}"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+    "import_declaration"           => Kind::Trivia,
+    "package_declaration"          => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["formal_parameter", "spread_parameter"],
+    self_param_kinds: &[],
+    ident_fields: &["name"],
+};
diff --git a/src/labels/javascript.rs b/src/labels/javascript.rs
index b8b99c21..a5d014ae 100644
--- a/src/labels/javascript.rs
+++ b/src/labels/javascript.rs
@@ -1,17 +1,91 @@
-use crate::labels::{Cap, DataLabel, LabelRule};
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
 
-// TODO: refactor this
 pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
     LabelRule {
-        matchers: &["document.location", "window.location"],
+        matchers: &[
+            "document.location",
+            "window.location",
+            "req.body",
+            "req.query",
+            "req.params",
+            "req.headers",
+            "req.cookies",
+            "process.env",
+        ],
         label: DataLabel::Source(Cap::all()),
     },
+    // ───────── Sanitizers ──────────
     LabelRule {
         matchers: &["JSON.parse"],
         label: DataLabel::Sanitizer(Cap::JSON_PARSE),
     },
+    LabelRule {
+        matchers: &["encodeURIComponent", "encodeURI"],
+        label: DataLabel::Sanitizer(Cap::URL_ENCODE),
+    },
+    LabelRule {
+        matchers: &["DOMPurify.sanitize"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    // ─────────── Sinks ─────────────
     LabelRule {
         matchers: &["eval"],
         label: DataLabel::Sink(Cap::SHELL_ESCAPE),
     },
+    LabelRule {
+        matchers: &["innerHTML"],
+        label: DataLabel::Sink(Cap::HTML_ESCAPE),
+    },
+    LabelRule {
+        matchers: &[
+            "child_process.exec",
+            "child_process.execSync",
+            "child_process.spawn",
+        ],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
 ];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if_statement"          => Kind::If,
+    "while_statement"       => Kind::While,
+    "for_statement"         => Kind::For,
+    "for_in_statement"      => Kind::For,
+
+    "return_statement"      => Kind::Return,
+    "break_statement"       => Kind::Break,
+    "continue_statement"    => Kind::Continue,
+
+    // structure
+    "program"               => Kind::SourceFile,
+    "statement_block"       => Kind::Block,
+    "function_declaration"  => Kind::Function,
+    "arrow_function"        => Kind::Function,
+    "method_definition"     => Kind::Function,
+
+    // data-flow
+    "call_expression"       => Kind::CallFn,
+    "new_expression"        => Kind::CallFn,
+    "assignment_expression" => Kind::Assignment,
+    "variable_declaration"  => Kind::CallWrapper,
+    "lexical_declaration"   => Kind::CallWrapper,
+    "expression_statement"  => Kind::CallWrapper,
+
+    // trivia
+    "comment"               => Kind::Trivia,
+    ";"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "{"  => Kind::Trivia, "}"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+    "import_statement"      => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["identifier"],
+    self_param_kinds: &[],
+    ident_fields: &["name", "pattern"],
+};
diff --git a/src/labels/mod.rs b/src/labels/mod.rs
index 48d81222..64e5d4d3 100644
--- a/src/labels/mod.rs
+++ b/src/labels/mod.rs
@@ -1,5 +1,13 @@
+mod c;
+mod cpp;
+mod go;
+mod java;
 mod javascript;
+mod php;
+mod python;
+mod ruby;
 mod rust;
+mod typescript;
 
 use bitflags::bitflags;
 use once_cell::sync::Lazy;
@@ -22,7 +30,8 @@ bitflags! {
         const SHELL_ESCAPE = 0b0000_0100;
         const URL_ENCODE   = 0b0000_1000;
         const JSON_PARSE   = 0b0001_0000;
-        // ADD MORE
+        const FILE_IO      = 0b0010_0000;
+        // TODO: add more capabilities as needed
     }
 }
 
@@ -55,6 +64,26 @@ pub enum DataLabel {
     Sink(Cap),
 }
 
+/// Configuration for extracting parameter names from function AST nodes.
+pub struct ParamConfig {
+    /// Field name on the function node that holds the parameter list
+    /// (e.g. "parameters", "formal_parameters").
+    pub params_field: &'static str,
+    /// Tree-sitter node kinds that represent individual parameters.
+    pub param_node_kinds: &'static [&'static str],
+    /// Node kinds representing self/this parameters (e.g. "self_parameter" in Rust).
+    pub self_param_kinds: &'static [&'static str],
+    /// Field names tried in order to extract the identifier from a parameter node.
+    pub ident_fields: &'static [&'static str],
+}
+
+static DEFAULT_PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["parameter", "identifier"],
+    self_param_kinds: &[],
+    ident_fields: &["name", "pattern"],
+};
+
 static REGISTRY: Lazy<HashMap<&'static str, &'static [LabelRule]>> = Lazy::new(|| {
     let mut m = HashMap::new();
     m.insert("rust", rust::RULES);
@@ -63,8 +92,25 @@ static REGISTRY: Lazy<HashMap<&'static str, &'static [LabelRule]>> = Lazy::new(|
     m.insert("javascript", javascript::RULES);
     m.insert("js", javascript::RULES);
 
-    // add more languages in one line:
-    // m.insert("go", go::RULES);
+    m.insert("typescript", typescript::RULES);
+    m.insert("ts", typescript::RULES);
+
+    m.insert("python", python::RULES);
+    m.insert("py", python::RULES);
+
+    m.insert("go", go::RULES);
+
+    m.insert("java", java::RULES);
+
+    m.insert("c", c::RULES);
+
+    m.insert("cpp", cpp::RULES);
+    m.insert("c++", cpp::RULES);
+
+    m.insert("php", php::RULES);
+
+    m.insert("ruby", ruby::RULES);
+    m.insert("rb", ruby::RULES);
 
     m
 });
@@ -76,13 +122,71 @@ pub(crate) static CLASSIFIERS: Lazy<HashMap<&'static str, FastMap>> = Lazy::new(
     m.insert("rust", &rust::KINDS);
     m.insert("rs", &rust::KINDS);
 
-    // m.insert("javascript",  &javascript::KINDS);
-    // m.insert("js",          &javascript::KINDS);
+    m.insert("javascript", &javascript::KINDS);
+    m.insert("js", &javascript::KINDS);
+
+    m.insert("typescript", &typescript::KINDS);
+    m.insert("ts", &typescript::KINDS);
+
+    m.insert("python", &python::KINDS);
+    m.insert("py", &python::KINDS);
+
+    m.insert("go", &go::KINDS);
+
+    m.insert("java", &java::KINDS);
+
+    m.insert("c", &c::KINDS);
+
+    m.insert("cpp", &cpp::KINDS);
+    m.insert("c++", &cpp::KINDS);
+
+    m.insert("php", &php::KINDS);
+
+    m.insert("ruby", &ruby::KINDS);
+    m.insert("rb", &ruby::KINDS);
 
-    // todo: add more languages
     m
 });
 
+static PARAM_CONFIGS: Lazy<HashMap<&'static str, &'static ParamConfig>> = Lazy::new(|| {
+    let mut m = HashMap::new();
+    m.insert("rust", &rust::PARAM_CONFIG);
+    m.insert("rs", &rust::PARAM_CONFIG);
+
+    m.insert("javascript", &javascript::PARAM_CONFIG);
+    m.insert("js", &javascript::PARAM_CONFIG);
+
+    m.insert("typescript", &typescript::PARAM_CONFIG);
+    m.insert("ts", &typescript::PARAM_CONFIG);
+
+    m.insert("python", &python::PARAM_CONFIG);
+    m.insert("py", &python::PARAM_CONFIG);
+
+    m.insert("go", &go::PARAM_CONFIG);
+
+    m.insert("java", &java::PARAM_CONFIG);
+
+    m.insert("c", &c::PARAM_CONFIG);
+
+    m.insert("cpp", &cpp::PARAM_CONFIG);
+    m.insert("c++", &cpp::PARAM_CONFIG);
+
+    m.insert("php", &php::PARAM_CONFIG);
+
+    m.insert("ruby", &ruby::PARAM_CONFIG);
+    m.insert("rb", &ruby::PARAM_CONFIG);
+
+    m
+});
+
+/// Return the parameter-extraction config for `lang`, or `DEFAULT_PARAM_CONFIG` if unregistered.
+pub fn param_config(lang: &str) -> &'static ParamConfig {
+    PARAM_CONFIGS
+        .get(lang)
+        .copied()
+        .unwrap_or(&DEFAULT_PARAM_CONFIG)
+}
+
 #[inline(always)]
 pub fn lookup(lang: &str, raw: &str) -> Kind {
     CLASSIFIERS
@@ -91,31 +195,77 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
         .unwrap_or(Kind::Other)
 }
 
+/// Case-insensitive suffix check (ASCII).
+#[inline]
+fn ends_with_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
+    if needle.len() > haystack.len() {
+        return false;
+    }
+    let start = haystack.len() - needle.len();
+    haystack[start..]
+        .iter()
+        .zip(needle)
+        .all(|(h, n)| h.eq_ignore_ascii_case(n))
+}
+
+/// Case-insensitive prefix check (ASCII).
+#[inline]
+fn starts_with_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
+    if needle.len() > haystack.len() {
+        return false;
+    }
+    haystack[..needle.len()]
+        .iter()
+        .zip(needle)
+        .all(|(h, n)| h.eq_ignore_ascii_case(n))
+}
+
 /// Try to classify a piece of syntax text.
-/// `lang` is the canonicalised language key (“rust”, “javascript”, …).
+/// `lang` is the canonicalised language key ("rust", "javascript", ...).
+///
+/// **Two-pass matching** -- exact / suffix matches are checked across *all*
+/// rules before any prefix (`foo_`) match is attempted.  This prevents a
+/// greedy prefix like `sanitize_` from shadowing a more specific exact
+/// match like `sanitize_shell`.
 pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
-    let key = lang.to_ascii_lowercase();
-    let rules = REGISTRY.get(key.as_str())?;
+    // Lang slugs are normally already lowercase, so try a direct lookup first
+    // and only allocate a lowercased copy when that lookup misses.
+    let rules = REGISTRY.get(lang).or_else(|| {
+        let key = lang.to_ascii_lowercase();
+        REGISTRY.get(key.as_str())
+    })?;
+
     let head = text.split(['(', '<']).next().unwrap_or("");
+    let trimmed = head.trim().as_bytes();
 
-    let text_lc = head.trim().to_ascii_lowercase();
-
+    // Pass 1: exact / suffix matches (high confidence)
+    // Matchers are `&'static str` in mixed case (e.g. "os.Getenv"), so both sides
+    // are compared with ASCII case-insensitive byte helpers; no heap allocations.
     for rule in *rules {
         for raw in rule.matchers {
-            let m = raw.to_ascii_lowercase();
-
-            if m.ends_with('_') {
-                if text_lc.starts_with(&m) {
-                    return Some(rule.label);
-                }
-            } else if text_lc.ends_with(&m) {
-                let start = text_lc.len() - m.len();
-                let ok = start == 0 || matches!(text_lc.as_bytes()[start - 1], b'.' | b':');
+            let m = raw.as_bytes();
+            if m.last() == Some(&b'_') {
+                continue; // skip prefix matchers in pass 1
+            }
+            if ends_with_ignore_case(trimmed, m) {
+                let start = trimmed.len() - m.len();
+                let ok = start == 0 || matches!(trimmed[start - 1], b'.' | b':');
                 if ok {
                     return Some(rule.label);
                 }
             }
         }
     }
+
+    // Pass 2: prefix matches (catch-all, lower priority)
+    for rule in *rules {
+        for raw in rule.matchers {
+            let m = raw.as_bytes();
+            if m.last() == Some(&b'_') && starts_with_ignore_case(trimmed, m) {
+                return Some(rule.label);
+            }
+        }
+    }
+
     None
 }
diff --git a/src/labels/php.rs b/src/labels/php.rs
new file mode 100644
index 00000000..5a4837f9
--- /dev/null
+++ b/src/labels/php.rs
@@ -0,0 +1,77 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
+
+pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
+    LabelRule {
+        matchers: &["$_GET", "$_POST", "$_REQUEST", "$_COOKIE"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &["file_get_contents", "fread"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ───────── Sanitizers ──────────
+    LabelRule {
+        matchers: &["htmlspecialchars", "htmlentities"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["escapeshellarg", "escapeshellcmd"],
+        label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
+    },
+    // ─────────── Sinks ─────────────
+    LabelRule {
+        matchers: &["system", "exec", "passthru", "shell_exec"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["echo", "print"],
+        label: DataLabel::Sink(Cap::HTML_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["mysqli_query", "pg_query"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if_statement"                  => Kind::If,
+    "while_statement"               => Kind::While,
+    "for_statement"                 => Kind::For,
+    "foreach_statement"             => Kind::For,
+
+    "return_statement"              => Kind::Return,
+    "break_statement"               => Kind::Break,
+    "continue_statement"            => Kind::Continue,
+
+    // structure
+    "program"                       => Kind::SourceFile,
+    "compound_statement"            => Kind::Block,
+    "function_definition"           => Kind::Function,
+    "method_declaration"            => Kind::Function,
+
+    // data-flow
+    "function_call_expression"      => Kind::CallFn,
+    "member_call_expression"        => Kind::CallMethod,
+    "assignment_expression"         => Kind::Assignment,
+    "expression_statement"          => Kind::CallWrapper,
+
+    // trivia
+    "comment"                       => Kind::Trivia,
+    ";"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "{"  => Kind::Trivia, "}"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+    "php_tag"                       => Kind::Trivia,
+    "namespace_definition"          => Kind::Trivia,
+    "namespace_use_declaration"     => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["simple_parameter", "variadic_parameter"],
+    self_param_kinds: &[],
+    ident_fields: &["name"],
+};
diff --git a/src/labels/python.rs b/src/labels/python.rs
new file mode 100644
index 00000000..f945d2b0
--- /dev/null
+++ b/src/labels/python.rs
@@ -0,0 +1,91 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
+
+pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
+    LabelRule {
+        matchers: &["os.getenv", "os.environ"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &[
+            "request.args",
+            "request.form",
+            "request.json",
+            "request.headers",
+            "request.cookies",
+            "input",
+        ],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &["sys.argv"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ───────── Sanitizers ──────────
+    LabelRule {
+        matchers: &["html.escape"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["shlex.quote"],
+        label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
+    },
+    // ─────────── Sinks ─────────────
+    LabelRule {
+        matchers: &["eval", "exec"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &[
+            "os.system",
+            "os.popen",
+            "subprocess.call",
+            "subprocess.run",
+            "subprocess.Popen",
+            "subprocess.check_output",
+            "subprocess.check_call",
+        ],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["cursor.execute", "cursor.executemany"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if_statement"          => Kind::If,
+    "while_statement"       => Kind::While,
+    "for_statement"         => Kind::For,
+
+    "return_statement"      => Kind::Return,
+    "break_statement"       => Kind::Break,
+    "continue_statement"    => Kind::Continue,
+
+    // structure
+    "module"                => Kind::SourceFile,
+    "block"                 => Kind::Block,
+    "function_definition"   => Kind::Function,
+
+    // data-flow
+    "call"                  => Kind::CallFn,
+    "assignment"            => Kind::Assignment,
+    "expression_statement"  => Kind::CallWrapper,
+
+    // trivia
+    "comment"               => Kind::Trivia,
+    ":"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+    "import_statement"      => Kind::Trivia,
+    "import_from_statement" => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["identifier"],
+    self_param_kinds: &[],
+    ident_fields: &["name"],
+};
diff --git a/src/labels/ruby.rs b/src/labels/ruby.rs
new file mode 100644
index 00000000..2a8a731e
--- /dev/null
+++ b/src/labels/ruby.rs
@@ -0,0 +1,74 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
+
+pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
+    LabelRule {
+        matchers: &["ENV", "gets"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &["params"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ───────── Sanitizers ──────────
+    LabelRule {
+        matchers: &["CGI.escapeHTML", "ERB::Util.html_escape"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["Shellwords.escape", "Shellwords.shellescape"],
+        label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
+    },
+    // ─────────── Sinks ─────────────
+    LabelRule {
+        matchers: &["system", "exec"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["eval"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["puts", "print"],
+        label: DataLabel::Sink(Cap::HTML_ESCAPE),
+    },
+];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if"                    => Kind::If,
+    "unless"                => Kind::If,
+    "while"                 => Kind::While,
+    "for"                   => Kind::For,
+
+    "return"                => Kind::Return,
+    "break"                 => Kind::Break,
+    "next"                  => Kind::Continue,
+
+    // structure
+    "program"               => Kind::SourceFile,
+    "body_statement"        => Kind::Block,
+    "do_block"              => Kind::Block,
+    "then"                  => Kind::Block,
+    "else"                  => Kind::Block,
+
+    // data-flow
+    "call"                  => Kind::CallFn,
+    "method_call"           => Kind::CallFn,
+    "assignment"            => Kind::Assignment,
+    "method"                => Kind::Function,
+
+    // trivia
+    "comment"               => Kind::Trivia,
+    ";"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["identifier"],
+    self_param_kinds: &[],
+    ident_fields: &["name"],
+};
diff --git a/src/labels/rust.rs b/src/labels/rust.rs
index 9a84dbad..889a8b5a 100644
--- a/src/labels/rust.rs
+++ b/src/labels/rust.rs
@@ -1,24 +1,26 @@
-use crate::labels::{Cap, DataLabel, Kind, LabelRule};
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
 use phf::{Map, phf_map};
 
 pub static RULES: &[LabelRule] = &[
     // ─────────── Sources ───────────
     LabelRule {
-        matchers: &["std::env::var", "env::var"],
+        matchers: &["std::env::var", "env::var", "source_env"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &["fs::read_to_string", "source_file"],
         label: DataLabel::Source(Cap::all()),
     },
     // ───────── Sanitizers ──────────
-    // `fn sanitize_*(&str) -> String`
     LabelRule {
         matchers: &["html_escape::encode_safe", "sanitize_", "sanitize_html"],
         label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
     },
     LabelRule {
-        matchers: &["shell_escape::unix::escape"],
+        matchers: &["shell_escape::unix::escape", "sanitize_shell"],
         label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
     },
     // ─────────── Sinks ─────────────
-    //  All the key points where untrusted strings reach the OS shell.
     LabelRule {
         matchers: &[
             "command::new",
@@ -30,6 +32,10 @@ pub static RULES: &[LabelRule] = &[
         ],
         label: DataLabel::Sink(Cap::SHELL_ESCAPE),
     },
+    LabelRule {
+        matchers: &["sink_html"],
+        label: DataLabel::Sink(Cap::HTML_ESCAPE),
+    },
 ];
 
 pub static KINDS: Map<&'static str, Kind> = phf_map! {
@@ -70,3 +76,10 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
     "mod_item"         => Kind::Trivia,
     "type_item"        => Kind::Trivia,
 };
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["parameter"],
+    self_param_kinds: &["self_parameter"],
+    ident_fields: &["pattern"],
+};
diff --git a/src/labels/typescript.rs b/src/labels/typescript.rs
new file mode 100644
index 00000000..fcae2dec
--- /dev/null
+++ b/src/labels/typescript.rs
@@ -0,0 +1,90 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
+use phf::{Map, phf_map};
+
+pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
+    LabelRule {
+        matchers: &[
+            "document.location",
+            "window.location",
+            "req.body",
+            "req.query",
+            "req.params",
+            "req.headers",
+            "req.cookies",
+            "process.env",
+        ],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ───────── Sanitizers ──────────
+    LabelRule {
+        matchers: &["encodeURIComponent", "encodeURI"],
+        label: DataLabel::Sanitizer(Cap::URL_ENCODE),
+    },
+    LabelRule {
+        matchers: &["DOMPurify.sanitize"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    // ─────────── Sinks ─────────────
+    LabelRule {
+        matchers: &["eval"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["innerHTML"],
+        label: DataLabel::Sink(Cap::HTML_ESCAPE),
+    },
+    LabelRule {
+        matchers: &[
+            "child_process.exec",
+            "child_process.execSync",
+            "child_process.spawn",
+        ],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+];
+
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if_statement"          => Kind::If,
+    "while_statement"       => Kind::While,
+    "for_statement"         => Kind::For,
+    "for_in_statement"      => Kind::For,
+    "for_of_statement"      => Kind::For,
+
+    "return_statement"      => Kind::Return,
+    "break_statement"       => Kind::Break,
+    "continue_statement"    => Kind::Continue,
+
+    // structure
+    "program"               => Kind::SourceFile,
+    "statement_block"       => Kind::Block,
+    "function_declaration"  => Kind::Function,
+    "arrow_function"        => Kind::Function,
+    "method_definition"     => Kind::Function,
+
+    // data-flow
+    "call_expression"       => Kind::CallFn,
+    "new_expression"        => Kind::CallFn,
+    "assignment_expression" => Kind::Assignment,
+    "variable_declaration"  => Kind::CallWrapper,
+    "lexical_declaration"   => Kind::CallWrapper,
+    "expression_statement"  => Kind::CallWrapper,
+
+    // trivia
+    "comment"               => Kind::Trivia,
+    ";"  => Kind::Trivia, ","  => Kind::Trivia,
+    "("  => Kind::Trivia, ")"  => Kind::Trivia,
+    "{"  => Kind::Trivia, "}"  => Kind::Trivia,
+    "\n" => Kind::Trivia,
+    "import_statement"      => Kind::Trivia,
+    "type_alias_declaration" => Kind::Trivia,
+    "interface_declaration" => Kind::Trivia,
+};
+
+pub static PARAM_CONFIG: ParamConfig = ParamConfig {
+    params_field: "parameters",
+    param_node_kinds: &["required_parameter", "optional_parameter", "identifier"],
+    self_param_kinds: &[],
+    ident_fields: &["name", "pattern"],
+};
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 00000000..f0019625
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,29 @@
+// Re-exports for benchmarks and integration tests.
+// The binary crate (main.rs) is the primary entry point; this lib target
+// exposes internals for criterion and other tooling.
+
+pub mod ast;
+pub mod cfg;
+pub mod cfg_analysis;
+pub(crate) mod cli;
+pub mod commands;
+pub mod database;
+pub mod errors;
+pub mod interop;
+pub mod labels;
+pub mod patterns;
+pub mod summary;
+pub mod symbol;
+pub mod taint;
+pub mod utils;
+pub mod walk;
+
+use errors::NyxResult;
+use std::path::Path;
+use utils::config::Config;
+
+/// Run a two-pass scan of `root` directly from the filesystem, without the index.
+/// This is the primary entry point for integration tests; it delegates to `scan_filesystem`.
+pub fn scan_no_index(root: &Path, cfg: &Config) -> NyxResult<Vec<commands::scan::Diag>> {
+    commands::scan::scan_filesystem(root, cfg)
+}
diff --git a/src/main.rs b/src/main.rs
index d6afbd62..e6974a66 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,11 +1,16 @@
 mod ast;
 mod cfg;
+mod cfg_analysis;
 mod cli;
 mod commands;
 mod database;
 mod errors;
+mod interop;
 mod labels;
 mod patterns;
+mod summary;
+mod symbol;
+mod taint;
 mod utils;
 mod walk;
 
@@ -53,6 +58,7 @@ fn main() -> NyxResult<()> {
     let proj_dirs = ProjectDirs::from("dev", "ecpeter23", "nyx")
         .ok_or("Unable to determine project directories")?;
 
+    // TODO: decide whether a config file should be created at all; some environments may not want anything written to disk.
     let config_dir = proj_dirs.config_dir();
     fs::create_dir_all(config_dir)?;
 
diff --git a/src/patterns/javascript.rs b/src/patterns/javascript.rs
index eb5fe47d..b4e89e5a 100644
--- a/src/patterns/javascript.rs
+++ b/src/patterns/javascript.rs
@@ -19,12 +19,6 @@ pub const PATTERNS: &[Pattern] = &[
         query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
         severity: Severity::Medium,
     },
-    Pattern {
-        id: "inner_html_assignment",
-        description: "Assignment to element.innerHTML",
-        query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
-        severity: Severity::Medium,
-    },
     Pattern {
         id: "settimeout_string",
         description: "setTimeout / setInterval with a string argument",
diff --git a/src/patterns/typescript.rs b/src/patterns/typescript.rs
index 0aac1b1d..3f16d356 100644
--- a/src/patterns/typescript.rs
+++ b/src/patterns/typescript.rs
@@ -19,12 +19,6 @@ pub const PATTERNS: &[Pattern] = &[
         query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
         severity: Severity::Medium,
     },
-    Pattern {
-        id: "inner_html_assignment",
-        description: "Assignment to element.innerHTML",
-        query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
-        severity: Severity::Medium,
-    },
     Pattern {
         id: "settimeout_string",
         description: "setTimeout / setInterval with a string argument",
diff --git a/src/summary/mod.rs b/src/summary/mod.rs
new file mode 100644
index 00000000..80174ccb
--- /dev/null
+++ b/src/summary/mod.rs
@@ -0,0 +1,252 @@
+use crate::labels::{Cap, DataLabel};
+use crate::symbol::{FuncKey, Lang, normalize_namespace};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+/// Serialisable summary of a single function's taint behaviour.
+///
+/// One of these is produced per function during **pass 1** of a scan and
+/// persisted to the `function_summaries` SQLite table.  During **pass 2** the
+/// full set of summaries across every file is loaded into memory so the taint
+/// engine can resolve cross‑file calls.
+///
+/// Design notes
+/// ────────────
+/// * **All three cap fields are independent.**  A function can simultaneously
+///   act as a source (introduces fresh taint), a sanitizer (cleans certain
+///   bits), and a sink (passes tainted data to a dangerous operation).
+///   The old code picked a single `DataLabel` which lost information.
+///
+/// * **`propagates_taint`** captures pass‑through behaviour: if an input
+///   parameter is tainted, does the return value carry that taint?  This is
+///   essential for chains like `let y = transform(tainted_x); sink(y);`.
+///
+/// * **`callees`** are recorded for future call‑graph construction
+///   (topological analysis, approach 2) but are not used in pass‑1/pass‑2
+///   taint resolution yet.
+///
+/// * **`tainted_sink_params`** marks which parameter *positions* flow to
+///   internal sinks.  Today the taint engine treats the whole call as a
+///   single "tainted or not" question; this field future‑proofs the summary
+///   for per‑argument precision.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FuncSummary {
+    /// Bare function name as it appears in the source (`my_func`, not the full path).
+    pub name: String,
+
+    /// Absolute path of the defining file; `func_key` normalizes it into the key's namespace.
+    pub file_path: String,
+
+    /// Language slug (`"rust"`, `"javascript"`, …), parsed with `Lang::from_slug` in `func_key`.
+    pub lang: String,
+
+    // ── Signature information ────────────────────────────────────────────
+    /// Total number of parameters (including `self`/`&self` for methods); the key's arity.
+    pub param_count: usize,
+
+    /// Parameter names in declaration order.
+    pub param_names: Vec<String>,
+
+    // ── Taint behaviour ──────────────────────────────────────────────────
+    // Caps are stored as raw `u8` bits so serde doesn't need to know about `bitflags`.
+    /// Caps this function **introduces**: the return value carries
+    /// freshly-tainted data even if no argument was tainted.
+    pub source_caps: u8,
+
+    /// Caps this function **cleans**: passing tainted data through this
+    /// function strips the corresponding bits.
+    pub sanitizer_caps: u8,
+
+    /// Caps this function **consumes unsafely**: calling it with tainted
+    /// arguments that still carry these bits is a finding.
+    pub sink_caps: u8,
+
+    /// `true` when taint on *any* input parameter can flow through to the
+    /// return value.  Conservative: set to `true` if *any* code path
+    /// propagates an argument to the return expression.
+    pub propagates_taint: bool,
+
+    /// 0-based indices of the parameters that flow to sinks inside the function.
+    pub tainted_sink_params: Vec<usize>,
+
+    /// Names of functions/methods/macros called inside this function body.
+    /// Stored for future call-graph / topological-sort analysis.
+    pub callees: Vec<String>,
+}
+
+// ── Cap conversion helpers ──────────────────────────────────────────────
+
+impl FuncSummary {
+    /// Typed view of `source_caps`; bits that `Cap` does not define are dropped.
+    #[inline]
+    pub fn source_caps(&self) -> Cap {
+        Cap::from_bits_truncate(self.source_caps)
+    }
+
+    /// Typed view of `sanitizer_caps`; bits that `Cap` does not define are dropped.
+    #[inline]
+    pub fn sanitizer_caps(&self) -> Cap {
+        Cap::from_bits_truncate(self.sanitizer_caps)
+    }
+
+    /// Typed view of `sink_caps`; bits that `Cap` does not define are dropped.
+    #[inline]
+    pub fn sink_caps(&self) -> Cap {
+        Cap::from_bits_truncate(self.sink_caps)
+    }
+
+    /// Reduce the three independent cap sets to the single `DataLabel` the
+    /// current taint engine expects, using the priority **Sink > Source >
+    /// Sanitizer**: a missed sink or source hides a vulnerability, whereas a
+    /// missed sanitizer only produces false positives.
+    #[allow(dead_code)]
+    pub fn primary_label(&self) -> Option<DataLabel> {
+        // Checked in priority order; the first non-empty set wins.
+        let sink = self.sink_caps();
+        if !sink.is_empty() {
+            return Some(DataLabel::Sink(sink));
+        }
+        let src = self.source_caps();
+        if !src.is_empty() {
+            return Some(DataLabel::Source(src));
+        }
+        let san = self.sanitizer_caps();
+        (!san.is_empty()).then(|| DataLabel::Sanitizer(san))
+    }
+
+    /// `true` if the function is a source, sanitizer, or sink, or if it
+    /// propagates taint, i.e. it has any observable taint effect.
+    #[allow(dead_code)]
+    pub fn is_interesting(&self) -> bool {
+        // OR the raw bytes together: a set bit in any of the three fields counts.
+        let any_cap = (self.source_caps | self.sanitizer_caps | self.sink_caps) != 0;
+        any_cap || self.propagates_taint
+    }
+
+    /// Derive the [`FuncKey`] for this summary.
+    ///
+    /// The namespace is `file_path` normalized against `scan_root`, the arity is
+    /// `param_count`, and an unrecognised language slug falls back to `Lang::Rust`.
+    pub fn func_key(&self, scan_root: Option<&str>) -> FuncKey {
+        let lang = Lang::from_slug(&self.lang).unwrap_or(Lang::Rust);
+        let namespace = normalize_namespace(&self.file_path, scan_root);
+        FuncKey {
+            lang,
+            namespace,
+            name: self.name.clone(),
+            arity: Some(self.param_count),
+        }
+    }
+}
+
+// ── Lookup map used by the taint engine ─────────────────────────────────
+
+/// A merged view of all function summaries keyed by qualified [`FuncKey`].
+///
+/// Functions are partitioned by language + namespace + name + arity.  Two
+/// functions with the same bare name but different languages or namespaces
+/// are stored separately, so no implicit cross-language merging occurs.
+///
+/// A secondary index `(Lang, name)` supports fast lookup by language + name
+/// for same-language resolution in the taint engine.
+#[derive(Default)]
+pub struct GlobalSummaries {
+    by_key: HashMap<FuncKey, FuncSummary>, // primary store: one merged summary per key
+    by_lang_name: HashMap<(Lang, String), Vec<FuncKey>>, // secondary index into `by_key`
+}
+
+impl GlobalSummaries {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Add `summary` under `key`.  A new key is stored and indexed by
+    /// `(lang, name)`; a repeated key is merged conservatively (caps and
+    /// `propagates_taint` OR-ed, sink-param indices and callees unioned).
+    pub fn insert(&mut self, key: FuncKey, summary: FuncSummary) {
+        use std::collections::hash_map::Entry;
+
+        match self.by_key.entry(key.clone()) {
+            Entry::Occupied(mut slot) => {
+                let merged = slot.get_mut();
+                merged.source_caps |= summary.source_caps;
+                merged.sanitizer_caps |= summary.sanitizer_caps;
+                merged.sink_caps |= summary.sink_caps;
+                merged.propagates_taint |= summary.propagates_taint;
+                for idx in summary.tainted_sink_params {
+                    if !merged.tainted_sink_params.contains(&idx) {
+                        merged.tainted_sink_params.push(idx);
+                    }
+                }
+                for callee in summary.callees {
+                    if !merged.callees.contains(&callee) {
+                        merged.callees.push(callee);
+                    }
+                }
+            }
+            Entry::Vacant(slot) => {
+                slot.insert(summary);
+                // Only new keys need indexing: a merged key was indexed on its first insert.
+                let (lang, name) = (key.lang, key.name.clone());
+                self.by_lang_name.entry((lang, name)).or_default().push(key);
+            }
+        }
+    }
+
+    /// Fetch the summary stored under exactly this key, if any.
+    pub fn get(&self, key: &FuncKey) -> Option<&FuncSummary> {
+        self.by_key.get(key)
+    }
+
+    /// Every summary recorded for the bare `name` in language `lang`, paired with its key.
+    pub fn lookup_same_lang(&self, lang: Lang, name: &str) -> Vec<(&FuncKey, &FuncSummary)> {
+        match self.by_lang_name.get(&(lang, name.to_string())) {
+            Some(keys) => keys
+                .iter()
+                .filter_map(|k| self.by_key.get(k).map(|s| (k, s)))
+                .collect(),
+            None => Vec::new(),
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn is_empty(&self) -> bool {
+        self.by_key.is_empty()
+    }
+
+    /// Iterate over all (key, summary) pairs, in the map's unspecified order.
+    #[allow(dead_code)]
+    pub fn iter(&self) -> impl Iterator<Item = (&FuncKey, &FuncSummary)> {
+        self.by_key.iter()
+    }
+}
+
+impl std::fmt::Debug for GlobalSummaries {
+    // Only the entry count is printed; the maps themselves are left out.
+    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let len = self.by_key.len();
+        out.debug_struct("GlobalSummaries").field("len", &len).finish()
+    }
+}
+
+/// Fold per-file summaries into a single [`GlobalSummaries`] table.
+///
+/// Each summary is keyed by its own `func_key(scan_root)`.  Summaries merge
+/// only when their keys match exactly (same lang + namespace + name +
+/// arity); functions that merely share a bare name are stored separately.
+/// Summaries are inserted in the order the iterator yields them.
+pub fn merge_summaries(
+    per_file: impl IntoIterator<Item = FuncSummary>,
+    scan_root: Option<&str>,
+) -> GlobalSummaries {
+    per_file
+        .into_iter()
+        .fold(GlobalSummaries::new(), |mut table, summary| {
+            let key = summary.func_key(scan_root);
+            table.insert(key, summary);
+            table
+        })
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/summary/tests.rs b/src/summary/tests.rs
new file mode 100644
index 00000000..961ee6f4
--- /dev/null
+++ b/src/summary/tests.rs
@@ -0,0 +1,258 @@
+use super::*;
+
+fn make(name: &str, src: u8, san: u8, sink: u8) -> FuncSummary {
+    FuncSummary {
+        name: name.into(),
+        file_path: "test.rs".into(), // with no scan root this is also the key's namespace
+        lang: "rust".into(),
+        param_count: 0, // becomes `arity: Some(0)` in the derived key
+        param_names: vec![],
+        source_caps: src,
+        sanitizer_caps: san,
+        sink_caps: sink,
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    }
+}
+
+#[test]
+fn primary_label_priority() {
+    // sink bits win even when source and sanitizer caps are set too
+    let s = make("f", 0xFF, 0xFF, 0x01);
+    assert!(matches!(s.primary_label(), Some(DataLabel::Sink(_))));
+
+    // no sink bits: source beats sanitizer
+    let s = make("f", 0x01, 0x02, 0x00);
+    assert!(matches!(s.primary_label(), Some(DataLabel::Source(_))));
+
+    // only sanitizer bits set
+    let s = make("f", 0x00, 0x04, 0x00);
+    assert!(matches!(s.primary_label(), Some(DataLabel::Sanitizer(_))));
+
+    // all three cap sets empty: no label at all
+    let s = make("f", 0, 0, 0);
+    assert!(s.primary_label().is_none());
+}
+
+#[test]
+fn merge_unions_conservatively() {
+    let a = make("foo", 0x01, 0x00, 0x00); // source-only summary
+    let b = FuncSummary {
+        sink_caps: 0x04,
+        propagates_taint: true,
+        tainted_sink_params: vec![0],
+        callees: vec!["bar".into()],
+        ..make("foo", 0x00, 0x02, 0x00)
+    };
+
+    let merged = merge_summaries(vec![a, b], None); // `a` and `b` share one key, so they merge
+    let key = FuncKey {
+        lang: Lang::Rust,
+        namespace: "test.rs".into(),
+        name: "foo".into(),
+        arity: Some(0),
+    };
+    let foo = merged.get(&key).unwrap();
+
+    assert_eq!(foo.source_caps, 0x01); // contributed by `a`
+    assert_eq!(foo.sanitizer_caps, 0x02); // everything below is contributed by `b`
+    assert_eq!(foo.sink_caps, 0x04);
+    assert!(foo.propagates_taint);
+    assert_eq!(foo.tainted_sink_params, vec![0]);
+    assert_eq!(foo.callees, vec!["bar".to_string()]);
+}
+
+#[test]
+fn is_interesting_detects_all_cases() {
+    assert!(!make("f", 0, 0, 0).is_interesting()); // no caps, no propagation
+    assert!(make("f", 1, 0, 0).is_interesting()); // source only
+    assert!(make("f", 0, 1, 0).is_interesting()); // sanitizer only
+    assert!(make("f", 0, 0, 1).is_interesting()); // sink only
+
+    let mut p = make("f", 0, 0, 0);
+    p.propagates_taint = true; // propagation alone is enough
+    assert!(p.is_interesting());
+}
+
+#[test]
+fn same_lang_different_namespace_no_merge() {
+    let a = FuncSummary {
+        name: "helper".into(),
+        file_path: "file_a.rs".into(), // only the file path differs between `a` and `b`
+        lang: "rust".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: Cap::all().bits(),
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+    let b = FuncSummary {
+        name: "helper".into(),
+        file_path: "file_b.rs".into(),
+        lang: "rust".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: 0,
+        sanitizer_caps: 0,
+        sink_caps: Cap::SHELL_ESCAPE.bits(),
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+
+    let global = merge_summaries(vec![a, b], None);
+
+    // Different namespaces give different FuncKeys, so both entries must exist.
+    let key_a = FuncKey {
+        lang: Lang::Rust,
+        namespace: "file_a.rs".into(),
+        name: "helper".into(),
+        arity: Some(0),
+    };
+    let key_b = FuncKey {
+        lang: Lang::Rust,
+        namespace: "file_b.rs".into(),
+        name: "helper".into(),
+        arity: Some(0),
+    };
+    assert!(global.get(&key_a).is_some());
+    assert!(global.get(&key_b).is_some());
+    // `a`'s source caps must not leak into `b`'s entry
+    assert_eq!(global.get(&key_a).unwrap().source_caps, Cap::all().bits());
+    assert_eq!(global.get(&key_b).unwrap().source_caps, 0);
+}
+
+#[test]
+fn same_lang_same_namespace_merges() {
+    let a = FuncSummary {
+        name: "helper".into(),
+        file_path: "lib.rs".into(), // same path, name, lang and arity as `b`: identical key
+        lang: "rust".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: 0x01,
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+    let b = FuncSummary {
+        name: "helper".into(),
+        file_path: "lib.rs".into(),
+        lang: "rust".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: 0,
+        sanitizer_caps: 0x02,
+        sink_caps: 0,
+        propagates_taint: true,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+
+    let global = merge_summaries(vec![a, b], None);
+    let key = FuncKey {
+        lang: Lang::Rust,
+        namespace: "lib.rs".into(),
+        name: "helper".into(),
+        arity: Some(0),
+    };
+    let merged = global.get(&key).unwrap();
+    assert_eq!(merged.source_caps, 0x01); // from `a`
+    assert_eq!(merged.sanitizer_caps, 0x02); // from `b`
+    assert!(merged.propagates_taint); // OR of false and true
+}
+
+#[test]
+fn cross_lang_name_collision_stays_separate() {
+    let py = FuncSummary {
+        name: "process_data".into(), // same bare name as the C function below
+        file_path: "handler.py".into(),
+        lang: "python".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: Cap::all().bits(),
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+    let c = FuncSummary {
+        name: "process_data".into(),
+        file_path: "handler.c".into(),
+        lang: "c".into(),
+        param_count: 1,
+        param_names: vec!["s".into()],
+        source_caps: 0,
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: true,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+
+    let global = merge_summaries(vec![py, c], None);
+
+    let py_key = FuncKey {
+        lang: Lang::Python,
+        namespace: "handler.py".into(),
+        name: "process_data".into(),
+        arity: Some(0),
+    };
+    let c_key = FuncKey {
+        lang: Lang::C,
+        namespace: "handler.c".into(),
+        name: "process_data".into(),
+        arity: Some(1),
+    };
+
+    assert!(global.get(&py_key).is_some());
+    assert!(global.get(&c_key).is_some());
+    // Python's source caps must not leak into the C entry; Python keeps its own
+    assert_eq!(global.get(&c_key).unwrap().source_caps, 0);
+    assert_eq!(global.get(&py_key).unwrap().source_caps, Cap::all().bits());
+}
+
+#[test]
+fn lookup_same_lang_returns_all_matches() {
+    let a = FuncSummary {
+        name: "helper".into(),
+        file_path: "a.rs".into(), // `a` and `b` live in different files, so they get two keys
+        lang: "rust".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: 1,
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+    let b = FuncSummary {
+        name: "helper".into(),
+        file_path: "b.rs".into(),
+        lang: "rust".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: 2,
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+
+    let global = merge_summaries(vec![a, b], None);
+    let matches = global.lookup_same_lang(Lang::Rust, "helper");
+    assert_eq!(matches.len(), 2);
+
+    // the index is per language: nothing was registered under Python
+    let py_matches = global.lookup_same_lang(Lang::Python, "helper");
+    assert!(py_matches.is_empty());
+}
diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs
new file mode 100644
index 00000000..9db15fa3
--- /dev/null
+++ b/src/symbol/mod.rs
@@ -0,0 +1,94 @@
+use serde::{Deserialize, Serialize};
+use std::fmt;
+
+/// Source-code languages the scanner knows about; slugs are defined by `from_slug`/`as_str`.
+#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
+pub enum Lang {
+    Rust,
+    C,
+    Cpp,
+    Java,
+    Go,
+    Php,
+    Python,
+    Ruby,
+    TypeScript,
+    JavaScript,
+}
+
+impl Lang {
+    /// Map a slug (as returned by `lang_for_path`) to a `Lang`; `ts`/`js` aliases are accepted.
+    pub fn from_slug(s: &str) -> Option<Lang> {
+        Some(match s {
+            "rust" => Lang::Rust,
+            "c" => Lang::C,
+            "cpp" => Lang::Cpp,
+            "java" => Lang::Java,
+            "go" => Lang::Go,
+            "php" => Lang::Php,
+            "python" => Lang::Python,
+            "ruby" => Lang::Ruby,
+            "typescript" | "ts" => Lang::TypeScript,
+            "javascript" | "js" => Lang::JavaScript,
+            _ => return None,
+        })
+    }
+
+    /// The canonical slug of this language; `from_slug` parses it back to the same variant.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Rust => "rust",
+            Self::C => "c",
+            Self::Cpp => "cpp",
+            Self::Java => "java",
+            Self::Go => "go",
+            Self::Php => "php",
+            Self::Python => "python",
+            Self::Ruby => "ruby",
+            Self::TypeScript => "typescript",
+            Self::JavaScript => "javascript",
+        }
+    }
+}
+
+impl fmt::Display for Lang { // renders the canonical slug from `as_str`
+    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(out, "{}", self.as_str())
+    }
+}
+
+/// Uniquely identifies a function across the project; shown as `lang::namespace::name[/arity]`.
+#[derive(Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
+pub struct FuncKey {
+    pub lang: Lang, // language the function is written in
+    /// Project-relative file path (e.g. `"src/lib.rs"`).
+    pub namespace: String,
+    pub name: String, // bare function name
+    pub arity: Option<usize>, // parameter count when known; `None` omits the `/arity` suffix
+}
+
+impl fmt::Display for FuncKey {
+    /// Renders `lang::namespace::name`, followed by `/arity` when the arity is known.
+    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { lang, namespace, name, arity } = self;
+        write!(out, "{lang}::{namespace}::{name}")?;
+        // An unknown arity contributes nothing to the output.
+        arity.map_or(Ok(()), |n| write!(out, "/{n}"))
+    }
+}
+
+/// Strip `root` from `abs_path` to produce a stable project-relative path.
+///
+/// `root` only matches on a `/` boundary (`/proj` is not a prefix of `/project/x.rs`).
+/// Falls back to the full path if stripping fails (e.g. in tests with synthetic paths).
+pub fn normalize_namespace(abs_path: &str, root: Option<&str>) -> String {
+    root.and_then(|r| abs_path.strip_prefix(r.trim_end_matches('/')))
+        .filter(|rest| rest.is_empty() || rest.starts_with('/'))
+        .map_or_else(
+            || abs_path.to_string(),
+            |rest| rest.trim_start_matches('/').to_string(),
+        )
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/symbol/tests.rs b/src/symbol/tests.rs
new file mode 100644
index 00000000..6cd8d470
--- /dev/null
+++ b/src/symbol/tests.rs
@@ -0,0 +1,62 @@
+use super::*;
+
+#[test]
+fn lang_round_trip() {
+    const CANONICAL: [&str; 10] = [ // canonical slugs only; aliases are tested separately
+        "rust",
+        "c",
+        "cpp",
+        "java",
+        "go",
+        "php",
+        "python",
+        "ruby",
+        "typescript",
+        "javascript",
+    ];
+    for slug in CANONICAL {
+        assert_eq!(Lang::from_slug(slug).map(|lang| lang.as_str()), Some(slug));
+    }
+}
+
+#[test]
+fn lang_aliases() {
+    assert_eq!(Lang::from_slug("js"), Some(Lang::JavaScript)); // short alias of "javascript"
+    assert_eq!(Lang::from_slug("ts"), Some(Lang::TypeScript)); // short alias of "typescript"
+}
+
+#[test]
+fn func_key_display() {
+    let k = FuncKey {
+        lang: Lang::Rust,
+        namespace: "src/lib.rs".into(),
+        name: "my_func".into(),
+        arity: Some(2),
+    };
+    assert_eq!(k.to_string(), "rust::src/lib.rs::my_func/2"); // lang::namespace::name/arity
+}
+
+#[test]
+fn normalize_strips_root() {
+    assert_eq!( // root given without a trailing slash
+        normalize_namespace("/home/user/proj/src/lib.rs", Some("/home/user/proj")),
+        "src/lib.rs"
+    );
+    assert_eq!( // root given with a trailing slash: same result
+        normalize_namespace("/home/user/proj/src/lib.rs", Some("/home/user/proj/")),
+        "src/lib.rs"
+    );
+}
+
+#[test]
+fn normalize_fallback_on_no_root() {
+    assert_eq!(normalize_namespace("test.rs", None), "test.rs"); // no root: path is returned as-is
+}
+
+#[test]
+fn normalize_fallback_on_mismatch() {
+    assert_eq!( // root is not a prefix of the path: the full path is returned
+        normalize_namespace("/other/path/lib.rs", Some("/home/user/proj")),
+        "/other/path/lib.rs"
+    );
+}
diff --git a/src/taint/mod.rs b/src/taint/mod.rs
new file mode 100644
index 00000000..168d00e5
--- /dev/null
+++ b/src/taint/mod.rs
@@ -0,0 +1,429 @@
+use crate::cfg::{Cfg, FuncSummaries, NodeInfo, StmtKind};
+use crate::interop::InteropEdge;
+use crate::labels::{Cap, DataLabel};
+use crate::summary::GlobalSummaries;
+use crate::symbol::Lang;
+use petgraph::graph::NodeIndex;
+use std::collections::HashMap;
+use tracing::debug;
+
+/// A detected taint finding with both source and sink locations.
+#[derive(Debug, Clone)]
+pub struct Finding {
+    /// The CFG node where tainted data reaches a dangerous operation.
+    pub sink: NodeIndex,
+    /// The CFG node where taint originated. `analyse_file` falls back to the
+    /// sink node itself when no source node is found on the rebuilt path.
+    pub source: NodeIndex,
+    /// Visited CFG nodes in source-to-sink order, rebuilt from BFS predecessors.
+    #[allow(dead_code)] // used for future detailed diagnostics / path display
+    pub path: Vec<NodeIndex>,
+}
+
+fn taint_hash(taint: &HashMap<String, Cap>) -> u64 {
+    // Feed entries in name order so the result ignores HashMap iteration order.
+    let mut entries: Vec<(&String, &Cap)> = taint.iter().collect();
+    entries.sort_by(|a, b| a.0.cmp(b.0));
+    let mut state = blake3::Hasher::new();
+    for (name, caps) in entries {
+        state.update(name.as_bytes()).update(&caps.bits().to_le_bytes());
+    }
+    // Keep only the first eight bytes of the digest, read little-endian.
+    u64::from_le_bytes(state.finalize().as_bytes()[..8].try_into().unwrap())
+}
+
+/// Resolved summary for a callee: a uniform view regardless of whether the
+/// summary came from a local (same-file) or global (cross-file) source.
+struct ResolvedSummary {
+    source_caps: Cap, // bits the call's result gains as a fresh source
+    sanitizer_caps: Cap, // bits stripped from the call's result
+    sink_caps: Cap, // bits that turn a tainted argument into a finding
+    propagates_taint: bool, // whether argument taint flows into the result
+}
+
+/// Resolve `callee` to a `ResolvedSummary`, or `None` if it is unknown or ambiguous.
+///
+/// Resolution order (the first step that produces an answer wins):
+/// 1. Local (same-file): exact name + same lang + same namespace; >1 hit gives `None`
+/// 2. Global same-language: via `lookup_same_lang`; ties broken by caller namespace
+/// 3. Interop edges: explicit cross-language bridges, looked up by exact `FuncKey`
+/// 4. Otherwise `None`: there is no implicit cross-language fallback
+#[allow(clippy::too_many_arguments)]
+fn resolve_callee(
+    callee: &str,
+    caller_lang: Lang,
+    caller_namespace: &str,
+    caller_func: &str,
+    call_ordinal: u32,
+    local: &FuncSummaries,
+    global: Option<&GlobalSummaries>,
+    interop_edges: &[InteropEdge],
+) -> Option<ResolvedSummary> {
+    // 1) Local (same-file): scan local summaries for matching name + lang + namespace
+    let local_matches: Vec<_> = local
+        .iter()
+        .filter(|(k, _)| {
+            k.name == callee && k.lang == caller_lang && k.namespace == caller_namespace
+        })
+        .collect();
+
+    if local_matches.len() == 1 {
+        let (_, ls) = local_matches[0];
+        return Some(ResolvedSummary {
+            source_caps: ls.source_caps,
+            sanitizer_caps: ls.sanitizer_caps,
+            sink_caps: ls.sink_caps,
+            propagates_taint: ls.propagates_taint,
+        });
+    }
+
+    // Several local candidates: arity disambiguation is future work, so give up here
+    if local_matches.len() > 1 {
+        return None;
+    }
+
+    // 2) Global same-language (only reached when there was no local candidate)
+    if let Some(gs) = global {
+        let matches = gs.lookup_same_lang(caller_lang, callee);
+        if matches.len() == 1 {
+            let (_, fs) = matches[0];
+            return Some(ResolvedSummary {
+                source_caps: fs.source_caps(),
+                sanitizer_caps: fs.sanitizer_caps(),
+                sink_caps: fs.sink_caps(),
+                propagates_taint: fs.propagates_taint,
+            });
+        }
+        // Several candidates: accept one only if it alone shares the caller's namespace
+        if matches.len() > 1 {
+            let same_ns: Vec<_> = matches
+                .iter()
+                .filter(|(k, _)| k.namespace == caller_namespace)
+                .collect();
+            if same_ns.len() == 1 {
+                let (_, fs) = same_ns[0];
+                return Some(ResolvedSummary {
+                    source_caps: fs.source_caps(),
+                    sanitizer_caps: fs.sanitizer_caps(),
+                    sink_caps: fs.sink_caps(),
+                    propagates_taint: fs.propagates_taint,
+                });
+            }
+            // Still ambiguous, so return None (conservative); interop edges are not tried
+            return None;
+        }
+    }
+
+    // 3) Interop edges: an empty `caller_func` or ordinal 0 on an edge acts as a wildcard
+    for edge in interop_edges {
+        if edge.from.caller_lang == caller_lang
+            && edge.from.caller_namespace == caller_namespace
+            && edge.from.callee_symbol == callee
+            && (edge.from.caller_func.is_empty() || edge.from.caller_func == caller_func)
+            && (edge.from.ordinal == 0 || edge.from.ordinal == call_ordinal)
+        {
+            // Exact-FuncKey lookup; if the target has no summary, keep scanning edges
+            if let Some(gs) = global
+                && let Some(fs) = gs.get(&edge.to)
+            {
+                return Some(ResolvedSummary {
+                    source_caps: fs.source_caps(),
+                    sanitizer_caps: fs.sanitizer_caps(),
+                    sink_caps: fs.sink_caps(),
+                    propagates_taint: fs.propagates_taint,
+                });
+            }
+        }
+    }
+
+    // 4) No cross-language fallback
+    None
+}
+
+fn apply_taint(
+    node: &NodeInfo, // CFG node whose effect on the taint state is applied
+    taint: &HashMap<String, Cap>, // state on entry: variable name -> capability bits
+    local_summaries: &FuncSummaries, // same-file summaries for callee resolution
+    global_summaries: Option<&GlobalSummaries>, // cross-file summaries, if available
+    caller_lang: Lang,
+    caller_namespace: &str,
+    interop_edges: &[InteropEdge], // explicit cross-language bridges
+) -> HashMap<String, Cap> {
+    debug!(target: "taint", "Applying taint to node: {:?}", node);
+    debug!(target: "taint", "Taint: {:?}", taint);
+    let mut out = taint.clone(); // the input map is never mutated; a new map is returned
+
+    let caller_func = node.enclosing_func.as_deref().unwrap_or("");
+
+    match node.label {
+        // A new untrusted value enters the program
+        Some(DataLabel::Source(bits)) => {
+            if let Some(v) = &node.defines {
+                out.insert(v.clone(), bits); // overwrites any earlier taint on `v`
+            }
+        }
+        // Sanitizer: propagate input taint through the assignment FIRST,
+        // then strip the sanitizer's capability bits.  This ensures that
+        // `let y = sanitize_html(&x)` gives y the taint of x minus the
+        // HTML_ESCAPE bit, rather than leaving y completely clean (which
+        // would hide "wrong sanitiser for this sink" bugs).
+        Some(DataLabel::Sanitizer(bits)) => {
+            if let Some(v) = &node.defines {
+                // 1. Propagate: union taint from all read variables
+                let mut combined = Cap::empty();
+                for u in &node.uses {
+                    if let Some(b) = out.get(u) {
+                        combined |= *b;
+                    }
+                }
+                // 2. Strip the sanitiser's bits
+                let new = combined & !bits;
+                if new.is_empty() {
+                    out.remove(v);
+                } else {
+                    out.insert(v.clone(), new);
+                }
+            }
+        }
+
+        // A function call: resolve against local + global summaries
+        _ if node.kind == StmtKind::Call => {
+            if let Some(callee) = &node.callee
+                && let Some(resolved) = resolve_callee(
+                    callee,
+                    caller_lang,
+                    caller_namespace,
+                    caller_func,
+                    node.call_ordinal,
+                    local_summaries,
+                    global_summaries,
+                    interop_edges,
+                )
+            {
+                // Build the return value's taint bits in stages, then
+                // write once at the end.  Order matters:
+                //
+                //   1. Start with fresh source taint (if the callee is a source)
+                //   2. Union with propagated arg taint (if the callee propagates)
+                //   3. Strip sanitizer bits last (so sanitization always wins)
+
+                let mut return_bits = Cap::empty();
+
+                // ── 1. Source behaviour ──
+                return_bits |= resolved.source_caps;
+
+                // ── 2. Propagation ──
+                if resolved.propagates_taint {
+                    for u in &node.uses {
+                        if let Some(bits) = out.get(u) {
+                            return_bits |= *bits;
+                        }
+                    }
+                }
+
+                // ── 3. Sanitizer behaviour (applied last so it always wins) ──
+                return_bits &= !resolved.sanitizer_caps;
+
+                // ── Write the result ──
+                if let Some(v) = &node.defines {
+                    if return_bits.is_empty() {
+                        out.remove(v);
+                    } else {
+                        out.insert(v.clone(), return_bits);
+                    }
+                }
+
+                // ── Sink behaviour: handled in the main analysis loop
+                //    (checked via node.label or resolved summary) ──
+
+                return out; // resolved calls skip the default gen/kill below
+            }
+
+            // Unresolved call: fall through to default gen/kill below
+        }
+
+        // All other statements: classic gen/kill for assignments
+        _ => {}
+    }
+
+    // Default gen/kill: the defined variable takes the union of the taint of the
+    // variables it reads, or is cleared when none of them is tainted
+    if !matches!(
+        node.label,
+        Some(DataLabel::Source(_)) | Some(DataLabel::Sanitizer(_))
+    ) && let Some(d) = &node.defines
+    {
+        let mut combined = Cap::empty();
+        for u in &node.uses {
+            if let Some(bits) = out.get(u) {
+                combined |= *bits;
+            }
+        }
+        if combined.is_empty() {
+            out.remove(d);
+        } else {
+            out.insert(d.clone(), combined);
+        }
+    }
+
+    out
+}
+
+/// Breadth-first taint propagation over one file's CFG, starting at `entry`
+/// with an empty taint map; returns one `Finding` per tainted sink state.
+/// `global_summaries` is `None` for pass-1 / single-file mode and
+/// `Some(&map)` for pass-2 cross-file analysis.
+pub fn analyse_file(
+    cfg: &Cfg,
+    entry: NodeIndex,
+    local_summaries: &FuncSummaries,
+    global_summaries: Option<&GlobalSummaries>,
+    caller_lang: Lang,
+    caller_namespace: &str,
+    interop_edges: &[InteropEdge],
+) -> Vec<Finding> {
+    use std::collections::{HashMap, HashSet, VecDeque};
+
+    /// Queue item: current CFG node + taint map that holds on entry to it
+    #[derive(Clone)]
+    struct Item {
+        node: NodeIndex,
+        taint: HashMap<String, Cap>,
+    }
+
+    // (node, hash of its incoming taint) -> predecessor key (for path rebuild)
+    type Key = (NodeIndex, u64);
+    let mut pred: HashMap<Key, Key> = HashMap::new();
+
+    // Seen states, so that loops in the CFG terminate
+    let mut seen: HashSet<Key> = HashSet::new();
+
+    // Resulting findings: (sink_node, source_node, full_path)
+    let mut findings: Vec<Finding> = Vec::new();
+
+    let mut q = VecDeque::new();
+    q.push_back(Item {
+        node: entry,
+        taint: HashMap::new(),
+    });
+    seen.insert((entry, taint_hash(&HashMap::new()))); // keyed like every other state
+
+    while let Some(Item { node, taint }) = q.pop_front() {
+        let caller_func = cfg[node].enclosing_func.as_deref().unwrap_or("");
+        let out = apply_taint(
+            &cfg[node],
+            &taint,
+            local_summaries,
+            global_summaries,
+            caller_lang,
+            caller_namespace,
+            interop_edges,
+        );
+        // Hash both states once per node; they key `seen` and `pred` below.
+        let (in_hash, out_hash) = (taint_hash(&taint), taint_hash(&out));
+
+        // ── Sink check ──────────────────────────────────────────────────
+        // Two ways a node can be a sink:
+        //   1. Its AST label says Sink (existing inline labels)
+        //   2. Its callee resolves to a function with sink_caps (cross-file)
+        let sink_caps = match cfg[node].label {
+            Some(DataLabel::Sink(caps)) => caps,
+            _ => {
+                // check if callee resolves to a sink
+                cfg[node]
+                    .callee
+                    .as_ref()
+                    .and_then(|c| {
+                        resolve_callee(
+                            c,
+                            caller_lang,
+                            caller_namespace,
+                            caller_func,
+                            cfg[node].call_ordinal,
+                            local_summaries,
+                            global_summaries,
+                            interop_edges,
+                        )
+                    })
+                    .filter(|r| !r.sink_caps.is_empty())
+                    .map(|r| r.sink_caps)
+                    .unwrap_or(Cap::empty())
+            }
+        };
+
+        if !sink_caps.is_empty() {
+            // A use is dangerous when its taint shares at least one bit with the sink.
+            let bad = cfg[node]
+                .uses
+                .iter()
+                .any(|u| out.get(u).is_some_and(|b| (*b & sink_caps) != Cap::empty()));
+            if bad {
+                // Reconstruct path backwards from sink to source.
+                //
+                // A node is considered a "source" if:
+                //   1. It has an inline DataLabel::Source (same-file), OR
+                //   2. It is a Call whose callee resolves to a source via
+                //      local or global summaries (cross-file).
+                let sink_node = node;
+                let mut path = vec![node];
+                let mut source_node = node; // fallback: sink itself
+                let mut key = (node, in_hash);
+
+                while let Some(&(prev, prev_hash)) = pred.get(&key) {
+                    path.push(prev);
+
+                    // Check inline source label
+                    if matches!(cfg[prev].label, Some(DataLabel::Source(_))) {
+                        source_node = prev;
+                        break;
+                    }
+
+                    // Check cross-file source via resolved callee summary
+                    let prev_caller_func = cfg[prev].enclosing_func.as_deref().unwrap_or("");
+                    if cfg[prev].kind == StmtKind::Call
+                        && let Some(callee) = &cfg[prev].callee
+                        && let Some(resolved) = resolve_callee(
+                            callee,
+                            caller_lang,
+                            caller_namespace,
+                            prev_caller_func,
+                            cfg[prev].call_ordinal,
+                            local_summaries,
+                            global_summaries,
+                            interop_edges,
+                        )
+                        && !resolved.source_caps.is_empty()
+                    {
+                        source_node = prev;
+                        break;
+                    }
+
+                    key = (prev, prev_hash);
+                }
+
+                path.reverse(); // collected sink-first; stored source-first
+                findings.push(Finding {
+                    sink: sink_node,
+                    source: source_node,
+                    path,
+                });
+            }
+        }
+
+        // Enqueue successors whose (node, taint) state has not been seen yet.
+        for succ in cfg.neighbors(node) {
+            let key = (succ, out_hash);
+            // `insert` returns `false` for an already-visited state.
+            if seen.insert(key) {
+                pred.insert(key, (node, in_hash));
+                q.push_back(Item {
+                    node: succ,
+                    taint: out.clone(),
+                });
+            }
+        }
+    }
+
+    findings
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/taint/tests.rs b/src/taint/tests.rs
new file mode 100644
index 00000000..b4cb986a
--- /dev/null
+++ b/src/taint/tests.rs
@@ -0,0 +1,2220 @@
+use super::*;
+use crate::cfg::FuncSummaries;
+use crate::interop::InteropEdge;
+use crate::symbol::FuncKey;
+
+#[test]
+fn env_to_arg_is_flagged() { // baseline: an env::var value goes straight into Command::arg
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+    let src = br#"
+        use std::env; use std::process::Command;
+        fn main() {
+            let x = env::var("DANGEROUS_ARG").unwrap();
+            Command::new("sh").arg(x).status().unwrap();
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+
+    assert_eq!(findings.len(), 1); // exactly one unsanitised Source→Sink
+}
+
+#[test]
+fn taint_through_if_else() { // the two branches pass different variables to the sink
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+    let src = br#"
+        use std::env; use std::process::Command;
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let safe = html_escape::encode_safe(&x);
+
+            if x.len() > 5 {
+                Command::new("sh").arg(&x).status().unwrap();   // UNSAFE
+            } else {
+                Command::new("sh").arg(&safe).status().unwrap(); // SAFE
+            }
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+
+    // exactly one path (via the True branch) should be flagged
+    assert_eq!(findings.len(), 1);
+}
+
+#[test]
+fn taint_through_while_loop() { // taint on `x` must survive a loop between source and sink
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let mut x = env::var("DANGEROUS").unwrap();
+            while x.len() < 100 {                       // Loop header (Loop)
+                x.push_str("a");
+            }
+            Command::new("sh").arg(x).status().unwrap(); // Should be flagged
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+    assert_eq!(findings.len(), 1); // the loop must not duplicate or drop the finding
+}
+
+#[test]
+fn taint_killed_by_matching_sanitizer() {
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+
+    // The shell_escape sanitizer strips SHELL_ESCAPE, which is the bit the
+    // Command sink checks, so the matching bit is gone and nothing is reported.
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let clean = shell_escape::unix::escape(&x);
+            Command::new("sh").arg(clean).status().unwrap();
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+    assert!(
+        findings.is_empty(),
+        "matching sanitizer should kill the taint"
+    );
+}
+
+#[test]
+fn wrong_sanitizer_preserves_taint() {
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+
+    // The html_escape sanitizer strips HTML_ESCAPE, but the Command sink checks
+    // SHELL_ESCAPE, so the wrong bit was stripped and the finding persists.
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let clean = html_escape::encode_safe(&x);
+            Command::new("sh").arg(clean).status().unwrap();
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+    assert_eq!(
+        findings.len(),
+        1,
+        "wrong sanitizer should NOT kill the taint"
+    );
+}
+
+#[test]
+fn taint_breaks_out_of_loop() { // source and sink both sit inside a `loop` ended by `break`
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            loop {
+                let x = env::var("DANGEROUS").unwrap();
+                Command::new("sh").arg(&x).status().unwrap(); // vulnerable
+                break;
+            }
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+    assert_eq!(findings.len(), 1); // the single sink in the loop body
+}
+
+#[test]
+fn test_two_sources_one_sanitised() {
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+
+    // Two env sources, one properly sanitised with the MATCHING sanitiser.
+    // x → unsanitised → Command = FINDING
+    // y → shell_escape → Command = safe
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let y = env::var("ANOTHER").unwrap();
+            let clean = shell_escape::unix::escape(&y);
+            Command::new("sh").arg(x).status().unwrap();
+            Command::new("sh").arg(clean).status().unwrap();
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+    assert_eq!(
+        findings.len(),
+        1,
+        "only the unsanitised source should be flagged"
+    );
+}
+
+#[test]
+fn test_two_sources_wrong_sanitiser_both_flagged() {
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+
+    // Two env sources, one "sanitised" with the WRONG sanitiser.
+    // x → unsanitised → Command = FINDING
+    // y → html_escape → Command = FINDING (wrong sanitiser for shell sink)
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let y = env::var("ANOTHER").unwrap();
+            let clean = html_escape::encode_safe(&y);
+            Command::new("sh").arg(x).status().unwrap();
+            Command::new("sh").arg(clean).status().unwrap();
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+    assert_eq!(
+        findings.len(),
+        2,
+        "both should be flagged — wrong sanitiser"
+    );
+}
+
+#[test]
+fn test_should_not_panic_on_empty_function() { // fixture: early `return`, then one plain call
+    use crate::cfg::build_cfg;
+    use tree_sitter::Language;
+    let src = br#"
+        use std::{env, process::Command};
+        fn f() {
+            if cond() {
+                return;
+            }
+            do_something();
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new(); // NOTE(review): same steps as `parse_rust`
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+    assert!(findings.is_empty()); // the fixture calls no env source and no Command sink
+}
+
+#[test]
+fn cross_file_source_resolved_via_global_summaries() {
+    use crate::summary::FuncSummary;
+
+    // Simulate file B calling `get_dangerous()`, which is defined in file A.
+    // File A's hand-built summary marks get_dangerous as a source for all caps.
+    let src = br#"
+        use std::process::Command;
+        fn main() {
+            let x = get_dangerous();
+            Command::new("sh").arg(x).status().unwrap();
+        }"#;
+
+    let (cfg, entry, local_summaries) = parse_rust(src);
+
+    // Build global summaries as if file A exported get_dangerous
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Rust,
+        namespace: "file_a.rs".into(),
+        name: "get_dangerous".into(),
+        arity: Some(0),
+    };
+    global.insert(
+        key,
+        FuncSummary {
+            name: "get_dangerous".into(),
+            file_path: "file_a.rs".into(),
+            lang: "rust".into(),
+            param_count: 0,
+            param_names: vec![],
+            source_caps: Cap::all().bits(), // the only non-empty capability set
+            sanitizer_caps: 0,
+            sink_caps: 0,
+            propagates_taint: false,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local_summaries,
+        Some(&global), // pass-2 style call: cross-file summaries supplied
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+    assert_eq!(findings.len(), 1, "cross-file source should be detected");
+}
+
+#[test]
+fn cross_file_sanitizer_resolved_via_global_summaries() {
+    use crate::summary::FuncSummary;
+
+    // File B gets tainted data and passes it through `my_sanitize()` from file A.
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let clean = my_sanitize(x);
+            Command::new("sh").arg(clean).status().unwrap();
+        }"#;
+
+    let (cfg, entry, local_summaries) = parse_rust(src);
+
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Rust,
+        namespace: "file_a.rs".into(),
+        name: "my_sanitize".into(),
+        arity: Some(1),
+    };
+    global.insert(
+        key,
+        FuncSummary {
+            name: "my_sanitize".into(),
+            file_path: "file_a.rs".into(),
+            lang: "rust".into(),
+            param_count: 1,
+            param_names: vec!["input".into()],
+            source_caps: 0,
+            sanitizer_caps: Cap::all().bits(), // strips every capability bit
+            sink_caps: 0,
+            propagates_taint: true, // argument taint flows in, then is stripped
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local_summaries,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+    assert!(
+        findings.is_empty(),
+        "cross-file sanitizer should neutralise taint"
+    );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+//  Shared test helpers
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Build the CFG for a Rust snippet, returning (cfg, entry, local_summaries).
+fn parse_rust(src: &[u8]) -> (Cfg, NodeIndex, FuncSummaries) {
+    use crate::cfg::build_cfg;
+    use tree_sitter::{Language, Parser};
+    let rust = Language::from(tree_sitter_rust::LANGUAGE);
+    let mut ts_parser = Parser::new();
+    ts_parser.set_language(&rust).unwrap();
+    // No previous tree is supplied (second argument is `None`).
+    let syntax_tree = ts_parser.parse(src, None).unwrap();
+    build_cfg(&syntax_tree, src, "rust", "test.rs")
+}
+
+/// Run `parse_rust` on `src` and export its local summaries under `path` as "rust".
+fn extract_summaries_from_bytes(src: &[u8], path: &str) -> Vec<crate::summary::FuncSummary> {
+    // Only the summaries are needed; the CFG and entry node go unused.
+    let (_cfg, _entry, local_summaries) = parse_rust(src);
+    crate::cfg::export_summaries(&local_summaries, path, "rust")
+}
+
+#[test]
+fn cross_file_sink_resolved_via_global_summaries() {
+    use crate::summary::FuncSummary;
+
+    // File B calls `dangerous_exec(x)`; its hand-built file-A summary marks it as a sink.
+    let src = br#"
+        use std::env;
+        fn main() {
+            let x = env::var("INPUT").unwrap();
+            dangerous_exec(x);
+        }"#;
+
+    let (cfg, entry, local_summaries) = parse_rust(src);
+
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Rust,
+        namespace: "file_a.rs".into(),
+        name: "dangerous_exec".into(),
+        arity: Some(1),
+    };
+    global.insert(
+        key,
+        FuncSummary {
+            name: "dangerous_exec".into(),
+            file_path: "file_a.rs".into(),
+            lang: "rust".into(),
+            param_count: 1,
+            param_names: vec!["cmd".into()],
+            source_caps: 0,
+            sanitizer_caps: 0,
+            sink_caps: Cap::SHELL_ESCAPE.bits(), // sink for shell-tainted arguments
+            propagates_taint: false,
+            tainted_sink_params: vec![0],
+            callees: vec!["Command::new".into()],
+        },
+    );
+
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local_summaries,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+    assert_eq!(findings.len(), 1, "cross-file sink should be detected");
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+//  Multi-file integration tests (real parsing, full pass-1 → pass-2 pipeline)
+// ─────────────────────────────────────────────────────────────────────────────
+
+#[test]
+fn multi_file_source_to_sink_detected() {
+    use crate::summary::merge_summaries;
+
+    // File A: defines get_dangerous(), whose body calls env::var (a source).
+    let lib_src = br#"
+        use std::env;
+        fn get_dangerous() -> String {
+            env::var("SECRET").unwrap()
+        }
+    "#;
+
+    // File B: calls get_dangerous() then passes result to Command (a sink).
+    let caller_src = br#"
+        use std::process::Command;
+        fn main() {
+            let x = get_dangerous();
+            Command::new("sh").arg(x).status().unwrap();
+        }
+    "#;
+
+    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs"); // summaries from file A
+    let global = merge_summaries(summaries, None);
+
+    let (cfg, entry, local) = parse_rust(caller_src); // analyse file B against them
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "cross-file source → inline sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn multi_file_sanitizer_neutralises_cross_file_source() {
+    use crate::summary::merge_summaries;
+
+    // File A: a source function plus a wrapper around the matching shell sanitizer.
+    // NOTE: function name avoids `sanitize_` prefix which triggers
+    //       the inline HTML sanitizer label rule.
+    let lib_src = br#"
+        use std::env;
+        fn get_input() -> String {
+            env::var("INPUT").unwrap()
+        }
+        fn clean_shell(s: &str) -> String {
+            shell_escape::unix::escape(s).to_string()
+        }
+    "#;
+
+    // File B: source → clean_shell → shell sink.
+    let caller_src = br#"
+        use std::process::Command;
+        fn main() {
+            let x = get_input();
+            let clean = clean_shell(&x);
+            Command::new("sh").arg(clean).status().unwrap();
+        }
+    "#;
+
+    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs"); // summaries from file A
+    let global = merge_summaries(summaries, None);
+
+    let (cfg, entry, local) = parse_rust(caller_src); // analyse file B against them
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert!(
+        findings.is_empty(),
+        "matching cross-file sanitizer should neutralise taint, got {} findings",
+        findings.len()
+    );
+}
+
+#[test]
+fn multi_file_wrong_sanitizer_preserves_taint() {
+    use crate::summary::merge_summaries;
+
+    // File A: source `get_input` + HTML sanitizer `clean_html` (wrong kind for a shell sink).
+    let lib_src = br#"
+        use std::env;
+        fn get_input() -> String {
+            env::var("INPUT").unwrap()
+        }
+        fn clean_html(s: &str) -> String {
+            html_escape::encode_safe(s).to_string()
+        }
+    "#;
+
+    // File B: get_input() → clean_html() → Command (shell sink); the finding must survive.
+    let caller_src = br#"
+        use std::process::Command;
+        fn main() {
+            let x = get_input();
+            let clean = clean_html(&x);
+            Command::new("sh").arg(clean).status().unwrap();
+        }
+    "#;
+
+    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs");
+    let global = merge_summaries(summaries, None);
+
+    let (cfg, entry, local) = parse_rust(caller_src);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "wrong sanitizer (HTML for shell sink) should NOT neutralise taint"
+    );
+}
+
+#[test]
+fn multi_file_sink_in_another_file() {
+    use crate::summary::merge_summaries;
+
+    // File A: exec_cmd() forwards its `cmd` parameter to Command (a sink).
+    let lib_src = br#"
+        use std::process::Command;
+        fn exec_cmd(cmd: &str) {
+            Command::new("sh").arg(cmd).status().unwrap();
+        }
+    "#;
+
+    // File B: env::var (source) → exec_cmd(); File B itself contains no Command call.
+    let caller_src = br#"
+        use std::env;
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            exec_cmd(&x);
+        }
+    "#;
+
+    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs");
+    let global = merge_summaries(summaries, None);
+
+    let (cfg, entry, local) = parse_rust(caller_src);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert_eq!(findings.len(), 1, "cross-file sink should be detected");
+}
+
+#[test]
+fn multi_file_passthrough_preserves_taint() {
+    use crate::summary::FuncSummary;
+
+    // Hand-built summary for identity(): `propagates_taint` is set while all three cap
+    // fields (source/sanitizer/sink) are 0. No source text for identity() is parsed.
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Rust,
+        namespace: "lib.rs".into(),
+        name: "identity".into(),
+        arity: Some(1),
+    };
+    global.insert(
+        key,
+        FuncSummary {
+            name: "identity".into(),
+            file_path: "lib.rs".into(),
+            lang: "rust".into(),
+            param_count: 1,
+            param_names: vec!["s".into()],
+            source_caps: 0,
+            sanitizer_caps: 0,
+            sink_caps: 0,
+            propagates_taint: true,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+
+    let caller_src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let y = identity(&x);
+            Command::new("sh").arg(y).status().unwrap();
+        }
+    "#;
+    // The caller does not define identity(); its only summary is the one in `global`.
+    let (cfg, entry, local) = parse_rust(caller_src);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "taint should propagate through passthrough function"
+    );
+}
+
+#[test]
+fn multi_file_chain_source_sanitize_sink_across_files() {
+    use crate::summary::merge_summaries;
+
+    // Library file: get_input (source), clean_shell (sanitizer) and exec_cmd (sink wrapper).
+    let lib_src = br#"
+        use std::env;
+        use std::process::Command;
+        fn get_input() -> String {
+            env::var("INPUT").unwrap()
+        }
+        fn clean_shell(s: &str) -> String {
+            shell_escape::unix::escape(s).to_string()
+        }
+        fn exec_cmd(cmd: &str) {
+            Command::new("sh").arg(cmd).status().unwrap();
+        }
+    "#;
+
+    // Caller: get_input() → clean_shell() → exec_cmd(); none of the three is defined locally.
+    let caller_src = br#"
+        fn main() {
+            let x = get_input();
+            let clean = clean_shell(&x);
+            exec_cmd(&clean);
+        }
+    "#;
+
+    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs");
+    let global = merge_summaries(summaries, None);
+
+    let (cfg, entry, local) = parse_rust(caller_src);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert!(
+        findings.is_empty(),
+        "source → matching sanitizer → sink should produce 0 findings, got {}",
+        findings.len()
+    );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+//  Edge-case unit tests
+// ─────────────────────────────────────────────────────────────────────────────
+
+#[test]
+fn sanitizer_strips_only_matching_bits() {
+    // env::var (source, all bits) → shell_escape::unix::escape → sink_html (HTML sink).
+    // shell_escape strips SHELL_ESCAPE but not HTML_ESCAPE, so that bit survives → 1 finding.
+    // NOTE(review): sink_html's body is empty; its sink label presumably comes from its name.
+    let src = br#"
+        use std::env;
+        fn sink_html(s: &str) {}
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let clean = shell_escape::unix::escape(&x);
+            sink_html(&clean);
+        }
+    "#;
+
+    let (cfg, entry, summaries) = parse_rust(src);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "shell sanitizer should NOT strip HTML_ESCAPE bit; HTML sink should still fire"
+    );
+}
+
+#[test]
+fn multiple_sanitizers_strip_all_bits() {
+    // env::var → shell_escape → html_escape → Command (shell sink).
+    // shell_escape strips SHELL_ESCAPE; html_escape strips HTML_ESCAPE → nothing left for Command.
+    // NOTE(review): with only a shell sink asserted, html_escape's effect looks unobservable.
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let a = shell_escape::unix::escape(&x);
+            let b = html_escape::encode_safe(&a);
+            Command::new("sh").arg(b).status().unwrap();
+        }
+    "#;
+
+    let (cfg, entry, summaries) = parse_rust(src);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+
+    assert!(
+        findings.is_empty(),
+        "both sanitizers together should strip all relevant bits"
+    );
+}
+
+#[test]
+fn taint_through_variable_reassignment() {
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let y = x;
+            Command::new("sh").arg(y).status().unwrap();
+        }
+    "#;
+    // Snippet: env::var → `x` → `let y = x` → Command. No global summaries, no interop edges.
+    let (cfg, entry, summaries) = parse_rust(src);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "taint should flow through simple variable reassignment"
+    );
+}
+
+#[test]
+fn untainted_variable_at_sink_is_safe() {
+    // `x` is bound to a string literal, not to a source result, before reaching Command → no finding.
+    let src = br#"
+        use std::process::Command;
+        fn main() {
+            let x = "harmless";
+            Command::new("sh").arg(x).status().unwrap();
+        }
+    "#;
+
+    let (cfg, entry, summaries) = parse_rust(src);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+
+    assert!(
+        findings.is_empty(),
+        "untainted literal should not trigger a finding"
+    );
+}
+
+#[test]
+fn local_summary_takes_precedence_over_global() {
+    use crate::summary::FuncSummary;
+
+    // The caller file defines my_func locally; its body returns env::var (a source).
+    // The global entry for my_func (namespace "other.rs") sets every sanitizer cap bit instead.
+    // Local should win → finding expected.
+    let caller_src = br#"
+        use std::{env, process::Command};
+        fn my_func() -> String {
+            env::var("SECRET").unwrap()
+        }
+        fn main() {
+            let x = my_func();
+            Command::new("sh").arg(x).status().unwrap();
+        }
+    "#;
+
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Rust,
+        namespace: "other.rs".into(),
+        name: "my_func".into(),
+        arity: Some(0),
+    };
+    global.insert(
+        key,
+        FuncSummary {
+            name: "my_func".into(),
+            file_path: "other.rs".into(),
+            lang: "rust".into(),
+            param_count: 0,
+            param_names: vec![],
+            source_caps: 0,
+            sanitizer_caps: Cap::all().bits(),
+            sink_caps: 0,
+            propagates_taint: true,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+
+    let (cfg, entry, local) = parse_rust(caller_src);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "local summary (source) should take precedence over global (sanitizer)"
+    );
+}
+
+#[test]
+fn empty_global_summaries_same_as_none() {
+    let src = br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            Command::new("sh").arg(x).status().unwrap();
+        }
+    "#;
+
+    let (cfg, entry, summaries) = parse_rust(src);
+    // Analyse the same CFG twice: once with no global summaries, once with an empty set.
+    let findings_none = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+    let empty = GlobalSummaries::new();
+    let findings_empty = analyse_file(
+        &cfg,
+        entry,
+        &summaries,
+        Some(&empty),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+    // Only the number of findings is compared, not their contents.
+    assert_eq!(
+        findings_none.len(),
+        findings_empty.len(),
+        "empty GlobalSummaries should behave identically to None"
+    );
+}
+
+#[test]
+fn taint_not_introduced_by_non_source_function() {
+    // totally_unknown_func() is defined nowhere (not in the snippet, and no globals are passed);
+    // its result goes straight to Command. Unknown calls should NOT introduce taint.
+    let src = br#"
+        use std::process::Command;
+        fn main() {
+            let x = totally_unknown_func();
+            Command::new("sh").arg(x).status().unwrap();
+        }
+    "#;
+
+    let (cfg, entry, summaries) = parse_rust(src);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
+
+    assert!(
+        findings.is_empty(),
+        "unknown function call should not introduce taint"
+    );
+}
+
+#[test]
+fn source_and_sink_on_same_function() {
+    use crate::summary::FuncSummary;
+
+    // Cross-file function summarised as both source (all caps) and sink (SHELL_ESCAPE).
+    // Parameter 0 is listed in `tainted_sink_params`; a tainted arg hits the sink → 1 finding.
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Rust,
+        namespace: "lib.rs".into(),
+        name: "source_and_sink".into(),
+        arity: Some(1),
+    };
+    global.insert(
+        key,
+        FuncSummary {
+            name: "source_and_sink".into(),
+            file_path: "lib.rs".into(),
+            lang: "rust".into(),
+            param_count: 1,
+            param_names: vec!["input".into()],
+            source_caps: Cap::all().bits(),
+            sanitizer_caps: 0,
+            sink_caps: Cap::SHELL_ESCAPE.bits(),
+            propagates_taint: false,
+            tainted_sink_params: vec![0],
+            callees: vec![],
+        },
+    );
+
+    // The env::var result is passed as the only argument of source_and_sink.
+    let src = br#"
+        use std::env;
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            source_and_sink(x);
+        }
+    "#;
+
+    let (cfg, entry, local) = parse_rust(src);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "function that is both source and sink should detect tainted arg as finding"
+    );
+}
+
+#[test]
+fn multiple_cross_file_sources_one_sanitised() {
+    use crate::summary::FuncSummary;
+
+    let mut global = GlobalSummaries::new();
+    // Two hand-built cross-file sources in "lib.rs", both zero-arity with every source cap set.
+    let key1 = FuncKey {
+        lang: Lang::Rust,
+        namespace: "lib.rs".into(),
+        name: "get_secret".into(),
+        arity: Some(0),
+    };
+    global.insert(
+        key1,
+        FuncSummary {
+            name: "get_secret".into(),
+            file_path: "lib.rs".into(),
+            lang: "rust".into(),
+            param_count: 0,
+            param_names: vec![],
+            source_caps: Cap::all().bits(),
+            sanitizer_caps: 0,
+            sink_caps: 0,
+            propagates_taint: false,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+    let key2 = FuncKey {
+        lang: Lang::Rust,
+        namespace: "lib.rs".into(),
+        name: "get_other_secret".into(),
+        arity: Some(0),
+    };
+    global.insert(
+        key2,
+        FuncSummary {
+            name: "get_other_secret".into(),
+            file_path: "lib.rs".into(),
+            lang: "rust".into(),
+            param_count: 0,
+            param_names: vec![],
+            source_caps: Cap::all().bits(),
+            sanitizer_caps: 0,
+            sink_caps: 0,
+            propagates_taint: false,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+
+    // `a` goes through shell_escape before its Command; `b` reaches a second Command unsanitised.
+    let src = br#"
+        use std::process::Command;
+        fn main() {
+            let a = get_secret();
+            let b = get_other_secret();
+            let clean_a = shell_escape::unix::escape(&a);
+            Command::new("sh").arg(clean_a).status().unwrap();
+            Command::new("sh").arg(b).status().unwrap();
+        }
+    "#;
+
+    let (cfg, entry, local) = parse_rust(src);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "only the unsanitised cross-file source should produce a finding"
+    );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+//  Multi-language helpers and tests
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Parses `src` with grammar `ts_lang` and hands the tree to `build_cfg` for `slug` → (cfg, entry, local_summaries)
+fn parse_lang(
+    src: &[u8],
+    slug: &str,
+    ts_lang: tree_sitter::Language,
+) -> (Cfg, NodeIndex, FuncSummaries) {
+    use crate::cfg::build_cfg;
+    let mut parser = tree_sitter::Parser::new();
+    parser.set_language(&ts_lang).unwrap(); // panics if the parser rejects the grammar
+    let tree = parser.parse(src, None).unwrap(); // panics if parsing yields no tree
+    let ext = match slug { // despite the name, holds a full placeholder file name
+        "rust" => "test.rs",
+        "javascript" => "test.js",
+        "typescript" => "test.ts",
+        "python" => "test.py",
+        "go" => "test.go",
+        "java" => "test.java",
+        "c" => "test.c",
+        "cpp" => "test.cpp",
+        "php" => "test.php",
+        "ruby" => "test.rb",
+        _ => "test.txt", // any slug not listed above
+    };
+    build_cfg(&tree, src, slug, ext)
+}
+
+#[test]
+fn js_source_to_sink() {
+    let src = b"function main() {\n  let x = document.location();\n  eval(x);\n}\n";
+    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let (cfg, entry, summaries) = parse_lang(src, "javascript", lang);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &summaries,
+        None, // no global summaries
+        Lang::JavaScript,
+        "test.js",
+        &[], // no interop edges
+    );
+    assert_eq!(
+        findings.len(),
+        1, // the document.location() value passed to eval
+        "JS: source->sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn ts_source_to_sink() {
+    let src = b"function main() {\n  let x = document.location();\n  eval(x);\n}\n";
+    let lang = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
+    let (cfg, entry, summaries) = parse_lang(src, "typescript", lang);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &summaries,
+        None, // no global summaries
+        Lang::TypeScript,
+        "test.ts",
+        &[], // no interop edges
+    );
+    assert_eq!(
+        findings.len(),
+        1, // same snippet as the JS test, parsed with the TypeScript grammar
+        "TS: source->sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn python_source_to_sink() {
+    let src = b"def main():\n    x = os.getenv(\"SECRET\")\n    os.system(x)\n";
+    let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
+    let (cfg, entry, summaries) = parse_lang(src, "python", lang);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Python, "test.py", &[]);
+    assert_eq!(
+        findings.len(),
+        1, // the os.getenv value passed to os.system
+        "Python: source->sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn go_source_to_sink() {
+    let src =
+        b"package main\n\nfunc main() {\n\tx := os.Getenv(\"SECRET\")\n\texec.Command(x)\n}\n";
+    let lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
+    let (cfg, entry, summaries) = parse_lang(src, "go", lang);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Go, "test.go", &[]);
+    assert_eq!(
+        findings.len(),
+        1, // the os.Getenv value passed to exec.Command
+        "Go: source->sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn java_source_to_sink() {
+    let src = b"class Main {\n  void main() {\n    String x = System.getenv(\"SECRET\");\n    Runtime.exec(x);\n  }\n}\n";
+    let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
+    let (cfg, entry, summaries) = parse_lang(src, "java", lang);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Java, "test.java", &[]);
+    assert_eq!(
+        findings.len(),
+        1, // the System.getenv value passed to Runtime.exec
+        "Java: source->sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn c_source_to_sink() {
+    let src = b"void main() {\n  char* x = getenv(\"SECRET\");\n  system(x);\n}\n";
+    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
+    let (cfg, entry, summaries) = parse_lang(src, "c", lang);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::C, "test.c", &[]);
+    assert_eq!(
+        findings.len(),
+        1, // the getenv value passed to system
+        "C: source->sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn cpp_source_to_sink() {
+    let src = b"void main() {\n  char* x = getenv(\"SECRET\");\n  system(x);\n}\n";
+    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
+    let (cfg, entry, summaries) = parse_lang(src, "cpp", lang);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Cpp, "test.cpp", &[]);
+    assert_eq!(
+        findings.len(),
+        1, // same snippet as the C test, parsed with the C++ grammar
+        "C++: source->sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn php_source_to_sink() {
+    let src =
+        b"<?php\nfunction main() {\n  $x = file_get_contents(\"secret\");\n  system($x);\n}\n?>";
+    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
+    let (cfg, entry, summaries) = parse_lang(src, "php", lang);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Php, "test.php", &[]);
+    assert_eq!(
+        findings.len(),
+        1, // the file_get_contents value passed to system
+        "PHP: source->sink should produce 1 finding"
+    );
+}
+
+#[test]
+fn ruby_source_to_sink() {
+    let src = b"def main\n  x = gets()\n  system(x)\nend\n";
+    let lang = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
+    let (cfg, entry, summaries) = parse_lang(src, "ruby", lang);
+    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Ruby, "test.rb", &[]);
+    assert_eq!(
+        findings.len(),
+        1, // the gets() value passed to system
+        "Ruby: source->sink should produce 1 finding"
+    );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+//  Cross-language multi-file tests
+// ─────────────────────────────────────────────────────────────────────────────
+//
+// Cross-language resolution now requires explicit InteropEdge declarations.
+// Without an edge, functions from different languages are never resolved —
+// this prevents false positives from name collisions across languages.
+
+/// Parses `src` via `parse_lang` and passes its local summaries to `export_summaries` with `path` and `slug`.
+fn extract_lang_summaries(
+    src: &[u8],
+    slug: &str,
+    ts_lang: tree_sitter::Language,
+    path: &str,
+) -> Vec<crate::summary::FuncSummary> {
+    use crate::cfg::export_summaries;
+    let (_, _, local) = parse_lang(src, slug, ts_lang); // CFG and entry node are discarded
+    export_summaries(&local, path, slug)
+}
+
+// ── Scenario 1: Python source function → JavaScript sink via interop ─────
+#[test]
+fn cross_lang_python_source_to_js_sink_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::merge_summaries;
+    // Python side: get_input() returns an os.getenv value; only its summary enters `global`.
+    let py_src = b"def get_input():\n    x = os.getenv(\"SECRET\")\n    return x\n";
+    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
+    let py_summaries = extract_lang_summaries(py_src, "python", py_lang, "lib.py");
+    let global = merge_summaries(py_summaries, None);
+
+    // JavaScript side: calls get_input() and passes the result to eval().
+    let js_src = b"function main() {\n  let x = get_input();\n  eval(x);\n}\n";
+    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(js_src, "javascript", js_lang);
+
+    // No edges: the Python summary must not be used for the JS call → 0 findings.
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::JavaScript,
+        "main.js",
+        &[],
+    );
+    assert!(findings.is_empty(), "No cross-lang without interop edge");
+
+    // With an edge from main.js::main's get_input call (ordinal 0) to lib.py's get_input.
+    let edges = vec![InteropEdge {
+        from: CallSiteKey {
+            caller_lang: Lang::JavaScript,
+            caller_namespace: "main.js".into(),
+            caller_func: "main".into(),
+            callee_symbol: "get_input".into(),
+            ordinal: 0,
+        },
+        to: FuncKey {
+            lang: Lang::Python,
+            namespace: "lib.py".into(),
+            name: "get_input".into(),
+            arity: Some(0),
+        },
+        arg_map: vec![],
+        ret_taints: true,
+    }];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::JavaScript,
+        "main.js",
+        &edges,
+    );
+    assert_eq!(
+        findings.len(),
+        1,
+        "Python source → JS sink via interop edge"
+    );
+}
+
+// ── Scenario 2: Go source function → Python sink via interop ─────────────
+#[test]
+fn cross_lang_go_source_to_python_sink_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::merge_summaries;
+    // Go side: fetch_env() returns an os.Getenv value; only its summary enters `global`.
+    let go_src =
+        b"package main\n\nfunc fetch_env() string {\n\tx := os.Getenv(\"SECRET\")\n\treturn x\n}\n";
+    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
+    let go_summaries = extract_lang_summaries(go_src, "go", go_lang, "lib.go");
+    let global = merge_summaries(go_summaries, None);
+    // Python side: the fetch_env() result is passed to os.system.
+    let py_src = b"def main():\n    x = fetch_env()\n    os.system(x)\n";
+    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(py_src, "python", py_lang);
+
+    // No edges: the Go summary must not be used for the Python call → 0 findings.
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Python,
+        "main.py",
+        &[],
+    );
+    assert!(findings.is_empty(), "No cross-lang without interop");
+
+    // With an edge from main.py::main's fetch_env call (ordinal 0) to lib.go's fetch_env.
+    let edges = vec![InteropEdge {
+        from: CallSiteKey {
+            caller_lang: Lang::Python,
+            caller_namespace: "main.py".into(),
+            caller_func: "main".into(),
+            callee_symbol: "fetch_env".into(),
+            ordinal: 0,
+        },
+        to: FuncKey {
+            lang: Lang::Go,
+            namespace: "lib.go".into(),
+            name: "fetch_env".into(),
+            arity: Some(0),
+        },
+        arg_map: vec![],
+        ret_taints: true,
+    }];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Python,
+        "main.py",
+        &edges,
+    );
+    assert_eq!(findings.len(), 1, "Go source → Python sink via interop");
+}
+
+// ── Scenario 3: Rust sanitizer applied in JavaScript context via interop ──
+#[test]
+fn cross_lang_rust_sanitizer_in_js_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::merge_summaries;
+
+    let rs_src = br#"
+        fn clean_shell(s: &str) -> String {
+            shell_escape::unix::escape(s).to_string()
+        }
+    "#;
+    let rs_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
+    let rs_summaries = extract_lang_summaries(rs_src, "rust", rs_lang, "lib.rs");
+    let global = merge_summaries(rs_summaries, None);
+
+    // JS: document.location() → clean_shell (Rust, via the edge below) → eval.
+    let js_src = b"function main() {\n  let x = document.location();\n  let y = clean_shell(x);\n  eval(y);\n}\n";
+    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(js_src, "javascript", js_lang);
+    // NOTE(review): assumes SHELL_ESCAPE sanitising satisfies the JS `eval` sink — confirm.
+    let edges = vec![InteropEdge {
+        from: CallSiteKey {
+            caller_lang: Lang::JavaScript,
+            caller_namespace: "main.js".into(),
+            caller_func: "main".into(),
+            callee_symbol: "clean_shell".into(),
+            ordinal: 0,
+        },
+        to: FuncKey {
+            lang: Lang::Rust,
+            namespace: "lib.rs".into(),
+            name: "clean_shell".into(),
+            arity: Some(1),
+        },
+        arg_map: vec![],
+        ret_taints: true,
+    }];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::JavaScript,
+        "main.js",
+        &edges,
+    );
+    assert!(
+        findings.is_empty(),
+        "Rust SHELL_ESCAPE sanitizer should neutralise taint via interop"
+    );
+}
+
+// ── Scenario 4: C sink function called from Java via interop ─────────────
+#[test]
+fn cross_lang_c_sink_called_from_java_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::merge_summaries;
+    // C side: run_cmd() passes its parameter to system().
+    let c_src = b"void run_cmd(char* cmd) {\n  system(cmd);\n}\n";
+    let c_lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
+    let c_summaries = extract_lang_summaries(c_src, "c", c_lang, "native.c");
+    let global = merge_summaries(c_summaries, None);
+    // Java side: the System.getenv value is passed to run_cmd().
+    let java_src = b"class Main {\n  void main() {\n    String x = System.getenv(\"INPUT\");\n    run_cmd(x);\n  }\n}\n";
+    let java_lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(java_src, "java", java_lang);
+
+    let edges = vec![InteropEdge {
+        from: CallSiteKey {
+            caller_lang: Lang::Java,
+            caller_namespace: "Main.java".into(),
+            caller_func: "main".into(),
+            callee_symbol: "run_cmd".into(),
+            ordinal: 0,
+        },
+        to: FuncKey {
+            lang: Lang::C,
+            namespace: "native.c".into(),
+            name: "run_cmd".into(),
+            arity: Some(0), // C param extraction yields 0 (pre-existing limitation)
+        },
+        arg_map: vec![],
+        ret_taints: false,
+    }];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Java,
+        "Main.java",
+        &edges,
+    );
+    assert_eq!(findings.len(), 1, "Java source → C sink via interop");
+}
+
+// ── Scenario 5: Multi-language summary merge with interop ────────────────
+#[test]
+fn cross_lang_three_languages_merged_summaries_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::merge_summaries;
+
+    // Python: get_secret() returns an os.getenv value (source).
+    let py_src = b"def get_secret():\n    x = os.getenv(\"SECRET\")\n    return x\n";
+    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
+    let py_sums = extract_lang_summaries(py_src, "python", py_lang, "source.py");
+
+    // C: run_dangerous() passes its parameter to system() (sink).
+    let c_src = b"void run_dangerous(char* cmd) {\n  system(cmd);\n}\n";
+    let c_lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
+    let c_sums = extract_lang_summaries(c_src, "c", c_lang, "native.c");
+
+    // Rust: make_safe() wraps shell_escape::unix::escape (sanitizer).
+    let rs_src = br#"
+        fn make_safe(s: &str) -> String {
+            shell_escape::unix::escape(s).to_string()
+        }
+    "#;
+    let rs_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
+    let rs_sums = extract_lang_summaries(rs_src, "rust", rs_lang, "lib.rs");
+    // Summaries from all three languages are chained into a single merge call.
+    let all_sums: Vec<_> = py_sums.into_iter().chain(c_sums).chain(rs_sums).collect();
+    let global = merge_summaries(all_sums, None);
+
+    // Go caller: get_secret() → make_safe() → run_dangerous(); each call has its own edge below.
+    let go_src = b"package main\n\nfunc main() {\n\tx := get_secret()\n\ty := make_safe(x)\n\trun_dangerous(y)\n}\n";
+    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(go_src, "go", go_lang);
+
+    let edges = vec![
+        InteropEdge {
+            from: CallSiteKey {
+                caller_lang: Lang::Go,
+                caller_namespace: "main.go".into(),
+                caller_func: "main".into(),
+                callee_symbol: "get_secret".into(),
+                ordinal: 0,
+            },
+            to: FuncKey {
+                lang: Lang::Python,
+                namespace: "source.py".into(),
+                name: "get_secret".into(),
+                arity: Some(0),
+            },
+            arg_map: vec![],
+            ret_taints: true,
+        },
+        InteropEdge {
+            from: CallSiteKey {
+                caller_lang: Lang::Go,
+                caller_namespace: "main.go".into(),
+                caller_func: "main".into(),
+                callee_symbol: "make_safe".into(),
+                ordinal: 0,
+            },
+            to: FuncKey {
+                lang: Lang::Rust,
+                namespace: "lib.rs".into(),
+                name: "make_safe".into(),
+                arity: Some(1),
+            },
+            arg_map: vec![],
+            ret_taints: true,
+        },
+        InteropEdge {
+            from: CallSiteKey {
+                caller_lang: Lang::Go,
+                caller_namespace: "main.go".into(),
+                caller_func: "main".into(),
+                callee_symbol: "run_dangerous".into(),
+                ordinal: 0,
+            },
+            to: FuncKey {
+                lang: Lang::C,
+                namespace: "native.c".into(),
+                name: "run_dangerous".into(),
+                arity: Some(0), // C param extraction yields 0 (pre-existing limitation)
+            },
+            arg_map: vec![],
+            ret_taints: false,
+        },
+    ];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Go,
+        "main.go",
+        &edges,
+    );
+    assert!(
+        findings.is_empty(),
+        "source(Py) → sanitizer(Rs) → sink(C) via interop should be safe; got {} findings",
+        findings.len()
+    );
+}
+
+// ── Scenario 6: Same flow without sanitizer should flag via interop ──────
+#[test]
+fn cross_lang_three_languages_unsanitised_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::merge_summaries;
+    // Python side: `get_secret` returns the result of os.getenv("SECRET").
+    let py_src = b"def get_secret():\n    x = os.getenv(\"SECRET\")\n    return x\n";
+    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
+    let py_sums = extract_lang_summaries(py_src, "python", py_lang, "source.py");
+    // C side: `run_dangerous` hands its `cmd` parameter to system().
+    let c_src = b"void run_dangerous(char* cmd) {\n  system(cmd);\n}\n";
+    let c_lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
+    let c_sums = extract_lang_summaries(c_src, "c", c_lang, "native.c");
+    // Chain both summary lists and merge them; `global` is passed to analyse_file below.
+    let all_sums: Vec<_> = py_sums.into_iter().chain(c_sums).collect();
+    let global = merge_summaries(all_sums, None);
+
+    // Go caller: source → sink directly (no sanitizer)
+    let go_src = b"package main\n\nfunc main() {\n\tx := get_secret()\n\trun_dangerous(x)\n}\n";
+    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(go_src, "go", go_lang);
+    // One InteropEdge per symbol called from Go `main`, pointing at the Python / C FuncKey.
+    let edges = vec![
+        InteropEdge {
+            from: CallSiteKey {
+                caller_lang: Lang::Go,
+                caller_namespace: "main.go".into(),
+                caller_func: "main".into(),
+                callee_symbol: "get_secret".into(),
+                ordinal: 0,
+            },
+            to: FuncKey {
+                lang: Lang::Python,
+                namespace: "source.py".into(),
+                name: "get_secret".into(),
+                arity: Some(0),
+            },
+            arg_map: vec![],
+            ret_taints: true,
+        },
+        InteropEdge {
+            from: CallSiteKey {
+                caller_lang: Lang::Go,
+                caller_namespace: "main.go".into(),
+                caller_func: "main".into(),
+                callee_symbol: "run_dangerous".into(),
+                ordinal: 0,
+            },
+            to: FuncKey {
+                lang: Lang::C,
+                namespace: "native.c".into(),
+                name: "run_dangerous".into(),
+                arity: Some(0), // C param extraction yields 0 (pre-existing limitation)
+            },
+            arg_map: vec![],
+            ret_taints: false,
+        },
+    ];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Go,
+        "main.go",
+        &edges,
+    );
+    assert_eq!(
+        findings.len(),
+        1,
+        "source(Py) → sink(C) without sanitizer via interop"
+    );
+}
+
+// ── Scenario 7: Name collision across languages stays separate ───────────
+#[test]
+fn cross_lang_name_collision_stays_separate() {
+    use crate::summary::merge_summaries;
+
+    // Python version: `process_data` returns os.getenv("DATA"), i.e. it acts as a source
+    let py_src = b"def process_data():\n    x = os.getenv(\"DATA\")\n    return x\n";
+    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
+    let py_sums = extract_lang_summaries(py_src, "python", py_lang, "handler.py");
+
+    // C version: same name, all caps 0, only propagates taint (constructed manually)
+    let c_summary = crate::summary::FuncSummary {
+        name: "process_data".into(),
+        file_path: "handler.c".into(),
+        lang: "c".into(),
+        param_count: 1,
+        param_names: vec!["s".into()],
+        source_caps: 0,
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: true,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+    // Merge the extracted Python summaries together with the hand-built C summary.
+    let all_sums: Vec<_> = py_sums
+        .into_iter()
+        .chain(std::iter::once(c_summary))
+        .collect();
+    let global = merge_summaries(all_sums, None);
+
+    // Each language-scoped lookup must find exactly one entry (stored under different FuncKeys)
+    let py_matches = global.lookup_same_lang(Lang::Python, "process_data");
+    let c_matches = global.lookup_same_lang(Lang::C, "process_data");
+    assert_eq!(py_matches.len(), 1, "Python version stored separately");
+    assert_eq!(c_matches.len(), 1, "C version stored separately");
+
+    // Python's source_caps should NOT bleed into C
+    assert!(py_matches[0].1.source_caps != 0, "Python has source caps");
+    assert_eq!(
+        c_matches[0].1.source_caps, 0,
+        "C should NOT get Python's source caps"
+    );
+}
+
+// ── Scenario 8: Ruby passthrough in JS via interop ───────────────────────
+#[test]
+fn cross_lang_ruby_passthrough_in_js_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::FuncSummary;
+    // Register a Ruby `transform` summary: every cap field is 0, propagates_taint is true.
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Ruby,
+        namespace: "helper.rb".into(),
+        name: "transform".into(),
+        arity: Some(1),
+    };
+    global.insert(
+        key.clone(),
+        FuncSummary {
+            name: "transform".into(),
+            file_path: "helper.rb".into(),
+            lang: "ruby".into(),
+            param_count: 1,
+            param_names: vec!["data".into()],
+            source_caps: 0,
+            sanitizer_caps: 0,
+            sink_caps: 0,
+            propagates_taint: true,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+    // JS caller: document.location() → transform() → eval().
+    let js_src = b"function main() {\n  let x = document.location();\n  let y = transform(x);\n  eval(y);\n}\n";
+    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(js_src, "javascript", js_lang);
+    // Edge from main()'s `transform` call in main.js to the Ruby FuncKey inserted above.
+    let edges = vec![InteropEdge {
+        from: CallSiteKey {
+            caller_lang: Lang::JavaScript,
+            caller_namespace: "main.js".into(),
+            caller_func: "main".into(),
+            callee_symbol: "transform".into(),
+            ordinal: 0,
+        },
+        to: key,
+        arg_map: vec![],
+        ret_taints: true,
+    }];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::JavaScript,
+        "main.js",
+        &edges,
+    );
+    assert_eq!(
+        findings.len(),
+        1,
+        "taint should propagate through cross-lang passthrough via interop"
+    );
+}
+
+// ── Scenario 9: PHP source → Go sink via interop ─────────────────────────
+#[test]
+fn cross_lang_php_source_to_go_sink_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::{FuncSummary, merge_summaries};
+    // Hand-built PHP summary: zero-parameter `read_input` with source_caps = Cap::all().
+    let php_summary = FuncSummary {
+        name: "read_input".into(),
+        file_path: "input.php".into(),
+        lang: "php".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: Cap::all().bits(),
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec!["file_get_contents".into()],
+    };
+    // `global` is built from that single PHP summary.
+    let global = merge_summaries(vec![php_summary], None);
+    // Go caller: the result of read_input() is passed to exec.Command().
+    let go_src = b"package main\n\nfunc main() {\n\tx := read_input()\n\texec.Command(x)\n}\n";
+    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(go_src, "go", go_lang);
+    // Edge from the Go `read_input` call in main.go to the PHP FuncKey in input.php.
+    let edges = vec![InteropEdge {
+        from: CallSiteKey {
+            caller_lang: Lang::Go,
+            caller_namespace: "main.go".into(),
+            caller_func: "main".into(),
+            callee_symbol: "read_input".into(),
+            ordinal: 0,
+        },
+        to: FuncKey {
+            lang: Lang::Php,
+            namespace: "input.php".into(),
+            name: "read_input".into(),
+            arity: Some(0),
+        },
+        arg_map: vec![],
+        ret_taints: true,
+    }];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Go,
+        "main.go",
+        &edges,
+    );
+    assert_eq!(findings.len(), 1, "PHP source → Go sink via interop");
+}
+
+// ── Scenario 10: Wrong sanitizer caps still wrong across languages ───────
+#[test]
+fn cross_lang_wrong_sanitizer_still_flags_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::FuncSummary;
+    // Python `html_clean`: sanitizer_caps is only Cap::HTML_ESCAPE, and it propagates taint.
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Python,
+        namespace: "sanitizers.py".into(),
+        name: "html_clean".into(),
+        arity: Some(1),
+    };
+    global.insert(
+        key.clone(),
+        FuncSummary {
+            name: "html_clean".into(),
+            file_path: "sanitizers.py".into(),
+            lang: "python".into(),
+            param_count: 1,
+            param_names: vec!["text".into()],
+            source_caps: 0,
+            sanitizer_caps: Cap::HTML_ESCAPE.bits(),
+            sink_caps: 0,
+            propagates_taint: true,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+
+    // JS: document.location() → Python HTML sanitizer → eval() sink
+    let js_src = b"function main() {\n  let x = document.location();\n  let y = html_clean(x);\n  eval(y);\n}\n";
+    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(js_src, "javascript", js_lang);
+    // Edge from main()'s `html_clean` call in main.js to the Python FuncKey inserted above.
+    let edges = vec![InteropEdge {
+        from: CallSiteKey {
+            caller_lang: Lang::JavaScript,
+            caller_namespace: "main.js".into(),
+            caller_func: "main".into(),
+            callee_symbol: "html_clean".into(),
+            ordinal: 0,
+        },
+        to: key,
+        arg_map: vec![],
+        ret_taints: true,
+    }];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::JavaScript,
+        "main.js",
+        &edges,
+    );
+    assert_eq!(
+        findings.len(),
+        1,
+        "wrong cross-language sanitizer should NOT neutralise"
+    );
+}
+
+// ── Scenario 11: Summary lang field preserved (different FuncKeys) ───────
+#[test]
+fn cross_lang_summary_preserves_lang_metadata() {
+    use crate::summary::merge_summaries;
+    // Python `helper`: no parameters, source_caps = Cap::all(), no sink caps.
+    let py_summary = crate::summary::FuncSummary {
+        name: "helper".into(),
+        file_path: "lib.py".into(),
+        lang: "python".into(),
+        param_count: 0,
+        param_names: vec![],
+        source_caps: Cap::all().bits(),
+        sanitizer_caps: 0,
+        sink_caps: 0,
+        propagates_taint: false,
+        tainted_sink_params: vec![],
+        callees: vec![],
+    };
+    // JavaScript `helper`: same name, but one parameter, a SHELL_ESCAPE sink cap, propagates taint.
+    let js_summary = crate::summary::FuncSummary {
+        name: "helper".into(),
+        file_path: "lib.js".into(),
+        lang: "javascript".into(),
+        param_count: 1,
+        param_names: vec!["x".into()],
+        source_caps: 0,
+        sanitizer_caps: 0,
+        sink_caps: Cap::SHELL_ESCAPE.bits(),
+        propagates_taint: true,
+        tainted_sink_params: vec![0],
+        callees: vec![],
+    };
+
+    let global = merge_summaries(vec![py_summary, js_summary], None);
+
+    // They are now separate entries — not merged
+    let py_matches = global.lookup_same_lang(Lang::Python, "helper");
+    let js_matches = global.lookup_same_lang(Lang::JavaScript, "helper");
+    // Exactly one entry per language, each keeping the fields it was built with.
+    assert_eq!(py_matches.len(), 1, "Python helper stored separately");
+    assert_eq!(js_matches.len(), 1, "JS helper stored separately");
+    assert!(
+        py_matches[0].1.source_caps != 0,
+        "Python source caps preserved"
+    );
+    assert!(js_matches[0].1.sink_caps != 0, "JS sink caps preserved");
+    assert!(
+        js_matches[0].1.propagates_taint,
+        "JS propagates_taint preserved"
+    );
+}
+
+// ── Scenario 12: Full pipeline Python lib + JS lib, Go caller, via interop ─
+#[test]
+fn cross_lang_full_pipeline_python_lib_js_caller_via_interop() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::merge_summaries;
+
+    // Python library: defines dangerous_query() that reads from os.getenv
+    let py_src = b"def dangerous_query():\n    x = os.getenv(\"SQL\")\n    return x\n";
+    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
+    let py_sums = extract_lang_summaries(py_src, "python", py_lang, "db.py");
+
+    // JavaScript library: defines run_query() that calls eval (a sink)
+    let js_lib_src = b"function run_query(q) {\n  eval(q);\n}\n";
+    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let js_sums = extract_lang_summaries(js_lib_src, "javascript", js_lang, "db.js");
+    // Merge the Python and JavaScript summaries into `global`.
+    let all_sums: Vec<_> = py_sums.into_iter().chain(js_sums).collect();
+    let global = merge_summaries(all_sums, None);
+
+    // Go caller: dangerous_query() → run_query()
+    let go_src = b"package main\n\nfunc main() {\n\tq := dangerous_query()\n\trun_query(q)\n}\n";
+    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(go_src, "go", go_lang);
+    // One edge per Go call site: dangerous_query → db.py (Python), run_query → db.js (JS).
+    let edges = vec![
+        InteropEdge {
+            from: CallSiteKey {
+                caller_lang: Lang::Go,
+                caller_namespace: "main.go".into(),
+                caller_func: "main".into(),
+                callee_symbol: "dangerous_query".into(),
+                ordinal: 0,
+            },
+            to: FuncKey {
+                lang: Lang::Python,
+                namespace: "db.py".into(),
+                name: "dangerous_query".into(),
+                arity: Some(0),
+            },
+            arg_map: vec![],
+            ret_taints: true,
+        },
+        InteropEdge {
+            from: CallSiteKey {
+                caller_lang: Lang::Go,
+                caller_namespace: "main.go".into(),
+                caller_func: "main".into(),
+                callee_symbol: "run_query".into(),
+                ordinal: 0,
+            },
+            to: FuncKey {
+                lang: Lang::JavaScript,
+                namespace: "db.js".into(),
+                name: "run_query".into(),
+                arity: Some(1),
+            },
+            arg_map: vec![],
+            ret_taints: false,
+        },
+    ];
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Go,
+        "main.go",
+        &edges,
+    );
+    assert_eq!(
+        findings.len(),
+        1,
+        "Python source → JS sink via Go caller via interop"
+    );
+}
+
+// ── New tests: ambiguous resolution, interop edge specificity ────────────
+
+#[test]
+fn ambiguous_resolution_returns_none() {
+    use crate::summary::FuncSummary;
+
+    // Two same-lang functions, same name + arity, different namespaces
+    let mut global = GlobalSummaries::new();
+    for ns in &["a.rs", "b.rs"] {
+        let key = FuncKey {
+            lang: Lang::Rust,
+            namespace: (*ns).to_string(),
+            name: "helper".into(),
+            arity: Some(0),
+        };
+        global.insert(
+            key,
+            FuncSummary {
+                name: "helper".into(),
+                file_path: (*ns).to_string(),
+                lang: "rust".into(),
+                param_count: 0,
+                param_names: vec![],
+                source_caps: Cap::all().bits(), // both candidates carry every source cap
+                sanitizer_caps: 0,
+                sink_caps: 0,
+                propagates_taint: false,
+                tainted_sink_params: vec![],
+                callees: vec![],
+            },
+        );
+    }
+
+    // Caller from c.rs calls helper() — ambiguous (two matches, neither is caller's namespace)
+    let src = br#"
+        use std::process::Command;
+        fn main() {
+            let x = helper();
+            Command::new("sh").arg(x).status().unwrap();
+        }
+    "#;
+    // Analysed as namespace "c.rs" with no interop edges (`&[]`).
+    let (cfg, entry, local) = parse_rust(src);
+    let findings = analyse_file(&cfg, entry, &local, Some(&global), Lang::Rust, "c.rs", &[]);
+
+    // Ambiguous resolution returns None → no source → no finding
+    assert!(
+        findings.is_empty(),
+        "ambiguous resolution (two namespaces) should return None → no finding"
+    );
+}
+
+#[test]
+fn exact_namespace_match_wins() {
+    use crate::summary::FuncSummary;
+
+    // Same name in two namespaces, but one matches caller's namespace
+    let mut global = GlobalSummaries::new();
+    // test.rs version: source
+    let key_local = FuncKey {
+        lang: Lang::Rust,
+        namespace: "test.rs".into(),
+        name: "helper".into(),
+        arity: Some(0),
+    };
+    global.insert(
+        key_local,
+        FuncSummary {
+            name: "helper".into(),
+            file_path: "test.rs".into(),
+            lang: "rust".into(),
+            param_count: 0,
+            param_names: vec![],
+            source_caps: Cap::all().bits(),
+            sanitizer_caps: 0,
+            sink_caps: 0,
+            propagates_taint: false,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+    // other.rs version: no caps
+    let key_other = FuncKey {
+        lang: Lang::Rust,
+        namespace: "other.rs".into(),
+        name: "helper".into(),
+        arity: Some(0),
+    };
+    global.insert(
+        key_other,
+        FuncSummary {
+            name: "helper".into(),
+            file_path: "other.rs".into(),
+            lang: "rust".into(),
+            param_count: 0,
+            param_names: vec![],
+            source_caps: 0,
+            sanitizer_caps: 0,
+            sink_caps: 0,
+            propagates_taint: false,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+    // Caller: the result of helper() becomes an argument of Command::new("sh").
+    let src = br#"
+        use std::process::Command;
+        fn main() {
+            let x = helper();
+            Command::new("sh").arg(x).status().unwrap();
+        }
+    "#;
+
+    let (cfg, entry, local) = parse_rust(src);
+    // caller_namespace = "test.rs" matches the source version
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::Rust,
+        "test.rs",
+        &[],
+    );
+
+    assert_eq!(
+        findings.len(),
+        1,
+        "exact namespace match should resolve to the source version"
+    );
+}
+
+#[test]
+fn interop_edge_wrong_caller_lang_no_match() {
+    use crate::interop::CallSiteKey;
+    use crate::summary::FuncSummary;
+    // Python `get_data`: zero-parameter function with source_caps = Cap::all().
+    let mut global = GlobalSummaries::new();
+    let key = FuncKey {
+        lang: Lang::Python,
+        namespace: "lib.py".into(),
+        name: "get_data".into(),
+        arity: Some(0),
+    };
+    global.insert(
+        key.clone(),
+        FuncSummary {
+            name: "get_data".into(),
+            file_path: "lib.py".into(),
+            lang: "python".into(),
+            param_count: 0,
+            param_names: vec![],
+            source_caps: Cap::all().bits(),
+            sanitizer_caps: 0,
+            sink_caps: 0,
+            propagates_taint: false,
+            tainted_sink_params: vec![],
+            callees: vec![],
+        },
+    );
+
+    // Edge specifies Python caller, but we're calling from JavaScript
+    let edges = vec![InteropEdge {
+        from: CallSiteKey {
+            caller_lang: Lang::Python, // wrong!
+            caller_namespace: "main.js".into(),
+            caller_func: "main".into(),
+            callee_symbol: "get_data".into(),
+            ordinal: 0,
+        },
+        to: key,
+        arg_map: vec![],
+        ret_taints: true,
+    }];
+    // JS caller: the result of get_data() flows into eval(); analysed as Lang::JavaScript.
+    let js_src = b"function main() {\n  let x = get_data();\n  eval(x);\n}\n";
+    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let (cfg, entry, local) = parse_lang(js_src, "javascript", js_lang);
+    let findings = analyse_file(
+        &cfg,
+        entry,
+        &local,
+        Some(&global),
+        Lang::JavaScript,
+        "main.js",
+        &edges,
+    );
+
+    assert!(
+        findings.is_empty(),
+        "Edge for wrong caller_lang should not match"
+    );
+}
+
+#[test]
+fn return_call_recognized_as_source() {
+    use crate::cfg::{build_cfg, export_summaries};
+    use tree_sitter::Language;
+
+    // fn foo() -> String { env::var("X").unwrap() }
+    // The function's tail expression (its implicit return value) is a call to
+    // env::var, which should be recognized as a source after the return-call fix.
+    let src = br#"
+        use std::env;
+        fn foo() -> String {
+            env::var("X").unwrap()
+        }
+    "#;
+    // Parse with the Rust grammar, build the CFG, then export its summaries for "test.rs".
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (_, _, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let exported = export_summaries(&summaries, "test.rs", "rust");
+    // The exported summary named "foo" must carry at least one source cap bit.
+    let foo = exported
+        .iter()
+        .find(|s| s.name == "foo")
+        .expect("foo should exist");
+    assert!(
+        foo.source_caps != 0,
+        "foo() should have source_caps set because env::var is called inside return"
+    );
+}
diff --git a/src/utils/ext.rs b/src/utils/ext.rs
index 302350ac..9ff66ba9 100644
--- a/src/utils/ext.rs
+++ b/src/utils/ext.rs
@@ -9,6 +9,7 @@ pub fn lowercase_ext(path: &std::path::Path) -> Option<&'static str> {
         "py" | "PY" => Some("py"),
         "ts" | "TSX" | "tsx" => Some("ts"),
         "js" => Some("js"),
+        "rb" | "RB" => Some("rb"),
         _ => None,
     })
 }
diff --git a/src/walk.rs b/src/walk.rs
index d3242c21..a5056a9a 100644
--- a/src/walk.rs
+++ b/src/walk.rs
@@ -1,62 +1,82 @@
+use crate::utils::Config;
 use crossbeam_channel::{Receiver, Sender, bounded};
 use ignore::{WalkBuilder, WalkState, overrides::OverrideBuilder};
+use std::thread::JoinHandle;
 use std::{
     mem,
     path::{Path, PathBuf},
     thread,
 };
 
-use crate::utils::Config;
-
 // ---------------------------------------------------------------------------
 // Internal constants / helpers
 // ---------------------------------------------------------------------------
 
-type Batch = Vec<PathBuf>;
+type Paths = Vec<PathBuf>;
 
-struct Batcher {
-    tx: Sender<Batch>,
-    batch: Batch,
+struct BatchSender {
+    tx: Sender<Paths>,
+    batch: Paths,
+    batch_size: usize,
 }
-impl Batcher {
-    fn push(&mut self, p: PathBuf, batch_size: usize) {
-        self.batch.push(p);
-        if self.batch.len() == batch_size {
+impl BatchSender {
+    fn new(tx: Sender<Paths>, batch_size: usize) -> Self {
+        Self {
+            tx,
+            batch: Vec::with_capacity(batch_size),
+            batch_size,
+        }
+    }
+
+    fn push_path(&mut self, path: PathBuf) {
+        self.batch.push(path);
+        if self.batch.len() >= self.batch_size {
             self.flush();
         }
     }
+
     fn flush(&mut self) {
         if !self.batch.is_empty() {
+            tracing::debug!(n_paths = self.batch.len(), "flushing batch");
             let _ = self.tx.send(mem::take(&mut self.batch));
         }
     }
 }
-impl Drop for Batcher {
+impl Drop for BatchSender {
     fn drop(&mut self) {
         self.flush();
     }
 }
 
-// ---------------------------------------------------------------------------
-/// Walk `root` and send *batches* of paths through the returned channel.
-pub fn spawn_senders(root: &Path, cfg: &Config) -> Receiver<Batch> {
-    // ----- 1  build ignore/override rules ----------------------------------
+fn build_overrides(root: &Path, cfg: &Config) -> ignore::overrides::Override {
     let mut ob = OverrideBuilder::new(root);
+
     for ext in &cfg.scanner.excluded_extensions {
         if let Err(e) = ob.add(&format!("!*.{ext}")) {
-            tracing::warn!("cannot add ignore pattern ‘{ext}’: {e}");
+            tracing::warn!("invalid exclude‐extension pattern ‘{ext}’: {e}");
         }
     }
     for dir in &cfg.scanner.excluded_directories {
         if let Err(e) = ob.add(&format!("!**/{dir}/**")) {
-            tracing::warn!("cannot add ignore pattern ‘{dir}’: {e}");
+            tracing::warn!("invalid exclude‐dir pattern ‘{dir}’: {e}");
         }
     }
-    let overrides = ob.build().unwrap();
+
+    ob.build().unwrap_or_else(|e| {
+        tracing::error!("failed to build ignore overrides: {e}");
+        ignore::overrides::Override::empty()
+    })
+}
+
+// ---------------------------------------------------------------------------
+/// Walk `root` and send *batches* of paths through the returned channel.
+pub fn spawn_file_walker(root: &Path, cfg: &Config) -> (Receiver<Paths>, JoinHandle<()>) {
+    let _span = tracing::info_span!("spawn_file_walker", root = %root.display()).entered();
+    let overrides = build_overrides(root, cfg);
 
     // ----- 2  channel & thread pool parameters -----------------------------
     let workers = cfg.performance.worker_threads.unwrap_or(num_cpus::get());
-    let (tx, rx) = bounded::<Batch>(workers * cfg.performance.channel_multiplier);
+    let (tx, rx) = bounded::<Paths>(workers * cfg.performance.channel_multiplier);
 
     let root = root.to_path_buf();
     let scan_hidden = cfg.scanner.scan_hidden_files;
@@ -65,45 +85,48 @@ pub fn spawn_senders(root: &Path, cfg: &Config) -> Receiver<Batch> {
     let batch_size = cfg.performance.batch_size;
 
     // ----- 3  the background walker thread ---------------------------------
-    thread::spawn(move || {
+    let handle = thread::spawn(move || {
+        tracing::info!(
+            root = ?root,
+            workers = workers,
+            scan_hidden = scan_hidden,
+            follow_links = follow,
+            max_bytes = max_bytes,
+            batch_size = batch_size,
+            "starting directory walk"
+        );
+
         WalkBuilder::new(root)
             .hidden(!scan_hidden)
             .follow_links(follow)
             .threads(workers)
             .overrides(overrides)
+            .filter_entry(|e| {
+                e.file_type()
+                    .map(|ft| ft.is_dir() || ft.is_file())
+                    .unwrap_or(true)
+            })
             .build_parallel()
             .run(move || {
-                let mut b = Batcher {
-                    tx: tx.clone(),
-                    batch: Vec::with_capacity(batch_size),
-                };
+                let mut bs = BatchSender::new(tx.clone(), batch_size);
 
                 Box::new(move |entry| {
-                    tracing::debug!("walking {:?}", entry);
-                    let entry = match entry {
-                        Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e,
-                        _ => return WalkState::Continue,
-                    };
+                    if let Ok(e) = entry {
+                        let is_file = e.file_type().is_some_and(|ft| ft.is_file());
+                        let under_limit = max_bytes == 0
+                            || e.metadata().map(|m| m.len() <= max_bytes).unwrap_or(true);
 
-                    if max_bytes != 0 {
-                        match entry.metadata() {
-                            Ok(m) if m.len() > max_bytes => return WalkState::Continue,
-                            Err(e) => {
-                                tracing::debug!("metadata failed for {:?}: {e}", entry.path());
-                                return WalkState::Continue;
-                            }
-                            _ => {}
+                        if is_file && under_limit {
+                            bs.push_path(e.into_path());
                         }
                     }
-
-                    tracing::debug!("sending {:?}", entry);
-                    b.push(entry.into_path(), batch_size);
                     WalkState::Continue
                 })
             });
+        tracing::info!("directory walk complete");
     });
 
-    rx
+    (rx, handle)
 }
 
 #[test]
@@ -118,7 +141,10 @@ fn walker_respects_excluded_extensions() {
     cfg.performance.channel_multiplier = 1;
     cfg.performance.batch_size = 2;
 
-    let rx = spawn_senders(tmp.path(), &cfg);
+    let (rx, handle) = spawn_file_walker(tmp.path(), &cfg);
+    if let Err(err) = handle.join() {
+        tracing::error!("walker thread panicked: {:#?}", err);
+    }
 
     let all: Vec<_> = rx.into_iter().flatten().collect();
 
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
new file mode 100644
index 00000000..51d7eb8c
--- /dev/null
+++ b/tests/common/mod.rs
@@ -0,0 +1,177 @@
+// Shared test helpers for integration and perf tests.
+
+use nyx_scanner::commands::scan::Diag;
+use nyx_scanner::utils::config::{AnalysisMode, Config};
+use serde::Deserialize;
+use std::path::Path;
+
+// ── Deterministic test config ──────────────────────────────────────────────
+/// `Config::default()` with the given analysis `mode` and fixed scanner/performance settings.
+pub fn test_config(mode: AnalysisMode) -> Config {
+    let mut cfg = Config::default();
+    cfg.scanner.mode = mode;
+    cfg.scanner.read_vcsignore = false;
+    cfg.scanner.require_git_to_read_vcsignore = false;
+    cfg.performance.worker_threads = Some(1); // pin to a single worker thread
+    cfg.performance.batch_size = 64;
+    cfg.performance.channel_multiplier = 1;
+    cfg
+}
+
+// ── Scan helpers ───────────────────────────────────────────────────────────
+
+/// Scan `path` with `nyx_scanner::scan_no_index` using the `test_config` for `mode`.
+pub fn scan_fixture_dir(path: &Path, mode: AnalysisMode) -> Vec<Diag> {
+    let settings = test_config(mode);
+    nyx_scanner::scan_no_index(path, &settings).expect("scan_no_index should succeed")
+}
+
+// ── Counting / assertion helpers ───────────────────────────────────────────
+/// Number of diagnostics in `diags` whose `id` starts with `prefix`.
+pub fn count_by_prefix(diags: &[Diag], prefix: &str) -> usize {
+    diags.iter().filter(|d| d.id.starts_with(prefix)).count()
+}
+/// Panic (listing every finding) unless at least `min` diagnostic ids start with `prefix`.
+pub fn assert_min_findings(diags: &[Diag], prefix: &str, min: usize) {
+    let count = count_by_prefix(diags, prefix);
+    assert!(
+        count >= min,
+        "Expected >= {min} findings matching prefix '{prefix}', but found {count}.\n\
+         All findings: {:#?}",
+        diags
+            .iter()
+            .map(|d| format!(
+                "  {}:{}:{} [{}] {}",
+                d.path,
+                d.line,
+                d.col,
+                d.severity.as_db_str(),
+                d.id
+            ))
+            .collect::<Vec<_>>()
+    );
+}
+/// Panic, listing the offending findings, if any diagnostic id starts with `prefix`.
+pub fn assert_no_findings(diags: &[Diag], prefix: &str) {
+    let matching: Vec<_> = diags.iter().filter(|d| d.id.starts_with(prefix)).collect();
+    assert!(
+        matching.is_empty(),
+        "Expected 0 findings matching prefix '{prefix}', but found {}:\n{:#?}",
+        matching.len(),
+        matching
+            .iter()
+            .map(|d| format!("  {}:{}:{} {}", d.path, d.line, d.col, d.id))
+            .collect::<Vec<_>>()
+    );
+}
+/// Panic if `diags` has more than `max_total` entries or more than `max_high` "HIGH" entries.
+pub fn assert_max_findings(diags: &[Diag], max_total: usize, max_high: usize) {
+    let high_count = diags
+        .iter()
+        .filter(|d| d.severity.as_db_str() == "HIGH") // severity compared by its db string
+        .count();
+    assert!(
+        diags.len() <= max_total,
+        "Noise budget exceeded: {}/{max_total} total findings.\n\
+         All findings: {:?}",
+        diags.len(),
+        diags
+            .iter()
+            .map(|d| format!("{}:{} {}", d.path, d.line, d.id))
+            .collect::<Vec<_>>()
+    );
+    assert!(
+        high_count <= max_high,
+        "Noise budget exceeded: {high_count}/{max_high} HIGH findings."
+    );
+}
+
+// ── expectations.json schema ───────────────────────────────────────────────
+/// Deserialized form of a fixture's `expectations.json` (see `load_expectations`).
+#[derive(Debug, Deserialize)]
+#[allow(dead_code)] // NOTE(review): presumably not every test crate reads every field — confirm
+pub struct Expectations {
+    pub required_findings: Vec<RequiredFinding>,
+    #[serde(default)] // falls back to an empty list when the key is absent
+    pub forbidden_findings: Vec<ForbiddenFinding>,
+    pub noise_budget: NoiseBudget,
+    pub performance_expectations: PerformanceExpectations,
+}
+/// A finding-id prefix that `validate_expectations` requires at least `min_count` times.
+#[derive(Debug, Deserialize)]
+#[allow(dead_code)]
+pub struct RequiredFinding {
+    pub id_prefix: String,
+    pub min_count: usize,
+}
+/// A finding-id prefix that must not occur, optionally only in paths matching `file_glob`.
+#[derive(Debug, Deserialize)]
+#[allow(dead_code)]
+pub struct ForbiddenFinding {
+    pub id_prefix: String,
+    #[serde(default)] // `None` when the key is absent: the prefix is forbidden everywhere
+    pub file_glob: Option<String>,
+}
+/// Upper bounds on the total and "HIGH"-severity finding counts (see `assert_max_findings`).
+#[derive(Debug, Deserialize)]
+#[allow(dead_code)]
+pub struct NoiseBudget {
+    pub max_total_findings: usize,
+    pub max_high_findings: usize,
+}
+/// Timing limits and CI mode from expectations.json; not read by `validate_expectations`.
+#[derive(Debug, Deserialize)]
+#[allow(dead_code)]
+pub struct PerformanceExpectations {
+    pub max_ms_no_index: u64,
+    pub max_ms_index_cold: u64,
+    pub max_ms_index_warm: u64,
+    pub ci_mode: String,
+}
+
+/// Load and parse `expectations.json` from a fixture directory; panics on read or parse failure.
+pub fn load_expectations(fixture_dir: &Path) -> Expectations {
+    let path = fixture_dir.join("expectations.json");
+    let content = std::fs::read_to_string(&path)
+        .unwrap_or_else(|e| panic!("Failed to read {}: {e}", path.display()));
+    serde_json::from_str(&content)
+        .unwrap_or_else(|e| panic!("Failed to parse {}: {e}", path.display()))
+}
+
+/// Validate `diags` against the fixture's expectations.json (performance limits are not checked).
+pub fn validate_expectations(diags: &[Diag], fixture_dir: &Path) {
+    let exp = load_expectations(fixture_dir);
+
+    // Required findings: each prefix must occur at least `min_count` times.
+    for req in &exp.required_findings {
+        assert_min_findings(diags, &req.id_prefix, req.min_count);
+    }
+
+    // Forbidden findings: scoped to paths matching `file_glob` when one is given.
+    for forb in &exp.forbidden_findings {
+        if let Some(glob) = &forb.file_glob {
+            let pattern =
+                glob::Pattern::new(glob).unwrap_or_else(|e| panic!("Invalid glob '{glob}': {e}"));
+            let matching: Vec<_> = diags
+                .iter()
+                .filter(|d| d.id.starts_with(&forb.id_prefix) && pattern.matches(&d.path))
+                .collect();
+            assert!(
+                matching.is_empty(),
+                "Forbidden finding '{}' in files matching '{}': found {}",
+                forb.id_prefix,
+                glob,
+                matching.len()
+            );
+        } else {
+            assert_no_findings(diags, &forb.id_prefix); // no glob: forbidden in every file
+        }
+    }
+
+    // Noise budget: caps on the total and on the "HIGH"-severity finding counts.
+    assert_max_findings(
+        diags,
+        exp.noise_budget.max_total_findings,
+        exp.noise_budget.max_high_findings,
+    );
+}
diff --git a/tests/fixtures/c_utils/expectations.json b/tests/fixtures/c_utils/expectations.json
new file mode 100644
index 00000000..5e6e6ee4
--- /dev/null
+++ b/tests/fixtures/c_utils/expectations.json
@@ -0,0 +1,23 @@
+{
+  "required_findings": [
+    { "id_prefix": "taint-unsanitised-flow", "min_count": 4 },
+    { "id_prefix": "strcpy_call", "min_count": 1 },
+    { "id_prefix": "strcat_call", "min_count": 1 },
+    { "id_prefix": "sprintf_call", "min_count": 4 },
+    { "id_prefix": "gets_call", "min_count": 1 },
+    { "id_prefix": "scanf_with_percent_s", "min_count": 1 },
+    { "id_prefix": "system_call", "min_count": 3 },
+    { "id_prefix": "cfg-unguarded-sink", "min_count": 5 }
+  ],
+  "forbidden_findings": [],
+  "noise_budget": {
+    "max_total_findings": 50,
+    "max_high_findings": 20
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
diff --git a/tests/fixtures/c_utils/io.c b/tests/fixtures/c_utils/io.c
new file mode 100644
index 00000000..660000e9
--- /dev/null
+++ b/tests/fixtures/c_utils/io.c
@@ -0,0 +1,110 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* ───── Configuration loader ─────
+ * Reads config from environment and files, uses values in system calls.
+ */
+
+#define MAX_PATH 4096
+#define MAX_CMD  2048
+#define MAX_BUF  256
+
+/* VULN: getenv → system (command injection via environment) */
+void run_maintenance_task(void) {
+    char *cmd = getenv("MAINTENANCE_CMD");
+    if (cmd != NULL) {
+        system(cmd);
+    }
+}
+
+/* VULN: getenv → popen (command injection via environment) */
+FILE *check_service_status(void) {
+    char *service = getenv("SERVICE_NAME");
+    char cmd[MAX_CMD];
+    sprintf(cmd, "systemctl status %s", service);
+    return popen(cmd, "r");
+}
+
+/* VULN: getenv flows into sprintf, then system (multi-hop taint) */
+void deploy_package(void) {
+    char *repo_url = getenv("PACKAGE_REPO");
+    char *pkg_name = getenv("PACKAGE_NAME");
+    char cmd[MAX_CMD];
+    sprintf(cmd, "curl -sL %s/%s.tar.gz | tar xz -C /opt", repo_url, pkg_name);
+    system(cmd);
+}
+
+/* ───── Network input handling ─────
+ * Simulates reading from a socket and processing the data.
+ */
+
+/* VULN: fgets (stdin/file source) → strcpy (buffer overflow) */
+void handle_client_request(FILE *client_stream) {
+    char input[MAX_BUF];
+    char request_path[64];
+    char query_string[64];
+
+    fgets(input, sizeof(input), client_stream);
+
+    /* Parse the request line — vulnerable string operations */
+    strcpy(request_path, input);        /* VULN: strcpy no bounds check */
+    strcat(request_path, "/index.html");/* VULN: strcat can overflow */
+
+    /* Build a log message */
+    char log_msg[128];
+    sprintf(log_msg, "Request: %s from client", request_path); /* VULN: sprintf overflow */
+    printf("%s\n", log_msg);
+}
+
+/* VULN: scanf with %s has no width limit (buffer overflow) */
+void read_username(void) {
+    char username[32];
+    printf("Username: ");
+    scanf("%s", username);
+
+    char greeting[64];
+    sprintf(greeting, "Hello, %s! Welcome back.", username);
+    printf("%s\n", greeting);
+}
+
+/* VULN: gets is always unsafe (removed in C11 but still in legacy code) */
+void read_legacy_input(void) {
+    char buffer[128];
+    printf("Enter command: ");
+    gets(buffer);
+    system(buffer);
+}
+
+/* ───── File processing ─────
+ * Reads configuration files and processes their contents.
+ */
+
+/* VULN: fgets → sprintf → system chain (taint from a config-file line into a shell command) */
+void process_config_file(const char *config_path) {
+    FILE *f = fopen(config_path, "r");
+    if (!f) return;
+
+    char line[256];
+    char processed[512];
+
+    while (fgets(line, sizeof(line), f) != NULL) {
+        /* Strip newline */
+        line[strcspn(line, "\n")] = 0;
+
+        /* Build a command from config line — taint propagates */
+        sprintf(processed, "configure --set %s", line);
+
+        /* Execute the constructed command */
+        system(processed);
+    }
+    fclose(f);
+}
+
+/* VULN: getenv → execvp (command injection) */
+void run_custom_shell(void) {
+    char *shell = getenv("CUSTOM_SHELL");
+    char *args[] = { shell, "-c", "echo started", NULL };
+    execvp(shell, args);
+}
diff --git a/tests/fixtures/c_utils/safe.c b/tests/fixtures/c_utils/safe.c
new file mode 100644
index 00000000..19c23883
--- /dev/null
+++ b/tests/fixtures/c_utils/safe.c
@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* ───── Safe string handling ─────
+ * Demonstrates proper bounded operations that should NOT trigger findings.
+ */
+
+/* SAFE: uses snprintf with explicit size limit */
+void safe_format_message(const char *user, char *out, size_t out_size) {
+    snprintf(out, out_size, "Hello, %s! Welcome back.", user);
+}
+
+/* SAFE: uses strncpy with explicit length */
+void safe_copy_path(const char *src, char *dst, size_t dst_size) {
+    strncpy(dst, src, dst_size - 1);
+    dst[dst_size - 1] = '\0';
+}
+
+/* SAFE: uses fgets with proper buffer size, no dangerous operations */
+void safe_read_config(const char *path) {
+    FILE *f = fopen(path, "r");
+    if (!f) return;
+
+    char line[256];
+    while (fgets(line, sizeof(line), f) != NULL) {
+        /* Just log the line, no shell execution */
+        printf("Config: %s", line);
+    }
+    fclose(f);
+}
+
+/* SAFE: pure computation, no external input */
+int safe_calculate_checksum(const unsigned char *data, size_t len) {
+    int sum = 0;
+    for (size_t i = 0; i < len; i++) {
+        sum = (sum + data[i]) & 0xFFFF;
+    }
+    return sum;
+}
+
+/* SAFE: hardcoded command, no taint from environment */
+void safe_list_directory(void) {
+    system("ls -la /var/log");
+}
diff --git a/tests/fixtures/express_app/expectations.json b/tests/fixtures/express_app/expectations.json
new file mode 100644
index 00000000..2ccd377c
--- /dev/null
+++ b/tests/fixtures/express_app/expectations.json
@@ -0,0 +1,20 @@
+{
+  "required_findings": [
+    { "id_prefix": "taint-unsanitised-flow", "min_count": 6 },
+    { "id_prefix": "eval_call", "min_count": 1 },
+    { "id_prefix": "document_write", "min_count": 1 },
+    { "id_prefix": "settimeout_string", "min_count": 1 },
+    { "id_prefix": "cookie_assignment", "min_count": 1 }
+  ],
+  "forbidden_findings": [],
+  "noise_budget": {
+    "max_total_findings": 25,
+    "max_high_findings": 15
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
diff --git a/tests/fixtures/express_app/routes.js b/tests/fixtures/express_app/routes.js
new file mode 100644
index 00000000..64cbbff7
--- /dev/null
+++ b/tests/fixtures/express_app/routes.js
@@ -0,0 +1,137 @@
+var child_process = require("child_process");
+var crypto = require("crypto");
+var fs = require("fs");
+
+// ───── User authentication route ─────
+
+// POST /auth/login
+// Reads credentials from request body, constructs a shell command to
+// check credentials via an external LDAP tool.
+// VULN: req.body flows into child_process.exec
+function handleLogin(req, res) {
+    var username = req.body.username;
+    var password = req.body.password;
+
+    var cmd = "ldapwhoami -x -D 'cn=" + username + ",dc=corp' -w '" + password + "'";
+    child_process.exec(cmd, function(err, stdout, stderr) {
+        if (err) {
+            res.status(401).send("Authentication failed");
+            return;
+        }
+        var token = crypto.randomBytes(32).toString("hex");
+        res.json({ token: token, user: username });
+    });
+}
+
+// ───── Search endpoint ─────
+
+// GET /api/search
+// User-supplied query parameter is passed directly to eval for "dynamic filtering".
+// VULN: req.query flows into eval (code injection)
+function handleSearch(req, res) {
+    var query = req.query.q;
+    var filterExpr = req.query.filter;
+
+    // Developer thought this was clever for dynamic filtering
+    var filterFn = eval("(function(item) { return " + filterExpr + "; })");
+
+    var results = getDatabase().filter(filterFn);
+    res.json({ results: results, query: query });
+}
+
+// ───── Admin panel rendering ─────
+
+// GET /admin/dashboard
+// Renders an admin dashboard; user-supplied name goes into innerHTML.
+// VULN: req.query flows into innerHTML (XSS)
+function renderDashboard(req, res) {
+    var userName = req.query.name;
+    var greeting = "<h1>Welcome, " + userName + "</h1>";
+    document.getElementById("header").innerHTML = greeting;
+
+    var statsHtml = req.query.stats;
+    document.getElementById("stats-panel").innerHTML = statsHtml;
+}
+
+// ───── Webhook handler ─────
+
+// POST /webhooks/deploy
+// Reads a deployment command from process.env, executes it.
+// VULN: process.env flows into child_process.execSync
+function handleDeployWebhook(req, res) {
+    var secret = req.headers["x-webhook-secret"];
+    if (secret !== process.env.WEBHOOK_SECRET) {
+        res.status(403).send("Forbidden");
+        return;
+    }
+
+    var deployCmd = process.env.DEPLOY_COMMAND;
+    var output = child_process.execSync(deployCmd);
+    res.send("Deployed: " + output.toString());
+}
+
+// ───── File preview ─────
+
+// GET /files/preview
+// Reads a file based on user-supplied path, writes content to page.
+// VULN: req.query flows into innerHTML (reflected XSS via file content)
+function previewFile(req, res) {
+    var filePath = req.query.path;
+    var content = fs.readFileSync(filePath, "utf-8");
+    document.getElementById("preview").innerHTML = content;
+}
+
+// ───── Cookie-based session ─────
+
+// POST /session/set
+// Sets a cookie from request parameters.
+// VULN: document.cookie write from user input
+function setSessionCookie(req, res) {
+    var sessionId = req.params.sid;
+    document.cookie = "session=" + sessionId + "; path=/; HttpOnly";
+}
+
+// ───── Prototype pollution ─────
+
+// POST /api/config/merge
+// Merges user-supplied config into the global config object.
+// VULN: prototype pollution via __proto__
+function mergeConfig(req, res) {
+    var userConfig = JSON.parse(req.body.config);
+    for (var key in userConfig) {
+        if (key === "__proto__") {
+            // Developer forgot to skip this
+            Object.prototype[key] = userConfig[key];
+        }
+        globalConfig[key] = userConfig[key];
+    }
+    res.json({ status: "ok" });
+}
+
+// ───── Timer-based polling ─────
+
+// Sets up a polling interval with a string argument.
+// VULN: setTimeout/setInterval with a string argument is equivalent to eval
+function startPolling() {
+    var interval = 5000;
+    setTimeout("checkForUpdates()", interval);
+    setInterval("refreshDashboard()", 30000);
+}
+
+// ───── Safe patterns ─────
+
+// GET /api/profile
+// SAFE: user input sanitized with DOMPurify before rendering
+function renderProfile(req, res) {
+    var bio = req.query.bio;
+    var cleanBio = DOMPurify.sanitize(bio);
+    document.getElementById("bio").innerHTML = cleanBio;
+}
+
+// GET /api/redirect
+// SAFE: URL properly encoded before use
+function safeRedirect(req, res) {
+    var target = req.query.url;
+    var encoded = encodeURIComponent(target);
+    res.redirect("/go?url=" + encoded);
+}
diff --git a/tests/fixtures/express_app/utils.js b/tests/fixtures/express_app/utils.js
new file mode 100644
index 00000000..4ae72ff6
--- /dev/null
+++ b/tests/fixtures/express_app/utils.js
@@ -0,0 +1,81 @@
+var child_process = require("child_process");
+var crypto = require("crypto");
+var fs = require("fs");
+
+// ───── Background job runner ─────
+
+// Runs a job command read from environment.
+// VULN: process.env flows into child_process.exec
+function runScheduledJob() {
+    var jobCmd = process.env.CRON_JOB_CMD;
+    child_process.exec(jobCmd, function(err, stdout, stderr) {
+        if (err) {
+            console.error("Job failed:", stderr);
+            return;
+        }
+        console.log("Job output:", stdout);
+    });
+}
+
+// Spawns a worker process from environment config.
+// VULN: process.env flows into child_process.spawn
+function spawnWorker() {
+    var workerBin = process.env.WORKER_BINARY;
+    var workerArgs = process.env.WORKER_ARGS.split(" ");
+    var proc = child_process.spawn(workerBin, workerArgs);
+    proc.stdout.on("data", function(data) {
+        console.log("Worker: " + data);
+    });
+}
+
+// ───── Template rendering helper ─────
+
+// Renders user-visible content by injecting location data.
+// VULN: document.location.pathname flows into innerHTML
+function renderBreadcrumb() {
+    var currentPath = document.location.pathname;
+    var parts = currentPath.split("/");
+    var html = parts.map(function(p) {
+        return "<a href='/" + p + "'>" + p + "</a>";
+    }).join(" &gt; ");
+    document.getElementById("breadcrumb").innerHTML = html;
+}
+
+// ───── URL redirect handler ─────
+
+// VULN: location.href assignment from user-controlled data
+function handleExternalRedirect() {
+    var target = window.location.hash.substring(1);
+    window.location.href = target;
+}
+
+// ───── Markdown rendering ─────
+
+// Uses document.write to render parsed markdown.
+// VULN: document.write with dynamic content
+function renderMarkdown(markdownHtml) {
+    document.write("<div class='markdown'>" + markdownHtml + "</div>");
+}
+
+// ───── Insecure hashing ─────
+
+// Uses MD5 for password hashing.
+// VULN: weak hash algorithm
+function hashPassword(password) {
+    return crypto.createHash("md5").update(password).digest("hex");
+}
+
+// ───── Dynamic regex from user input ─────
+
+// VULN: RegExp with user-controlled pattern (ReDoS risk)
+function searchLogs(pattern) {
+    var re = new RegExp(pattern, "gi");
+    return logs.filter(function(line) { return re.test(line); });
+}
+
+// ───── Safe utility ─────
+
+// SAFE: no taint flows, pure computation
+function calculateChecksum(data) {
+    return crypto.createHash("sha256").update(data).digest("hex");
+}
diff --git a/tests/fixtures/flask_app/app.py b/tests/fixtures/flask_app/app.py
new file mode 100644
index 00000000..70862eab
--- /dev/null
+++ b/tests/fixtures/flask_app/app.py
@@ -0,0 +1,115 @@
+import os
+import subprocess
+import sqlite3
+import pickle
+import shlex
+
+# ───── Configuration ─────
+
+DATABASE_PATH = os.getenv("DB_PATH", "/var/lib/app/data.db")
+UPLOAD_DIR = os.getenv("UPLOAD_DIR", "/tmp/uploads")
+REDIS_URL = os.getenv("REDIS_URL")
+
+# ───── Request handlers ─────
+
+def handle_admin_exec(request):
+    """POST /admin/exec
+    Runs an admin command from environment config.
+    VULN: os.getenv flows into subprocess.run (command injection)
+    """
+    admin_cmd = os.getenv("ADMIN_COMMAND")
+    result = subprocess.run(admin_cmd, shell=True, capture_output=True)
+    return {"status": result.returncode, "output": result.stdout.decode()}
+
+def handle_report_generate(request):
+    """POST /reports/generate
+    Generates a report by calling an external script.
+    VULN: os.getenv flows into subprocess.Popen
+    """
+    script_path = os.getenv("REPORT_SCRIPT")
+    proc = subprocess.Popen(
+        [script_path, "--format", "pdf"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    stdout, stderr = proc.communicate()
+    return {"report": stdout.decode()}
+
+def handle_eval_expression(request):
+    """POST /api/eval
+    Evaluates a mathematical expression from user input.
+    VULN: request.form flows into eval (code injection)
+    """
+    expression = request.form.get("expr")
+    result = eval(expression)
+    return {"result": result}
+
+def handle_dynamic_import(request):
+    """POST /api/plugins/load
+    Loads a plugin by executing its setup code.
+    VULN: request.json flows into exec (arbitrary code execution)
+    """
+    plugin_code = request.json.get("setup_code")
+    exec(plugin_code)
+    return {"status": "loaded"}
+
+def handle_search(request):
+    """GET /api/search
+    Searches the database with user-supplied query.
+    VULN: request.args flows into cursor.execute (SQL injection)
+    """
+    query = request.args.get("q")
+    conn = sqlite3.connect(DATABASE_PATH)
+    cursor = conn.cursor()
+    cursor.execute("SELECT * FROM items WHERE name LIKE '%" + query + "%'")
+    rows = cursor.fetchall()
+    conn.close()
+    return {"results": rows}
+
+def handle_lookup(request):
+    """GET /api/lookup
+    Looks up a record by user-supplied ID.
+    VULN: request.args flows into os.popen (command injection)
+    """
+    record_id = request.args.get("id")
+    output = os.popen("grep " + record_id + " /var/log/audit.log").read()
+    return {"matches": output}
+
+def handle_backup(request):
+    """POST /admin/backup
+    Creates a database backup.
+    VULN: os.environ flows into subprocess.call
+    """
+    backup_dir = os.environ.get("BACKUP_DIR", "/backups")
+    subprocess.call(["pg_dump", "-f", backup_dir + "/dump.sql", REDIS_URL])
+    return {"status": "ok"}
+
+# ───── Input handling ─────
+
+def handle_interactive_setup():
+    """Interactive setup wizard.
+    VULN: input() flows into os.system (command injection from stdin)
+    """
+    db_host = input("Enter database host: ")
+    os.system("ping -c 1 " + db_host)
+
+    db_password = input("Enter database password: ")
+    return {"host": db_host, "password": db_password}
+
+# ───── Safe patterns ─────
+
+def handle_safe_exec():
+    """SAFE: shlex.quote sanitizes before shell execution."""
+    user_dir = os.getenv("USER_DIR")
+    safe_dir = shlex.quote(user_dir)
+    subprocess.run(["ls", "-la", safe_dir], capture_output=True)
+
+def handle_safe_search(request):
+    """SAFE: parameterized query prevents SQL injection."""
+    query = request.args.get("q")
+    conn = sqlite3.connect(DATABASE_PATH)
+    cursor = conn.cursor()
+    cursor.execute("SELECT * FROM items WHERE name LIKE ?", ("%" + query + "%",))
+    rows = cursor.fetchall()
+    conn.close()
+    return {"results": rows}
diff --git a/tests/fixtures/flask_app/expectations.json b/tests/fixtures/flask_app/expectations.json
new file mode 100644
index 00000000..218d5e95
--- /dev/null
+++ b/tests/fixtures/flask_app/expectations.json
@@ -0,0 +1,19 @@
+{
+  "required_findings": [
+    { "id_prefix": "taint-unsanitised-flow", "min_count": 8 },
+    { "id_prefix": "eval_call", "min_count": 1 },
+    { "id_prefix": "exec_call", "min_count": 2 },
+    { "id_prefix": "cfg-auth-gap", "min_count": 5 }
+  ],
+  "forbidden_findings": [],
+  "noise_budget": {
+    "max_total_findings": 35,
+    "max_high_findings": 25
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
diff --git a/tests/fixtures/flask_app/helpers.py b/tests/fixtures/flask_app/helpers.py
new file mode 100644
index 00000000..a925646f
--- /dev/null
+++ b/tests/fixtures/flask_app/helpers.py
@@ -0,0 +1,71 @@
+import os
+import subprocess
+import pickle
+import yaml
+import hashlib
+import tempfile
+
+# ───── Deserialization ─────
+
+def load_cached_session(session_file):
+    """Loads a pickled session from disk.
+    VULN: pickle.load on untrusted data (arbitrary code execution)
+    """
+    with open(session_file, "rb") as f:
+        session = pickle.load(f)
+    return session
+
+def load_yaml_config(config_path):
+    """Loads YAML configuration.
+    VULN: yaml.load without SafeLoader (arbitrary code execution)
+    """
+    with open(config_path) as f:
+        config = yaml.load(f)
+    return config
+
+# ───── File operations ─────
+
+def process_upload(request):
+    """Saves an uploaded file to a path constructed from user input.
+    VULN: request.form flows into open() path (path traversal)
+    """
+    filename = request.form.get("filename")
+    content = request.form.get("content")
+    upload_path = os.path.join("/uploads", filename)
+    with open(upload_path, "w") as f:
+        f.write(content)
+    return {"saved": upload_path}
+
+# ───── System commands ─────
+
+def check_disk_usage():
+    """Reports disk usage from an env-configured mount point.
+    VULN: os.getenv flows into subprocess.check_output
+    """
+    mount = os.getenv("MOUNT_POINT")
+    output = subprocess.check_output(["df", "-h", mount])
+    return output.decode()
+
+def compile_template(template_path):
+    """Compiles a template by calling an external tool.
+    VULN: os.getenv flows into exec (code injection via env)
+    """
+    compiler = os.getenv("TEMPLATE_COMPILER")
+    exec(compiler + "('" + template_path + "')")
+
+# ───── Hashing ─────
+
+def hash_token(token):
+    """VULN: MD5 is cryptographically weak, should use sha256+salt."""
+    return hashlib.md5(token.encode()).hexdigest()
+
+# ───── Safe utilities ─────
+
+def sanitize_filename(name):
+    """Strips path traversal characters from a filename."""
+    return os.path.basename(name).replace("..", "")
+
+def safe_hash(data):
+    """SAFE: uses SHA-256 with proper salt."""
+    salt = os.urandom(16)
+    return hashlib.sha256(salt + data.encode()).hexdigest()
diff --git a/tests/fixtures/go_server/db.go b/tests/fixtures/go_server/db.go
new file mode 100644
index 00000000..eca1fed4
--- /dev/null
+++ b/tests/fixtures/go_server/db.go
@@ -0,0 +1,75 @@
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"log"
+	"os"
+	"os/exec"
+)
+
+// ───── Database initialization ─────
+
+// InitDB opens a database connection using credentials from environment.
+// VULN: os.Getenv flows into db.Exec for schema setup
+func InitDB() (*sql.DB, error) {
+	dsn := os.Getenv("DATABASE_DSN")
+	db, err := sql.Open("postgres", dsn)
+	if err != nil {
+		return nil, err
+	}
+
+	// Run schema setup from env
+	schema := os.Getenv("SCHEMA_SQL")
+	_, err = db.Exec(schema)
+	if err != nil {
+		log.Printf("schema setup failed: %v", err)
+	}
+
+	return db, nil
+}
+
+// ───── Data export ─────
+
+// ExportTable dumps a table to CSV using pg_dump.
+// VULN: os.Getenv flows into exec.Command (command injection)
+func ExportTable(tableName string) error {
+	dbURL := os.Getenv("DATABASE_URL")
+	dumpCmd := fmt.Sprintf("pg_dump --table=%s --format=csv %s", tableName, dbURL)
+	out, err := exec.Command("sh", "-c", dumpCmd).Output()
+	if err != nil {
+		return fmt.Errorf("export failed: %w", err)
+	}
+	log.Printf("Exported %d bytes", len(out))
+	return nil
+}
+
+// ───── Audit logging ─────
+
+// LogAuditEvent writes an audit record using env-driven SQL.
+// VULN: os.Getenv flows into db.Exec
+func LogAuditEvent(db *sql.DB, event string) error {
+	tableName := os.Getenv("AUDIT_TABLE")
+	query := fmt.Sprintf("INSERT INTO %s (event, ts) VALUES ('%s', NOW())", tableName, event)
+	_, err := db.Exec(query)
+	return err
+}
+
+// ───── Health check ─────
+
+// CheckDependencies pings all external services.
+// VULN: os.Getenv flows into exec.Command
+func CheckDependencies() error {
+	endpoints := []string{
+		os.Getenv("REDIS_HOST"),
+		os.Getenv("KAFKA_HOST"),
+		os.Getenv("ELASTICSEARCH_HOST"),
+	}
+	for _, ep := range endpoints {
+		cmd := exec.Command("nc", "-z", ep, "6379")
+		if err := cmd.Run(); err != nil {
+			return fmt.Errorf("dependency %s unreachable: %w", ep, err)
+		}
+	}
+	return nil
+}
diff --git a/tests/fixtures/go_server/expectations.json b/tests/fixtures/go_server/expectations.json
new file mode 100644
index 00000000..f633b3e3
--- /dev/null
+++ b/tests/fixtures/go_server/expectations.json
@@ -0,0 +1,18 @@
+{
+  "required_findings": [
+    { "id_prefix": "taint-unsanitised-flow", "min_count": 4 },
+    { "id_prefix": "exec_command", "min_count": 3 },
+    { "id_prefix": "cfg-unguarded-sink", "min_count": 1 }
+  ],
+  "forbidden_findings": [],
+  "noise_budget": {
+    "max_total_findings": 25,
+    "max_high_findings": 10
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
diff --git a/tests/fixtures/go_server/server.go b/tests/fixtures/go_server/server.go
new file mode 100644
index 00000000..9fe7789c
--- /dev/null
+++ b/tests/fixtures/go_server/server.go
@@ -0,0 +1,107 @@
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"html"
+	"html/template"
+	"log"
+	"net/http"
+	"os"
+	"os/exec"
+)
+
+// ───── Handler: Execute system command from env ─────
+
+// GET /admin/run
+// Reads a maintenance command from the environment and executes it.
+// VULN: os.Getenv flows into exec.Command (command injection)
+func handleAdminRun(w http.ResponseWriter, r *http.Request) {
+	maintenanceCmd := os.Getenv("MAINTENANCE_CMD")
+	out, err := exec.Command("bash", "-c", maintenanceCmd).Output()
+	if err != nil {
+		http.Error(w, "command failed: "+err.Error(), 500)
+		return
+	}
+	fmt.Fprintf(w, "Output: %s", out)
+}
+
+// ───── Handler: Deploy from env config ─────
+
+// POST /admin/deploy
+// Constructs a deploy command from multiple env vars.
+// VULN: os.Getenv flows into exec.Command
+func handleDeploy(w http.ResponseWriter, r *http.Request) {
+	target := os.Getenv("DEPLOY_TARGET")
+	branch := os.Getenv("DEPLOY_BRANCH")
+	cmd := fmt.Sprintf("cd /opt/app && git checkout %s && ./deploy.sh %s", branch, target)
+	out, err := exec.Command("sh", "-c", cmd).CombinedOutput()
+	if err != nil {
+		log.Printf("deploy failed: %s\n%s", err, out)
+		http.Error(w, "deploy failed", 500)
+		return
+	}
+	fmt.Fprintf(w, "Deployed %s to %s", branch, target)
+}
+
+// ───── Handler: Database query from env ─────
+
+// GET /admin/db-check
+// Runs a diagnostic SQL query read from environment.
+// VULN: os.Getenv flows into db.Query (SQL injection)
+func handleDBCheck(db *sql.DB) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		diagnosticQuery := os.Getenv("DIAGNOSTIC_QUERY")
+		rows, err := db.Query(diagnosticQuery)
+		if err != nil {
+			http.Error(w, "query failed: "+err.Error(), 500)
+			return
+		}
+		defer rows.Close()
+		fmt.Fprintln(w, "Query executed successfully")
+	}
+}
+
+// ───── Handler: Database exec from env ─────
+
+// POST /admin/db-migrate
+// Runs a migration statement from environment config.
+// VULN: os.Getenv flows into db.Exec (SQL injection)
+func handleDBMigrate(db *sql.DB) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		migration := os.Getenv("MIGRATION_SQL")
+		_, err := db.Exec(migration)
+		if err != nil {
+			http.Error(w, "migration failed: "+err.Error(), 500)
+			return
+		}
+		fmt.Fprintln(w, "Migration complete")
+	}
+}
+
+// ───── Handler: Safe output (HTML escaped) ─────
+
+// GET /api/greet
+// SAFE: environment value is escaped with html.EscapeString before being written out
+func handleGreet(w http.ResponseWriter, r *http.Request) {
+	name := os.Getenv("DEFAULT_GREETING")
+	safeName := html.EscapeString(name)
+	fmt.Fprintf(w, "<h1>Hello, %s</h1>", safeName)
+}
+
+// ───── Handler: Safe URL encoding ─────
+
+// GET /api/safe-redirect
+// SAFE: target is escaped before use (template.HTMLEscapeString stands in for url.QueryEscape)
+func handleSafeRedirect(w http.ResponseWriter, r *http.Request) {
+	// This would use url.QueryEscape in real code
+	target := os.Getenv("REDIRECT_URL")
+	safeTarget := template.HTMLEscapeString(target)
+	http.Redirect(w, r, "/go?url="+safeTarget, http.StatusFound)
+}
+
+func main() {
+	http.HandleFunc("/admin/run", handleAdminRun)
+	http.HandleFunc("/admin/deploy", handleDeploy)
+	log.Fatal(http.ListenAndServe(":8080", nil))
+}
diff --git a/tests/fixtures/java_service/Service.java b/tests/fixtures/java_service/Service.java
new file mode 100644
index 00000000..efa66f7e
--- /dev/null
+++ b/tests/fixtures/java_service/Service.java
@@ -0,0 +1,127 @@
+import java.io.*;
+import java.sql.*;
+import java.util.Random;
+
+/**
+ * Simulates a Java backend service handling HTTP requests.
+ * Contains realistic vulnerability patterns found in enterprise Java code.
+ */
+public class Service {
+
+    private Connection dbConn;
+
+    public Service(Connection dbConn) {
+        this.dbConn = dbConn;
+    }
+
+    // ───── Command execution from environment ─────
+
+    /**
+     * POST /admin/maintenance
+     * Runs a maintenance command from environment config.
+     * VULN: System.getenv flows into Runtime.exec (command injection)
+     */
+    public String handleMaintenance() throws IOException {
+        String cmd = System.getenv("MAINTENANCE_CMD");
+        Process proc = Runtime.getRuntime().exec(cmd);
+        BufferedReader reader = new BufferedReader(
+            new InputStreamReader(proc.getInputStream())
+        );
+        StringBuilder output = new StringBuilder();
+        String line;
+        while ((line = reader.readLine()) != null) {
+            output.append(line).append("\n");
+        }
+        return output.toString();
+    }
+
+    /**
+     * POST /admin/deploy
+     * Constructs a deploy command from multiple env vars.
+     * VULN: System.getenv flows into Runtime.exec
+     */
+    public void handleDeploy() throws IOException {
+        String target = System.getenv("DEPLOY_HOST");
+        String artifact = System.getenv("ARTIFACT_PATH");
+        String command = "scp " + artifact + " " + target + ":/opt/app/";
+        Runtime.getRuntime().exec(command);
+    }
+
+    // ───── SQL injection via string concatenation ─────
+
+    /**
+     * GET /api/users/search
+     * Searches users with a query parameter concatenated into SQL.
+     * VULN: System.getenv flows into executeQuery (SQL injection)
+     */
+    public ResultSet searchUsers(String searchTerm) throws SQLException {
+        String table = System.getenv("USERS_TABLE");
+        String sql = "SELECT * FROM " + table + " WHERE name LIKE '%" + searchTerm + "%'";
+        Statement stmt = dbConn.createStatement();
+        return stmt.executeQuery(sql);
+    }
+
+    /**
+     * POST /api/audit/log
+     * Writes an audit log entry using concatenated SQL.
+     * VULN: String concatenation in executeUpdate (SQL injection)
+     */
+    public void logAuditEvent(String event, String userId) throws SQLException {
+        String sql = "INSERT INTO audit_log (event, user_id, ts) VALUES ('"
+            + event + "', '" + userId + "', NOW())";
+        Statement stmt = dbConn.createStatement();
+        stmt.executeUpdate(sql);
+    }
+
+    // ───── Deserialization ─────
+
+    /**
+     * POST /api/session/restore
+     * Deserializes a session object from a byte stream.
+     * VULN: ObjectInputStream.readObject on untrusted data
+     */
+    public Object restoreSession(InputStream sessionData) throws Exception {
+        ObjectInputStream ois = new ObjectInputStream(sessionData);
+        Object session = ois.readObject();
+        ois.close();
+        return session;
+    }
+
+    // ───── Reflection ─────
+
+    /**
+     * POST /api/plugins/load
+     * Dynamically loads a class by name from environment config.
+     * VULN: System.getenv flows into Class.forName (unsafe reflection)
+     */
+    public Object loadPlugin() throws Exception {
+        String className = System.getenv("PLUGIN_CLASS");
+        Class<?> pluginClass = Class.forName(className);
+        return pluginClass.getDeclaredConstructor().newInstance();
+    }
+
+    // ───── Weak randomness ─────
+
+    /**
+     * Generates a session token using java.util.Random.
+     * VULN: insecure random — should use SecureRandom for tokens
+     */
+    public String generateSessionToken() {
+        Random rng = new Random();
+        long tokenValue = rng.nextLong();
+        return Long.toHexString(tokenValue);
+    }
+
+    // ───── Safe patterns ─────
+
+    /**
+     * SAFE: uses PreparedStatement (parameterized query).
+     */
+    public ResultSet safeSearch(String term) throws SQLException {
+        PreparedStatement pstmt = dbConn.prepareStatement(
+            "SELECT * FROM users WHERE name LIKE ?"
+        );
+        pstmt.setString(1, "%" + term + "%");
+        return pstmt.executeQuery();
+    }
+}
diff --git a/tests/fixtures/java_service/expectations.json b/tests/fixtures/java_service/expectations.json
new file mode 100644
index 00000000..a4e245b1
--- /dev/null
+++ b/tests/fixtures/java_service/expectations.json
@@ -0,0 +1,19 @@
+{
+  "required_findings": [
+    { "id_prefix": "taint-unsanitised-flow", "min_count": 2 },
+    { "id_prefix": "runtime_exec", "min_count": 2 },
+    { "id_prefix": "class_for_name", "min_count": 1 },
+    { "id_prefix": "cfg-unguarded-sink", "min_count": 2 }
+  ],
+  "forbidden_findings": [],
+  "noise_budget": {
+    "max_total_findings": 15,
+    "max_high_findings": 8
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
diff --git a/tests/fixtures/mixed_project/config.rs b/tests/fixtures/mixed_project/config.rs
new file mode 100644
index 00000000..66aa2efa
--- /dev/null
+++ b/tests/fixtures/mixed_project/config.rs
@@ -0,0 +1,68 @@
+use std::env;
+use std::fs;
+use std::process::Command;
+
+/// Infrastructure provisioning tool — Rust core.
+/// Reads infrastructure config from environment and executes provisioning commands.
+
+struct InfraConfig {
+    provider: String,
+    region: String,
+    ssh_key_path: String,
+    cluster_name: String,
+}
+
+fn load_infra_config() -> InfraConfig {
+    InfraConfig {
+        provider: env::var("CLOUD_PROVIDER").unwrap(),
+        region: env::var("CLOUD_REGION").unwrap(),
+        ssh_key_path: env::var("SSH_KEY_PATH").expect("SSH_KEY_PATH required"),
+        cluster_name: env::var("CLUSTER_NAME").unwrap(),
+    }
+}
+
+/// Provisions a new cluster by shelling out to the provider CLI.
+/// VULN: env var flows into Command (command injection)
+fn provision_cluster() {
+    let cfg = load_infra_config();
+    let cmd = format!(
+        "{}-cli create-cluster --name {} --region {} --ssh-key {}",
+        cfg.provider, cfg.cluster_name, cfg.region, cfg.ssh_key_path
+    );
+    let output = Command::new("sh")
+        .arg("-c")
+        .arg(&cmd)
+        .output()
+        .expect("provisioning failed");
+
+    if !output.status.success() {
+        panic!("Cluster provisioning failed: {}", String::from_utf8_lossy(&output.stderr));
+    }
+}
+
+/// Reads `/etc/terraform/main.tf` and runs `terraform apply -auto-approve` with a workspace var.
+/// VULN: file contents flow into Command
+fn apply_terraform() {
+    let state = fs::read_to_string("/etc/terraform/main.tf").unwrap();
+    let workspace = state.lines()
+        .find(|l| l.starts_with("workspace")) // the whole matching line is kept, not just a value
+        .unwrap_or("default"); // fallback when no line starts with "workspace"
+    Command::new("terraform")
+        .arg("apply")
+        .arg("-auto-approve")
+        .arg("-var")
+        .arg(format!("workspace={}", workspace))
+        .status()
+        .unwrap();
+}
+
+/// Destroys infrastructure — reads target from env.
+/// VULN: env var flows into Command
+fn destroy_cluster() {
+    let cluster = env::var("DESTROY_TARGET").unwrap();
+    Command::new("sh")
+        .arg("-c")
+        .arg(format!("kubectl delete cluster {}", cluster))
+        .status()
+        .expect("destroy failed");
+}
diff --git a/tests/fixtures/mixed_project/expectations.json b/tests/fixtures/mixed_project/expectations.json
new file mode 100644
index 00000000..05d0bf4a
--- /dev/null
+++ b/tests/fixtures/mixed_project/expectations.json
@@ -0,0 +1,21 @@
+{
+  "required_findings": [
+    { "id_prefix": "taint-unsanitised-flow", "min_count": 10 },
+    { "id_prefix": "eval_call", "min_count": 2 },
+    { "id_prefix": "unwrap_call", "min_count": 3 },
+    { "id_prefix": "expect_call", "min_count": 1 },
+    { "id_prefix": "panic_macro", "min_count": 1 },
+    { "id_prefix": "cfg-unguarded-sink", "min_count": 2 }
+  ],
+  "forbidden_findings": [],
+  "noise_budget": {
+    "max_total_findings": 40,
+    "max_high_findings": 20
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 2000,
+    "max_ms_index_cold": 3000,
+    "max_ms_index_warm": 1000,
+    "ci_mode": "lenient"
+  }
+}
diff --git a/tests/fixtures/mixed_project/handler.js b/tests/fixtures/mixed_project/handler.js
new file mode 100644
index 00000000..f16e1f27
--- /dev/null
+++ b/tests/fixtures/mixed_project/handler.js
@@ -0,0 +1,62 @@
+var child_process = require("child_process");
+var fs = require("fs");
+
+// Infrastructure provisioning tool — JavaScript CLI frontend.
+// Handles user commands and delegates to backend services.
+
+// ───── CLI command handler ─────
+
+// Executes a user-specified infrastructure command.
+// VULN: process.env flows into child_process.exec
+function executeInfraCommand() {
+    var provider = process.env.CLOUD_PROVIDER;
+    var action = process.env.INFRA_ACTION;
+    var cmd = provider + "-cli " + action;
+    child_process.exec(cmd, function(err, stdout, stderr) {
+        if (err) {
+            console.error("Infrastructure command failed:", stderr);
+            return;
+        }
+        console.log("Result:", stdout);
+    });
+}
+
+// ───── Template rendering ─────
+
+// Renders infrastructure status into the dashboard.
+// VULN: process.env flows into eval (code injection); the eval result is then assigned to innerHTML
+function renderStatusWidget() {
+    var templateCode = process.env.STATUS_WIDGET_TEMPLATE;
+    var widget = eval(templateCode);
+    document.getElementById("status").innerHTML = widget;
+}
+
+// ───── Provisioning log viewer ─────
+
+// Runs `cat <PROVISIONING_LOG_DIR>/latest.log` and writes the output into the "logs" element.
+// VULN: process.env → child_process.execSync (command injection); output is assigned to innerHTML
+function fetchProvisioningLogs() {
+    var logDir = process.env.PROVISIONING_LOG_DIR;
+    var output = child_process.execSync("cat " + logDir + "/latest.log");
+    document.getElementById("logs").innerHTML = output.toString();
+}
+
+// ───── SSH key management ─────
+
+// Generates an SSH key pair using a command from env.
+// VULN: process.env flows into child_process.spawn
+function generateSSHKey() {
+    var keygenPath = process.env.KEYGEN_BINARY;
+    var proc = child_process.spawn(keygenPath, ["-t", "ed25519", "-f", "/tmp/id_deploy"]);
+    proc.on("close", function(code) {
+        console.log("Key generation exited with code", code);
+    });
+}
+
+// ───── Safe utility ─────
+
+// SAFE: hardcoded command, no taint flow
+function checkKubectlVersion() {
+    var output = child_process.execSync("kubectl version --client --short");
+    console.log("kubectl:", output.toString());
+}
diff --git a/tests/fixtures/mixed_project/utils.py b/tests/fixtures/mixed_project/utils.py
new file mode 100644
index 00000000..57dbde90
--- /dev/null
+++ b/tests/fixtures/mixed_project/utils.py
@@ -0,0 +1,68 @@
+import os
+import subprocess
+import shlex
+
+# Infrastructure provisioning tool — Python automation scripts.
+# Handles configuration management and deployment automation.
+
+# ───── Configuration management ─────
+
+def sync_config():
+    """Syncs configuration from a remote source.
+    VULN: os.getenv flows into subprocess.run (command injection)
+    """
+    remote = os.getenv("CONFIG_REMOTE_URL")
+    local_dir = os.getenv("CONFIG_LOCAL_DIR")
+    subprocess.run(["rsync", "-avz", remote, local_dir])
+
+def apply_ansible_playbook():
+    """Runs an Ansible playbook from env-configured path.
+    VULN: os.getenv flows into subprocess.Popen (command injection)
+    """
+    playbook = os.getenv("ANSIBLE_PLAYBOOK")
+    inventory = os.getenv("ANSIBLE_INVENTORY")
+    proc = subprocess.Popen(
+        ["ansible-playbook", "-i", inventory, playbook],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    stdout, stderr = proc.communicate()
+    if proc.returncode != 0:
+        raise RuntimeError(f"Playbook failed: {stderr.decode()}")
+    return stdout.decode()
+
+# ───── Secret management ─────
+
+def rotate_secrets():
+    """Writes `value=rotated` to `secret/app/key` by shelling out to the `vault` CLI.
+    VULN: os.getenv (VAULT_ADDR) flows into os.system (command injection)
+    """
+    vault_addr = os.getenv("VAULT_ADDR")
+    vault_token = os.getenv("VAULT_TOKEN")  # NOTE: read but not used by the command below
+    os.system(f"vault write -address={vault_addr} secret/app/key value=rotated")
+
+def inject_secrets():
+    """Injects secrets into the environment from vault.
+    VULN: os.getenv flows into eval (code injection via env)
+    """
+    secret_loader = os.getenv("SECRET_LOADER_EXPR")
+    secrets = eval(secret_loader)
+    return secrets
+
+# ───── Monitoring ─────
+
+def check_service_health():
+    """Checks health of all configured services.
+    VULN: os.getenv flows into subprocess.call
+    """
+    services = os.getenv("MONITORED_SERVICES", "").split(",")
+    for svc in services:
+        subprocess.call(["curl", "-sf", f"http://{svc}/health"])
+
+# ───── Safe patterns ─────
+
+def safe_exec():
+    """SAFE: shlex.quote properly sanitizes before shell use."""
+    user_path = os.getenv("USER_PATH")
+    safe_path = shlex.quote(user_path)
+    subprocess.run(f"ls -la {safe_path}", shell=True, capture_output=True)
diff --git a/tests/fixtures/rust_web_app/config.rs b/tests/fixtures/rust_web_app/config.rs
new file mode 100644
index 00000000..4d044726
--- /dev/null
+++ b/tests/fixtures/rust_web_app/config.rs
@@ -0,0 +1,70 @@
+use std::env;
+use std::fs;
+
+/// Application configuration loaded from environment variables and config files.
+/// Realistic pattern: env vars parsed at startup, propagated through the app.
+
+pub struct DatabaseConfig {
+    pub host: String,
+    pub port: u16,
+    pub user: String,
+    pub password: String,
+    pub name: String,
+}
+
+pub struct ServerConfig {
+    pub listen_addr: String,
+    pub tls_cert_path: String,
+    pub tls_key_path: String,
+    pub session_secret: String,
+}
+
+pub struct Config {
+    pub db: DatabaseConfig,
+    pub server: ServerConfig,
+}
+
+impl Config {
+    /// Load config from environment.
+    /// Multiple env::var calls, each introducing a source.
+    pub fn from_env() -> Config {
+        Config {
+            db: DatabaseConfig {
+                host: env::var("DB_HOST").unwrap_or_else(|_| "localhost".into()),
+                port: env::var("DB_PORT")
+                    .unwrap_or_else(|_| "5432".into())
+                    .parse()
+                    .expect("DB_PORT must be a number"),
+                user: env::var("DB_USER").unwrap(),
+                password: env::var("DB_PASSWORD").unwrap(),
+                name: env::var("DB_NAME").unwrap(),
+            },
+            server: ServerConfig {
+                listen_addr: env::var("LISTEN_ADDR").unwrap_or_else(|_| "0.0.0.0:8080".into()),
+                tls_cert_path: env::var("TLS_CERT").unwrap_or_default(),
+                tls_key_path: env::var("TLS_KEY").unwrap_or_default(),
+                session_secret: env::var("SESSION_SECRET")
+                    .expect("SESSION_SECRET is required for cookie signing"),
+            },
+        }
+    }
+
+    /// Alternative: load from a TOML file.
+    /// fs::read_to_string is a file source.
+    pub fn from_file(path: &str) -> Config {
+        let raw = fs::read_to_string(path).unwrap();
+        // In real code this would be toml::from_str(&raw) but we simulate
+        // the pattern: file contents flowing into the app.
+        let _parsed = raw.lines().count();
+        Config::from_env() // fallback to env for now
+    }
+}
+
+/// Build a connection string from config.
+/// The password from env flows into a string that could be logged or misused.
+pub fn connection_string(cfg: &Config) -> String {
+    format!(
+        "postgres://{}:{}@{}:{}/{}",
+        cfg.db.user, cfg.db.password, cfg.db.host, cfg.db.port, cfg.db.name
+    )
+}
diff --git a/tests/fixtures/rust_web_app/expectations.json b/tests/fixtures/rust_web_app/expectations.json
new file mode 100644
index 00000000..983c2d0a
--- /dev/null
+++ b/tests/fixtures/rust_web_app/expectations.json
@@ -0,0 +1,21 @@
+{
+  "required_findings": [
+    { "id_prefix": "taint-unsanitised-flow", "min_count": 5 },
+    { "id_prefix": "unwrap_call", "min_count": 10 },
+    { "id_prefix": "expect_call", "min_count": 5 },
+    { "id_prefix": "unsafe_block", "min_count": 1 },
+    { "id_prefix": "panic_macro", "min_count": 1 },
+    { "id_prefix": "cfg-auth-gap", "min_count": 3 }
+  ],
+  "forbidden_findings": [],
+  "noise_budget": {
+    "max_total_findings": 45,
+    "max_high_findings": 15
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
diff --git a/tests/fixtures/rust_web_app/handler.rs b/tests/fixtures/rust_web_app/handler.rs
new file mode 100644
index 00000000..731f8a2f
--- /dev/null
+++ b/tests/fixtures/rust_web_app/handler.rs
@@ -0,0 +1,164 @@
+use std::collections::HashMap;
+use std::env;
+use std::fs;
+use std::process::Command;
+
+// ───── Configuration from environment ─────
+
+struct AppConfig {
+    db_url: String,
+    upload_dir: String,
+    admin_token: String,
+    log_level: String,
+}
+
+fn load_config() -> AppConfig {
+    AppConfig {
+        db_url: env::var("DATABASE_URL").unwrap(),
+        upload_dir: env::var("UPLOAD_DIR").unwrap(),
+        admin_token: env::var("ADMIN_TOKEN").expect("ADMIN_TOKEN must be set"),
+        log_level: env::var("LOG_LEVEL").unwrap_or_else(|_| "info".to_string()),
+    }
+}
+
+// ───── Request handling ─────
+
+struct Request {
+    path: String,
+    headers: HashMap<String, String>,
+    body: String,
+}
+
+struct Response {
+    status: u16,
+    body: String,
+}
+
+/// POST /admin/run-migration
+/// Reads a migration script name from the environment and executes it.
+/// VULN: env var flows directly into Command without sanitization.
+fn handle_migration() -> Response {
+    let script = env::var("MIGRATION_SCRIPT").unwrap();
+    let output = Command::new("bash")
+        .arg("-c")
+        .arg(&script)
+        .output()
+        .expect("migration failed");
+
+    Response {
+        status: 200,
+        body: String::from_utf8_lossy(&output.stdout).to_string(),
+    }
+}
+
+/// POST /admin/deploy
+/// Reads deployment target from config file (which is a source),
+/// then shells out.
+/// VULN: file contents flow into Command.
+fn handle_deploy() -> Response {
+    let manifest = fs::read_to_string("/etc/deploy/manifest.toml").unwrap();
+    let target = manifest.lines().next().unwrap();
+    let status = Command::new("rsync")
+        .arg("-avz")
+        .arg("./build/")
+        .arg(target)
+        .status()
+        .unwrap();
+
+    Response {
+        status: if status.success() { 200 } else { 500 },
+        body: format!("deploy exited with {}", status),
+    }
+}
+
+/// GET /admin/export
+/// Constructs a shell command from an env-var driven path.
+/// VULN: env var flows into Command::arg.
+fn handle_export() -> Response {
+    let config = load_config();
+    let dump_cmd = format!("pg_dump {}", config.db_url);
+    let output = Command::new("sh")
+        .arg("-c")
+        .arg(&dump_cmd)
+        .output()
+        .unwrap();
+
+    let dump_path = format!("{}/export.sql", config.upload_dir);
+    fs::write(&dump_path, &output.stdout).unwrap();
+
+    Response {
+        status: 200,
+        body: format!("Exported to {}", dump_path),
+    }
+}
+
+/// POST /admin/backup
+/// SAFE: uses a hardcoded command, no taint from external input.
+fn handle_backup() -> Response {
+    let output = Command::new("tar")
+        .arg("-czf")
+        .arg("/backups/nightly.tar.gz")
+        .arg("/var/data")
+        .output()
+        .expect("backup failed");
+
+    Response {
+        status: if output.status.success() { 200 } else { 500 },
+        body: "backup complete".to_string(),
+    }
+}
+
+/// POST /admin/cleanup
+/// SAFE: `sanitize_shell` strips shell metacharacters from the value before it reaches the sink.
+fn handle_cleanup() -> Response {
+    let dir = env::var("CLEANUP_DIR").unwrap();
+    let safe_dir = sanitize_shell(&dir);
+    let output = Command::new("rm")
+        .arg("-rf")
+        .arg(&safe_dir)
+        .output()
+        .unwrap();
+
+    Response {
+        status: 200,
+        body: format!("cleaned up, exit={}", output.status),
+    }
+}
+
+fn sanitize_shell(input: &str) -> String {
+    input.replace(['&', ';', '|', '$', '`', '\\', '"', '\''], "")
+}
+
+// ───── Unsafe FFI bridge ─────
+
+/// Copies `len` bytes starting at `ptr` into a new `Vec<u8>`.
+/// VULN: `unsafe fn` over `slice::from_raw_parts`; caller must ensure `ptr` is valid for `len` bytes.
+unsafe fn decode_legacy_buffer(ptr: *const u8, len: usize) -> Vec<u8> {
+    std::slice::from_raw_parts(ptr, len).to_vec()
+}
+
+/// Reinterprets the first 8 bytes of `bytes` as a native-endian `u64`; panics if fewer are given.
+/// VULN: raw `transmute` inside an `unsafe` block (`u64::from_ne_bytes` is the safe equivalent).
+fn parse_legacy_header(bytes: &[u8]) -> u64 {
+    if bytes.len() < 8 {
+        panic!("header too short");
+    }
+    unsafe { std::mem::transmute::<[u8; 8], u64>(bytes[..8].try_into().unwrap()) }
+}
+
+// ───── Utility functions with code smells ─────
+
+fn read_pid_file(path: &str) -> u32 {
+    let contents = fs::read_to_string(path).unwrap();
+    contents.trim().parse::<u32>().expect("invalid pid")
+}
+
+/// TODO: implement proper logging
+fn setup_logging() {
+    todo!()
+}
+
+fn debug_request(req: &Request) {
+    dbg!(&req.path);
+    dbg!(&req.body);
+}
diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs
new file mode 100644
index 00000000..791b40ba
--- /dev/null
+++ b/tests/integration_tests.rs
@@ -0,0 +1,178 @@
+mod common;
+
+use common::{assert_no_findings, scan_fixture_dir, validate_expectations};
+use nyx_scanner::utils::config::AnalysisMode;
+use std::collections::HashSet;
+use std::path::PathBuf;
+
+fn fixture_path(name: &str) -> PathBuf {
+    // Anchor at the manifest directory so the lookup does not depend on the working directory.
+    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    path.extend(["tests", "fixtures", name]);
+    path
+}
+
+// ── Per-fixture tests ──────────────────────────────────────────────────────
+
+/// Scans the named fixture in `Full` mode and validates the findings it produces.
+fn check_fixture(name: &str) {
+    let fixture = fixture_path(name);
+    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);
+    validate_expectations(&findings, &fixture);
+}
+
+/// Fixture: `tests/fixtures/rust_web_app`.
+#[test]
+fn rust_web_app() {
+    check_fixture("rust_web_app");
+}
+
+/// Fixture: `tests/fixtures/express_app`.
+#[test]
+fn express_app() {
+    check_fixture("express_app");
+}
+
+/// Fixture: `tests/fixtures/flask_app`.
+#[test]
+fn flask_app() {
+    check_fixture("flask_app");
+}
+
+/// Fixture: `tests/fixtures/go_server`.
+#[test]
+fn go_server() {
+    check_fixture("go_server");
+}
+
+/// Fixture: `tests/fixtures/c_utils`.
+#[test]
+fn c_utils() {
+    check_fixture("c_utils");
+}
+
+/// Fixture: `tests/fixtures/java_service`.
+#[test]
+fn java_service() {
+    check_fixture("java_service");
+}
+
+/// Fixture: `tests/fixtures/mixed_project`.
+#[test]
+fn mixed_project() {
+    check_fixture("mixed_project");
+}
+
+// ── Cross-cutting tests ───────────────────────────────────────────────────
+
+#[test]
+fn ast_only_mode_excludes_taint() {
+    let fixture = fixture_path("rust_web_app");
+    let findings = scan_fixture_dir(&fixture, AnalysisMode::Ast);
+    for prefix in ["taint-", "cfg-"] { // neither taint nor CFG rule IDs may appear
+        assert_no_findings(&findings, prefix);
+    }
+}
+
+#[test]
+fn taint_only_mode_excludes_ast() {
+    let fixture = fixture_path("rust_web_app");
+    let findings = scan_fixture_dir(&fixture, AnalysisMode::Taint);
+    // AST-only pattern rules must be absent when running in taint mode.
+    for rule_id in ["unwrap_call", "expect_call"] {
+        assert_no_findings(&findings, rule_id);
+    }
+}
+
+#[test]
+fn dedup_no_double_report() {
+    let fixture = fixture_path("rust_web_app");
+    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);
+
+    // A (path, line, col, rule_id) tuple identifies one finding and must be unique.
+    // Distinct rule IDs sharing a location (e.g., taint + cfg-auth-gap) are allowed.
+    let mut seen: HashSet<(String, usize, usize, String)> = HashSet::new();
+    let repeated: Vec<String> = findings
+        .iter()
+        // `insert` returns false when the key was already present, i.e. a duplicate.
+        .filter(|d| !seen.insert((d.path.clone(), d.line, d.col, d.id.clone())))
+        .map(|d| format!("{}:{}:{} {}", d.path, d.line, d.col, d.id))
+        .collect();
+
+    assert!(
+        repeated.is_empty(),
+        "Exact duplicate findings (same location + rule ID) found ({}):\n  {}",
+        repeated.len(),
+        repeated.join("\n  ")
+    );
+}
+
+#[test]
+fn mixed_project_multi_language() {
+    let fixture = fixture_path("mixed_project");
+    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);
+
+    // Gather the distinct file extensions that produced at least one finding.
+    let mut extensions: HashSet<&str> = HashSet::new();
+    for finding in &findings {
+        let ext = std::path::Path::new(&finding.path).extension();
+        if let Some(ext) = ext.and_then(|e| e.to_str()) {
+            extensions.insert(ext);
+        }
+    }
+
+    // More than one language has to contribute findings.
+    assert!(
+        extensions.len() >= 2,
+        "Expected findings from >= 2 language file extensions, got: {:?}",
+        extensions
+    );
+
+    // The fixture as a whole must yield at least 3 findings.
+    assert!(
+        findings.len() >= 3,
+        "Expected >= 3 total findings in mixed project, got {}",
+        findings.len()
+    );
+}
+
+// ── Binary smoke test ──────────────────────────────────────────────────────
+
+/// Smoke test: the `nyx` binary exits successfully and prints a non-empty JSON array.
+#[test]
+fn binary_json_output() {
+    let fixture = fixture_path("rust_web_app");
+    #[allow(deprecated)]
+    let cmd = assert_cmd::Command::cargo_bin("nyx")
+        .expect("nyx binary should exist")
+        .arg("scan")
+        .arg(&fixture) // passed as an OsStr, so no UTF-8 conversion (and no unwrap) is needed
+        .arg("--no-index")
+        .arg("--format")
+        .arg("json")
+        .output()
+        .expect("failed to execute nyx binary");
+
+    assert!(
+        cmd.status.success(),
+        "nyx scan exited with non-zero status: {:?}\nstderr: {}",
+        cmd.status,
+        String::from_utf8_lossy(&cmd.stderr)
+    );
+
+    let stdout = String::from_utf8_lossy(&cmd.stdout);
+    // Config notes and a "Finished" line surround the JSON, so start at the first '[' and let
+    // the streaming deserializer consume exactly one value. Unlike cutting at the first ']',
+    // this still works when a finding holds a nested array or a ']' inside a string.
+    let json_start = stdout.find('[').expect("Expected JSON array in stdout");
+    let parsed = serde_json::Deserializer::from_str(&stdout[json_start..])
+        .into_iter::<Vec<serde_json::Value>>()
+        .next()
+        .expect("Expected JSON array in stdout")
+        .expect("stdout should contain valid JSON array");
+
+    assert!(
+        !parsed.is_empty(),
+        "Expected at least 1 finding in JSON output"
+    );
+}
diff --git a/tests/perf_tests.rs b/tests/perf_tests.rs
new file mode 100644
index 00000000..99ab8c95
--- /dev/null
+++ b/tests/perf_tests.rs
@@ -0,0 +1,148 @@
+#[allow(dead_code)]
+mod common;
+
+use common::{load_expectations, test_config};
+use nyx_scanner::utils::config::AnalysisMode;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::time::Instant;
+
+fn fixture_path(name: &str) -> PathBuf {
+    // Anchor at the manifest directory so the lookup does not depend on the working directory.
+    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    path.extend(["tests", "fixtures", name]);
+    path
+}
+
+fn is_ci_bench() -> bool {
+    let env_is = |key: &str, want: &str| std::env::var(key).ok().as_deref() == Some(want);
+    env_is("NYX_CI_BENCH", "1") || env_is("GITHUB_ACTIONS", "true")
+}
+
+/// Times `iterations` runs of `scan_no_index` and returns the middle sample in milliseconds.
+fn bench_no_index(fixture_dir: &Path, iterations: usize) -> u64 {
+    let cfg = test_config(AnalysisMode::Full);
+    let mut samples: Vec<u64> = (0..iterations)
+        .map(|_| {
+            let started = Instant::now();
+            // Only the elapsed time matters here; the scan result itself is discarded.
+            let _ = nyx_scanner::scan_no_index(fixture_dir, &cfg);
+            started.elapsed().as_millis() as u64
+        })
+        .collect();
+    samples.sort_unstable();
+    samples[iterations / 2]
+}
+
+/// Benchmarks the indexed scan path and returns the middle `(cold_ms, warm_ms)` samples.
+/// Cold = build a fresh index in a new tempdir, then scan; warm = rescan that same index.
+fn bench_indexed(fixture_dir: &Path, iterations: usize) -> (u64, u64) {
+    use nyx_scanner::commands::index::build_index;
+    use nyx_scanner::commands::scan::scan_with_index_parallel;
+    use nyx_scanner::database::index::Indexer;
+
+    let cfg = test_config(AnalysisMode::Full);
+    let mut cold: Vec<u64> = Vec::with_capacity(iterations);
+    let mut warm: Vec<u64> = Vec::with_capacity(iterations);
+
+    for _ in 0..iterations {
+        // A new tempdir per iteration, so every cold run starts from an empty database path.
+        let scratch = tempfile::tempdir().expect("tempdir");
+        let db_path = scratch.path().join("bench.db");
+
+        // Cold: index construction, DB init and the first scan are timed together.
+        let cold_timer = Instant::now();
+        build_index("bench", fixture_dir, &db_path, &cfg).expect("build_index");
+        let pool = Indexer::init(&db_path).expect("db init");
+        let _ = scan_with_index_parallel("bench", Arc::clone(&pool), &cfg);
+        cold.push(cold_timer.elapsed().as_millis() as u64);
+
+        // Warm: rescan through the index that was just built; fixture files are unchanged.
+        let warm_timer = Instant::now();
+        let _ = scan_with_index_parallel("bench", Arc::clone(&pool), &cfg);
+        warm.push(warm_timer.elapsed().as_millis() as u64);
+    }
+
+    let mid = iterations / 2;
+    cold.sort_unstable();
+    warm.sort_unstable();
+    (cold[mid], warm[mid])
+}
+
+/// Benchmarks one fixture (no-index, index-cold, index-warm), prints the timings, and, when
+/// `is_ci_bench()` is true, asserts them against the thresholds loaded for that fixture.
+fn run_fixture_bench(name: &str) {
+    let fixture = fixture_path(name);
+    let expectations = load_expectations(&fixture);
+    let perf = &expectations.performance_expectations;
+    let iterations = 5;
+
+    let no_index_ms = bench_no_index(&fixture, iterations);
+    println!(
+        "[{name}] no-index: {no_index_ms}ms (threshold: {}ms)",
+        perf.max_ms_no_index
+    );
+
+    let (cold_ms, warm_ms) = bench_indexed(&fixture, iterations);
+    println!(
+        "[{name}] index-cold: {cold_ms}ms (threshold: {}ms)",
+        perf.max_ms_index_cold
+    );
+    println!(
+        "[{name}] index-warm: {warm_ms}ms (threshold: {}ms)",
+        perf.max_ms_index_warm
+    );
+
+    // Outside a CI benchmark run the timings are only printed, never asserted.
+    if !is_ci_bench() {
+        return;
+    }
+
+    let multiplier = if perf.ci_mode == "lenient" { 1.5 } else { 1.0 };
+    let checks = [
+        ("no-index", no_index_ms, (perf.max_ms_no_index as f64 * multiplier) as u64),
+        ("index-cold", cold_ms, (perf.max_ms_index_cold as f64 * multiplier) as u64),
+        ("index-warm", warm_ms, (perf.max_ms_index_warm as f64 * multiplier) as u64),
+    ];
+    for (label, actual_ms, limit_ms) in checks {
+        assert!(
+            actual_ms <= limit_ms,
+            "[{name}] {label} exceeded threshold: {actual_ms}ms > {limit_ms}ms"
+        );
+    }
+}
+
+#[test]
+fn perf_rust_web_app() {
+    run_fixture_bench("rust_web_app");
+}
+
+#[test]
+fn perf_express_app() {
+    run_fixture_bench("express_app");
+}
+
+#[test]
+fn perf_flask_app() {
+    run_fixture_bench("flask_app");
+}
+
+#[test]
+fn perf_go_server() {
+    run_fixture_bench("go_server");
+}
+
+#[test]
+fn perf_c_utils() {
+    run_fixture_bench("c_utils");
+}
+
+#[test]
+fn perf_java_service() {
+    run_fixture_bench("java_service");
+}
+
+#[test]
+fn perf_mixed_project() {
+    run_fixture_bench("mixed_project");
+}