//! SQLite connection pool and schema for the incremental index. //! //! The index stores file content hashes, per-file scan results, and function //! summaries so subsequent scans can skip files whose content has not changed. //! The pool is backed by [`r2d2`] with WAL journaling, `synchronous=NORMAL`, //! and memory-mapped I/O tuned for large codebases. //! //! Tables: `files`, `issues`, `function_summaries`, `ssa_function_summaries`. //! SSA-specific persistence lives in [`crate::summary::ssa_summary`]; routines //! here cover function summaries and file-level hash bookkeeping. pub mod index { #![allow(clippy::too_many_arguments, clippy::type_complexity)] use crate::commands::scan::Diag; use crate::errors::{NyxError, NyxResult}; use crate::patterns::Severity; use r2d2::{Pool, PooledConnection}; use r2d2_sqlite::SqliteConnectionManager; use rusqlite::{Connection, OpenFlags, OptionalExtension, params}; use std::fs; use std::io::Read; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, SystemTime, UNIX_EPOCH}; /// How long each SQLite connection waits for the single writer slot. /// /// Indexed scans can have dozens of Rayon workers finishing analysis at /// once. SQLite still permits only one writer, so a timeout here turns that /// burst into short backpressure instead of surfacing SQLITE_BUSY. const SQLITE_BUSY_TIMEOUT: Duration = Duration::from_secs(60); /// DB schema (foreign‑keys enabled). const SCHEMA: &str = r#" PRAGMA foreign_keys = ON; CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, path TEXT NOT NULL, hash BLOB NOT NULL, mtime INTEGER NOT NULL, scanned_at INTEGER NOT NULL, UNIQUE(project, path) ); CREATE TABLE IF NOT EXISTS issues (file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE, rule_id TEXT NOT NULL, severity TEXT NOT NULL, line INTEGER NOT NULL, col INTEGER NOT NULL, PRIMARY KEY (file_id, rule_id, line, col)); CREATE TABLE IF NOT EXISTS function_summaries ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, name TEXT NOT NULL, arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL, container TEXT NOT NULL DEFAULT '', disambig INTEGER, kind TEXT NOT NULL DEFAULT 'fn', summary TEXT NOT NULL, entry_kind TEXT, updated_at INTEGER NOT NULL, UNIQUE(project, file_path, name, container, arity, disambig, kind) ); CREATE TABLE IF NOT EXISTS ssa_function_summaries ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, name TEXT NOT NULL, arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL, namespace TEXT NOT NULL DEFAULT '', container TEXT NOT NULL DEFAULT '', disambig INTEGER, kind TEXT NOT NULL DEFAULT 'fn', summary TEXT NOT NULL, entry_kind TEXT, updated_at INTEGER NOT NULL, UNIQUE(project, file_path, name, container, arity, disambig, kind) ); CREATE TABLE IF NOT EXISTS auth_check_summaries ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, name TEXT NOT NULL, arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL, namespace TEXT NOT NULL DEFAULT '', container TEXT NOT NULL DEFAULT '', disambig INTEGER, kind TEXT NOT NULL DEFAULT 'fn', summary TEXT NOT NULL, updated_at INTEGER NOT NULL, UNIQUE(project, file_path, name, container, arity, disambig, kind) ); CREATE TABLE IF NOT EXISTS ssa_function_bodies ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, name TEXT NOT NULL, arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL, namespace TEXT NOT NULL DEFAULT '', container TEXT NOT NULL DEFAULT '', disambig INTEGER, kind TEXT NOT NULL DEFAULT 'fn', body BLOB NOT NULL, updated_at INTEGER NOT NULL, UNIQUE(project, file_path, name, container, arity, disambig, kind) ); CREATE TABLE IF NOT EXISTS cross_package_imports ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, namespace TEXT NOT NULL, imports BLOB NOT NULL, updated_at INTEGER NOT NULL, UNIQUE(project, file_path) ); CREATE TABLE IF NOT EXISTS scans ( id TEXT PRIMARY KEY, status TEXT NOT NULL, scan_root TEXT NOT NULL, started_at TEXT, finished_at TEXT, duration_secs REAL, engine_version TEXT, languages TEXT, files_scanned INTEGER, files_skipped INTEGER, finding_count INTEGER, findings_json TEXT, timing_json TEXT, error TEXT ); CREATE TABLE IF NOT EXISTS scan_metrics ( scan_id TEXT PRIMARY KEY REFERENCES scans(id) ON DELETE CASCADE, cfg_nodes INTEGER, call_edges INTEGER, functions_analyzed INTEGER, summaries_reused INTEGER, unresolved_calls INTEGER ); CREATE TABLE IF NOT EXISTS scan_logs ( id INTEGER PRIMARY KEY AUTOINCREMENT, scan_id TEXT NOT NULL REFERENCES scans(id) ON DELETE CASCADE, timestamp TEXT NOT NULL, level TEXT NOT NULL, message TEXT NOT NULL, file_path TEXT, detail TEXT ); CREATE INDEX IF NOT EXISTS idx_scan_logs_scan ON scan_logs(scan_id); CREATE TABLE IF NOT EXISTS triage_states ( fingerprint TEXT PRIMARY KEY, state TEXT NOT NULL DEFAULT 'open', note TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS triage_audit_log ( id INTEGER PRIMARY KEY AUTOINCREMENT, fingerprint TEXT NOT NULL, action TEXT NOT NULL, previous_state TEXT NOT NULL, new_state TEXT NOT NULL, note TEXT NOT NULL DEFAULT '', timestamp TEXT NOT NULL ); CREATE INDEX IF NOT EXISTS idx_triage_audit_fp ON triage_audit_log(fingerprint); CREATE INDEX IF NOT EXISTS idx_triage_audit_ts ON triage_audit_log(timestamp); CREATE TABLE IF NOT EXISTS nyx_metadata ( key TEXT PRIMARY KEY, value TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS triage_suppression_rules ( id INTEGER PRIMARY KEY AUTOINCREMENT, suppress_by TEXT NOT NULL, match_value TEXT NOT NULL, state TEXT NOT NULL DEFAULT 'suppressed', note TEXT NOT NULL DEFAULT '', created_at TEXT NOT NULL, UNIQUE(suppress_by, match_value) ); -- First time we observed each finding fingerprint. Lazy-populated by the -- overview endpoint when computing backlog age — INSERT OR IGNORE means -- only the earliest scan that mentioned a fingerprint sticks. CREATE TABLE IF NOT EXISTS finding_first_seen ( fingerprint TEXT PRIMARY KEY, first_seen_at TEXT NOT NULL ); -- Dynamic verdict cache (§12 Q5). -- Keyed on (spec_hash, entry_content_hash, transitive_import_digest). -- Invalidation: any of entry content, import digest, toolchain_id, -- corpus_version, or spec_format_version change → DELETE row → re-run. CREATE TABLE IF NOT EXISTS dynamic_verdict_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, spec_hash TEXT NOT NULL, entry_content_hash TEXT NOT NULL, transitive_import_digest TEXT NOT NULL, toolchain_id TEXT NOT NULL, corpus_version INTEGER NOT NULL, spec_format_version INTEGER NOT NULL, verdict_json TEXT NOT NULL, created_at TEXT NOT NULL, UNIQUE(spec_hash, entry_content_hash, transitive_import_digest, toolchain_id, corpus_version, spec_format_version) ); CREATE INDEX IF NOT EXISTS idx_dynamic_verdict_cache_spec_hash ON dynamic_verdict_cache(spec_hash); -- Phase 21: persisted attack-surface map. One row per project. -- Stored as canonical JSON so the round-trip is byte-identical -- across rescans (see `SurfaceMap::to_json`). CREATE TABLE IF NOT EXISTS surface_map ( project TEXT PRIMARY KEY, map_json BLOB NOT NULL, updated_at INTEGER NOT NULL ); -- Indexes on (project, file_path) for the per-file replace_* paths. -- Without these, every DELETE WHERE project=? AND file_path=? does a -- full table scan, which dominates indexing time as the cache grows. CREATE INDEX IF NOT EXISTS idx_function_summaries_project_file ON function_summaries(project, file_path); CREATE INDEX IF NOT EXISTS idx_ssa_function_summaries_project_file ON ssa_function_summaries(project, file_path); CREATE INDEX IF NOT EXISTS idx_ssa_function_bodies_project_file ON ssa_function_bodies(project, file_path); CREATE INDEX IF NOT EXISTS idx_auth_check_summaries_project_file ON auth_check_summaries(project, file_path); CREATE INDEX IF NOT EXISTS idx_cross_package_imports_project_file ON cross_package_imports(project, file_path); "#; /// Engine version used to detect stale caches across upgrades. pub const ENGINE_VERSION: &str = env!("CARGO_PKG_VERSION"); /// On-disk schema version for cached analysis data. /// /// Bumped independently of `ENGINE_VERSION` whenever the serialized /// layout or identity of a cached artefact changes in an incompatible /// way, e.g. a `FuncKey` field semantic change that would cause old /// summaries to misbehave when rehydrated. /// /// History: /// * `"1"`, initial. /// * `"2"`, 0.5.0: `FuncKey.disambig` changed from the function-node /// byte offset to a depth-first structural index. Pre-0.5.0 caches /// store byte-offset disambigs and would fail to match bodies built /// by the new engine, so they are silently rebuilt on open. /// * `"3"`, `ssa_function_bodies.body` changed from JSON TEXT to /// bincode BLOB. Old JSON payloads cannot be deserialised by the /// new engine, so they are silently rebuilt on open. /// * `"4"`, `Cap` widened from u16 to u32 to accommodate cap bits /// ≥ 14 (LDAP_INJECTION, XPATH_INJECTION, HEADER_INJECTION, /// OPEN_REDIRECT, SSTI, XXE, PROTOTYPE_POLLUTION). The `Cap` /// deserialiser accepts both u16- and u32-width JSON values, so /// pre-bump caches load without crashing, but the cached /// `source_caps` / `sanitizer_caps` / `sink_caps` blobs were /// produced before any of these caps could appear and would /// underreport rules that emit them. Bumping forces a rescan so /// newly-emitted gates and sinks land in the cache with the wider /// footprint. pub const SCHEMA_VERSION: &str = "4"; /// A single issue row, ready for insertion. #[derive(Debug, Clone)] pub struct IssueRow<'a> { pub rule_id: &'a str, pub severity: &'a str, pub line: i64, pub col: i64, } type IndexWriteJob = Box NyxResult<()> + Send + 'static>; #[derive(Default)] struct IndexWriteReport { error_count: usize, samples: Vec, } impl IndexWriteReport { fn record(&mut self, err: impl ToString) { self.error_count += 1; if self.samples.len() < 8 { self.samples.push(err.to_string()); } } } /// Bounded handle for submitting persisted-index writes. /// /// The scanner can keep parsing in parallel while this sender applies /// backpressure when SQLite's single writer falls behind. #[derive(Clone)] pub(crate) struct IndexWriteSender { tx: crossbeam_channel::Sender, } impl IndexWriteSender { pub(crate) fn enqueue(&self, job: F) -> NyxResult<()> where F: FnOnce(&mut Indexer) -> NyxResult<()> + Send + 'static, { self.tx .send(Box::new(job)) .map_err(|_| NyxError::Msg("database writer stopped before accepting write".into())) } } /// Single-writer queue for project index mutations. /// /// SQLite permits many readers but only one writer. Parallel scans should /// therefore submit analyzed file results here instead of letting every /// Rayon worker compete for the writer lock. pub(crate) struct IndexWriteQueue { tx: IndexWriteSender, handle: std::thread::JoinHandle, } impl IndexWriteQueue { pub(crate) fn start( project: impl Into, pool: Arc>, ) -> Self { let capacity = std::env::var("NYX_INDEX_WRITE_QUEUE_MAX") .ok() .and_then(|v| v.parse::().ok()) .filter(|n| *n >= 1) .unwrap_or_else(|| (num_cpus::get() * 2).max(64)); Self::start_with_capacity(project, pool, capacity) } pub(crate) fn start_with_capacity( project: impl Into, pool: Arc>, capacity: usize, ) -> Self { let project = project.into(); let (tx, rx) = crossbeam_channel::bounded::(capacity.max(1)); let handle = std::thread::spawn(move || { let mut report = IndexWriteReport::default(); let mut idx = match Indexer::from_pool(&project, &pool) { Ok(idx) => idx, Err(err) => { report.record(format!("writer init: {err}")); return report; } }; for job in rx { if let Err(err) = job(&mut idx) { report.record(err); } } report }); Self { tx: IndexWriteSender { tx }, handle, } } pub(crate) fn sender(&self) -> IndexWriteSender { self.tx.clone() } pub(crate) fn finish(self, stage: &str) -> NyxResult<()> { let Self { tx, handle } = self; drop(tx); let report = handle .join() .map_err(|_| NyxError::Msg(format!("{stage} database writer panicked")))?; if report.error_count == 0 { return Ok(()); } let mut details = report.samples; if report.error_count > details.len() { details.push(format!( "... and {} more", report.error_count - details.len() )); } Err(NyxError::Msg(format!( "{stage} failed to persist scan state: {}", details.join("; ") ))) } } /// A scan record for DB persistence. #[derive(Debug, Clone)] pub struct ScanRecord { pub id: String, pub status: String, pub scan_root: String, pub started_at: Option, pub finished_at: Option, pub duration_secs: Option, pub engine_version: Option, pub languages: Option, pub files_scanned: Option, pub files_skipped: Option, pub finding_count: Option, pub findings_json: Option, pub timing_json: Option, pub error: Option, } /// A triage audit log entry. #[derive(Debug, Clone, serde::Serialize)] pub struct AuditEntry { pub id: i64, pub fingerprint: String, pub action: String, pub previous_state: String, pub new_state: String, pub note: String, pub timestamp: String, } /// A pattern-based suppression rule. #[derive(Debug, Clone, serde::Serialize)] pub struct SuppressionRule { pub id: i64, pub suppress_by: String, pub match_value: String, pub state: String, pub note: String, pub created_at: String, } pub struct Indexer { conn: PooledConnection, project: String, } /// SQLite database files start with this 16-byte ASCII magic. const SQLITE_MAGIC: &[u8; 16] = b"SQLite format 3\0"; /// Reject obviously non-SQLite files before handing them to the /// connection pool, where the same rejection costs minutes instead of /// microseconds on some corruption shapes. /// /// Returns `Ok(())` when: /// * the file does not exist (the pool will `CREATE` it), /// * the file is zero-length (SQLite treats this as a fresh DB), /// * the first 16 bytes match the SQLite magic header, /// * the file is shorter than the magic but non-empty (extremely /// unusual; we defer to SQLite rather than gating arbitrarily). /// /// Returns `Err(NyxError::Sql(...))` carrying `SQLITE_NOTADB` when the /// header is present but does not match. fn preflight_header(database_path: &Path) -> NyxResult<()> { let Ok(meta) = fs::metadata(database_path) else { return Ok(()); }; if !meta.is_file() { return Ok(()); } if meta.len() < SQLITE_MAGIC.len() as u64 { return Ok(()); } let mut head = [0u8; 16]; let mut f = fs::File::open(database_path)?; f.read_exact(&mut head)?; if &head != SQLITE_MAGIC { return Err(NyxError::Sql(rusqlite::Error::SqliteFailure( rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_NOTADB), Some(format!( "file at {} is not a SQLite database (header magic mismatch)", database_path.display(), )), ))); } Ok(()) } impl Indexer { pub fn init(database_path: &Path) -> NyxResult>> { let _span = tracing::info_span!("db_init", path = %database_path.display()).entered(); // Fast-fail when the existing file is clearly not a SQLite // database. Without this guard, certain corruption shapes // (truncated header, header overwritten with arbitrary bytes, // mid-page damage that preserves magic) can keep SQLite busy // for 150-200 seconds inside the PRAGMA / schema execution // below before it surfaces SQLITE_NOTADB or SQLITE_CORRUPT. // A zero-length file is treated as a fresh DB by SQLite, so we // only validate when the file is large enough to hold the // 16-byte magic header. preflight_header(database_path)?; // NO_MUTEX is safe because r2d2 ensures each pooled connection // is only ever used by one thread at a time. Combined with WAL // mode this allows concurrent readers + a single writer without // the global serialization that FULL_MUTEX causes. let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_NO_MUTEX; { let conn = Self::open_configured_connection(database_path, flags)?; conn.pragma_update(None, "journal_mode", "WAL")?; conn.execute_batch(SCHEMA)?; // Migrate: if the function_summaries table is missing any required // column (arity for older schemas; container/disambig/kind for the // richer FuncKey identity), drop and recreate it so the data layout // matches the current model. let fn_cols: std::collections::HashSet = conn .prepare("PRAGMA table_info(function_summaries)") .and_then(|mut s| { let cols: Vec = s .query_map([], |r| r.get::<_, String>(1))? .filter_map(Result::ok) .collect(); Ok(cols.into_iter().collect()) }) .unwrap_or_default(); let fn_ok = fn_cols.contains("arity") && fn_cols.contains("container") && fn_cols.contains("disambig") && fn_cols.contains("kind"); if !fn_ok { tracing::info!( "migrating function_summaries: recreating table with identity columns" ); conn.execute_batch("DROP TABLE IF EXISTS function_summaries;")?; conn.execute_batch(SCHEMA)?; } // Migrate: verify SSA tables carry namespace + container/disambig/kind. let ssa_cols: std::collections::HashSet = conn .prepare("PRAGMA table_info(ssa_function_summaries)") .and_then(|mut s| { let cols: Vec = s .query_map([], |r| r.get::<_, String>(1))? .filter_map(Result::ok) .collect(); Ok(cols.into_iter().collect()) }) .unwrap_or_default(); let ssa_ok = ssa_cols.contains("namespace") && ssa_cols.contains("container") && ssa_cols.contains("disambig") && ssa_cols.contains("kind"); if !ssa_ok { tracing::info!("migrating ssa_function_summaries: recreating tables"); conn.execute_batch("DROP TABLE IF EXISTS ssa_function_summaries;")?; conn.execute_batch("DROP TABLE IF EXISTS ssa_function_bodies;")?; conn.execute_batch(SCHEMA)?; } // ssa_function_bodies may have been created with the old column set // even when ssa_function_summaries is current (e.g. partial // migrations). Check and recreate independently. let body_cols: std::collections::HashSet = conn .prepare("PRAGMA table_info(ssa_function_bodies)") .and_then(|mut s| { let cols: Vec = s .query_map([], |r| r.get::<_, String>(1))? .filter_map(Result::ok) .collect(); Ok(cols.into_iter().collect()) }) .unwrap_or_default(); let body_ok = body_cols.contains("namespace") && body_cols.contains("container") && body_cols.contains("disambig") && body_cols.contains("kind"); if !body_ok { tracing::info!("migrating ssa_function_bodies: recreating table"); conn.execute_batch("DROP TABLE IF EXISTS ssa_function_bodies;")?; conn.execute_batch(SCHEMA)?; } // Phase 10 — `entry_kind` column on (ssa_)function_summaries. // Non-destructive `ALTER TABLE ... ADD COLUMN` so existing // rows survive the upgrade. The column is nullable; the // INSERT paths write the JSON-encoded `EntryKind` text or // NULL when the function is not an entry point. Self::ensure_column(&conn, "function_summaries", "entry_kind", "TEXT")?; Self::ensure_column(&conn, "ssa_function_summaries", "entry_kind", "TEXT")?; // Ensure the auth_check_summaries table exists for DBs // created before this column set was introduced. The // `CREATE TABLE IF NOT EXISTS` in SCHEMA handles new DBs; // this branch only fires when the table is missing // entirely from a pre-existing DB. let auth_exists: bool = conn .query_row( "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'auth_check_summaries'", [], |_| Ok(true), ) .optional()? .unwrap_or(false); if !auth_exists { tracing::info!("creating auth_check_summaries table"); conn.execute_batch(SCHEMA)?; } // Phase 09 indexed-mode parity: ensure the // `cross_package_imports` table exists for DBs created // before this column set was introduced. `CREATE TABLE // IF NOT EXISTS` in SCHEMA handles new DBs; this branch // only fires when the table is missing entirely from a // pre-existing DB. let cpi_exists: bool = conn .query_row( "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'cross_package_imports'", [], |_| Ok(true), ) .optional()? .unwrap_or(false); if !cpi_exists { tracing::info!("creating cross_package_imports table"); conn.execute_batch(SCHEMA)?; } // Phase 21: ensure the `surface_map` table exists on // DBs created before this column set was introduced. let surface_exists: bool = conn .query_row( "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'surface_map'", [], |_| Ok(true), ) .optional()? .unwrap_or(false); if !surface_exists { tracing::info!("creating surface_map table"); conn.execute_batch(SCHEMA)?; } // Schema version check: invalidate cached summary tables // when the on-disk artefact layout has changed in an // incompatible way, independently of the engine version. // Runs before `check_engine_version` so the engine-version // branch below does not race with a stale schema. Self::check_schema_version(&conn)?; // Engine version check: invalidate all caches when the scanner // version changes so stale serialized data cannot be loaded. Self::check_engine_version(&conn)?; } let manager = SqliteConnectionManager::file(database_path) .with_flags(flags) .with_init(Self::configure_connection); // r2d2's default `max_size` is 10, which can stall rayon // workers on machines with more cores than that during the // parallel indexing pass. Size the pool to comfortably hold // a connection per rayon thread plus a small slack. // // `NYX_INDEX_POOL_MAX` overrides the auto-sized default. Use it in // fd-constrained environments (test sandboxes, containers with low // ulimit) where many parallel indexed scans would otherwise exhaust // EMFILE: each pooled SQLite WAL connection costs ~3 fds (db + -wal // + -shm), so 30 parallel scans × 16 conns × 3 fds = 1440 fds. let max_conns = std::env::var("NYX_INDEX_POOL_MAX") .ok() .and_then(|v| v.parse::().ok()) .filter(|n| *n >= 1) .unwrap_or_else(|| (num_cpus::get() as u32 + 4).max(16)); let pool = Arc::new(Pool::builder().max_size(max_conns).build(manager)?); Ok(pool) } fn open_configured_connection( database_path: &Path, flags: OpenFlags, ) -> rusqlite::Result { let mut conn = Connection::open_with_flags(database_path, flags)?; Self::configure_connection(&mut conn)?; Ok(conn) } fn configure_connection(conn: &mut Connection) -> rusqlite::Result<()> { conn.busy_timeout(SQLITE_BUSY_TIMEOUT)?; conn.pragma_update(None, "foreign_keys", "ON")?; conn.pragma_update(None, "synchronous", "NORMAL")?; conn.pragma_update(None, "cache_size", -8000i64)?; // 8 MB conn.pragma_update(None, "temp_store", "MEMORY")?; conn.pragma_update(None, "mmap_size", 268_435_456i64)?; // 256 MB Ok(()) } /// Add a column to an existing table when it is missing. /// /// Non-destructive: leaves all existing rows untouched, populating /// the new column with NULL. Used to thread additive schema /// changes (Phase 10's `entry_kind`) into pre-existing databases /// without forcing a full cache rebuild. fn ensure_column( conn: &Connection, table: &str, column: &str, sqlite_type: &str, ) -> NyxResult<()> { let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?; let cols: std::collections::HashSet = stmt .query_map([], |r| r.get::<_, String>(1))? .filter_map(Result::ok) .collect(); if cols.contains(column) { return Ok(()); } tracing::info!("adding column {column} to {table}"); conn.execute_batch(&format!( "ALTER TABLE {table} ADD COLUMN {column} {sqlite_type}" ))?; Ok(()) } /// Check stored schema version against the compiled-in value. /// /// On mismatch (including first-time open), wipe the cached /// summary tables so pre-schema-bump artefacts cannot be /// rehydrated against the current engine. Intentionally does /// not drop `files`, `scans`, or triage data: those are not /// layout-sensitive across this bump. fn check_schema_version(conn: &Connection) -> NyxResult<()> { let stored: Option = conn .query_row( "SELECT value FROM nyx_metadata WHERE key = 'schema_version'", [], |r| r.get(0), ) .optional()?; let current = SCHEMA_VERSION; match stored { Some(ref v) if v == current => { // Schema version matches, nothing to do. } _ => { let old = stored.as_deref().unwrap_or(""); tracing::info!( "db schema version changed ({old} → {current}), clearing summary caches" ); // Drop ssa_function_bodies entirely: column type changed // to BLOB in v3 and `CREATE TABLE IF NOT EXISTS` will // not migrate the column on an existing table. conn.execute_batch( "DROP TABLE IF EXISTS ssa_function_bodies; DELETE FROM function_summaries; DELETE FROM ssa_function_summaries; DELETE FROM auth_check_summaries; DELETE FROM files; DROP TABLE IF EXISTS cross_package_imports;", )?; conn.execute_batch(SCHEMA)?; conn.execute( "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('schema_version', ?1)", params![current], )?; } } Ok(()) } /// Check stored engine version against the running binary. /// On mismatch (or missing row), wipe all cached analysis data so /// every file is rescanned with the new engine. fn check_engine_version(conn: &Connection) -> NyxResult<()> { let stored: Option = conn .query_row( "SELECT value FROM nyx_metadata WHERE key = 'engine_version'", [], |r| r.get(0), ) .optional()?; let current = ENGINE_VERSION; match stored { Some(ref v) if v == current => { // Version matches, nothing to do. } _ => { let old = stored.as_deref().unwrap_or(""); tracing::info!("engine version changed ({old} → {current}), rebuilding index"); // Wipe all cached summaries and file hashes so everything // gets rescanned. conn.execute_batch( "DELETE FROM function_summaries; DELETE FROM ssa_function_summaries; DELETE FROM ssa_function_bodies; DELETE FROM auth_check_summaries; DELETE FROM files;", )?; conn.execute( "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)", params![current], )?; } } Ok(()) } /// Persist the current engine version into metadata. /// /// Called after a successful scan to ensure the metadata row exists /// even for a freshly created database. pub fn write_engine_version(pool: &Pool) -> NyxResult<()> { let conn = pool.get()?; conn.execute( "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)", params![ENGINE_VERSION], )?; Ok(()) } /// Force a specific engine version into the metadata table. /// Used by tests to simulate version mismatch scenarios. #[cfg(test)] pub fn set_engine_version( pool: &Pool, version: &str, ) -> NyxResult<()> { let conn = pool.get()?; conn.execute( "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)", params![version], )?; Ok(()) } /// Read the stored engine version from metadata. Returns None if not set. #[cfg(test)] pub fn get_stored_engine_version( pool: &Pool, ) -> NyxResult> { let conn = pool.get()?; let v: Option = conn .query_row( "SELECT value FROM nyx_metadata WHERE key = 'engine_version'", [], |r| r.get(0), ) .optional()?; Ok(v) } /// Count rows in a table for a given project. Test helper. #[cfg(test)] pub fn count_rows( pool: &Pool, table: &str, project: &str, ) -> NyxResult { let conn = pool.get()?; // table name can't be parameterized; this is test-only code with trusted inputs. let sql = format!("SELECT COUNT(*) FROM {table} WHERE project = ?1"); let count: i64 = conn.query_row(&sql, params![project], |r| r.get(0))?; Ok(count) } /// Create a pool with init (schema + migrations + version check) for testing. /// This is `init()` but exposed under a clearer name for tests. #[cfg(test)] pub fn init_for_test( database_path: &Path, ) -> NyxResult>> { Self::init(database_path) } pub fn from_pool(project: &str, pool: &Pool) -> NyxResult { let conn = pool.get()?; Ok(Self { conn, project: project.to_owned(), }) } // helper so code below can treat PooledConnection like &Connection fn c(&self) -> &Connection { self.conn.deref() } /// Return true when the file *content* or *mtime* changed since the last scan. /// /// Short-circuits on mtime: if the stored mtime matches the /// filesystem mtime, the file is assumed unchanged (skip hash). /// Production scans use `should_scan_with_hash`, which avoids the /// redundant `digest_file` read; this variant exists for tests. #[cfg(test)] pub fn should_scan(&self, path: &Path) -> NyxResult { let meta = fs::metadata(path)?; let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64; let row: Option<(Vec, i64)> = self .conn .query_row( "SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2", params![self.project, path.to_string_lossy()], |r| Ok((r.get(0)?, r.get(1)?)), ) .optional()?; Ok(match row { Some((stored_hash, stored_mtime)) => { if stored_mtime != mtime { // mtime changed, must re-scan true } else { // mtime matches, compare hash only if cheap // (the caller already read the file and can use // should_scan_with_hash instead for full accuracy) let digest = Self::digest_file(path)?; stored_hash != digest } } None => true, }) } /// Like `should_scan` but accepts a pre-computed hash to avoid /// redundant file reads. pub fn should_scan_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult { let row: Option> = self .conn .query_row( "SELECT hash FROM files WHERE project = ?1 AND path = ?2", params![self.project, path.to_string_lossy()], |r| r.get(0), ) .optional()?; Ok(match row { Some(stored_hash) => stored_hash != hash, None => true, }) } /// Insert or update the `files` row and return its id. pub fn upsert_file(&self, path: &Path) -> NyxResult { let bytes = fs::read(path)?; let hash = Self::digest_bytes(&bytes); self.upsert_file_with_hash(path, &hash) } /// Insert or update the `files` row using a pre-computed hash. /// Avoids redundant file reads when the caller already has the hash. pub fn upsert_file_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult { let meta = fs::metadata(path)?; let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64; let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; let path_str = path.to_string_lossy(); // Use a single statement: upsert then query the id. self.c().execute( "INSERT INTO files (project, path, hash, mtime, scanned_at) VALUES (?1, ?2, ?3, ?4, ?5) ON CONFLICT(project,path) DO UPDATE SET hash = excluded.hash, mtime = excluded.mtime, scanned_at = excluded.scanned_at", params![self.project, path_str, hash, mtime, scanned_at], )?; let id: i64 = self.c().query_row( "SELECT id FROM files WHERE project = ?1 AND path = ?2", params![self.project, path_str], |r| r.get(0), )?; Ok(id) } /// Replace all issues for `file_id` with the supplied set. /// /// Dedups rows by the same PRIMARY KEY the `issues` table enforces /// (`file_id, rule_id, line, col`) to defend against upstream bugs /// that produce same-keyed diagnostics with differing severity or /// cosmetic fields. The first-seen row wins; upstream /// `ParsedSource::finalize_diags` sorts so that high /// severity comes first, and this fallback preserves that ordering. pub fn replace_issues<'a>( &mut self, file_id: i64, issues: impl IntoIterator>, ) -> NyxResult<()> { let tx = self.conn.transaction()?; tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?; { let mut stmt = tx.prepare( "INSERT INTO issues (file_id, rule_id, severity, line, col) VALUES (?1, ?2, ?3, ?4, ?5)", )?; let mut seen: std::collections::HashSet<(String, i64, i64)> = std::collections::HashSet::new(); for iss in issues { if !seen.insert((iss.rule_id.to_string(), iss.line, iss.col)) { continue; } stmt.execute(params![ file_id, iss.rule_id, iss.severity, iss.line, iss.col ])?; } } tx.commit()?; Ok(()) } /// Gets the issues for a specific file so we don't have to rescan pub fn get_issues_from_file(&self, path: &Path) -> NyxResult> { let file_id: i64 = self.c().query_row( "SELECT id FROM files WHERE project = ?1 AND path = ?2", params![self.project, path.to_string_lossy()], |r| r.get(0), )?; let mut stmt = self.c().prepare( "SELECT rule_id, severity, line, col FROM issues WHERE file_id = ?1", )?; let issue_iter = stmt.query_map([file_id], |row| { let sev_str: String = row.get(1)?; let severity = Severity::from_str(&sev_str).unwrap_or_else(|_| { tracing::warn!( severity = %sev_str, "unknown severity in DB row; defaulting to Medium" ); Severity::Medium }); Ok(Diag { path: path.to_string_lossy().to_string(), id: row.get::<_, String>(0)?, // rule_id line: row.get::<_, i64>(2)? as usize, col: row.get::<_, i64>(3)? as usize, severity, category: crate::patterns::FindingCategory::Security, path_validated: false, guard_kind: None, message: None, labels: vec![], confidence: None, evidence: None, rank_score: None, rank_reason: None, suppressed: false, suppression: None, triage_state: "open".to_string(), triage_note: String::new(), rollup: None, finding_id: String::new(), alternative_finding_ids: Vec::new(), stable_hash: 0, }) })?; Ok(issue_iter.filter_map(Result::ok).collect()) } /// Atomically replace all function summaries for a single file. /// /// Deletes every existing summary row for `(project, file_path)` then /// inserts the new set. This keeps the table in sync when a file is /// re‑parsed and its functions change. pub fn replace_summaries_for_file( &mut self, file_path: &Path, file_hash: &[u8], summaries: &[crate::summary::FuncSummary], ) -> NyxResult<()> { let tx = self.conn.transaction()?; let path_str = file_path.to_string_lossy(); let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; tx.execute( "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; { let mut stmt = tx.prepare( "INSERT OR REPLACE INTO function_summaries (project, file_path, file_hash, name, arity, lang, container, disambig, kind, summary, entry_kind, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", )?; for s in summaries { let json = serde_json::to_string(s) .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?; let disambig_sql = s.disambig.map(|d| d as i64); let entry_kind_sql = s .entry_kind .as_ref() .map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new())) .filter(|s| !s.is_empty()); stmt.execute(params![ self.project, path_str, file_hash, s.name, s.param_count as i64, s.lang, s.container, disambig_sql, s.kind.as_str(), json, entry_kind_sql, now ])?; } } tx.commit()?; Ok(()) } /// Atomically replace all SSA function summaries for a single file. /// /// The input tuple is /// `(name, arity, lang, namespace, container, disambig, kind, summary)` , /// matching the fields required to reconstruct a full [`crate::symbol::FuncKey`] /// on load. pub fn replace_ssa_summaries_for_file( &mut self, file_path: &Path, file_hash: &[u8], summaries: &[( String, usize, String, String, String, Option, crate::symbol::FuncKind, crate::summary::ssa_summary::SsaFuncSummary, )], ) -> NyxResult<()> { let tx = self.conn.transaction()?; let path_str = file_path.to_string_lossy(); let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; tx.execute( "DELETE FROM ssa_function_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; { let mut stmt = tx.prepare( "INSERT OR REPLACE INTO ssa_function_summaries (project, file_path, file_hash, name, arity, lang, namespace, container, disambig, kind, summary, entry_kind, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)", )?; for (name, arity, lang, namespace, container, disambig, kind, summary) in summaries { let json = serde_json::to_string(summary) .map_err(|e| NyxError::Msg(format!("SSA summary serialise: {e}")))?; let disambig_sql = disambig.map(|d| d as i64); let entry_kind_sql = summary .entry_kind .as_ref() .map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new())) .filter(|s| !s.is_empty()); stmt.execute(params![ self.project, path_str, file_hash, name, *arity as i64, lang, namespace, container, disambig_sql, kind.as_str(), json, entry_kind_sql, now ])?; } } tx.commit()?; Ok(()) } /// Load every function summary for this project. /// /// Reads all JSON strings from SQLite in one pass, then /// deserializes them in parallel with rayon for large result sets. pub fn load_all_summaries(&self) -> NyxResult> { let mut stmt = self .c() .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?; let jsons: Vec = stmt .query_map([&self.project], |row| row.get::<_, String>(0))? .filter_map(|r| match r { Ok(v) => Some(v), Err(e) => { tracing::warn!("failed to read summary row: {e}"); None } }) .collect(); // Parallel JSON deserialization for large sets if jsons.len() > 256 { use rayon::prelude::*; let results: Vec<_> = jsons .par_iter() .filter_map(|json| { serde_json::from_str::(json) .map_err(|e| { tracing::warn!("failed to deserialize summary JSON: {e}"); e }) .ok() }) .collect(); Ok(results) } else { let mut out = Vec::with_capacity(jsons.len()); for json in &jsons { match serde_json::from_str::(json) { Ok(s) => out.push(s), Err(e) => { tracing::warn!("failed to deserialize summary JSON: {e}"); } } } Ok(out) } } /// Load every SSA function summary for this project. /// /// Returns rows with full metadata for `FuncKey` reconstruction: /// `(file_path, name, lang, arity, namespace, container, disambig, kind, SsaFuncSummary)`. pub fn load_all_ssa_summaries( &self, ) -> NyxResult< Vec<( String, String, String, i64, String, String, Option, crate::symbol::FuncKind, crate::summary::ssa_summary::SsaFuncSummary, )>, > { let mut stmt = self.c().prepare( "SELECT file_path, name, lang, arity, namespace, container, disambig, kind, summary FROM ssa_function_summaries WHERE project = ?1", )?; let rows: Vec<( String, String, String, i64, String, String, Option, String, String, )> = stmt .query_map([&self.project], |row| { Ok(( row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?, row.get::<_, i64>(3)?, row.get::<_, String>(4)?, row.get::<_, String>(5)?, row.get::<_, Option>(6)?, row.get::<_, String>(7)?, row.get::<_, String>(8)?, )) })? .filter_map(|r| match r { Ok(v) => Some(v), Err(e) => { tracing::warn!("failed to read SSA summary row: {e}"); None } }) .collect(); if rows.len() > 256 { use rayon::prelude::*; let results: Vec<_> = rows .par_iter() .filter_map( |(fp, name, lang, arity, ns, container, disambig, kind, json)| { serde_json::from_str::( json, ) .map_err(|e| { tracing::warn!("failed to deserialize SSA summary JSON: {e}"); e }) .ok() .map(|s| { ( fp.clone(), name.clone(), lang.clone(), *arity, ns.clone(), container.clone(), disambig.map(|d| d as u32), crate::symbol::FuncKind::from_slug(kind), s, ) }) }, ) .collect(); Ok(results) } else { let mut out = Vec::with_capacity(rows.len()); for (fp, name, lang, arity, ns, container, disambig, kind, json) in &rows { match serde_json::from_str::(json) { Ok(s) => { out.push(( fp.clone(), name.clone(), lang.clone(), *arity, ns.clone(), container.clone(), disambig.map(|d| d as u32), crate::symbol::FuncKind::from_slug(kind), s, )); } Err(e) => { tracing::warn!("failed to deserialize SSA summary JSON: {e}"); } } } Ok(out) } } /// Load symbol metadata (name, arity, lang, namespace, container, kind) /// for a single file. /// /// Lighter than `load_all_ssa_summaries`, skips JSON deserialization of /// the full summary body and filters by file_path in the query. `kind` /// is the [`crate::symbol::FuncKind`] slug (`"fn"`, `"method"`, /// `"closure"`, ...) so consumers can distinguish anonymous functions /// from named ones. pub fn load_ssa_summaries_for_file( &self, file_path: &str, ) -> NyxResult> { let mut stmt = self.c().prepare( "SELECT name, arity, lang, namespace, container, kind FROM ssa_function_summaries WHERE project = ?1 AND file_path = ?2", )?; let rows: Vec<(String, i64, String, String, String, String)> = stmt .query_map(rusqlite::params![self.project, file_path], |row| { Ok(( row.get::<_, String>(0)?, row.get::<_, i64>(1)?, row.get::<_, String>(2)?, row.get::<_, String>(3)?, row.get::<_, String>(4)?, row.get::<_, String>(5)?, )) })? .filter_map(Result::ok) .collect(); Ok(rows) } /// Atomically replace all SSA callee bodies for a single file. /// /// Persists cross-file callee bodies for interprocedural symex. /// Bodies are serialized as MessagePack (rmp-serde, named-field /// encoding) BLOBs, JSON proved too costly at indexing time on /// large SSA structures, and bincode's positional format trips /// over the `#[serde(skip_serializing_if = ...)]` attributes /// scattered through `OptimizeResult` and friends. /// Input tuple: `(name, arity, lang, namespace, container, disambig, kind, body)`. pub fn replace_ssa_bodies_for_file( &mut self, file_path: &Path, file_hash: &[u8], bodies: &[( String, usize, String, String, String, Option, crate::symbol::FuncKind, crate::taint::ssa_transfer::CalleeSsaBody, )], ) -> NyxResult<()> { let tx = self.conn.transaction()?; let path_str = file_path.to_string_lossy(); let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; tx.execute( "DELETE FROM ssa_function_bodies WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; { let mut stmt = tx.prepare( "INSERT OR REPLACE INTO ssa_function_bodies (project, file_path, file_hash, name, arity, lang, namespace, container, disambig, kind, body, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", )?; for (name, arity, lang, namespace, container, disambig, kind, body) in bodies { let blob = rmp_serde::to_vec_named(body) .map_err(|e| NyxError::Msg(format!("SSA body serialise: {e}")))?; let disambig_sql = disambig.map(|d| d as i64); stmt.execute(params![ self.project, path_str, file_hash, name, *arity as i64, lang, namespace, container, disambig_sql, kind.as_str(), blob, now ])?; } } tx.commit()?; Ok(()) } /// Load every SSA callee body for this project. /// /// Returns rows with full metadata for `FuncKey` reconstruction: /// `(file_path, name, lang, arity, namespace, container, disambig, kind, CalleeSsaBody)`. pub fn load_all_ssa_bodies( &self, ) -> NyxResult< Vec<( String, String, String, i64, String, String, Option, crate::symbol::FuncKind, crate::taint::ssa_transfer::CalleeSsaBody, )>, > { let mut stmt = self.c().prepare( "SELECT file_path, name, lang, arity, namespace, container, disambig, kind, body FROM ssa_function_bodies WHERE project = ?1", )?; let rows: Vec<( String, String, String, i64, String, String, Option, String, Vec, )> = stmt .query_map([&self.project], |row| { Ok(( row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?, row.get::<_, i64>(3)?, row.get::<_, String>(4)?, row.get::<_, String>(5)?, row.get::<_, Option>(6)?, row.get::<_, String>(7)?, row.get::<_, Vec>(8)?, )) })? .filter_map(|r| match r { Ok(v) => Some(v), Err(e) => { tracing::warn!("failed to read SSA body row: {e}"); None } }) .collect(); if rows.len() > 256 { use rayon::prelude::*; let results: Vec<_> = rows .par_iter() .filter_map( |(fp, name, lang, arity, ns, container, disambig, kind, blob)| { rmp_serde::from_slice::(blob) .map_err(|e| { tracing::warn!("failed to deserialize SSA body: {e}"); e }) .ok() .map(|mut b| { // Rehydrate a proxy Cfg from node_meta so // the taint engine's cross-file inline path can index // `cfg[inst.cfg_node]` uniformly. No-op for intra-file // bodies that carry node_meta empty. crate::taint::ssa_transfer::rebuild_body_graph(&mut b); ( fp.clone(), name.clone(), lang.clone(), *arity, ns.clone(), container.clone(), disambig.map(|d| d as u32), crate::symbol::FuncKind::from_slug(kind), b, ) }) }, ) .collect(); Ok(results) } else { let mut out = Vec::with_capacity(rows.len()); for (fp, name, lang, arity, ns, container, disambig, kind, blob) in &rows { match rmp_serde::from_slice::(blob) { Ok(mut b) => { // See note in parallel branch above. crate::taint::ssa_transfer::rebuild_body_graph(&mut b); out.push(( fp.clone(), name.clone(), lang.clone(), *arity, ns.clone(), container.clone(), disambig.map(|d| d as u32), crate::symbol::FuncKind::from_slug(kind), b, )); } Err(e) => { tracing::warn!("failed to deserialize SSA body: {e}"); } } } Ok(out) } } /// Atomically replace all `AuthCheckSummary` rows for a single file. /// /// Mirrors [`Self::replace_ssa_summaries_for_file`]. Each input tuple /// is `(name, arity, lang, namespace, container, disambig, kind, summary)` ///, the full identity needed to reconstruct the callee's /// [`crate::symbol::FuncKey`] on load. pub fn replace_auth_summaries_for_file( &mut self, file_path: &Path, file_hash: &[u8], summaries: &[( String, usize, String, String, String, Option, crate::symbol::FuncKind, crate::auth_analysis::model::AuthCheckSummary, )], ) -> NyxResult<()> { let tx = self.conn.transaction()?; let path_str = file_path.to_string_lossy(); let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; tx.execute( "DELETE FROM auth_check_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; { let mut stmt = tx.prepare( "INSERT OR REPLACE INTO auth_check_summaries (project, file_path, file_hash, name, arity, lang, namespace, container, disambig, kind, summary, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", )?; for (name, arity, lang, namespace, container, disambig, kind, summary) in summaries { let json = serde_json::to_string(summary) .map_err(|e| NyxError::Msg(format!("auth summary serialise: {e}")))?; let disambig_sql = disambig.map(|d| d as i64); stmt.execute(params![ self.project, path_str, file_hash, name, *arity as i64, lang, namespace, container, disambig_sql, kind.as_str(), json, now ])?; } } tx.commit()?; Ok(()) } /// Atomically replace all four per-file caches in a single /// transaction. Equivalent in effect to calling /// [`Self::replace_summaries_for_file`], /// [`Self::replace_ssa_summaries_for_file`], /// [`Self::replace_ssa_bodies_for_file`] and /// [`Self::replace_auth_summaries_for_file`] in sequence, but /// issues a single fsync at commit instead of four, the /// dominant cost on large scans. /// /// Behaviour parity with the four-call sequence: /// * function and auth summaries: DELETE-then-INSERT regardless /// of input length, so emptying a file's summaries clears /// stale rows. /// * SSA summaries and bodies: only touched when the input is /// non-empty, matching the existing scan path. #[allow(clippy::too_many_arguments)] pub fn replace_all_for_file( &mut self, file_path: &Path, file_hash: &[u8], func_summaries: &[crate::summary::FuncSummary], ssa_summaries: &[( String, usize, String, String, String, Option, crate::symbol::FuncKind, crate::summary::ssa_summary::SsaFuncSummary, )], ssa_bodies: &[( String, usize, String, String, String, Option, crate::symbol::FuncKind, crate::taint::ssa_transfer::CalleeSsaBody, )], auth_summaries: &[( String, usize, String, String, String, Option, crate::symbol::FuncKind, crate::auth_analysis::model::AuthCheckSummary, )], cross_package_imports: Option<( &str, &std::collections::HashMap, )>, ) -> NyxResult<()> { let tx = self.conn.transaction()?; let path_str = file_path.to_string_lossy(); let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; // function_summaries, always replace. tx.execute( "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; { let mut stmt = tx.prepare( "INSERT OR REPLACE INTO function_summaries (project, file_path, file_hash, name, arity, lang, container, disambig, kind, summary, entry_kind, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", )?; for s in func_summaries { let json = serde_json::to_string(s) .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?; let disambig_sql = s.disambig.map(|d| d as i64); let entry_kind_sql = s .entry_kind .as_ref() .map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new())) .filter(|s| !s.is_empty()); stmt.execute(params![ self.project, path_str, file_hash, s.name, s.param_count as i64, s.lang, s.container, disambig_sql, s.kind.as_str(), json, entry_kind_sql, now ])?; } } // ssa_function_summaries, only touched when non-empty. if !ssa_summaries.is_empty() { tx.execute( "DELETE FROM ssa_function_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; let mut stmt = tx.prepare( "INSERT OR REPLACE INTO ssa_function_summaries (project, file_path, file_hash, name, arity, lang, namespace, container, disambig, kind, summary, entry_kind, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)", )?; for (name, arity, lang, namespace, container, disambig, kind, summary) in ssa_summaries { let json = serde_json::to_string(summary) .map_err(|e| NyxError::Msg(format!("SSA summary serialise: {e}")))?; let disambig_sql = disambig.map(|d| d as i64); let entry_kind_sql = summary .entry_kind .as_ref() .map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new())) .filter(|s| !s.is_empty()); stmt.execute(params![ self.project, path_str, file_hash, name, *arity as i64, lang, namespace, container, disambig_sql, kind.as_str(), json, entry_kind_sql, now ])?; } } // ssa_function_bodies, only touched when non-empty. if !ssa_bodies.is_empty() { tx.execute( "DELETE FROM ssa_function_bodies WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; let mut stmt = tx.prepare( "INSERT OR REPLACE INTO ssa_function_bodies (project, file_path, file_hash, name, arity, lang, namespace, container, disambig, kind, body, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", )?; for (name, arity, lang, namespace, container, disambig, kind, body) in ssa_bodies { let blob = rmp_serde::to_vec_named(body) .map_err(|e| NyxError::Msg(format!("SSA body serialise: {e}")))?; let disambig_sql = disambig.map(|d| d as i64); stmt.execute(params![ self.project, path_str, file_hash, name, *arity as i64, lang, namespace, container, disambig_sql, kind.as_str(), blob, now ])?; } } // auth_check_summaries, always replace, even when empty, // so a helper that lost its ownership check no longer // leaks lifts into subsequent pass-2 runs. tx.execute( "DELETE FROM auth_check_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; { let mut stmt = tx.prepare( "INSERT OR REPLACE INTO auth_check_summaries (project, file_path, file_hash, name, arity, lang, namespace, container, disambig, kind, summary, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", )?; for (name, arity, lang, namespace, container, disambig, kind, summary) in auth_summaries { let json = serde_json::to_string(summary) .map_err(|e| NyxError::Msg(format!("auth summary serialise: {e}")))?; let disambig_sql = disambig.map(|d| d as i64); stmt.execute(params![ self.project, path_str, file_hash, name, *arity as i64, lang, namespace, container, disambig_sql, kind.as_str(), json, now ])?; } } // cross_package_imports: replace this file's row, even with // an empty input, so a file that lost its imports does not // leave stale resolutions in the cache. tx.execute( "DELETE FROM cross_package_imports WHERE project = ?1 AND file_path = ?2", params![self.project, path_str], )?; if let Some((namespace, map)) = cross_package_imports && !map.is_empty() { let blob = rmp_serde::to_vec_named(map) .map_err(|e| NyxError::Msg(format!("cross_package_imports serialise: {e}")))?; tx.execute( "INSERT OR REPLACE INTO cross_package_imports (project, file_path, file_hash, namespace, imports, updated_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", params![self.project, path_str, file_hash, namespace, blob, now], )?; } tx.commit()?; Ok(()) } /// Load every `AuthCheckSummary` for this project. /// /// Returns rows with full metadata for `FuncKey` reconstruction: /// `(file_path, name, lang, arity, namespace, container, disambig, kind, AuthCheckSummary)`. pub fn load_all_auth_summaries( &self, ) -> NyxResult< Vec<( String, String, String, i64, String, String, Option, crate::symbol::FuncKind, crate::auth_analysis::model::AuthCheckSummary, )>, > { let mut stmt = self.c().prepare( "SELECT file_path, name, lang, arity, namespace, container, disambig, kind, summary FROM auth_check_summaries WHERE project = ?1", )?; let rows: Vec<( String, String, String, i64, String, String, Option, String, String, )> = stmt .query_map([&self.project], |row| { Ok(( row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?, row.get::<_, i64>(3)?, row.get::<_, String>(4)?, row.get::<_, String>(5)?, row.get::<_, Option>(6)?, row.get::<_, String>(7)?, row.get::<_, String>(8)?, )) })? .filter_map(|r| match r { Ok(v) => Some(v), Err(e) => { tracing::warn!("failed to read auth summary row: {e}"); None } }) .collect(); let mut out = Vec::with_capacity(rows.len()); for (fp, name, lang, arity, ns, container, disambig, kind, json) in &rows { match serde_json::from_str::(json) { Ok(s) => { out.push(( fp.clone(), name.clone(), lang.clone(), *arity, ns.clone(), container.clone(), disambig.map(|d| d as u32), crate::symbol::FuncKind::from_slug(kind), s, )); } Err(e) => { tracing::warn!("failed to deserialize auth summary JSON: {e}"); } } } Ok(out) } /// Load every persisted per-file Phase-09 cross-package import map /// for this project. /// /// Returns rows as `(file_path, namespace, imports_map)`. Used by /// pass 2 of indexed scans to populate /// `GlobalSummaries::cross_package_imports_by_namespace`, recovering /// the per-file import view that /// [`crate::taint::ssa_transfer::CalleeSsaBody::cross_package_imports`] /// loses across SQLite round-trip (`#[serde(skip)]`). pub fn load_all_cross_package_imports( &self, ) -> NyxResult< Vec<( String, String, std::collections::HashMap, )>, > { let mut stmt = self.c().prepare( "SELECT file_path, namespace, imports FROM cross_package_imports WHERE project = ?1", )?; let rows: Vec<(String, String, Vec)> = stmt .query_map([&self.project], |row| { Ok(( row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, Vec>(2)?, )) })? .filter_map(|r| match r { Ok(v) => Some(v), Err(e) => { tracing::warn!("failed to read cross_package_imports row: {e}"); None } }) .collect(); let mut out = Vec::with_capacity(rows.len()); for (fp, ns, blob) in rows { match rmp_serde::from_slice::< std::collections::HashMap, >(&blob) { Ok(map) => out.push((fp, ns, map)), Err(e) => { tracing::warn!("failed to deserialize cross_package_imports blob: {e}"); } } } Ok(out) } /// Persist a [`crate::surface::SurfaceMap`] for this project. /// /// Replaces any previously-persisted map; the table holds one row /// per project. The map is canonicalised before serialisation so /// `replace_surface_map` + `load_surface_map` round-trip is /// byte-identical for structurally identical maps. pub fn replace_surface_map(&mut self, map: &crate::surface::SurfaceMap) -> NyxResult<()> { let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; let mut canon = map.clone(); let bytes = canon .to_json() .map_err(|e| NyxError::Msg(format!("surface map serialise: {e}")))?; self.c().execute( "INSERT OR REPLACE INTO surface_map (project, map_json, updated_at) VALUES (?1, ?2, ?3)", params![self.project, bytes, now], )?; Ok(()) } /// Load the persisted [`crate::surface::SurfaceMap`] for this /// project, or `None` when no map has been written. pub fn load_surface_map(&self) -> NyxResult> { let row: Option> = self .c() .query_row( "SELECT map_json FROM surface_map WHERE project = ?1", params![self.project], |r| r.get::<_, Vec>(0), ) .optional()?; let Some(bytes) = row else { return Ok(None); }; let map = crate::surface::SurfaceMap::from_json(&bytes) .map_err(|e| NyxError::Msg(format!("surface map deserialise: {e}")))?; Ok(Some(map)) } /// Return the raw JSON bytes stored for the surface map without /// deserialising. Used by the round-trip parity tests so they /// can compare on-disk bytes across rescans. pub fn load_surface_map_bytes(&self) -> NyxResult>> { let row: Option> = self .c() .query_row( "SELECT map_json FROM surface_map WHERE project = ?1", params![self.project], |r| r.get::<_, Vec>(0), ) .optional()?; Ok(row) } /// Remove a file and all derived persisted state for this project. /// /// This deletes the file row, issues, and all persisted summary rows so /// incremental scans can prune deleted files from the index cleanly. pub fn remove_file_and_related(&mut self, path: &Path) -> NyxResult<()> { let tx = self.conn.transaction()?; let path_str = path.to_string_lossy(); let file_id: Option = tx .query_row( "SELECT id FROM files WHERE project = ?1 AND path = ?2", params![self.project, path_str.as_ref()], |r| r.get(0), ) .optional()?; if let Some(file_id) = file_id { tx.execute("DELETE FROM issues WHERE file_id = ?1", params![file_id])?; tx.execute("DELETE FROM files WHERE id = ?1", params![file_id])?; } tx.execute( "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str.as_ref()], )?; tx.execute( "DELETE FROM ssa_function_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str.as_ref()], )?; tx.execute( "DELETE FROM ssa_function_bodies WHERE project = ?1 AND file_path = ?2", params![self.project, path_str.as_ref()], )?; tx.execute( "DELETE FROM auth_check_summaries WHERE project = ?1 AND file_path = ?2", params![self.project, path_str.as_ref()], )?; tx.execute( "DELETE FROM cross_package_imports WHERE project = ?1 AND file_path = ?2", params![self.project, path_str.as_ref()], )?; tx.commit()?; Ok(()) } /// gets files from the database pub fn get_files(&self, project: &str) -> NyxResult> { let mut stmt = self.c().prepare( "SELECT path FROM files WHERE project = ?1", )?; let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?; Ok(file_iter .map(|p| p.map(PathBuf::from)) .collect::>()?) } // Scan persistence /// Insert a new scan record. pub fn insert_scan(&self, record: &ScanRecord) -> NyxResult<()> { self.c().execute( "INSERT OR REPLACE INTO scans (id, status, scan_root, started_at, finished_at, duration_secs, engine_version, languages, files_scanned, files_skipped, finding_count, findings_json, timing_json, error) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14)", params![ record.id, record.status, record.scan_root, record.started_at, record.finished_at, record.duration_secs, record.engine_version, record.languages, record.files_scanned, record.files_skipped, record.finding_count, record.findings_json, record.timing_json, record.error, ], )?; Ok(()) } /// Update a scan record status and completion fields. pub fn update_scan( &self, id: &str, status: &str, finished_at: Option<&str>, duration_secs: Option, finding_count: Option, findings_json: Option<&str>, timing_json: Option<&str>, error: Option<&str>, files_scanned: Option, files_skipped: Option, languages: Option<&str>, ) -> NyxResult<()> { self.c().execute( "UPDATE scans SET status = ?2, finished_at = ?3, duration_secs = ?4, finding_count = ?5, findings_json = ?6, timing_json = ?7, error = ?8, files_scanned = ?9, files_skipped = ?10, languages = ?11 WHERE id = ?1", params![ id, status, finished_at, duration_secs, finding_count, findings_json, timing_json, error, files_scanned, files_skipped, languages, ], )?; Ok(()) } /// Get a single scan record by ID. pub fn get_scan(&self, id: &str) -> NyxResult> { let result = self .c() .query_row( "SELECT id, status, scan_root, started_at, finished_at, duration_secs, engine_version, languages, files_scanned, files_skipped, finding_count, findings_json, timing_json, error FROM scans WHERE id = ?1", params![id], |row| { Ok(ScanRecord { id: row.get(0)?, status: row.get(1)?, scan_root: row.get(2)?, started_at: row.get(3)?, finished_at: row.get(4)?, duration_secs: row.get(5)?, engine_version: row.get(6)?, languages: row.get(7)?, files_scanned: row.get(8)?, files_skipped: row.get(9)?, finding_count: row.get(10)?, findings_json: row.get(11)?, timing_json: row.get(12)?, error: row.get(13)?, }) }, ) .optional()?; Ok(result) } /// List scan records, most recent first, up to `limit`. pub fn list_scans(&self, limit: i64) -> NyxResult> { let mut stmt = self.c().prepare( "SELECT id, status, scan_root, started_at, finished_at, duration_secs, engine_version, languages, files_scanned, files_skipped, finding_count, findings_json, timing_json, error FROM scans ORDER BY started_at DESC LIMIT ?1", )?; let rows = stmt .query_map(params![limit], |row| { Ok(ScanRecord { id: row.get(0)?, status: row.get(1)?, scan_root: row.get(2)?, started_at: row.get(3)?, finished_at: row.get(4)?, duration_secs: row.get(5)?, engine_version: row.get(6)?, languages: row.get(7)?, files_scanned: row.get(8)?, files_skipped: row.get(9)?, finding_count: row.get(10)?, findings_json: row.get(11)?, timing_json: row.get(12)?, error: row.get(13)?, }) })? .filter_map(Result::ok) .collect(); Ok(rows) } /// Delete a scan and its associated metrics/logs (FK CASCADE). pub fn delete_scan(&self, id: &str) -> NyxResult { let rows = self .c() .execute("DELETE FROM scans WHERE id = ?1", params![id])?; Ok(rows) } /// Insert scan metrics for a completed scan. pub fn insert_scan_metrics( &self, scan_id: &str, metrics: &crate::server::progress::ScanMetricsSnapshot, ) -> NyxResult<()> { self.c().execute( "INSERT OR REPLACE INTO scan_metrics (scan_id, cfg_nodes, call_edges, functions_analyzed, summaries_reused, unresolved_calls) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", params![ scan_id, metrics.cfg_nodes as i64, metrics.call_edges as i64, metrics.functions_analyzed as i64, metrics.summaries_reused as i64, metrics.unresolved_calls as i64, ], )?; Ok(()) } /// Get scan metrics by scan ID. pub fn get_scan_metrics( &self, scan_id: &str, ) -> NyxResult> { let result = self .c() .query_row( "SELECT cfg_nodes, call_edges, functions_analyzed, summaries_reused, unresolved_calls FROM scan_metrics WHERE scan_id = ?1", params![scan_id], |row| { Ok(crate::server::progress::ScanMetricsSnapshot { cfg_nodes: row.get::<_, i64>(0)? as u64, call_edges: row.get::<_, i64>(1)? as u64, functions_analyzed: row.get::<_, i64>(2)? as u64, summaries_reused: row.get::<_, i64>(3)? as u64, unresolved_calls: row.get::<_, i64>(4)? as u64, }) }, ) .optional()?; Ok(result) } /// Insert scan log entries. pub fn insert_scan_logs( &self, scan_id: &str, logs: &[crate::server::scan_log::ScanLogEntry], ) -> NyxResult<()> { let mut stmt = self.c().prepare( "INSERT INTO scan_logs (scan_id, timestamp, level, message, file_path, detail) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", )?; for log in logs { stmt.execute(params![ scan_id, log.timestamp.to_rfc3339(), log.level.to_string(), log.message, log.file_path, log.detail, ])?; } Ok(()) } /// Get scan logs, optionally filtered by level. pub fn get_scan_logs( &self, scan_id: &str, level_filter: Option<&str>, ) -> NyxResult> { let (sql, params_vec): (&str, Vec>) = if let Some(level) = level_filter { ( "SELECT timestamp, level, message, file_path, detail FROM scan_logs WHERE scan_id = ?1 AND level = ?2 ORDER BY id ASC", vec![Box::new(scan_id.to_string()), Box::new(level.to_string())], ) } else { ( "SELECT timestamp, level, message, file_path, detail FROM scan_logs WHERE scan_id = ?1 ORDER BY id ASC", vec![Box::new(scan_id.to_string())], ) }; let mut stmt = self.c().prepare(sql)?; let params_refs: Vec<&dyn rusqlite::types::ToSql> = params_vec.iter().map(|p| p.as_ref()).collect(); let rows = stmt .query_map(params_refs.as_slice(), |row| { let ts_str: String = row.get(0)?; let level_str: String = row.get(1)?; Ok(( ts_str, level_str, row.get::<_, String>(2)?, row.get::<_, Option>(3)?, row.get::<_, Option>(4)?, )) })? .filter_map(Result::ok) .filter_map(|(ts_str, level_str, message, file_path, detail)| { let timestamp = chrono::DateTime::parse_from_rfc3339(&ts_str) .ok()? .with_timezone(&chrono::Utc); let level = level_str.parse().ok()?; Some(crate::server::scan_log::ScanLogEntry { timestamp, level, message, file_path, detail, }) }) .collect(); Ok(rows) } // Triage state management /// Get the triage state for a single finding fingerprint. /// Returns (state, note, updated_at) or None if no triage state exists. #[allow(dead_code)] pub fn get_triage_state( &self, fingerprint: &str, ) -> NyxResult> { let result = self .c() .query_row( "SELECT state, note, updated_at FROM triage_states WHERE fingerprint = ?1", params![fingerprint], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)), ) .optional()?; Ok(result) } /// Set the triage state for a single finding. Upserts the state and /// appends an audit log entry. Returns the previous state (or "open"). pub fn set_triage_state( &self, fingerprint: &str, state: &str, note: &str, action: &str, ) -> NyxResult { let now = chrono::Utc::now().to_rfc3339(); let prev: String = self .c() .query_row( "SELECT state FROM triage_states WHERE fingerprint = ?1", params![fingerprint], |row| row.get(0), ) .optional()? .unwrap_or_else(|| "open".to_string()); self.c().execute( "INSERT INTO triage_states (fingerprint, state, note, updated_at) VALUES (?1, ?2, ?3, ?4) ON CONFLICT(fingerprint) DO UPDATE SET state = excluded.state, note = excluded.note, updated_at = excluded.updated_at", params![fingerprint, state, note, now], )?; self.c().execute( "INSERT INTO triage_audit_log (fingerprint, action, previous_state, new_state, note, timestamp) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", params![fingerprint, action, prev, state, note, now], )?; Ok(prev) } /// Bulk set triage state. Returns vec of (fingerprint, previous_state). pub fn set_triage_states_bulk( &self, fingerprints: &[String], state: &str, note: &str, action: &str, ) -> NyxResult> { let now = chrono::Utc::now().to_rfc3339(); let mut results = Vec::with_capacity(fingerprints.len()); // Read all previous states first let mut prev_stmt = self .c() .prepare("SELECT state FROM triage_states WHERE fingerprint = ?1")?; for fp in fingerprints { let prev: String = prev_stmt .query_row(params![fp], |row| row.get(0)) .optional()? .unwrap_or_else(|| "open".to_string()); results.push((fp.clone(), prev)); } drop(prev_stmt); // Upsert all states let mut upsert_stmt = self.c().prepare( "INSERT INTO triage_states (fingerprint, state, note, updated_at) VALUES (?1, ?2, ?3, ?4) ON CONFLICT(fingerprint) DO UPDATE SET state = excluded.state, note = excluded.note, updated_at = excluded.updated_at", )?; for fp in fingerprints { upsert_stmt.execute(params![fp, state, note, now])?; } drop(upsert_stmt); // Insert audit log entries let mut audit_stmt = self.c().prepare( "INSERT INTO triage_audit_log (fingerprint, action, previous_state, new_state, note, timestamp) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", )?; for (fp, prev) in &results { audit_stmt.execute(params![fp, action, prev, state, note, now])?; } Ok(results) } /// Load all triage states as a map: fingerprint → (state, note, updated_at). pub fn get_all_triage_states( &self, ) -> NyxResult> { let mut stmt = self .c() .prepare("SELECT fingerprint, state, note, updated_at FROM triage_states")?; let rows = stmt .query_map([], |row| { Ok(( row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?, row.get::<_, String>(3)?, )) })? .filter_map(Result::ok) .map(|(fp, state, note, updated)| (fp, (state, note, updated))) .collect(); Ok(rows) } /// List triage states with optional state filter, paginated. /// Returns (entries, total_count). pub fn list_triage_states( &self, state_filter: Option<&str>, limit: i64, offset: i64, ) -> NyxResult<(Vec<(String, String, String, String)>, i64)> { let (sql, count_sql, params_vec): (&str, &str, Vec>) = if let Some(state) = state_filter { ( "SELECT fingerprint, state, note, updated_at FROM triage_states WHERE state = ?1 ORDER BY updated_at DESC LIMIT ?2 OFFSET ?3", "SELECT COUNT(*) FROM triage_states WHERE state = ?1", vec![ Box::new(state.to_string()), Box::new(limit), Box::new(offset), ], ) } else { ( "SELECT fingerprint, state, note, updated_at FROM triage_states ORDER BY updated_at DESC LIMIT ?1 OFFSET ?2", "SELECT COUNT(*) FROM triage_states", vec![Box::new(limit), Box::new(offset)], ) }; let total: i64 = if let Some(state) = state_filter { self.c() .query_row(count_sql, params![state], |row| row.get(0))? } else { self.c().query_row(count_sql, [], |row| row.get(0))? }; let mut stmt = self.c().prepare(sql)?; let params_refs: Vec<&dyn rusqlite::types::ToSql> = params_vec.iter().map(|p| p.as_ref()).collect(); let rows = stmt .query_map(params_refs.as_slice(), |row| { Ok(( row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?, row.get::<_, String>(3)?, )) })? .filter_map(Result::ok) .collect(); Ok((rows, total)) } /// Get the audit log, optionally filtered by fingerprint, paginated. /// Returns (entries, total_count). pub fn get_audit_log( &self, fingerprint_filter: Option<&str>, limit: i64, offset: i64, ) -> NyxResult<(Vec, i64)> { let (sql, count_sql, params_vec): (&str, &str, Vec>) = if let Some(fp) = fingerprint_filter { ( "SELECT id, fingerprint, action, previous_state, new_state, note, timestamp FROM triage_audit_log WHERE fingerprint = ?1 ORDER BY timestamp DESC LIMIT ?2 OFFSET ?3", "SELECT COUNT(*) FROM triage_audit_log WHERE fingerprint = ?1", vec![Box::new(fp.to_string()), Box::new(limit), Box::new(offset)], ) } else { ( "SELECT id, fingerprint, action, previous_state, new_state, note, timestamp FROM triage_audit_log ORDER BY timestamp DESC LIMIT ?1 OFFSET ?2", "SELECT COUNT(*) FROM triage_audit_log", vec![Box::new(limit), Box::new(offset)], ) }; let total: i64 = if let Some(fp) = fingerprint_filter { self.c() .query_row(count_sql, params![fp], |row| row.get(0))? } else { self.c().query_row(count_sql, [], |row| row.get(0))? }; let mut stmt = self.c().prepare(sql)?; let params_refs: Vec<&dyn rusqlite::types::ToSql> = params_vec.iter().map(|p| p.as_ref()).collect(); let rows = stmt .query_map(params_refs.as_slice(), |row| { Ok(AuditEntry { id: row.get(0)?, fingerprint: row.get(1)?, action: row.get(2)?, previous_state: row.get(3)?, new_state: row.get(4)?, note: row.get(5)?, timestamp: row.get(6)?, }) })? .filter_map(Result::ok) .collect(); Ok((rows, total)) } /// Add a pattern-based suppression rule. pub fn add_suppression_rule( &self, suppress_by: &str, match_value: &str, state: &str, note: &str, ) -> NyxResult { let now = chrono::Utc::now().to_rfc3339(); self.c().execute( "INSERT OR REPLACE INTO triage_suppression_rules (suppress_by, match_value, state, note, created_at) VALUES (?1, ?2, ?3, ?4, ?5)", params![suppress_by, match_value, state, note, now], )?; Ok(self.c().last_insert_rowid()) } /// Get all suppression rules. pub fn get_suppression_rules(&self) -> NyxResult> { let mut stmt = self.c().prepare( "SELECT id, suppress_by, match_value, state, note, created_at FROM triage_suppression_rules ORDER BY created_at DESC", )?; let rows = stmt .query_map([], |row| { Ok(SuppressionRule { id: row.get(0)?, suppress_by: row.get(1)?, match_value: row.get(2)?, state: row.get(3)?, note: row.get(4)?, created_at: row.get(5)?, }) })? .filter_map(Result::ok) .collect(); Ok(rows) } /// Record the first time a finding fingerprint was observed. Idempotent , /// the earliest call wins via INSERT OR IGNORE. Used by the overview /// backlog-age computation; ts should be the originating scan's /// `started_at` (RFC-3339). pub fn record_finding_first_seen(&self, fingerprint: &str, ts: &str) -> NyxResult<()> { self.c().execute( "INSERT OR IGNORE INTO finding_first_seen (fingerprint, first_seen_at) VALUES (?1, ?2)", params![fingerprint, ts], )?; Ok(()) } /// Bulk variant. Inserts ignoring conflicts. pub fn record_finding_first_seen_bulk( &self, entries: &[(String, String)], ) -> NyxResult<()> { if entries.is_empty() { return Ok(()); } let conn = self.c(); let tx = conn.unchecked_transaction()?; { let mut stmt = tx.prepare( "INSERT OR IGNORE INTO finding_first_seen (fingerprint, first_seen_at) VALUES (?1, ?2)", )?; for (fp, ts) in entries { stmt.execute(params![fp, ts])?; } } tx.commit()?; Ok(()) } /// Look up first-seen timestamps for a set of fingerprints. Missing /// entries are simply absent from the returned map. pub fn get_first_seen_map( &self, fingerprints: &[String], ) -> NyxResult> { if fingerprints.is_empty() { return Ok(std::collections::HashMap::new()); } // SQLite IN-clause cap is high but parameter count is bounded, chunk // for safety with large fingerprint sets. let mut out = std::collections::HashMap::with_capacity(fingerprints.len()); let conn = self.c(); for chunk in fingerprints.chunks(500) { let placeholders = (1..=chunk.len()) .map(|i| format!("?{i}")) .collect::>() .join(","); let sql = format!( "SELECT fingerprint, first_seen_at FROM finding_first_seen WHERE fingerprint IN ({placeholders})" ); let mut stmt = conn.prepare(&sql)?; let params: Vec<&dyn rusqlite::ToSql> = chunk.iter().map(|s| s as &dyn rusqlite::ToSql).collect(); let rows = stmt.query_map(params.as_slice(), |row| { Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) })?; for r in rows.flatten() { out.insert(r.0, r.1); } } Ok(out) } /// Get a single metadata value by key. Returns None if absent. pub fn get_metadata(&self, key: &str) -> NyxResult> { let conn = self.c(); let mut stmt = conn.prepare("SELECT value FROM nyx_metadata WHERE key = ?1")?; let mut rows = stmt.query(params![key])?; if let Some(row) = rows.next()? { Ok(Some(row.get(0)?)) } else { Ok(None) } } /// Set a metadata value (insert-or-replace). pub fn set_metadata(&self, key: &str, value: &str) -> NyxResult<()> { self.c().execute( "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES (?1, ?2)", params![key, value], )?; Ok(()) } /// Remove a metadata key. Returns true if a row was deleted. pub fn delete_metadata(&self, key: &str) -> NyxResult { let n = self .c() .execute("DELETE FROM nyx_metadata WHERE key = ?1", params![key])?; Ok(n > 0) } /// Delete a suppression rule by ID. Returns true if a row was deleted. pub fn delete_suppression_rule(&self, id: i64) -> NyxResult { let count = self.c().execute( "DELETE FROM triage_suppression_rules WHERE id = ?1", params![id], )?; Ok(count > 0) } // Maintenance utilities pub fn clear(&self) -> NyxResult<()> { self.c().execute_batch( r#" PRAGMA foreign_keys = OFF; DROP TABLE IF EXISTS issues; DROP TABLE IF EXISTS files; DROP TABLE IF EXISTS function_summaries; DROP TABLE IF EXISTS ssa_function_summaries; PRAGMA foreign_keys = ON; VACUUM; "#, )?; self.c().execute_batch(SCHEMA)?; Ok(()) } pub fn vacuum(&self) -> NyxResult<()> { self.c().execute("VACUUM;", [])?; Ok(()) } // Helpers #[cfg(test)] fn digest_file(path: &Path) -> NyxResult> { let mut hasher = blake3::Hasher::new(); let mut file = fs::File::open(path)?; std::io::copy(&mut file, &mut hasher)?; Ok(hasher.finalize().as_bytes().to_vec()) } /// Hash already-read bytes without re-reading from disk. pub fn digest_bytes(bytes: &[u8]) -> Vec { let mut hasher = blake3::Hasher::new(); hasher.update(bytes); hasher.finalize().as_bytes().to_vec() } } } #[test] fn indexer_should_scan_and_upsert_logic() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let file = td.path().join("sample.rs"); std::fs::write(&file, "fn main() {}").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let idx = index::Indexer::from_pool("proj", &pool).unwrap(); // first time: nothing in DB → must scan assert!(idx.should_scan(&file).unwrap()); // after upsert: no changes → should *not* scan idx.upsert_file(&file).unwrap(); assert!(!idx.should_scan(&file).unwrap()); // modify contents std::thread::sleep(std::time::Duration::from_millis(25)); // ensure mtime tick std::fs::write(&file, "fn main() { /* changed */ }").unwrap(); assert!(idx.should_scan(&file).unwrap()); } #[test] fn replace_issues_and_query_back() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let file = td.path().join("code.go"); std::fs::write(&file, "package main").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let fid = idx.upsert_file(&file).unwrap(); let issues = [ index::IssueRow { rule_id: "X1", severity: "High", line: 3, col: 7, }, index::IssueRow { rule_id: "X2", severity: "Low", line: 4, col: 1, }, ]; idx.replace_issues(fid, issues.clone()).unwrap(); let stored = idx.get_issues_from_file(&file).unwrap(); assert_eq!(stored.len(), 2); assert!( stored .iter() .any(|d| d.id == "X1" && d.severity == crate::patterns::Severity::High) ); assert!( stored .iter() .any(|d| d.id == "X2" && d.severity == crate::patterns::Severity::Low) ); } #[test] fn clear_and_vacuum_reset_tables() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("f.rs"); std::fs::write(&f, "//").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let idx = index::Indexer::from_pool("proj", &pool).unwrap(); idx.upsert_file(&f).unwrap(); assert!(!idx.get_files("proj").unwrap().is_empty()); idx.clear().unwrap(); idx.vacuum().unwrap(); assert!(idx.get_files("proj").unwrap().is_empty()); } #[test] fn clear_preserves_scan_history_tables() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); let idx = index::Indexer::from_pool("_scans", &pool).unwrap(); idx.insert_scan(&index::ScanRecord { id: "scan-1".to_string(), status: "completed".to_string(), scan_root: td.path().display().to_string(), started_at: Some("2026-03-25T12:00:00Z".to_string()), finished_at: Some("2026-03-25T12:00:01Z".to_string()), duration_secs: Some(1.0), engine_version: Some("test".to_string()), languages: None, files_scanned: Some(1), files_skipped: Some(0), finding_count: Some(0), findings_json: Some("[]".to_string()), timing_json: None, error: None, }) .unwrap(); let proj_idx = index::Indexer::from_pool("proj", &pool).unwrap(); proj_idx.clear().unwrap(); let loaded = idx .get_scan("scan-1") .unwrap() .expect("scan history should survive index clears"); assert_eq!(loaded.status, "completed"); } #[test] fn ssa_summaries_round_trip() { use crate::labels::Cap; use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform}; let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("app.py"); std::fs::write(&f, "def process(data): return data").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash = index::Indexer::digest_bytes(b"def process(data): return data"); let summaries = vec![ ( "process".to_string(), 1_usize, "python".to_string(), "app.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ), ( "sanitize".to_string(), 1_usize, "python".to_string(), "app.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))], param_to_sink: vec![( 0, smallvec::smallvec![crate::summary::SinkSite::cap_only(Cap::SQL_QUERY)], )], source_caps: Cap::ENV_VAR, param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ), ]; idx.replace_ssa_summaries_for_file(&f, &hash, &summaries) .unwrap(); let loaded = idx.load_all_ssa_summaries().unwrap(); assert_eq!(loaded.len(), 2); // Check first summary let (_, name1, lang1, arity1, ns1, _, _, _, sum1) = loaded .iter() .find(|(_, n, _, _, _, _, _, _, _)| n == "process") .unwrap(); assert_eq!(name1, "process"); assert_eq!(lang1, "python"); assert_eq!(*arity1, 1); assert_eq!(ns1, "app.py"); assert_eq!(sum1.param_to_return, vec![(0, TaintTransform::Identity)]); assert!(sum1.param_to_sink.is_empty()); // Check second summary let (_, name2, _, _, _, _, _, _, sum2) = loaded .iter() .find(|(_, n, _, _, _, _, _, _, _)| n == "sanitize") .unwrap(); assert_eq!(name2, "sanitize"); assert_eq!( sum2.param_to_return, vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))] ); assert_eq!(sum2.param_to_sink_caps(), vec![(0, Cap::SQL_QUERY)]); assert_eq!(sum2.source_caps, Cap::ENV_VAR); } /// Round-trip test for [`crate::summary::ssa_summary::PathFactReturnEntry`]: /// asserts that `return_path_facts` survive serialise → SQLite persist → /// load → deserialise. Regression guard for the per-return-path PathFact /// decomposition that closes the rs-safe-014 / tar-rs / rs-safe-016 FP /// cluster, without this round-trip working, cross-file callers lose /// the per-arm narrowing and inline-only callees regain the joined-fact /// dilution. #[test] fn ssa_summaries_round_trip_preserves_return_path_facts() { use crate::abstract_interp::PathFact; use crate::summary::ssa_summary::{PathFactReturnEntry, SsaFuncSummary, TaintTransform}; use smallvec::smallvec; let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("sanitize.rs"); std::fs::write(&f, "// sanitizer body").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash = index::Indexer::digest_bytes(b"// sanitizer body"); let return_path_facts = smallvec![ PathFactReturnEntry { predicate_hash: 0, known_true: 0, known_false: 0, path_fact: PathFact::top(), variant_inner_fact: None, }, PathFactReturnEntry { predicate_hash: 17, known_true: 0, known_false: 0, path_fact: PathFact::top(), variant_inner_fact: Some( PathFact::top() .with_dotdot_cleared() .with_absolute_cleared(), ), }, ]; let summary = SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], return_path_facts: return_path_facts.clone(), ..Default::default() }; let row = ( "sanitize_path".to_string(), 1_usize, "rust".to_string(), "sanitize.rs".to_string(), String::new(), None, crate::symbol::FuncKind::Function, summary, ); idx.replace_ssa_summaries_for_file(&f, &hash, &[row]) .unwrap(); let loaded = idx.load_all_ssa_summaries().unwrap(); assert_eq!(loaded.len(), 1); let (_, name, _, _, _, _, _, _, sum) = &loaded[0]; assert_eq!(name, "sanitize_path"); assert_eq!( sum.return_path_facts.len(), 2, "two distinct return paths must round-trip" ); // Find each entry by predicate hash so order doesn't matter. let none_arm = sum .return_path_facts .iter() .find(|e| e.predicate_hash == 0) .expect("unguarded entry"); assert!(none_arm.path_fact.is_top()); assert!(none_arm.variant_inner_fact.is_none()); let some_arm = sum .return_path_facts .iter() .find(|e| e.predicate_hash == 17) .expect("guarded entry"); let inner = some_arm .variant_inner_fact .as_ref() .expect("inner fact survives round-trip"); assert!( inner.is_path_safe(), "Some arm's inner fact stays path-safe" ); } #[test] fn ssa_summaries_hash_rescan_replaces_stale() { use crate::labels::Cap; use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform}; let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("lib.py"); std::fs::write(&f, "v1").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash_v1 = index::Indexer::digest_bytes(b"v1"); let sums_v1 = vec![( "old_func".to_string(), 1_usize, "python".to_string(), "lib.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, )]; idx.replace_ssa_summaries_for_file(&f, &hash_v1, &sums_v1) .unwrap(); // Simulate file change: different function, different hash let hash_v2 = index::Indexer::digest_bytes(b"v2"); let sums_v2 = vec![( "new_func".to_string(), 2_usize, "python".to_string(), "lib.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::StripBits(Cap::SHELL_ESCAPE))], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, )]; idx.replace_ssa_summaries_for_file(&f, &hash_v2, &sums_v2) .unwrap(); let loaded = idx.load_all_ssa_summaries().unwrap(); assert_eq!( loaded.len(), 1, "old summary should be replaced, not duplicated" ); assert_eq!(loaded[0].1, "new_func"); } #[test] fn clear_drops_ssa_summaries_table() { use crate::labels::Cap; use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform}; let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("test.py"); std::fs::write(&f, "x").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash = index::Indexer::digest_bytes(b"x"); let sums = vec![( "f".to_string(), 1_usize, "python".to_string(), "test.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, )]; idx.replace_ssa_summaries_for_file(&f, &hash, &sums) .unwrap(); assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 1); idx.clear().unwrap(); assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 0); } // ── CalleeSsaBody persistence tests ────────────────────────────────────── /// Helper: build a minimal CalleeSsaBody for DB tests. #[cfg(test)] fn make_test_callee_body( num_blocks: usize, param_count: usize, ) -> crate::taint::ssa_transfer::CalleeSsaBody { use crate::ssa::ir::*; use smallvec::smallvec; let mut blocks = Vec::new(); for i in 0..num_blocks { blocks.push(SsaBlock { id: BlockId(i as u32), phis: vec![], body: vec![SsaInst { value: SsaValue(i as u32), op: SsaOp::Const(Some(format!("{i}"))), cfg_node: petgraph::graph::NodeIndex::new(0), var_name: None, span: (0, 0), }], terminator: Terminator::Return(Some(SsaValue(0))), preds: smallvec![], succs: smallvec![], }); } let value_defs: Vec = (0..num_blocks) .map(|i| ValueDef { var_name: None, cfg_node: petgraph::graph::NodeIndex::new(0), block: BlockId(i as u32), }) .collect(); crate::taint::ssa_transfer::CalleeSsaBody { ssa: SsaBody { blocks, entry: BlockId(0), value_defs, cfg_node_map: std::collections::HashMap::new(), exception_edges: vec![], field_interner: crate::ssa::ir::FieldInterner::new(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), slot_scoped_assigns: std::collections::HashSet::new(), }, opt: crate::ssa::OptimizeResult { const_values: std::collections::HashMap::new(), type_facts: crate::ssa::type_facts::TypeFactResult { facts: std::collections::HashMap::new(), }, xml_parser_config: crate::ssa::xml_config::XmlParserConfigResult::default(), xpath_config: crate::ssa::xpath_config::XPathConfigResult::default(), alias_result: crate::ssa::alias::BaseAliasResult::empty(), points_to: crate::ssa::heap::PointsToResult::empty(), module_aliases: std::collections::HashMap::new(), branches_pruned: 0, copies_eliminated: 0, dead_defs_removed: 0, }, param_count, node_meta: std::collections::HashMap::new(), body_graph: None, cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()), } } #[test] fn cross_package_imports_round_trip_via_replace_all_for_file() { use crate::symbol::{FuncKey, FuncKind, Lang}; let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("caller.ts"); std::fs::write(&f, "import { escape } from '@scope/util';").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash = index::Indexer::digest_bytes(b"caller content"); let mut imports: std::collections::HashMap = std::collections::HashMap::new(); imports.insert( "escape".to_string(), FuncKey { lang: Lang::TypeScript, namespace: "packages/util/src/escape.ts".to_string(), container: String::new(), name: "escape".to_string(), arity: None, disambig: None, kind: FuncKind::Function, }, ); idx.replace_all_for_file(&f, &hash, &[], &[], &[], &[], Some(("caller.ts", &imports))) .unwrap(); let loaded = idx.load_all_cross_package_imports().unwrap(); assert_eq!(loaded.len(), 1); let (fp, ns, map) = &loaded[0]; assert_eq!(fp, &f.to_string_lossy().to_string()); assert_eq!(ns, "caller.ts"); assert_eq!(map.len(), 1); let key = map .get("escape") .expect("escape binding survives round-trip"); assert_eq!(key.namespace, "packages/util/src/escape.ts"); assert_eq!(key.name, "escape"); assert_eq!(key.lang, Lang::TypeScript); // Empty input on rescan should drop the row. idx.replace_all_for_file(&f, &hash, &[], &[], &[], &[], None) .unwrap(); assert!(idx.load_all_cross_package_imports().unwrap().is_empty()); } #[test] fn ssa_bodies_round_trip() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("helper.py"); std::fs::write(&f, "def transform(val): return val").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash = index::Indexer::digest_bytes(b"def transform(val): return val"); let body = make_test_callee_body(3, 1); let bodies = vec![( "transform".to_string(), 1_usize, "python".to_string(), "helper.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, body, )]; idx.replace_ssa_bodies_for_file(&f, &hash, &bodies).unwrap(); let loaded = idx.load_all_ssa_bodies().unwrap(); assert_eq!(loaded.len(), 1); let (fp, name, lang, arity, ns, _, _, _, loaded_body) = &loaded[0]; assert_eq!(fp, &f.to_string_lossy().to_string()); assert_eq!(name, "transform"); assert_eq!(lang, "python"); assert_eq!(*arity, 1); assert_eq!(ns, "helper.py"); assert_eq!(loaded_body.param_count, 1); assert_eq!(loaded_body.ssa.blocks.len(), 3); } #[test] fn ssa_bodies_replace_on_rescan() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("helper.py"); std::fs::write(&f, "v1").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); // Store v1 with 2 blocks let hash1 = index::Indexer::digest_bytes(b"v1"); let bodies1 = vec![( "func".to_string(), 1_usize, "python".to_string(), "h.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_callee_body(2, 1), )]; idx.replace_ssa_bodies_for_file(&f, &hash1, &bodies1) .unwrap(); assert_eq!(idx.load_all_ssa_bodies().unwrap().len(), 1); assert_eq!(idx.load_all_ssa_bodies().unwrap()[0].8.ssa.blocks.len(), 2); // Store v2 with 5 blocks, should replace, not accumulate let hash2 = index::Indexer::digest_bytes(b"v2"); let bodies2 = vec![( "func".to_string(), 1_usize, "python".to_string(), "h.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_callee_body(5, 1), )]; idx.replace_ssa_bodies_for_file(&f, &hash2, &bodies2) .unwrap(); let loaded = idx.load_all_ssa_bodies().unwrap(); assert_eq!(loaded.len(), 1, "should replace, not accumulate"); assert_eq!(loaded[0].8.ssa.blocks.len(), 5); } #[test] fn ssa_bodies_with_node_meta_round_trip() { use crate::cfg::{NodeInfo, TaintMeta}; use crate::labels::{Cap, DataLabel}; use crate::taint::ssa_transfer::CrossFileNodeMeta; let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("helper.py"); std::fs::write(&f, "code").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash = index::Indexer::digest_bytes(b"code"); let mut body = make_test_callee_body(1, 0); body.node_meta.insert( 0, CrossFileNodeMeta { info: NodeInfo { bin_op: Some(crate::cfg::BinOp::Add), taint: TaintMeta { labels: smallvec::smallvec![DataLabel::Sink(Cap::SQL_QUERY)], ..Default::default() }, ..Default::default() }, }, ); let bodies = vec![( "f".to_string(), 0_usize, "python".to_string(), "h.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, body, )]; idx.replace_ssa_bodies_for_file(&f, &hash, &bodies).unwrap(); let loaded = idx.load_all_ssa_bodies().unwrap(); assert_eq!(loaded.len(), 1); let meta = &loaded[0].8.node_meta; assert_eq!(meta.len(), 1); assert_eq!(meta[&0].info.bin_op, Some(crate::cfg::BinOp::Add)); assert!(matches!(meta[&0].info.taint.labels[0], DataLabel::Sink(cap) if cap == Cap::SQL_QUERY)); } #[test] fn ssa_bodies_removed_on_file_delete() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("helper.py"); std::fs::write(&f, "code").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash = index::Indexer::digest_bytes(b"code"); // Register file first so remove_file_and_related has something to find idx.upsert_file(&f).unwrap(); let bodies = vec![( "f".to_string(), 0_usize, "python".to_string(), "h.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_callee_body(1, 0), )]; idx.replace_ssa_bodies_for_file(&f, &hash, &bodies).unwrap(); assert_eq!(idx.load_all_ssa_bodies().unwrap().len(), 1); // Delete file, should also remove bodies idx.remove_file_and_related(&f).unwrap(); assert_eq!(idx.load_all_ssa_bodies().unwrap().len(), 0); } // ── Persistence hardening tests ───────────────────────────────────────────── /// Helper: build a minimal SsaFuncSummary for persistence tests. #[cfg(test)] fn make_test_ssa_summary() -> crate::summary::ssa_summary::SsaFuncSummary { use crate::labels::Cap; use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform}; SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, } } /// Helper: insert a fake summary + SSA summary + file row for a project. #[cfg(test)] fn populate_project( pool: &r2d2::Pool, project: &str, dir: &std::path::Path, ) { let f = dir.join("app.py"); std::fs::write(&f, "# code").unwrap(); let mut idx = index::Indexer::from_pool(project, pool).unwrap(); idx.upsert_file(&f).unwrap(); let hash = index::Indexer::digest_bytes(b"# code"); // Insert a FuncSummary let func_summary = crate::summary::FuncSummary { name: "do_stuff".to_string(), file_path: f.to_string_lossy().to_string(), param_count: 1, param_names: vec!["data".to_string()], lang: "python".to_string(), source_caps: 0, sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![0], propagates_taint: true, tainted_sink_params: vec![], callees: vec![], ..Default::default() }; idx.replace_summaries_for_file(&f, &hash, &[func_summary]) .unwrap(); // Insert an SSA summary let ssa_sums = vec![( "do_stuff".to_string(), 1_usize, "python".to_string(), "app.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_ssa_summary(), )]; idx.replace_ssa_summaries_for_file(&f, &hash, &ssa_sums) .unwrap(); // Insert an SSA body let bodies = vec![( "do_stuff".to_string(), 1_usize, "python".to_string(), "app.py".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_callee_body(1, 1), )]; idx.replace_ssa_bodies_for_file(&f, &hash, &bodies).unwrap(); } // ── 1. Engine Version Tests ───────────────────────────────────────────────── #[test] fn version_match_no_reset() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // First init: creates DB and sets version let pool = index::Indexer::init(&db).unwrap(); populate_project(&pool, "proj", td.path()); // Verify data exists assert_eq!( index::Indexer::count_rows(&pool, "function_summaries", "proj").unwrap(), 1 ); assert_eq!( index::Indexer::count_rows(&pool, "ssa_function_summaries", "proj").unwrap(), 1 ); assert_eq!( index::Indexer::count_rows(&pool, "ssa_function_bodies", "proj").unwrap(), 1 ); // Second init with same version: data should be preserved drop(pool); let pool2 = index::Indexer::init(&db).unwrap(); assert_eq!( index::Indexer::count_rows(&pool2, "function_summaries", "proj").unwrap(), 1 ); assert_eq!( index::Indexer::count_rows(&pool2, "ssa_function_summaries", "proj").unwrap(), 1 ); assert_eq!( index::Indexer::count_rows(&pool2, "ssa_function_bodies", "proj").unwrap(), 1 ); let stored = index::Indexer::get_stored_engine_version(&pool2).unwrap(); assert_eq!(stored.as_deref(), Some(index::ENGINE_VERSION)); } #[test] fn version_mismatch_triggers_reset() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // First init let pool = index::Indexer::init(&db).unwrap(); populate_project(&pool, "proj", td.path()); // Simulate an old version index::Indexer::set_engine_version(&pool, "0.0.1-old").unwrap(); // Verify data is populated assert_eq!( index::Indexer::count_rows(&pool, "function_summaries", "proj").unwrap(), 1 ); // Reopen, version mismatch should trigger full wipe drop(pool); let pool2 = index::Indexer::init(&db).unwrap(); assert_eq!( index::Indexer::count_rows(&pool2, "function_summaries", "proj").unwrap(), 0 ); assert_eq!( index::Indexer::count_rows(&pool2, "ssa_function_summaries", "proj").unwrap(), 0 ); assert_eq!( index::Indexer::count_rows(&pool2, "ssa_function_bodies", "proj").unwrap(), 0 ); // files table should also be cleared (forces rescan) let idx = index::Indexer::from_pool("proj", &pool2).unwrap(); assert!(idx.get_files("proj").unwrap().is_empty()); // Version should now be updated let stored = index::Indexer::get_stored_engine_version(&pool2).unwrap(); assert_eq!(stored.as_deref(), Some(index::ENGINE_VERSION)); } #[test] fn missing_version_triggers_reset() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // Init the DB let pool = index::Indexer::init(&db).unwrap(); populate_project(&pool, "proj", td.path()); // Remove the metadata row to simulate a pre-version DB { let conn = pool.get().unwrap(); conn.execute("DELETE FROM nyx_metadata WHERE key = 'engine_version'", []) .unwrap(); } // Reopen drop(pool); let pool2 = index::Indexer::init(&db).unwrap(); // All caches should be wiped assert_eq!( index::Indexer::count_rows(&pool2, "function_summaries", "proj").unwrap(), 0 ); assert_eq!( index::Indexer::count_rows(&pool2, "ssa_function_summaries", "proj").unwrap(), 0 ); // Version should now be set let stored = index::Indexer::get_stored_engine_version(&pool2).unwrap(); assert_eq!(stored.as_deref(), Some(index::ENGINE_VERSION)); } #[test] fn multiple_opens_no_repeated_resets() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // First open let pool = index::Indexer::init(&db).unwrap(); populate_project(&pool, "proj", td.path()); drop(pool); // Second open, should preserve data let pool2 = index::Indexer::init(&db).unwrap(); assert_eq!( index::Indexer::count_rows(&pool2, "function_summaries", "proj").unwrap(), 1 ); // Re-populate after second open populate_project(&pool2, "proj2", td.path()); drop(pool2); // Third open, should still preserve both projects let pool3 = index::Indexer::init(&db).unwrap(); assert_eq!( index::Indexer::count_rows(&pool3, "function_summaries", "proj").unwrap(), 1 ); assert_eq!( index::Indexer::count_rows(&pool3, "function_summaries", "proj2").unwrap(), 1 ); } #[test] fn write_engine_version_on_scan_completion() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); // Simulate writing version after scan index::Indexer::write_engine_version(&pool).unwrap(); let stored = index::Indexer::get_stored_engine_version(&pool).unwrap(); assert_eq!(stored.as_deref(), Some(index::ENGINE_VERSION)); } // ── 2. Migration Tests ────────────────────────────────────────────────────── #[test] fn fresh_db_no_migration_needed() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // Should not panic and tables should exist let pool = index::Indexer::init(&db).unwrap(); let idx = index::Indexer::from_pool("proj", &pool).unwrap(); // Verify tables are accessible assert!(idx.load_all_summaries().unwrap().is_empty()); assert!(idx.load_all_ssa_summaries().unwrap().is_empty()); assert!(idx.load_all_ssa_bodies().unwrap().is_empty()); assert!(idx.get_files("proj").unwrap().is_empty()); } #[test] fn init_applies_busy_timeout_to_every_pooled_connection() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); // Hold several connections at once so r2d2 must hand out distinct pooled // handles. The timeout is connection-local, so configuring only the schema // setup connection would leave later worker connections at rusqlite's // default. let conns: Vec<_> = (0..4).map(|_| pool.get().unwrap()).collect(); for conn in &conns { let timeout_ms: i64 = conn .query_row("PRAGMA busy_timeout", [], |row| row.get(0)) .unwrap(); assert_eq!(timeout_ms, 60_000); } } #[test] fn index_write_queue_serializes_parallel_writes() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); let project = "proj"; let writer = index::IndexWriteQueue::start_with_capacity(project, std::sync::Arc::clone(&pool), 2); let tx = writer.sender(); let mut handles = Vec::new(); for i in 0..16 { let path = td.path().join(format!("file_{i}.rs")); let source = format!("fn f_{i}() {{}}\n"); std::fs::write(&path, &source).unwrap(); let hash = index::Indexer::digest_bytes(source.as_bytes()); let tx = tx.clone(); handles.push(std::thread::spawn(move || { tx.enqueue(move |idx| { let file_id = idx.upsert_file_with_hash(&path, &hash)?; let issue_rows = [(String::from("test-rule"), String::from("LOW"), 1_i64, 0_i64)]; idx.replace_issues( file_id, issue_rows .iter() .map(|(rule_id, severity, line, col)| index::IssueRow { rule_id: rule_id.as_str(), severity: severity.as_str(), line: *line, col: *col, }), )?; Ok(()) }) .unwrap(); })); } for handle in handles { handle.join().unwrap(); } drop(tx); writer.finish("test").unwrap(); let idx = index::Indexer::from_pool(project, &pool).unwrap(); let files = idx.get_files(project).unwrap(); assert_eq!(files.len(), 16); for path in files { assert_eq!(idx.get_issues_from_file(&path).unwrap().len(), 1); } } #[test] fn missing_ssa_namespace_column_triggers_recreate() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // Create DB with an outdated SSA table (no namespace column) { let conn = rusqlite::Connection::open(&db).unwrap(); conn.execute_batch( "CREATE TABLE IF NOT EXISTS files ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, path TEXT NOT NULL, hash BLOB NOT NULL, mtime INTEGER NOT NULL, scanned_at INTEGER NOT NULL, UNIQUE(project, path) ); CREATE TABLE IF NOT EXISTS function_summaries ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, name TEXT NOT NULL, arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL, summary TEXT NOT NULL, updated_at INTEGER NOT NULL, UNIQUE(project, file_path, name, arity) ); CREATE TABLE IF NOT EXISTS ssa_function_summaries ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, name TEXT NOT NULL, arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL, summary TEXT NOT NULL, updated_at INTEGER NOT NULL, UNIQUE(project, file_path, name, arity) );", ) .unwrap(); } // Open via init, should detect missing namespace and recreate let pool = index::Indexer::init(&db).unwrap(); // Verify the table now has the namespace column by inserting with it let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let f = td.path().join("test.py"); std::fs::write(&f, "x").unwrap(); let hash = index::Indexer::digest_bytes(b"x"); let sums = vec![( "func".to_string(), 1_usize, "python".to_string(), "ns".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_ssa_summary(), )]; // This would fail if the namespace column doesn't exist idx.replace_ssa_summaries_for_file(&f, &hash, &sums) .unwrap(); assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 1); } /// Phase 10 migration test. Build a database whose /// `(ssa_)function_summaries` tables are at the post-Phase 09 shape /// (namespace + container + disambig + kind columns present, but no /// `entry_kind` column). Insert a row directly so the migration must /// preserve it. After `init`, the column should exist on both tables /// without dropping the pre-existing data. #[test] fn entry_kind_column_added_in_place_without_data_loss() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // Hand-build a pre-Phase-10 schema (no `entry_kind` column). { let conn = rusqlite::Connection::open(&db).unwrap(); conn.execute_batch( "CREATE TABLE files ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, path TEXT NOT NULL, hash BLOB NOT NULL, mtime INTEGER NOT NULL, scanned_at INTEGER NOT NULL, UNIQUE(project, path) ); CREATE TABLE function_summaries ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, name TEXT NOT NULL, arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL, container TEXT NOT NULL DEFAULT '', disambig INTEGER, kind TEXT NOT NULL DEFAULT 'fn', summary TEXT NOT NULL, updated_at INTEGER NOT NULL, UNIQUE(project, file_path, name, container, arity, disambig, kind) ); CREATE TABLE ssa_function_summaries ( id INTEGER PRIMARY KEY AUTOINCREMENT, project TEXT NOT NULL, file_path TEXT NOT NULL, file_hash BLOB NOT NULL, name TEXT NOT NULL, arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL, namespace TEXT NOT NULL DEFAULT '', container TEXT NOT NULL DEFAULT '', disambig INTEGER, kind TEXT NOT NULL DEFAULT 'fn', summary TEXT NOT NULL, updated_at INTEGER NOT NULL, UNIQUE(project, file_path, name, container, arity, disambig, kind) );", ) .unwrap(); conn.execute( "INSERT INTO function_summaries (project, file_path, file_hash, name, arity, lang, container, disambig, kind, summary, updated_at) VALUES ('proj', 'lib.py', X'00', 'old_func', 1, 'python', '', NULL, 'fn', '{}', 0)", [], ) .unwrap(); conn.execute( "INSERT INTO ssa_function_summaries (project, file_path, file_hash, name, arity, lang, namespace, container, disambig, kind, summary, updated_at) VALUES ('proj', 'lib.py', X'00', 'old_func', 1, 'python', '', '', NULL, 'fn', '{}', 0)", [], ) .unwrap(); // Pre-populate the metadata so `check_schema_version` and // `check_engine_version` consider the database current and do // not wipe the rows we just inserted. The point of this test // is the in-place `ALTER TABLE`; the version checks are a // separate concern. conn.execute( "CREATE TABLE IF NOT EXISTS nyx_metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)", [], ) .unwrap(); conn.execute( "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('schema_version', ?1)", rusqlite::params![index::SCHEMA_VERSION], ) .unwrap(); conn.execute( "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)", rusqlite::params![index::ENGINE_VERSION], ) .unwrap(); } // Open via init — should non-destructively ALTER both tables to // add `entry_kind`, leaving the seeded rows intact. let pool = index::Indexer::init(&db).unwrap(); let conn = pool.get().unwrap(); let cols_for = |table: &str| { let mut stmt = conn .prepare(&format!("PRAGMA table_info({table})")) .unwrap(); let v: Vec = stmt .query_map([], |r| r.get::<_, String>(1)) .unwrap() .filter_map(Result::ok) .collect(); v }; assert!( cols_for("function_summaries") .iter() .any(|c| c == "entry_kind"), "function_summaries.entry_kind missing after migration" ); assert!( cols_for("ssa_function_summaries") .iter() .any(|c| c == "entry_kind"), "ssa_function_summaries.entry_kind missing after migration" ); // Pre-existing rows survive the migration. let func_rows: i64 = conn .query_row( "SELECT COUNT(*) FROM function_summaries WHERE project = 'proj'", [], |r| r.get(0), ) .unwrap(); assert_eq!(func_rows, 1, "pre-existing function_summaries row was lost"); let ssa_rows: i64 = conn .query_row( "SELECT COUNT(*) FROM ssa_function_summaries WHERE project = 'proj'", [], |r| r.get(0), ) .unwrap(); assert_eq!( ssa_rows, 1, "pre-existing ssa_function_summaries row was lost" ); // Existing rows have NULL entry_kind by default. let entry_kind_value: Option = conn .query_row( "SELECT entry_kind FROM function_summaries WHERE project = 'proj'", [], |r| r.get(0), ) .unwrap(); assert!(entry_kind_value.is_none()); } #[test] fn valid_schema_no_recreate() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // First init, creates all tables let pool = index::Indexer::init(&db).unwrap(); populate_project(&pool, "proj", td.path()); drop(pool); // Second init, schema is valid, should NOT drop/recreate let pool2 = index::Indexer::init(&db).unwrap(); // Data survives because schema was already correct assert_eq!( index::Indexer::count_rows(&pool2, "ssa_function_summaries", "proj").unwrap(), 1 ); } // ── 3. Deserialization Failure Tests ──────────────────────────────────────── #[test] fn invalid_json_skipped_in_load_summaries() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); // Insert corrupted JSON directly { let conn = pool.get().unwrap(); conn.execute( "INSERT INTO function_summaries (project, file_path, file_hash, name, arity, lang, summary, updated_at) VALUES ('proj', 'bad.py', X'00', 'bad', 1, 'python', '{not valid json!!!', 0)", [], ).unwrap(); } let idx = index::Indexer::from_pool("proj", &pool).unwrap(); // Should not panic; invalid row is skipped let loaded = idx.load_all_summaries().unwrap(); assert_eq!(loaded.len(), 0); } #[test] fn invalid_json_skipped_in_load_ssa_summaries() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); // Insert corrupted JSON directly { let conn = pool.get().unwrap(); conn.execute( "INSERT INTO ssa_function_summaries (project, file_path, file_hash, name, arity, lang, namespace, summary, updated_at) VALUES ('proj', 'bad.py', X'00', 'bad', 1, 'python', '', 'CORRUPTED', 0)", [], ).unwrap(); } let idx = index::Indexer::from_pool("proj", &pool).unwrap(); let loaded = idx.load_all_ssa_summaries().unwrap(); assert_eq!(loaded.len(), 0); } #[test] fn invalid_json_skipped_in_load_ssa_bodies() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); { let conn = pool.get().unwrap(); conn.execute( "INSERT INTO ssa_function_bodies (project, file_path, file_hash, name, arity, lang, namespace, body, updated_at) VALUES ('proj', 'bad.py', X'00', 'bad', 1, 'python', '', '{{{{broken', 0)", [], ).unwrap(); } let idx = index::Indexer::from_pool("proj", &pool).unwrap(); let loaded = idx.load_all_ssa_bodies().unwrap(); assert_eq!(loaded.len(), 0); } #[test] fn partial_failure_does_not_drop_valid_rows() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); // Insert one valid SSA summary via the normal API let f = td.path().join("good.py"); std::fs::write(&f, "ok").unwrap(); let hash = index::Indexer::digest_bytes(b"ok"); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let sums = vec![( "good_func".to_string(), 1_usize, "python".to_string(), "".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_ssa_summary(), )]; idx.replace_ssa_summaries_for_file(&f, &hash, &sums) .unwrap(); // Insert a corrupted row directly { let conn = pool.get().unwrap(); conn.execute( "INSERT INTO ssa_function_summaries (project, file_path, file_hash, name, arity, lang, namespace, summary, updated_at) VALUES ('proj', 'bad.py', X'00', 'bad_func', 1, 'python', '', 'NOT_JSON', 0)", [], ).unwrap(); } // Load: should get exactly the 1 valid row let loaded = idx.load_all_ssa_summaries().unwrap(); assert_eq!(loaded.len(), 1); assert_eq!(loaded[0].1, "good_func"); } // ── 4. Integration / Round-Trip Tests ─────────────────────────────────────── #[test] fn scan_persist_reload_cycle() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); populate_project(&pool, "myproject", td.path()); // Write version as scan completion would index::Indexer::write_engine_version(&pool).unwrap(); // Reload from a fresh pool drop(pool); let pool2 = index::Indexer::init(&db).unwrap(); let idx = index::Indexer::from_pool("myproject", &pool2).unwrap(); assert_eq!(idx.load_all_summaries().unwrap().len(), 1); assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 1); assert_eq!(idx.load_all_ssa_bodies().unwrap().len(), 1); assert_eq!(idx.get_files("myproject").unwrap().len(), 1); } #[test] fn version_bump_forces_reindex_behavior() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); // Simulate a previous engine version let pool = index::Indexer::init(&db).unwrap(); populate_project(&pool, "proj", td.path()); index::Indexer::set_engine_version(&pool, "0.1.0-alpha").unwrap(); drop(pool); // Reopen: version bump should force full invalidation let pool2 = index::Indexer::init(&db).unwrap(); // Everything should be wiped let idx = index::Indexer::from_pool("proj", &pool2).unwrap(); assert!(idx.load_all_summaries().unwrap().is_empty()); assert!(idx.load_all_ssa_summaries().unwrap().is_empty()); assert!(idx.load_all_ssa_bodies().unwrap().is_empty()); assert!(idx.get_files("proj").unwrap().is_empty()); // After wiping, we can re-populate and it persists populate_project(&pool2, "proj", td.path()); assert_eq!(idx.load_all_summaries().unwrap().len(), 1); } // ── 5. Edge Cases ─────────────────────────────────────────────────────────── #[test] fn empty_db_file_works() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("empty.sqlite"); // Create empty file std::fs::write(&db, "").unwrap(); // init should handle this (SQLite will overwrite the empty file) let pool = index::Indexer::init(&db).unwrap(); let idx = index::Indexer::from_pool("proj", &pool).unwrap(); assert!(idx.load_all_summaries().unwrap().is_empty()); } #[test] fn multiple_projects_isolated() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); // Populate two different projects let f1 = td.path().join("proj1_file.py"); let f2 = td.path().join("proj2_file.py"); std::fs::write(&f1, "p1").unwrap(); std::fs::write(&f2, "p2").unwrap(); let mut idx1 = index::Indexer::from_pool("project_a", &pool).unwrap(); idx1.upsert_file(&f1).unwrap(); let hash1 = index::Indexer::digest_bytes(b"p1"); let sums1 = vec![( "func_a".to_string(), 0_usize, "python".to_string(), "".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_ssa_summary(), )]; idx1.replace_ssa_summaries_for_file(&f1, &hash1, &sums1) .unwrap(); let mut idx2 = index::Indexer::from_pool("project_b", &pool).unwrap(); idx2.upsert_file(&f2).unwrap(); let hash2 = index::Indexer::digest_bytes(b"p2"); let sums2 = vec![( "func_b".to_string(), 0_usize, "python".to_string(), "".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_ssa_summary(), )]; idx2.replace_ssa_summaries_for_file(&f2, &hash2, &sums2) .unwrap(); // Each project sees only its own summaries assert_eq!(idx1.load_all_ssa_summaries().unwrap().len(), 1); assert_eq!(idx1.load_all_ssa_summaries().unwrap()[0].1, "func_a"); assert_eq!(idx2.load_all_ssa_summaries().unwrap().len(), 1); assert_eq!(idx2.load_all_ssa_summaries().unwrap()[0].1, "func_b"); // Files are project-scoped too (get_files queries by its argument) assert_eq!(idx1.get_files("project_a").unwrap().len(), 1); assert_eq!(idx2.get_files("project_b").unwrap().len(), 1); // Cross-project: project_a should have no project_b files assert_eq!(idx1.get_files("nonexistent_project").unwrap().len(), 0); } #[test] fn version_reset_wipes_all_projects() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); // Populate two projects let f1 = td.path().join("a.py"); let f2 = td.path().join("b.py"); std::fs::write(&f1, "a").unwrap(); std::fs::write(&f2, "b").unwrap(); let mut idx1 = index::Indexer::from_pool("proj_x", &pool).unwrap(); idx1.upsert_file(&f1).unwrap(); let hash1 = index::Indexer::digest_bytes(b"a"); let sums1 = vec![( "fx".to_string(), 0_usize, "python".to_string(), "".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_ssa_summary(), )]; idx1.replace_ssa_summaries_for_file(&f1, &hash1, &sums1) .unwrap(); let mut idx2 = index::Indexer::from_pool("proj_y", &pool).unwrap(); idx2.upsert_file(&f2).unwrap(); let hash2 = index::Indexer::digest_bytes(b"b"); let sums2 = vec![( "fy".to_string(), 0_usize, "python".to_string(), "".to_string(), String::new(), None, crate::symbol::FuncKind::Function, make_test_ssa_summary(), )]; idx2.replace_ssa_summaries_for_file(&f2, &hash2, &sums2) .unwrap(); // Simulate version mismatch index::Indexer::set_engine_version(&pool, "0.0.0-stale").unwrap(); drop(pool); let pool2 = index::Indexer::init(&db).unwrap(); // Both projects' data should be gone (version check is global, not per-project) assert_eq!( index::Indexer::count_rows(&pool2, "function_summaries", "proj_x").unwrap(), 0 ); assert_eq!( index::Indexer::count_rows(&pool2, "ssa_function_summaries", "proj_x").unwrap(), 0 ); assert_eq!( index::Indexer::count_rows(&pool2, "function_summaries", "proj_y").unwrap(), 0 ); assert_eq!( index::Indexer::count_rows(&pool2, "ssa_function_summaries", "proj_y").unwrap(), 0 ); } #[test] fn metadata_table_survives_clear() { let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let pool = index::Indexer::init(&db).unwrap(); index::Indexer::write_engine_version(&pool).unwrap(); let idx = index::Indexer::from_pool("proj", &pool).unwrap(); idx.clear().unwrap(); // Metadata should survive clear (clear only drops analysis tables) let stored = index::Indexer::get_stored_engine_version(&pool).unwrap(); assert_eq!(stored.as_deref(), Some(index::ENGINE_VERSION)); } /// field_points_to round-trips through /// the SsaFuncSummary SQLite blob. Pin that the new field_points_to /// records preserve param_field_reads, param_field_writes, the /// receiver sentinel (`u32::MAX`), the container-element marker /// (``), and the `overflow` flag across serialise → store → /// load → deserialise. This is the strict-additive contract for /// older blobs without field_points_to (default-empty deserialises cleanly) and the /// completeness check for the W3 cross-call resolver. #[test] fn ssa_summaries_round_trip_preserves_field_points_to() { use crate::summary::points_to::FieldPointsToSummary; use crate::summary::ssa_summary::SsaFuncSummary; let td = tempfile::tempdir().unwrap(); let db = td.path().join("nyx.sqlite"); let f = td.path().join("store.rs"); std::fs::write(&f, "// helper that writes obj.cache").unwrap(); let pool = index::Indexer::init(&db).unwrap(); let mut idx = index::Indexer::from_pool("proj", &pool).unwrap(); let hash = index::Indexer::digest_bytes(b"// helper that writes obj.cache"); // Build a summary with one read on param 0 ("name"), one write on // param 1 ("cache"), one read on the receiver sentinel ("kind"), // and an ELEM marker on param 0. Round-trip must preserve all // four channels. let mut fpt = FieldPointsToSummary::empty(); fpt.add_read(0, "name"); fpt.add_write(1, "cache"); fpt.add_read(u32::MAX, "kind"); fpt.add_write(0, ""); let summary = SsaFuncSummary { field_points_to: fpt.clone(), ..Default::default() }; let row = ( "store".to_string(), 2_usize, "rust".to_string(), "store.rs".to_string(), String::new(), None, crate::symbol::FuncKind::Function, summary, ); idx.replace_ssa_summaries_for_file(&f, &hash, &[row]) .unwrap(); let loaded = idx.load_all_ssa_summaries().unwrap(); assert_eq!(loaded.len(), 1, "single summary stored, single returned"); let (_, name, _, _, _, _, _, _, sum) = &loaded[0]; assert_eq!(name, "store"); assert_eq!( sum.field_points_to, fpt, "field_points_to must round-trip byte-equal", ); // Spot-check sentinel + ELEM marker channels. let recv_read = sum .field_points_to .param_field_reads .iter() .find(|(p, _)| *p == u32::MAX) .expect("receiver read at u32::MAX sentinel"); assert!(recv_read.1.iter().any(|s| s == "kind")); let elem_write = sum .field_points_to .param_field_writes .iter() .find(|(p, _)| *p == 0) .expect("param 0 writes recorded"); assert!( elem_write.1.iter().any(|s| s == ""), " marker must survive round-trip without conversion", ); assert!(!sum.field_points_to.overflow); } /// Older blob compatibility: a summary serialised without /// `field_points_to` deserialises with the empty default, no /// migration needed because the field is `#[serde(default)]`. #[test] fn ssa_summaries_legacy_blob_decodes_with_empty_field_points_to() { use crate::summary::ssa_summary::SsaFuncSummary; // Hand-craft JSON without the `field_points_to` key. let legacy_json = r#"{ "param_to_return": [], "param_to_sink": [], "source_caps": 0, "param_to_sink_param": [], "param_container_to_return": [], "param_to_container_store": [], "return_type": null, "return_abstract": null, "source_to_callback": [], "receiver_to_return": null, "receiver_to_sink": 0, "abstract_transfer": [], "param_return_paths": [], "return_path_facts": [], "typed_call_receivers": [] }"#; let sum: SsaFuncSummary = serde_json::from_str(legacy_json).unwrap(); assert!( sum.field_points_to.is_empty(), "missing field_points_to must default to empty", ); } /// Pre-`param_to_gate_filters` blob compatibility: a summary serialised /// before this field existed deserialises with the empty default. /// `#[serde(default)]` on the field means old SQLite blobs round-trip /// without a schema migration, the new field is stored inside the JSON /// `summary` column so SQL-level columns are unchanged. #[test] fn ssa_summaries_pre_gate_filters_blob_decodes_with_empty_param_to_gate_filters() { use crate::summary::ssa_summary::SsaFuncSummary; // Hand-craft JSON without the `param_to_gate_filters` key. let pre_gate_filters_json = r#"{ "param_to_return": [], "param_to_sink": [], "source_caps": 0, "param_to_sink_param": [], "param_container_to_return": [], "param_to_container_store": [], "return_type": null, "return_abstract": null, "source_to_callback": [], "receiver_to_return": null, "receiver_to_sink": 0, "abstract_transfer": [], "param_return_paths": [], "return_path_facts": [], "typed_call_receivers": [] }"#; let sum: SsaFuncSummary = serde_json::from_str(pre_gate_filters_json).unwrap(); assert!( sum.param_to_gate_filters.is_empty(), "missing param_to_gate_filters must default to empty", ); } /// Round-trip: a summary with a populated `param_to_gate_filters` /// survives JSON serialise + deserialise, including the per-position /// cap-mask values needed to preserve SSRF-vs-DATA_EXFIL splits across /// the function-summary boundary. #[test] fn ssa_summaries_param_to_gate_filters_round_trip() { use crate::labels::Cap; use crate::summary::ssa_summary::SsaFuncSummary; let mut sum = SsaFuncSummary::default(); sum.param_to_gate_filters.push((0, Cap::SSRF)); sum.param_to_gate_filters.push((1, Cap::DATA_EXFIL)); let json = serde_json::to_string(&sum).expect("serialize"); let restored: SsaFuncSummary = serde_json::from_str(&json).expect("deserialize"); assert_eq!( restored.param_to_gate_filters, vec![(0, Cap::SSRF), (1, Cap::DATA_EXFIL)], "per-position cap masks must round-trip exactly", ); }