nyx/src/database.rs

4695 lines
176 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! SQLite connection pool and schema for the incremental index.
//!
//! The index stores file content hashes, per-file scan results, and function
//! summaries so subsequent scans can skip files whose content has not changed.
//! The pool is backed by [`r2d2`] with WAL journaling, `synchronous=NORMAL`,
//! and memory-mapped I/O tuned for large codebases.
//!
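//! Core tables: `files`, `issues`, `function_summaries`, `ssa_function_summaries`.
//! The full set (scan history, triage state, verdict cache, surface map, ...)
//! is declared in `SCHEMA`.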
//! SSA-specific persistence lives in [`crate::summary::ssa_summary`]; routines
//! here cover function summaries and file-level hash bookkeeping.
pub mod index {
#![allow(clippy::too_many_arguments, clippy::type_complexity)]
use crate::commands::scan::Diag;
use crate::errors::{NyxError, NyxResult};
use crate::patterns::Severity;
use r2d2::{Pool, PooledConnection};
use r2d2_sqlite::SqliteConnectionManager;
use rusqlite::{Connection, OpenFlags, OptionalExtension, params};
use std::fs;
use std::io::Read;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
/// How long each SQLite connection waits for the single writer slot.
///
/// Indexed scans can have dozens of Rayon workers finishing analysis at
/// once. SQLite still permits only one writer, so a timeout here turns that
/// burst into short backpressure instead of surfacing SQLITE_BUSY.
///
/// Installed on every connection by `Indexer::configure_connection`.
const SQLITE_BUSY_TIMEOUT: Duration = Duration::from_secs(60);
/// DB schema (foreign keys enabled).
const SCHEMA: &str = r#"
PRAGMA foreign_keys = ON;
CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
path TEXT NOT NULL,
hash BLOB NOT NULL,
mtime INTEGER NOT NULL,
scanned_at INTEGER NOT NULL,
UNIQUE(project, path)
);
CREATE TABLE IF NOT EXISTS issues (file_id INTEGER NOT NULL
REFERENCES files(id)
ON DELETE CASCADE,
rule_id TEXT NOT NULL,
severity TEXT NOT NULL,
line INTEGER NOT NULL,
col INTEGER NOT NULL,
PRIMARY KEY (file_id, rule_id, line, col));
CREATE TABLE IF NOT EXISTS function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
file_hash BLOB NOT NULL,
name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1,
lang TEXT NOT NULL,
container TEXT NOT NULL DEFAULT '',
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL,
entry_kind TEXT,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
CREATE TABLE IF NOT EXISTS ssa_function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
file_hash BLOB NOT NULL,
name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1,
lang TEXT NOT NULL,
namespace TEXT NOT NULL DEFAULT '',
container TEXT NOT NULL DEFAULT '',
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL,
entry_kind TEXT,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
CREATE TABLE IF NOT EXISTS auth_check_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
file_hash BLOB NOT NULL,
name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1,
lang TEXT NOT NULL,
namespace TEXT NOT NULL DEFAULT '',
container TEXT NOT NULL DEFAULT '',
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
CREATE TABLE IF NOT EXISTS ssa_function_bodies (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
file_hash BLOB NOT NULL,
name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1,
lang TEXT NOT NULL,
namespace TEXT NOT NULL DEFAULT '',
container TEXT NOT NULL DEFAULT '',
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
body BLOB NOT NULL,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
CREATE TABLE IF NOT EXISTS cross_package_imports (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
file_hash BLOB NOT NULL,
namespace TEXT NOT NULL,
imports BLOB NOT NULL,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path)
);
CREATE TABLE IF NOT EXISTS scans (
id TEXT PRIMARY KEY,
status TEXT NOT NULL,
scan_root TEXT NOT NULL,
started_at TEXT,
finished_at TEXT,
duration_secs REAL,
engine_version TEXT,
languages TEXT,
files_scanned INTEGER,
files_skipped INTEGER,
finding_count INTEGER,
findings_json TEXT,
timing_json TEXT,
error TEXT
);
CREATE TABLE IF NOT EXISTS scan_metrics (
scan_id TEXT PRIMARY KEY REFERENCES scans(id) ON DELETE CASCADE,
cfg_nodes INTEGER,
call_edges INTEGER,
functions_analyzed INTEGER,
summaries_reused INTEGER,
unresolved_calls INTEGER
);
CREATE TABLE IF NOT EXISTS scan_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
scan_id TEXT NOT NULL REFERENCES scans(id) ON DELETE CASCADE,
timestamp TEXT NOT NULL,
level TEXT NOT NULL,
message TEXT NOT NULL,
file_path TEXT,
detail TEXT
);
CREATE INDEX IF NOT EXISTS idx_scan_logs_scan ON scan_logs(scan_id);
CREATE TABLE IF NOT EXISTS triage_states (
fingerprint TEXT PRIMARY KEY,
state TEXT NOT NULL DEFAULT 'open',
note TEXT NOT NULL DEFAULT '',
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS triage_audit_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
fingerprint TEXT NOT NULL,
action TEXT NOT NULL,
previous_state TEXT NOT NULL,
new_state TEXT NOT NULL,
note TEXT NOT NULL DEFAULT '',
timestamp TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_triage_audit_fp ON triage_audit_log(fingerprint);
CREATE INDEX IF NOT EXISTS idx_triage_audit_ts ON triage_audit_log(timestamp);
CREATE TABLE IF NOT EXISTS nyx_metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS triage_suppression_rules (
id INTEGER PRIMARY KEY AUTOINCREMENT,
suppress_by TEXT NOT NULL,
match_value TEXT NOT NULL,
state TEXT NOT NULL DEFAULT 'suppressed',
note TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL,
UNIQUE(suppress_by, match_value)
);
-- First time we observed each finding fingerprint. Lazy-populated by the
-- overview endpoint when computing backlog age — INSERT OR IGNORE means
-- only the earliest scan that mentioned a fingerprint sticks.
CREATE TABLE IF NOT EXISTS finding_first_seen (
fingerprint TEXT PRIMARY KEY,
first_seen_at TEXT NOT NULL
);
-- Dynamic verdict cache (§12 Q5).
-- Keyed on (spec_hash, entry_content_hash, transitive_import_digest).
-- Invalidation: any of entry content, import digest, toolchain_id,
-- corpus_version, or spec_format_version change → DELETE row → re-run.
CREATE TABLE IF NOT EXISTS dynamic_verdict_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
spec_hash TEXT NOT NULL,
entry_content_hash TEXT NOT NULL,
transitive_import_digest TEXT NOT NULL,
toolchain_id TEXT NOT NULL,
corpus_version INTEGER NOT NULL,
spec_format_version INTEGER NOT NULL,
verdict_json TEXT NOT NULL,
created_at TEXT NOT NULL,
UNIQUE(spec_hash, entry_content_hash, transitive_import_digest,
toolchain_id, corpus_version, spec_format_version)
);
CREATE INDEX IF NOT EXISTS idx_dynamic_verdict_cache_spec_hash
ON dynamic_verdict_cache(spec_hash);
-- Phase 21: persisted attack-surface map. One row per project.
-- Stored as canonical JSON so the round-trip is byte-identical
-- across rescans (see `SurfaceMap::to_json`).
CREATE TABLE IF NOT EXISTS surface_map (
project TEXT PRIMARY KEY,
map_json BLOB NOT NULL,
updated_at INTEGER NOT NULL
);
-- Indexes on (project, file_path) for the per-file replace_* paths.
-- Without these, every DELETE WHERE project=? AND file_path=? does a
-- full table scan, which dominates indexing time as the cache grows.
CREATE INDEX IF NOT EXISTS idx_function_summaries_project_file
ON function_summaries(project, file_path);
CREATE INDEX IF NOT EXISTS idx_ssa_function_summaries_project_file
ON ssa_function_summaries(project, file_path);
CREATE INDEX IF NOT EXISTS idx_ssa_function_bodies_project_file
ON ssa_function_bodies(project, file_path);
CREATE INDEX IF NOT EXISTS idx_auth_check_summaries_project_file
ON auth_check_summaries(project, file_path);
CREATE INDEX IF NOT EXISTS idx_cross_package_imports_project_file
ON cross_package_imports(project, file_path);
"#;
/// Engine version used to detect stale caches across upgrades.
///
/// Taken from `CARGO_PKG_VERSION` at compile time and compared against the
/// `engine_version` row of `nyx_metadata` when the database is opened.
pub const ENGINE_VERSION: &str = env!("CARGO_PKG_VERSION");
/// On-disk schema version for cached analysis data.
///
/// Bumped independently of `ENGINE_VERSION` whenever the serialized
/// layout or identity of a cached artefact changes in an incompatible
/// way, e.g. a `FuncKey` field semantic change that would cause old
/// summaries to misbehave when rehydrated.
///
/// Compared against the `schema_version` row of `nyx_metadata` by
/// `Indexer::check_schema_version`; any mismatch clears the cached tables.
///
/// History:
/// * `"1"`, initial.
/// * `"2"`, 0.5.0: `FuncKey.disambig` changed from the function-node
/// byte offset to a depth-first structural index. Pre-0.5.0 caches
/// store byte-offset disambigs and would fail to match bodies built
/// by the new engine, so they are silently rebuilt on open.
/// * `"3"`, `ssa_function_bodies.body` changed from JSON TEXT to
/// bincode BLOB. Old JSON payloads cannot be deserialised by the
/// new engine, so they are silently rebuilt on open.
/// * `"4"`, `Cap` widened from u16 to u32 to accommodate cap bits
/// ≥ 14 (LDAP_INJECTION, XPATH_INJECTION, HEADER_INJECTION,
/// OPEN_REDIRECT, SSTI, XXE, PROTOTYPE_POLLUTION). The `Cap`
/// deserialiser accepts both u16- and u32-width JSON values, so
/// pre-bump caches load without crashing, but the cached
/// `source_caps` / `sanitizer_caps` / `sink_caps` blobs were
/// produced before any of these caps could appear and would
/// underreport rules that emit them. Bumping forces a rescan so
/// newly-emitted gates and sinks land in the cache with the wider
/// footprint.
pub const SCHEMA_VERSION: &str = "4";
/// A single issue row, ready for insertion.
///
/// Borrowed view of one finding; the fields correspond to the non-key
/// columns written by `Indexer::replace_issues` into the `issues` table.
#[derive(Debug, Clone)]
pub struct IssueRow<'a> {
/// Identifier of the rule that produced the finding (`issues.rule_id`).
pub rule_id: &'a str,
/// Severity stored as text (`issues.severity`).
pub severity: &'a str,
/// Line of the finding (`issues.line`).
pub line: i64,
/// Column of the finding (`issues.col`).
pub col: i64,
}
/// A boxed, one-shot write closure. The writer thread of `IndexWriteQueue`
/// runs each job against its own `Indexer`.
type IndexWriteJob = Box<dyn FnOnce(&mut Indexer) -> NyxResult<()> + Send + 'static>;
/// Tally of failures observed by the writer thread.
///
/// Every failure is counted, but only the first few messages are retained
/// so a flood of identical errors cannot grow the report without bound.
#[derive(Default)]
struct IndexWriteReport {
    /// Total number of failures recorded.
    error_count: usize,
    /// Rendered messages of the earliest failures (at most 8).
    samples: Vec<String>,
}

impl IndexWriteReport {
    /// Count one failure and keep its message while under the sample cap.
    fn record(&mut self, err: impl ToString) {
        const MAX_SAMPLES: usize = 8;

        self.error_count += 1;
        if self.samples.len() >= MAX_SAMPLES {
            // Cap reached: the error is counted but its text is not rendered.
            return;
        }
        self.samples.push(err.to_string());
    }
}
/// Cloneable, bounded handle for submitting persisted-index writes.
///
/// Scan workers keep parsing in parallel; because the underlying channel is
/// bounded, `enqueue` applies backpressure whenever SQLite's single writer
/// falls behind.
#[derive(Clone)]
pub(crate) struct IndexWriteSender {
    tx: crossbeam_channel::Sender<IndexWriteJob>,
}

impl IndexWriteSender {
    /// Hand `job` to the writer thread.
    ///
    /// # Errors
    /// Returns `NyxError::Msg` when the channel is disconnected, i.e. the
    /// writer side is no longer receiving.
    pub(crate) fn enqueue<F>(&self, job: F) -> NyxResult<()>
    where
        F: FnOnce(&mut Indexer) -> NyxResult<()> + Send + 'static,
    {
        let boxed: IndexWriteJob = Box::new(job);
        match self.tx.send(boxed) {
            Ok(()) => Ok(()),
            Err(_) => Err(NyxError::Msg(
                "database writer stopped before accepting write".into(),
            )),
        }
    }
}
/// Single-writer queue for project index mutations.
///
/// SQLite permits many readers but only one writer. Parallel scans should
/// therefore submit analyzed file results here instead of letting every
/// Rayon worker compete for the writer lock.
pub(crate) struct IndexWriteQueue {
tx: IndexWriteSender,
handle: std::thread::JoinHandle<IndexWriteReport>,
}
impl IndexWriteQueue {
pub(crate) fn start(
project: impl Into<String>,
pool: Arc<Pool<SqliteConnectionManager>>,
) -> Self {
let capacity = std::env::var("NYX_INDEX_WRITE_QUEUE_MAX")
.ok()
.and_then(|v| v.parse::<usize>().ok())
.filter(|n| *n >= 1)
.unwrap_or_else(|| (num_cpus::get() * 2).max(64));
Self::start_with_capacity(project, pool, capacity)
}
pub(crate) fn start_with_capacity(
project: impl Into<String>,
pool: Arc<Pool<SqliteConnectionManager>>,
capacity: usize,
) -> Self {
let project = project.into();
let (tx, rx) = crossbeam_channel::bounded::<IndexWriteJob>(capacity.max(1));
let handle = std::thread::spawn(move || {
let mut report = IndexWriteReport::default();
let mut idx = match Indexer::from_pool(&project, &pool) {
Ok(idx) => idx,
Err(err) => {
report.record(format!("writer init: {err}"));
return report;
}
};
for job in rx {
if let Err(err) = job(&mut idx) {
report.record(err);
}
}
report
});
Self {
tx: IndexWriteSender { tx },
handle,
}
}
pub(crate) fn sender(&self) -> IndexWriteSender {
self.tx.clone()
}
pub(crate) fn finish(self, stage: &str) -> NyxResult<()> {
let Self { tx, handle } = self;
drop(tx);
let report = handle
.join()
.map_err(|_| NyxError::Msg(format!("{stage} database writer panicked")))?;
if report.error_count == 0 {
return Ok(());
}
let mut details = report.samples;
if report.error_count > details.len() {
details.push(format!(
"... and {} more",
report.error_count - details.len()
));
}
Err(NyxError::Msg(format!(
"{stage} failed to persist scan state: {}",
details.join("; ")
)))
}
}
/// A scan record for DB persistence.
///
/// Field names correspond one-to-one to the columns of the `scans` table
/// declared in `SCHEMA`; nullable columns are `Option`s.
#[derive(Debug, Clone)]
pub struct ScanRecord {
/// Primary key of the scan (`scans.id`, TEXT).
pub id: String,
/// Status text (`scans.status`).
pub status: String,
/// Root path the scan was run against (`scans.scan_root`).
pub scan_root: String,
/// Start timestamp stored as text, if recorded.
pub started_at: Option<String>,
/// Finish timestamp stored as text, if recorded.
pub finished_at: Option<String>,
/// Duration in seconds, if recorded.
pub duration_secs: Option<f64>,
/// Engine version string, if recorded.
pub engine_version: Option<String>,
/// Languages column, stored as text.
pub languages: Option<String>,
/// Number of files scanned, if recorded.
pub files_scanned: Option<i64>,
/// Number of files skipped, if recorded.
pub files_skipped: Option<i64>,
/// Number of findings, if recorded.
pub finding_count: Option<i64>,
/// Findings payload stored as text (`scans.findings_json`).
pub findings_json: Option<String>,
/// Timing payload stored as text (`scans.timing_json`).
pub timing_json: Option<String>,
/// Error text, if recorded.
pub error: Option<String>,
}
/// A triage audit log entry.
///
/// Field names correspond one-to-one to the columns of the
/// `triage_audit_log` table declared in `SCHEMA`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AuditEntry {
/// Auto-incremented row id.
pub id: i64,
/// Fingerprint of the finding the entry refers to.
pub fingerprint: String,
/// Action text recorded for the entry.
pub action: String,
/// Triage state before the action.
pub previous_state: String,
/// Triage state after the action.
pub new_state: String,
/// Free-form note (the column defaults to the empty string).
pub note: String,
/// Timestamp stored as text.
pub timestamp: String,
}
/// A pattern-based suppression rule.
///
/// Field names correspond one-to-one to the columns of the
/// `triage_suppression_rules` table declared in `SCHEMA`, where
/// `(suppress_by, match_value)` is unique.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SuppressionRule {
/// Auto-incremented row id.
pub id: i64,
/// What the rule matches on (`suppress_by` column).
pub suppress_by: String,
/// Value compared against for a match (`match_value` column).
pub match_value: String,
/// Triage state applied by the rule (the column defaults to `'suppressed'`).
pub state: String,
/// Free-form note (the column defaults to the empty string).
pub note: String,
/// Creation timestamp stored as text.
pub created_at: String,
}
/// A pooled SQLite connection bound to one project name.
///
/// The per-file methods scope their `files` queries by `project`.
pub struct Indexer {
// Connection checked out of the r2d2 pool for the lifetime of this value.
conn: PooledConnection<SqliteConnectionManager>,
// Project name used as the `project` key in queries.
project: String,
}
/// SQLite database files start with this 16-byte ASCII magic.
///
/// `preflight_header` compares it against the first 16 bytes of an
/// existing database file.
const SQLITE_MAGIC: &[u8; 16] = b"SQLite format 3\0";
/// Reject obviously non-SQLite files before handing them to the
/// connection pool, where the same rejection costs minutes instead of
/// microseconds on some corruption shapes.
///
/// Returns `Ok(())` when:
/// * the file does not exist (the pool will `CREATE` it),
/// * the file is zero-length (SQLite treats this as a fresh DB),
/// * the first 16 bytes match the SQLite magic header,
/// * the file is shorter than the magic but non-empty (extremely
/// unusual; we defer to SQLite rather than gating arbitrarily).
///
/// Returns `Err(NyxError::Sql(...))` carrying `SQLITE_NOTADB` when the
/// header is present but does not match.
fn preflight_header(database_path: &Path) -> NyxResult<()> {
let Ok(meta) = fs::metadata(database_path) else {
return Ok(());
};
if !meta.is_file() {
return Ok(());
}
if meta.len() < SQLITE_MAGIC.len() as u64 {
return Ok(());
}
let mut head = [0u8; 16];
let mut f = fs::File::open(database_path)?;
f.read_exact(&mut head)?;
if &head != SQLITE_MAGIC {
return Err(NyxError::Sql(rusqlite::Error::SqliteFailure(
rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_NOTADB),
Some(format!(
"file at {} is not a SQLite database (header magic mismatch)",
database_path.display(),
)),
)));
}
Ok(())
}
impl Indexer {
pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
// Fast-fail when the existing file is clearly not a SQLite
// database. Without this guard, certain corruption shapes
// (truncated header, header overwritten with arbitrary bytes,
// mid-page damage that preserves magic) can keep SQLite busy
// for 150-200 seconds inside the PRAGMA / schema execution
// below before it surfaces SQLITE_NOTADB or SQLITE_CORRUPT.
// A zero-length file is treated as a fresh DB by SQLite, so we
// only validate when the file is large enough to hold the
// 16-byte magic header.
preflight_header(database_path)?;
// NO_MUTEX is safe because r2d2 ensures each pooled connection
// is only ever used by one thread at a time. Combined with WAL
// mode this allows concurrent readers + a single writer without
// the global serialization that FULL_MUTEX causes.
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
| OpenFlags::SQLITE_OPEN_CREATE
| OpenFlags::SQLITE_OPEN_NO_MUTEX;
{
let conn = Self::open_configured_connection(database_path, flags)?;
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.execute_batch(SCHEMA)?;
// Migrate: if the function_summaries table is missing any required
// column (arity for older schemas; container/disambig/kind for the
// richer FuncKey identity), drop and recreate it so the data layout
// matches the current model.
let fn_cols: std::collections::HashSet<String> = conn
.prepare("PRAGMA table_info(function_summaries)")
.and_then(|mut s| {
let cols: Vec<String> = s
.query_map([], |r| r.get::<_, String>(1))?
.filter_map(Result::ok)
.collect();
Ok(cols.into_iter().collect())
})
.unwrap_or_default();
let fn_ok = fn_cols.contains("arity")
&& fn_cols.contains("container")
&& fn_cols.contains("disambig")
&& fn_cols.contains("kind");
if !fn_ok {
tracing::info!(
"migrating function_summaries: recreating table with identity columns"
);
conn.execute_batch("DROP TABLE IF EXISTS function_summaries;")?;
conn.execute_batch(SCHEMA)?;
}
// Migrate: verify SSA tables carry namespace + container/disambig/kind.
let ssa_cols: std::collections::HashSet<String> = conn
.prepare("PRAGMA table_info(ssa_function_summaries)")
.and_then(|mut s| {
let cols: Vec<String> = s
.query_map([], |r| r.get::<_, String>(1))?
.filter_map(Result::ok)
.collect();
Ok(cols.into_iter().collect())
})
.unwrap_or_default();
let ssa_ok = ssa_cols.contains("namespace")
&& ssa_cols.contains("container")
&& ssa_cols.contains("disambig")
&& ssa_cols.contains("kind");
if !ssa_ok {
tracing::info!("migrating ssa_function_summaries: recreating tables");
conn.execute_batch("DROP TABLE IF EXISTS ssa_function_summaries;")?;
conn.execute_batch("DROP TABLE IF EXISTS ssa_function_bodies;")?;
conn.execute_batch(SCHEMA)?;
}
// ssa_function_bodies may have been created with the old column set
// even when ssa_function_summaries is current (e.g. partial
// migrations). Check and recreate independently.
let body_cols: std::collections::HashSet<String> = conn
.prepare("PRAGMA table_info(ssa_function_bodies)")
.and_then(|mut s| {
let cols: Vec<String> = s
.query_map([], |r| r.get::<_, String>(1))?
.filter_map(Result::ok)
.collect();
Ok(cols.into_iter().collect())
})
.unwrap_or_default();
let body_ok = body_cols.contains("namespace")
&& body_cols.contains("container")
&& body_cols.contains("disambig")
&& body_cols.contains("kind");
if !body_ok {
tracing::info!("migrating ssa_function_bodies: recreating table");
conn.execute_batch("DROP TABLE IF EXISTS ssa_function_bodies;")?;
conn.execute_batch(SCHEMA)?;
}
// Phase 10 — `entry_kind` column on (ssa_)function_summaries.
// Non-destructive `ALTER TABLE ... ADD COLUMN` so existing
// rows survive the upgrade. The column is nullable; the
// INSERT paths write the JSON-encoded `EntryKind` text or
// NULL when the function is not an entry point.
Self::ensure_column(&conn, "function_summaries", "entry_kind", "TEXT")?;
Self::ensure_column(&conn, "ssa_function_summaries", "entry_kind", "TEXT")?;
// Ensure the auth_check_summaries table exists for DBs
// created before this column set was introduced. The
// `CREATE TABLE IF NOT EXISTS` in SCHEMA handles new DBs;
// this branch only fires when the table is missing
// entirely from a pre-existing DB.
let auth_exists: bool = conn
.query_row(
"SELECT 1 FROM sqlite_master
WHERE type = 'table' AND name = 'auth_check_summaries'",
[],
|_| Ok(true),
)
.optional()?
.unwrap_or(false);
if !auth_exists {
tracing::info!("creating auth_check_summaries table");
conn.execute_batch(SCHEMA)?;
}
// Phase 09 indexed-mode parity: ensure the
// `cross_package_imports` table exists for DBs created
// before this column set was introduced. `CREATE TABLE
// IF NOT EXISTS` in SCHEMA handles new DBs; this branch
// only fires when the table is missing entirely from a
// pre-existing DB.
let cpi_exists: bool = conn
.query_row(
"SELECT 1 FROM sqlite_master
WHERE type = 'table' AND name = 'cross_package_imports'",
[],
|_| Ok(true),
)
.optional()?
.unwrap_or(false);
if !cpi_exists {
tracing::info!("creating cross_package_imports table");
conn.execute_batch(SCHEMA)?;
}
// Phase 21: ensure the `surface_map` table exists on
// DBs created before this column set was introduced.
let surface_exists: bool = conn
.query_row(
"SELECT 1 FROM sqlite_master
WHERE type = 'table' AND name = 'surface_map'",
[],
|_| Ok(true),
)
.optional()?
.unwrap_or(false);
if !surface_exists {
tracing::info!("creating surface_map table");
conn.execute_batch(SCHEMA)?;
}
// Schema version check: invalidate cached summary tables
// when the on-disk artefact layout has changed in an
// incompatible way, independently of the engine version.
// Runs before `check_engine_version` so the engine-version
// branch below does not race with a stale schema.
Self::check_schema_version(&conn)?;
// Engine version check: invalidate all caches when the scanner
// version changes so stale serialized data cannot be loaded.
Self::check_engine_version(&conn)?;
}
let manager = SqliteConnectionManager::file(database_path)
.with_flags(flags)
.with_init(Self::configure_connection);
// r2d2's default `max_size` is 10, which can stall rayon
// workers on machines with more cores than that during the
// parallel indexing pass. Size the pool to comfortably hold
// a connection per rayon thread plus a small slack.
//
// `NYX_INDEX_POOL_MAX` overrides the auto-sized default. Use it in
// fd-constrained environments (test sandboxes, containers with low
// ulimit) where many parallel indexed scans would otherwise exhaust
// EMFILE: each pooled SQLite WAL connection costs ~3 fds (db + -wal
// + -shm), so 30 parallel scans × 16 conns × 3 fds = 1440 fds.
let max_conns = std::env::var("NYX_INDEX_POOL_MAX")
.ok()
.and_then(|v| v.parse::<u32>().ok())
.filter(|n| *n >= 1)
.unwrap_or_else(|| (num_cpus::get() as u32 + 4).max(16));
let pool = Arc::new(Pool::builder().max_size(max_conns).build(manager)?);
Ok(pool)
}
/// Open a single connection with `flags` and apply the same per-connection
/// settings the pool installs via `configure_connection`.
fn open_configured_connection(
    database_path: &Path,
    flags: OpenFlags,
) -> rusqlite::Result<Connection> {
    Connection::open_with_flags(database_path, flags).and_then(|mut conn| {
        Self::configure_connection(&mut conn)?;
        Ok(conn)
    })
}
/// Apply the per-connection settings: busy timeout, foreign keys,
/// `synchronous=NORMAL`, page-cache size, in-memory temp store, and mmap size.
fn configure_connection(conn: &mut Connection) -> rusqlite::Result<()> {
    // A negative cache_size is interpreted by SQLite in KiB, i.e. about 8 MB.
    const CACHE_SIZE: i64 = -8000;
    // 256 MB of memory-mapped I/O.
    const MMAP_SIZE: i64 = 268_435_456;

    conn.busy_timeout(SQLITE_BUSY_TIMEOUT)?;
    conn.pragma_update(None, "foreign_keys", "ON")?;
    conn.pragma_update(None, "synchronous", "NORMAL")?;
    conn.pragma_update(None, "cache_size", CACHE_SIZE)?;
    conn.pragma_update(None, "temp_store", "MEMORY")?;
    conn.pragma_update(None, "mmap_size", MMAP_SIZE)
}
/// Add `column` (of type `sqlite_type`) to `table` unless it already exists.
///
/// Non-destructive: the change is a plain `ALTER TABLE ... ADD COLUMN`, so
/// existing rows are kept. Used to thread additive schema changes
/// (Phase 10's `entry_kind`) into pre-existing databases without forcing a
/// full cache rebuild.
///
/// `table`, `column` and `sqlite_type` are interpolated into SQL verbatim;
/// callers must pass trusted identifiers only.
fn ensure_column(
    conn: &Connection,
    table: &str,
    column: &str,
    sqlite_type: &str,
) -> NyxResult<()> {
    let mut probe = conn.prepare(&format!("PRAGMA table_info({table})"))?;
    // Rows that fail to decode are ignored, matching the lenient probes
    // used during init.
    let already_present = probe
        .query_map([], |row| row.get::<_, String>(1))?
        .filter_map(Result::ok)
        .any(|name| name == column);

    if !already_present {
        tracing::info!("adding column {column} to {table}");
        conn.execute_batch(&format!(
            "ALTER TABLE {table} ADD COLUMN {column} {sqlite_type}"
        ))?;
    }
    Ok(())
}
/// Compare the stored `schema_version` with the compiled-in `SCHEMA_VERSION`.
///
/// On mismatch (including a missing row on first open) the cached analysis
/// tables are cleared so pre-bump artefacts cannot be rehydrated:
/// `ssa_function_bodies` and `cross_package_imports` are dropped and
/// recreated, and the rows of `function_summaries`,
/// `ssa_function_summaries`, `auth_check_summaries` and `files` are deleted.
/// Scan history and triage tables are not touched. The new version is then
/// written to `nyx_metadata`.
///
/// NOTE(review): earlier documentation said `files` was preserved, but the
/// statement batch below deletes its rows. This comment documents the code.
fn check_schema_version(conn: &Connection) -> NyxResult<()> {
    let stored: Option<String> = conn
        .query_row(
            "SELECT value FROM nyx_metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .optional()?;
    let current = SCHEMA_VERSION;

    if stored.as_deref() == Some(current) {
        // Up to date: leave every cache in place.
        return Ok(());
    }

    let old = stored.as_deref().unwrap_or("<none>");
    tracing::info!(
        "db schema version changed ({old} → {current}), clearing summary caches"
    );
    // ssa_function_bodies is dropped rather than emptied: its column type
    // changed to BLOB in v3 and `CREATE TABLE IF NOT EXISTS` will not
    // migrate a column on an existing table.
    conn.execute_batch(
        "DROP TABLE IF EXISTS ssa_function_bodies;
DELETE FROM function_summaries;
DELETE FROM ssa_function_summaries;
DELETE FROM auth_check_summaries;
DELETE FROM files;
DROP TABLE IF EXISTS cross_package_imports;",
    )?;
    // Recreate whatever was just dropped.
    conn.execute_batch(SCHEMA)?;
    conn.execute(
        "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('schema_version', ?1)",
        params![current],
    )?;
    Ok(())
}
/// Compare the stored `engine_version` with the running binary's
/// `ENGINE_VERSION`.
///
/// On mismatch (or a missing row) the rows of `function_summaries`,
/// `ssa_function_summaries`, `ssa_function_bodies`, `auth_check_summaries`
/// and `files` are deleted so every file is rescanned with the new engine.
/// The new version is then written to `nyx_metadata`.
fn check_engine_version(conn: &Connection) -> NyxResult<()> {
    let stored: Option<String> = conn
        .query_row(
            "SELECT value FROM nyx_metadata WHERE key = 'engine_version'",
            [],
            |row| row.get(0),
        )
        .optional()?;
    let current = ENGINE_VERSION;

    if stored.as_deref() == Some(current) {
        // Same engine as last time: caches stay valid.
        return Ok(());
    }

    let old = stored.as_deref().unwrap_or("<none>");
    tracing::info!("engine version changed ({old} → {current}), rebuilding index");
    // Clearing `files` removes the stored hashes, which is what forces the
    // rescan.
    conn.execute_batch(
        "DELETE FROM function_summaries;
DELETE FROM ssa_function_summaries;
DELETE FROM ssa_function_bodies;
DELETE FROM auth_check_summaries;
DELETE FROM files;",
    )?;
    conn.execute(
        "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)",
        params![current],
    )?;
    Ok(())
}
/// Store `ENGINE_VERSION` under the `engine_version` metadata key,
/// replacing any previous value.
///
/// Called after a successful scan to ensure the metadata row exists
/// even for a freshly created database.
pub fn write_engine_version(pool: &Pool<SqliteConnectionManager>) -> NyxResult<()> {
    pool.get()?.execute(
        "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)",
        params![ENGINE_VERSION],
    )?;
    Ok(())
}
/// Overwrite the stored `engine_version` metadata value with `version`.
/// Test-only: lets tests simulate version mismatch scenarios.
#[cfg(test)]
pub fn set_engine_version(
    pool: &Pool<SqliteConnectionManager>,
    version: &str,
) -> NyxResult<()> {
    pool.get()?.execute(
        "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)",
        params![version],
    )?;
    Ok(())
}
/// Fetch the `engine_version` metadata value, or `None` when the row is
/// absent. Test-only.
#[cfg(test)]
pub fn get_stored_engine_version(
    pool: &Pool<SqliteConnectionManager>,
) -> NyxResult<Option<String>> {
    let stored = pool
        .get()?
        .query_row(
            "SELECT value FROM nyx_metadata WHERE key = 'engine_version'",
            [],
            |row| row.get::<_, String>(0),
        )
        .optional()?;
    Ok(stored)
}
/// Number of rows in `table` whose `project` column equals `project`.
/// Test helper.
#[cfg(test)]
pub fn count_rows(
    pool: &Pool<SqliteConnectionManager>,
    table: &str,
    project: &str,
) -> NyxResult<i64> {
    // A table name cannot be bound as a parameter, so it is interpolated;
    // acceptable here because this is test-only code with trusted inputs.
    let sql = format!("SELECT COUNT(*) FROM {table} WHERE project = ?1");
    let conn = pool.get()?;
    Ok(conn.query_row(&sql, params![project], |row| row.get::<_, i64>(0))?)
}
/// Test-facing alias for `init`: builds a pool after running schema
/// creation, migrations and the version checks.
#[cfg(test)]
pub fn init_for_test(
    database_path: &Path,
) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
    let pool = Self::init(database_path)?;
    Ok(pool)
}
/// Check a connection out of `pool` and bind it to `project`.
///
/// # Errors
/// Fails when the pool cannot hand out a connection.
pub fn from_pool(project: &str, pool: &Pool<SqliteConnectionManager>) -> NyxResult<Self> {
    Ok(Self {
        conn: pool.get()?,
        project: String::from(project),
    })
}
// Borrow the pooled connection as a plain `&Connection` (via deref
// coercion) so the methods below do not need to spell out the pool type.
fn c(&self) -> &Connection {
    &self.conn
}
/// Return true when the file *content* or *mtime* changed since the last scan.
///
/// A file with no stored row, or whose stored mtime differs from the
/// filesystem mtime, is reported as changed without hashing. Only when the
/// mtimes agree is the file hashed and compared with the stored hash.
/// Production scans use `should_scan_with_hash`, which avoids the
/// redundant `digest_file` read; this variant exists for tests.
#[cfg(test)]
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
    let mtime = fs::metadata(path)?
        .modified()?
        .duration_since(UNIX_EPOCH)?
        .as_secs() as i64;

    let row: Option<(Vec<u8>, i64)> = self
        .conn
        .query_row(
            "SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2",
            params![self.project, path.to_string_lossy()],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .optional()?;

    // Never seen before: must scan.
    let Some((stored_hash, stored_mtime)) = row else {
        return Ok(true);
    };
    // mtime moved: must scan, no need to hash.
    if stored_mtime != mtime {
        return Ok(true);
    }
    // mtime unchanged: fall back to comparing content hashes.
    let digest = Self::digest_file(path)?;
    Ok(stored_hash != digest)
}
/// Decide whether `path` needs scanning, given its already-computed `hash`.
///
/// Returns true when no row is stored for `(project, path)` or when the
/// stored hash differs from `hash`. The file itself is not read.
pub fn should_scan_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<bool> {
    let stored: Option<Vec<u8>> = self
        .conn
        .query_row(
            "SELECT hash FROM files WHERE project = ?1 AND path = ?2",
            params![self.project, path.to_string_lossy()],
            |row| row.get(0),
        )
        .optional()?;
    Ok(stored.map_or(true, |stored_hash| stored_hash != hash))
}
/// Read `path`, hash its contents, and insert or update its `files` row.
/// Returns the row id.
pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
    let hash = Self::digest_bytes(&fs::read(path)?);
    self.upsert_file_with_hash(path, &hash)
}
/// Insert or update the `files` row for `path` using a pre-computed `hash`,
/// and return the row id.
///
/// Avoids a redundant file read when the caller already has the hash; only
/// the file's metadata is consulted (for its mtime). `scanned_at` is set to
/// the current time.
pub fn upsert_file_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<i64> {
    // Whole seconds since the Unix epoch.
    let epoch_secs = |t: SystemTime| -> NyxResult<i64> {
        Ok(t.duration_since(UNIX_EPOCH)?.as_secs() as i64)
    };
    let mtime = epoch_secs(fs::metadata(path)?.modified()?)?;
    let scanned_at = epoch_secs(SystemTime::now())?;
    let path_str = path.to_string_lossy();
    let conn = self.c();

    // Two statements: the upsert itself, then a lookup of the row id.
    conn.execute(
        "INSERT INTO files (project, path, hash, mtime, scanned_at)
VALUES (?1, ?2, ?3, ?4, ?5)
ON CONFLICT(project,path) DO UPDATE
SET hash = excluded.hash,
mtime = excluded.mtime,
scanned_at = excluded.scanned_at",
        params![self.project, path_str, hash, mtime, scanned_at],
    )?;
    let id = conn.query_row(
        "SELECT id FROM files WHERE project = ?1 AND path = ?2",
        params![self.project, path_str],
        |row| row.get::<_, i64>(0),
    )?;
    Ok(id)
}
/// Replace all issues for `file_id` with the supplied set.
///
/// Runs in one transaction: existing rows for `file_id` are deleted, then
/// the new rows are inserted and the transaction is committed.
///
/// Dedups rows by the same PRIMARY KEY the `issues` table enforces
/// (`file_id, rule_id, line, col`) to defend against upstream bugs
/// that produce same-keyed diagnostics with differing severity or
/// cosmetic fields. The first-seen row wins; upstream
/// `ParsedSource::finalize_diags` sorts so that high
/// severity comes first, and this fallback preserves that ordering.
///
/// # Errors
/// Propagates any SQLite error; the transaction is then dropped without
/// being committed.
pub fn replace_issues<'a>(
    &mut self,
    file_id: i64,
    issues: impl IntoIterator<Item = IssueRow<'a>>,
) -> NyxResult<()> {
    let tx = self.conn.transaction()?;
    tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;
    {
        let mut stmt = tx.prepare(
            "INSERT INTO issues (file_id, rule_id, severity, line, col)
VALUES (?1, ?2, ?3, ?4, ?5)",
        )?;
        // Key on the borrowed rule id: it lives for `'a`, so no per-issue
        // `String` allocation is needed to track what was already written.
        let mut seen: std::collections::HashSet<(&'a str, i64, i64)> =
            std::collections::HashSet::new();
        for iss in issues {
            if !seen.insert((iss.rule_id, iss.line, iss.col)) {
                continue;
            }
            stmt.execute(params![
                file_id,
                iss.rule_id,
                iss.severity,
                iss.line,
                iss.col
            ])?;
        }
    }
    tx.commit()?;
    Ok(())
}
/// Load the cached issues for `path` so the file need not be rescanned.
///
/// Looks up the `files` row for `(project, path)` and returns one [`Diag`]
/// per `issues` row attached to it. Only `rule_id`, `severity`, `line` and
/// `col` are persisted; every other `Diag` field is filled with a fixed
/// default (category `Security`, triage state `"open"`, empty ids, ...).
///
/// A severity string that does not parse is logged and treated as
/// `Severity::Medium`. A row that cannot be read is logged and skipped
/// rather than aborting the whole load.
///
/// # Errors
/// Fails when the file has no row in `files` (the id lookup returns no
/// rows) or when a statement cannot be prepared or executed.
pub fn get_issues_from_file(&self, path: &Path) -> NyxResult<Vec<Diag>> {
    // Computed once: used for the lookup and copied into every Diag.
    let path_display = path.to_string_lossy();
    let file_id: i64 = self.c().query_row(
        "SELECT id FROM files WHERE project = ?1 AND path = ?2",
        params![self.project, path_display],
        |r| r.get(0),
    )?;
    let mut stmt = self.c().prepare(
        "SELECT rule_id, severity, line, col
FROM issues
WHERE file_id = ?1",
    )?;
    let issue_iter = stmt.query_map([file_id], |row| {
        let sev_str: String = row.get(1)?;
        let severity = Severity::from_str(&sev_str).unwrap_or_else(|_| {
            tracing::warn!(
                severity = %sev_str,
                "unknown severity in DB row; defaulting to Medium"
            );
            Severity::Medium
        });
        Ok(Diag {
            path: path_display.to_string(),
            id: row.get::<_, String>(0)?, // rule_id
            line: row.get::<_, i64>(2)? as usize,
            col: row.get::<_, i64>(3)? as usize,
            severity,
            category: crate::patterns::FindingCategory::Security,
            path_validated: false,
            guard_kind: None,
            message: None,
            labels: vec![],
            confidence: None,
            evidence: None,
            rank_score: None,
            rank_reason: None,
            suppressed: false,
            suppression: None,
            triage_state: "open".to_string(),
            triage_note: String::new(),
            rollup: None,
            finding_id: String::new(),
            alternative_finding_ids: Vec::new(),
            stable_hash: 0,
        })
    })?;
    // Keep the best-effort contract, but surface dropped rows in the log
    // the same way the summary loaders do.
    let diags = issue_iter
        .filter_map(|row| match row {
            Ok(diag) => Some(diag),
            Err(e) => {
                tracing::warn!("failed to read issue row: {e}");
                None
            }
        })
        .collect();
    Ok(diags)
}
/// Atomically swap in a new set of function summaries for one file.
///
/// Inside a single transaction, every `function_summaries` row for
/// `(project, file_path)` is deleted and one row per entry of `summaries`
/// is inserted, so functions that vanished on reparse do not linger.
///
/// Each row carries the summary serialised as JSON, `file_hash`, and an
/// `updated_at` timestamp in seconds since the Unix epoch. `entry_kind`
/// is stored as its JSON encoding, or as NULL when it is absent or when
/// encoding fails or yields an empty string.
///
/// # Errors
/// Fails when the clock is before the Unix epoch, a summary cannot be
/// serialised, or a SQL statement fails; nothing is committed then.
pub fn replace_summaries_for_file(
    &mut self,
    file_path: &Path,
    file_hash: &[u8],
    summaries: &[crate::summary::FuncSummary],
) -> NyxResult<()> {
    const CLEAR_SQL: &str =
        "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2";
    const INSERT_SQL: &str = "INSERT OR REPLACE INTO function_summaries
(project, file_path, file_hash, name, arity, lang,
container, disambig, kind, summary, entry_kind, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)";

    let txn = self.conn.transaction()?;
    let path_str = file_path.to_string_lossy();
    let updated_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;

    txn.execute(CLEAR_SQL, params![self.project, path_str])?;
    {
        let mut insert = txn.prepare(INSERT_SQL)?;
        for summary in summaries {
            let payload = serde_json::to_string(summary)
                .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
            let disambig_sql = summary.disambig.map(|d| d as i64);
            // A failed or empty encoding is stored as NULL.
            let entry_kind_sql = summary
                .entry_kind
                .as_ref()
                .and_then(|ek| serde_json::to_string(ek).ok())
                .filter(|encoded| !encoded.is_empty());
            insert.execute(params![
                self.project,
                path_str,
                file_hash,
                summary.name,
                summary.param_count as i64,
                summary.lang,
                summary.container,
                disambig_sql,
                summary.kind.as_str(),
                payload,
                entry_kind_sql,
                updated_at
            ])?;
        }
    }
    txn.commit()?;
    Ok(())
}
/// Atomically swap in a new set of SSA function summaries for one file.
///
/// Each input tuple is
/// `(name, arity, lang, namespace, container, disambig, kind, summary)`,
/// the fields needed to rebuild a full [`crate::symbol::FuncKey`] on load.
///
/// Inside a single transaction, every `ssa_function_summaries` row for
/// `(project, file_path)` is deleted and one row per tuple is inserted.
/// The summary is stored as JSON; `entry_kind` is stored as its JSON
/// encoding, or NULL when absent or when encoding fails or is empty.
///
/// # Errors
/// Fails when the clock is before the Unix epoch, a summary cannot be
/// serialised, or a SQL statement fails; nothing is committed then.
pub fn replace_ssa_summaries_for_file(
    &mut self,
    file_path: &Path,
    file_hash: &[u8],
    summaries: &[(
        String,
        usize,
        String,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::summary::ssa_summary::SsaFuncSummary,
    )],
) -> NyxResult<()> {
    const CLEAR_SQL: &str =
        "DELETE FROM ssa_function_summaries WHERE project = ?1 AND file_path = ?2";
    const INSERT_SQL: &str = "INSERT OR REPLACE INTO ssa_function_summaries
(project, file_path, file_hash, name, arity, lang, namespace,
container, disambig, kind, summary, entry_kind, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)";

    let txn = self.conn.transaction()?;
    let path_str = file_path.to_string_lossy();
    let updated_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;

    txn.execute(CLEAR_SQL, params![self.project, path_str])?;
    {
        let mut insert = txn.prepare(INSERT_SQL)?;
        for (fn_name, param_count, language, ns, owner, disambig, fn_kind, ssa) in summaries {
            let payload = serde_json::to_string(ssa)
                .map_err(|e| NyxError::Msg(format!("SSA summary serialise: {e}")))?;
            let disambig_sql = disambig.map(i64::from);
            // A failed or empty encoding is stored as NULL.
            let entry_kind_sql = ssa
                .entry_kind
                .as_ref()
                .and_then(|ek| serde_json::to_string(ek).ok())
                .filter(|encoded| !encoded.is_empty());
            insert.execute(params![
                self.project,
                path_str,
                file_hash,
                fn_name,
                *param_count as i64,
                language,
                ns,
                owner,
                disambig_sql,
                fn_kind.as_str(),
                payload,
                entry_kind_sql,
                updated_at
            ])?;
        }
    }
    txn.commit()?;
    Ok(())
}
/// Load every function summary for this project.
///
/// Reads all JSON strings from SQLite in one pass, then
/// deserializes them in parallel with rayon for large result sets.
pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
let mut stmt = self
.c()
.prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
let jsons: Vec<String> = stmt
.query_map([&self.project], |row| row.get::<_, String>(0))?
.filter_map(|r| match r {
Ok(v) => Some(v),
Err(e) => {
tracing::warn!("failed to read summary row: {e}");
None
}
})
.collect();
// Parallel JSON deserialization for large sets
if jsons.len() > 256 {
use rayon::prelude::*;
let results: Vec<_> = jsons
.par_iter()
.filter_map(|json| {
serde_json::from_str::<crate::summary::FuncSummary>(json)
.map_err(|e| {
tracing::warn!("failed to deserialize summary JSON: {e}");
e
})
.ok()
})
.collect();
Ok(results)
} else {
let mut out = Vec::with_capacity(jsons.len());
for json in &jsons {
match serde_json::from_str::<crate::summary::FuncSummary>(json) {
Ok(s) => out.push(s),
Err(e) => {
tracing::warn!("failed to deserialize summary JSON: {e}");
}
}
}
Ok(out)
}
}
/// Load every SSA function summary stored for this project.
///
/// Returns rows with the full metadata needed to rebuild a `FuncKey`:
/// `(file_path, name, lang, arity, namespace, container, disambig, kind, SsaFuncSummary)`.
///
/// All rows are read from SQLite first, then the JSON payloads are
/// decoded. Result sets larger than 256 rows are decoded in parallel with
/// rayon. The raw rows are consumed during decoding, so the metadata
/// strings are moved into the result instead of being cloned.
///
/// Loading is best-effort: a row that cannot be read, or a payload that
/// does not deserialise, is logged with `tracing::warn!` and skipped.
///
/// # Errors
/// Fails only when the query cannot be prepared or started.
pub fn load_all_ssa_summaries(
    &self,
) -> NyxResult<
    Vec<(
        String,
        String,
        String,
        i64,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::summary::ssa_summary::SsaFuncSummary,
    )>,
> {
    // Column layout of one undecoded row, in SELECT order.
    type RawRow = (
        String,
        String,
        String,
        i64,
        String,
        String,
        Option<i64>,
        String,
        String,
    );

    let mut stmt = self.c().prepare(
        "SELECT file_path, name, lang, arity, namespace,
container, disambig, kind, summary
FROM ssa_function_summaries WHERE project = ?1",
    )?;
    let rows: Vec<RawRow> = stmt
        .query_map([&self.project], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, i64>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, String>(5)?,
                row.get::<_, Option<i64>>(6)?,
                row.get::<_, String>(7)?,
                row.get::<_, String>(8)?,
            ))
        })?
        .filter_map(|r| match r {
            Ok(v) => Some(v),
            Err(e) => {
                tracing::warn!("failed to read SSA summary row: {e}");
                None
            }
        })
        .collect();

    // Takes the row by value so its strings move straight into the output.
    let decode = |(fp, name, lang, arity, ns, container, disambig, kind, json): RawRow| {
        match serde_json::from_str::<crate::summary::ssa_summary::SsaFuncSummary>(&json) {
            Ok(summary) => Some((
                fp,
                name,
                lang,
                arity,
                ns,
                container,
                disambig.map(|d| d as u32),
                crate::symbol::FuncKind::from_slug(&kind),
                summary,
            )),
            Err(e) => {
                tracing::warn!("failed to deserialize SSA summary JSON: {e}");
                None
            }
        }
    };

    let results: Vec<_> = if rows.len() > 256 {
        use rayon::prelude::*;
        rows.into_par_iter().filter_map(decode).collect()
    } else {
        rows.into_iter().filter_map(decode).collect()
    };
    Ok(results)
}
/// Load symbol metadata (name, arity, lang, namespace, container, kind)
/// for a single file.
///
/// Lighter than `load_all_ssa_summaries`, skips JSON deserialization of
/// the full summary body and filters by file_path in the query. `kind`
/// is the [`crate::symbol::FuncKind`] slug (`"fn"`, `"method"`,
/// `"closure"`, ...) so consumers can distinguish anonymous functions
/// from named ones.
pub fn load_ssa_summaries_for_file(
&self,
file_path: &str,
) -> NyxResult<Vec<(String, i64, String, String, String, String)>> {
let mut stmt = self.c().prepare(
"SELECT name, arity, lang, namespace, container, kind
FROM ssa_function_summaries
WHERE project = ?1 AND file_path = ?2",
)?;
let rows: Vec<(String, i64, String, String, String, String)> = stmt
.query_map(rusqlite::params![self.project, file_path], |row| {
Ok((
row.get::<_, String>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, String>(2)?,
row.get::<_, String>(3)?,
row.get::<_, String>(4)?,
row.get::<_, String>(5)?,
))
})?
.filter_map(Result::ok)
.collect();
Ok(rows)
}
/// Atomically swap in a new set of SSA callee bodies for one file.
///
/// Persists cross-file callee bodies for interprocedural symex. Each
/// input tuple is
/// `(name, arity, lang, namespace, container, disambig, kind, body)`.
///
/// Bodies are stored as MessagePack BLOBs produced by
/// `rmp_serde::to_vec_named` (named-field encoding). According to the
/// existing notes, JSON was too costly at indexing time on large SSA
/// structures, and bincode's positional format does not cope with the
/// `#[serde(skip_serializing_if = ...)]` attributes used by
/// `OptimizeResult` and related types.
///
/// Inside a single transaction, every `ssa_function_bodies` row for
/// `(project, file_path)` is deleted and one row per tuple is inserted.
///
/// # Errors
/// Fails when the clock is before the Unix epoch, a body cannot be
/// serialised, or a SQL statement fails; nothing is committed then.
pub fn replace_ssa_bodies_for_file(
    &mut self,
    file_path: &Path,
    file_hash: &[u8],
    bodies: &[(
        String,
        usize,
        String,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::taint::ssa_transfer::CalleeSsaBody,
    )],
) -> NyxResult<()> {
    const CLEAR_SQL: &str =
        "DELETE FROM ssa_function_bodies WHERE project = ?1 AND file_path = ?2";
    const INSERT_SQL: &str = "INSERT OR REPLACE INTO ssa_function_bodies
(project, file_path, file_hash, name, arity, lang, namespace,
container, disambig, kind, body, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)";

    let txn = self.conn.transaction()?;
    let path_str = file_path.to_string_lossy();
    let updated_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;

    txn.execute(CLEAR_SQL, params![self.project, path_str])?;
    {
        let mut insert = txn.prepare(INSERT_SQL)?;
        for (fn_name, param_count, language, ns, owner, disambig, fn_kind, callee) in bodies {
            let encoded = rmp_serde::to_vec_named(callee)
                .map_err(|e| NyxError::Msg(format!("SSA body serialise: {e}")))?;
            let disambig_sql = disambig.map(i64::from);
            insert.execute(params![
                self.project,
                path_str,
                file_hash,
                fn_name,
                *param_count as i64,
                language,
                ns,
                owner,
                disambig_sql,
                fn_kind.as_str(),
                encoded,
                updated_at
            ])?;
        }
    }
    txn.commit()?;
    Ok(())
}
/// Load every SSA callee body stored for this project.
///
/// Returns rows with the full metadata needed to rebuild a `FuncKey`:
/// `(file_path, name, lang, arity, namespace, container, disambig, kind, CalleeSsaBody)`.
///
/// All rows are read from SQLite first, then the MessagePack BLOBs are
/// decoded. Result sets larger than 256 rows are decoded in parallel with
/// rayon. The raw rows are consumed during decoding, so the metadata
/// strings are moved into the result instead of being cloned.
///
/// After decoding, `rebuild_body_graph` is run on every body.
///
/// Loading is best-effort: a row that cannot be read, or a BLOB that does
/// not deserialise, is logged with `tracing::warn!` and skipped.
///
/// # Errors
/// Fails only when the query cannot be prepared or started.
pub fn load_all_ssa_bodies(
    &self,
) -> NyxResult<
    Vec<(
        String,
        String,
        String,
        i64,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::taint::ssa_transfer::CalleeSsaBody,
    )>,
> {
    // Column layout of one undecoded row, in SELECT order.
    type RawRow = (
        String,
        String,
        String,
        i64,
        String,
        String,
        Option<i64>,
        String,
        Vec<u8>,
    );

    let mut stmt = self.c().prepare(
        "SELECT file_path, name, lang, arity, namespace,
container, disambig, kind, body
FROM ssa_function_bodies WHERE project = ?1",
    )?;
    let rows: Vec<RawRow> = stmt
        .query_map([&self.project], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, i64>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, String>(5)?,
                row.get::<_, Option<i64>>(6)?,
                row.get::<_, String>(7)?,
                row.get::<_, Vec<u8>>(8)?,
            ))
        })?
        .filter_map(|r| match r {
            Ok(v) => Some(v),
            Err(e) => {
                tracing::warn!("failed to read SSA body row: {e}");
                None
            }
        })
        .collect();

    // Takes the row by value so its strings move straight into the output.
    let decode = |(fp, name, lang, arity, ns, container, disambig, kind, blob): RawRow| {
        match rmp_serde::from_slice::<crate::taint::ssa_transfer::CalleeSsaBody>(&blob) {
            Ok(mut body) => {
                // Rehydrate a proxy Cfg from node_meta so the taint
                // engine's cross-file inline path can index
                // `cfg[inst.cfg_node]` uniformly. No-op for intra-file
                // bodies that carry node_meta empty.
                crate::taint::ssa_transfer::rebuild_body_graph(&mut body);
                Some((
                    fp,
                    name,
                    lang,
                    arity,
                    ns,
                    container,
                    disambig.map(|d| d as u32),
                    crate::symbol::FuncKind::from_slug(&kind),
                    body,
                ))
            }
            Err(e) => {
                tracing::warn!("failed to deserialize SSA body: {e}");
                None
            }
        }
    };

    let results: Vec<_> = if rows.len() > 256 {
        use rayon::prelude::*;
        rows.into_par_iter().filter_map(decode).collect()
    } else {
        rows.into_iter().filter_map(decode).collect()
    };
    Ok(results)
}
/// Atomically swap in a new set of `AuthCheckSummary` rows for one file.
///
/// Mirrors [`Self::replace_ssa_summaries_for_file`]. Each input tuple is
/// `(name, arity, lang, namespace, container, disambig, kind, summary)`,
/// the full identity needed to rebuild the callee's
/// [`crate::symbol::FuncKey`] on load.
///
/// Inside a single transaction, every `auth_check_summaries` row for
/// `(project, file_path)` is deleted and one row per tuple is inserted,
/// with the summary stored as JSON.
///
/// # Errors
/// Fails when the clock is before the Unix epoch, a summary cannot be
/// serialised, or a SQL statement fails; nothing is committed then.
pub fn replace_auth_summaries_for_file(
    &mut self,
    file_path: &Path,
    file_hash: &[u8],
    summaries: &[(
        String,
        usize,
        String,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::auth_analysis::model::AuthCheckSummary,
    )],
) -> NyxResult<()> {
    const CLEAR_SQL: &str =
        "DELETE FROM auth_check_summaries WHERE project = ?1 AND file_path = ?2";
    const INSERT_SQL: &str = "INSERT OR REPLACE INTO auth_check_summaries
(project, file_path, file_hash, name, arity, lang, namespace,
container, disambig, kind, summary, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)";

    let txn = self.conn.transaction()?;
    let path_str = file_path.to_string_lossy();
    let updated_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;

    txn.execute(CLEAR_SQL, params![self.project, path_str])?;
    {
        let mut insert = txn.prepare(INSERT_SQL)?;
        for (fn_name, param_count, language, ns, owner, disambig, fn_kind, auth) in summaries {
            let payload = serde_json::to_string(auth)
                .map_err(|e| NyxError::Msg(format!("auth summary serialise: {e}")))?;
            let disambig_sql = disambig.map(i64::from);
            insert.execute(params![
                self.project,
                path_str,
                file_hash,
                fn_name,
                *param_count as i64,
                language,
                ns,
                owner,
                disambig_sql,
                fn_kind.as_str(),
                payload,
                updated_at
            ])?;
        }
    }
    txn.commit()?;
    Ok(())
}
/// Replace every per-file cache for `file_path` inside one transaction.
///
/// Covers the same ground as calling
/// [`Self::replace_summaries_for_file`],
/// [`Self::replace_ssa_summaries_for_file`],
/// [`Self::replace_ssa_bodies_for_file`] and
/// [`Self::replace_auth_summaries_for_file`] one after another, and
/// additionally refreshes the file's `cross_package_imports` row. Because
/// everything shares one transaction there is a single commit; the
/// existing notes call the per-commit fsync the dominant cost on large
/// scans.
///
/// What happens per table:
/// * `function_summaries` and `auth_check_summaries`: always
///   DELETE-then-INSERT, so an empty input clears stale rows.
/// * `ssa_function_summaries` and `ssa_function_bodies`: untouched when
///   the corresponding input slice is empty, replaced otherwise.
/// * `cross_package_imports`: the file's row is always deleted; a new row
///   is written only for `Some((namespace, map))` with a non-empty `map`,
///   serialised as a MessagePack BLOB.
///
/// # Errors
/// Fails when the clock is before the Unix epoch, any value cannot be
/// serialised, or a SQL statement fails; nothing is committed then.
#[allow(clippy::too_many_arguments)]
pub fn replace_all_for_file(
    &mut self,
    file_path: &Path,
    file_hash: &[u8],
    func_summaries: &[crate::summary::FuncSummary],
    ssa_summaries: &[(
        String,
        usize,
        String,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::summary::ssa_summary::SsaFuncSummary,
    )],
    ssa_bodies: &[(
        String,
        usize,
        String,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::taint::ssa_transfer::CalleeSsaBody,
    )],
    auth_summaries: &[(
        String,
        usize,
        String,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::auth_analysis::model::AuthCheckSummary,
    )],
    cross_package_imports: Option<(
        &str,
        &std::collections::HashMap<String, crate::symbol::FuncKey>,
    )>,
) -> NyxResult<()> {
    const CLEAR_FUNC_SQL: &str =
        "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2";
    const INSERT_FUNC_SQL: &str = "INSERT OR REPLACE INTO function_summaries
(project, file_path, file_hash, name, arity, lang,
container, disambig, kind, summary, entry_kind, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)";
    const CLEAR_SSA_SQL: &str = "DELETE FROM ssa_function_summaries
WHERE project = ?1 AND file_path = ?2";
    const INSERT_SSA_SQL: &str = "INSERT OR REPLACE INTO ssa_function_summaries
(project, file_path, file_hash, name, arity, lang, namespace,
container, disambig, kind, summary, entry_kind, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)";
    const CLEAR_BODY_SQL: &str = "DELETE FROM ssa_function_bodies
WHERE project = ?1 AND file_path = ?2";
    const INSERT_BODY_SQL: &str = "INSERT OR REPLACE INTO ssa_function_bodies
(project, file_path, file_hash, name, arity, lang, namespace,
container, disambig, kind, body, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)";
    const CLEAR_AUTH_SQL: &str =
        "DELETE FROM auth_check_summaries WHERE project = ?1 AND file_path = ?2";
    const INSERT_AUTH_SQL: &str = "INSERT OR REPLACE INTO auth_check_summaries
(project, file_path, file_hash, name, arity, lang, namespace,
container, disambig, kind, summary, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)";
    const CLEAR_IMPORTS_SQL: &str =
        "DELETE FROM cross_package_imports WHERE project = ?1 AND file_path = ?2";
    const INSERT_IMPORTS_SQL: &str = "INSERT OR REPLACE INTO cross_package_imports
(project, file_path, file_hash, namespace, imports, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)";

    let txn = self.conn.transaction()?;
    let path_str = file_path.to_string_lossy();
    let updated_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;

    // function_summaries: always replaced.
    txn.execute(CLEAR_FUNC_SQL, params![self.project, path_str])?;
    {
        let mut insert = txn.prepare(INSERT_FUNC_SQL)?;
        for summary in func_summaries {
            let payload = serde_json::to_string(summary)
                .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
            let disambig_sql = summary.disambig.map(|d| d as i64);
            // A failed or empty encoding is stored as NULL.
            let entry_kind_sql = summary
                .entry_kind
                .as_ref()
                .and_then(|ek| serde_json::to_string(ek).ok())
                .filter(|encoded| !encoded.is_empty());
            insert.execute(params![
                self.project,
                path_str,
                file_hash,
                summary.name,
                summary.param_count as i64,
                summary.lang,
                summary.container,
                disambig_sql,
                summary.kind.as_str(),
                payload,
                entry_kind_sql,
                updated_at
            ])?;
        }
    }

    // ssa_function_summaries: left alone when there is nothing to write.
    if !ssa_summaries.is_empty() {
        txn.execute(CLEAR_SSA_SQL, params![self.project, path_str])?;
        let mut insert = txn.prepare(INSERT_SSA_SQL)?;
        for (fn_name, param_count, language, ns, owner, disambig, fn_kind, ssa) in ssa_summaries
        {
            let payload = serde_json::to_string(ssa)
                .map_err(|e| NyxError::Msg(format!("SSA summary serialise: {e}")))?;
            let disambig_sql = disambig.map(i64::from);
            let entry_kind_sql = ssa
                .entry_kind
                .as_ref()
                .and_then(|ek| serde_json::to_string(ek).ok())
                .filter(|encoded| !encoded.is_empty());
            insert.execute(params![
                self.project,
                path_str,
                file_hash,
                fn_name,
                *param_count as i64,
                language,
                ns,
                owner,
                disambig_sql,
                fn_kind.as_str(),
                payload,
                entry_kind_sql,
                updated_at
            ])?;
        }
    }

    // ssa_function_bodies: left alone when there is nothing to write.
    if !ssa_bodies.is_empty() {
        txn.execute(CLEAR_BODY_SQL, params![self.project, path_str])?;
        let mut insert = txn.prepare(INSERT_BODY_SQL)?;
        for (fn_name, param_count, language, ns, owner, disambig, fn_kind, callee) in ssa_bodies
        {
            let encoded = rmp_serde::to_vec_named(callee)
                .map_err(|e| NyxError::Msg(format!("SSA body serialise: {e}")))?;
            let disambig_sql = disambig.map(i64::from);
            insert.execute(params![
                self.project,
                path_str,
                file_hash,
                fn_name,
                *param_count as i64,
                language,
                ns,
                owner,
                disambig_sql,
                fn_kind.as_str(),
                encoded,
                updated_at
            ])?;
        }
    }

    // auth_check_summaries: always replaced, even when empty, so a helper
    // that lost its ownership check no longer leaks lifts into subsequent
    // pass-2 runs.
    txn.execute(CLEAR_AUTH_SQL, params![self.project, path_str])?;
    {
        let mut insert = txn.prepare(INSERT_AUTH_SQL)?;
        for (fn_name, param_count, language, ns, owner, disambig, fn_kind, auth) in
            auth_summaries
        {
            let payload = serde_json::to_string(auth)
                .map_err(|e| NyxError::Msg(format!("auth summary serialise: {e}")))?;
            let disambig_sql = disambig.map(i64::from);
            insert.execute(params![
                self.project,
                path_str,
                file_hash,
                fn_name,
                *param_count as i64,
                language,
                ns,
                owner,
                disambig_sql,
                fn_kind.as_str(),
                payload,
                updated_at
            ])?;
        }
    }

    // cross_package_imports: the old row always goes, so a file that lost
    // its imports leaves no stale resolutions behind.
    txn.execute(CLEAR_IMPORTS_SQL, params![self.project, path_str])?;
    match cross_package_imports {
        Some((namespace, map)) if !map.is_empty() => {
            let encoded = rmp_serde::to_vec_named(map).map_err(|e| {
                NyxError::Msg(format!("cross_package_imports serialise: {e}"))
            })?;
            txn.execute(
                INSERT_IMPORTS_SQL,
                params![self.project, path_str, file_hash, namespace, encoded, updated_at],
            )?;
        }
        _ => {}
    }

    txn.commit()?;
    Ok(())
}
/// Load every `AuthCheckSummary` stored for this project.
///
/// Returns rows with the full metadata needed to rebuild a `FuncKey`:
/// `(file_path, name, lang, arity, namespace, container, disambig, kind, AuthCheckSummary)`.
///
/// The raw rows are consumed while decoding, so the metadata strings are
/// moved into the result instead of being cloned.
///
/// Loading is best-effort: a row that cannot be read, or a JSON payload
/// that does not deserialise, is logged with `tracing::warn!` and skipped.
///
/// # Errors
/// Fails only when the query cannot be prepared or started.
pub fn load_all_auth_summaries(
    &self,
) -> NyxResult<
    Vec<(
        String,
        String,
        String,
        i64,
        String,
        String,
        Option<u32>,
        crate::symbol::FuncKind,
        crate::auth_analysis::model::AuthCheckSummary,
    )>,
> {
    let mut stmt = self.c().prepare(
        "SELECT file_path, name, lang, arity, namespace,
container, disambig, kind, summary
FROM auth_check_summaries WHERE project = ?1",
    )?;
    let rows: Vec<(
        String,
        String,
        String,
        i64,
        String,
        String,
        Option<i64>,
        String,
        String,
    )> = stmt
        .query_map([&self.project], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, i64>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, String>(5)?,
                row.get::<_, Option<i64>>(6)?,
                row.get::<_, String>(7)?,
                row.get::<_, String>(8)?,
            ))
        })?
        .filter_map(|r| match r {
            Ok(v) => Some(v),
            Err(e) => {
                tracing::warn!("failed to read auth summary row: {e}");
                None
            }
        })
        .collect();

    let mut out = Vec::with_capacity(rows.len());
    // Iterate by value: each row's strings move into the output tuple.
    for (fp, name, lang, arity, ns, container, disambig, kind, json) in rows {
        match serde_json::from_str::<crate::auth_analysis::model::AuthCheckSummary>(&json) {
            Ok(summary) => {
                out.push((
                    fp,
                    name,
                    lang,
                    arity,
                    ns,
                    container,
                    disambig.map(|d| d as u32),
                    crate::symbol::FuncKind::from_slug(&kind),
                    summary,
                ));
            }
            Err(e) => {
                tracing::warn!("failed to deserialize auth summary JSON: {e}");
            }
        }
    }
    Ok(out)
}
/// Load every persisted per-file cross-package import map (Phase 09) for
/// this project.
///
/// Returns rows as `(file_path, namespace, imports_map)`. Per the existing
/// notes, pass 2 of indexed scans uses this to populate
/// `GlobalSummaries::cross_package_imports_by_namespace`, recovering the
/// per-file import view that
/// [`crate::taint::ssa_transfer::CalleeSsaBody::cross_package_imports`]
/// loses across the SQLite round-trip (`#[serde(skip)]`).
///
/// Loading is best-effort: a row that cannot be read, or a MessagePack
/// BLOB that does not deserialise, is logged with `tracing::warn!` and
/// skipped.
///
/// # Errors
/// Fails only when the query cannot be prepared or started.
pub fn load_all_cross_package_imports(
    &self,
) -> NyxResult<
    Vec<(
        String,
        String,
        std::collections::HashMap<String, crate::symbol::FuncKey>,
    )>,
> {
    let mut stmt = self.c().prepare(
        "SELECT file_path, namespace, imports
FROM cross_package_imports WHERE project = ?1",
    )?;

    let mut raw: Vec<(String, String, Vec<u8>)> = Vec::new();
    let fetched = stmt.query_map([&self.project], |row| {
        Ok((
            row.get::<_, String>(0)?,
            row.get::<_, String>(1)?,
            row.get::<_, Vec<u8>>(2)?,
        ))
    })?;
    for row in fetched {
        match row {
            Ok(triple) => raw.push(triple),
            Err(e) => {
                tracing::warn!("failed to read cross_package_imports row: {e}");
            }
        }
    }

    let decoded = raw
        .into_iter()
        .filter_map(|(file_path, namespace, blob)| {
            match rmp_serde::from_slice::<
                std::collections::HashMap<String, crate::symbol::FuncKey>,
            >(&blob)
            {
                Ok(imports) => Some((file_path, namespace, imports)),
                Err(e) => {
                    tracing::warn!("failed to deserialize cross_package_imports blob: {e}");
                    None
                }
            }
        })
        .collect();
    Ok(decoded)
}
/// Store a [`crate::surface::SurfaceMap`] for this project.
///
/// Uses `INSERT OR REPLACE`, so a previously stored map for the project
/// is overwritten; the existing documentation states the table holds one
/// row per project.
///
/// `to_json` is invoked on a mutable clone of `map`, leaving the caller's
/// value untouched. According to the existing notes the map is
/// canonicalised before serialisation, so a `replace_surface_map` +
/// `load_surface_map` round-trip is byte-identical for structurally
/// identical maps.
///
/// # Errors
/// Fails when the clock is before the Unix epoch, serialisation fails,
/// or the SQL statement fails.
pub fn replace_surface_map(&mut self, map: &crate::surface::SurfaceMap) -> NyxResult<()> {
    const STORE_SQL: &str = "INSERT OR REPLACE INTO surface_map (project, map_json, updated_at)
VALUES (?1, ?2, ?3)";

    let updated_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
    let mut canonical = map.clone();
    let encoded = match canonical.to_json() {
        Ok(bytes) => bytes,
        Err(e) => return Err(NyxError::Msg(format!("surface map serialise: {e}"))),
    };
    self.c()
        .execute(STORE_SQL, params![self.project, encoded, updated_at])?;
    Ok(())
}
/// Load the stored [`crate::surface::SurfaceMap`] for this project.
///
/// Returns `Ok(None)` when no map has been written for the project.
/// The stored bytes are fetched through [`Self::load_surface_map_bytes`]
/// and decoded with `SurfaceMap::from_json`.
///
/// # Errors
/// Fails when the query fails or the stored bytes do not deserialise.
pub fn load_surface_map(&self) -> NyxResult<Option<crate::surface::SurfaceMap>> {
    self.load_surface_map_bytes()?
        .map(|stored| {
            crate::surface::SurfaceMap::from_json(&stored)
                .map_err(|e| NyxError::Msg(format!("surface map deserialise: {e}")))
        })
        .transpose()
}
/// Fetch the stored surface-map JSON bytes without deserialising them.
///
/// Returns `Ok(None)` when the project has no `surface_map` row. The
/// existing notes say the round-trip parity tests use this to compare
/// on-disk bytes across rescans.
///
/// # Errors
/// Fails when the query fails for any reason other than "no rows".
pub fn load_surface_map_bytes(&self) -> NyxResult<Option<Vec<u8>>> {
    const FETCH_SQL: &str = "SELECT map_json FROM surface_map WHERE project = ?1";

    let stored: Option<Vec<u8>> = self
        .c()
        .query_row(FETCH_SQL, params![self.project], |row| {
            row.get::<_, Vec<u8>>(0)
        })
        .optional()?;
    Ok(stored)
}
/// Delete a file and everything persisted about it for this project.
///
/// In one transaction this removes the `files` row and its `issues` (when
/// the file is known), then the rows keyed by `(project, file_path)` in
/// `function_summaries`, `ssa_function_summaries`, `ssa_function_bodies`,
/// `auth_check_summaries` and `cross_package_imports`. The per-path
/// deletes run even when no `files` row exists. Incremental scans use
/// this to prune deleted files from the index.
///
/// # Errors
/// Fails when the transaction cannot be opened or committed, or when any
/// statement fails; nothing is committed in that case.
pub fn remove_file_and_related(&mut self, path: &Path) -> NyxResult<()> {
    // Deletes keyed by (project, file_path), in execution order.
    const PER_PATH_DELETES: [&str; 5] = [
        "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2",
        "DELETE FROM ssa_function_summaries WHERE project = ?1 AND file_path = ?2",
        "DELETE FROM ssa_function_bodies WHERE project = ?1 AND file_path = ?2",
        "DELETE FROM auth_check_summaries WHERE project = ?1 AND file_path = ?2",
        "DELETE FROM cross_package_imports WHERE project = ?1 AND file_path = ?2",
    ];

    let txn = self.conn.transaction()?;
    let path_str = path.to_string_lossy();

    let known_id: Option<i64> = txn
        .query_row(
            "SELECT id FROM files WHERE project = ?1 AND path = ?2",
            params![self.project, path_str.as_ref()],
            |row| row.get(0),
        )
        .optional()?;
    if let Some(id) = known_id {
        // Issues first, then the file row they hang off.
        txn.execute("DELETE FROM issues WHERE file_id = ?1", params![id])?;
        txn.execute("DELETE FROM files WHERE id = ?1", params![id])?;
    }

    for sql in PER_PATH_DELETES {
        txn.execute(sql, params![self.project, path_str.as_ref()])?;
    }

    txn.commit()?;
    Ok(())
}
/// List the paths of all files recorded for `project`.
///
/// Filters on the `project` argument, not on this index's own project
/// field. Every stored `path` string is returned as a [`PathBuf`].
///
/// # Errors
/// Fails when the query cannot be prepared or started, or when any row
/// cannot be read; the first failing row aborts the listing.
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
    let mut stmt = self.c().prepare(
        "SELECT path
FROM files
WHERE project = ?1",
    )?;
    let stored = stmt.query_map([project], |row| row.get::<_, String>(0))?;
    let mut paths = Vec::new();
    for entry in stored {
        paths.push(PathBuf::from(entry?));
    }
    Ok(paths)
}
// Scan persistence
/// Insert a new scan record.
pub fn insert_scan(&self, record: &ScanRecord) -> NyxResult<()> {
self.c().execute(
"INSERT OR REPLACE INTO scans (id, status, scan_root, started_at, finished_at,
duration_secs, engine_version, languages, files_scanned, files_skipped,
finding_count, findings_json, timing_json, error)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14)",
params![
record.id,
record.status,
record.scan_root,
record.started_at,
record.finished_at,
record.duration_secs,
record.engine_version,
record.languages,
record.files_scanned,
record.files_skipped,
record.finding_count,
record.findings_json,
record.timing_json,
record.error,
],
)?;
Ok(())
}
/// Overwrite the status and completion fields of the scan with id `id`.
///
/// Every listed column is assigned unconditionally: passing `None` for an
/// optional argument writes NULL over whatever was stored. `scan_root`,
/// `started_at` and `engine_version` are not touched. If no row has the
/// given id, nothing changes and the call still succeeds.
///
/// # Errors
/// Fails when the SQL statement fails.
pub fn update_scan(
    &self,
    id: &str,
    status: &str,
    finished_at: Option<&str>,
    duration_secs: Option<f64>,
    finding_count: Option<i64>,
    findings_json: Option<&str>,
    timing_json: Option<&str>,
    error: Option<&str>,
    files_scanned: Option<i64>,
    files_skipped: Option<i64>,
    languages: Option<&str>,
) -> NyxResult<()> {
    const UPDATE_SQL: &str = "UPDATE scans SET status = ?2, finished_at = ?3, duration_secs = ?4,
finding_count = ?5, findings_json = ?6, timing_json = ?7, error = ?8,
files_scanned = ?9, files_skipped = ?10, languages = ?11
WHERE id = ?1";

    // Positional order must match the ?N placeholders above.
    let bound = params![
        id,
        status,
        finished_at,
        duration_secs,
        finding_count,
        findings_json,
        timing_json,
        error,
        files_scanned,
        files_skipped,
        languages,
    ];
    self.c().execute(UPDATE_SQL, bound)?;
    Ok(())
}
/// Get a single scan record by ID.
pub fn get_scan(&self, id: &str) -> NyxResult<Option<ScanRecord>> {
let result = self
.c()
.query_row(
"SELECT id, status, scan_root, started_at, finished_at, duration_secs,
engine_version, languages, files_scanned, files_skipped, finding_count,
findings_json, timing_json, error
FROM scans WHERE id = ?1",
params![id],
|row| {
Ok(ScanRecord {
id: row.get(0)?,
status: row.get(1)?,
scan_root: row.get(2)?,
started_at: row.get(3)?,
finished_at: row.get(4)?,
duration_secs: row.get(5)?,
engine_version: row.get(6)?,
languages: row.get(7)?,
files_scanned: row.get(8)?,
files_skipped: row.get(9)?,
finding_count: row.get(10)?,
findings_json: row.get(11)?,
timing_json: row.get(12)?,
error: row.get(13)?,
})
},
)
.optional()?;
Ok(result)
}
/// List scan records, most recent first, up to `limit`.
pub fn list_scans(&self, limit: i64) -> NyxResult<Vec<ScanRecord>> {
let mut stmt = self.c().prepare(
"SELECT id, status, scan_root, started_at, finished_at, duration_secs,
engine_version, languages, files_scanned, files_skipped, finding_count,
findings_json, timing_json, error
FROM scans ORDER BY started_at DESC LIMIT ?1",
)?;
let rows = stmt
.query_map(params![limit], |row| {
Ok(ScanRecord {
id: row.get(0)?,
status: row.get(1)?,
scan_root: row.get(2)?,
started_at: row.get(3)?,
finished_at: row.get(4)?,
duration_secs: row.get(5)?,
engine_version: row.get(6)?,
languages: row.get(7)?,
files_scanned: row.get(8)?,
files_skipped: row.get(9)?,
finding_count: row.get(10)?,
findings_json: row.get(11)?,
timing_json: row.get(12)?,
error: row.get(13)?,
})
})?
.filter_map(Result::ok)
.collect();
Ok(rows)
}
/// Delete the scan with the given `id` and report how many `scans` rows
/// were removed.
///
/// The existing documentation states that associated metrics and logs go
/// with it through the schema's foreign-key CASCADE (the schema is not
/// visible here).
///
/// # Errors
/// Fails when the SQL statement fails.
pub fn delete_scan(&self, id: &str) -> NyxResult<usize> {
    const DELETE_SQL: &str = "DELETE FROM scans WHERE id = ?1";
    let removed = self.c().execute(DELETE_SQL, params![id])?;
    Ok(removed)
}
/// Store the metrics snapshot of a scan in `scan_metrics`.
///
/// Uses `INSERT OR REPLACE`, so conflicting metrics already stored are
/// overwritten. Each counter is cast to `i64` for storage.
///
/// # Errors
/// Fails when the SQL statement fails.
pub fn insert_scan_metrics(
    &self,
    scan_id: &str,
    metrics: &crate::server::progress::ScanMetricsSnapshot,
) -> NyxResult<()> {
    const STORE_SQL: &str = "INSERT OR REPLACE INTO scan_metrics (scan_id, cfg_nodes, call_edges,
functions_analyzed, summaries_reused, unresolved_calls)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)";

    let cfg_nodes = metrics.cfg_nodes as i64;
    let call_edges = metrics.call_edges as i64;
    let functions_analyzed = metrics.functions_analyzed as i64;
    let summaries_reused = metrics.summaries_reused as i64;
    let unresolved_calls = metrics.unresolved_calls as i64;
    self.c().execute(
        STORE_SQL,
        params![
            scan_id,
            cfg_nodes,
            call_edges,
            functions_analyzed,
            summaries_reused,
            unresolved_calls,
        ],
    )?;
    Ok(())
}
/// Get scan metrics by scan ID.
pub fn get_scan_metrics(
&self,
scan_id: &str,
) -> NyxResult<Option<crate::server::progress::ScanMetricsSnapshot>> {
let result = self
.c()
.query_row(
"SELECT cfg_nodes, call_edges, functions_analyzed,
summaries_reused, unresolved_calls
FROM scan_metrics WHERE scan_id = ?1",
params![scan_id],
|row| {
Ok(crate::server::progress::ScanMetricsSnapshot {
cfg_nodes: row.get::<_, i64>(0)? as u64,
call_edges: row.get::<_, i64>(1)? as u64,
functions_analyzed: row.get::<_, i64>(2)? as u64,
summaries_reused: row.get::<_, i64>(3)? as u64,
unresolved_calls: row.get::<_, i64>(4)? as u64,
})
},
)
.optional()?;
Ok(result)
}
/// Append log entries for `scan_id` to `scan_logs`.
///
/// One prepared INSERT is reused for all entries. Timestamps are stored
/// as RFC 3339 strings and levels through their `to_string` form.
///
/// NOTE(review): no transaction is opened here, so if an insert fails
/// midway the earlier entries appear to remain stored -- confirm whether
/// callers rely on that or wrap the call themselves.
///
/// # Errors
/// Fails when the statement cannot be prepared or on the first insert
/// that fails.
pub fn insert_scan_logs(
    &self,
    scan_id: &str,
    logs: &[crate::server::scan_log::ScanLogEntry],
) -> NyxResult<()> {
    const APPEND_SQL: &str = "INSERT INTO scan_logs (scan_id, timestamp, level, message, file_path, detail)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)";

    let mut insert = self.c().prepare(APPEND_SQL)?;
    logs.iter().try_for_each(|entry| -> NyxResult<()> {
        insert.execute(params![
            scan_id,
            entry.timestamp.to_rfc3339(),
            entry.level.to_string(),
            entry.message,
            entry.file_path,
            entry.detail,
        ])?;
        Ok(())
    })
}
/// Load the log entries recorded for `scan_id`, oldest first (by row id).
///
/// When `level_filter` is `Some`, only rows whose `level` column equals it
/// are returned. Rows that fail to decode, or whose timestamp / level text
/// cannot be parsed, are silently omitted from the result.
pub fn get_scan_logs(
    &self,
    scan_id: &str,
    level_filter: Option<&str>,
) -> NyxResult<Vec<crate::server::scan_log::ScanLogEntry>> {
    // Choose the statement text together with its positional arguments so
    // the placeholder count always matches the bound values.
    let (sql, bound): (&str, Vec<Box<dyn rusqlite::types::ToSql>>) = match level_filter {
        Some(level) => (
            "SELECT timestamp, level, message, file_path, detail
FROM scan_logs WHERE scan_id = ?1 AND level = ?2
ORDER BY id ASC",
            vec![Box::new(scan_id.to_string()), Box::new(level.to_string())],
        ),
        None => (
            "SELECT timestamp, level, message, file_path, detail
FROM scan_logs WHERE scan_id = ?1
ORDER BY id ASC",
            vec![Box::new(scan_id.to_string())],
        ),
    };
    let bound_refs: Vec<&dyn rusqlite::types::ToSql> =
        bound.iter().map(|boxed| boxed.as_ref()).collect();

    let mut stmt = self.c().prepare(sql)?;
    let entries = stmt
        .query_map(bound_refs.as_slice(), |row| {
            let ts_str: String = row.get(0)?;
            let level_str: String = row.get(1)?;
            let message: String = row.get(2)?;
            let file_path: Option<String> = row.get(3)?;
            let detail: Option<String> = row.get(4)?;
            Ok((ts_str, level_str, message, file_path, detail))
        })?
        .filter_map(|decoded| {
            // Any failure (row decode, timestamp parse, level parse) drops
            // just that entry.
            let (ts_str, level_str, message, file_path, detail) = decoded.ok()?;
            let timestamp = chrono::DateTime::parse_from_rfc3339(&ts_str)
                .ok()?
                .with_timezone(&chrono::Utc);
            let level = level_str.parse().ok()?;
            Some(crate::server::scan_log::ScanLogEntry {
                timestamp,
                level,
                message,
                file_path,
                detail,
            })
        })
        .collect();
    Ok(entries)
}
// Triage state management
/// Look up the triage record stored for one finding fingerprint.
///
/// Yields `(state, note, updated_at)`, or `None` when the fingerprint has
/// no row in `triage_states`.
#[allow(dead_code)]
pub fn get_triage_state(
    &self,
    fingerprint: &str,
) -> NyxResult<Option<(String, String, String)>> {
    let conn = self.c();
    let found = conn
        .query_row(
            "SELECT state, note, updated_at FROM triage_states WHERE fingerprint = ?1",
            params![fingerprint],
            |row| {
                let state = row.get(0)?;
                let note = row.get(1)?;
                let updated_at = row.get(2)?;
                Ok((state, note, updated_at))
            },
        )
        .optional()?;
    Ok(found)
}
/// Set the triage state for a single finding.
///
/// Upserts the `triage_states` row for `fingerprint` and appends a matching
/// `triage_audit_log` entry tagged with `action`. The previous-state read,
/// the upsert and the audit insert run inside one transaction, so a state
/// change can never be stored without its audit record (and vice versa);
/// on any error the transaction is rolled back when dropped.
///
/// Returns the previous state, or `"open"` when the fingerprint had no
/// triage row yet.
///
/// # Errors
/// Returns the underlying database error if the transaction cannot be
/// started, any statement fails, or the commit fails.
pub fn set_triage_state(
    &self,
    fingerprint: &str,
    state: &str,
    note: &str,
    action: &str,
) -> NyxResult<String> {
    // One timestamp is shared by the state row and the audit row.
    let now = chrono::Utc::now().to_rfc3339();
    let conn = self.c();
    let tx = conn.unchecked_transaction()?;
    let prev: String = tx
        .query_row(
            "SELECT state FROM triage_states WHERE fingerprint = ?1",
            params![fingerprint],
            |row| row.get(0),
        )
        .optional()?
        .unwrap_or_else(|| "open".to_string());
    tx.execute(
        "INSERT INTO triage_states (fingerprint, state, note, updated_at)
VALUES (?1, ?2, ?3, ?4)
ON CONFLICT(fingerprint) DO UPDATE
SET state = excluded.state, note = excluded.note, updated_at = excluded.updated_at",
        params![fingerprint, state, note, now],
    )?;
    tx.execute(
        "INSERT INTO triage_audit_log (fingerprint, action, previous_state, new_state, note, timestamp)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
        params![fingerprint, action, prev, state, note, now],
    )?;
    tx.commit()?;
    Ok(prev)
}
/// Bulk set triage state for many findings at once.
///
/// Applies the same `state` / `note` to every fingerprint and appends one
/// audit-log entry per fingerprint tagged with `action`. The whole batch
/// runs inside a single transaction: either every state change and audit
/// row is stored, or none are (the transaction rolls back when dropped on
/// error). An empty `fingerprints` slice is a no-op.
///
/// Returns `(fingerprint, previous_state)` pairs in input order, where the
/// previous state is `"open"` for fingerprints with no existing triage row.
/// All previous states are read before any write, so a fingerprint listed
/// twice reports the same previous state both times.
///
/// # Errors
/// Returns the underlying database error if the transaction cannot be
/// started, any statement fails, or the commit fails.
pub fn set_triage_states_bulk(
    &self,
    fingerprints: &[String],
    state: &str,
    note: &str,
    action: &str,
) -> NyxResult<Vec<(String, String)>> {
    if fingerprints.is_empty() {
        return Ok(Vec::new());
    }
    // One timestamp is shared by every row written in this batch.
    let now = chrono::Utc::now().to_rfc3339();
    let conn = self.c();
    let tx = conn.unchecked_transaction()?;
    let mut results = Vec::with_capacity(fingerprints.len());
    // Each prepared statement lives in its own scope so it is dropped
    // before `commit` consumes the transaction.
    {
        // Read all previous states first
        let mut prev_stmt =
            tx.prepare("SELECT state FROM triage_states WHERE fingerprint = ?1")?;
        for fp in fingerprints {
            let prev: String = prev_stmt
                .query_row(params![fp], |row| row.get(0))
                .optional()?
                .unwrap_or_else(|| "open".to_string());
            results.push((fp.clone(), prev));
        }
    }
    {
        // Upsert all states
        let mut upsert_stmt = tx.prepare(
            "INSERT INTO triage_states (fingerprint, state, note, updated_at)
VALUES (?1, ?2, ?3, ?4)
ON CONFLICT(fingerprint) DO UPDATE
SET state = excluded.state, note = excluded.note, updated_at = excluded.updated_at",
        )?;
        for fp in fingerprints {
            upsert_stmt.execute(params![fp, state, note, now])?;
        }
    }
    {
        // Insert audit log entries
        let mut audit_stmt = tx.prepare(
            "INSERT INTO triage_audit_log (fingerprint, action, previous_state, new_state, note, timestamp)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
        )?;
        for (fp, prev) in &results {
            audit_stmt.execute(params![fp, action, prev, state, note, now])?;
        }
    }
    tx.commit()?;
    Ok(results)
}
/// Read every row of `triage_states` into a map keyed by fingerprint, with
/// `(state, note, updated_at)` as the value. Rows that fail to decode are
/// skipped.
pub fn get_all_triage_states(
    &self,
) -> NyxResult<std::collections::HashMap<String, (String, String, String)>> {
    let conn = self.c();
    let mut stmt =
        conn.prepare("SELECT fingerprint, state, note, updated_at FROM triage_states")?;
    let decoded = stmt.query_map([], |row| {
        let fingerprint: String = row.get(0)?;
        let state: String = row.get(1)?;
        let note: String = row.get(2)?;
        let updated_at: String = row.get(3)?;
        Ok((fingerprint, state, note, updated_at))
    })?;
    let mut states = std::collections::HashMap::new();
    for (fingerprint, state, note, updated_at) in decoded.flatten() {
        states.insert(fingerprint, (state, note, updated_at));
    }
    Ok(states)
}
/// List triage states with an optional state filter, paginated.
///
/// Returns `(entries, total_count)` where each entry is
/// `(fingerprint, state, note, updated_at)` and `total_count` is the number
/// of rows matching the filter regardless of `limit` / `offset`.
///
/// Entries are ordered newest-first by `updated_at`, with `fingerprint` as a
/// tie-breaker. The tie-breaker matters because bulk updates stamp many rows
/// with the same timestamp; without a total order, LIMIT/OFFSET pages could
/// repeat or skip rows. Rows that fail to decode are skipped.
pub fn list_triage_states(
    &self,
    state_filter: Option<&str>,
    limit: i64,
    offset: i64,
) -> NyxResult<(Vec<(String, String, String, String)>, i64)> {
    /// Decode one `triage_states` row, columns in SELECT order.
    fn read_row(
        row: &rusqlite::Row<'_>,
    ) -> rusqlite::Result<(String, String, String, String)> {
        Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
    }

    let conn = self.c();
    let (total, entries) = match state_filter {
        Some(state) => {
            let total: i64 = conn.query_row(
                "SELECT COUNT(*) FROM triage_states WHERE state = ?1",
                params![state],
                |row| row.get(0),
            )?;
            let mut stmt = conn.prepare(
                "SELECT fingerprint, state, note, updated_at FROM triage_states
WHERE state = ?1 ORDER BY updated_at DESC, fingerprint ASC LIMIT ?2 OFFSET ?3",
            )?;
            let entries: Vec<_> = stmt
                .query_map(params![state, limit, offset], read_row)?
                .filter_map(Result::ok)
                .collect();
            (total, entries)
        }
        None => {
            let total: i64 =
                conn.query_row("SELECT COUNT(*) FROM triage_states", [], |row| row.get(0))?;
            let mut stmt = conn.prepare(
                "SELECT fingerprint, state, note, updated_at FROM triage_states
ORDER BY updated_at DESC, fingerprint ASC LIMIT ?1 OFFSET ?2",
            )?;
            let entries: Vec<_> = stmt
                .query_map(params![limit, offset], read_row)?
                .filter_map(Result::ok)
                .collect();
            (total, entries)
        }
    };
    Ok((entries, total))
}
/// Get the triage audit log, optionally filtered by fingerprint, paginated.
///
/// Returns `(entries, total_count)` where `total_count` is the number of
/// rows matching the filter regardless of `limit` / `offset`.
///
/// Entries are ordered newest-first by `timestamp`, with `id` as a
/// tie-breaker. Bulk triage operations write many audit rows with an
/// identical timestamp; without a total order, LIMIT/OFFSET pages could
/// repeat or skip rows. Rows that fail to decode are skipped.
pub fn get_audit_log(
    &self,
    fingerprint_filter: Option<&str>,
    limit: i64,
    offset: i64,
) -> NyxResult<(Vec<AuditEntry>, i64)> {
    /// Decode one `triage_audit_log` row, columns in SELECT order.
    fn read_entry(row: &rusqlite::Row<'_>) -> rusqlite::Result<AuditEntry> {
        Ok(AuditEntry {
            id: row.get(0)?,
            fingerprint: row.get(1)?,
            action: row.get(2)?,
            previous_state: row.get(3)?,
            new_state: row.get(4)?,
            note: row.get(5)?,
            timestamp: row.get(6)?,
        })
    }

    let conn = self.c();
    let (total, entries) = match fingerprint_filter {
        Some(fp) => {
            let total: i64 = conn.query_row(
                "SELECT COUNT(*) FROM triage_audit_log WHERE fingerprint = ?1",
                params![fp],
                |row| row.get(0),
            )?;
            let mut stmt = conn.prepare(
                "SELECT id, fingerprint, action, previous_state, new_state, note, timestamp
FROM triage_audit_log WHERE fingerprint = ?1
ORDER BY timestamp DESC, id DESC LIMIT ?2 OFFSET ?3",
            )?;
            let entries: Vec<AuditEntry> = stmt
                .query_map(params![fp, limit, offset], read_entry)?
                .filter_map(Result::ok)
                .collect();
            (total, entries)
        }
        None => {
            let total: i64 =
                conn.query_row("SELECT COUNT(*) FROM triage_audit_log", [], |row| row.get(0))?;
            let mut stmt = conn.prepare(
                "SELECT id, fingerprint, action, previous_state, new_state, note, timestamp
FROM triage_audit_log ORDER BY timestamp DESC, id DESC LIMIT ?1 OFFSET ?2",
            )?;
            let entries: Vec<AuditEntry> = stmt
                .query_map(params![limit, offset], read_entry)?
                .filter_map(Result::ok)
                .collect();
            (total, entries)
        }
    };
    Ok((entries, total))
}
/// Store a pattern-based suppression rule and return its row id.
///
/// The insert is `INSERT OR REPLACE`, so a rule that conflicts on a
/// uniqueness constraint replaces the existing row; the returned id is the
/// connection's last inserted rowid. `created_at` is set to the current UTC
/// time in RFC-3339 form.
pub fn add_suppression_rule(
    &self,
    suppress_by: &str,
    match_value: &str,
    state: &str,
    note: &str,
) -> NyxResult<i64> {
    let created_at = chrono::Utc::now().to_rfc3339();
    let conn = self.c();
    conn.execute(
        "INSERT OR REPLACE INTO triage_suppression_rules
(suppress_by, match_value, state, note, created_at)
VALUES (?1, ?2, ?3, ?4, ?5)",
        params![suppress_by, match_value, state, note, created_at],
    )?;
    let rule_id = conn.last_insert_rowid();
    Ok(rule_id)
}
/// Load every suppression rule, most recently created first. Rows that
/// fail to decode are skipped.
pub fn get_suppression_rules(&self) -> NyxResult<Vec<SuppressionRule>> {
    let conn = self.c();
    let mut stmt = conn.prepare(
        "SELECT id, suppress_by, match_value, state, note, created_at
FROM triage_suppression_rules ORDER BY created_at DESC",
    )?;
    let decoded = stmt.query_map([], |row| {
        Ok(SuppressionRule {
            id: row.get(0)?,
            suppress_by: row.get(1)?,
            match_value: row.get(2)?,
            state: row.get(3)?,
            note: row.get(4)?,
            created_at: row.get(5)?,
        })
    })?;
    let mut rules: Vec<SuppressionRule> = Vec::new();
    rules.extend(decoded.flatten());
    Ok(rules)
}
/// Remember when a finding fingerprint was first observed.
///
/// Idempotent: the statement is `INSERT OR IGNORE`, so the earliest call
/// wins and later calls for the same fingerprint leave the stored timestamp
/// untouched. Used by the overview backlog-age computation; `ts` should be
/// the originating scan's `started_at` (RFC-3339).
pub fn record_finding_first_seen(&self, fingerprint: &str, ts: &str) -> NyxResult<()> {
    let conn = self.c();
    conn.execute(
        "INSERT OR IGNORE INTO finding_first_seen (fingerprint, first_seen_at) VALUES (?1, ?2)",
        params![fingerprint, ts],
    )?;
    Ok(())
}
/// Bulk form of first-seen recording: each `(fingerprint, first_seen_at)`
/// pair is inserted with `INSERT OR IGNORE` inside one transaction, so
/// fingerprints that already have a row keep their existing timestamp.
/// An empty slice returns immediately without touching the database.
pub fn record_finding_first_seen_bulk(
    &self,
    entries: &[(String, String)],
) -> NyxResult<()> {
    if entries.is_empty() {
        return Ok(());
    }
    let conn = self.c();
    let tx = conn.unchecked_transaction()?;
    {
        // The statement borrows the transaction, so it must go out of
        // scope before `commit` takes ownership of it.
        let mut insert = tx.prepare(
            "INSERT OR IGNORE INTO finding_first_seen (fingerprint, first_seen_at) VALUES (?1, ?2)",
        )?;
        for (fingerprint, first_seen_at) in entries {
            insert.execute(params![fingerprint, first_seen_at])?;
        }
    }
    tx.commit()?;
    Ok(())
}
/// Resolve first-seen timestamps for the given fingerprints.
///
/// The result maps fingerprint → `first_seen_at`; fingerprints with no
/// stored row are simply absent. Rows that fail to decode are skipped.
pub fn get_first_seen_map(
    &self,
    fingerprints: &[String],
) -> NyxResult<std::collections::HashMap<String, String>> {
    if fingerprints.is_empty() {
        return Ok(std::collections::HashMap::new());
    }
    let conn = self.c();
    let mut first_seen = std::collections::HashMap::with_capacity(fingerprints.len());
    // The number of bound parameters per statement is limited, so large
    // fingerprint sets are queried in batches of 500.
    for batch in fingerprints.chunks(500) {
        // Build "?1,?2,...,?N" for this batch.
        let mut placeholders = String::new();
        for i in 1..=batch.len() {
            if i > 1 {
                placeholders.push(',');
            }
            placeholders.push_str(&format!("?{i}"));
        }
        let sql = format!(
            "SELECT fingerprint, first_seen_at FROM finding_first_seen WHERE fingerprint IN ({placeholders})"
        );
        let mut stmt = conn.prepare(&sql)?;
        let mut args: Vec<&dyn rusqlite::ToSql> = Vec::with_capacity(batch.len());
        for fp in batch {
            args.push(fp as &dyn rusqlite::ToSql);
        }
        let decoded = stmt.query_map(args.as_slice(), |row| {
            let fingerprint: String = row.get(0)?;
            let seen_at: String = row.get(1)?;
            Ok((fingerprint, seen_at))
        })?;
        first_seen.extend(decoded.flatten());
    }
    Ok(first_seen)
}
/// Read the value stored under `key` in `nyx_metadata`.
///
/// Returns `Ok(None)` when the key has no row.
pub fn get_metadata(&self, key: &str) -> NyxResult<Option<String>> {
    let value = self
        .c()
        .query_row(
            "SELECT value FROM nyx_metadata WHERE key = ?1",
            params![key],
            |row| row.get::<_, String>(0),
        )
        .optional()?;
    Ok(value)
}
/// Store `value` under `key` in `nyx_metadata`, replacing any existing
/// conflicting row (`INSERT OR REPLACE`).
pub fn set_metadata(&self, key: &str, value: &str) -> NyxResult<()> {
    let conn = self.c();
    conn.execute(
        "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES (?1, ?2)",
        params![key, value],
    )?;
    Ok(())
}
/// Delete the `nyx_metadata` row for `key`.
///
/// Returns `true` when at least one row was removed, `false` when the key
/// was not present.
pub fn delete_metadata(&self, key: &str) -> NyxResult<bool> {
    let conn = self.c();
    let removed = conn.execute("DELETE FROM nyx_metadata WHERE key = ?1", params![key])?;
    Ok(removed > 0)
}
/// Remove the suppression rule with the given row `id`.
///
/// Returns `true` when a row was deleted, `false` when no rule had that id.
pub fn delete_suppression_rule(&self, id: i64) -> NyxResult<bool> {
    let conn = self.c();
    let removed = conn.execute(
        "DELETE FROM triage_suppression_rules WHERE id = ?1",
        params![id],
    )?;
    Ok(removed > 0)
}
// Maintenance utilities
/// Reset the incremental index.
///
/// Drops the `issues`, `files`, `function_summaries` and
/// `ssa_function_summaries` tables (with foreign-key enforcement switched
/// off for the duration of the drops), vacuums the database, then re-runs
/// `SCHEMA` to recreate them. Tables not named in the batch are not dropped.
pub fn clear(&self) -> NyxResult<()> {
    let conn = self.c();
    conn.execute_batch(
        r#"
PRAGMA foreign_keys = OFF;
DROP TABLE IF EXISTS issues;
DROP TABLE IF EXISTS files;
DROP TABLE IF EXISTS function_summaries;
DROP TABLE IF EXISTS ssa_function_summaries;
PRAGMA foreign_keys = ON;
VACUUM;
"#,
    )?;
    // Recreate whatever was just dropped.
    conn.execute_batch(SCHEMA)?;
    Ok(())
}
/// Run SQLite's `VACUUM` on the index database.
pub fn vacuum(&self) -> NyxResult<()> {
    let conn = self.c();
    conn.execute("VACUUM;", [])?;
    Ok(())
}
// Helpers
/// Test-only helper: stream the file at `path` through a BLAKE3 hasher and
/// return the digest bytes.
#[cfg(test)]
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
    let mut source = fs::File::open(path)?;
    let mut state = blake3::Hasher::new();
    // The hasher is used as the write sink, so the file is never held in
    // memory all at once.
    std::io::copy(&mut source, &mut state)?;
    let digest = state.finalize();
    Ok(digest.as_bytes().to_vec())
}
/// Compute the BLAKE3 digest of bytes that are already in memory, avoiding
/// a second read from disk.
pub fn digest_bytes(bytes: &[u8]) -> Vec<u8> {
    let digest = blake3::hash(bytes);
    digest.as_bytes().to_vec()
}
}
}
/// `should_scan` is true for an unseen file, false right after
/// `upsert_file`, and true again once the file is rewritten.
#[test]
fn indexer_should_scan_and_upsert_logic() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source = tmp.path().join("sample.rs");
    std::fs::write(&source, "fn main() {}").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let indexer = index::Indexer::from_pool("proj", &pool).unwrap();

    // Unknown to the DB, so a scan is required.
    assert!(indexer.should_scan(&source).unwrap());

    // Recorded and untouched, so no scan is needed.
    indexer.upsert_file(&source).unwrap();
    assert!(!indexer.should_scan(&source).unwrap());

    // Rewrite the contents; the short sleep lets the mtime advance.
    std::thread::sleep(std::time::Duration::from_millis(25));
    std::fs::write(&source, "fn main() { /* changed */ }").unwrap();
    assert!(indexer.should_scan(&source).unwrap());
}
/// Issues written with `replace_issues` come back from
/// `get_issues_from_file` with their rule ids and severities intact.
#[test]
fn replace_issues_and_query_back() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source = tmp.path().join("code.go");
    std::fs::write(&source, "package main").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut indexer = index::Indexer::from_pool("proj", &pool).unwrap();
    let file_id = indexer.upsert_file(&source).unwrap();

    let high = index::IssueRow {
        rule_id: "X1",
        severity: "High",
        line: 3,
        col: 7,
    };
    let low = index::IssueRow {
        rule_id: "X2",
        severity: "Low",
        line: 4,
        col: 1,
    };
    let rows = [high, low];
    indexer.replace_issues(file_id, rows.clone()).unwrap();

    let stored = indexer.get_issues_from_file(&source).unwrap();
    assert_eq!(stored.len(), 2);

    // True when a stored diagnostic carries the given rule id and severity.
    let has = |rule: &str, sev: crate::patterns::Severity| {
        stored.iter().any(|d| d.id == rule && d.severity == sev)
    };
    assert!(has("X1", crate::patterns::Severity::High));
    assert!(has("X2", crate::patterns::Severity::Low));
}
/// After `clear` followed by `vacuum`, a previously indexed project has no
/// file rows left.
#[test]
fn clear_and_vacuum_reset_tables() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source = tmp.path().join("f.rs");
    std::fs::write(&source, "//").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let indexer = index::Indexer::from_pool("proj", &pool).unwrap();
    indexer.upsert_file(&source).unwrap();

    let before = indexer.get_files("proj").unwrap();
    assert!(!before.is_empty());

    indexer.clear().unwrap();
    indexer.vacuum().unwrap();

    let after = indexer.get_files("proj").unwrap();
    assert!(after.is_empty());
}
/// Clearing the index through one project handle must not delete scan
/// history written through another handle on the same pool.
#[test]
fn clear_preserves_scan_history_tables() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();
    let scans = index::Indexer::from_pool("_scans", &pool).unwrap();

    let record = index::ScanRecord {
        id: "scan-1".to_string(),
        status: "completed".to_string(),
        scan_root: tmp.path().display().to_string(),
        started_at: Some("2026-03-25T12:00:00Z".to_string()),
        finished_at: Some("2026-03-25T12:00:01Z".to_string()),
        duration_secs: Some(1.0),
        engine_version: Some("test".to_string()),
        languages: None,
        files_scanned: Some(1),
        files_skipped: Some(0),
        finding_count: Some(0),
        findings_json: Some("[]".to_string()),
        timing_json: None,
        error: None,
    };
    scans.insert_scan(&record).unwrap();

    // Wipe the index via a different project handle.
    let project = index::Indexer::from_pool("proj", &pool).unwrap();
    project.clear().unwrap();

    let reloaded = scans
        .get_scan("scan-1")
        .unwrap()
        .expect("scan history should survive index clears");
    assert_eq!(reloaded.status, "completed");
}
/// Stores two SSA function summaries for one file and checks that both come
/// back from `load_all_ssa_summaries` with their name, language, arity,
/// namespace and taint fields intact.
#[test]
fn ssa_summaries_round_trip() {
use crate::labels::Cap;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
let td = tempfile::tempdir().unwrap();
let db = td.path().join("nyx.sqlite");
let f = td.path().join("app.py");
std::fs::write(&f, "def process(data): return data").unwrap();
let pool = index::Indexer::init(&db).unwrap();
let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();
let hash = index::Indexer::digest_bytes(b"def process(data): return data");
// Each input row starts with (name, arity, lang, namespace, ...) and ends
// with (kind, summary); the assertions below confirm the first four.
// NOTE(review): the meaning of the fifth (String) and sixth (Option)
// positions is not visible from this test.
let summaries = vec![
(
"process".to_string(),
1_usize,
"python".to_string(),
"app.py".to_string(),
String::new(),
None,
crate::symbol::FuncKind::Function,
// First summary: parameter 0 flows to the return value unchanged; no
// sinks and no source caps.
SsaFuncSummary {
param_to_return: vec![(0, TaintTransform::Identity)],
param_to_sink: vec![],
source_caps: Cap::empty(),
param_to_sink_param: vec![],
param_container_to_return: vec![],
param_to_container_store: vec![],
return_type: None,
return_abstract: None,
source_to_callback: vec![],
receiver_to_return: None,
receiver_to_sink: Cap::empty(),
abstract_transfer: vec![],
param_return_paths: vec![],
points_to: Default::default(),
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
),
(
"sanitize".to_string(),
1_usize,
"python".to_string(),
"app.py".to_string(),
String::new(),
None,
crate::symbol::FuncKind::Function,
// Second summary: exercises the non-empty cases, namely a StripBits
// transform, a parameter-to-sink entry and a non-empty source cap.
SsaFuncSummary {
param_to_return: vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))],
param_to_sink: vec![(
0,
smallvec::smallvec![crate::summary::SinkSite::cap_only(Cap::SQL_QUERY)],
)],
source_caps: Cap::ENV_VAR,
param_to_sink_param: vec![],
param_container_to_return: vec![],
param_to_container_store: vec![],
return_type: None,
return_abstract: None,
source_to_callback: vec![],
receiver_to_return: None,
receiver_to_sink: Cap::empty(),
abstract_transfer: vec![],
param_return_paths: vec![],
points_to: Default::default(),
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
),
];
idx.replace_ssa_summaries_for_file(&f, &hash, &summaries)
.unwrap();
let loaded = idx.load_all_ssa_summaries().unwrap();
assert_eq!(loaded.len(), 2);
// Loaded rows are 9-tuples; rows are looked up by name (position 1) so the
// test does not depend on load order.
// Check first summary
let (_, name1, lang1, arity1, ns1, _, _, _, sum1) = loaded
.iter()
.find(|(_, n, _, _, _, _, _, _, _)| n == "process")
.unwrap();
assert_eq!(name1, "process");
assert_eq!(lang1, "python");
assert_eq!(*arity1, 1);
assert_eq!(ns1, "app.py");
assert_eq!(sum1.param_to_return, vec![(0, TaintTransform::Identity)]);
assert!(sum1.param_to_sink.is_empty());
// Check second summary
let (_, name2, _, _, _, _, _, _, sum2) = loaded
.iter()
.find(|(_, n, _, _, _, _, _, _, _)| n == "sanitize")
.unwrap();
assert_eq!(name2, "sanitize");
assert_eq!(
sum2.param_to_return,
vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))]
);
assert_eq!(sum2.param_to_sink_caps(), vec![(0, Cap::SQL_QUERY)]);
assert_eq!(sum2.source_caps, Cap::ENV_VAR);
}
/// Round-trip test for [`crate::summary::ssa_summary::PathFactReturnEntry`]:
/// asserts that `return_path_facts` survive serialise → SQLite persist →
/// load → deserialise. Regression guard for the per-return-path PathFact
/// decomposition that closes the rs-safe-014 / tar-rs / rs-safe-016 FP
/// cluster. Without this round-trip working, cross-file callers lose the
/// per-arm narrowing and inline-only callees regain the joined-fact
/// dilution.
#[test]
fn ssa_summaries_round_trip_preserves_return_path_facts() {
use crate::abstract_interp::PathFact;
use crate::summary::ssa_summary::{PathFactReturnEntry, SsaFuncSummary, TaintTransform};
use smallvec::smallvec;
let td = tempfile::tempdir().unwrap();
let db = td.path().join("nyx.sqlite");
let f = td.path().join("sanitize.rs");
std::fs::write(&f, "// sanitizer body").unwrap();
let pool = index::Indexer::init(&db).unwrap();
let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();
let hash = index::Indexer::digest_bytes(b"// sanitizer body");
// Two entries with distinct predicate hashes: one without an inner fact
// and one whose inner fact has the dotdot and absolute bits cleared.
let return_path_facts = smallvec![
PathFactReturnEntry {
predicate_hash: 0,
known_true: 0,
known_false: 0,
path_fact: PathFact::top(),
variant_inner_fact: None,
},
PathFactReturnEntry {
predicate_hash: 17,
known_true: 0,
known_false: 0,
path_fact: PathFact::top(),
variant_inner_fact: Some(
PathFact::top()
.with_dotdot_cleared()
.with_absolute_cleared(),
),
},
];
// Every field other than the two set here takes its `Default` value.
let summary = SsaFuncSummary {
param_to_return: vec![(0, TaintTransform::Identity)],
return_path_facts: return_path_facts.clone(),
..Default::default()
};
let row = (
"sanitize_path".to_string(),
1_usize,
"rust".to_string(),
"sanitize.rs".to_string(),
String::new(),
None,
crate::symbol::FuncKind::Function,
summary,
);
idx.replace_ssa_summaries_for_file(&f, &hash, &[row])
.unwrap();
let loaded = idx.load_all_ssa_summaries().unwrap();
assert_eq!(loaded.len(), 1);
let (_, name, _, _, _, _, _, _, sum) = &loaded[0];
assert_eq!(name, "sanitize_path");
assert_eq!(
sum.return_path_facts.len(),
2,
"two distinct return paths must round-trip"
);
// Find each entry by predicate hash so order doesn't matter.
let none_arm = sum
.return_path_facts
.iter()
.find(|e| e.predicate_hash == 0)
.expect("unguarded entry");
assert!(none_arm.path_fact.is_top());
assert!(none_arm.variant_inner_fact.is_none());
let some_arm = sum
.return_path_facts
.iter()
.find(|e| e.predicate_hash == 17)
.expect("guarded entry");
let inner = some_arm
.variant_inner_fact
.as_ref()
.expect("inner fact survives round-trip");
assert!(
inner.is_path_safe(),
"Some arm's inner fact stays path-safe"
);
}
/// Writing summaries for the same file a second time (with a different hash
/// and a different function) must leave only the new summary in the table.
#[test]
fn ssa_summaries_hash_rescan_replaces_stale() {
use crate::labels::Cap;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
let td = tempfile::tempdir().unwrap();
let db = td.path().join("nyx.sqlite");
let f = td.path().join("lib.py");
std::fs::write(&f, "v1").unwrap();
let pool = index::Indexer::init(&db).unwrap();
let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();
// First write: a single summary for `old_func`, keyed by the v1 hash.
let hash_v1 = index::Indexer::digest_bytes(b"v1");
let sums_v1 = vec![(
"old_func".to_string(),
1_usize,
"python".to_string(),
"lib.py".to_string(),
String::new(),
None,
crate::symbol::FuncKind::Function,
SsaFuncSummary {
param_to_return: vec![(0, TaintTransform::Identity)],
param_to_sink: vec![],
source_caps: Cap::empty(),
param_to_sink_param: vec![],
param_container_to_return: vec![],
param_to_container_store: vec![],
return_type: None,
return_abstract: None,
source_to_callback: vec![],
receiver_to_return: None,
receiver_to_sink: Cap::empty(),
abstract_transfer: vec![],
param_return_paths: vec![],
points_to: Default::default(),
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
)];
idx.replace_ssa_summaries_for_file(&f, &hash_v1, &sums_v1)
.unwrap();
// Simulate file change: different function, different hash
// (the file on disk is not rewritten; only the hash argument changes).
let hash_v2 = index::Indexer::digest_bytes(b"v2");
let sums_v2 = vec![(
"new_func".to_string(),
2_usize,
"python".to_string(),
"lib.py".to_string(),
String::new(),
None,
crate::symbol::FuncKind::Function,
SsaFuncSummary {
param_to_return: vec![(0, TaintTransform::StripBits(Cap::SHELL_ESCAPE))],
param_to_sink: vec![],
source_caps: Cap::empty(),
param_to_sink_param: vec![],
param_container_to_return: vec![],
param_to_container_store: vec![],
return_type: None,
return_abstract: None,
source_to_callback: vec![],
receiver_to_return: None,
receiver_to_sink: Cap::empty(),
abstract_transfer: vec![],
param_return_paths: vec![],
points_to: Default::default(),
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
)];
idx.replace_ssa_summaries_for_file(&f, &hash_v2, &sums_v2)
.unwrap();
let loaded = idx.load_all_ssa_summaries().unwrap();
assert_eq!(
loaded.len(),
1,
"old summary should be replaced, not duplicated"
);
// Position 1 of a loaded row is the function name.
assert_eq!(loaded[0].1, "new_func");
}
/// `clear` must remove stored SSA summaries: one summary is written,
/// confirmed present, and then confirmed gone after the index is cleared.
#[test]
fn clear_drops_ssa_summaries_table() {
use crate::labels::Cap;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
let td = tempfile::tempdir().unwrap();
let db = td.path().join("nyx.sqlite");
let f = td.path().join("test.py");
std::fs::write(&f, "x").unwrap();
let pool = index::Indexer::init(&db).unwrap();
let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();
let hash = index::Indexer::digest_bytes(b"x");
// A single minimal summary is enough; only the row count is asserted.
let sums = vec![(
"f".to_string(),
1_usize,
"python".to_string(),
"test.py".to_string(),
String::new(),
None,
crate::symbol::FuncKind::Function,
SsaFuncSummary {
param_to_return: vec![(0, TaintTransform::Identity)],
param_to_sink: vec![],
source_caps: Cap::empty(),
param_to_sink_param: vec![],
param_container_to_return: vec![],
param_to_container_store: vec![],
return_type: None,
return_abstract: None,
source_to_callback: vec![],
receiver_to_return: None,
receiver_to_sink: Cap::empty(),
abstract_transfer: vec![],
param_return_paths: vec![],
points_to: Default::default(),
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
)];
idx.replace_ssa_summaries_for_file(&f, &hash, &sums)
.unwrap();
assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 1);
// After clearing, loading still succeeds but returns no rows.
idx.clear().unwrap();
assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 0);
}
// ── CalleeSsaBody persistence tests ──────────────────────────────────────
/// Helper: construct a small `CalleeSsaBody` for the DB tests.
///
/// Produces `num_blocks` blocks, each holding one `Const` instruction whose
/// payload is the block index rendered as text, plus one matching
/// `ValueDef` per block. Every other field is empty or zeroed, and
/// `param_count` is stored as given.
#[cfg(test)]
fn make_test_callee_body(
    num_blocks: usize,
    param_count: usize,
) -> crate::taint::ssa_transfer::CalleeSsaBody {
    use crate::ssa::ir::*;
    use smallvec::smallvec;

    let blocks: Vec<SsaBlock> = (0..num_blocks)
        .map(|i| SsaBlock {
            id: BlockId(i as u32),
            phis: vec![],
            body: vec![SsaInst {
                value: SsaValue(i as u32),
                op: SsaOp::Const(Some(format!("{i}"))),
                cfg_node: petgraph::graph::NodeIndex::new(0),
                var_name: None,
                span: (0, 0),
            }],
            // Every block returns SSA value 0, whatever its own index.
            terminator: Terminator::Return(Some(SsaValue(0))),
            preds: smallvec![],
            succs: smallvec![],
        })
        .collect();

    let mut value_defs: Vec<ValueDef> = Vec::new();
    for i in 0..num_blocks {
        value_defs.push(ValueDef {
            var_name: None,
            cfg_node: petgraph::graph::NodeIndex::new(0),
            block: BlockId(i as u32),
        });
    }

    let ssa = SsaBody {
        blocks,
        entry: BlockId(0),
        value_defs,
        cfg_node_map: std::collections::HashMap::new(),
        exception_edges: vec![],
        field_interner: crate::ssa::ir::FieldInterner::new(),
        field_writes: std::collections::HashMap::new(),
        synthetic_externals: std::collections::HashSet::new(),
        slot_scoped_assigns: std::collections::HashSet::new(),
    };
    let opt = crate::ssa::OptimizeResult {
        const_values: std::collections::HashMap::new(),
        type_facts: crate::ssa::type_facts::TypeFactResult {
            facts: std::collections::HashMap::new(),
        },
        xml_parser_config: crate::ssa::xml_config::XmlParserConfigResult::default(),
        xpath_config: crate::ssa::xpath_config::XPathConfigResult::default(),
        alias_result: crate::ssa::alias::BaseAliasResult::empty(),
        points_to: crate::ssa::heap::PointsToResult::empty(),
        module_aliases: std::collections::HashMap::new(),
        branches_pruned: 0,
        copies_eliminated: 0,
        dead_defs_removed: 0,
    };

    crate::taint::ssa_transfer::CalleeSsaBody {
        ssa,
        opt,
        param_count,
        node_meta: std::collections::HashMap::new(),
        body_graph: None,
        cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()),
    }
}
/// A cross-package import map written through `replace_all_for_file` can be
/// loaded back unchanged, and a later call passing `None` removes it.
#[test]
fn cross_package_imports_round_trip_via_replace_all_for_file() {
    use crate::symbol::{FuncKey, FuncKind, Lang};

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let caller = tmp.path().join("caller.ts");
    std::fs::write(&caller, "import { escape } from '@scope/util';").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut indexer = index::Indexer::from_pool("proj", &pool).unwrap();
    let hash = index::Indexer::digest_bytes(b"caller content");

    // One binding: local name `escape` resolves to a function in another
    // package.
    let escape_target = FuncKey {
        lang: Lang::TypeScript,
        namespace: "packages/util/src/escape.ts".to_string(),
        container: String::new(),
        name: "escape".to_string(),
        arity: None,
        disambig: None,
        kind: FuncKind::Function,
    };
    let mut imports: std::collections::HashMap<String, FuncKey> =
        std::collections::HashMap::new();
    imports.insert("escape".to_string(), escape_target);

    indexer
        .replace_all_for_file(
            &caller,
            &hash,
            &[],
            &[],
            &[],
            &[],
            Some(("caller.ts", &imports)),
        )
        .unwrap();

    let loaded = indexer.load_all_cross_package_imports().unwrap();
    assert_eq!(loaded.len(), 1);
    let (stored_path, stored_ns, stored_map) = &loaded[0];
    assert_eq!(stored_path, &caller.to_string_lossy().to_string());
    assert_eq!(stored_ns, "caller.ts");
    assert_eq!(stored_map.len(), 1);
    let resolved = stored_map
        .get("escape")
        .expect("escape binding survives round-trip");
    assert_eq!(resolved.namespace, "packages/util/src/escape.ts");
    assert_eq!(resolved.name, "escape");
    assert_eq!(resolved.lang, Lang::TypeScript);

    // Rescanning with no import map should drop the stored row.
    indexer
        .replace_all_for_file(&caller, &hash, &[], &[], &[], &[], None)
        .unwrap();
    let after_rescan = indexer.load_all_cross_package_imports().unwrap();
    assert!(after_rescan.is_empty());
}
/// A callee SSA body written for a file is returned by
/// `load_all_ssa_bodies` with its identifying columns, parameter count and
/// block count preserved.
#[test]
fn ssa_bodies_round_trip() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source = tmp.path().join("helper.py");
    std::fs::write(&source, "def transform(val): return val").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut indexer = index::Indexer::from_pool("proj", &pool).unwrap();
    let hash = index::Indexer::digest_bytes(b"def transform(val): return val");

    // Three blocks, one parameter.
    let callee = make_test_callee_body(3, 1);
    let rows = vec![(
        "transform".to_string(),
        1_usize,
        "python".to_string(),
        "helper.py".to_string(),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        callee,
    )];
    indexer
        .replace_ssa_bodies_for_file(&source, &hash, &rows)
        .unwrap();

    let loaded = indexer.load_all_ssa_bodies().unwrap();
    assert_eq!(loaded.len(), 1);

    let (stored_path, name, lang, arity, namespace, _, _, _, stored_body) = &loaded[0];
    assert_eq!(stored_path, &source.to_string_lossy().to_string());
    assert_eq!(name, "transform");
    assert_eq!(lang, "python");
    assert_eq!(*arity, 1);
    assert_eq!(namespace, "helper.py");
    assert_eq!(stored_body.param_count, 1);
    assert_eq!(stored_body.ssa.blocks.len(), 3);
}
/// Writing SSA bodies for the same file twice must replace the earlier
/// rows instead of accumulating them.
#[test]
fn ssa_bodies_replace_on_rescan() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source = tmp.path().join("helper.py");
    std::fs::write(&source, "v1").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut indexer = index::Indexer::from_pool("proj", &pool).unwrap();

    // First version: a 2-block body.
    let first_hash = index::Indexer::digest_bytes(b"v1");
    let first_rows = vec![(
        "func".to_string(),
        1_usize,
        "python".to_string(),
        "h.py".to_string(),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_callee_body(2, 1),
    )];
    indexer
        .replace_ssa_bodies_for_file(&source, &first_hash, &first_rows)
        .unwrap();
    let after_first = indexer.load_all_ssa_bodies().unwrap();
    assert_eq!(after_first.len(), 1);
    assert_eq!(after_first[0].8.ssa.blocks.len(), 2);

    // Second version: a 5-block body for the same function and file.
    let second_hash = index::Indexer::digest_bytes(b"v2");
    let second_rows = vec![(
        "func".to_string(),
        1_usize,
        "python".to_string(),
        "h.py".to_string(),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_callee_body(5, 1),
    )];
    indexer
        .replace_ssa_bodies_for_file(&source, &second_hash, &second_rows)
        .unwrap();
    let after_second = indexer.load_all_ssa_bodies().unwrap();
    assert_eq!(after_second.len(), 1, "should replace, not accumulate");
    assert_eq!(after_second[0].8.ssa.blocks.len(), 5);
}
/// Per-node metadata attached to a callee body (a binary operator and a
/// sink label) must survive the store/load cycle.
#[test]
fn ssa_bodies_with_node_meta_round_trip() {
    use crate::cfg::{NodeInfo, TaintMeta};
    use crate::labels::{Cap, DataLabel};
    use crate::taint::ssa_transfer::CrossFileNodeMeta;

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source = tmp.path().join("helper.py");
    std::fs::write(&source, "code").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut indexer = index::Indexer::from_pool("proj", &pool).unwrap();
    let hash = index::Indexer::digest_bytes(b"code");

    // Metadata for node 0: an `Add` operator carrying a SQL-query sink label.
    let node_zero = CrossFileNodeMeta {
        info: NodeInfo {
            bin_op: Some(crate::cfg::BinOp::Add),
            taint: TaintMeta {
                labels: smallvec::smallvec![DataLabel::Sink(Cap::SQL_QUERY)],
                ..Default::default()
            },
            ..Default::default()
        },
    };
    let mut callee = make_test_callee_body(1, 0);
    callee.node_meta.insert(0, node_zero);

    let rows = vec![(
        "f".to_string(),
        0_usize,
        "python".to_string(),
        "h.py".to_string(),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        callee,
    )];
    indexer
        .replace_ssa_bodies_for_file(&source, &hash, &rows)
        .unwrap();

    let loaded = indexer.load_all_ssa_bodies().unwrap();
    assert_eq!(loaded.len(), 1);
    let stored_meta = &loaded[0].8.node_meta;
    assert_eq!(stored_meta.len(), 1);
    assert_eq!(stored_meta[&0].info.bin_op, Some(crate::cfg::BinOp::Add));
    assert!(
        matches!(stored_meta[&0].info.taint.labels[0], DataLabel::Sink(cap) if cap == Cap::SQL_QUERY)
    );
}
/// Removing a file through `remove_file_and_related` must also remove the SSA
/// bodies that were stored for it.
#[test]
fn ssa_bodies_removed_on_file_delete() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source_path = tmp.path().join("helper.py");
    std::fs::write(&source_path, "code").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();

    // The file row must exist first, otherwise `remove_file_and_related` has
    // nothing to look up.
    idx.upsert_file(&source_path).unwrap();

    let content_hash = index::Indexer::digest_bytes(b"code");
    let bodies = vec![(
        String::from("f"),
        0_usize,
        String::from("python"),
        String::from("h.py"),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_callee_body(1, 0),
    )];
    idx.replace_ssa_bodies_for_file(&source_path, &content_hash, &bodies)
        .unwrap();
    assert_eq!(idx.load_all_ssa_bodies().unwrap().len(), 1);

    // Deleting the file should leave no bodies behind.
    idx.remove_file_and_related(&source_path).unwrap();
    assert_eq!(idx.load_all_ssa_bodies().unwrap().len(), 0);
}
// ── Persistence hardening tests ─────────────────────────────────────────────
/// Test fixture: builds an `SsaFuncSummary` whose only populated field is a
/// single identity `param_to_return` transform on parameter 0.
///
/// Every other field is spelled out as empty, `None`, or its default value.
#[cfg(test)]
fn make_test_ssa_summary() -> crate::summary::ssa_summary::SsaFuncSummary {
    use crate::labels::Cap;
    use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};

    let identity_on_first_param = vec![(0, TaintTransform::Identity)];

    SsaFuncSummary {
        // The one non-empty field.
        param_to_return: identity_on_first_param,

        // Capability masks.
        source_caps: Cap::empty(),
        receiver_to_sink: Cap::empty(),

        // Optional fields.
        return_type: None,
        return_abstract: None,
        receiver_to_return: None,
        entry_kind: None,

        // Empty vectors.
        param_to_sink: Vec::new(),
        param_to_sink_param: Vec::new(),
        param_container_to_return: Vec::new(),
        param_to_container_store: Vec::new(),
        source_to_callback: Vec::new(),
        abstract_transfer: Vec::new(),
        param_return_paths: Vec::new(),
        typed_call_receivers: Vec::new(),
        param_to_gate_filters: Vec::new(),

        // Empty small-vectors and default-constructed summaries.
        return_path_facts: smallvec::SmallVec::new(),
        validated_params_to_return: smallvec::SmallVec::new(),
        points_to: Default::default(),
        field_points_to: Default::default(),
    }
}
/// Test fixture: seeds `project` with one file and one row of each cached kind.
///
/// Writes `app.py` into `dir`, registers it in the `files` table, then stores
/// one `FuncSummary`, one SSA summary, and one SSA body for a function named
/// `do_stuff`. Panics on any I/O or database error.
#[cfg(test)]
fn populate_project(
    pool: &r2d2::Pool<r2d2_sqlite::SqliteConnectionManager>,
    project: &str,
    dir: &std::path::Path,
) {
    let source_path = dir.join("app.py");
    std::fs::write(&source_path, "# code").unwrap();
    let content_hash = index::Indexer::digest_bytes(b"# code");

    let mut idx = index::Indexer::from_pool(project, pool).unwrap();
    idx.upsert_file(&source_path).unwrap();

    // Plain function summary.
    let func_summary = crate::summary::FuncSummary {
        name: String::from("do_stuff"),
        file_path: source_path.to_string_lossy().into_owned(),
        lang: String::from("python"),
        param_count: 1,
        param_names: vec![String::from("data")],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagates_taint: true,
        propagating_params: vec![0],
        tainted_sink_params: Vec::new(),
        callees: Vec::new(),
        ..Default::default()
    };
    idx.replace_summaries_for_file(&source_path, &content_hash, &[func_summary])
        .unwrap();

    // SSA summary for the same function.
    let ssa_summaries = vec![(
        String::from("do_stuff"),
        1_usize,
        String::from("python"),
        String::from("app.py"),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_ssa_summary(),
    )];
    idx.replace_ssa_summaries_for_file(&source_path, &content_hash, &ssa_summaries)
        .unwrap();

    // SSA body for the same function.
    let ssa_bodies = vec![(
        String::from("do_stuff"),
        1_usize,
        String::from("python"),
        String::from("app.py"),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_callee_body(1, 1),
    )];
    idx.replace_ssa_bodies_for_file(&source_path, &content_hash, &ssa_bodies)
        .unwrap();
}
// ── 1. Engine Version Tests ─────────────────────────────────────────────────
/// Re-opening a database without touching the stored engine version must keep
/// every cached row and leave the stored version equal to `ENGINE_VERSION`.
#[test]
fn version_match_no_reset() {
    const CACHE_TABLES: [&str; 3] = [
        "function_summaries",
        "ssa_function_summaries",
        "ssa_function_bodies",
    ];

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    // First open: creates the database and seeds one row per table.
    let pool = index::Indexer::init(&db_path).unwrap();
    populate_project(&pool, "proj", tmp.path());
    for table in CACHE_TABLES {
        assert_eq!(
            index::Indexer::count_rows(&pool, table, "proj").unwrap(),
            1
        );
    }

    // Second open with the version unchanged: the rows must still be there.
    drop(pool);
    let reopened = index::Indexer::init(&db_path).unwrap();
    for table in CACHE_TABLES {
        assert_eq!(
            index::Indexer::count_rows(&reopened, table, "proj").unwrap(),
            1
        );
    }

    let stored_version = index::Indexer::get_stored_engine_version(&reopened).unwrap();
    assert_eq!(stored_version.as_deref(), Some(index::ENGINE_VERSION));
}
/// A stored engine version that differs from `ENGINE_VERSION` must cause the
/// next `init` to wipe the caches and the `files` table, then record the
/// current version.
#[test]
fn version_mismatch_triggers_reset() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    let pool = index::Indexer::init(&db_path).unwrap();
    populate_project(&pool, "proj", tmp.path());

    // Pretend the data was written by an older engine.
    index::Indexer::set_engine_version(&pool, "0.0.1-old").unwrap();
    assert_eq!(
        index::Indexer::count_rows(&pool, "function_summaries", "proj").unwrap(),
        1
    );

    // Re-open: the mismatch should trigger a full wipe.
    drop(pool);
    let reopened = index::Indexer::init(&db_path).unwrap();
    for table in [
        "function_summaries",
        "ssa_function_summaries",
        "ssa_function_bodies",
    ] {
        assert_eq!(
            index::Indexer::count_rows(&reopened, table, "proj").unwrap(),
            0
        );
    }

    // The file list is cleared too, which forces a rescan.
    let idx = index::Indexer::from_pool("proj", &reopened).unwrap();
    assert!(idx.get_files("proj").unwrap().is_empty());

    // The stored version is brought up to date.
    let stored_version = index::Indexer::get_stored_engine_version(&reopened).unwrap();
    assert_eq!(stored_version.as_deref(), Some(index::ENGINE_VERSION));
}
/// A database with no `engine_version` metadata row (simulating one created
/// before versioning existed) must be wiped on the next `init`, which then
/// records the current version.
#[test]
fn missing_version_triggers_reset() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    let pool = index::Indexer::init(&db_path).unwrap();
    populate_project(&pool, "proj", tmp.path());

    // Drop the metadata row; the pooled connection is released at the end of
    // this statement.
    pool.get()
        .unwrap()
        .execute("DELETE FROM nyx_metadata WHERE key = 'engine_version'", [])
        .unwrap();

    drop(pool);
    let reopened = index::Indexer::init(&db_path).unwrap();

    // The summary caches are expected to be empty after the reset.
    for table in ["function_summaries", "ssa_function_summaries"] {
        assert_eq!(
            index::Indexer::count_rows(&reopened, table, "proj").unwrap(),
            0
        );
    }

    let stored_version = index::Indexer::get_stored_engine_version(&reopened).unwrap();
    assert_eq!(stored_version.as_deref(), Some(index::ENGINE_VERSION));
}
/// Opening the same database three times in a row must never reset it: rows
/// written after each open are still present on the next one.
#[test]
fn multiple_opens_no_repeated_resets() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let summaries_for = |pool: &_, project: &str| {
        index::Indexer::count_rows(pool, "function_summaries", project).unwrap()
    };

    // Open #1: seed the first project.
    let first = index::Indexer::init(&db_path).unwrap();
    populate_project(&first, "proj", tmp.path());
    drop(first);

    // Open #2: the first project survives; seed a second one.
    let second = index::Indexer::init(&db_path).unwrap();
    assert_eq!(summaries_for(&second, "proj"), 1);
    populate_project(&second, "proj2", tmp.path());
    drop(second);

    // Open #3: both projects are still there.
    let third = index::Indexer::init(&db_path).unwrap();
    assert_eq!(summaries_for(&third, "proj"), 1);
    assert_eq!(summaries_for(&third, "proj2"), 1);
}
/// `write_engine_version` must leave `ENGINE_VERSION` as the stored version.
#[test]
fn write_engine_version_on_scan_completion() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // Stand-in for the write that happens once a scan finishes.
    index::Indexer::write_engine_version(&pool).unwrap();

    let stored_version = index::Indexer::get_stored_engine_version(&pool).unwrap();
    assert_eq!(stored_version.as_deref(), Some(index::ENGINE_VERSION));
}
// ── 2. Migration Tests ──────────────────────────────────────────────────────
/// Initialising a brand-new database must succeed and yield empty, queryable
/// tables.
#[test]
fn fresh_db_no_migration_needed() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    // Neither call may panic on a database that did not exist before.
    let pool = index::Indexer::init(&db_path).unwrap();
    let idx = index::Indexer::from_pool("proj", &pool).unwrap();

    // Each loader must work and return nothing.
    assert_eq!(idx.load_all_summaries().unwrap().len(), 0);
    assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 0);
    assert_eq!(idx.load_all_ssa_bodies().unwrap().len(), 0);
    assert_eq!(idx.get_files("proj").unwrap().len(), 0);
}
/// Every connection handed out by the pool must report a 60 000 ms
/// `busy_timeout`, not only the connection used for schema setup.
#[test]
fn init_applies_busy_timeout_to_every_pooled_connection() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // Keep four connections checked out simultaneously so r2d2 has to hand
    // out distinct handles. The timeout is a per-connection setting, so
    // configuring only the schema-setup connection would leave later worker
    // connections at rusqlite's default.
    let held: Vec<_> = std::iter::repeat_with(|| pool.get().unwrap())
        .take(4)
        .collect();

    held.iter().for_each(|conn| {
        let timeout_ms = conn
            .query_row("PRAGMA busy_timeout", [], |row| row.get::<_, i64>(0))
            .unwrap();
        assert_eq!(timeout_ms, 60_000);
    });
}
/// Sixteen threads each enqueue one file upsert plus one issue through a
/// capacity-2 `IndexWriteQueue`; afterwards all sixteen files must be indexed
/// with exactly one issue each.
#[test]
fn index_write_queue_serializes_parallel_writes() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();
    let project = "proj";

    let writer =
        index::IndexWriteQueue::start_with_capacity(project, std::sync::Arc::clone(&pool), 2);
    let sender = writer.sender();

    // Spawn one producer thread per file; each gets its own sender clone.
    let producers: Vec<_> = (0..16)
        .map(|i| {
            let path = tmp.path().join(format!("file_{i}.rs"));
            let source = format!("fn f_{i}() {{}}\n");
            std::fs::write(&path, &source).unwrap();
            let hash = index::Indexer::digest_bytes(source.as_bytes());
            let sender = sender.clone();
            std::thread::spawn(move || {
                sender
                    .enqueue(move |idx| {
                        let file_id = idx.upsert_file_with_hash(&path, &hash)?;
                        let rows =
                            [(String::from("test-rule"), String::from("LOW"), 1_i64, 0_i64)];
                        idx.replace_issues(
                            file_id,
                            rows.iter()
                                .map(|(rule_id, severity, line, col)| index::IssueRow {
                                    rule_id: rule_id.as_str(),
                                    severity: severity.as_str(),
                                    line: *line,
                                    col: *col,
                                }),
                        )?;
                        Ok(())
                    })
                    .unwrap();
            })
        })
        .collect();

    for producer in producers {
        producer.join().unwrap();
    }

    // Release the last sender before asking the writer to finish.
    drop(sender);
    writer.finish("test").unwrap();

    let idx = index::Indexer::from_pool(project, &pool).unwrap();
    let files = idx.get_files(project).unwrap();
    assert_eq!(files.len(), 16);
    for path in &files {
        assert_eq!(idx.get_issues_from_file(path).unwrap().len(), 1);
    }
}
/// A database whose `ssa_function_summaries` table lacks the `namespace`
/// column must be usable after `init`: storing a summary that carries a
/// namespace has to succeed.
#[test]
fn missing_ssa_namespace_column_triggers_recreate() {
    // Outdated schema: the SSA table has no `namespace` column.
    let outdated_schema = "CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, path TEXT NOT NULL,
hash BLOB NOT NULL, mtime INTEGER NOT NULL,
scanned_at INTEGER NOT NULL, UNIQUE(project, path)
);
CREATE TABLE IF NOT EXISTS function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, file_path TEXT NOT NULL,
file_hash BLOB NOT NULL, name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL,
summary TEXT NOT NULL, updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, arity)
);
CREATE TABLE IF NOT EXISTS ssa_function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, file_path TEXT NOT NULL,
file_hash BLOB NOT NULL, name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL,
summary TEXT NOT NULL, updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, arity)
);";

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    // Create the outdated database with a raw connection, closed at block end.
    {
        let raw = rusqlite::Connection::open(&db_path).unwrap();
        raw.execute_batch(outdated_schema).unwrap();
    }

    // `init` should notice the missing column and rebuild the table.
    let pool = index::Indexer::init(&db_path).unwrap();
    let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();

    let source_path = tmp.path().join("test.py");
    std::fs::write(&source_path, "x").unwrap();
    let content_hash = index::Indexer::digest_bytes(b"x");
    let summaries = vec![(
        String::from("func"),
        1_usize,
        String::from("python"),
        String::from("ns"),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_ssa_summary(),
    )];

    // This insert would fail if the `namespace` column were still missing.
    idx.replace_ssa_summaries_for_file(&source_path, &content_hash, &summaries)
        .unwrap();
    assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 1);
}
/// Phase 10 migration test.
///
/// Builds a database whose `function_summaries` and `ssa_function_summaries`
/// tables are at the post-Phase 09 shape (namespace, container, disambig and
/// kind columns present, but no `entry_kind` column) and seeds one row in
/// each. After `init`, both tables must have the `entry_kind` column, the
/// seeded rows must still be present, and the seeded `function_summaries` row
/// must read back `NULL` for `entry_kind`.
#[test]
fn entry_kind_column_added_in_place_without_data_loss() {
    let td = tempfile::tempdir().unwrap();
    let db = td.path().join("nyx.sqlite");
    // Hand-build a pre-Phase-10 schema (no `entry_kind` column).
    {
        let conn = rusqlite::Connection::open(&db).unwrap();
        conn.execute_batch(
            "CREATE TABLE files (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, path TEXT NOT NULL,
hash BLOB NOT NULL, mtime INTEGER NOT NULL,
scanned_at INTEGER NOT NULL, UNIQUE(project, path)
);
CREATE TABLE function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, file_path TEXT NOT NULL,
file_hash BLOB NOT NULL, name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL,
container TEXT NOT NULL DEFAULT '',
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL, updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
CREATE TABLE ssa_function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, file_path TEXT NOT NULL,
file_hash BLOB NOT NULL, name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL,
namespace TEXT NOT NULL DEFAULT '',
container TEXT NOT NULL DEFAULT '',
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL, updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);",
        )
        .unwrap();
        conn.execute(
            "INSERT INTO function_summaries
(project, file_path, file_hash, name, arity, lang,
container, disambig, kind, summary, updated_at)
VALUES ('proj', 'lib.py', X'00', 'old_func', 1, 'python',
'', NULL, 'fn', '{}', 0)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO ssa_function_summaries
(project, file_path, file_hash, name, arity, lang,
namespace, container, disambig, kind, summary, updated_at)
VALUES ('proj', 'lib.py', X'00', 'old_func', 1, 'python',
'', '', NULL, 'fn', '{}', 0)",
            [],
        )
        .unwrap();
        // Pre-populate the metadata so `check_schema_version` and
        // `check_engine_version` consider the database current and do
        // not wipe the rows we just inserted. The point of this test
        // is the in-place `ALTER TABLE`; the version checks are a
        // separate concern.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS nyx_metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('schema_version', ?1)",
            rusqlite::params![index::SCHEMA_VERSION],
        )
        .unwrap();
        conn.execute(
            "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)",
            rusqlite::params![index::ENGINE_VERSION],
        )
        .unwrap();
    }
    // Open via init: this should non-destructively ALTER both tables to
    // add `entry_kind`, leaving the seeded rows intact.
    let pool = index::Indexer::init(&db).unwrap();
    let conn = pool.get().unwrap();
    // Lists the column names of `table`. Column 1 of `PRAGMA table_info` is
    // the column name. Any row that fails to decode makes the test panic here
    // instead of being silently dropped, which would otherwise show up as a
    // misleading "column missing" failure below.
    let cols_for = |table: &str| {
        let mut stmt = conn
            .prepare(&format!("PRAGMA table_info({table})"))
            .unwrap();
        let v: Vec<String> = stmt
            .query_map([], |r| r.get::<_, String>(1))
            .unwrap()
            .collect::<Result<Vec<String>, _>>()
            .unwrap();
        v
    };
    assert!(
        cols_for("function_summaries")
            .iter()
            .any(|c| c == "entry_kind"),
        "function_summaries.entry_kind missing after migration"
    );
    assert!(
        cols_for("ssa_function_summaries")
            .iter()
            .any(|c| c == "entry_kind"),
        "ssa_function_summaries.entry_kind missing after migration"
    );
    // Pre-existing rows survive the migration.
    let func_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM function_summaries WHERE project = 'proj'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(func_rows, 1, "pre-existing function_summaries row was lost");
    let ssa_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM ssa_function_summaries WHERE project = 'proj'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(
        ssa_rows, 1,
        "pre-existing ssa_function_summaries row was lost"
    );
    // Existing rows have NULL entry_kind by default.
    let entry_kind_value: Option<String> = conn
        .query_row(
            "SELECT entry_kind FROM function_summaries WHERE project = 'proj'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert!(entry_kind_value.is_none());
}
/// Re-running `init` on a database that already has the current schema must
/// not drop and recreate the SSA summary table.
#[test]
fn valid_schema_no_recreate() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    // First open creates all tables; seed a row and close the pool.
    {
        let pool = index::Indexer::init(&db_path).unwrap();
        populate_project(&pool, "proj", tmp.path());
    }

    // Second open sees a valid schema, so the seeded row must survive.
    let reopened = index::Indexer::init(&db_path).unwrap();
    let surviving =
        index::Indexer::count_rows(&reopened, "ssa_function_summaries", "proj").unwrap();
    assert_eq!(surviving, 1);
}
// ── 3. Deserialization Failure Tests ────────────────────────────────────────
/// A `function_summaries` row whose `summary` column is not valid JSON must be
/// skipped by `load_all_summaries` instead of causing a panic or an error.
#[test]
fn invalid_json_skipped_in_load_summaries() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // Plant the corrupted row with raw SQL; the connection goes back to the
    // pool at the end of the statement.
    pool.get()
        .unwrap()
        .execute(
            "INSERT INTO function_summaries (project, file_path, file_hash, name, arity, lang, summary, updated_at)
VALUES ('proj', 'bad.py', X'00', 'bad', 1, 'python', '{not valid json!!!', 0)",
            [],
        )
        .unwrap();

    let idx = index::Indexer::from_pool("proj", &pool).unwrap();
    let loaded = idx.load_all_summaries().unwrap();
    assert!(loaded.is_empty());
}
/// An `ssa_function_summaries` row whose `summary` column is not valid JSON
/// must be skipped by `load_all_ssa_summaries`.
#[test]
fn invalid_json_skipped_in_load_ssa_summaries() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // Plant the corrupted row with raw SQL.
    pool.get()
        .unwrap()
        .execute(
            "INSERT INTO ssa_function_summaries (project, file_path, file_hash, name, arity, lang, namespace, summary, updated_at)
VALUES ('proj', 'bad.py', X'00', 'bad', 1, 'python', '', 'CORRUPTED', 0)",
            [],
        )
        .unwrap();

    let idx = index::Indexer::from_pool("proj", &pool).unwrap();
    let loaded = idx.load_all_ssa_summaries().unwrap();
    assert!(loaded.is_empty());
}
/// An `ssa_function_bodies` row whose `body` column is not valid JSON must be
/// skipped by `load_all_ssa_bodies`.
#[test]
fn invalid_json_skipped_in_load_ssa_bodies() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // Plant the corrupted row with raw SQL.
    pool.get()
        .unwrap()
        .execute(
            "INSERT INTO ssa_function_bodies (project, file_path, file_hash, name, arity, lang, namespace, body, updated_at)
VALUES ('proj', 'bad.py', X'00', 'bad', 1, 'python', '', '{{{{broken', 0)",
            [],
        )
        .unwrap();

    let idx = index::Indexer::from_pool("proj", &pool).unwrap();
    let loaded = idx.load_all_ssa_bodies().unwrap();
    assert!(loaded.is_empty());
}
/// When one SSA summary row is corrupted and another is valid, loading must
/// return the valid row and skip only the corrupted one.
#[test]
fn partial_failure_does_not_drop_valid_rows() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // Valid row, stored through the regular API.
    let good_path = tmp.path().join("good.py");
    std::fs::write(&good_path, "ok").unwrap();
    let good_hash = index::Indexer::digest_bytes(b"ok");
    let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();
    let summaries = vec![(
        String::from("good_func"),
        1_usize,
        String::from("python"),
        String::from(""),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_ssa_summary(),
    )];
    idx.replace_ssa_summaries_for_file(&good_path, &good_hash, &summaries)
        .unwrap();

    // Corrupted row, planted with raw SQL.
    pool.get()
        .unwrap()
        .execute(
            "INSERT INTO ssa_function_summaries (project, file_path, file_hash, name, arity, lang, namespace, summary, updated_at)
VALUES ('proj', 'bad.py', X'00', 'bad_func', 1, 'python', '', 'NOT_JSON', 0)",
            [],
        )
        .unwrap();

    // Exactly the valid row comes back; element 1 of the tuple is its name.
    let loaded = idx.load_all_ssa_summaries().unwrap();
    assert_eq!(loaded.len(), 1);
    let survivor = &loaded[0];
    assert_eq!(survivor.1, "good_func");
}
// ── 4. Integration / Round-Trip Tests ───────────────────────────────────────
/// Data written through one pool, followed by an engine-version write, must
/// be fully visible through a freshly opened pool.
#[test]
fn scan_persist_reload_cycle() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    // "Scan": seed the project, then record the version the way a completed
    // scan would. The pool is closed at the end of the block.
    {
        let pool = index::Indexer::init(&db_path).unwrap();
        populate_project(&pool, "myproject", tmp.path());
        index::Indexer::write_engine_version(&pool).unwrap();
    }

    // "Reload": a new pool must see one row of every kind.
    let reopened = index::Indexer::init(&db_path).unwrap();
    let idx = index::Indexer::from_pool("myproject", &reopened).unwrap();

    let summaries = idx.load_all_summaries().unwrap();
    let ssa_summaries = idx.load_all_ssa_summaries().unwrap();
    let ssa_bodies = idx.load_all_ssa_bodies().unwrap();
    let files = idx.get_files("myproject").unwrap();
    assert_eq!(summaries.len(), 1);
    assert_eq!(ssa_summaries.len(), 1);
    assert_eq!(ssa_bodies.len(), 1);
    assert_eq!(files.len(), 1);
}
/// After an engine-version change the index must come back empty, and it must
/// accept fresh data again afterwards.
#[test]
fn version_bump_forces_reindex_behavior() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    // Seed the database and stamp it with a different engine version.
    {
        let pool = index::Indexer::init(&db_path).unwrap();
        populate_project(&pool, "proj", tmp.path());
        index::Indexer::set_engine_version(&pool, "0.1.0-alpha").unwrap();
    }

    // Re-opening with the mismatched version should invalidate everything.
    let reopened = index::Indexer::init(&db_path).unwrap();
    let idx = index::Indexer::from_pool("proj", &reopened).unwrap();
    assert_eq!(idx.load_all_summaries().unwrap().len(), 0);
    assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 0);
    assert_eq!(idx.load_all_ssa_bodies().unwrap().len(), 0);
    assert_eq!(idx.get_files("proj").unwrap().len(), 0);

    // The wiped database accepts new data and serves it back.
    populate_project(&reopened, "proj", tmp.path());
    assert_eq!(idx.load_all_summaries().unwrap().len(), 1);
}
// ── 5. Edge Cases ───────────────────────────────────────────────────────────
/// `init` must cope with a database path that already exists as a zero-byte
/// file.
#[test]
fn empty_db_file_works() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("empty.sqlite");

    // Pre-create the file with no content at all.
    std::fs::write(&db_path, "").unwrap();

    // Opening it must succeed and give a working, empty index.
    let pool = index::Indexer::init(&db_path).unwrap();
    let idx = index::Indexer::from_pool("proj", &pool).unwrap();
    let summaries = idx.load_all_summaries().unwrap();
    assert!(summaries.is_empty());
}
/// Two projects sharing one database must each see only their own SSA
/// summaries and their own file list.
#[test]
fn multiple_projects_isolated() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // One source file per project.
    let file_a = tmp.path().join("proj1_file.py");
    let file_b = tmp.path().join("proj2_file.py");
    std::fs::write(&file_a, "p1").unwrap();
    std::fs::write(&file_b, "p2").unwrap();

    // Project A: register its file and store `func_a`.
    let mut idx_a = index::Indexer::from_pool("project_a", &pool).unwrap();
    idx_a.upsert_file(&file_a).unwrap();
    let hash_a = index::Indexer::digest_bytes(b"p1");
    let summaries_a = vec![(
        String::from("func_a"),
        0_usize,
        String::from("python"),
        String::from(""),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_ssa_summary(),
    )];
    idx_a
        .replace_ssa_summaries_for_file(&file_a, &hash_a, &summaries_a)
        .unwrap();

    // Project B: register its file and store `func_b`.
    let mut idx_b = index::Indexer::from_pool("project_b", &pool).unwrap();
    idx_b.upsert_file(&file_b).unwrap();
    let hash_b = index::Indexer::digest_bytes(b"p2");
    let summaries_b = vec![(
        String::from("func_b"),
        0_usize,
        String::from("python"),
        String::from(""),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        make_test_ssa_summary(),
    )];
    idx_b
        .replace_ssa_summaries_for_file(&file_b, &hash_b, &summaries_b)
        .unwrap();

    // Each indexer loads only the summaries of its own project.
    assert_eq!(idx_a.load_all_ssa_summaries().unwrap().len(), 1);
    assert_eq!(idx_a.load_all_ssa_summaries().unwrap()[0].1, "func_a");
    assert_eq!(idx_b.load_all_ssa_summaries().unwrap().len(), 1);
    assert_eq!(idx_b.load_all_ssa_summaries().unwrap()[0].1, "func_b");

    // File lists are looked up by the project name passed to `get_files`.
    assert_eq!(idx_a.get_files("project_a").unwrap().len(), 1);
    assert_eq!(idx_b.get_files("project_b").unwrap().len(), 1);

    // A project name that was never populated has no files.
    assert_eq!(idx_a.get_files("nonexistent_project").unwrap().len(), 0);
}
/// An engine-version mismatch must wipe the summary tables for every project
/// in the database, not only for one of them.
#[test]
fn version_reset_wipes_all_projects() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // Builds a one-element SSA summary batch for a function called `name`.
    let ssa_batch = |name: &str| {
        vec![(
            name.to_string(),
            0_usize,
            "python".to_string(),
            "".to_string(),
            String::new(),
            None,
            crate::symbol::FuncKind::Function,
            make_test_ssa_summary(),
        )]
    };

    let file_x = tmp.path().join("a.py");
    let file_y = tmp.path().join("b.py");
    std::fs::write(&file_x, "a").unwrap();
    std::fs::write(&file_y, "b").unwrap();

    // Project X.
    let mut idx_x = index::Indexer::from_pool("proj_x", &pool).unwrap();
    idx_x.upsert_file(&file_x).unwrap();
    let hash_x = index::Indexer::digest_bytes(b"a");
    let batch_x = ssa_batch("fx");
    idx_x
        .replace_ssa_summaries_for_file(&file_x, &hash_x, &batch_x)
        .unwrap();

    // Project Y.
    let mut idx_y = index::Indexer::from_pool("proj_y", &pool).unwrap();
    idx_y.upsert_file(&file_y).unwrap();
    let hash_y = index::Indexer::digest_bytes(b"b");
    let batch_y = ssa_batch("fy");
    idx_y
        .replace_ssa_summaries_for_file(&file_y, &hash_y, &batch_y)
        .unwrap();

    // Stamp a stale version and re-open.
    index::Indexer::set_engine_version(&pool, "0.0.0-stale").unwrap();
    drop(pool);
    let reopened = index::Indexer::init(&db_path).unwrap();

    // The version check is global, so both projects must come back empty.
    for project in ["proj_x", "proj_y"] {
        for table in ["function_summaries", "ssa_function_summaries"] {
            assert_eq!(
                index::Indexer::count_rows(&reopened, table, project).unwrap(),
                0
            );
        }
    }
}
/// `Indexer::clear` must not erase the stored engine version.
#[test]
fn metadata_table_survives_clear() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();
    index::Indexer::write_engine_version(&pool).unwrap();

    index::Indexer::from_pool("proj", &pool)
        .unwrap()
        .clear()
        .unwrap();

    // The version written before the clear must still be readable.
    let stored_version = index::Indexer::get_stored_engine_version(&pool).unwrap();
    assert_eq!(stored_version.as_deref(), Some(index::ENGINE_VERSION));
}
/// `field_points_to` must survive a store and reload of an `SsaFuncSummary`.
///
/// The stored summary records a read on parameter 0, a write on parameter 1,
/// a read keyed by the `u32::MAX` receiver sentinel, and a write of the
/// `<elem>` container-element marker on parameter 0. After reloading, the
/// whole `field_points_to` value must compare equal to what was stored, the
/// sentinel and `<elem>` entries must be individually present, and the
/// `overflow` flag must be unset.
#[test]
fn ssa_summaries_round_trip_preserves_field_points_to() {
    use crate::summary::points_to::FieldPointsToSummary;
    use crate::summary::ssa_summary::SsaFuncSummary;

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source_path = tmp.path().join("store.rs");
    std::fs::write(&source_path, "// helper that writes obj.cache").unwrap();
    let content_hash = index::Indexer::digest_bytes(b"// helper that writes obj.cache");

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();

    // Four channels: param read, param write, receiver read, element write.
    let mut expected = FieldPointsToSummary::empty();
    expected.add_read(0, "name");
    expected.add_write(1, "cache");
    expected.add_read(u32::MAX, "kind");
    expected.add_write(0, "<elem>");

    let stored_row = (
        String::from("store"),
        2_usize,
        String::from("rust"),
        String::from("store.rs"),
        String::new(),
        None,
        crate::symbol::FuncKind::Function,
        SsaFuncSummary {
            field_points_to: expected.clone(),
            ..Default::default()
        },
    );
    idx.replace_ssa_summaries_for_file(&source_path, &content_hash, &[stored_row])
        .unwrap();

    let loaded = idx.load_all_ssa_summaries().unwrap();
    assert_eq!(loaded.len(), 1, "single summary stored, single returned");

    // Element 1 is the function name, element 8 the summary.
    let entry = &loaded[0];
    assert_eq!(entry.1, "store");
    let sum = &entry.8;
    assert_eq!(
        sum.field_points_to, expected,
        "field_points_to must round-trip byte-equal",
    );

    // Spot-check the receiver sentinel channel.
    let receiver_reads = sum
        .field_points_to
        .param_field_reads
        .iter()
        .find(|(p, _)| *p == u32::MAX)
        .expect("receiver read at u32::MAX sentinel");
    assert!(receiver_reads.1.iter().any(|s| s == "kind"));

    // Spot-check the container-element marker channel.
    let first_param_writes = sum
        .field_points_to
        .param_field_writes
        .iter()
        .find(|(p, _)| *p == 0)
        .expect("param 0 writes recorded");
    assert!(
        first_param_writes.1.iter().any(|s| s == "<elem>"),
        "<elem> marker must survive round-trip without conversion",
    );

    assert!(!sum.field_points_to.overflow);
}
/// Backward compatibility: JSON written without a `field_points_to` key must
/// still deserialise, with the field coming back empty.
#[test]
fn ssa_summaries_legacy_blob_decodes_with_empty_field_points_to() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    // Hand-written blob that deliberately omits `field_points_to`.
    let blob_without_field = r#"{
"param_to_return": [],
"param_to_sink": [],
"source_caps": 0,
"param_to_sink_param": [],
"param_container_to_return": [],
"param_to_container_store": [],
"return_type": null,
"return_abstract": null,
"source_to_callback": [],
"receiver_to_return": null,
"receiver_to_sink": 0,
"abstract_transfer": [],
"param_return_paths": [],
"return_path_facts": [],
"typed_call_receivers": []
}"#;

    let decoded = serde_json::from_str::<SsaFuncSummary>(blob_without_field).unwrap();
    assert!(
        decoded.field_points_to.is_empty(),
        "missing field_points_to must default to empty",
    );
}
/// Backward compatibility: JSON written before `param_to_gate_filters`
/// existed must still deserialise, with the field coming back empty.
#[test]
fn ssa_summaries_pre_gate_filters_blob_decodes_with_empty_param_to_gate_filters() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    // Hand-written blob that deliberately omits `param_to_gate_filters`.
    let blob_without_field = r#"{
"param_to_return": [],
"param_to_sink": [],
"source_caps": 0,
"param_to_sink_param": [],
"param_container_to_return": [],
"param_to_container_store": [],
"return_type": null,
"return_abstract": null,
"source_to_callback": [],
"receiver_to_return": null,
"receiver_to_sink": 0,
"abstract_transfer": [],
"param_return_paths": [],
"return_path_facts": [],
"typed_call_receivers": []
}"#;

    let decoded = serde_json::from_str::<SsaFuncSummary>(blob_without_field).unwrap();
    assert!(
        decoded.param_to_gate_filters.is_empty(),
        "missing param_to_gate_filters must default to empty",
    );
}
/// A populated `param_to_gate_filters` must come back unchanged after a JSON
/// serialise and deserialise, with each position keeping its own cap mask.
#[test]
fn ssa_summaries_param_to_gate_filters_round_trip() {
    use crate::labels::Cap;
    use crate::summary::ssa_summary::SsaFuncSummary;

    // Two positions with different masks, so a merged or swapped mask is
    // detectable.
    let original = SsaFuncSummary {
        param_to_gate_filters: vec![(0, Cap::SSRF), (1, Cap::DATA_EXFIL)],
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded = serde_json::from_str::<SsaFuncSummary>(&encoded).expect("deserialize");

    assert_eq!(
        decoded.param_to_gate_filters,
        vec![(0, Cap::SSRF), (1, Cap::DATA_EXFIL)],
        "per-position cap masks must round-trip exactly",
    );
}