From 0a62b6f40caba1b1881b1385f6bcd0cdaa55d1a2 Mon Sep 17 00:00:00 2001 From: elipeter Date: Tue, 17 Jun 2025 20:45:33 +0200 Subject: [PATCH] Refactor database connection handling with connection pooling and parallel processing - Introduced `r2d2` connection pooling for SQLite in `database.rs`. - Updated `Indexer` to use pooled connections for improved concurrency. - Replaced sequential processing with `rayon` for parallel file scanning. - Added a `clear` method to `Indexer` for reindexing support. - Enhanced database initialization with `init` and `from_pool` methods. - Updated `Cargo.toml` and `Cargo.lock` to include `r2d2`, `r2d2_sqlite`, and new dependencies. --- Cargo.lock | 277 +++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 2 + src/commands/index.rs | 51 ++++---- src/commands/scan.rs | 75 +++++++----- src/database.rs | 74 +++++++++-- 5 files changed, 412 insertions(+), 67 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34d7f2ef..06e8f4a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + [[package]] name = "bitflags" version = "2.9.1" @@ -102,6 +108,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bumpalo" +version = "3.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" + [[package]] name = "cc" version = "1.2.27" @@ -290,7 +302,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -374,6 +398,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -406,6 +440,16 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.27" @@ -465,6 +509,8 @@ dependencies = [ "ignore", "num_cpus", "once_cell", + "r2d2", + "r2d2_sqlite", "rayon", "rusqlite", "serde", @@ -507,6 +553,29 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -525,6 +594,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -543,6 +621,63 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r2d2" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" +dependencies = [ + "log", + "parking_lot", + "scheduled-thread-pool", +] + +[[package]] +name = "r2d2_sqlite" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06cc23a61faf4643d8b59ed52c27ed434476dd7aa6f39e1eff7d6bbd35985093" +dependencies = [ + "r2d2", + "rusqlite", + "uuid", +] + +[[package]] +name = "rand" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.3", +] + [[package]] name = "rayon" version = "1.10.0" @@ -563,13 +698,22 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" dependencies = [ - "getrandom", + "getrandom 0.2.16", "libredox", "thiserror", ] @@ -632,6 +776,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + [[package]] name = "ryu" version = "1.0.20" @@ -647,6 +797,21 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scheduled-thread-pool" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" +dependencies = [ + "parking_lot", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.219" @@ -1037,6 +1202,18 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +dependencies = [ + "getrandom 0.3.3", + "js-sys", + "rand", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.1" @@ -1065,6 +1242,73 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1177,3 +1421,32 @@ checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" dependencies = [ "memchr", ] + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "zerocopy" +version = "0.8.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 9def6395..b76ae97e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,3 +28,5 @@ blake3 = "1.8.2" once_cell = "1.21.3" console = "0.15.11" rayon = "1.10.0" +r2d2_sqlite = "0.30.0" +r2d2 = "0.8.10" diff --git a/src/commands/index.rs b/src/commands/index.rs index 913e430c..1873e625 100644 --- a/src/commands/index.rs +++ b/src/commands/index.rs @@ -5,6 +5,7 @@ use crate::patterns::Severity; use crate::utils::Config; use crate::utils::project::get_project_info; use crate::walk::spawn_senders; +use rayon::prelude::*; pub fn handle( action: IndexAction, @@ -50,27 +51,35 @@ pub fn build_index( tracing::debug!("Building index for: {}", project_name); fs::File::create(db_path)?; - let mut indexer = Indexer::new(&project_name, &db_path)?; - let rx = spawn_senders(project_path, config); - for path in rx.iter().flatten() { - let issues = crate::commands::scan::run_rules_on_file(&path, config)?; - let file_id = indexer.upsert_file(&path)?; - - let issue_rows: Vec = issues - .iter() - .map(|d| IssueRow { - rule_id: d.id.as_ref(), - severity: match d.severity { - Severity::High => "HIGH", - Severity::Medium => "MEDIUM", - Severity::Low => "LOW", - }, - line: d.line as i64, - col: d.col as i64, - }) - .collect(); - - indexer.replace_issues(file_id, issue_rows)?; + let pool = Indexer::init(db_path)?; + { + let idx = Indexer::from_pool(&project_name, &pool).unwrap(); + idx.clear()?; } + + tracing::debug!("Cleaned index for: {}", project_name); + + let rx = spawn_senders(project_path, config); + let paths: Vec<_> = rx.into_iter().flatten().collect(); + + paths.into_par_iter().try_for_each(|path| -> Result<(), Box> { + let issues = crate::commands::scan::run_rules_on_file(&path, config).unwrap(); + let mut idx = Indexer::from_pool(project_name, &pool).unwrap(); + let file_id = idx.upsert_file(&path).unwrap(); + + let rows: Vec = issues.iter().map(|d| IssueRow { + rule_id: d.id.as_ref(), + severity: match d.severity { + Severity::High => "HIGH", + Severity::Medium => "MEDIUM", + Severity::Low => "LOW", + }, + line: d.line as i64, + col: d.col as i64, + }).collect(); + + idx.replace_issues(file_id, rows).unwrap(); + Ok(()) + }).unwrap(); Ok(()) } \ No newline at end of file diff --git a/src/commands/scan.rs b/src/commands/scan.rs index 7441bcbe..df23bca5 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -1,7 +1,9 @@ use crate::utils::project::get_project_info; use console::style; use std::path::Path; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; +use r2d2::Pool; +use r2d2_sqlite::SqliteConnectionManager; use crate::database::index::{IssueRow, Indexer}; use crate::patterns::Severity; use crate::utils::config::Config; @@ -44,8 +46,8 @@ pub fn handle( crate::commands::index::build_index(&project_name,&scan_path, &db_path, config)?; } - let mut indexer = Indexer::new(&project_name, &db_path)?; - diags = scan_with_index(&project_name, &db_path, config, &mut indexer)?; + let pool = Indexer::init(&db_path)?; + diags = scan_with_index_parallel(&project_name, pool, config)?; } if format == "console" || format == "" && config.output.default_format == "console" { @@ -95,42 +97,49 @@ fn scan_filesystem( Ok(acc.into_inner().unwrap()) } -fn scan_with_index( +fn scan_with_index_parallel( project: &str, - _db_path: &Path, + pool: Arc>, cfg: &Config, - indexer: &mut Indexer, ) -> Result, Box> { - let paths = indexer.get_files(project).unwrap_or_default(); - let mut issues: Vec = Vec::new(); - for path in paths { - if indexer.should_scan(&path)? { - tracing::debug!("scanning files{}", path.display()); - let mut diags = run_rules_on_file(&path, cfg)?; - let file_id = indexer.upsert_file(&path)?; + // Get the file list once (single connection, no contention) + let files = { + let idx = Indexer::from_pool(project, &pool)?; + idx.get_files(project)? + }; - let issue_rows: Vec = diags - .iter() - .map(|d| IssueRow { - rule_id: d.id.as_ref(), - severity: match d.severity { - Severity::High => "HIGH", - Severity::Medium => "MEDIUM", - Severity::Low => "LOW", - }, - line: d.line as i64, - col: d.col as i64, - }) - .collect(); + let acc = Mutex::new(Vec::new()); - indexer.replace_issues(file_id, issue_rows)?; - issues.append(&mut diags); - continue; - } - issues.append(&mut indexer.get_issues_from_file(&path)?); - } - Ok(issues) + files.into_par_iter() + .try_for_each(|path| -> Result<(), DynError> { + let mut idx = Indexer::from_pool(project, &pool).unwrap(); + + if idx.should_scan(&path).unwrap() { + let mut diags = run_rules_on_file(&path, cfg).unwrap(); + let file_id = idx.upsert_file(&path).unwrap(); + + let rows: Vec = diags.iter().map(|d| IssueRow { + rule_id: d.id.as_ref(), + severity: match d.severity { + Severity::High => "HIGH", + Severity::Medium => "MEDIUM", + Severity::Low => "LOW", + }, + line: d.line as i64, + col: d.col as i64, + }).collect(); + + idx.replace_issues(file_id, rows).unwrap(); + acc.lock().unwrap().append(&mut diags); + } else { + let mut cached = idx.get_issues_from_file(&path).unwrap(); + acc.lock().unwrap().append(&mut cached); + } + Ok(()) + }).unwrap(); + + Ok(acc.into_inner().unwrap()) } // -------------------------------------------------------------------------------------------- diff --git a/src/database.rs b/src/database.rs index 6bcea29c..38688b86 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,11 +1,15 @@ pub mod index { - use rusqlite::{params, Connection, OptionalExtension}; + use rusqlite::{params, Connection, OpenFlags, OptionalExtension}; use std::fs; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::time::{SystemTime, UNIX_EPOCH}; use crate::commands::scan::Diag; use crate::patterns::Severity; + use r2d2_sqlite::{SqliteConnectionManager}; + use std::ops::Deref; + use std::sync::Arc; + use r2d2::{Pool, PooledConnection}; /// DB schema (foreign‑keys enabled). const SCHEMA: &str = r#" @@ -43,18 +47,48 @@ pub mod index { } pub struct Indexer { - conn: Connection, + conn: PooledConnection, project: String, } impl Indexer { - /// Open (or create) the DB at `database_path` for the given project name. - pub fn new(project: &str, database_path: &Path) -> Result> { - let conn = Connection::open(database_path)?; - conn.execute_batch(SCHEMA)?; + + pub fn init( + database_path: &Path, + ) -> Result>, Box> { + let flags = OpenFlags::SQLITE_OPEN_READ_WRITE + | OpenFlags::SQLITE_OPEN_CREATE + | OpenFlags::SQLITE_OPEN_FULL_MUTEX; + let manager = SqliteConnectionManager::file(&database_path).with_flags(flags); + let pool = Arc::new(Pool::new(manager)?); + + { + let conn = pool.get()?; + conn.pragma_update(None, "journal_mode", &"WAL")?; + conn.execute_batch(SCHEMA)?; + } + Ok(pool) + } + + pub fn from_pool( + project: &str, + pool: &Pool, + ) -> Result> { + let conn = pool.get()?; Ok(Self { conn, project: project.to_owned() }) } + // helper so code below can treat PooledConnection like &Connection + fn c(&self) -> &Connection { self.conn.deref() } + + /// Open (or create) the DB at `database_path` for the given project name. + // pub fn new(project: &str, database_path: &Path) -> Result> { + // let conn = Connection::open(database_path)?; + // conn.pragma_update(None, "journal_mode", &"WAL")?; + // conn.execute_batch(SCHEMA)?; + // Ok(Self { conn, project: project.to_owned() }) + // } + /// Return true when the file *content* or *mtime* changed since the last scan. pub fn should_scan(&self, path: &Path) -> Result> { let meta = fs::metadata(path)?; @@ -83,7 +117,7 @@ pub mod index { let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; let digest = Self::digest_file(path)?; - self.conn.execute( + self.c().execute( "INSERT INTO files (project, path, hash, mtime, scanned_at) VALUES (?1, ?2, ?3, ?4, ?5) ON CONFLICT(project,path) DO UPDATE @@ -93,7 +127,7 @@ pub mod index { params![self.project, path.to_string_lossy(), digest, mtime, scanned_at], )?; - let id: i64 = self.conn.query_row( + let id: i64 = self.c().query_row( "SELECT id FROM files WHERE project = ?1 AND path = ?2", params![self.project, path.to_string_lossy()], |r| r.get(0), @@ -125,13 +159,13 @@ pub mod index { &self, path: &Path, ) -> Result, Box> { - let file_id: i64 = self.conn.query_row( + let file_id: i64 = self.c().query_row( "SELECT id FROM files WHERE project = ?1 AND path = ?2", params![self.project, path.to_string_lossy()], |r| r.get(0), )?; - let mut stmt = self.conn.prepare( + let mut stmt = self.c().prepare( "SELECT rule_id, severity, line, col FROM issues WHERE file_id = ?1", @@ -153,7 +187,7 @@ pub mod index { /// gets files from the database pub fn get_files(&self, project: &str) -> Result, Box> { - let mut stmt = self.conn.prepare( + let mut stmt = self.c().prepare( "SELECT path FROM files WHERE project = ?1", @@ -164,6 +198,24 @@ pub mod index { Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::>()?) } + /// Clears the tables to prep for a reindex + pub fn clear(&self) -> rusqlite::Result<()> { + self.c().execute_batch( + r#" + PRAGMA foreign_keys = OFF; + + DROP TABLE IF EXISTS issues; + DROP TABLE IF EXISTS files; + + PRAGMA foreign_keys = ON; + VACUUM; + "#, + )?; + + self.c().execute_batch(SCHEMA)?; + Ok(()) + } + fn digest_file(path: &Path) -> Result, Box> { let mut hasher = blake3::Hasher::new(); let mut file = fs::File::open(path)?;