Refactor database connection handling with connection pooling and parallel processing

- Introduced `r2d2` connection pooling for SQLite in `database.rs`.
- Updated `Indexer` to use pooled connections for improved concurrency.
- Replaced sequential processing with `rayon` for parallel file scanning.
- Added a `clear` method to `Indexer` for reindexing support.
- Enhanced database initialization with `init` and `from_pool` methods.
- Updated `Cargo.toml` and `Cargo.lock` to include `r2d2`, `r2d2_sqlite`, and their transitive dependencies.
This commit is contained in:
elipeter 2025-06-17 20:45:33 +02:00
parent 1933082b41
commit 0a62b6f40c
5 changed files with 412 additions and 67 deletions

277
Cargo.lock generated
View file

@ -73,6 +73,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "bitflags"
version = "2.9.1"
@ -102,6 +108,12 @@ dependencies = [
"serde",
]
[[package]]
name = "bumpalo"
version = "3.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee"
[[package]]
name = "cc"
version = "1.2.27"
@ -290,7 +302,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if",
"libc",
"wasi",
"wasi 0.11.1+wasi-snapshot-preview1",
]
[[package]]
name = "getrandom"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasi 0.14.2+wasi-0.2.4",
]
[[package]]
@ -374,6 +398,16 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@ -406,6 +440,16 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "lock_api"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.27"
@ -465,6 +509,8 @@ dependencies = [
"ignore",
"num_cpus",
"once_cell",
"r2d2",
"r2d2_sqlite",
"rayon",
"rusqlite",
"serde",
@ -507,6 +553,29 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
@ -525,6 +594,15 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
@ -543,6 +621,63 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "r2d2"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93"
dependencies = [
"log",
"parking_lot",
"scheduled-thread-pool",
]
[[package]]
name = "r2d2_sqlite"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06cc23a61faf4643d8b59ed52c27ed434476dd7aa6f39e1eff7d6bbd35985093"
dependencies = [
"r2d2",
"rusqlite",
"uuid",
]
[[package]]
name = "rand"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
dependencies = [
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
dependencies = [
"getrandom 0.3.3",
]
[[package]]
name = "rayon"
version = "1.10.0"
@ -563,13 +698,22 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
dependencies = [
"bitflags",
]
[[package]]
name = "redox_users"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b"
dependencies = [
"getrandom",
"getrandom 0.2.16",
"libredox",
"thiserror",
]
@ -632,6 +776,12 @@ dependencies = [
"smallvec",
]
[[package]]
name = "rustversion"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
[[package]]
name = "ryu"
version = "1.0.20"
@ -647,6 +797,21 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scheduled-thread-pool"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19"
dependencies = [
"parking_lot",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.219"
@ -1037,6 +1202,18 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d"
dependencies = [
"getrandom 0.3.3",
"js-sys",
"rand",
"wasm-bindgen",
]
[[package]]
name = "valuable"
version = "0.1.1"
@ -1065,6 +1242,73 @@ version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasi"
version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "winapi"
version = "0.3.9"
@ -1177,3 +1421,32 @@ checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
dependencies = [
"memchr",
]
[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags",
]
[[package]]
name = "zerocopy"
version = "0.8.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View file

@ -28,3 +28,5 @@ blake3 = "1.8.2"
once_cell = "1.21.3"
console = "0.15.11"
rayon = "1.10.0"
r2d2_sqlite = "0.30.0"
r2d2 = "0.8.10"

View file

@ -5,6 +5,7 @@ use crate::patterns::Severity;
use crate::utils::Config;
use crate::utils::project::get_project_info;
use crate::walk::spawn_senders;
use rayon::prelude::*;
pub fn handle(
action: IndexAction,
@ -50,27 +51,35 @@ pub fn build_index(
tracing::debug!("Building index for: {}", project_name);
fs::File::create(db_path)?;
let mut indexer = Indexer::new(&project_name, &db_path)?;
let rx = spawn_senders(project_path, config);
for path in rx.iter().flatten() {
let issues = crate::commands::scan::run_rules_on_file(&path, config)?;
let file_id = indexer.upsert_file(&path)?;
let issue_rows: Vec<IssueRow> = issues
.iter()
.map(|d| IssueRow {
rule_id: d.id.as_ref(),
severity: match d.severity {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
},
line: d.line as i64,
col: d.col as i64,
})
.collect();
indexer.replace_issues(file_id, issue_rows)?;
let pool = Indexer::init(db_path)?;
{
let idx = Indexer::from_pool(&project_name, &pool).unwrap();
idx.clear()?;
}
tracing::debug!("Cleaned index for: {}", project_name);
let rx = spawn_senders(project_path, config);
let paths: Vec<_> = rx.into_iter().flatten().collect();
paths.into_par_iter().try_for_each(|path| -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let issues = crate::commands::scan::run_rules_on_file(&path, config).unwrap();
let mut idx = Indexer::from_pool(project_name, &pool).unwrap();
let file_id = idx.upsert_file(&path).unwrap();
let rows: Vec<IssueRow> = issues.iter().map(|d| IssueRow {
rule_id: d.id.as_ref(),
severity: match d.severity {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
},
line: d.line as i64,
col: d.col as i64,
}).collect();
idx.replace_issues(file_id, rows).unwrap();
Ok(())
}).unwrap();
Ok(())
}

View file

@ -1,7 +1,9 @@
use crate::utils::project::get_project_info;
use console::style;
use std::path::Path;
use std::sync::Mutex;
use std::sync::{Arc, Mutex};
use r2d2::Pool;
use r2d2_sqlite::SqliteConnectionManager;
use crate::database::index::{IssueRow, Indexer};
use crate::patterns::Severity;
use crate::utils::config::Config;
@ -44,8 +46,8 @@ pub fn handle(
crate::commands::index::build_index(&project_name,&scan_path, &db_path, config)?;
}
let mut indexer = Indexer::new(&project_name, &db_path)?;
diags = scan_with_index(&project_name, &db_path, config, &mut indexer)?;
let pool = Indexer::init(&db_path)?;
diags = scan_with_index_parallel(&project_name, pool, config)?;
}
if format == "console" || format == "" && config.output.default_format == "console" {
@ -95,42 +97,49 @@ fn scan_filesystem(
Ok(acc.into_inner().unwrap())
}
fn scan_with_index(
fn scan_with_index_parallel(
project: &str,
_db_path: &Path,
pool: Arc<Pool<SqliteConnectionManager>>,
cfg: &Config,
indexer: &mut Indexer,
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
let paths = indexer.get_files(project).unwrap_or_default();
let mut issues: Vec<Diag> = Vec::new();
for path in paths {
if indexer.should_scan(&path)? {
tracing::debug!("scanning files{}", path.display());
let mut diags = run_rules_on_file(&path, cfg)?;
let file_id = indexer.upsert_file(&path)?;
// Get the file list once (single connection, no contention)
let files = {
let idx = Indexer::from_pool(project, &pool)?;
idx.get_files(project)?
};
let issue_rows: Vec<IssueRow> = diags
.iter()
.map(|d| IssueRow {
rule_id: d.id.as_ref(),
severity: match d.severity {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
},
line: d.line as i64,
col: d.col as i64,
})
.collect();
let acc = Mutex::new(Vec::new());
indexer.replace_issues(file_id, issue_rows)?;
issues.append(&mut diags);
continue;
}
issues.append(&mut indexer.get_issues_from_file(&path)?);
}
Ok(issues)
files.into_par_iter()
.try_for_each(|path| -> Result<(), DynError> {
let mut idx = Indexer::from_pool(project, &pool).unwrap();
if idx.should_scan(&path).unwrap() {
let mut diags = run_rules_on_file(&path, cfg).unwrap();
let file_id = idx.upsert_file(&path).unwrap();
let rows: Vec<IssueRow> = diags.iter().map(|d| IssueRow {
rule_id: d.id.as_ref(),
severity: match d.severity {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
},
line: d.line as i64,
col: d.col as i64,
}).collect();
idx.replace_issues(file_id, rows).unwrap();
acc.lock().unwrap().append(&mut diags);
} else {
let mut cached = idx.get_issues_from_file(&path).unwrap();
acc.lock().unwrap().append(&mut cached);
}
Ok(())
}).unwrap();
Ok(acc.into_inner().unwrap())
}
// --------------------------------------------------------------------------------------------

View file

@ -1,11 +1,15 @@
pub mod index {
use rusqlite::{params, Connection, OptionalExtension};
use rusqlite::{params, Connection, OpenFlags, OptionalExtension};
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::time::{SystemTime, UNIX_EPOCH};
use crate::commands::scan::Diag;
use crate::patterns::Severity;
use r2d2_sqlite::{SqliteConnectionManager};
use std::ops::Deref;
use std::sync::Arc;
use r2d2::{Pool, PooledConnection};
/// DB schema (foreign keys enabled).
const SCHEMA: &str = r#"
@ -43,18 +47,48 @@ pub mod index {
}
pub struct Indexer {
conn: Connection,
conn: PooledConnection<SqliteConnectionManager>,
project: String,
}
impl Indexer {
/// Open (or create) the DB at `database_path` for the given project name.
pub fn new(project: &str, database_path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
let conn = Connection::open(database_path)?;
conn.execute_batch(SCHEMA)?;
/// Builds the shared connection pool for the SQLite index at `database_path` and prepares
/// the database for use.
///
/// Connections are opened read/write with the create and full-mutex flags. One connection
/// is checked out immediately to switch the journal to WAL and to run `SCHEMA`.
///
/// # Errors
/// Returns an error when the pool cannot be built, a connection cannot be checked out, or
/// the journal-mode pragma / schema batch fails.
pub fn init(
    database_path: &Path,
) -> Result<std::sync::Arc<Pool<SqliteConnectionManager>>, Box<dyn std::error::Error>> {
    let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
        | OpenFlags::SQLITE_OPEN_CREATE
        | OpenFlags::SQLITE_OPEN_FULL_MUTEX;
    // SQLite tracks `foreign_keys` per connection, so it has to be switched on for every
    // connection the pool opens, not only for the one that runs the schema below.
    // NOTE(review): assumes the schema relies on foreign-key enforcement — confirm against SCHEMA.
    let manager = SqliteConnectionManager::file(database_path)
        .with_flags(flags)
        .with_init(|conn| conn.execute_batch("PRAGMA foreign_keys = ON;"));
    let pool = Arc::new(Pool::new(manager)?);
    {
        // Scoped so the setup connection goes back to the pool before it is handed out.
        let conn = pool.get()?;
        conn.pragma_update(None, "journal_mode", "WAL")?;
        conn.execute_batch(SCHEMA)?;
    }
    Ok(pool)
}
/// Checks a connection out of `pool` and wraps it in an `Indexer` bound to `project`.
///
/// # Errors
/// Returns the pool's error when no connection can be obtained.
pub fn from_pool(
    project: &str,
    pool: &Pool<SqliteConnectionManager>,
) -> Result<Self, Box<dyn std::error::Error>> {
    let pooled = pool.get()?;
    Ok(Self {
        project: String::from(project),
        conn: pooled,
    })
}
/// Borrows the pooled connection as a plain `&Connection` so the rest of this impl can
/// call `rusqlite` methods on it directly.
fn c(&self) -> &Connection {
    &self.conn
}
// Superseded by `init` + `from_pool`; kept for reference. Opened (or created) the DB at `database_path` for the given project name.
// pub fn new(project: &str, database_path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
// let conn = Connection::open(database_path)?;
// conn.pragma_update(None, "journal_mode", &"WAL")?;
// conn.execute_batch(SCHEMA)?;
// Ok(Self { conn, project: project.to_owned() })
// }
/// Return true when the file *content* or *mtime* changed since the last scan.
pub fn should_scan(&self, path: &Path) -> Result<bool, Box<dyn std::error::Error>> {
let meta = fs::metadata(path)?;
@ -83,7 +117,7 @@ pub mod index {
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
self.conn.execute(
self.c().execute(
"INSERT INTO files (project, path, hash, mtime, scanned_at)
VALUES (?1, ?2, ?3, ?4, ?5)
ON CONFLICT(project,path) DO UPDATE
@ -93,7 +127,7 @@ pub mod index {
params![self.project, path.to_string_lossy(), digest, mtime, scanned_at],
)?;
let id: i64 = self.conn.query_row(
let id: i64 = self.c().query_row(
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| r.get(0),
@ -125,13 +159,13 @@ pub mod index {
&self,
path: &Path,
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
let file_id: i64 = self.conn.query_row(
let file_id: i64 = self.c().query_row(
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| r.get(0),
)?;
let mut stmt = self.conn.prepare(
let mut stmt = self.c().prepare(
"SELECT rule_id, severity, line, col
FROM issues
WHERE file_id = ?1",
@ -153,7 +187,7 @@ pub mod index {
/// Returns the paths of all files stored in the database for `project`.
pub fn get_files(&self, project: &str) -> Result<Vec<std::path::PathBuf>, Box<dyn std::error::Error>> {
let mut stmt = self.conn.prepare(
let mut stmt = self.c().prepare(
"SELECT path
FROM files
WHERE project = ?1",
@ -164,6 +198,24 @@ pub mod index {
Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::<Result<_, _>>()?)
}
/// Resets the index ahead of a reindex.
///
/// Drops the `issues` and `files` tables outright (no per-project filter) with foreign-key
/// enforcement switched off around the drops, runs `VACUUM`, and then re-applies `SCHEMA`.
pub fn clear(&self) -> rusqlite::Result<()> {
    const RESET_SQL: &str = r#"
PRAGMA foreign_keys = OFF;
DROP TABLE IF EXISTS issues;
DROP TABLE IF EXISTS files;
PRAGMA foreign_keys = ON;
VACUUM;
"#;
    let db = self.c();
    db.execute_batch(RESET_SQL)?;
    db.execute_batch(SCHEMA)
}
fn digest_file(path: &Path) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
let mut hasher = blake3::Hasher::new();
let mut file = fs::File::open(path)?;