2025-06-16 23:47:50 +02:00
|
|
|
|
pub mod index {
|
2025-06-17 20:45:33 +02:00
|
|
|
|
use rusqlite::{params, Connection, OpenFlags, OptionalExtension};
|
2025-06-16 23:47:50 +02:00
|
|
|
|
use std::fs;
|
2025-06-17 16:46:45 +02:00
|
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
|
|
use std::str::FromStr;
|
2025-06-16 23:47:50 +02:00
|
|
|
|
use std::time::{SystemTime, UNIX_EPOCH};
|
2025-06-17 16:46:45 +02:00
|
|
|
|
use crate::commands::scan::Diag;
|
|
|
|
|
|
use crate::patterns::Severity;
|
2025-06-17 20:45:33 +02:00
|
|
|
|
use r2d2_sqlite::{SqliteConnectionManager};
|
|
|
|
|
|
use std::ops::Deref;
|
|
|
|
|
|
use std::sync::Arc;
|
|
|
|
|
|
use r2d2::{Pool, PooledConnection};
|
2025-06-16 23:47:50 +02:00
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
/// SQL batch that prepares the index database.
///
/// Running it enables foreign-key enforcement on the connection that executes
/// the batch and creates the tables when they do not exist yet:
///
/// * `files`  – one row per `(project, path)` holding the content hash, the
///   file's mtime and the time of the last scan.
/// * `issues` – findings keyed by `(file_id, rule_id, line, col)`; each row
///   references its `files` row with `ON DELETE CASCADE`.
const SCHEMA: &str = r#"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS files (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project TEXT NOT NULL,
    path TEXT NOT NULL,
    hash BLOB NOT NULL,
    mtime INTEGER NOT NULL,
    scanned_at INTEGER NOT NULL,
    UNIQUE(project, path)
);

CREATE TABLE IF NOT EXISTS issues (
    file_id INTEGER NOT NULL
        REFERENCES files(id)
        ON DELETE CASCADE,
    rule_id TEXT NOT NULL,
    severity TEXT NOT NULL,
    line INTEGER NOT NULL,
    col INTEGER NOT NULL,
    PRIMARY KEY (file_id, rule_id, line, col)
);
"#;
|
|
|
|
|
|
|
|
|
|
|
|
/// A single issue row, ready for insertion into the `issues` table.
///
/// The string fields borrow from the caller, so building a row does not
/// allocate. Rows compare and hash by value, which lets callers de-duplicate
/// them before insertion (the table's primary key is
/// `(file_id, rule_id, line, col)`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IssueRow<'a> {
    /// Identifier of the rule that produced the finding (`rule_id` column).
    pub rule_id: &'a str,
    /// Severity in its textual form (`severity` column).
    pub severity: &'a str,
    /// Line of the finding (`line` column).
    pub line: i64,
    /// Column of the finding (`col` column).
    pub col: i64,
}
|
2025-06-16 23:47:50 +02:00
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
pub struct Indexer {
|
2025-06-17 20:45:33 +02:00
|
|
|
|
conn: PooledConnection<SqliteConnectionManager>,
|
2025-06-17 16:46:45 +02:00
|
|
|
|
project: String,
|
2025-06-16 23:47:50 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl Indexer {
|
2025-06-17 20:45:33 +02:00
|
|
|
|
|
|
|
|
|
|
pub fn init(
|
|
|
|
|
|
database_path: &Path,
|
|
|
|
|
|
) -> Result<std::sync::Arc<Pool<SqliteConnectionManager>>, Box<dyn std::error::Error>> {
|
|
|
|
|
|
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
|
|
|
|
|
|
| OpenFlags::SQLITE_OPEN_CREATE
|
|
|
|
|
|
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
|
|
|
|
|
|
let manager = SqliteConnectionManager::file(&database_path).with_flags(flags);
|
|
|
|
|
|
let pool = Arc::new(Pool::new(manager)?);
|
|
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
let conn = pool.get()?;
|
|
|
|
|
|
conn.pragma_update(None, "journal_mode", &"WAL")?;
|
|
|
|
|
|
conn.execute_batch(SCHEMA)?;
|
|
|
|
|
|
}
|
|
|
|
|
|
Ok(pool)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
pub fn from_pool(
|
|
|
|
|
|
project: &str,
|
|
|
|
|
|
pool: &Pool<SqliteConnectionManager>,
|
|
|
|
|
|
) -> Result<Self, Box<dyn std::error::Error>> {
|
|
|
|
|
|
let conn = pool.get()?;
|
2025-06-17 16:46:45 +02:00
|
|
|
|
Ok(Self { conn, project: project.to_owned() })
|
2025-06-16 23:47:50 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-06-17 20:45:33 +02:00
|
|
|
|
// helper so code below can treat PooledConnection like &Connection
|
|
|
|
|
|
fn c(&self) -> &Connection { self.conn.deref() }
|
|
|
|
|
|
|
|
|
|
|
|
// Legacy constructor (disabled): opened (or created) the DB at `database_path` for the given project name.
|
|
|
|
|
|
// pub fn new(project: &str, database_path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
|
|
|
|
|
|
// let conn = Connection::open(database_path)?;
|
|
|
|
|
|
// conn.pragma_update(None, "journal_mode", &"WAL")?;
|
|
|
|
|
|
// conn.execute_batch(SCHEMA)?;
|
|
|
|
|
|
// Ok(Self { conn, project: project.to_owned() })
|
|
|
|
|
|
// }
|
|
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
/// Return true when the file *content* or *mtime* changed since the last scan.
|
2025-06-16 23:47:50 +02:00
|
|
|
|
pub fn should_scan(&self, path: &Path) -> Result<bool, Box<dyn std::error::Error>> {
|
|
|
|
|
|
let meta = fs::metadata(path)?;
|
|
|
|
|
|
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
|
|
|
|
|
let digest = Self::digest_file(path)?;
|
|
|
|
|
|
|
|
|
|
|
|
let row: Option<(Vec<u8>, i64)> = self
|
2025-06-17 16:46:45 +02:00
|
|
|
|
.conn
|
|
|
|
|
|
.query_row(
|
|
|
|
|
|
"SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2",
|
|
|
|
|
|
params![self.project, path.to_string_lossy()],
|
|
|
|
|
|
|r| Ok((r.get(0)?, r.get(1)?)),
|
|
|
|
|
|
)
|
|
|
|
|
|
.optional()?;
|
|
|
|
|
|
|
|
|
|
|
|
Ok(match row {
|
|
|
|
|
|
Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
|
|
|
|
|
|
None => true,
|
|
|
|
|
|
})
|
2025-06-16 23:47:50 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
/// Insert or update the `files` row and return its id.
|
|
|
|
|
|
pub fn upsert_file(&self, path: &Path) -> Result<i64, Box<dyn std::error::Error>> {
|
2025-06-16 23:47:50 +02:00
|
|
|
|
let meta = fs::metadata(path)?;
|
|
|
|
|
|
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
2025-06-17 16:46:45 +02:00
|
|
|
|
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
2025-06-16 23:47:50 +02:00
|
|
|
|
let digest = Self::digest_file(path)?;
|
|
|
|
|
|
|
2025-06-17 20:45:33 +02:00
|
|
|
|
self.c().execute(
|
2025-06-17 16:46:45 +02:00
|
|
|
|
"INSERT INTO files (project, path, hash, mtime, scanned_at)
|
|
|
|
|
|
VALUES (?1, ?2, ?3, ?4, ?5)
|
|
|
|
|
|
ON CONFLICT(project,path) DO UPDATE
|
|
|
|
|
|
SET hash = excluded.hash,
|
|
|
|
|
|
mtime = excluded.mtime,
|
|
|
|
|
|
scanned_at = excluded.scanned_at",
|
|
|
|
|
|
params![self.project, path.to_string_lossy(), digest, mtime, scanned_at],
|
2025-06-16 23:47:50 +02:00
|
|
|
|
)?;
|
|
|
|
|
|
|
2025-06-17 20:45:33 +02:00
|
|
|
|
let id: i64 = self.c().query_row(
|
2025-06-17 16:46:45 +02:00
|
|
|
|
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
|
|
|
|
|
|
params![self.project, path.to_string_lossy()],
|
|
|
|
|
|
|r| r.get(0),
|
|
|
|
|
|
)?;
|
|
|
|
|
|
Ok(id)
|
2025-06-16 23:47:50 +02:00
|
|
|
|
}
|
2025-06-17 00:02:32 +02:00
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
/// Replace all issues for `file_id` with the supplied set.
|
|
|
|
|
|
pub fn replace_issues<'a>(&mut self, file_id: i64, issues: impl IntoIterator<Item = IssueRow<'a>>)
|
|
|
|
|
|
-> Result<(), Box<dyn std::error::Error>> {
|
|
|
|
|
|
let tx = self.conn.transaction()?;
|
|
|
|
|
|
tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;
|
|
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
let mut stmt = tx.prepare(
|
|
|
|
|
|
"INSERT INTO issues (file_id, rule_id, severity, line, col)
|
|
|
|
|
|
VALUES (?1, ?2, ?3, ?4, ?5)",
|
|
|
|
|
|
)?;
|
|
|
|
|
|
for iss in issues {
|
|
|
|
|
|
stmt.execute(params![file_id, iss.rule_id, iss.severity, iss.line, iss.col])?;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
tx.commit()?;
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
2025-06-17 00:02:32 +02:00
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
/// Gets the issues for a specific file so we don't have to rescan
|
|
|
|
|
|
pub fn get_issues_from_file(
|
|
|
|
|
|
&self,
|
|
|
|
|
|
path: &Path,
|
|
|
|
|
|
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
2025-06-17 20:45:33 +02:00
|
|
|
|
let file_id: i64 = self.c().query_row(
|
2025-06-17 16:46:45 +02:00
|
|
|
|
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
|
|
|
|
|
|
params![self.project, path.to_string_lossy()],
|
|
|
|
|
|
|r| r.get(0),
|
|
|
|
|
|
)?;
|
|
|
|
|
|
|
2025-06-17 20:45:33 +02:00
|
|
|
|
let mut stmt = self.c().prepare(
|
2025-06-17 16:46:45 +02:00
|
|
|
|
"SELECT rule_id, severity, line, col
|
|
|
|
|
|
FROM issues
|
|
|
|
|
|
WHERE file_id = ?1",
|
|
|
|
|
|
)?;
|
2025-06-17 00:02:32 +02:00
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
let issue_iter = stmt.query_map([file_id], |row| {
|
|
|
|
|
|
let sev_str: String = row.get(1)?;
|
|
|
|
|
|
Ok(Diag {
|
|
|
|
|
|
path: path.to_string_lossy().to_string(),
|
|
|
|
|
|
id: row.get::<_, String>(0)?, // rule_id
|
|
|
|
|
|
line: row.get::<_, i64>(2)? as usize,
|
|
|
|
|
|
col: row.get::<_, i64>(3)? as usize,
|
|
|
|
|
|
severity: Severity::from_str(&sev_str).unwrap(),
|
|
|
|
|
|
})
|
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
|
|
Ok(issue_iter.filter_map(Result::ok).collect())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// gets files from the database
|
|
|
|
|
|
pub fn get_files(&self, project: &str) -> Result<Vec<std::path::PathBuf>, Box<dyn std::error::Error>> {
|
2025-06-17 20:45:33 +02:00
|
|
|
|
let mut stmt = self.c().prepare(
|
2025-06-17 16:46:45 +02:00
|
|
|
|
"SELECT path
|
|
|
|
|
|
FROM files
|
|
|
|
|
|
WHERE project = ?1",
|
|
|
|
|
|
)?;
|
2025-06-17 00:02:32 +02:00
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?;
|
|
|
|
|
|
|
|
|
|
|
|
Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::<Result<_, _>>()?)
|
2025-06-17 00:02:32 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-06-17 20:45:33 +02:00
|
|
|
|
/// Clears the tables to prep for a reindex
|
|
|
|
|
|
pub fn clear(&self) -> rusqlite::Result<()> {
|
|
|
|
|
|
self.c().execute_batch(
|
|
|
|
|
|
r#"
|
|
|
|
|
|
PRAGMA foreign_keys = OFF;
|
|
|
|
|
|
|
|
|
|
|
|
DROP TABLE IF EXISTS issues;
|
|
|
|
|
|
DROP TABLE IF EXISTS files;
|
|
|
|
|
|
|
|
|
|
|
|
PRAGMA foreign_keys = ON;
|
|
|
|
|
|
VACUUM;
|
|
|
|
|
|
"#,
|
|
|
|
|
|
)?;
|
|
|
|
|
|
|
|
|
|
|
|
self.c().execute_batch(SCHEMA)?;
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
fn digest_file(path: &Path) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
|
|
|
|
|
|
let mut hasher = blake3::Hasher::new();
|
|
|
|
|
|
let mut file = fs::File::open(path)?;
|
|
|
|
|
|
std::io::copy(&mut file, &mut hasher)?;
|
|
|
|
|
|
Ok(hasher.finalize().as_bytes().to_vec())
|
|
|
|
|
|
}
|
2025-06-17 00:02:32 +02:00
|
|
|
|
}
|
|
|
|
|
|
}
|