//! SQL literal semantics. //! //! A focused, lightweight SQL detector that classifies a literal SQL //! query as **authorization-gated** when one of two patterns holds: //! //! 1. **JOIN-through-ACL**: `SELECT … FROM JOIN ON … WHERE //! .user_id = ?N` where `` is in the configured ACL-table //! list (`group_members`, `org_memberships`, …). The JOIN proves //! that every returned row belongs to a tenant the bound `?N` user //! is a member of. //! //! 2. **Direct ownership**: `WHERE … user_id = ?N` (with optional //! additional predicates like `WHERE id = ?M AND user_id = ?N`). //! The `user_id = ?N` predicate proves every returned row is owned //! by the bound user. //! //! Detection is conservative: ambiguous shapes return `None`, and the //! caller (in `extract::common::collect_row_population`) only synthesizes //! an `AuthCheck` when classification is positive. False negatives //! (missing real auth) are safe; false positives (spuriously claiming //! auth) are not. //! //! No SQL parser dependency: the rules below operate on lower-cased //! whitespace-normalised text and pattern-match the relevant clauses. /// Classification of a literal SQL query for authorization purposes. #[derive(Debug, Clone, PartialEq, Eq)] pub enum SqlAuthClassification { /// Query is auth-gated. The JOIN (or direct WHERE) pins returned /// rows to the bound user. We don't track *which* bind position /// here — the caller treats whichever bind value flows into the /// query as the user-id witness; that's safe because the caller /// already requires the row binding to come from a `let X = …` /// site we can name. Authorized, } /// Classify `sql` as auth-gated under the configured ACL tables. /// Returns `Some(Authorized)` when one of the recognized patterns /// holds, `None` otherwise (conservative — unknown shapes are treated /// as unauthorized). pub fn classify_sql_query(sql: &str, acl_tables: &[String]) -> Option { let normalized = normalize_sql(sql); if !normalized.trim_start().starts_with("select") { // For B3 we only authorize SELECT queries — INSERT/UPDATE/DELETE // need their own analysis and aren't in scope. (A literal // `DELETE … WHERE user_id = ?N` could be safely authorized, // but the call sites we care about for FP suppression are // reads.) return None; } if matches_join_through_acl(&normalized, acl_tables) { return Some(SqlAuthClassification::Authorized); } if matches_direct_user_id_predicate(&normalized) { return Some(SqlAuthClassification::Authorized); } None } /// `SELECT … FROM [AS] ? JOIN [AS] ? ON … WHERE /// .user_id = ?N` — verifies that an ACL table appears in a JOIN /// clause and that the WHERE clause contains a `<…>.user_id = ?` (or /// bare `user_id = ?`) predicate. Order of the WHERE predicates /// doesn't matter; AND/OR connectors are ignored. fn matches_join_through_acl(sql: &str, acl_tables: &[String]) -> bool { let Some(where_idx) = sql.find(" where ") else { return false; }; let from_to_where = &sql[..where_idx]; let where_clause = &sql[where_idx + " where ".len()..]; let has_acl_join = acl_tables.iter().any(|t| { let lower = t.to_ascii_lowercase(); // " join " or " join " from_to_where.contains(&format!(" join {} ", lower)) || from_to_where.ends_with(&format!(" join {}", lower)) || from_to_where.contains(&format!(" inner join {} ", lower)) || from_to_where.contains(&format!(" left join {} ", lower)) || from_to_where.contains(&format!(" right join {} ", lower)) }); if !has_acl_join { return false; } where_clause_contains_user_id_bind(where_clause) } /// Direct ownership: `SELECT … FROM WHERE … user_id = ?N` — no /// JOIN. Covers single-table reads where the row already carries the /// owning user id (`SELECT … FROM docs WHERE user_id = ?1`). We do /// NOT require `id = ?M` to also be present; the `user_id = ?N` /// predicate alone is sufficient, since any row returned must be /// owned by the bound user. /// /// Refuses to fire when a JOIN is present — the JOIN target may not /// be in the ACL list, so the WHERE predicate (which may apply to /// the joined table, e.g. `WHERE al.user_id = ?N` against an /// `audit_log` JOIN) doesn't actually pin the primary rows to the /// caller. The JOIN-through-ACL path handles those cases explicitly. fn matches_direct_user_id_predicate(sql: &str) -> bool { let Some(where_idx) = sql.find(" where ") else { return false; }; let from_to_where = &sql[..where_idx]; if from_to_where.contains(" join ") { return false; } let where_clause = &sql[where_idx + " where ".len()..]; where_clause_contains_user_id_bind(where_clause) } /// Does the WHERE clause contain `.user_id = ?` (or /// `.user_id = $` for postgres-style placeholders, or /// `.user_id = :name` for named binds)? The optional table /// qualifier handles `gm.user_id` (alias-qualified) and bare `user_id`. fn where_clause_contains_user_id_bind(where_clause: &str) -> bool { // Strip ORDER BY / LIMIT / GROUP BY / HAVING tails so we don't // hunt past the WHERE clause for a `user_id = ?` that isn't // actually a predicate. let where_only = strip_trailing_clauses(where_clause); let needles = ["user_id", "userid"]; for needle in needles { for (idx, _) in where_only.match_indices(needle) { // Make sure this is a column boundary on the left side // (avoid matching `posted_user_id` or `target_user_id` // — those don't pin to the actor). let before = where_only[..idx].chars().last(); if !is_column_boundary_left(before) { continue; } // Skip past `user_id`. Trim whitespace then look for `=`. let rest = &where_only[idx + needle.len()..]; let rest = rest.trim_start(); if !rest.starts_with('=') { continue; } let after_eq = rest[1..].trim_start(); if looks_like_bind_param(after_eq) { return true; } } } false } fn is_column_boundary_left(ch: Option) -> bool { match ch { None => true, Some(c) => matches!(c, ' ' | '\t' | '(' | '.' | ',' | '\n' | '\r'), } } fn looks_like_bind_param(after_eq: &str) -> bool { let bytes = after_eq.as_bytes(); if bytes.is_empty() { return false; } match bytes[0] { // ?N (sqlite/sqlx anonymous) — accept ?, ?1, ?2… b'?' => true, // $N (postgres style) — require a digit after. b'$' => bytes.get(1).is_some_and(|b| b.is_ascii_digit()), // :name (named bind) — require an identifier char after. b':' => bytes .get(1) .is_some_and(|b| b.is_ascii_alphabetic() || *b == b'_'), _ => false, } } /// Cut off ORDER BY / LIMIT / GROUP BY / HAVING tails so the WHERE /// scan stays inside the predicate region. fn strip_trailing_clauses(where_clause: &str) -> &str { let candidates = [" order by ", " limit ", " group by ", " having "]; let mut end = where_clause.len(); for cand in candidates { if let Some(idx) = where_clause.find(cand) { end = end.min(idx); } } &where_clause[..end] } /// Lower-case + collapse whitespace + flatten line breaks so the /// patterns above can use single-space tokens. fn normalize_sql(sql: &str) -> String { let mut out = String::with_capacity(sql.len()); let mut prev_space = true; // Surround with leading/trailing space so " where " etc. searches // hit boundary cases at the very start/end. out.push(' '); for ch in sql.chars() { if ch.is_whitespace() { if !prev_space { out.push(' '); prev_space = true; } } else { out.push(ch.to_ascii_lowercase()); prev_space = false; } } if !out.ends_with(' ') { out.push(' '); } out } #[cfg(test)] mod tests { use super::{SqlAuthClassification, classify_sql_query}; fn acl() -> Vec { vec![ "group_members".into(), "org_memberships".into(), "workspace_members".into(), "tenant_members".into(), "members".into(), "share_grants".into(), ] } #[test] fn join_through_group_members_with_user_bind_is_authorized() { let sql = "SELECT d.id, d.group_id, d.title \ FROM docs d \ JOIN group_members gm ON gm.group_id = d.group_id \ WHERE gm.user_id = ?1 \ ORDER BY d.updated_at DESC"; assert_eq!( classify_sql_query(sql, &acl()), Some(SqlAuthClassification::Authorized) ); } #[test] fn join_through_workspace_members_with_postgres_bind() { let sql = "SELECT t.* \ FROM tickets t \ INNER JOIN workspace_members wm ON wm.workspace_id = t.workspace_id \ WHERE wm.user_id = $1"; assert_eq!( classify_sql_query(sql, &acl()), Some(SqlAuthClassification::Authorized) ); } #[test] fn direct_user_id_predicate_is_authorized() { let sql = "SELECT id, name FROM peers WHERE user_id = ?1"; assert_eq!( classify_sql_query(sql, &acl()), Some(SqlAuthClassification::Authorized) ); } #[test] fn direct_id_and_user_id_predicate_is_authorized() { let sql = "SELECT title FROM docs WHERE id = ?1 AND user_id = ?2"; assert_eq!( classify_sql_query(sql, &acl()), Some(SqlAuthClassification::Authorized) ); } #[test] fn named_bind_is_authorized() { let sql = "SELECT * FROM peers WHERE user_id = :uid"; assert_eq!( classify_sql_query(sql, &acl()), Some(SqlAuthClassification::Authorized) ); } #[test] fn join_against_non_acl_table_is_not_authorized() { // `audit_log` is not in the configured ACL list — JOIN doesn't // pin rows to the bound user, so the query is unauthorized. let sql = "SELECT d.* FROM docs d \ JOIN audit_log al ON al.doc_id = d.id \ WHERE al.user_id = ?1"; assert_eq!(classify_sql_query(sql, &acl()), None); } #[test] fn select_without_user_id_predicate_is_not_authorized() { let sql = "SELECT * FROM docs WHERE id = ?1"; assert_eq!(classify_sql_query(sql, &acl()), None); } #[test] fn non_select_query_is_not_authorized() { // INSERT/UPDATE/DELETE are not in scope for B3 even when the // WHERE clause names the user. let sql = "DELETE FROM docs WHERE user_id = ?1"; assert_eq!(classify_sql_query(sql, &acl()), None); } #[test] fn similar_column_names_do_not_trip_user_id_match() { // `posted_user_id` shouldn't satisfy the `user_id = ?` check — // that column doesn't pin to the actor. let sql = "SELECT * FROM posts WHERE posted_user_id = ?1"; assert_eq!(classify_sql_query(sql, &acl()), None); } #[test] fn order_by_after_user_id_is_handled() { let sql = "SELECT * FROM peers WHERE user_id = ?1 ORDER BY created_at DESC LIMIT 50"; assert_eq!( classify_sql_query(sql, &acl()), Some(SqlAuthClassification::Authorized) ); } #[test] fn empty_acl_list_disables_join_pattern_but_keeps_direct() { let join_sql = "SELECT * FROM docs d \ JOIN group_members gm ON gm.group_id = d.group_id \ WHERE gm.user_id = ?1"; let direct_sql = "SELECT * FROM peers WHERE user_id = ?1"; let empty: Vec = Vec::new(); // No ACL configured → join pattern can't fire, but direct // predicate still authorizes. assert_eq!(classify_sql_query(join_sql, &empty), None); assert_eq!( classify_sql_query(direct_sql, &empty), Some(SqlAuthClassification::Authorized) ); } }