nyx/src/auth_analysis/sql_semantics.rs
Eli Peter a438886217
Python fp and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
2026-04-29 19:53:34 -04:00

334 lines
12 KiB
Rust

//! SQL literal semantics.
//!
//! A focused, lightweight SQL detector that classifies a literal SQL
//! query as **authorization-gated** when one of two patterns holds:
//!
//! 1. **JOIN-through-ACL**: `SELECT … FROM <T> JOIN <ACL> ON … WHERE
//! <ACL>.user_id = ?N` where `<ACL>` is in the configured ACL-table
//! list (`group_members`, `org_memberships`, …). The JOIN proves
//! that every returned row belongs to a tenant the bound `?N` user
//! is a member of.
//!
//! 2. **Direct ownership**: `WHERE … user_id = ?N` (with optional
//! additional predicates like `WHERE id = ?M AND user_id = ?N`).
//! The `user_id = ?N` predicate proves every returned row is owned
//! by the bound user.
//!
//! Detection is conservative: ambiguous shapes return `None`, and the
//! caller (in `extract::common::collect_row_population`) only synthesizes
//! an `AuthCheck` when classification is positive. False negatives
//! (missing real auth) are safe; false positives (spuriously claiming
//! auth) are not.
//!
//! No SQL parser dependency: the rules below operate on lower-cased
//! whitespace-normalised text and pattern-match the relevant clauses.
/// Outcome of classifying a literal SQL query for authorization purposes.
///
/// Only a positive outcome is modelled: `classify_sql_query` reports a
/// query it does not recognize as auth-gated by returning `None`, not
/// through a dedicated variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SqlAuthClassification {
/// The query is auth-gated: the JOIN (or direct WHERE) pins the
/// returned rows to the bound user.
///
/// The bind position is *not* tracked here. The caller treats
/// whichever bind value flows into the query as the user-id witness;
/// that's safe because the caller already requires the row binding
/// to come from a `let X = …` site we can name.
Authorized,
}
/// Classify `sql` as auth-gated under the configured ACL tables.
///
/// The query text is normalized (lower-cased, whitespace collapsed) and
/// then checked against the two recognized shapes: a JOIN through one of
/// `acl_tables` with a `user_id` bind predicate, or a JOIN-free query with
/// a direct `user_id` bind predicate.
///
/// Returns `Some(Authorized)` when one of the recognized patterns holds,
/// `None` otherwise (conservative: unknown shapes are treated as
/// unauthorized). In particular `None` is returned for:
///
/// * anything that does not start with `SELECT`;
/// * any query containing a `UNION`, because the predicate found in one
///   arm says nothing about the rows contributed by the other arm.
pub fn classify_sql_query(sql: &str, acl_tables: &[String]) -> Option<SqlAuthClassification> {
    let normalized = normalize_sql(sql);
    if !normalized.trim_start().starts_with("select") {
        // For B3 we only authorize SELECT queries; INSERT/UPDATE/DELETE
        // need their own analysis and aren't in scope. (A literal
        // `DELETE … WHERE user_id = ?N` could be safely authorized,
        // but the call sites we care about for FP suppression are
        // reads.)
        return None;
    }

    // `SELECT … WHERE user_id = ?1 UNION SELECT … FROM docs` returns rows
    // from the second arm that no `user_id` predicate constrains. The
    // clause matchers below only look at the first WHERE, so refuse any
    // query that contains a UNION rather than risk a false positive.
    // Tokenizing on non-identifier characters also catches `)union(`.
    let has_union = normalized
        .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .any(|token| token == "union");
    if has_union {
        return None;
    }

    let authorized = matches_join_through_acl(&normalized, acl_tables)
        || matches_direct_user_id_predicate(&normalized);
    authorized.then_some(SqlAuthClassification::Authorized)
}
/// JOIN-through-ACL shape: `SELECT … FROM <T> [AS] <ALIAS>? JOIN <ACL>
/// [AS] <GA>? ON … WHERE <GA?>.user_id = ?N`.
///
/// `sql` must already be normalized (lower-case, single spaces). The text
/// before the first ` where ` has to contain ` join <acl>` followed by a
/// space or the end of that segment, for at least one entry of
/// `acl_tables` (compared in ASCII lower-case). Because this is a
/// substring test, `inner join <acl>`, `left join <acl>`, `right join
/// <acl>` and similar prefixed forms are covered by the same check.
///
/// The text after ` where ` is then handed to
/// `where_clause_contains_user_id_bind`, which decides whether it holds a
/// qualifying `user_id` bind predicate.
///
/// NOTE(review): the table qualifier on the `user_id` predicate is not
/// compared against the ACL table or its alias.
fn matches_join_through_acl(sql: &str, acl_tables: &[String]) -> bool {
    let Some((before_where, predicates)) = sql.split_once(" where ") else {
        return false;
    };

    let joins_acl_table = acl_tables.iter().any(|table| {
        let join_prefix = format!(" join {}", table.to_ascii_lowercase());
        // Either the ACL table is the last token before WHERE, or it is
        // followed by a space (alias, ON clause, another JOIN, …).
        before_where.ends_with(&join_prefix)
            || before_where.contains(&format!("{} ", join_prefix))
    });

    joins_acl_table && where_clause_contains_user_id_bind(predicates)
}
/// Direct-ownership shape: `SELECT … FROM <T> WHERE … user_id = ?N` with
/// no JOIN.
///
/// Covers single-table reads where the row itself carries the owning
/// user id (`SELECT … FROM docs WHERE user_id = ?1`). An accompanying
/// `id = ?M` predicate is NOT required; the `user_id = ?N` predicate on
/// its own is enough, since any returned row must then be owned by the
/// bound user.
///
/// The check deliberately declines whenever ` join ` appears before the
/// first ` where `: the JOIN target may not be in the ACL list, so a
/// predicate such as `WHERE al.user_id = ?N` against an `audit_log` JOIN
/// would not pin the primary rows to the caller. Those queries are left
/// to the JOIN-through-ACL path.
///
/// NOTE(review): only the literal ` join ` keyword is detected; a
/// comma-separated FROM list (`FROM a, b`) is not treated as a join here.
fn matches_direct_user_id_predicate(sql: &str) -> bool {
    match sql.split_once(" where ") {
        Some((before_where, predicates)) if !before_where.contains(" join ") => {
            where_clause_contains_user_id_bind(predicates)
        }
        // No WHERE clause at all, or a JOIN precedes it.
        _ => false,
    }
}
/// Does the WHERE clause pin rows with `<table?>.user_id = ?<digits>` (or
/// `<table?>.user_id = $<digits>` for postgres-style placeholders, or
/// `<table?>.user_id = :name` for named binds)? The optional table
/// qualifier handles `gm.user_id` (alias-qualified) and bare `user_id`;
/// the spelling `userid` is accepted as well.
///
/// `where_clause` is the normalized text following ` where `. ORDER BY /
/// LIMIT / GROUP BY / HAVING tails are cut off first so the scan stays
/// inside the predicate region.
///
/// Returns `false` when the predicate region contains an `OR` connector.
/// With `WHERE id = ?1 OR user_id = ?2` the ownership predicate is only
/// one alternative, so rows owned by other users can still match. This is
/// intentionally coarse: `user_id = ?1 AND (a OR b)` is also declined,
/// which is a safe false negative.
fn where_clause_contains_user_id_bind(where_clause: &str) -> bool {
    // Strip ORDER BY / LIMIT / GROUP BY / HAVING tails so we don't
    // hunt past the WHERE clause for a `user_id = ?` that isn't
    // actually a predicate.
    let where_only = strip_trailing_clauses(where_clause);

    // Tokenize on non-identifier characters so `)or(` is caught while
    // identifiers that merely contain "or" (`author_id`) are not.
    let has_or_connector = where_only
        .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .any(|token| token == "or");
    if has_or_connector {
        return false;
    }

    ["user_id", "userid"].iter().any(|&needle| {
        where_only.match_indices(needle).any(|(idx, _)| {
            // Require a column boundary on the left so that
            // `posted_user_id` or `target_user_id` don't match; those
            // columns don't pin to the actor.
            let preceding = where_only[..idx].chars().next_back();
            if !is_column_boundary_left(preceding) {
                return false;
            }
            // Past the column name: optional whitespace, `=`, optional
            // whitespace, then something that looks like a bind parameter.
            where_only[idx + needle.len()..]
                .trim_start()
                .strip_prefix('=')
                .is_some_and(|after_eq| looks_like_bind_param(after_eq.trim_start()))
        })
    })
}
/// Reports whether `ch`, the character immediately preceding a candidate
/// column name, marks the start of an identifier.
///
/// `None` (the candidate sits at the very start of the text) counts as a
/// boundary, as do whitespace characters, `(`, `.` and `,`. Anything else,
/// such as the `_` in `posted_user_id`, means the candidate is the tail
/// of a longer identifier.
fn is_column_boundary_left(ch: Option<char>) -> bool {
    matches!(
        ch,
        None | Some(' ' | '\t' | '(' | '.' | ',' | '\n' | '\r')
    )
}
/// Reports whether `after_eq` (the text right after `=`, already trimmed
/// on the left) begins with a bind-parameter placeholder.
///
/// Recognized forms:
/// * `?`, `?1`, `?2`… (sqlite/sqlx anonymous): a leading `?` is enough;
/// * `$N` (postgres style): `$` must be followed by an ASCII digit;
/// * `:name` (named bind): `:` must be followed by an ASCII letter or
///   `_`.
///
/// Empty input and anything else (literals, column names, …) yield
/// `false`.
fn looks_like_bind_param(after_eq: &str) -> bool {
    match after_eq.as_bytes() {
        [b'?', ..] => true,
        [b'$', next, ..] => next.is_ascii_digit(),
        [b':', next, ..] => next.is_ascii_alphabetic() || *next == b'_',
        _ => false,
    }
}
/// Truncates `where_clause` at the earliest ` order by `, ` limit `,
/// ` group by ` or ` having ` keyword so that the WHERE scan stays inside
/// the predicate region.
///
/// The keywords are matched with a space on both sides, so the input is
/// expected to be normalized text. When none of them occurs the input is
/// returned unchanged.
fn strip_trailing_clauses(where_clause: &str) -> &str {
    const TAIL_KEYWORDS: [&str; 4] = [" order by ", " limit ", " group by ", " having "];

    let cut_at = TAIL_KEYWORDS
        .iter()
        .filter_map(|&keyword| where_clause.find(keyword))
        .min();

    match cut_at {
        Some(end) => &where_clause[..end],
        None => where_clause,
    }
}
/// Produces the canonical form the clause matchers operate on:
/// ASCII-lower-cased, every run of whitespace (including line breaks)
/// collapsed to one space, and exactly one space at both the start and
/// the end.
///
/// The surrounding spaces let searches such as `" where "` hit keywords
/// that sit at the very start or end of the query. Empty or
/// all-whitespace input yields a single space.
fn normalize_sql(sql: &str) -> String {
    let mut normalized = String::with_capacity(sql.len() + 2);
    normalized.push(' ');
    // Each whitespace-delimited word is followed by one space, which
    // supplies both the separators and the trailing space.
    for word in sql.split_whitespace() {
        normalized.push_str(word);
        normalized.push(' ');
    }
    normalized.make_ascii_lowercase();
    normalized
}
#[cfg(test)]
mod tests {
    use super::{SqlAuthClassification, classify_sql_query};

    /// ACL-table configuration shared by the tests.
    fn acl() -> Vec<String> {
        [
            "group_members",
            "org_memberships",
            "workspace_members",
            "tenant_members",
            "members",
            "share_grants",
        ]
        .iter()
        .map(|table| table.to_string())
        .collect()
    }

    /// Asserts that `sql` classifies as authorized under `acl()`.
    #[track_caller]
    fn assert_authorized(sql: &str) {
        assert_eq!(
            classify_sql_query(sql, &acl()),
            Some(SqlAuthClassification::Authorized)
        );
    }

    /// Asserts that `sql` is left unclassified (`None`) under `acl()`.
    #[track_caller]
    fn assert_not_authorized(sql: &str) {
        assert_eq!(classify_sql_query(sql, &acl()), None);
    }

    #[test]
    fn join_through_group_members_with_user_bind_is_authorized() {
        assert_authorized(
            "SELECT d.id, d.group_id, d.title \
             FROM docs d \
             JOIN group_members gm ON gm.group_id = d.group_id \
             WHERE gm.user_id = ?1 \
             ORDER BY d.updated_at DESC",
        );
    }

    #[test]
    fn join_through_workspace_members_with_postgres_bind() {
        assert_authorized(
            "SELECT t.* \
             FROM tickets t \
             INNER JOIN workspace_members wm ON wm.workspace_id = t.workspace_id \
             WHERE wm.user_id = $1",
        );
    }

    #[test]
    fn direct_user_id_predicate_is_authorized() {
        assert_authorized("SELECT id, name FROM peers WHERE user_id = ?1");
    }

    #[test]
    fn direct_id_and_user_id_predicate_is_authorized() {
        assert_authorized("SELECT title FROM docs WHERE id = ?1 AND user_id = ?2");
    }

    #[test]
    fn named_bind_is_authorized() {
        assert_authorized("SELECT * FROM peers WHERE user_id = :uid");
    }

    #[test]
    fn join_against_non_acl_table_is_not_authorized() {
        // `audit_log` is not in the configured ACL list, so the JOIN
        // doesn't pin rows to the bound user and the query stays
        // unauthorized.
        assert_not_authorized(
            "SELECT d.* FROM docs d \
             JOIN audit_log al ON al.doc_id = d.id \
             WHERE al.user_id = ?1",
        );
    }

    #[test]
    fn select_without_user_id_predicate_is_not_authorized() {
        assert_not_authorized("SELECT * FROM docs WHERE id = ?1");
    }

    #[test]
    fn non_select_query_is_not_authorized() {
        // INSERT/UPDATE/DELETE are not in scope for B3 even when the
        // WHERE clause names the user.
        assert_not_authorized("DELETE FROM docs WHERE user_id = ?1");
    }

    #[test]
    fn similar_column_names_do_not_trip_user_id_match() {
        // `posted_user_id` shouldn't satisfy the `user_id = ?` check;
        // that column doesn't pin to the actor.
        assert_not_authorized("SELECT * FROM posts WHERE posted_user_id = ?1");
    }

    #[test]
    fn order_by_after_user_id_is_handled() {
        assert_authorized(
            "SELECT * FROM peers WHERE user_id = ?1 ORDER BY created_at DESC LIMIT 50",
        );
    }

    #[test]
    fn empty_acl_list_disables_join_pattern_but_keeps_direct() {
        let no_acl: &[String] = &[];
        let join_sql = "SELECT * FROM docs d \
                        JOIN group_members gm ON gm.group_id = d.group_id \
                        WHERE gm.user_id = ?1";
        let direct_sql = "SELECT * FROM peers WHERE user_id = ?1";

        // No ACL configured → the join pattern can't fire…
        assert_eq!(classify_sql_query(join_sql, no_acl), None);
        // …but the direct predicate still authorizes.
        assert_eq!(
            classify_sql_query(direct_sql, no_acl),
            Some(SqlAuthClassification::Authorized)
        );
    }
}