mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
334 lines
12 KiB
Rust
334 lines
12 KiB
Rust
//! SQL literal semantics.
|
|
//!
|
|
//! A focused, lightweight SQL detector that classifies a literal SQL
|
|
//! query as **authorization-gated** when one of two patterns holds:
|
|
//!
|
|
//! 1. **JOIN-through-ACL**: `SELECT … FROM <T> JOIN <ACL> ON … WHERE
|
|
//! <ACL>.user_id = ?N` where `<ACL>` is in the configured ACL-table
|
|
//! list (`group_members`, `org_memberships`, …). The JOIN proves
|
|
//! that every returned row belongs to a tenant the bound `?N` user
|
|
//! is a member of.
|
|
//!
|
|
//! 2. **Direct ownership**: `WHERE … user_id = ?N` (with optional
|
|
//! additional predicates like `WHERE id = ?M AND user_id = ?N`).
|
|
//! The `user_id = ?N` predicate proves every returned row is owned
|
|
//! by the bound user.
|
|
//!
|
|
//! Detection is conservative: ambiguous shapes return `None`, and the
|
|
//! caller (in `extract::common::collect_row_population`) only synthesizes
|
|
//! an `AuthCheck` when classification is positive. False negatives
|
|
//! (missing real auth) are safe; false positives (spuriously claiming
|
|
//! auth) are not.
|
|
//!
|
|
//! No SQL parser dependency: the rules below operate on lower-cased
|
|
//! whitespace-normalised text and pattern-match the relevant clauses.
|
|
|
|
/// Classification of a literal SQL query for authorization purposes.
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub enum SqlAuthClassification {
|
|
/// Query is auth-gated. The JOIN (or direct WHERE) pins returned
|
|
/// rows to the bound user. We don't track *which* bind position
|
|
/// here, the caller treats whichever bind value flows into the
|
|
/// query as the user-id witness; that's safe because the caller
|
|
/// already requires the row binding to come from a `let X = …`
|
|
/// site we can name.
|
|
Authorized,
|
|
}
|
|
|
|
/// Classify `sql` as auth-gated under the configured ACL tables.
|
|
/// Returns `Some(Authorized)` when one of the recognized patterns
|
|
/// holds, `None` otherwise (conservative, unknown shapes are treated
|
|
/// as unauthorized).
|
|
pub fn classify_sql_query(sql: &str, acl_tables: &[String]) -> Option<SqlAuthClassification> {
|
|
let normalized = normalize_sql(sql);
|
|
if !normalized.trim_start().starts_with("select") {
|
|
// For B3 we only authorize SELECT queries, INSERT/UPDATE/DELETE
|
|
// need their own analysis and aren't in scope. (A literal
|
|
// `DELETE … WHERE user_id = ?N` could be safely authorized,
|
|
// but the call sites we care about for FP suppression are
|
|
// reads.)
|
|
return None;
|
|
}
|
|
|
|
if matches_join_through_acl(&normalized, acl_tables) {
|
|
return Some(SqlAuthClassification::Authorized);
|
|
}
|
|
if matches_direct_user_id_predicate(&normalized) {
|
|
return Some(SqlAuthClassification::Authorized);
|
|
}
|
|
None
|
|
}
|
|
|
|
/// `SELECT … FROM <T> [AS] <ALIAS>? JOIN <ACL> [AS] <GA>? ON … WHERE
|
|
/// <GA?>.user_id = ?N`, verifies that an ACL table appears in a JOIN
|
|
/// clause and that the WHERE clause contains a `<…>.user_id = ?` (or
|
|
/// bare `user_id = ?`) predicate. Order of the WHERE predicates
|
|
/// doesn't matter; AND/OR connectors are ignored.
|
|
fn matches_join_through_acl(sql: &str, acl_tables: &[String]) -> bool {
|
|
let Some(where_idx) = sql.find(" where ") else {
|
|
return false;
|
|
};
|
|
let from_to_where = &sql[..where_idx];
|
|
let where_clause = &sql[where_idx + " where ".len()..];
|
|
|
|
let has_acl_join = acl_tables.iter().any(|t| {
|
|
let lower = t.to_ascii_lowercase();
|
|
// " join <acl>" or " join <acl> "
|
|
from_to_where.contains(&format!(" join {} ", lower))
|
|
|| from_to_where.ends_with(&format!(" join {}", lower))
|
|
|| from_to_where.contains(&format!(" inner join {} ", lower))
|
|
|| from_to_where.contains(&format!(" left join {} ", lower))
|
|
|| from_to_where.contains(&format!(" right join {} ", lower))
|
|
});
|
|
if !has_acl_join {
|
|
return false;
|
|
}
|
|
|
|
where_clause_contains_user_id_bind(where_clause)
|
|
}
|
|
|
|
/// Direct ownership: `SELECT … FROM <T> WHERE … user_id = ?N`, no
|
|
/// JOIN. Covers single-table reads where the row already carries the
|
|
/// owning user id (`SELECT … FROM docs WHERE user_id = ?1`). We do
|
|
/// NOT require `id = ?M` to also be present; the `user_id = ?N`
|
|
/// predicate alone is sufficient, since any row returned must be
|
|
/// owned by the bound user.
|
|
///
|
|
/// Refuses to fire when a JOIN is present, the JOIN target may not
|
|
/// be in the ACL list, so the WHERE predicate (which may apply to
|
|
/// the joined table, e.g. `WHERE al.user_id = ?N` against an
|
|
/// `audit_log` JOIN) doesn't actually pin the primary rows to the
|
|
/// caller. The JOIN-through-ACL path handles those cases explicitly.
|
|
fn matches_direct_user_id_predicate(sql: &str) -> bool {
|
|
let Some(where_idx) = sql.find(" where ") else {
|
|
return false;
|
|
};
|
|
let from_to_where = &sql[..where_idx];
|
|
if from_to_where.contains(" join ") {
|
|
return false;
|
|
}
|
|
let where_clause = &sql[where_idx + " where ".len()..];
|
|
where_clause_contains_user_id_bind(where_clause)
|
|
}
|
|
|
|
/// Does the WHERE clause contain `<table?>.user_id = ?<digits>` (or
|
|
/// `<table?>.user_id = $<digits>` for postgres-style placeholders, or
|
|
/// `<table?>.user_id = :name` for named binds)? The optional table
|
|
/// qualifier handles `gm.user_id` (alias-qualified) and bare `user_id`.
|
|
fn where_clause_contains_user_id_bind(where_clause: &str) -> bool {
|
|
// Strip ORDER BY / LIMIT / GROUP BY / HAVING tails so we don't
|
|
// hunt past the WHERE clause for a `user_id = ?` that isn't
|
|
// actually a predicate.
|
|
let where_only = strip_trailing_clauses(where_clause);
|
|
let needles = ["user_id", "userid"];
|
|
for needle in needles {
|
|
for (idx, _) in where_only.match_indices(needle) {
|
|
// Make sure this is a column boundary on the left side
|
|
// (avoid matching `posted_user_id` or `target_user_id`
|
|
//, those don't pin to the actor).
|
|
let before = where_only[..idx].chars().last();
|
|
if !is_column_boundary_left(before) {
|
|
continue;
|
|
}
|
|
// Skip past `user_id`. Trim whitespace then look for `=`.
|
|
let rest = &where_only[idx + needle.len()..];
|
|
let rest = rest.trim_start();
|
|
if !rest.starts_with('=') {
|
|
continue;
|
|
}
|
|
let after_eq = rest[1..].trim_start();
|
|
if looks_like_bind_param(after_eq) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
false
|
|
}
|
|
|
|
fn is_column_boundary_left(ch: Option<char>) -> bool {
|
|
match ch {
|
|
None => true,
|
|
Some(c) => matches!(c, ' ' | '\t' | '(' | '.' | ',' | '\n' | '\r'),
|
|
}
|
|
}
|
|
|
|
fn looks_like_bind_param(after_eq: &str) -> bool {
|
|
let bytes = after_eq.as_bytes();
|
|
if bytes.is_empty() {
|
|
return false;
|
|
}
|
|
match bytes[0] {
|
|
// ?N (sqlite/sqlx anonymous), accept ?, ?1, ?2…
|
|
b'?' => true,
|
|
// $N (postgres style), require a digit after.
|
|
b'$' => bytes.get(1).is_some_and(|b| b.is_ascii_digit()),
|
|
// :name (named bind), require an identifier char after.
|
|
b':' => bytes
|
|
.get(1)
|
|
.is_some_and(|b| b.is_ascii_alphabetic() || *b == b'_'),
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
/// Cut off ORDER BY / LIMIT / GROUP BY / HAVING tails so the WHERE
|
|
/// scan stays inside the predicate region.
|
|
fn strip_trailing_clauses(where_clause: &str) -> &str {
|
|
let candidates = [" order by ", " limit ", " group by ", " having "];
|
|
let mut end = where_clause.len();
|
|
for cand in candidates {
|
|
if let Some(idx) = where_clause.find(cand) {
|
|
end = end.min(idx);
|
|
}
|
|
}
|
|
&where_clause[..end]
|
|
}
|
|
|
|
/// Lower-case + collapse whitespace + flatten line breaks so the
|
|
/// patterns above can use single-space tokens.
|
|
fn normalize_sql(sql: &str) -> String {
|
|
let mut out = String::with_capacity(sql.len());
|
|
let mut prev_space = true;
|
|
// Surround with leading/trailing space so " where " etc. searches
|
|
// hit boundary cases at the very start/end.
|
|
out.push(' ');
|
|
for ch in sql.chars() {
|
|
if ch.is_whitespace() {
|
|
if !prev_space {
|
|
out.push(' ');
|
|
prev_space = true;
|
|
}
|
|
} else {
|
|
out.push(ch.to_ascii_lowercase());
|
|
prev_space = false;
|
|
}
|
|
}
|
|
if !out.ends_with(' ') {
|
|
out.push(' ');
|
|
}
|
|
out
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::{SqlAuthClassification, classify_sql_query};
|
|
|
|
fn acl() -> Vec<String> {
|
|
vec![
|
|
"group_members".into(),
|
|
"org_memberships".into(),
|
|
"workspace_members".into(),
|
|
"tenant_members".into(),
|
|
"members".into(),
|
|
"share_grants".into(),
|
|
]
|
|
}
|
|
|
|
#[test]
|
|
fn join_through_group_members_with_user_bind_is_authorized() {
|
|
let sql = "SELECT d.id, d.group_id, d.title \
|
|
FROM docs d \
|
|
JOIN group_members gm ON gm.group_id = d.group_id \
|
|
WHERE gm.user_id = ?1 \
|
|
ORDER BY d.updated_at DESC";
|
|
assert_eq!(
|
|
classify_sql_query(sql, &acl()),
|
|
Some(SqlAuthClassification::Authorized)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn join_through_workspace_members_with_postgres_bind() {
|
|
let sql = "SELECT t.* \
|
|
FROM tickets t \
|
|
INNER JOIN workspace_members wm ON wm.workspace_id = t.workspace_id \
|
|
WHERE wm.user_id = $1";
|
|
assert_eq!(
|
|
classify_sql_query(sql, &acl()),
|
|
Some(SqlAuthClassification::Authorized)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn direct_user_id_predicate_is_authorized() {
|
|
let sql = "SELECT id, name FROM peers WHERE user_id = ?1";
|
|
assert_eq!(
|
|
classify_sql_query(sql, &acl()),
|
|
Some(SqlAuthClassification::Authorized)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn direct_id_and_user_id_predicate_is_authorized() {
|
|
let sql = "SELECT title FROM docs WHERE id = ?1 AND user_id = ?2";
|
|
assert_eq!(
|
|
classify_sql_query(sql, &acl()),
|
|
Some(SqlAuthClassification::Authorized)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn named_bind_is_authorized() {
|
|
let sql = "SELECT * FROM peers WHERE user_id = :uid";
|
|
assert_eq!(
|
|
classify_sql_query(sql, &acl()),
|
|
Some(SqlAuthClassification::Authorized)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn join_against_non_acl_table_is_not_authorized() {
|
|
// `audit_log` is not in the configured ACL list, JOIN doesn't
|
|
// pin rows to the bound user, so the query is unauthorized.
|
|
let sql = "SELECT d.* FROM docs d \
|
|
JOIN audit_log al ON al.doc_id = d.id \
|
|
WHERE al.user_id = ?1";
|
|
assert_eq!(classify_sql_query(sql, &acl()), None);
|
|
}
|
|
|
|
#[test]
|
|
fn select_without_user_id_predicate_is_not_authorized() {
|
|
let sql = "SELECT * FROM docs WHERE id = ?1";
|
|
assert_eq!(classify_sql_query(sql, &acl()), None);
|
|
}
|
|
|
|
#[test]
|
|
fn non_select_query_is_not_authorized() {
|
|
// INSERT/UPDATE/DELETE are not in scope for B3 even when the
|
|
// WHERE clause names the user.
|
|
let sql = "DELETE FROM docs WHERE user_id = ?1";
|
|
assert_eq!(classify_sql_query(sql, &acl()), None);
|
|
}
|
|
|
|
#[test]
|
|
fn similar_column_names_do_not_trip_user_id_match() {
|
|
// `posted_user_id` shouldn't satisfy the `user_id = ?` check ,
|
|
// that column doesn't pin to the actor.
|
|
let sql = "SELECT * FROM posts WHERE posted_user_id = ?1";
|
|
assert_eq!(classify_sql_query(sql, &acl()), None);
|
|
}
|
|
|
|
#[test]
|
|
fn order_by_after_user_id_is_handled() {
|
|
let sql = "SELECT * FROM peers WHERE user_id = ?1 ORDER BY created_at DESC LIMIT 50";
|
|
assert_eq!(
|
|
classify_sql_query(sql, &acl()),
|
|
Some(SqlAuthClassification::Authorized)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn empty_acl_list_disables_join_pattern_but_keeps_direct() {
|
|
let join_sql = "SELECT * FROM docs d \
|
|
JOIN group_members gm ON gm.group_id = d.group_id \
|
|
WHERE gm.user_id = ?1";
|
|
let direct_sql = "SELECT * FROM peers WHERE user_id = ?1";
|
|
let empty: Vec<String> = Vec::new();
|
|
// No ACL configured → join pattern can't fire, but direct
|
|
// predicate still authorizes.
|
|
assert_eq!(classify_sql_query(join_sql, &empty), None);
|
|
assert_eq!(
|
|
classify_sql_query(direct_sql, &empty),
|
|
Some(SqlAuthClassification::Authorized)
|
|
);
|
|
}
|
|
}
|