Phase 1 (#33)

* chore: Exclude CLAUDE.md from Cargo.toml * feat: add callgraph module and integrate into main analysis flow * feat: enhance CLI with new severity filtering and analysis modes * feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling * feat: implement state-model dataflow analysis for resource lifecycle and auth state * feat: enhance diagnostic output formatting and add evidence structure * feat: implement attack surface ranking for diagnostics with scoring and sorting * feat: add comprehensive documentation for installation, usage, and rules reference * feat: add multiple language support for command execution and evaluation endpoints * feat: implement inline suppression for findings using `nyx:ignore` comments * feat: add confidence levels to AST patterns and update output structure * feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets * feat: bump version to 0.4.0 and update changelog with new features and improvements * feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
2026-06-15 20:05:13 +02:00 · 2026-02-25 21:16:36 -05:00 · 2026-02-25 21:16:36 -05:00 · 1bbe4b1cfb
commit 1bbe4b1cfb
parent 19b578c5c4
456 changed files with 25628 additions and 1228 deletions
--- a/tests/fixtures/real_world/python/cfg/context_manager.expect.json
+++ b/tests/fixtures/real_world/python/cfg/context_manager.expect.json
@ -0,0 +1,25 @@
+{
+  "description": "File handle resource management comparing manual open vs context manager",
+  "tags": [
+    "cfg",
+    "resource-leak",
+    "context-manager",
+    "file-io"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "cfg-resource-leak",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        1,
+        7
+      ],
+      "evidence_contains": [],
+      "notes": "read_file_unsafe opens file handle but never closes it"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/cfg/context_manager.py
+++ b/tests/fixtures/real_world/python/cfg/context_manager.py
@ -0,0 +1,15 @@
+def read_file_unsafe(path):  # fixture: leaking variant, paired with a cfg-resource-leak expectation
+    f = open(path, 'r')  # handle acquired without a context manager
+    data = f.read()
+    return data  # returns while f is still open
+    # f never closed: there is no f.close() anywhere in this function
+
+def read_file_safe(path):  # fixture: non-leaking counterpart of read_file_unsafe
+    with open(path, 'r') as f:  # handle is scoped to the with block
+        data = f.read()
+    return data  # returned after the with block has exited
+
+def nested_context(path1, path2):  # fixture: copies path1 into path2 using two nested with blocks
+    with open(path1, 'r') as f1:  # source, opened for reading
+        with open(path2, 'w') as f2:  # destination, opened for writing
+            f2.write(f1.read())  # whole source content written in a single call
--- a/tests/fixtures/real_world/python/cfg/early_return.expect.json
+++ b/tests/fixtures/real_world/python/cfg/early_return.expect.json
@ -0,0 +1,36 @@
+{
+  "description": "Early return leaks file handle when header check fails",
+  "tags": [
+    "cfg",
+    "resource-leak",
+    "early-return",
+    "file-io"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "cfg-resource-leak",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        1,
+        12
+      ],
+      "evidence_contains": [],
+      "notes": "process_file leaks file handle on early return when header does not start with #"
+    },
+    {
+      "rule_id": "state-resource-leak-possible",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        2,
+        9
+      ],
+      "evidence_contains": [],
+      "notes": "File handle leaked on one branch of the conditional"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/cfg/early_return.py
+++ b/tests/fixtures/real_world/python/cfg/early_return.py
@ -0,0 +1,19 @@
+import os
+
+def process_file(path):  # fixture: f is closed on the fall-through path only
+    f = open(path, 'r')
+    header = f.readline()  # first line decides whether the rest is read
+    if not header.startswith('#'):
+        return None  # leak: f not closed
+    data = f.read()
+    f.close()  # reached only when the header starts with '#'
+    return data
+
+def process_with_guard(path):  # fixture: the early return happens before any handle exists
+    if not os.path.exists(path):
+        return None  # nothing has been opened yet, so nothing to close
+    f = open(path, 'r')
+    try:
+        return f.read()
+    finally:
+        f.close()  # runs for the return and for any exception raised by read()
--- a/tests/fixtures/real_world/python/cfg/raise_terminator.expect.json
+++ b/tests/fixtures/real_world/python/cfg/raise_terminator.expect.json
@ -0,0 +1,37 @@
+{
+  "description": "Validator raises exception on invalid input, acting as a guard before subprocess",
+  "tags": [
+    "cfg",
+    "validation",
+    "raise",
+    "flask",
+    "subprocess"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        14,
+        21
+      ],
+      "evidence_contains": [],
+      "notes": "Validator raise acts as guard - ideally no taint finding since invalid input is rejected"
+    },
+    {
+      "rule_id": "cfg-unguarded-sink",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        15,
+        20
+      ],
+      "evidence_contains": [],
+      "notes": "Subprocess call is guarded by validate_cmd raise - should not trigger"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/cfg/raise_terminator.py
+++ b/tests/fixtures/real_world/python/cfg/raise_terminator.py
@ -0,0 +1,19 @@
+from flask import Flask, request
+import subprocess
+
+app = Flask(__name__)
+
+class ValidationError(Exception):  # raised by validate_cmd when cmd.isalnum() is false
+    pass
+
+def validate_cmd(cmd):  # fixture: guard that raises instead of returning on bad input
+    if not cmd.isalnum():
+        raise ValidationError("Invalid command")  # terminates this path; nothing is returned
+    return cmd  # reached only when cmd.isalnum() is true
+
+@app.route('/exec')
+def exec_cmd():  # fixture: the subprocess call is reached only after validate_cmd returns
+    cmd = request.args.get('cmd')  # request-supplied value
+    validated = validate_cmd(cmd)  # raises ValidationError unless cmd.isalnum()
+    result = subprocess.run([validated], capture_output=True)  # argv list, no shell=True
+    return result.stdout.decode()
--- a/tests/fixtures/real_world/python/cfg/try_except_resource.expect.json
+++ b/tests/fixtures/real_world/python/cfg/try_except_resource.expect.json
@ -0,0 +1,25 @@
+{
+  "description": "Database connection resource management with try/except/finally vs missing close",
+  "tags": [
+    "cfg",
+    "resource-leak",
+    "sqlite",
+    "try-finally"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "cfg-resource-leak",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        14,
+        23
+      ],
+      "evidence_contains": [],
+      "notes": "query_db_leak opens sqlite3 connection but never closes it"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/cfg/try_except_resource.py
+++ b/tests/fixtures/real_world/python/cfg/try_except_resource.py
@ -0,0 +1,21 @@
+import sqlite3
+
+def query_db(path, sql):  # fixture: connection is closed in finally on every path
+    conn = sqlite3.connect(path)
+    try:
+        cursor = conn.cursor()
+        cursor.execute(sql)
+        results = cursor.fetchall()
+        return results
+    except Exception as e:
+        print(f"Error: {e}")  # error is printed, then the function falls through and returns None
+    finally:
+        conn.close()  # runs after the return above and after the except branch
+
+def query_db_leak(path, sql):  # fixture: leaking variant, paired with a cfg-resource-leak expectation
+    conn = sqlite3.connect(path)  # connection acquired with no try/finally around its use
+    cursor = conn.cursor()
+    cursor.execute(sql)
+    results = cursor.fetchall()
+    return results  # returns while conn is still open
+    # conn never closed: there is no conn.close() anywhere in this function
--- a/tests/fixtures/real_world/python/mixed/flask_full_stack.expect.json
+++ b/tests/fixtures/real_world/python/mixed/flask_full_stack.expect.json
@ -0,0 +1,72 @@
+{
+  "description": "Flask app with multiple vulnerability types: cmdi, path traversal, eval, resource leak",
+  "tags": [
+    "taint",
+    "state",
+    "cmdi",
+    "path-traversal",
+    "eval",
+    "flask",
+    "mixed"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        7,
+        13
+      ],
+      "evidence_contains": [],
+      "notes": "request.args.get('cmd') flows into subprocess.run with shell=True"
+    },
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        13,
+        20
+      ],
+      "evidence_contains": [],
+      "notes": "request.args.get('path') flows into open() - path traversal"
+    },
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        21,
+        26
+      ],
+      "evidence_contains": [],
+      "notes": "request.args.get('expr') flows into eval()"
+    },
+    {
+      "rule_id": "py.code_exec.eval",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        22,
+        26
+      ],
+      "evidence_contains": [],
+      "notes": "eval() is a dangerous function - AST pattern match"
+    },
+    {
+      "rule_id": "state-resource-leak",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        12,
+        21
+      ],
+      "evidence_contains": [],
+      "notes": "File handle opened in read_file but never closed"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/mixed/flask_full_stack.py
+++ b/tests/fixtures/real_world/python/mixed/flask_full_stack.py
@ -0,0 +1,24 @@
+from flask import Flask, request
+import subprocess
+import os
+
+app = Flask(__name__)
+
+@app.route('/api/exec')
+def execute():  # fixture: command injection, paired with a taint-unsanitised-flow expectation
+    cmd = request.args.get('cmd')  # source: request parameter
+    result = subprocess.run(cmd, shell=True, capture_output=True)  # sink: unvalidated cmd with shell=True
+    return result.stdout.decode()
+
+@app.route('/api/read')
+def read_file():  # fixture: path traversal plus a leaked file handle
+    path = request.args.get('path')  # source: request parameter
+    f = open(path, 'r')  # sink: request-supplied path opened as-is
+    data = f.read()
+    return data  # returns while f is still open
+    # f leaked (no f.close() in this function) + path traversal taint
+
+@app.route('/api/eval')
+def eval_expr():  # fixture: paired with py.code_exec.eval and taint-unsanitised-flow expectations
+    expr = request.args.get('expr')  # source: request parameter
+    return str(eval(expr))  # sink: eval() on the request-supplied string
--- a/tests/fixtures/real_world/python/mixed/taint_through_file.expect.json
+++ b/tests/fixtures/real_world/python/mixed/taint_through_file.expect.json
@ -0,0 +1,38 @@
+{
+  "description": "User-controlled filename in open() with resource leak on early return",
+  "tags": [
+    "taint",
+    "state",
+    "path-traversal",
+    "resource-leak",
+    "flask",
+    "mixed"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        6,
+        13
+      ],
+      "evidence_contains": [],
+      "notes": "request.args.get('name') flows through os.path.join into open()"
+    },
+    {
+      "rule_id": "state-resource-leak-possible",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        9,
+        18
+      ],
+      "evidence_contains": [],
+      "notes": "File handle leaked when early return triggered by data length check"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/mixed/taint_through_file.py
+++ b/tests/fixtures/real_world/python/mixed/taint_through_file.py
@ -0,0 +1,17 @@
+from flask import Flask, request
+import os
+
+app = Flask(__name__)
+
+@app.route('/save')
+def save_data():  # fixture: tainted path into open() plus a leak on the early return
+    filename = request.args.get('name')  # source: request parameter
+    data = request.args.get('data')
+    filepath = os.path.join('/tmp', filename)  # filename is joined without any validation
+    f = open(filepath, 'w')  # sink: path derived from the request
+    f.write(data)
+    if len(data) > 10000:  # size is checked only after data has already been written
+        return 'Too large', 413
+        # f leaks on early return: f.close() below is skipped on this path
+    f.close()
+    return 'OK'
--- a/tests/fixtures/real_world/python/state/branch_leak.expect.json
+++ b/tests/fixtures/real_world/python/state/branch_leak.expect.json
@ -0,0 +1,25 @@
+{
+  "description": "File handle leaked in else branch of conditional",
+  "tags": [
+    "state",
+    "resource-leak",
+    "branch",
+    "file-io"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "state-resource-leak-possible",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        1,
+        13
+      ],
+      "evidence_contains": [],
+      "notes": "File handle closed in if branch but leaked in else branch"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/state/branch_leak.py
+++ b/tests/fixtures/real_world/python/state/branch_leak.py
@ -0,0 +1,11 @@
+import os
+
+def conditional_open(path, flag):  # fixture: f is closed on one branch only
+    f = open(path, 'r')  # opened before the branch, so both branches hold the handle
+    if flag:
+        data = f.read()
+        f.close()  # the only close() in this function
+        return data
+    else:
+        return "skipped"  # returns while f is still open
+        # f leaked in else branch
--- a/tests/fixtures/real_world/python/state/file_lifecycle.expect.json
+++ b/tests/fixtures/real_world/python/state/file_lifecycle.expect.json
@ -0,0 +1,48 @@
+{
+  "description": "File handle lifecycle patterns: leak, proper close, double close, use after close",
+  "tags": [
+    "state",
+    "resource-leak",
+    "double-close",
+    "use-after-close",
+    "file-io"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "state-resource-leak",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        1,
+        6
+      ],
+      "evidence_contains": [],
+      "notes": "read_and_leak opens file but never closes it"
+    },
+    {
+      "rule_id": "state-double-close",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        10,
+        17
+      ],
+      "evidence_contains": [],
+      "notes": "double_close calls f.close() twice"
+    },
+    {
+      "rule_id": "state-use-after-close",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        15,
+        23
+      ],
+      "evidence_contains": [],
+      "notes": "use_after_close reads from file handle after closing it"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/state/file_lifecycle.py
+++ b/tests/fixtures/real_world/python/state/file_lifecycle.py
@ -0,0 +1,21 @@
+def read_and_leak(path):  # fixture: paired with a state-resource-leak expectation
+    f = open(path, 'r')
+    data = f.read()
+    return data  # no f.close() before returning
+
+def read_and_close(path):  # fixture: open, read, close in straight-line order
+    f = open(path, 'r')
+    data = f.read()
+    f.close()  # closed before the return
+    return data
+
+def double_close(path):  # fixture: paired with a state-double-close expectation
+    f = open(path, 'r')
+    f.close()
+    f.close()  # second close on the same handle
+
+def use_after_close(path):  # fixture: paired with a state-use-after-close expectation
+    f = open(path, 'r')
+    f.close()
+    data = f.read()  # read issued after f.close()
+    return data
--- a/tests/fixtures/real_world/python/state/socket_lifecycle.expect.json
+++ b/tests/fixtures/real_world/python/state/socket_lifecycle.expect.json
@ -0,0 +1,24 @@
+{
+  "description": "Socket resource lifecycle - leaked vs properly closed with try/finally",
+  "tags": [
+    "state",
+    "resource-leak",
+    "socket"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "state-resource-leak",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        1,
+        10
+      ],
+      "evidence_contains": [],
+      "notes": "connect_and_leak creates socket but never closes it"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/state/socket_lifecycle.py
+++ b/tests/fixtures/real_world/python/state/socket_lifecycle.py
@ -0,0 +1,18 @@
+import socket
+
+def connect_and_leak(host, port):  # fixture: paired with a state-resource-leak expectation
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    s.connect((host, port))
+    s.send(b'hello')
+    data = s.recv(1024)
+    return data  # no s.close() before returning
+
+def connect_and_close(host, port):  # fixture: socket is closed via try/finally
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    s.connect((host, port))  # NOTE(review): outside the try, so a failing connect() skips s.close()
+    try:
+        s.send(b'hello')
+        data = s.recv(1024)
+        return data
+    finally:
+        s.close()  # runs for the return and for exceptions raised inside the try
--- a/tests/fixtures/real_world/python/state/with_statement.expect.json
+++ b/tests/fixtures/real_world/python/state/with_statement.expect.json
@ -0,0 +1,25 @@
+{
+  "description": "Context manager vs manual open - else branch leaks file handle",
+  "tags": [
+    "state",
+    "resource-leak",
+    "context-manager",
+    "file-io"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "state-resource-leak",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        12,
+        19
+      ],
+      "evidence_contains": [],
+      "notes": "else branch opens file manually and never closes it"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/state/with_statement.py
+++ b/tests/fixtures/real_world/python/state/with_statement.py
@ -0,0 +1,17 @@
+def safe_with(path):  # fixture: handle managed entirely by a with block
+    with open(path, 'r') as f:
+        return f.read()  # returning from inside the with block still exits it
+
+def nested_with(src, dst):  # fixture: copies src into dst using two nested with blocks
+    with open(src, 'r') as reader:  # source, opened for reading
+        with open(dst, 'w') as writer:  # destination, opened for writing
+            writer.write(reader.read())  # whole source content written in a single call
+
+def conditional_with(path, mode):  # fixture: one branch uses with, the other a bare open()
+    if mode == 'read':
+        with open(path, 'r') as f:
+            return f.read()
+    else:
+        f = open(path, 'w')  # no context manager on this branch
+        f.write('default')
+        # f not closed in else branch: the function ends without f.close()
--- a/tests/fixtures/real_world/python/taint/cmdi_subprocess.expect.json
+++ b/tests/fixtures/real_world/python/taint/cmdi_subprocess.expect.json
@ -0,0 +1,25 @@
+{
+  "description": "Flask handler passes user input directly to subprocess.run with shell=True",
+  "tags": [
+    "taint",
+    "cmdi",
+    "flask",
+    "subprocess"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        6,
+        12
+      ],
+      "evidence_contains": [],
+      "notes": "request.args.get('cmd') flows directly into subprocess.run with shell=True"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/taint/cmdi_subprocess.py
+++ b/tests/fixtures/real_world/python/taint/cmdi_subprocess.py
@ -0,0 +1,19 @@
+from flask import Flask, request
+import subprocess
+
+app = Flask(__name__)
+
+@app.route('/run')
+def run_cmd():  # fixture: command injection, paired with a taint-unsanitised-flow expectation
+    cmd = request.args.get('cmd')  # source: request parameter
+    result = subprocess.run(cmd, shell=True, capture_output=True)  # sink: unvalidated cmd with shell=True
+    return result.stdout.decode()
+
+@app.route('/run-safe')
+def run_cmd_safe():  # fixture: allowlisted counterpart of run_cmd
+    cmd = request.args.get('cmd')
+    allowed = ['ls', 'date', 'whoami']  # the only command names accepted
+    if cmd not in allowed:
+        return 'Not allowed', 403  # rejected before reaching subprocess
+    result = subprocess.run([cmd], capture_output=True)  # argv list, no shell=True
+    return result.stdout.decode()
--- a/tests/fixtures/real_world/python/taint/eval_input.expect.json
+++ b/tests/fixtures/real_world/python/taint/eval_input.expect.json
@ -0,0 +1,36 @@
+{
+  "description": "eval() called with user-controlled input from Flask request",
+  "tags": [
+    "taint",
+    "code-exec",
+    "eval",
+    "flask"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "py.code_exec.eval",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        6,
+        10
+      ],
+      "evidence_contains": [],
+      "notes": "eval() is an AST-level dangerous function pattern"
+    },
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        5,
+        11
+      ],
+      "evidence_contains": [],
+      "notes": "request.args.get('expr') flows directly into eval()"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/taint/eval_input.py
+++ b/tests/fixtures/real_world/python/taint/eval_input.py
@ -0,0 +1,9 @@
+from flask import Flask, request
+
+app = Flask(__name__)
+
+@app.route('/calc')
+def calculate():  # fixture: paired with py.code_exec.eval and taint-unsanitised-flow expectations
+    expr = request.args.get('expr')  # source: request parameter
+    result = eval(expr)  # sink: eval() on the request-supplied string
+    return str(result)
--- a/tests/fixtures/real_world/python/taint/path_traversal.expect.json
+++ b/tests/fixtures/real_world/python/taint/path_traversal.expect.json
@ -0,0 +1,25 @@
+{
+  "description": "Path traversal via user-controlled filename passed to send_file",
+  "tags": [
+    "taint",
+    "path-traversal",
+    "flask",
+    "file-io"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        6,
+        12
+      ],
+      "evidence_contains": [],
+      "notes": "request.args.get('file') flows into os.path.join then send_file without validation"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/taint/path_traversal.py
+++ b/tests/fixtures/real_world/python/taint/path_traversal.py
@ -0,0 +1,19 @@
+from flask import Flask, request, send_file
+import os
+
+app = Flask(__name__)
+
+@app.route('/download')
+def download():  # fixture: path traversal, paired with a taint-unsanitised-flow expectation
+    filename = request.args.get('file')  # source: request parameter
+    filepath = os.path.join('/uploads', filename)  # filename is joined without any validation
+    return send_file(filepath)  # sink: joined path is served as-is
+
+@app.route('/download-safe')
+def download_safe():
+    filename = request.args.get('file')
+    filepath = os.path.join('/uploads', filename)
+    realpath = os.path.realpath(filepath)
+    if not realpath.startswith('/uploads'):
+        return 'Forbidden', 403
+    return send_file(realpath)
--- a/tests/fixtures/real_world/python/taint/pickle_deser.expect.json
+++ b/tests/fixtures/real_world/python/taint/pickle_deser.expect.json
@ -0,0 +1,37 @@
+{
+  "description": "Pickle deserialization of user-supplied base64 data",
+  "tags": [
+    "taint",
+    "deser",
+    "pickle",
+    "flask"
+  ],
+  "modes": [
+    "full",
+    "ast"
+  ],
+  "expected": [
+    {
+      "rule_id": "py.deser.pickle_loads",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        9,
+        13
+      ],
+      "evidence_contains": [],
+      "notes": "pickle.loads on user-controlled data enables arbitrary code execution"
+    },
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        7,
+        14
+      ],
+      "evidence_contains": [],
+      "notes": "User data flows through base64 decode into pickle.loads - aspirational taint finding"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/taint/pickle_deser.py
+++ b/tests/fixtures/real_world/python/taint/pickle_deser.py
@ -0,0 +1,12 @@
+from flask import Flask, request
+import pickle
+import base64
+
+app = Flask(__name__)
+
+@app.route('/load', methods=['POST'])
+def load_object():  # fixture: paired with a py.deser.pickle_loads expectation
+    data = request.get_data()  # source: request-supplied data
+    decoded = base64.b64decode(data)
+    obj = pickle.loads(decoded)  # sink: unpickles the decoded request data
+    return str(obj)
--- a/tests/fixtures/real_world/python/taint/sqli_concat.expect.json
+++ b/tests/fixtures/real_world/python/taint/sqli_concat.expect.json
@ -0,0 +1,36 @@
+{
+  "description": "SQL injection via string concatenation with user input in cursor.execute",
+  "tags": [
+    "taint",
+    "sqli",
+    "flask",
+    "sqlite"
+  ],
+  "modes": [
+    "full"
+  ],
+  "expected": [
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        6,
+        14
+      ],
+      "evidence_contains": [],
+      "notes": "request.args.get('id') concatenated directly into SQL query string"
+    },
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        14,
+        22
+      ],
+      "evidence_contains": [],
+      "notes": "Safe version uses parameterized query - should not trigger"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/taint/sqli_concat.py
+++ b/tests/fixtures/real_world/python/taint/sqli_concat.py
@ -0,0 +1,20 @@
+from flask import Flask, request
+import sqlite3
+
+app = Flask(__name__)
+
+@app.route('/user')
+def get_user():  # fixture: SQL injection, paired with a taint-unsanitised-flow expectation
+    user_id = request.args.get('id')  # source: request parameter
+    conn = sqlite3.connect('app.db')  # NOTE(review): conn is never closed; expect.json lists no leak finding for it
+    cursor = conn.cursor()
+    cursor.execute("SELECT * FROM users WHERE id = " + user_id)  # sink: user_id concatenated into the SQL text
+    return str(cursor.fetchall())
+
+@app.route('/user-safe')
+def get_user_safe():  # fixture: parameterized counterpart of get_user
+    user_id = request.args.get('id')
+    conn = sqlite3.connect('app.db')  # NOTE(review): conn is never closed; expect.json lists no leak finding for it
+    cursor = conn.cursor()
+    cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,))  # user_id bound as a parameter, not spliced into the SQL
+    return str(cursor.fetchall())
--- a/tests/fixtures/real_world/python/taint/yaml_deser.expect.json
+++ b/tests/fixtures/real_world/python/taint/yaml_deser.expect.json
@ -0,0 +1,37 @@
+{
+  "description": "Unsafe YAML deserialization with yaml.load vs safe yaml.safe_load",
+  "tags": [
+    "taint",
+    "deser",
+    "yaml",
+    "flask"
+  ],
+  "modes": [
+    "full",
+    "ast"
+  ],
+  "expected": [
+    {
+      "rule_id": "py.deser.yaml_load",
+      "severity": null,
+      "must_match": true,
+      "line_range": [
+        7,
+        11
+      ],
+      "evidence_contains": [],
+      "notes": "yaml.load with FullLoader is unsafe with user-controlled data"
+    },
+    {
+      "rule_id": "taint-unsanitised-flow",
+      "severity": null,
+      "must_match": false,
+      "line_range": [
+        6,
+        12
+      ],
+      "evidence_contains": [],
+      "notes": "User data flows into yaml.load - aspirational taint finding"
+    }
+  ]
+}
--- a/tests/fixtures/real_world/python/taint/yaml_deser.py
+++ b/tests/fixtures/real_world/python/taint/yaml_deser.py
@ -0,0 +1,16 @@
+from flask import Flask, request
+import yaml
+
+app = Flask(__name__)
+
+@app.route('/parse', methods=['POST'])
+def parse_config():  # fixture: paired with a py.deser.yaml_load expectation
+    data = request.get_data()  # source: request-supplied data
+    config = yaml.load(data, Loader=yaml.FullLoader)  # sink: yaml.load rather than yaml.safe_load
+    return str(config)
+
+@app.route('/parse-safe', methods=['POST'])
+def parse_config_safe():  # fixture: safe_load counterpart of parse_config
+    data = request.get_data()
+    config = yaml.safe_load(data)  # same input, parsed with yaml.safe_load
+    return str(config)