fix(sqlserver): hoist leading CTEs out of row-limit derived-table wrap (#311)

* test(sql): cover leading CTE row-limit wrapping

* fix(sql): hoist leading CTEs before generic row limits

* fix(sqlserver): hoist leading CTEs before TOP row limits

* test(scan): note relationship limiter coverage boundary

* chore: sync uv.lock to ktx-daemon/ktx-sl 0.13.0
This commit is contained in:
Andrey Avtomonov 2026-06-23 15:03:46 +02:00 committed by GitHub
parent 9f715f93f1
commit c815e10fb3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 275 additions and 5 deletions

View file

@ -1,4 +1,4 @@
import { assertReadOnlySql, stripTrailingSqlNoise } from '../../context/connections/read-only-sql.js';
import { assertReadOnlySql, hoistLeadingCte, stripTrailingSqlNoise } from '../../context/connections/read-only-sql.js';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -277,7 +277,8 @@ function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefi
if (!Number.isInteger(maxRows) || maxRows <= 0) {
throw new Error('maxRows must be a positive integer.');
}
return `SELECT TOP ${maxRows} * FROM (${trimmed}) AS ktx_query_result`;
const { withPrefix, body } = hoistLeadingCte(trimmed);
return `${withPrefix}SELECT TOP ${maxRows} * FROM (${body}) AS ktx_query_result`;
}
export function isKtxSqlServerConnectionConfig(

View file

@ -97,6 +97,161 @@ export function assertReadOnlySql(sql: string): string {
return trimmed;
}
/**
 * Reports whether `char` can be part of an unquoted SQL identifier as this
 * scanner understands it: ASCII letters, digits, `_` and `$`.
 *
 * `undefined` (an out-of-range string index) is never an identifier part,
 * which lets callers probe `sql[i - 1]` / `sql[i + n]` without bounds checks.
 */
function isSqlIdentifierPart(char: string | undefined): boolean {
  if (char === undefined) {
    return false;
  }
  return /[A-Za-z0-9_$]/.test(char);
}
/**
 * Case-insensitively checks that `keyword` occurs in `sql` at `index` as a
 * whole word, i.e. it is not glued to identifier characters on either side
 * (so `with` does not match inside `within` or `_with`).
 */
function keywordAt(sql: string, index: number, keyword: string): boolean {
  const end = index + keyword.length;
  const candidate = sql.slice(index, end);
  const textMatches = candidate.toLowerCase() === keyword.toLowerCase();
  // Both neighbours must be non-identifier characters (or out of range).
  return textMatches && !isSqlIdentifierPart(sql[index - 1]) && !isSqlIdentifierPart(sql[end]);
}
/**
 * Advances from `index` past any run of whitespace and SQL comments (`-- ...`
 * and `/* ... *\/`), returning the index of the first character that is
 * neither. Comment skipping is delegated to `skipQuotedOrComment`.
 */
function skipWhitespaceAndComments(sql: string, index: number): number {
  let position = index;
  for (;;) {
    if (position >= sql.length) {
      return position;
    }
    while (/\s/.test(sql[position] ?? '')) {
      position += 1;
    }
    const atComment = sql.startsWith('--', position) || sql.startsWith('/*', position);
    if (!atComment) {
      return position;
    }
    // NOTE(review): assumes skipQuotedOrComment always advances past a comment
    // opener; if it can return `position` unchanged this loop would not end.
    position = skipQuotedOrComment(sql, position);
  }
}
/**
 * Skips a `[bracket-quoted]` identifier whose opening `[` sits at `index`.
 *
 * A doubled `]]` inside the identifier is an escaped bracket and does not
 * terminate it.
 *
 * @returns the index just past the closing `]`, or `-1` when the identifier
 *   is never closed.
 */
function skipBracketIdentifier(sql: string, index: number): number {
  let searchFrom = index + 1;
  for (;;) {
    const close = sql.indexOf(']', searchFrom);
    if (close < 0) {
      return -1;
    }
    if (sql[close + 1] !== ']') {
      return close + 1;
    }
    // `]]` is an escaped bracket: step over both and keep looking.
    searchFrom = close + 2;
  }
}
/**
 * Skips a backtick-quoted identifier whose opening backtick sits at `index`.
 *
 * Two consecutive backticks inside the identifier are an escaped backtick and
 * do not terminate it.
 *
 * @returns the index just past the closing backtick, or `-1` when the
 *   identifier is never closed.
 */
function skipBacktickIdentifier(sql: string, index: number): number {
  for (let position = index + 1; position < sql.length; position += 1) {
    if (sql[position] !== '`') {
      continue;
    }
    if (sql[position + 1] !== '`') {
      return position + 1;
    }
    // Escaped backtick: consume the second one here, the loop step moves on.
    position += 1;
  }
  return -1;
}
/**
 * Skips one SQL identifier starting at `index`, in any of the supported
 * spellings: `"double-quoted"`, `[bracketed]`, `` `backticked` `` or a bare
 * run of identifier characters.
 *
 * @returns the index just past the identifier, or `-1` when no identifier
 *   starts at `index` (or a quoted one could not be skipped).
 */
function skipIdentifier(sql: string, index: number): number {
  switch (sql[index]) {
    case '"': {
      // Double-quoted identifiers are handled by the shared quote skipper;
      // no progress means it could not consume one here.
      const end = skipQuotedOrComment(sql, index);
      return end > index ? end : -1;
    }
    case '[':
      return skipBracketIdentifier(sql, index);
    case '`':
      return skipBacktickIdentifier(sql, index);
    default: {
      let end = index;
      while (isSqlIdentifierPart(sql[end])) {
        end += 1;
      }
      return end > index ? end : -1;
    }
  }
}
/**
 * Skips a parenthesised group whose opening `(` sits at `index`, honouring
 * nesting. Anything `skipQuotedOrComment` can consume is stepped over whole,
 * so parentheses inside it do not affect the depth count.
 *
 * @returns the index just past the matching `)`, or `-1` when `index` is not
 *   at a `(` or the group is never closed.
 */
function skipBalancedParentheses(sql: string, index: number): number {
  if (sql[index] !== '(') {
    return -1;
  }
  let openCount = 0;
  let position = index;
  while (position < sql.length) {
    const afterSkip = skipQuotedOrComment(sql, position);
    if (afterSkip > position) {
      position = afterSkip;
      continue;
    }
    const ch = sql[position];
    if (ch === ')') {
      openCount -= 1;
      if (openCount === 0) {
        return position + 1;
      }
    } else if (ch === '(') {
      openCount += 1;
    }
    position += 1;
  }
  return -1;
}
/**
 * Splits a leading `WITH ...` clause (one or more comma-separated CTEs) off a
 * SQL statement, so a caller can wrap only the statement that follows it in a
 * derived table and put the CTE clause back in front of the wrapper.
 *
 * The scan is purely lexical: `WITH [RECURSIVE] name [(cols)] AS (...)`,
 * repeated after each `,`. Whitespace and comments may appear between tokens.
 *
 * @param sql - A single SQL statement.
 * @returns `withPrefix` is the CTE clause followed by one space and `body` is
 *   the remaining statement. When the statement does not start with `WITH`,
 *   `withPrefix` is `''` and `body` is `sql` exactly as passed in. When it
 *   starts with `WITH` but cannot be parsed as a CTE list followed by a
 *   non-empty statement, `withPrefix` is `''` and `body` is the trimmed input.
 *
 * @internal
 */
export function hoistLeadingCte(sql: string): { withPrefix: string; body: string } {
  const trimmed = sql.trim();
  if (!keywordAt(trimmed, 0, 'with')) {
    // Not a CTE statement: hand the input back untouched (deliberately not trimmed).
    return { withPrefix: '', body: sql };
  }
  // Shared fallback for every "looks like WITH but could not be parsed" exit.
  const notHoisted = { withPrefix: '', body: trimmed };

  let current = skipWhitespaceAndComments(trimmed, 'with'.length);
  if (keywordAt(trimmed, current, 'recursive')) {
    const afterRecursive = skipWhitespaceAndComments(trimmed, current + 'recursive'.length);
    // `recursive` is only a modifier when a CTE name follows it. If it is
    // directly followed by `AS` or a column list, it is itself the CTE name
    // (possible in dialects where RECURSIVE is not a reserved word —
    // presumably T-SQL; verify), so it must be left for skipIdentifier below.
    const recursiveIsCteName =
      trimmed[afterRecursive] === '(' || keywordAt(trimmed, afterRecursive, 'as');
    if (!recursiveIsCteName) {
      current = afterRecursive;
    }
  }

  while (current < trimmed.length) {
    // CTE name.
    current = skipIdentifier(trimmed, current);
    if (current < 0) {
      return notHoisted;
    }
    current = skipWhitespaceAndComments(trimmed, current);

    // Optional column list: name (col1, col2).
    if (trimmed[current] === '(') {
      current = skipBalancedParentheses(trimmed, current);
      if (current < 0) {
        return notHoisted;
      }
      current = skipWhitespaceAndComments(trimmed, current);
    }

    // Mandatory `AS ( ... )`.
    if (!keywordAt(trimmed, current, 'as')) {
      return notHoisted;
    }
    current = skipWhitespaceAndComments(trimmed, current + 'as'.length);
    current = skipBalancedParentheses(trimmed, current);
    if (current < 0) {
      return notHoisted;
    }
    current = skipWhitespaceAndComments(trimmed, current);

    // A comma means another CTE definition follows.
    if (trimmed[current] === ',') {
      current = skipWhitespaceAndComments(trimmed, current + 1);
      continue;
    }

    // Everything after the last CTE is the statement to be wrapped.
    const body = trimmed.slice(current).trimStart();
    if (!body) {
      return notHoisted;
    }
    return { withPrefix: `${trimmed.slice(0, current).trimEnd()} `, body };
  }
  return notHoisted;
}
// `assertReadOnlySql` deliberately keeps trailing semicolons, comments, and
// whitespace (e.g. `select 1; -- done`) — harmless for direct single-statement
// execution. A row-limit subquery wrapper needs a bare expression instead: a
@ -137,5 +292,6 @@ export function limitSqlForExecution(sql: string, maxRows: number | undefined):
if (!Number.isInteger(maxRows) || maxRows <= 0) {
throw new KtxQueryError('maxRows must be a positive integer.');
}
return `select * from (${trimmed}) as ktx_query_result limit ${maxRows}`;
const { withPrefix, body } = hoistLeadingCte(trimmed);
return `${withPrefix}select * from (${body}) as ktx_query_result limit ${maxRows}`;
}