mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
feat(mysql): implement columnStats using INFORMATION_SCHEMA.STATISTICS (#233)
* feat(mysql): implement columnStats using INFORMATION_SCHEMA.STATISTICS

  Enable column cardinality statistics for the MySQL connector by querying INFORMATION_SCHEMA.STATISTICS, which provides index-based cardinality estimates without requiring additional permissions.

  - Add generateColumnStatisticsQuery() to KtxMysqlDialect
  - Add getColumnStatistics() and columnStats() to KtxMysqlScanConnector
  - Flip columnStats capability from false to true
  - Add MysqlStatsRow and KtxMysqlColumnStatisticsResult interfaces
  - Add tests for dialect query generation and connector stats retrieval
  - Update dialect conformance fixture for mysql

* fix(mysql): filter to leading index columns to avoid inflated cardinality

  Add AND SEQ_IN_INDEX = 1 to the INFORMATION_SCHEMA.STATISTICS query to ensure only leading index columns are returned. For composite indexes, non-leading columns report the cardinality of the index prefix rather than the column's own distinct count, which inflates distinctCount.

  Add regression test asserting SEQ_IN_INDEX = 1 is present in the query.

* fix: add trailing newline to dialect.test.ts

---------

Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
This commit is contained in:
parent
0d0ea55184
commit
18245c2373
5 changed files with 97 additions and 10 deletions
|
|
@ -159,6 +159,15 @@ interface MysqlDistinctValueRow extends RowDataPacket {
|
|||
val: unknown;
|
||||
}
|
||||
|
||||
/**
 * One result row of the column-statistics query: a column name paired with
 * its cardinality estimate.
 */
interface MysqlStatsRow extends RowDataPacket {
  /** Name of the column the estimate belongs to. */
  column_name: string;

  /** Cardinality estimate for the column; typed as nullable. */
  estimated_cardinality: number | null;
}
|
||||
|
||||
/**
 * Per-table column statistics gathered by the MySQL connector.
 */
export interface KtxMysqlColumnStatisticsResult {
  /** Cardinality estimate keyed by column name. */
  cardinalityByColumn: Map<string, number>;
}
|
||||
|
||||
class DefaultMysqlPoolFactory implements KtxMysqlPoolFactory {
|
||||
createPool(config: KtxMysqlPoolConfig): KtxMysqlPool {
|
||||
return mysql.createPool(config) as Pool;
|
||||
|
|
@ -384,7 +393,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
|
|||
readonly capabilities = createKtxConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: false,
|
||||
columnStats: true,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: true,
|
||||
formalForeignKeys: true,
|
||||
|
|
@ -562,8 +571,29 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
|
|||
return { values, nullCount: null, distinctCount: null };
|
||||
}
|
||||
|
||||
async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
|
||||
return null;
|
||||
/**
 * Resolves the cardinality estimate for a single column of `input.table`.
 *
 * @param input - Identifies the table and the column whose stats are wanted.
 * @param _ctx - Scan context; not used by this implementation.
 * @returns A stats object in which only `distinctCount` is populated, or
 *   `null` when no estimate is available for `input.column`.
 */
async columnStats(input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
  const tableStats = await this.getColumnStatistics(input.table);
  const distinctCount = tableStats?.cardinalityByColumn.get(input.column);

  // No table-level stats, or no entry for this particular column.
  if (distinctCount === undefined) {
    return null;
  }

  return { min: null, max: null, average: null, nullCount: null, distinctCount };
}
|
||||
|
||||
/**
 * Fetches cardinality estimates for the columns of `table`.
 *
 * The schema is `table.db` when set, otherwise the pool's configured database.
 * The SQL comes from the dialect; when the dialect yields no query, no
 * statistics are available.
 *
 * @param table - Table whose column statistics should be loaded.
 * @returns A map of column name to cardinality estimate, or `null` when the
 *   dialect produces no query or no usable estimate was returned.
 */
async getColumnStatistics(table: KtxTableRef): Promise<KtxMysqlColumnStatisticsResult | null> {
  const schema = table.db ?? this.poolConfig.database;
  const sql = this.dialect.generateColumnStatisticsQuery(schema, table.name);
  if (!sql) {
    return null;
  }

  const rows = await this.queryRaw<MysqlStatsRow>(sql);
  const cardinalityByColumn = new Map<string, number>();
  for (const row of rows) {
    // `Number(null)` evaluates to 0, which would pass the checks below and be
    // reported as a genuine distinct count of zero. Treat a missing estimate
    // as "unknown" instead.
    if (row.estimated_cardinality == null) {
      continue;
    }
    // `Number(...)` also normalizes drivers that hand back numeric strings.
    const cardinality = Number(row.estimated_cardinality);
    if (Number.isFinite(cardinality) && cardinality >= 0) {
      cardinalityByColumn.set(row.column_name, cardinality);
    }
  }

  return cardinalityByColumn.size > 0 ? { cardinalityByColumn } : null;
}
|
||||
|
||||
async executeReadOnly(input: KtxMysqlReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
||||
|
|
|
|||
|
|
@ -171,8 +171,18 @@ export class KtxMysqlDialect implements KtxDialect {
|
|||
`;
|
||||
}
|
||||
|
||||
generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
|
||||
return null;
|
||||
/**
 * Builds the query that reads index-based cardinality estimates for a table
 * from `INFORMATION_SCHEMA.STATISTICS`.
 *
 * Only rows with `SEQ_IN_INDEX = 1` and a non-null `CARDINALITY` are
 * considered, and the result is grouped per column using `MAX(CARDINALITY)`.
 *
 * @param schemaName - Schema (database) containing the table.
 * @param tableName - Table whose columns should be reported.
 * @returns SQL selecting `column_name` and `estimated_cardinality`.
 */
generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null {
  // Both names are embedded as string literals. Backslashes must be escaped
  // before quotes: otherwise an input such as `\'` becomes `\''`, which MySQL
  // reads as an escaped quote followed by the literal's terminator.
  // NOTE(review): assumes the NO_BACKSLASH_ESCAPES SQL mode is not enabled — confirm.
  const quoteLiteral = (value: string): string =>
    `'${value.replace(/\\/g, '\\\\').replace(/'/g, "''")}'`;

  return `
      SELECT
        COLUMN_NAME AS column_name,
        MAX(CARDINALITY) AS estimated_cardinality
      FROM INFORMATION_SCHEMA.STATISTICS
      WHERE TABLE_SCHEMA = ${quoteLiteral(schemaName)}
        AND TABLE_NAME = ${quoteLiteral(tableName)}
        AND CARDINALITY IS NOT NULL
        AND SEQ_IN_INDEX = 1
      GROUP BY COLUMN_NAME
    `;
}
|
||||
|
||||
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue