mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-04 10:52:13 +02:00
* feat(connector): add Amazon Athena connector via Glue Data Catalog * fix(athena): address reviewer feedback * fix(athena): wire scope discovery, fix normalizeDriver, tighten types and tests * fix(athena): honor databases scope, wire sql-analysis dialect, harden config resolution - introspect() limits to the configured `databases` scope instead of scanning every Glue database in the account (docs promised this; connector ignored it) - add athena -> athena to sql-analysis SQLGLOT_DIALECTS so `ktx sql` and MCP read-only validation parse Athena SQL under the Trino grammar, not postgres - stringConfigValue coerces a resolved-empty `env:` reference to undefined so optional fields fall back to their defaults (workgroup 'primary', catalog 'AwsDataCatalog') instead of '' - drop trailing whitespace in dialect.test.ts * fix(athena): integrate with main's SQL/non-SQL dialect split and add dialect notes Rebase onto main, which introduced the KtxDialect (core) vs KtxSqlDialect (SQL-only) split for MongoDB: - KtxAthenaDialect implements KtxSqlDialect; the connector resolves it via getSqlDialectForDriver so SQL-generation methods stay in scope - add authored athena.md SQL notes for the sql_dialect_notes MCP tool, required now that athena resolves to the athena sqlglot dialect (dialect-notes coverage is derived from the warehouse-driver registry) --------- Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
72 lines
3.5 KiB
TypeScript
72 lines
3.5 KiB
TypeScript
import { describe, expect, it } from 'vitest';
|
|
import { KtxAthenaDialect } from '../../../src/connectors/athena/dialect.js';
|
|
|
|
// Unit tests for KtxAthenaDialect. Every assertion calls a dialect method
// directly and compares the returned string (or null); no Athena connection
// is made by this suite.
describe('KtxAthenaDialect', () => {
  // A single instance is shared by all tests in this suite.
  const dialect = new KtxAthenaDialect();

  it('quotes identifiers and formats catalog.database.table names', () => {
    // An embedded double quote is expected to be escaped by doubling it.
    expect(dialect.quoteIdentifier('my"col')).toBe('"my""col"');

    // Fully qualified: catalog + database + table.
    expect(dialect.formatTableName({ catalog: 'AwsDataCatalog', db: 'analytics', name: 'orders' })).toBe(
      '"AwsDataCatalog"."analytics"."orders"',
    );
    // Parts that are not supplied are omitted from the formatted name.
    expect(dialect.formatTableName({ db: 'analytics', name: 'orders' })).toBe('"analytics"."orders"');
    expect(dialect.formatTableName({ name: 'orders' })).toBe('"orders"');
  });

  it('maps native Athena/Glue types to normalized types and dimension types', () => {
    // Native type -> normalized type name.
    expect(dialect.mapDataType('bigint')).toBe('BIGINT');
    expect(dialect.mapDataType('string')).toBe('VARCHAR');
    // Parameterized/complex types are expected to collapse to their base type.
    expect(dialect.mapDataType('array<string>')).toBe('ARRAY');
    expect(dialect.mapDataType('map<string,bigint>')).toBe('MAP');
    expect(dialect.mapDataType('struct<id:bigint>')).toBe('STRUCT');
    expect(dialect.mapDataType('decimal(18,2)')).toBe('DECIMAL');
    // An unrecognized type name is expected to come back unchanged.
    expect(dialect.mapDataType('UNKNOWN_TYPE')).toBe('UNKNOWN_TYPE');

    // Native type -> dimension type.
    expect(dialect.mapToDimensionType('timestamp')).toBe('time');
    expect(dialect.mapToDimensionType('date')).toBe('time');
    expect(dialect.mapToDimensionType('bigint')).toBe('number');
    expect(dialect.mapToDimensionType('double')).toBe('number');
    expect(dialect.mapToDimensionType('decimal(10,2)')).toBe('number');
    expect(dialect.mapToDimensionType('boolean')).toBe('boolean');
    expect(dialect.mapToDimensionType('string')).toBe('string');
    expect(dialect.mapToDimensionType('varchar')).toBe('string');
  });

  it('generates correct sample and column-sample SQL', () => {
    // With an explicit column list, each column is expected to be quoted.
    expect(dialect.generateSampleQuery('"analytics"."orders"', 10, ['id', 'status'])).toBe(
      'SELECT "id", "status" FROM "analytics"."orders" LIMIT 10',
    );
    // Without a column list the query is expected to select everything.
    expect(dialect.generateSampleQuery('"analytics"."orders"', 5)).toBe(
      'SELECT * FROM "analytics"."orders" LIMIT 5',
    );
    // Column sampling is expected to filter out NULL values.
    expect(dialect.generateColumnSampleQuery('"analytics"."orders"', 'status', 20)).toBe(
      'SELECT "status" FROM "analytics"."orders" WHERE "status" IS NOT NULL LIMIT 20',
    );
  });

  it('generates Presto-style cardinality and distinct-values SQL', () => {
    // Only key fragments are checked here, not the full generated statement.
    expect(dialect.generateCardinalitySampleQuery('"t"', '"col"', 1000)).toContain('approx_distinct');
    expect(dialect.generateRandomizedCardinalitySampleQuery('"t"', '"col"', 500)).toContain('rand()');
    expect(dialect.generateDistinctValuesQuery('"t"', '"col"', 50)).toContain(
      'SELECT DISTINCT CAST("col" AS VARCHAR) AS val',
    );
  });

  it('returns null for column statistics (unsupported)', () => {
    expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
  });

  it('produces Trino-correct OFFSET-before-LIMIT ordering', () => {
    // A missing or zero offset is expected to yield a bare LIMIT clause.
    expect(dialect.getLimitOffsetClause(10)).toBe('LIMIT 10');
    expect(dialect.getLimitOffsetClause(10, 0)).toBe('LIMIT 10');
    // A non-zero offset must be emitted before LIMIT.
    expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 LIMIT 10');
  });

  it('uses unit-separator (U+001F) as the array_join delimiter', () => {
    const sql = dialect.getSampleValueAggregation('SELECT value FROM t');
    // Text that immediately precedes the delimiter character inside the
    // generated array_join(...) call, up to and including the opening quote.
    const delimiterPrefix = "array_join(array_agg(CAST(value AS VARCHAR)), '";

    // Guard first: if the prefix is missing, indexOf() returns -1 and the
    // char-code check below would inspect an unrelated character, producing a
    // misleading failure (or, in the worst case, an accidental pass).
    expect(sql).toContain(delimiterPrefix);

    const separatorIndex = sql.indexOf(delimiterPrefix) + delimiterPrefix.length;
    expect(sql.charCodeAt(separatorIndex)).toBe(0x1f);
  });
});
|