ktx/packages/cli/test/connectors/athena/dialect.test.ts
Patel Dhrit fe7e6bd1fa
feat(connector): add Amazon Athena connector via Glue Data Catalog (#309)
* feat(connector): add Amazon Athena connector via Glue Data Catalog

* fix(athena): address reviewer feedback

* fix(athena): wire scope discovery, fix normalizeDriver, tighten types and tests

* fix(athena): honor databases scope, wire sql-analysis dialect, harden config resolution

- introspect() limits to the configured `databases` scope instead of scanning
  every Glue database in the account (docs promised this; connector ignored it)
- add athena -> athena to sql-analysis SQLGLOT_DIALECTS so `ktx sql` and MCP
  read-only validation parse Athena SQL under the Trino grammar, not postgres
- stringConfigValue coerces a resolved-empty `env:` reference to undefined so
  optional fields fall back to their defaults (workgroup 'primary', catalog
  'AwsDataCatalog') instead of ''
- drop trailing whitespace in dialect.test.ts

* fix(athena): integrate with main's SQL/non-SQL dialect split and add dialect notes

Rebase onto main, which introduced the KtxDialect (core) vs KtxSqlDialect
(SQL-only) split for MongoDB:
- KtxAthenaDialect implements KtxSqlDialect; the connector resolves it via
  getSqlDialectForDriver so SQL-generation methods stay in scope
- add authored athena.md SQL notes for the sql_dialect_notes MCP tool, required
  now that athena resolves to the athena sqlglot dialect (dialect-notes coverage
  is derived from the warehouse-driver registry)

---------

Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
2026-07-02 15:00:26 +02:00

72 lines
3.5 KiB
TypeScript

import { describe, expect, it } from 'vitest';
import { KtxAthenaDialect } from '../../../src/connectors/athena/dialect.js';
/**
 * Unit tests for KtxAthenaDialect: identifier quoting, table-name formatting,
 * type mapping, and the SQL strings produced by its query generators.
 */
describe('KtxAthenaDialect', () => {
  // A single instance is shared by every test in this suite.
  const dialect = new KtxAthenaDialect();

  it('quotes identifiers and formats catalog.database.table names', () => {
    // An embedded double quote is expected to be escaped by doubling it.
    expect(dialect.quoteIdentifier('my"col')).toBe('"my""col"');
    expect(
      dialect.formatTableName({ catalog: 'AwsDataCatalog', db: 'analytics', name: 'orders' }),
    ).toBe('"AwsDataCatalog"."analytics"."orders"');
    // Parts that are not supplied are left out of the qualified name.
    expect(dialect.formatTableName({ db: 'analytics', name: 'orders' })).toBe('"analytics"."orders"');
    expect(dialect.formatTableName({ name: 'orders' })).toBe('"orders"');
  });

  it('maps native Athena/Glue types to normalized types and dimension types', () => {
    expect(dialect.mapDataType('bigint')).toBe('BIGINT');
    expect(dialect.mapDataType('string')).toBe('VARCHAR');
    // Parameterized and nested types are expected to collapse to their base type name.
    expect(dialect.mapDataType('array<string>')).toBe('ARRAY');
    expect(dialect.mapDataType('map<string,bigint>')).toBe('MAP');
    expect(dialect.mapDataType('struct<id:bigint>')).toBe('STRUCT');
    expect(dialect.mapDataType('decimal(18,2)')).toBe('DECIMAL');
    // An unrecognized type name is expected to come back unchanged.
    expect(dialect.mapDataType('UNKNOWN_TYPE')).toBe('UNKNOWN_TYPE');

    expect(dialect.mapToDimensionType('timestamp')).toBe('time');
    expect(dialect.mapToDimensionType('date')).toBe('time');
    expect(dialect.mapToDimensionType('bigint')).toBe('number');
    expect(dialect.mapToDimensionType('double')).toBe('number');
    expect(dialect.mapToDimensionType('decimal(10,2)')).toBe('number');
    expect(dialect.mapToDimensionType('boolean')).toBe('boolean');
    expect(dialect.mapToDimensionType('string')).toBe('string');
    expect(dialect.mapToDimensionType('varchar')).toBe('string');
  });

  it('generates correct sample and column-sample SQL', () => {
    expect(dialect.generateSampleQuery('"analytics"."orders"', 10, ['id', 'status'])).toBe(
      'SELECT "id", "status" FROM "analytics"."orders" LIMIT 10',
    );
    // Without a column list the query is expected to select every column.
    expect(dialect.generateSampleQuery('"analytics"."orders"', 5)).toBe(
      'SELECT * FROM "analytics"."orders" LIMIT 5',
    );
    expect(dialect.generateColumnSampleQuery('"analytics"."orders"', 'status', 20)).toBe(
      'SELECT "status" FROM "analytics"."orders" WHERE "status" IS NOT NULL LIMIT 20',
    );
  });

  it('generates Presto-style cardinality and distinct-values SQL', () => {
    expect(dialect.generateCardinalitySampleQuery('"t"', '"col"', 1000)).toContain('approx_distinct');
    expect(dialect.generateRandomizedCardinalitySampleQuery('"t"', '"col"', 500)).toContain('rand()');
    expect(dialect.generateDistinctValuesQuery('"t"', '"col"', 50)).toContain(
      'SELECT DISTINCT CAST("col" AS VARCHAR) AS val',
    );
  });

  it('returns null for column statistics (unsupported)', () => {
    expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
  });

  it('produces Trino-correct OFFSET-before-LIMIT ordering', () => {
    expect(dialect.getLimitOffsetClause(10)).toBe('LIMIT 10');
    // A zero offset is expected to be omitted entirely.
    expect(dialect.getLimitOffsetClause(10, 0)).toBe('LIMIT 10');
    expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 LIMIT 10');
  });

  it('uses unit-separator (U+001F) as the array_join delimiter', () => {
    const sql = dialect.getSampleValueAggregation('SELECT value FROM t');
    // Everything up to and including the opening quote of the delimiter literal.
    const delimiterPrefix = "array_join(array_agg(CAST(value AS VARCHAR)), '";

    // Fail loudly if the expected call is missing: indexOf would return -1 and
    // the char-code check below would silently inspect an unrelated character.
    expect(sql).toContain(delimiterPrefix);

    const separatorIndex = sql.indexOf(delimiterPrefix) + delimiterPrefix.length;
    expect(sql.charCodeAt(separatorIndex)).toBe(0x1f);
  });
});