feat(connector): add Amazon Athena connector via Glue Data Catalog (#309)

* feat(connector): add Amazon Athena connector via Glue Data Catalog

* fix(athena): address reviewer feedback

* fix(athena): wire scope discovery, fix normalizeDriver, tighten types and tests

* fix(athena): honor databases scope, wire sql-analysis dialect, harden config resolution

- introspect() limits to the configured `databases` scope instead of scanning
  every Glue database in the account (docs promised this; connector ignored it)
- add athena -> athena to sql-analysis SQLGLOT_DIALECTS so `ktx sql` and MCP
  read-only validation parse Athena SQL under the Trino grammar, not postgres
- stringConfigValue coerces a resolved-empty `env:` reference to undefined so
  optional fields fall back to their defaults (workgroup 'primary', catalog
  'AwsDataCatalog') instead of ''
- drop trailing whitespace in dialect.test.ts

* fix(athena): integrate with main's SQL/non-SQL dialect split and add dialect notes

Rebase onto main, which introduced the KtxDialect (core) vs KtxSqlDialect
(SQL-only) split for MongoDB:
- KtxAthenaDialect implements KtxSqlDialect; the connector resolves it via
  getSqlDialectForDriver so SQL-generation methods stay in scope
- add authored athena.md SQL notes for the sql_dialect_notes MCP tool, required
  now that athena resolves to the athena sqlglot dialect (dialect-notes coverage
  is derived from the warehouse-driver registry)

---------

Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
This commit is contained in:
Patel Dhrit 2026-07-02 06:00:26 -07:00 committed by GitHub
parent 6d01030745
commit fe7e6bd1fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 2047 additions and 6 deletions

View file

@ -305,7 +305,7 @@ describe('getDialectForDriver', () => {
it('throws with a supported-driver list for unknown drivers', () => {
expect(() => getDialectForDriver('oracle')).toThrow(
'Unsupported driver "oracle". Supported drivers: bigquery, clickhouse, duckdb, mongodb, mysql, postgres, snowflake, sqlite, sqlserver',
'Unsupported driver "oracle". Supported drivers: athena, bigquery, clickhouse, duckdb, mongodb, mysql, postgres, snowflake, sqlite, sqlserver',
);
});

View file

@ -70,6 +70,11 @@ const connectionFixtures: Record<KtxConnectionDriver, FixtureFactory> = {
database: 'ANALYTICS',
schema: 'PUBLIC',
}),
athena: () => ({
driver: 'athena',
region: 'us-east-1',
s3_staging_dir: 's3://my-bucket/athena-results/',
}),
};
const allowedScopeKeys = new Set(['dataset_ids', 'databases', 'schemas', 'schema_names']);
@ -100,6 +105,7 @@ describe('driverRegistrations', () => {
const registryDrivers = Object.keys(driverRegistrations).sort();
expect(listSupportedDrivers()).toEqual(registryDrivers);
expect(listSupportedDrivers()).toEqual([
'athena',
'bigquery',
'clickhouse',
'duckdb',

View file

@ -2175,6 +2175,40 @@ describe('local scan', () => {
};
expect(manifest.tables.orders?.joins?.some((join) => join.to === 'accounts')).toBe(true);
});
it('accepts athena as a native standalone scan driver when the host supplies a live-database adapter', async () => {
await writeFile(
join(project.projectDir, 'ktx.yaml'),
[
'connections:',
' warehouse:',
' driver: athena',
' region: us-east-1',
' s3_staging_dir: s3://my-bucket/athena-results/',
' databases:',
' - analytics',
'ingest:',
' adapters:',
' - live-database',
'',
].join('\n'),
'utf-8',
);
project = await loadKtxProject({ projectDir: project.projectDir });
const result = await runLocalScan({
project,
adapters: [fetchOnlyAdapter()],
connectionId: 'warehouse',
jobId: 'scan-run-athena',
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
expect(result.report.driver).toBe('athena');
expect(result.report.artifactPaths.reportPath).toBe(
'raw-sources/warehouse/live-database/2026-04-29-170000-scan-run-athena/scan-report.json',
);
});
});
describe('resolveEnabledTables', () => {

View file

@ -12,6 +12,7 @@ describe('sqlAnalysisDialectForDriver', () => {
expect(sqlAnalysisDialectForDriver('duckdb')).toBe('duckdb');
expect(sqlAnalysisDialectForDriver('clickhouse')).toBe('clickhouse');
expect(sqlAnalysisDialectForDriver('databricks')).toBe('databricks');
expect(sqlAnalysisDialectForDriver('athena')).toBe('athena');
});
it('maps local connection-type spellings to sqlglot dialects', () => {