mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-19 08:28:06 +02:00
207 lines
8 KiB
TypeScript
207 lines
8 KiB
TypeScript
import * as z from 'zod';
|
|
import {
|
|
lookerMappingsSchema,
|
|
lookmlMappingsSchema,
|
|
metabaseMappingsSchema,
|
|
} from './mappings-yaml-schema.js';
|
|
|
|
const warehouseDrivers = [
|
|
'postgres',
|
|
'mysql',
|
|
'snowflake',
|
|
'bigquery',
|
|
'sqlite',
|
|
'clickhouse',
|
|
'sqlserver',
|
|
] as const;
|
|
|
|
type WarehouseDriver = (typeof warehouseDrivers)[number];
|
|
|
|
function warehouseConnectionSchema<const Driver extends WarehouseDriver>(driver: Driver) {
|
|
return z
|
|
.looseObject({
|
|
driver: z.literal(driver),
|
|
url: z
|
|
.string()
|
|
.min(1)
|
|
.optional()
|
|
.describe('Warehouse connection URL or DSN; may contain environment-variable references like env:DATABASE_URL.'),
|
|
enabled_tables: z
|
|
.array(z.string().min(1))
|
|
.optional()
|
|
.describe(
|
|
'Optional allowlist of fully-qualified table names ("schema.table") to ingest. When set, live-database ingest discards any table whose schema-qualified name is not in this list. Useful for smoke-testing deep ingest on a single table.',
|
|
),
|
|
})
|
|
.describe(
|
|
`${driver} warehouse connection. Additional driver-tunable fields (e.g. context.queryHistory) are accepted and passed through.`,
|
|
);
|
|
}
|
|
|
|
const warehouseConnectionSchemas = [
|
|
warehouseConnectionSchema('postgres'),
|
|
warehouseConnectionSchema('mysql'),
|
|
warehouseConnectionSchema('snowflake'),
|
|
warehouseConnectionSchema('bigquery'),
|
|
warehouseConnectionSchema('sqlite'),
|
|
warehouseConnectionSchema('clickhouse'),
|
|
warehouseConnectionSchema('sqlserver'),
|
|
] as const;
|
|
|
|
const positiveIntKeyMessage = (field: string) => `${field} keys must be positive-integer strings (e.g. "1", "42")`;
|
|
|
|
const positiveIntKeyRegex = /^[1-9]\d*$/;
|
|
|
|
const metabaseMappingsStrictSchema = metabaseMappingsSchema.superRefine((value, ctx) => {
|
|
for (const key of Object.keys(value.databaseMappings ?? {})) {
|
|
if (!positiveIntKeyRegex.test(key)) {
|
|
ctx.addIssue({
|
|
code: 'custom',
|
|
path: ['databaseMappings', key],
|
|
message: positiveIntKeyMessage('databaseMappings'),
|
|
});
|
|
}
|
|
}
|
|
for (const key of Object.keys(value.syncEnabled ?? {})) {
|
|
if (!positiveIntKeyRegex.test(key)) {
|
|
ctx.addIssue({
|
|
code: 'custom',
|
|
path: ['syncEnabled', key],
|
|
message: positiveIntKeyMessage('syncEnabled'),
|
|
});
|
|
}
|
|
}
|
|
});
|
|
|
|
const metabaseConnectionSchema = z
|
|
.looseObject({
|
|
driver: z.literal('metabase'),
|
|
api_url: z.string().url().describe('Metabase instance API URL (e.g. https://metabase.example.com).'),
|
|
api_key: z.string().min(1).optional().describe('Literal Metabase API key. Prefer api_key_ref for safety.'),
|
|
api_key_ref: z
|
|
.string()
|
|
.min(1)
|
|
.optional()
|
|
.describe('Reference to Metabase API key (e.g. env:METABASE_API_KEY or file:/path).'),
|
|
network_proxy: z.looseObject({}).optional().describe('Optional network proxy configuration (snake_case form).'),
|
|
networkProxy: z.looseObject({}).optional().describe('Optional network proxy configuration (camelCase form).'),
|
|
mappings: metabaseMappingsStrictSchema
|
|
.optional()
|
|
.describe('Metabase database-to-warehouse mappings and sync configuration.'),
|
|
})
|
|
.describe('Metabase context-source connection.');
|
|
|
|
const lookerConnectionSchema = z
|
|
.looseObject({
|
|
driver: z.literal('looker'),
|
|
base_url: z.string().url().describe('Looker instance base URL (e.g. https://looker.example.com).'),
|
|
client_id: z.string().min(1).describe('Looker OAuth client ID.'),
|
|
client_secret: z.string().min(1).optional().describe('Literal Looker OAuth client secret. Prefer client_secret_ref.'),
|
|
client_secret_ref: z
|
|
.string()
|
|
.min(1)
|
|
.optional()
|
|
.describe('Reference to Looker OAuth client secret (e.g. env:LOOKER_CLIENT_SECRET).'),
|
|
mappings: lookerMappingsSchema.optional().describe('Looker connection-name to KTX warehouse mappings.'),
|
|
})
|
|
.describe('Looker context-source connection.');
|
|
|
|
const lookmlConnectionSchema = z
|
|
.looseObject({
|
|
driver: z.literal('lookml'),
|
|
repoUrl: z
|
|
.string()
|
|
.min(1)
|
|
.describe('Git URL of the LookML project (https, ssh, or file:). Field is camelCase by convention.'),
|
|
branch: z.string().min(1).optional().describe('Git branch (default "main" downstream).'),
|
|
path: z.string().optional().describe('Subdirectory within the repo when the LookML project lives in a monorepo.'),
|
|
auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos (e.g. env:GITHUB_TOKEN).'),
|
|
mappings: lookmlMappingsSchema.optional().describe('LookML expected-connection mapping for ingest gating.'),
|
|
})
|
|
.describe('LookML context-source connection.');
|
|
|
|
const notionConnectionSchema = z
|
|
.looseObject({
|
|
driver: z.literal('notion'),
|
|
auth_token: z.string().min(1).optional().describe('Literal Notion integration token. Prefer auth_token_ref.'),
|
|
auth_token_ref: z
|
|
.string()
|
|
.min(1)
|
|
.optional()
|
|
.describe('Reference to Notion integration token (e.g. env:NOTION_TOKEN).'),
|
|
crawl_mode: z
|
|
.enum(['selected_roots', 'all_accessible'])
|
|
.optional()
|
|
.describe(
|
|
'Crawl scope. "selected_roots" requires at least one of root_page_ids, root_database_ids, root_data_source_ids.',
|
|
),
|
|
root_page_ids: z.array(z.string().min(1)).optional().describe('Notion page IDs to crawl when crawl_mode is selected_roots.'),
|
|
root_database_ids: z
|
|
.array(z.string().min(1))
|
|
.optional()
|
|
.describe('Notion database IDs to crawl when crawl_mode is selected_roots.'),
|
|
root_data_source_ids: z
|
|
.array(z.string().min(1))
|
|
.optional()
|
|
.describe('Notion data source IDs to crawl when crawl_mode is selected_roots.'),
|
|
max_pages_per_run: z
|
|
.number()
|
|
.int()
|
|
.min(1)
|
|
.max(10000)
|
|
.optional()
|
|
.describe('Maximum Notion pages fetched in a single ingest run.'),
|
|
max_knowledge_creates_per_run: z
|
|
.number()
|
|
.int()
|
|
.min(0)
|
|
.max(25)
|
|
.optional()
|
|
.describe('Maximum new wiki pages created per run.'),
|
|
max_knowledge_updates_per_run: z
|
|
.number()
|
|
.int()
|
|
.min(0)
|
|
.max(100)
|
|
.optional()
|
|
.describe('Maximum existing wiki pages updated per run.'),
|
|
})
|
|
.describe('Notion context-source connection.');
|
|
|
|
const dbtConnectionSchema = z
|
|
.looseObject({
|
|
driver: z.literal('dbt'),
|
|
source_dir: z.string().min(1).optional().describe('Absolute or project-relative path to a local dbt project.'),
|
|
repo_url: z.string().min(1).optional().describe('Git URL of the dbt project (https, ssh, or file:).'),
|
|
branch: z.string().min(1).optional().describe('Git branch when using repo_url.'),
|
|
path: z.string().optional().describe('Subdirectory within the repo when the dbt project lives in a monorepo.'),
|
|
auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos.'),
|
|
profiles_path: z.string().optional().describe('Override path to dbt profiles.yml.'),
|
|
target: z.string().min(1).optional().describe('dbt target name (e.g. dev, prod).'),
|
|
project_name: z.string().min(1).optional().describe('Override auto-detected dbt project name.'),
|
|
})
|
|
.describe('dbt context-source connection.');
|
|
|
|
const metricflowConnectionSchema = z
|
|
.looseObject({
|
|
driver: z.literal('metricflow'),
|
|
metricflow: z
|
|
.looseObject({
|
|
repoUrl: z.string().min(1).describe('Git URL of the MetricFlow / SL project.'),
|
|
branch: z.string().min(1).optional().describe('Git branch (default "main").'),
|
|
path: z.string().optional().describe('Subdirectory within the repo when the SL config lives in a monorepo.'),
|
|
auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos.'),
|
|
})
|
|
.describe('Nested MetricFlow configuration block.'),
|
|
})
|
|
.describe('MetricFlow / SL context-source connection.');
|
|
|
|
export const connectionConfigSchema = z.discriminatedUnion('driver', [
|
|
...warehouseConnectionSchemas,
|
|
metabaseConnectionSchema,
|
|
lookerConnectionSchema,
|
|
lookmlConnectionSchema,
|
|
notionConnectionSchema,
|
|
dbtConnectionSchema,
|
|
metricflowConnectionSchema,
|
|
]);
|