import * as z from 'zod'; import { lookerMappingsSchema, lookmlMappingsSchema, metabaseMappingsSchema, } from './mappings-yaml-schema.js'; const warehouseDrivers = [ 'postgres', 'postgresql', 'mysql', 'snowflake', 'bigquery', 'sqlite', 'clickhouse', 'sqlserver', ] as const; type WarehouseDriver = (typeof warehouseDrivers)[number]; function warehouseConnectionSchema(driver: Driver) { return z .looseObject({ driver: z.literal(driver), url: z .string() .min(1) .optional() .describe('Warehouse connection URL or DSN; may contain environment-variable references like env:DATABASE_URL.'), }) .describe( `${driver} warehouse connection. Additional driver-tunable fields (e.g. historicSql, context.queryHistory) are accepted and passed through.`, ); } const warehouseConnectionSchemas = [ warehouseConnectionSchema('postgres'), warehouseConnectionSchema('postgresql'), warehouseConnectionSchema('mysql'), warehouseConnectionSchema('snowflake'), warehouseConnectionSchema('bigquery'), warehouseConnectionSchema('sqlite'), warehouseConnectionSchema('clickhouse'), warehouseConnectionSchema('sqlserver'), ] as const; const positiveIntKeyMessage = (field: string) => `${field} keys must be positive-integer strings (e.g. "1", "42")`; const positiveIntKeyRegex = /^[1-9]\d*$/; const metabaseMappingsStrictSchema = metabaseMappingsSchema.superRefine((value, ctx) => { for (const key of Object.keys(value.databaseMappings ?? {})) { if (!positiveIntKeyRegex.test(key)) { ctx.addIssue({ code: 'custom', path: ['databaseMappings', key], message: positiveIntKeyMessage('databaseMappings'), }); } } for (const key of Object.keys(value.syncEnabled ?? {})) { if (!positiveIntKeyRegex.test(key)) { ctx.addIssue({ code: 'custom', path: ['syncEnabled', key], message: positiveIntKeyMessage('syncEnabled'), }); } } }); const metabaseConnectionSchema = z .looseObject({ driver: z.literal('metabase'), api_url: z.string().url().describe('Metabase instance API URL (e.g. https://metabase.example.com).'), api_key: z.string().min(1).optional().describe('Literal Metabase API key. Prefer api_key_ref for safety.'), api_key_ref: z .string() .min(1) .optional() .describe('Reference to Metabase API key (e.g. env:METABASE_API_KEY or file:/path).'), network_proxy: z.looseObject({}).optional().describe('Optional network proxy configuration (snake_case form).'), networkProxy: z.looseObject({}).optional().describe('Optional network proxy configuration (camelCase form).'), mappings: metabaseMappingsStrictSchema .optional() .describe('Metabase database-to-warehouse mappings and sync configuration.'), }) .describe('Metabase context-source connection.'); const lookerConnectionSchema = z .looseObject({ driver: z.literal('looker'), base_url: z.string().url().describe('Looker instance base URL (e.g. https://looker.example.com).'), client_id: z.string().min(1).describe('Looker OAuth client ID.'), client_secret: z.string().min(1).optional().describe('Literal Looker OAuth client secret. Prefer client_secret_ref.'), client_secret_ref: z .string() .min(1) .optional() .describe('Reference to Looker OAuth client secret (e.g. env:LOOKER_CLIENT_SECRET).'), mappings: lookerMappingsSchema.optional().describe('Looker connection-name to KTX warehouse mappings.'), }) .describe('Looker context-source connection.'); const lookmlConnectionSchema = z .looseObject({ driver: z.literal('lookml'), repoUrl: z .string() .min(1) .describe('Git URL of the LookML project (https, ssh, or file:). Field is camelCase by convention.'), branch: z.string().min(1).optional().describe('Git branch (default "main" downstream).'), path: z.string().optional().describe('Subdirectory within the repo when the LookML project lives in a monorepo.'), auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos (e.g. env:GITHUB_TOKEN).'), mappings: lookmlMappingsSchema.optional().describe('LookML expected-connection mapping for ingest gating.'), }) .describe('LookML context-source connection.'); const notionConnectionSchema = z .looseObject({ driver: z.literal('notion'), auth_token: z.string().min(1).optional().describe('Literal Notion integration token. Prefer auth_token_ref.'), auth_token_ref: z .string() .min(1) .optional() .describe('Reference to Notion integration token (e.g. env:NOTION_TOKEN).'), crawl_mode: z .enum(['selected_roots', 'all_accessible']) .optional() .describe( 'Crawl scope. "selected_roots" requires at least one of root_page_ids, root_database_ids, root_data_source_ids.', ), root_page_ids: z.array(z.string().min(1)).optional().describe('Notion page IDs to crawl when crawl_mode is selected_roots.'), root_database_ids: z .array(z.string().min(1)) .optional() .describe('Notion database IDs to crawl when crawl_mode is selected_roots.'), root_data_source_ids: z .array(z.string().min(1)) .optional() .describe('Notion data source IDs to crawl when crawl_mode is selected_roots.'), max_pages_per_run: z .number() .int() .min(1) .max(10000) .optional() .describe('Maximum Notion pages fetched in a single ingest run.'), max_knowledge_creates_per_run: z .number() .int() .min(0) .max(25) .optional() .describe('Maximum new wiki pages created per run.'), max_knowledge_updates_per_run: z .number() .int() .min(0) .max(100) .optional() .describe('Maximum existing wiki pages updated per run.'), }) .describe('Notion context-source connection.'); const dbtConnectionSchema = z .looseObject({ driver: z.literal('dbt'), source_dir: z.string().min(1).optional().describe('Absolute or project-relative path to a local dbt project.'), repo_url: z.string().min(1).optional().describe('Git URL of the dbt project (https, ssh, or file:).'), branch: z.string().min(1).optional().describe('Git branch when using repo_url.'), path: z.string().optional().describe('Subdirectory within the repo when the dbt project lives in a monorepo.'), auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos.'), profiles_path: z.string().optional().describe('Override path to dbt profiles.yml.'), target: z.string().min(1).optional().describe('dbt target name (e.g. dev, prod).'), project_name: z.string().min(1).optional().describe('Override auto-detected dbt project name.'), }) .describe('dbt context-source connection.'); const metricflowConnectionSchema = z .looseObject({ driver: z.literal('metricflow'), metricflow: z .looseObject({ repoUrl: z.string().min(1).describe('Git URL of the MetricFlow / SL project.'), branch: z.string().min(1).optional().describe('Git branch (default "main").'), path: z.string().optional().describe('Subdirectory within the repo when the SL config lives in a monorepo.'), auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos.'), }) .describe('Nested MetricFlow configuration block.'), }) .describe('MetricFlow / SL context-source connection.'); export const connectionConfigSchema = z.discriminatedUnion('driver', [ ...warehouseConnectionSchemas, metabaseConnectionSchema, lookerConnectionSchema, lookmlConnectionSchema, notionConnectionSchema, dbtConnectionSchema, metricflowConnectionSchema, ]); export type KtxConnectionConfig = z.infer;