From acd20ac24837e981eeb55762df35ddc4020af8ef Mon Sep 17 00:00:00 2001 From: "Matt Senick (Sigma)" Date: Tue, 30 Jun 2026 16:14:57 -0700 Subject: [PATCH] feat(sigma): add Sigma Computing context-source adapter (#316) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(sigma): add Sigma Computing context-source adapter Closes #168 Adds a full ingest adapter for Sigma Computing so `ktx ingest` can pull data model specs and workbook summaries into the ktx context layer. The implementation follows the same fetch → chunk → project → LLM pattern used by the Looker, Metabase, and MetricFlow adapters. Co-Authored-By: Claude Sonnet 4.6 * fix(sigma): address PR review comments - Remove manifest from rawFiles; moves to peerFileIndex so fetchedAt changes don't mark all work units dirty every run - Fix workbookFilter.updatedSince eviction bug: fetch full universe first, apply filter client-side, evict only on archived/deleted - Remove measure projection entirely; project() writes measures: [] and the sigma_ingest skill surfaces Lookup/aggregation formulas as wiki prose - Remove joins projection (v1 limitation); project() writes joins: [] and Lookup relationships are described in wiki prose instead - Remove write-back dead code: createDataModel, updateDataModel, SigmaDataModelPushResult, mutate/post/put - Fix emitBatches notes pluralization bug ('2 data modelss' → '2 data models') - Add tokenInflight dedup on ensureToken to coalesce concurrent auth requests - Retry spec fetch when existing staged spec is null (transient failure cache) - Drop unused WorkbookFilter import from client-port.ts - Note in docs that joins are not projected from Sigma data models in this release Co-Authored-By: Claude Sonnet 4.6 * updates * fix(sigma): restore sigma in local adapter test + small cleanups The gdrive↔sigma merge dropped 'sigma' from the expected adapter source list in local-adapters.test.ts while keeping gdrive, so the slow TS suite failed even 
though the source registers both. Add 'sigma' back at its registration position (after metabase, before gdrive). Also: - Move the orphaned SigmaPullConfig docstring onto the schema it documents and drop the stale BullMQ reference (standalone ktx has no BullMQ; the config lives in the ingest job's bundleRef.config). - Drop an O(n^2) find() round-trip in fetch() when building the active data-model list; filter once and reuse for the eviction id set. --------- Co-authored-by: Claude Sonnet 4.6 Co-authored-by: Andrey Avtomonov Co-authored-by: Luca Martial <48870843+luca-martial@users.noreply.github.com> --- .../content/docs/cli-reference/ktx-ingest.mdx | 2 +- .../content/docs/cli-reference/ktx-setup.mdx | 9 +- .../content/docs/configuration/ktx-yaml.mdx | 26 + .../content/docs/guides/building-context.mdx | 1 + .../docs/integrations/context-sources.mdx | 99 +++- packages/cli/src/commands/setup-commands.ts | 1 + .../context/ingest/adapters/sigma/chunk.ts | 148 ++++++ .../ingest/adapters/sigma/client-port.ts | 51 ++ .../context/ingest/adapters/sigma/client.ts | 231 ++++++++ .../context/ingest/adapters/sigma/detect.ts | 21 + .../context/ingest/adapters/sigma/fetch.ts | 241 +++++++++ .../adapters/sigma/local-sigma.adapter.ts | 76 +++ .../context/ingest/adapters/sigma/project.ts | 231 ++++++++ .../ingest/adapters/sigma/sigma.adapter.ts | 53 ++ .../context/ingest/adapters/sigma/types.ts | 105 ++++ .../cli/src/context/ingest/local-adapters.ts | 29 +- .../cli/src/context/project/driver-schemas.ts | 42 ++ packages/cli/src/public-ingest.ts | 1 + packages/cli/src/setup-sources.ts | 86 ++- packages/cli/src/skills/sigma_ingest/SKILL.md | 189 +++++++ .../ingest/adapters/sigma/chunk.test.ts | 325 ++++++++++++ .../ingest/adapters/sigma/client.test.ts | 309 +++++++++++ .../ingest/adapters/sigma/detect.test.ts | 61 +++ .../ingest/adapters/sigma/fetch.test.ts | 493 ++++++++++++++++++ .../ingest/adapters/sigma/project.test.ts | 301 +++++++++++ .../adapters/sigma/sigma.adapter.test.ts | 64 
+++ .../ingest/adapters/sigma/types.test.ts | 113 ++++ .../context/ingest/local-adapters.test.ts | 1 + .../memory/memory-runtime-assets.test.ts | 2 + .../sigma/empty-manifest/sigma-manifest.json | 5 + .../multi-folder/data-models/dm-aaa111.json | 9 + .../multi-folder/data-models/dm-bbb222.json | 9 + .../multi-folder/data-models/dm-ccc333.json | 9 + .../sigma/multi-folder/sigma-manifest.json | 6 + .../multi-folder/workbooks/wb-yyy222.json | 9 + .../multi-folder/workbooks/wb-zzz333.json | 10 + .../single-folder/data-models/dm-aaa111.json | 9 + .../single-folder/data-models/dm-bbb222.json | 9 + .../sigma/single-folder/sigma-manifest.json | 6 + .../single-folder/workbooks/wb-xxx111.json | 10 + packages/cli/test/setup-sources.test.ts | 214 ++++++++ 41 files changed, 3610 insertions(+), 6 deletions(-) create mode 100644 packages/cli/src/context/ingest/adapters/sigma/chunk.ts create mode 100644 packages/cli/src/context/ingest/adapters/sigma/client-port.ts create mode 100644 packages/cli/src/context/ingest/adapters/sigma/client.ts create mode 100644 packages/cli/src/context/ingest/adapters/sigma/detect.ts create mode 100644 packages/cli/src/context/ingest/adapters/sigma/fetch.ts create mode 100644 packages/cli/src/context/ingest/adapters/sigma/local-sigma.adapter.ts create mode 100644 packages/cli/src/context/ingest/adapters/sigma/project.ts create mode 100644 packages/cli/src/context/ingest/adapters/sigma/sigma.adapter.ts create mode 100644 packages/cli/src/context/ingest/adapters/sigma/types.ts create mode 100644 packages/cli/src/skills/sigma_ingest/SKILL.md create mode 100644 packages/cli/test/context/ingest/adapters/sigma/chunk.test.ts create mode 100644 packages/cli/test/context/ingest/adapters/sigma/client.test.ts create mode 100644 packages/cli/test/context/ingest/adapters/sigma/detect.test.ts create mode 100644 packages/cli/test/context/ingest/adapters/sigma/fetch.test.ts create mode 100644 packages/cli/test/context/ingest/adapters/sigma/project.test.ts create mode 
100644 packages/cli/test/context/ingest/adapters/sigma/sigma.adapter.test.ts create mode 100644 packages/cli/test/context/ingest/adapters/sigma/types.test.ts create mode 100644 packages/cli/test/fixtures/sigma/empty-manifest/sigma-manifest.json create mode 100644 packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-aaa111.json create mode 100644 packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-bbb222.json create mode 100644 packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-ccc333.json create mode 100644 packages/cli/test/fixtures/sigma/multi-folder/sigma-manifest.json create mode 100644 packages/cli/test/fixtures/sigma/multi-folder/workbooks/wb-yyy222.json create mode 100644 packages/cli/test/fixtures/sigma/multi-folder/workbooks/wb-zzz333.json create mode 100644 packages/cli/test/fixtures/sigma/single-folder/data-models/dm-aaa111.json create mode 100644 packages/cli/test/fixtures/sigma/single-folder/data-models/dm-bbb222.json create mode 100644 packages/cli/test/fixtures/sigma/single-folder/sigma-manifest.json create mode 100644 packages/cli/test/fixtures/sigma/single-folder/workbooks/wb-xxx111.json diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index 6bff774f..0ab6a274 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -8,7 +8,7 @@ can also capture free-form text into **ktx** memory. Database connections build enriched context — schema plus AI-generated descriptions, embeddings, and relationship evidence — and require a configured model and embeddings. Context-source connections ingest metadata from tools such as dbt, Looker, -Metabase, MetricFlow, LookML, and Notion. Pass `--text` or `--file` to capture +Metabase, MetricFlow, LookML, Notion, and Sigma. Pass `--text` or `--file` to capture inline text or text files into memory instead. 
## Command signature diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 10c27c16..a424626f 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -193,7 +193,7 @@ sources. This is equivalent to passing `--skip-sources` in scripted setup. | Flag | Description | |------|-------------| -| `--source ` | Context-source connector type: `dbt`, `metricflow`, `metabase`, `looker`, `lookml`, or `notion` | +| `--source ` | Context-source connector type: `dbt`, `metricflow`, `metabase`, `looker`, `lookml`, `notion`, or `sigma` | | `--source-connection-id ` | Connection id for context-source setup | | `--source-path ` | Local source path for dbt, MetricFlow, or LookML | | `--source-git-url ` | Git URL for dbt, MetricFlow, or LookML | @@ -278,6 +278,13 @@ ktx setup \ --notion-crawl-mode selected_roots \ --notion-root-page-id abc123def456 +# Add a Sigma source +ktx setup \ + --source sigma \ + --source-connection-id sigma-main \ + --source-client-id your-client-id \ + --source-client-secret-ref env:SIGMA_CLIENT_SECRET + # Install project-scoped agent integration for Codex ktx setup --agents --target codex ``` diff --git a/docs-site/content/docs/configuration/ktx-yaml.mdx b/docs-site/content/docs/configuration/ktx-yaml.mdx index 24e58e39..2ff54cbd 100644 --- a/docs-site/content/docs/configuration/ktx-yaml.mdx +++ b/docs-site/content/docs/configuration/ktx-yaml.mdx @@ -119,6 +119,7 @@ context-source drivers share the map. 
| `dbt` | Context source | `driver`, one of `source_dir` or `repo_url` | `branch`, `path`, `profiles_path`, `target`, `project_name` | | `metricflow` | Context source | `driver`, `metricflow.repoUrl` | `metricflow.branch`, `metricflow.path`, `metricflow.auth_token_ref` | | `notion` | Context source | `driver`, `auth_token_ref` | `crawl_mode`, `root_*_ids`, `max_*_per_run` | +| `sigma` | Context source | `driver`, `client_id`, `client_secret_ref` | `api_url`, `connectionMappings`, `workbookFilter` | ### Warehouse drivers @@ -345,6 +346,31 @@ connections: | `max_knowledge_creates_per_run` | Max new wiki pages created per run (0-25). | | `max_knowledge_updates_per_run` | Max existing wiki pages updated per run (0-100). | +### Sigma + +```yaml +connections: + sigma-main: + driver: sigma + api_url: https://api.sigmacomputing.com + client_id: "" + client_secret_ref: env:SIGMA_CLIENT_SECRET + workbookFilter: + includeArchived: false + includeExplorations: false + updatedSince: "2026-01-01T00:00:00Z" +``` + +| Field | Purpose | +|-------|---------| +| `api_url` | Sigma API base URL. Defaults to `https://api.sigmacomputing.com` (GCP US). Override for AWS US (`https://aws-api.sigmacomputing.com`) or other regions. | +| `client_id` | Sigma OAuth client ID. Required. | +| `client_secret` / `client_secret_ref` | Literal secret or reference. Prefer the `_ref`. | +| `connectionMappings` | Maps Sigma internal connection UUIDs to **ktx** warehouse connection IDs. Enables `sl_validate` for projected semantic-layer sources. | +| `workbookFilter.includeArchived` | Include archived workbooks during ingest. Default: `false`. | +| `workbookFilter.includeExplorations` | Include exploration workbooks during ingest. Default: `false`. | +| `workbookFilter.updatedSince` | ISO 8601 date string. Only workbooks updated on or after this date are ingested; the filter is applied client-side after the full workbook list is retrieved. Useful for limiting ingest scope at large scale. | + ## `setup` Captured by the setup wizard. 
The only field **ktx** still reads is diff --git a/docs-site/content/docs/guides/building-context.mdx b/docs-site/content/docs/guides/building-context.mdx index 24550c85..f494abf7 100644 --- a/docs-site/content/docs/guides/building-context.mdx +++ b/docs-site/content/docs/guides/building-context.mdx @@ -102,6 +102,7 @@ Supported source types: | `looker` | Looker API | Explores, looks, dashboards, and model metadata | | `metabase` | Metabase API | Questions, dashboards, table metadata, and mappings | | `notion` | Notion API | Wiki pages and business knowledge | +| `sigma` | Sigma API | Data model specs, pages, element metadata, and workbook metadata | Context-source ingest writes semantic source YAML and wiki Markdown, reconciling with local edits. diff --git a/docs-site/content/docs/integrations/context-sources.mdx b/docs-site/content/docs/integrations/context-sources.mdx index ed789266..3bfc3ff3 100644 --- a/docs-site/content/docs/integrations/context-sources.mdx +++ b/docs-site/content/docs/integrations/context-sources.mdx @@ -1,6 +1,6 @@ --- title: Context Sources -description: Ingest semantic context from dbt, MetricFlow, LookML, Metabase, Looker, Notion, and Google Drive. +description: Ingest semantic context from dbt, MetricFlow, LookML, Metabase, Looker, Notion, Sigma, and Google Drive. --- Context sources feed your existing analytics tooling into **ktx**. During ingestion, **ktx** extracts metadata from each source and uses a reconciliation agent to reconcile it with your existing semantic layer and knowledge base - preserving accepted edits rather than overwriting. 
@@ -27,7 +27,7 @@ LookML uses top-level `repoUrl`, and MetricFlow uses nested | Field | Required | Description | |-------|----------|-------------| -| `driver` | Yes | Source connector: `dbt`, `metricflow`, `lookml`, `metabase`, `looker`, `notion`, or `gdrive` | +| `driver` | Yes | Source connector: `dbt`, `metricflow`, `lookml`, `metabase`, `looker`, `notion`, `sigma`, or `gdrive` | | `source_dir` | For local file sources | Absolute or project-relative source directory | | `repo_url` | For Git-hosted dbt sources | Git repository URL | | `repoUrl` | For Git-hosted LookML sources | Git repository URL | @@ -378,6 +378,101 @@ Create an integration at [notion.so/my-integrations](https://www.notion.so/my-in --- +## Sigma + +Ingests data model definitions and workbook metadata from a Sigma workspace as semantic context. Uses the Sigma REST API to fetch data model specs and workbook summaries. + +### What it provides + +- Data model names, folder paths, and ownership metadata +- Page and element definitions within each data model +- Column identifiers and data types where available +- Workbook names, paths, descriptions, and version metadata + +### Connection config + +```yaml title="ktx.yaml" +connections: + sigma-main: + driver: sigma + api_url: https://api.sigmacomputing.com # Omit for GCP US (default) + client_id: "" + client_secret_ref: env:SIGMA_CLIENT_SECRET +``` + +For the AWS US region, override `api_url`: + +```yaml title="ktx.yaml" +connections: + sigma-main: + driver: sigma + api_url: https://aws-api.sigmacomputing.com + client_id: "" + client_secret_ref: env:SIGMA_CLIENT_SECRET +``` + +### Authentication + +| Method | Config | +|--------|--------| +| OAuth client credentials | `client_id` + `client_secret_ref: env:SIGMA_CLIENT_SECRET` | + +Generate a client in Sigma: **Administration → Developer Access → Add New Client**. 
+ +### What gets ingested + +- Active data model specs, organized by folder into work units +- Workbook metadata (name, path, description, version) — archived and exploration workbooks excluded by default +- Models backed by CSV uploads or unsupported connector subtypes are listed in the manifest but skipped during spec fetch (a Sigma API limitation) + +### Warehouse connection mapping + +`connectionMappings` is optional. Without it, **ktx** produces wiki knowledge only — no semantic-layer sources are written and warehouse validation is skipped. To get semantic-layer output and enable `sl_validate`, map each Sigma internal connection UUID to a **ktx** warehouse connection ID: + +```yaml title="ktx.yaml" +connections: + sigma-main: + driver: sigma + client_id: "" + client_secret_ref: env:SIGMA_CLIENT_SECRET + connectionMappings: + "": snowflake-prod # data models using this connection get SL sources +``` + +Find the Sigma connection UUID in **Administration → Connections** or from the `source.connectionId` field in a fetched data model spec. Data model elements whose `connectionId` has no mapping are ingested as wiki-only. 
+ +### Workbook filter + +At large scale, you can limit which workbooks are ingested using `workbookFilter`: + +```yaml title="ktx.yaml" +connections: + sigma-main: + driver: sigma + client_id: "" + client_secret_ref: env:SIGMA_CLIENT_SECRET + workbookFilter: + includeArchived: false # default + includeExplorations: false # default + updatedSince: "2026-01-01T00:00:00Z" # only recently updated workbooks +``` + +| Field | Default | Description | +|-------|---------|-------------| +| `includeArchived` | `false` | Include archived workbooks | +| `includeExplorations` | `false` | Include exploration workbooks | +| `updatedSince` | — | ISO 8601 date; only workbooks updated on or after this date are ingested (filtered client-side after listing) | + +### Notes + +- `connectionMappings` is optional for wiki-only ingest; it is required to generate semantic-layer sources and run warehouse validation +- Context ingest (`ktx ingest sigma-main`) fetches from the Sigma API directly +- Ingest is incremental: items whose `updatedAt` timestamp is unchanged since the last run are skipped +- Models backed by CSV uploads or unsupported connector subtypes cannot have their spec exported; these are skipped with a warning (a Sigma API limitation) +- Joins are not projected from Sigma data models in this release; `joins: []` is always written by the projection step. Lookup relationships visible in data model specs are captured as wiki knowledge instead. + +--- + ## Google Drive Ingests Google Docs from a shared Google Drive folder as wiki-ready knowledge content. This v1 implementation is knowledge-only and ingests Google Docs MIME types only. 
diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts index 5f86b9c9..ff8f015d 100644 --- a/packages/cli/src/commands/setup-commands.ts +++ b/packages/cli/src/commands/setup-commands.ts @@ -58,6 +58,7 @@ function sourceType(value: string): KtxSetupSourceType { value === 'looker' || value === 'lookml' || value === 'notion' || + value === 'sigma' || value === 'gdrive' ) { return value; diff --git a/packages/cli/src/context/ingest/adapters/sigma/chunk.ts b/packages/cli/src/context/ingest/adapters/sigma/chunk.ts new file mode 100644 index 00000000..b0e6b1e6 --- /dev/null +++ b/packages/cli/src/context/ingest/adapters/sigma/chunk.ts @@ -0,0 +1,148 @@ +import { readdir, readFile } from 'node:fs/promises'; +import { join, relative } from 'node:path'; +import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js'; +import { + type SigmaManifest, + type StagedDataModelFile, + type StagedWorkbookFile, + sigmaManifestSchema, + stagedDataModelFileSchema, + stagedWorkbookFileSchema, + STAGED_FILES, +} from './types.js'; + +interface LoadedBundle { + manifest: SigmaManifest | null; + dataModelsByPath: Map; + workbooksByPath: Map; + allPaths: string[]; +} + +async function walkStagedDir(stagedDir: string): Promise { + let entries; + try { + entries = await readdir(stagedDir, { withFileTypes: true, recursive: true }); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') return []; + throw err; + } + const paths: string[] = []; + for (const entry of entries) { + if (!entry.isFile()) continue; + const abs = join(entry.parentPath, entry.name); + paths.push(relative(stagedDir, abs).replace(/\\/g, '/')); + } + paths.sort(); + return paths; +} + +async function loadBundle(stagedDir: string): Promise { + const allPaths = await walkStagedDir(stagedDir); + let manifest: SigmaManifest | null = null; + try { + const body = await readFile(join(stagedDir, STAGED_FILES.manifest), 'utf-8'); + manifest = 
sigmaManifestSchema.parse(JSON.parse(body)); + } catch { + manifest = null; + } + + const dataModelsByPath = new Map(); + const dmPrefix = `${STAGED_FILES.dataModelsDir}/`; + for (const path of allPaths) { + if (!path.startsWith(dmPrefix) || !path.endsWith('.json')) continue; + try { + const body = await readFile(join(stagedDir, path), 'utf-8'); + const parsed = stagedDataModelFileSchema.parse(JSON.parse(body)); + dataModelsByPath.set(path, parsed); + } catch { + // Malformed file — skip. + } + } + + const workbooksByPath = new Map(); + const wbPrefix = `${STAGED_FILES.workbooksDir}/`; + for (const path of allPaths) { + if (!path.startsWith(wbPrefix) || !path.endsWith('.json')) continue; + try { + const body = await readFile(join(stagedDir, path), 'utf-8'); + const parsed = stagedWorkbookFileSchema.parse(JSON.parse(body)); + workbooksByPath.set(path, parsed); + } catch { + // Malformed file — skip. + } + } + + return { manifest, dataModelsByPath, workbooksByPath, allPaths }; +} + +/** Max data models per LLM work unit. Controls parallel processing granularity. */ +const DATA_MODELS_PER_UNIT = 50; +/** Max workbooks per LLM work unit. Controls incremental re-sync granularity. */ +const WORKBOOKS_PER_UNIT = 2000; + +function emitBatches( + paths: string[], + perUnit: number, + unitKeyBase: string, + labelBase: string, + noun: string, + allPaths: string[], +): WorkUnit[] { + const batches = Math.ceil(paths.length / perUnit) || 0; + const units: WorkUnit[] = []; + for (let i = 0; i < batches; i++) { + const batch = paths.slice(i * perUnit, (i + 1) * perUnit); + const rawFiles = [...batch].sort(); + const rawFilesSet = new Set(rawFiles); + const suffix = batches > 1 ? `-${i}` : ''; + units.push({ + unitKey: `${unitKeyBase}${suffix}`, + displayLabel: batches > 1 ? `${labelBase} (${i + 1}/${batches})` : labelBase, + rawFiles, + peerFileIndex: allPaths.filter((p) => !rawFilesSet.has(p)).sort(), + dependencyPaths: [], + notes: `${batch.length} ${noun}${batch.length === 1 ? 
'' : 's'}`, + }); + } + return units; +} + +function emitWorkUnits(bundle: LoadedBundle): WorkUnit[] { + if (!bundle.manifest) return []; + const dmPaths = [...bundle.dataModelsByPath.keys()].sort(); + const wbPaths = [...bundle.workbooksByPath.keys()].sort(); + return [ + ...emitBatches(dmPaths, DATA_MODELS_PER_UNIT, 'sigma-data-models', 'Sigma: data models', 'data model', bundle.allPaths), + ...emitBatches(wbPaths, WORKBOOKS_PER_UNIT, 'sigma-workbooks', 'Sigma: workbooks', 'workbook', bundle.allPaths), + ]; +} + +interface ChunkOptions { + diffSet?: DiffSet; +} + +export async function chunkSigmaStagedDir(stagedDir: string, opts: ChunkOptions = {}): Promise { + const bundle = await loadBundle(stagedDir); + if (!bundle.manifest) { + return { workUnits: [] }; + } + + const firstRunUnits = emitWorkUnits(bundle); + if (!opts.diffSet) { + return { workUnits: firstRunUnits }; + } + + const touched = new Set([...opts.diffSet.added, ...opts.diffSet.modified]); + const kept: WorkUnit[] = []; + for (const wu of firstRunUnits) { + const anyTouched = wu.rawFiles.some((p) => touched.has(p)); + if (!anyTouched) continue; + const changedFiles = wu.rawFiles.filter((p) => touched.has(p)); + const unchangedFiles = wu.rawFiles.filter((p) => !touched.has(p)); + const deps = new Set([...wu.dependencyPaths, ...unchangedFiles]); + kept.push({ ...wu, rawFiles: changedFiles.sort(), dependencyPaths: [...deps].sort() }); + } + const eviction = + opts.diffSet.deleted.length > 0 ? 
{ deletedRawPaths: [...opts.diffSet.deleted].sort() } : undefined; + return { workUnits: kept, eviction }; +} diff --git a/packages/cli/src/context/ingest/adapters/sigma/client-port.ts b/packages/cli/src/context/ingest/adapters/sigma/client-port.ts new file mode 100644 index 00000000..cdcba6f0 --- /dev/null +++ b/packages/cli/src/context/ingest/adapters/sigma/client-port.ts @@ -0,0 +1,51 @@ +import type { FetchContext } from '../../types.js'; +import type { SigmaPullConfig, WorkbookFilterInput } from './types.js'; + +export interface SigmaTestConnectionResult { + success: boolean; + message?: string; + error?: string; +} + +/** Data model summary shape from GET /v2/dataModels list response. */ +export interface SigmaDataModelSummary { + dataModelId: string; + dataModelUrlId: string; + name: string; + path: string; + latestVersion: number; + ownerId: string; + createdAt: string; + updatedAt: string; + isArchived?: boolean; +} + +/** Workbook summary shape from GET /v2/workbooks list response. */ +export interface SigmaWorkbookSummary { + workbookId: string; + workbookUrlId: string; + name: string; + path: string; + latestVersion: number; + ownerId: string; + createdAt: string; + updatedAt: string; + isArchived?: boolean; + description?: string; +} + +/** Re-exported so callers can reference the type without importing from types.ts directly. */ +export type { WorkbookFilterInput as ListWorkbooksOptions } from './types.js'; + +export interface SigmaRuntimeClient { + testConnection(): Promise; + listDataModels(): Promise; + listWorkbooks(opts?: WorkbookFilterInput): Promise; + /** Returns the raw spec object from GET /v2/dataModels/{id}/spec. 
*/ + getDataModelSpec(dataModelId: string): Promise; + cleanup(): Promise; +} + +export interface SigmaClientFactory { + createClient(config: SigmaPullConfig, ctx: FetchContext): Promise | SigmaRuntimeClient; +} diff --git a/packages/cli/src/context/ingest/adapters/sigma/client.ts b/packages/cli/src/context/ingest/adapters/sigma/client.ts new file mode 100644 index 00000000..dd5be3d3 --- /dev/null +++ b/packages/cli/src/context/ingest/adapters/sigma/client.ts @@ -0,0 +1,231 @@ +import type { + ListWorkbooksOptions, + SigmaDataModelSummary, + SigmaRuntimeClient, + SigmaTestConnectionResult, + SigmaWorkbookSummary, +} from './client-port.js'; + +export interface SigmaClientRuntimeConfig { + apiUrl: string; + clientId: string; + clientSecret: string; +} + +export interface SigmaClientConfig { + maxRetries: number; + baseDelayMs: number; + maxDelayMs: number; + timeoutMs: number; +} + +export const DEFAULT_SIGMA_CLIENT_CONFIG: SigmaClientConfig = { + maxRetries: 3, + baseDelayMs: 500, + maxDelayMs: 10_000, + timeoutMs: 30_000, +}; + +interface TokenResponse { + access_token: string; + refresh_token?: string; + token_type: string; + expires_in: number; +} + +interface PaginatedResponse { + entries: T[]; + nextPage: string | null; + total?: number; +} + +function isNonRetryable500(text: string): boolean { + try { + const body = JSON.parse(text) as Record; + // service_error indicates a deterministic Sigma rejection (e.g. unsupported data + // source subtype). Retrying will not help, so throw immediately. 
+ return body['code'] === 'service_error'; + } catch { + return false; + } +} + +export class DefaultSigmaClient implements SigmaRuntimeClient { + private accessToken: string | null = null; + private refreshToken: string | null = null; + private tokenExpiresAt = 0; + private tokenInflight: Promise | null = null; + + constructor( + private readonly runtimeConfig: SigmaClientRuntimeConfig, + private readonly clientConfig: SigmaClientConfig = DEFAULT_SIGMA_CLIENT_CONFIG, + ) {} + + private get apiUrl(): string { + return this.runtimeConfig.apiUrl.replace(/\/$/, ''); + } + + private basicAuthHeader(): string { + const credentials = Buffer.from( + `${this.runtimeConfig.clientId}:${this.runtimeConfig.clientSecret}`, + ).toString('base64'); + return `Basic ${credentials}`; + } + + private async fetchToken(body: URLSearchParams): Promise { + const res = await fetch(`${this.apiUrl}/v2/auth/token`, { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + Authorization: this.basicAuthHeader(), + }, + body: body.toString(), + signal: AbortSignal.timeout(this.clientConfig.timeoutMs), + }); + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Sigma auth failed (${res.status}): ${text}`); + } + return res.json() as Promise; + } + + private async ensureToken(): Promise { + const now = Date.now(); + // Refresh 60 s before expiry so in-flight requests don't get 401. + if (this.accessToken && now < this.tokenExpiresAt - 60_000) { + return; + } + if (this.tokenInflight) return this.tokenInflight; + const body = new URLSearchParams(); + if (this.refreshToken) { + body.set('grant_type', 'refresh_token'); + body.set('refresh_token', this.refreshToken); + } else { + body.set('grant_type', 'client_credentials'); + } + this.tokenInflight = this.fetchToken(body) + .then((data) => { + this.accessToken = data.access_token; + this.refreshToken = data.refresh_token ?? 
null; + this.tokenExpiresAt = Date.now() + data.expires_in * 1000; + }) + .finally(() => { + this.tokenInflight = null; + }); + return this.tokenInflight; + } + + private async request(path: string, query?: Record): Promise { + await this.ensureToken(); + + const url = new URL(`${this.apiUrl}${path}`); + if (query) { + for (const [k, v] of Object.entries(query)) { + url.searchParams.set(k, v); + } + } + + let lastError: Error | null = null; + for (let attempt = 0; attempt <= this.clientConfig.maxRetries; attempt++) { + if (attempt > 0) { + const delay = Math.min( + this.clientConfig.baseDelayMs * 2 ** (attempt - 1), + this.clientConfig.maxDelayMs, + ); + await new Promise((resolve) => setTimeout(resolve, delay)); + } + + const res = await fetch(url.toString(), { + headers: { Authorization: `Bearer ${this.accessToken}` }, + signal: AbortSignal.timeout(this.clientConfig.timeoutMs), + }); + + if (res.status === 401) { + // Token rejected — force full re-auth and retry once. + this.accessToken = null; + this.refreshToken = null; + this.tokenExpiresAt = 0; + await this.ensureToken(); + const retried = await fetch(url.toString(), { + headers: { Authorization: `Bearer ${this.accessToken}` }, + signal: AbortSignal.timeout(this.clientConfig.timeoutMs), + }); + if (!retried.ok) { + const text = await retried.text().catch(() => ''); + throw new Error(`Sigma API error after token refresh (${retried.status}): ${text}`); + } + return retried.json() as Promise; + } + + if (res.status === 429 || res.status >= 500) { + const text = await res.text().catch(() => ''); + lastError = new Error(`Sigma API error (${res.status}): ${text}`); + if (isNonRetryable500(text)) throw lastError; + continue; + } + + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Sigma API error (${res.status}): ${text}`); + } + + return res.json() as Promise; + } + + throw lastError ?? 
new Error('Sigma API request failed after retries'); + } + + private async paginateAll(path: string, query: Record = {}): Promise { + const all: T[] = []; + let page: string | null = null; + do { + const q: Record = { ...query, limit: '1000' }; + if (page) { + q['page'] = page; + } + const res = await this.request>(path, q); + all.push(...res.entries); + page = res.nextPage ?? null; + } while (page !== null); + return all; + } + + async testConnection(): Promise { + try { + await this.ensureToken(); + return { success: true }; + } catch (err) { + return { success: false, error: err instanceof Error ? err.message : String(err) }; + } + } + + async listDataModels(): Promise { + return this.paginateAll('/v2/dataModels'); + } + + async listWorkbooks(opts: ListWorkbooksOptions = {}): Promise { + const query: Record = {}; + if (!opts.includeExplorations) query['excludeExplorations'] = 'true'; + + let results = await this.paginateAll('/v2/workbooks', query); + + if (!opts.includeArchived) { + results = results.filter((wb) => !wb.isArchived); + } + if (opts.updatedSince) { + const since = new Date(opts.updatedSince).getTime(); + results = results.filter((wb) => new Date(wb.updatedAt).getTime() >= since); + } + return results; + } + + async getDataModelSpec(dataModelId: string): Promise { + return this.request(`/v2/dataModels/${encodeURIComponent(dataModelId)}/spec`); + } + + async cleanup(): Promise { + this.accessToken = null; + this.refreshToken = null; + this.tokenExpiresAt = 0; + } +} diff --git a/packages/cli/src/context/ingest/adapters/sigma/detect.ts b/packages/cli/src/context/ingest/adapters/sigma/detect.ts new file mode 100644 index 00000000..c64c56d1 --- /dev/null +++ b/packages/cli/src/context/ingest/adapters/sigma/detect.ts @@ -0,0 +1,21 @@ +import { readdir, stat } from 'node:fs/promises'; +import { join } from 'node:path'; +import { STAGED_FILES } from './types.js'; + +export async function detectSigmaStagedDir(stagedDir: string): Promise { + try { + await 
stat(join(stagedDir, STAGED_FILES.manifest)); + } catch { + return false; + } + for (const subdir of [STAGED_FILES.dataModelsDir, STAGED_FILES.workbooksDir]) { + let entries: string[]; + try { + entries = await readdir(join(stagedDir, subdir)); + } catch { + continue; + } + if (entries.some((name) => name.endsWith('.json'))) return true; + } + return false; +} diff --git a/packages/cli/src/context/ingest/adapters/sigma/fetch.ts b/packages/cli/src/context/ingest/adapters/sigma/fetch.ts new file mode 100644 index 00000000..7c4057f2 --- /dev/null +++ b/packages/cli/src/context/ingest/adapters/sigma/fetch.ts @@ -0,0 +1,241 @@ +import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { FetchContext } from '../../types.js'; +import type { SigmaClientFactory } from './client-port.js'; +import { + type SigmaManifest, + type SigmaProjectionConfig, + type StagedDataModelFile, + type StagedWorkbookFile, + parseSigmaPullConfig, + stagedDataModelFileSchema, + stagedWorkbookFileSchema, + STAGED_FILES, +} from './types.js'; + +export interface SigmaFetchLogger { + log(message: string): void; + warn(message: string): void; +} + +const noopLogger: SigmaFetchLogger = { log: () => undefined, warn: () => undefined }; + +export interface FetchSigmaBundleParams { + pullConfig: unknown; + stagedDir: string; + ctx: FetchContext; + clientFactory: SigmaClientFactory; + logger?: SigmaFetchLogger; +} + +async function loadExistingStagedFiles(stagedDir: string): Promise> { + const existing = new Map(); + const dmDir = join(stagedDir, STAGED_FILES.dataModelsDir); + let entries: string[]; + try { + entries = await readdir(dmDir); + } catch { + return existing; + } + for (const entry of entries) { + if (!entry.endsWith('.json')) continue; + try { + const body = await readFile(join(dmDir, entry), 'utf-8'); + const parsed = stagedDataModelFileSchema.parse(JSON.parse(body)); + existing.set(parsed.sigmaId, parsed); + } catch { + // 
/**
 * Convert an optional `updatedSince` filter into epoch milliseconds.
 *
 * @returns `null` when no filter is configured (undefined or empty string).
 * @throws Error when the value cannot be parsed as a date. Without this check the
 *         comparison threshold would be NaN and every item would be filtered out silently.
 */
function parseUpdatedSince(value: string | undefined, field: string): number | null {
  if (!value) return null;
  const ms = new Date(value).getTime();
  if (Number.isNaN(ms)) {
    throw new Error(`Invalid ${field}.updatedSince "${value}": expected an ISO 8601 date string`);
  }
  return ms;
}

/**
 * Pull data models and workbooks from Sigma into `stagedDir`.
 *
 * Steps:
 *  1. Parse the pull config and validate the `updatedSince` filters (before any client is created).
 *  2. Load previously staged files so unchanged items can be skipped.
 *  3. List data models, fetch specs for changed/new ones with bounded concurrency, and
 *     stage one JSON file per model. A failed spec fetch is logged and staged as `spec: null`
 *     so that the next run retries it.
 *  4. Remove staged data-model files whose model is no longer in the non-archived listing.
 *  5. List workbooks, stage changed/new ones, and evict those missing from the listing.
 *  6. Write the projection config and, last, the manifest.
 *
 * The client's `cleanup()` always runs once the client has been created.
 *
 * @throws Error when the pull config is invalid or an `updatedSince` value is not a date;
 *         errors from listing calls and file writes propagate as well.
 */
export async function fetchSigmaBundle({
  pullConfig,
  stagedDir,
  ctx,
  clientFactory,
  logger = noopLogger,
}: FetchSigmaBundleParams): Promise<void> {
  const config = parseSigmaPullConfig(pullConfig);

  // Resolve both thresholds once, up front: a bad date fails fast instead of producing an
  // empty run, and the workbook filter no longer re-parses the date for every element.
  const dataModelsSince = parseUpdatedSince(config.dataModelFilter?.updatedSince, 'dataModelFilter');
  const { updatedSince, ...filterWithoutSince } = config.workbookFilter ?? {};
  const workbooksSince = parseUpdatedSince(updatedSince, 'workbookFilter');

  const client = await clientFactory.createClient(config, ctx);

  try {
    await mkdir(join(stagedDir, STAGED_FILES.dataModelsDir), { recursive: true });
    await mkdir(join(stagedDir, STAGED_FILES.workbooksDir), { recursive: true });

    // Load existing staged files to enable incremental sync.
    const existingByModelId = await loadExistingStagedFiles(stagedDir);
    const existingByWorkbookId = await loadExistingWorkbookFiles(stagedDir);

    logger.log('Listing Sigma data models...');
    const summaries = await client.listDataModels();
    const nonArchived = summaries.filter((dm) => !dm.isArchived);
    // Eviction is decided against the full non-archived set, not the updatedSince slice.
    const nonArchivedIds = new Set(nonArchived.map((dm) => dm.dataModelId));
    const active =
      dataModelsSince === null
        ? nonArchived
        : nonArchived.filter((dm) => new Date(dm.updatedAt).getTime() >= dataModelsSince);
    logger.log(`Found ${active.length} active data model(s) (${summaries.length} total).`);

    let fetched = 0;
    let skipped = 0;

    // Worker pool: up to SPEC_CONCURRENCY workers drain a shared queue of summaries.
    const SPEC_CONCURRENCY = 10;
    const queue = [...active];
    await Promise.all(
      Array.from({ length: Math.min(SPEC_CONCURRENCY, queue.length) }, async () => {
        let summary: (typeof active)[number] | undefined;
        while ((summary = queue.shift()) !== undefined) {
          const existing = existingByModelId.get(summary.dataModelId);

          // Only skip when the cached spec was successfully fetched. spec: null means
          // the previous attempt failed transiently — retry regardless of updatedAt.
          if (existing && existing.updatedAt === summary.updatedAt && existing.spec !== null) {
            logger.log(`Unchanged: ${summary.name}`);
            skipped++;
            continue;
          }

          let spec: unknown = null;
          try {
            spec = await client.getDataModelSpec(summary.dataModelId);
          } catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            if (msg.includes('dataSource subtype not supported')) {
              logger.warn(
                `Skipping spec for "${summary.name}" (${summary.dataModelId}): data source type not supported by Sigma spec export API.`,
              );
            } else {
              logger.warn(`Failed to fetch spec for "${summary.name}" (${summary.dataModelId}): ${msg}`);
            }
          }

          const staged: StagedDataModelFile = {
            sigmaId: summary.dataModelId,
            name: summary.name,
            path: summary.path,
            latestVersion: summary.latestVersion,
            updatedAt: summary.updatedAt,
            isArchived: summary.isArchived ?? false,
            dataModelUrlId: summary.dataModelUrlId,
            spec,
          };

          const filePath = join(stagedDir, STAGED_FILES.dataModelsDir, `${summary.dataModelId}.json`);
          await writeFile(filePath, JSON.stringify(staged, null, 2), 'utf-8');
          logger.log(`Staged data model: ${summary.name}`);
          fetched++;
        }
      }),
    );

    // Remove staged files for models that are archived or deleted — but not those merely outside the filter window.
    for (const [modelId] of existingByModelId) {
      if (nonArchivedIds.has(modelId)) continue;
      try {
        await rm(join(stagedDir, STAGED_FILES.dataModelsDir, `${modelId}.json`));
        logger.log(`Removed stale staged file for model ${modelId}.`);
      } catch {
        // Best-effort removal.
      }
    }

    // Fetch workbooks (summary metadata only — no separate spec endpoint).
    // The listing is requested without updatedSince so eviction is based on every workbook
    // the listing returns, not just the updatedSince slice. Mirrors the data-model path.
    logger.log('Listing Sigma workbooks...');
    const allWorkbooks = await client.listWorkbooks(filterWithoutSince);
    const nonArchivedWorkbookIds = new Set(allWorkbooks.map((wb) => wb.workbookId));
    const activeWorkbooks =
      workbooksSince === null
        ? allWorkbooks
        : allWorkbooks.filter((wb) => new Date(wb.updatedAt).getTime() >= workbooksSince);
    logger.log(`Found ${activeWorkbooks.length} workbook(s) to process (${allWorkbooks.length} total).`);

    let workbooksFetched = 0;
    let workbooksSkipped = 0;

    for (const wb of activeWorkbooks) {
      const existing = existingByWorkbookId.get(wb.workbookId);

      if (existing && existing.updatedAt === wb.updatedAt) {
        workbooksSkipped++;
        continue;
      }

      const staged: StagedWorkbookFile = {
        sigmaId: wb.workbookId,
        name: wb.name,
        path: wb.path,
        latestVersion: wb.latestVersion,
        updatedAt: wb.updatedAt,
        isArchived: wb.isArchived ?? false,
        workbookUrlId: wb.workbookUrlId,
        description: wb.description,
      };

      const filePath = join(stagedDir, STAGED_FILES.workbooksDir, `${wb.workbookId}.json`);
      await writeFile(filePath, JSON.stringify(staged, null, 2), 'utf-8');
      logger.log(`Staged workbook: ${wb.name}`);
      workbooksFetched++;
    }

    // Evict only workbooks that are archived or deleted — not those outside the updatedSince window.
    for (const [workbookId] of existingByWorkbookId) {
      if (nonArchivedWorkbookIds.has(workbookId)) continue;
      try {
        await rm(join(stagedDir, STAGED_FILES.workbooksDir, `${workbookId}.json`));
        logger.log(`Removed stale staged file for workbook ${workbookId}.`);
      } catch {
        // Best-effort removal.
      }
    }

    const projectionConfig: SigmaProjectionConfig = {
      connectionMappings: config.connectionMappings ?? {},
      workbookFilter: config.workbookFilter ?? { includeArchived: false, includeExplorations: false },
    };
    await writeFile(
      join(stagedDir, STAGED_FILES.projectionConfig),
      JSON.stringify(projectionConfig, null, 2),
      'utf-8',
    );

    // The manifest is written last, after all staged files and the projection config.
    const manifest: SigmaManifest = {
      sigmaConnectionId: config.sigmaConnectionId,
      fetchedAt: new Date().toISOString(),
      dataModelCount: active.length,
      workbookCount: activeWorkbooks.length,
    };
    await writeFile(join(stagedDir, STAGED_FILES.manifest), JSON.stringify(manifest, null, 2), 'utf-8');
    logger.log(
      `Sigma fetch complete. Data models: ${fetched} fetched, ${skipped} unchanged. Workbooks: ${workbooksFetched} fetched, ${workbooksSkipped} unchanged.`,
    );
  } finally {
    await client.cleanup();
  }
}
/**
 * Resolve the runtime credentials for a Sigma connection declared in the local project config.
 *
 * @param connectionId Connection key, used only in error messages.
 * @param connection   The connection entry; must exist and have driver `sigma` (case-insensitive).
 * @param env          Environment passed to `resolveKtxConfigReference` when resolving
 *                     `client_secret_ref`. Defaults to `process.env`.
 * @returns `apiUrl` (without trailing slashes; defaults to https://api.sigmacomputing.com),
 *          `clientId` and `clientSecret`. A literal `client_secret` wins over `client_secret_ref`.
 * @throws Error when the connection is missing or not a Sigma connection, when `client_id`
 *         is absent, or when no client secret can be resolved.
 */
export function sigmaRuntimeConfigFromLocalConnection(
  connectionId: string,
  connection: KtxProjectConnectionConfig | undefined,
  env: NodeJS.ProcessEnv = process.env,
): { apiUrl: string; clientId: string; clientSecret: string } {
  if (!connection || String(connection.driver).toLowerCase() !== 'sigma') {
    throw new Error(`Connection "${connectionId}" is not a Sigma connection`);
  }

  // Drop trailing slashes. Request paths start with "/" (e.g. "/v2/dataModels") and are
  // appended to the base URL verbatim, so "https://host/" would otherwise yield
  // "https://host//v2/...".
  // NOTE(review): assumes DefaultSigmaClient uses this value unchanged as its base URL — confirm.
  const apiUrl = (stringField(connection.api_url) ?? 'https://api.sigmacomputing.com').replace(/\/+$/, '');
  const clientId = stringField(connection.client_id);
  const literalSecret = stringField(connection.client_secret);
  const secretRef = stringField(connection.client_secret_ref);
  const clientSecret =
    literalSecret ?? (secretRef ? (resolveKtxConfigReference(secretRef, env) ?? null) : null);

  if (!clientId) {
    throw new Error(`Connection "${connectionId}" is missing Sigma client_id`);
  }
  if (!clientSecret) {
    throw new Error(
      `Connection "${connectionId}" is missing Sigma client_secret or client_secret_ref`,
    );
  }

  return { apiUrl, clientId, clientSecret };
}
DEFAULT_SIGMA_CLIENT_CONFIG, + ); + } +} + +export function createLocalSigmaSourceAdapter( + project: KtxLocalProject, + options: CreateLocalSigmaSourceAdapterOptions = {}, +): SigmaSourceAdapter { + return new SigmaSourceAdapter({ + clientFactory: new LocalSigmaClientFactory(project, options), + ...(options.logger ? { logger: options.logger } : {}), + }); +} diff --git a/packages/cli/src/context/ingest/adapters/sigma/project.ts b/packages/cli/src/context/ingest/adapters/sigma/project.ts new file mode 100644 index 00000000..963583f3 --- /dev/null +++ b/packages/cli/src/context/ingest/adapters/sigma/project.ts @@ -0,0 +1,231 @@ +import { readdir, readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { z } from 'zod'; +import type { SemanticLayerService } from '../../../../context/sl/semantic-layer.service.js'; +import type { SemanticLayerSource } from '../../../../context/sl/types.js'; +import type { DeterministicProjectionContext, ProjectionResult } from '../../types.js'; +import { sigmaProjectionConfigSchema, stagedDataModelFileSchema, STAGED_FILES } from './types.js'; + +async function readProjectionConfig(stagedDir: string): Promise> { + try { + const body = await readFile(join(stagedDir, STAGED_FILES.projectionConfig), 'utf-8'); + return sigmaProjectionConfigSchema.parse(JSON.parse(body)).connectionMappings; + } catch { + return {}; + } +} + +const SIGMA_AUTHOR = { name: 'Sigma', email: 'system@kaelio.dev' } as const; + +// Best-effort schema for the raw spec blob stored in staged data model files. 
+const warehouseTableSourceSchema = z.object({ + kind: z.literal('warehouse-table'), + connectionId: z.string(), + path: z.array(z.string()), +}); + +const specColumnSchema = z + .object({ + id: z.string(), + formula: z.string().optional(), + name: z.string().optional(), + hidden: z.boolean().optional(), + description: z.string().optional(), + format: z.object({ kind: z.string() }).passthrough().optional(), + }) + .passthrough(); + +const specElementSchema = z + .object({ + id: z.string(), + kind: z.string().optional(), + name: z.string().optional(), + hidden: z.boolean().optional(), + source: z.object({ kind: z.string() }).passthrough().optional(), + columns: z.array(specColumnSchema).optional(), + }) + .passthrough(); + +const specPageSchema = z + .object({ + id: z.string(), + name: z.string().optional(), + elements: z.array(specElementSchema).optional(), + }) + .passthrough(); + +const sigmaSpecSchema = z + .object({ + name: z.string().optional(), + pages: z.array(specPageSchema).optional(), + }) + .passthrough(); + +type SpecColumn = z.infer; + +/** Extract the column name from a bracket formula like `[TABLE/Column Name]` or `[Column]`. */ +function extractColumnName(formula: string): string | null { + const match = /\[(?:[^\]/]+\/)?([^\]]+)\]/.exec(formula.trim()); + return match?.[1] ?? 
/**
 * Turn a display name into a snake_case identifier.
 *
 * The input is lower-cased, every run of characters outside `a-z0-9` acts as a single
 * separator, and the remaining pieces are joined with `_`. Leading and trailing
 * separators therefore vanish, and an input with no ASCII alphanumerics yields `''`.
 */
function slugify(value: string): string {
  const pieces = value.toLowerCase().split(/[^a-z0-9]+/);
  // Separators at either end of the input leave empty pieces behind; drop them.
  return pieces.filter((piece) => piece.length > 0).join('_');
}
{ descriptions: { user: col.description } } : {}), + }); + } + + const source: SemanticLayerSource = { + name: sourceName, + table, + grain: [], + columns: slColumns, + joins: [], + measures: [], + }; + + if (dataModelName) { + source.descriptions = { user: dataModelName }; + } + + return source; +} + +type SlService = Pick & { + forWorktree(workdir: string): Pick; +}; + +/** @internal */ +export async function projectSigmaDataModels( + ctx: DeterministicProjectionContext, + slService: SlService, +): Promise { + const svc = ctx.workdir ? slService.forWorktree(ctx.workdir) : slService; + const warnings: string[] = []; + const errors: string[] = []; + const touchedSources: Array<{ connectionId: string; sourceName: string }> = []; + + const connectionMappings = await readProjectionConfig(ctx.stagedDir); + + const dmDir = join(ctx.stagedDir, STAGED_FILES.dataModelsDir); + let entries: string[]; + try { + entries = await readdir(dmDir); + } catch { + return { warnings, errors, touchedSources, changedWikiPageKeys: [] }; + } + + for (const entry of entries) { + if (!entry.endsWith('.json')) continue; + + let stagedFile: z.infer; + try { + const body = await readFile(join(dmDir, entry), 'utf-8'); + stagedFile = stagedDataModelFileSchema.parse(JSON.parse(body)); + } catch { + warnings.push(`Skipping malformed staged file: ${entry}`); + continue; + } + + if (!stagedFile.spec) continue; + + let spec: z.infer; + try { + spec = sigmaSpecSchema.parse(stagedFile.spec); + } catch { + warnings.push(`Skipping unparseable spec for data model "${stagedFile.name}"`); + continue; + } + + for (const page of spec.pages ?? []) { + for (const element of page.elements ?? []) { + if (element.hidden) continue; + + const warehouseSource = warehouseTableSourceSchema.safeParse(element.source); + if (!warehouseSource.success) continue; + + const source = buildSourceFromElement( + stagedFile.name, + element.name, + element.id, + warehouseSource.data.path, + element.columns ?? 
[], + ); + if (!source) continue; + + // Only write SL sources for elements whose Sigma connection is mapped to a warehouse connection. + // Writing under an unmapped connection produces gate failures because the Sigma connection + // is not a warehouse connection and cannot be validated. + const targetConnectionId = connectionMappings[warehouseSource.data.connectionId]; + if (!targetConnectionId) { + warnings.push( + `Skipping SL source for "${stagedFile.name}" / "${element.name ?? element.id}": ` + + `no connectionMappings entry for Sigma connection ${warehouseSource.data.connectionId}. ` + + `Add a connectionMappings entry in ktx.yaml to enable SL projection for this element.`, + ); + continue; + } + + try { + const result = await svc.writeSource( + targetConnectionId, + source, + SIGMA_AUTHOR.name, + SIGMA_AUTHOR.email, + `Sigma: import data model "${stagedFile.name}"`, + ); + touchedSources.push({ connectionId: targetConnectionId, sourceName: source.name }); + warnings.push(...result.warnings); + } catch (err) { + errors.push(`Failed to write source "${source.name}": ${err instanceof Error ? 
err.message : String(err)}`); + } + } + } + } + + return { warnings, errors, touchedSources, changedWikiPageKeys: [] }; +} diff --git a/packages/cli/src/context/ingest/adapters/sigma/sigma.adapter.ts b/packages/cli/src/context/ingest/adapters/sigma/sigma.adapter.ts new file mode 100644 index 00000000..b51da827 --- /dev/null +++ b/packages/cli/src/context/ingest/adapters/sigma/sigma.adapter.ts @@ -0,0 +1,53 @@ +import { readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { ChunkResult, DeterministicProjectionContext, DiffSet, FetchContext, ProjectionResult, SourceAdapter } from '../../types.js'; +import { chunkSigmaStagedDir } from './chunk.js'; +import type { SigmaClientFactory } from './client-port.js'; +import { detectSigmaStagedDir } from './detect.js'; +import { fetchSigmaBundle, type SigmaFetchLogger } from './fetch.js'; +import { projectSigmaDataModels } from './project.js'; +import { sigmaProjectionConfigSchema, STAGED_FILES } from './types.js'; + +export interface SigmaSourceAdapterDeps { + clientFactory: SigmaClientFactory; + logger?: SigmaFetchLogger; +} + +export class SigmaSourceAdapter implements SourceAdapter { + readonly source = 'sigma'; + readonly skillNames: string[] = ['sigma_ingest']; + + constructor(private readonly deps: SigmaSourceAdapterDeps) {} + + detect(stagedDir: string): Promise { + return detectSigmaStagedDir(stagedDir); + } + + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + await fetchSigmaBundle({ + pullConfig, + stagedDir, + ctx, + clientFactory: this.deps.clientFactory, + ...(this.deps.logger ? 
{ logger: this.deps.logger } : {}), + }); + } + + chunk(stagedDir: string, diffSet?: DiffSet): Promise { + return chunkSigmaStagedDir(stagedDir, { diffSet }); + } + + async listTargetConnectionIds(stagedDir: string): Promise { + try { + const body = await readFile(join(stagedDir, STAGED_FILES.projectionConfig), 'utf-8'); + const config = sigmaProjectionConfigSchema.parse(JSON.parse(body)); + return [...new Set(Object.values(config.connectionMappings))].sort(); + } catch { + return []; + } + } + + project(ctx: DeterministicProjectionContext): Promise { + return projectSigmaDataModels(ctx, ctx.semanticLayerService); + } +} diff --git a/packages/cli/src/context/ingest/adapters/sigma/types.ts b/packages/cli/src/context/ingest/adapters/sigma/types.ts new file mode 100644 index 00000000..e0d40c1e --- /dev/null +++ b/packages/cli/src/context/ingest/adapters/sigma/types.ts @@ -0,0 +1,105 @@ +import { z } from 'zod'; + +const sigmaLocalConnectionIdSchema = z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/); + +/** Filters applied when listing workbooks. Shared with ListWorkbooksOptions in client-port.ts. */ +const workbookFilterSchema = z.object({ + includeArchived: z.boolean().default(false), + includeExplorations: z.boolean().default(false), + /** ISO 8601 date string. Only workbooks updated on or after this date are included. */ + updatedSince: z.string().optional(), +}); + +/** Input shape for listWorkbooks — all fields optional since the client applies its own defaults. */ +export type WorkbookFilterInput = z.input; + +const dataModelFilterSchema = z.object({ + /** ISO 8601 date string. Only data models updated on or after this date are fetched. */ + updatedSince: z.string().optional(), +}); + +/** The lean config the adapter needs at `fetch()` time, stored in the ingest job's `bundleRef.config`. */ +const sigmaPullConfigSchema = z.object({ + /** The ktx connection ID for the Sigma instance being swept. 
*/ + sigmaConnectionId: sigmaLocalConnectionIdSchema, + /** + * Maps Sigma internal connection UUIDs (source.connectionId in data model specs) + * to ktx warehouse connection IDs. When present, projected semantic-layer sources + * are written under the mapped warehouse connection rather than the Sigma connection. + */ + connectionMappings: z.record(z.string(), z.string()).optional(), + /** Filters applied when listing workbooks. Defaults exclude archived and exploration workbooks. */ + workbookFilter: workbookFilterSchema.default({ includeArchived: false, includeExplorations: false }), + /** Filters applied when listing data models. */ + dataModelFilter: dataModelFilterSchema.optional(), +}); + +export type SigmaPullConfig = z.infer; + +export function parseSigmaPullConfig(raw: unknown): SigmaPullConfig { + return sigmaPullConfigSchema.parse(raw); +} + +/** Written to stagedDir during fetch() and read back by project(), listTargetConnectionIds(), and the sigma_ingest skill. */ +export const sigmaProjectionConfigSchema = z.object({ + connectionMappings: z.record(z.string(), z.string()).default({}), + /** Filters that were active when workbooks were last fetched. Tells the skill what the staged set covers. */ + workbookFilter: workbookFilterSchema.default({ includeArchived: false, includeExplorations: false }), +}); + +export type SigmaProjectionConfig = z.infer; + +/** + * A staged data model file, one per `data-models/.json`. + * Stores the summary metadata plus the raw spec blob from GET /v2/dataModels/{id}/spec. + */ +export const stagedDataModelFileSchema = z.object({ + sigmaId: z.string(), + name: z.string(), + /** Full path in Sigma, e.g. "Finance/Revenue Model". */ + path: z.string(), + latestVersion: z.number(), + updatedAt: z.string(), + isArchived: z.boolean().default(false), + /** URL-safe slug Sigma uses in the web UI (dataModelUrlId from the API). */ + dataModelUrlId: z.string().optional(), + /** Raw spec from GET /v2/dataModels/{id}/spec (JSON format). 
*/ + spec: z.unknown(), +}); + +export type StagedDataModelFile = z.infer; + +/** The manifest written once per `fetch()`. Presence acts as the detect() sentinel. */ +export const sigmaManifestSchema = z.object({ + sigmaConnectionId: sigmaLocalConnectionIdSchema, + fetchedAt: z.string(), + dataModelCount: z.number().int(), + workbookCount: z.number().int().default(0), +}); + +export type SigmaManifest = z.infer; + +/** + * A staged workbook file, one per `workbooks/.json`. + * Stores the summary metadata from GET /v2/workbooks (no separate spec endpoint). + */ +export const stagedWorkbookFileSchema = z.object({ + sigmaId: z.string(), + name: z.string(), + path: z.string(), + latestVersion: z.number(), + updatedAt: z.string(), + isArchived: z.boolean().default(false), + workbookUrlId: z.string().optional(), + description: z.string().optional(), +}); + +export type StagedWorkbookFile = z.infer; + +/** Filenames inside stagedDir. Centralized so chunk() + fetch() + detect() all agree. */ +export const STAGED_FILES = { + manifest: 'sigma-manifest.json', + projectionConfig: 'sigma-projection-config.json', + dataModelsDir: 'data-models', + workbooksDir: 'workbooks', +} as const; diff --git a/packages/cli/src/context/ingest/local-adapters.ts b/packages/cli/src/context/ingest/local-adapters.ts index 0dded4cb..75e54a33 100644 --- a/packages/cli/src/context/ingest/local-adapters.ts +++ b/packages/cli/src/context/ingest/local-adapters.ts @@ -40,6 +40,8 @@ import { pullConfigFromIntegrationConfig } from './adapters/lookml/pull-config.j import { createLocalMetabaseSourceAdapter } from './adapters/metabase/local-metabase.adapter.js'; import type { MetabaseClientLogger } from './adapters/metabase/client.js'; import type { MetabaseFetchLogger } from './adapters/metabase/fetch.js'; +import { createLocalSigmaSourceAdapter } from './adapters/sigma/local-sigma.adapter.js'; +import type { SigmaFetchLogger } from './adapters/sigma/fetch.js'; import { MetricflowSourceAdapter } from 
'./adapters/metricflow/metricflow.adapter.js'; import { pullConfigFromMetricflowIntegration } from './adapters/metricflow/pull-config.js'; import { LocalNotionRuntimeStore } from './adapters/notion/local-state-store.js'; @@ -72,7 +74,8 @@ export interface DefaultLocalIngestAdaptersOptions { type LocalIngestOperationalLogger = MetabaseClientLogger & MetabaseFetchLogger & LookerClientLogger & - NotionFetchLogger; + NotionFetchLogger & + SigmaFetchLogger; export function createDefaultLocalIngestAdapters( project: KtxLocalProject, @@ -105,6 +108,9 @@ export function createDefaultLocalIngestAdapters( createLocalMetabaseSourceAdapter(project, { ...(options.logger ? { logger: options.logger } : {}), }), + createLocalSigmaSourceAdapter(project, { + ...(options.logger ? { logger: options.logger } : {}), + }), new GdriveSourceAdapter(), new LookerSourceAdapter({ clientFactory: { @@ -271,6 +277,27 @@ export async function localPullConfigForAdapter( 'Metabase scheduled pulls fan out by mapping. Call runLocalMetabaseIngest() or use `ktx ingest ` from the CLI.', ); } + if (adapter.source === 'sigma') { + const sigmaConn = project.config.connections[connectionId]; + const connectionMappings = + sigmaConn && 'connectionMappings' in sigmaConn && sigmaConn.connectionMappings != null + ? (sigmaConn.connectionMappings as Record) + : undefined; + const workbookFilter = + sigmaConn && 'workbookFilter' in sigmaConn && sigmaConn.workbookFilter != null + ? (sigmaConn.workbookFilter as { includeArchived?: boolean; includeExplorations?: boolean; updatedSince?: string }) + : undefined; + const dataModelFilter = + sigmaConn && 'dataModelFilter' in sigmaConn && sigmaConn.dataModelFilter != null + ? (sigmaConn.dataModelFilter as { updatedSince?: string }) + : undefined; + return { + sigmaConnectionId: connectionId, + ...(connectionMappings ? { connectionMappings } : {}), + ...(workbookFilter ? { workbookFilter } : {}), + ...(dataModelFilter ? 
{ dataModelFilter } : {}), + }; + } const connection = project.config.connections[connectionId]; if (adapter.source === HISTORIC_SQL_SOURCE_KEY) { if (options.historicSqlPullConfigOverride) { diff --git a/packages/cli/src/context/project/driver-schemas.ts b/packages/cli/src/context/project/driver-schemas.ts index 25fa3507..a19fee0f 100644 --- a/packages/cli/src/context/project/driver-schemas.ts +++ b/packages/cli/src/context/project/driver-schemas.ts @@ -251,6 +251,47 @@ const metricflowConnectionSchema = z }) .describe('MetricFlow / SL context-source connection.'); +const sigmaConnectionSchema = z + .looseObject({ + driver: z.literal('sigma'), + api_url: z + .string() + .url() + .default('https://api.sigmacomputing.com') + .describe('Sigma API base URL. Defaults to the GCP US endpoint; change for other regions.'), + client_id: z.string().min(1).describe('Sigma API client ID.'), + client_secret: z.string().min(1).optional().describe('Literal Sigma client secret. Prefer client_secret_ref.'), + client_secret_ref: z + .string() + .min(1) + .optional() + .describe('Reference to Sigma client secret (e.g. env:SIGMA_CLIENT_SECRET).'), + connectionMappings: z + .record(z.string(), z.string()) + .optional() + .describe( + 'Maps Sigma internal connection UUIDs to ktx warehouse connection IDs. ' + + 'When set, projected semantic-layer sources land under the mapped warehouse connection ' + + 'instead of the Sigma connection, enabling sl_validate. ' + + 'Find UUIDs in data model specs under source.connectionId.', + ), + workbookFilter: z + .object({ + includeArchived: z.boolean().default(false), + includeExplorations: z.boolean().default(false), + updatedSince: z.string().optional().describe('ISO 8601 date string. Only workbooks updated on or after this date are ingested.'), + }) + .optional() + .describe('Filters applied when listing workbooks during ingest. 
Defaults exclude archived and exploration workbooks.'), + dataModelFilter: z + .object({ + updatedSince: z.string().optional().describe('ISO 8601 date string. Only data models updated on or after this date are fetched.'), + }) + .optional() + .describe('Filters applied when listing data models during ingest.'), + }) + .describe('Sigma Computing API connection for ingesting data models.'); + export const connectionConfigSchema = z.discriminatedUnion('driver', [ ...warehouseConnectionSchemas, mongodbConnectionSchema, @@ -261,4 +302,5 @@ export const connectionConfigSchema = z.discriminatedUnion('driver', [ gdriveConnectionSchema, dbtConnectionSchema, metricflowConnectionSchema, + sigmaConnectionSchema, ]); diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 899a53d0..beab6dca 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -139,6 +139,7 @@ const sourceAdapterByDriver = new Map([ ['metricflow', 'metricflow'], ['dbt', 'dbt'], ['lookml', 'lookml'], + ['sigma', 'sigma'], ]); export function publicProgressMessage(message: string, target: KtxPublicIngestPlanTarget): string { diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 4c6feb26..e13ec6f3 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -16,6 +16,8 @@ import { import { gdriveServiceAccountKeySchema } from './context/ingest/adapters/gdrive/types.js'; import { cloneOrPull, testRepoConnection } from './context/ingest/repo-fetch.js'; import { DEFAULT_METABASE_CLIENT_CONFIG, MetabaseClient } from './context/ingest/adapters/metabase/client.js'; +import { DEFAULT_SIGMA_CLIENT_CONFIG, DefaultSigmaClient } from './context/ingest/adapters/sigma/client.js'; +import { sigmaRuntimeConfigFromLocalConnection } from './context/ingest/adapters/sigma/local-sigma.adapter.js'; import { discoverMetabaseDatabases, type DiscoveredMetabaseDatabase } from 
'./context/ingest/adapters/metabase/mapping.js'; import { loadDbtSchemaFiles } from './context/ingest/dbt-shared/schema-files.js'; import { loadProjectInfo } from './context/ingest/dbt-shared/project-vars.js'; @@ -46,7 +48,7 @@ import { type KtxSetupPromptOption, } from './setup-prompts.js'; -export type KtxSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion' | 'gdrive'; +export type KtxSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion' | 'sigma' | 'gdrive'; const DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN = 25; @@ -115,6 +117,7 @@ export interface KtxSetupSourcesDeps { validateLooker?: (projectDir: string, connectionId: string) => Promise; validateLookml?: (connection: KtxProjectConnectionConfig) => Promise; validateNotion?: (connection: KtxProjectConnectionConfig) => Promise; + validateSigma?: (connection: KtxProjectConnectionConfig) => Promise; validateGdrive?: (connection: KtxProjectConnectionConfig) => Promise; pickNotionRootPages?: typeof pickNotionRootPages; discoverMetabaseDatabases?: (args: { @@ -138,6 +141,7 @@ const SOURCE_OPTIONS: Array<{ value: KtxSetupSourceType; label: string }> = [ { value: 'metricflow', label: 'MetricFlow' }, { value: 'looker', label: 'Looker' }, { value: 'lookml', label: 'LookML' }, + { value: 'sigma', label: 'Sigma Computing' }, { value: 'gdrive', label: 'Google Drive' }, ]; @@ -248,6 +252,7 @@ const SOURCE_CREDENTIAL_FLAG: Record = notion: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, metabase: { field: 'sourceApiKeyRef', flag: '--source-api-key-ref' }, looker: { field: 'sourceClientSecretRef', flag: '--source-client-secret-ref' }, + sigma: { field: 'sourceClientSecretRef', flag: '--source-client-secret-ref' }, gdrive: { field: null, flag: '--gdrive-service-account-key-ref' }, }; @@ -577,6 +582,18 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC }; } +function buildSigmaConnection(args: KtxSetupSourcesArgs): 
KtxProjectConnectionConfig { + if (!args.sourceClientId) { + throw new Error('Missing Sigma client id: pass --source-client-id.'); + } + return { + driver: 'sigma', + api_url: args.sourceUrl ?? 'https://api.sigmacomputing.com', + client_id: args.sourceClientId, + client_secret_ref: credentialRef(args.sourceClientSecretRef, 'Sigma client secret ref'), + }; +} + function buildGdriveConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionConfig { const folderId = args.gdriveFolderId?.trim(); if (!folderId) { @@ -713,6 +730,23 @@ async function defaultValidateNotion(connection: KtxProjectConnectionConfig): Pr return { ok: true, detail: `roots=${roots.length}` }; } +async function defaultValidateSigma(connection: KtxProjectConnectionConfig): Promise { + try { + const runtimeConfig = sigmaRuntimeConfigFromLocalConnection('sigma-main', connection); + const client = new DefaultSigmaClient(runtimeConfig, DEFAULT_SIGMA_CLIENT_CONFIG); + try { + const result = await client.testConnection(); + return result.success + ? { ok: true, detail: 'Sigma API connection verified' } + : { ok: false, message: result.error ?? 'Sigma connection test failed' }; + } finally { + await client.cleanup(); + } + } catch (err) { + return { ok: false, message: err instanceof Error ? err.message : String(err) }; + } +} + async function defaultValidateGdrive(connection: KtxProjectConnectionConfig): Promise { const config = parseGdriveConnectionConfig(connection); const keyText = await resolveGdriveServiceAccountKey(config.service_account_key_ref); @@ -1370,6 +1404,43 @@ async function promptForInteractiveSource( ]); } + if (source === 'sigma') { + return await runSourcePromptSteps(initialState, () => [ + ...connectionSteps, + async (state) => { + const sourceUrl = await promptText(prompts, { + message: 'Sigma API URL', + initialValue: state.sourceUrl ?? 
'https://api.sigmacomputing.com', + }); + if (sourceUrl === undefined) return 'back'; + state.sourceUrl = sourceUrl; + return 'next'; + }, + async (state) => { + const sourceClientId = await promptText(prompts, { + message: 'Sigma client ID', + ...(state.sourceClientId ? { initialValue: state.sourceClientId } : {}), + }); + if (sourceClientId === undefined) return 'back'; + state.sourceClientId = sourceClientId; + return 'next'; + }, + async (state) => { + const ref = await chooseSourceCredentialRef({ + prompts, + projectDir: args.projectDir, + label: 'Sigma client secret', + envName: 'SIGMA_CLIENT_SECRET', + secretFileName: `${state.sourceConnectionId ?? 'sigma-main'}-client-secret`, + existingRef: state.sourceClientSecretRef, + }); + if (ref === 'back') return 'back'; + state.sourceClientSecretRef = ref; + return 'next'; + }, + ]); + } + if (source === 'notion') { return await runSourcePromptSteps(initialState, (state) => [ ...connectionSteps, @@ -1638,6 +1709,13 @@ function sourceArgsFromExistingConnection(input: { return sourceArgs; } + if (input.source === 'sigma') { + sourceArgs.sourceUrl = stringField(input.connection.api_url) ?? undefined; + sourceArgs.sourceClientId = stringField(input.connection.client_id) ?? undefined; + sourceArgs.sourceClientSecretRef = stringField(input.connection.client_secret_ref) ?? undefined; + return sourceArgs; + } + if (input.source === 'gdrive') { sourceArgs.gdriveServiceAccountKeyRef = stringField(input.connection.service_account_key_ref); sourceArgs.gdriveFolderId = stringField(input.connection.folder_id); @@ -1826,6 +1904,9 @@ function buildConnection(source: KtxSetupSourceType, args: KtxSetupSourcesArgs): if (source === 'lookml') { return buildLookmlConnection(args); } + if (source === 'sigma') { + return buildSigmaConnection(args); + } if (source === 'notion') { return buildNotionConnection(args); } @@ -1854,6 +1935,9 @@ async function validateSource( if (source === 'lookml') { return await (deps.validateLookml ?? 
defaultValidateLookml)(args.connection); } + if (source === 'sigma') { + return await (deps.validateSigma ?? defaultValidateSigma)(args.connection); + } if (source === 'notion') { return await (deps.validateNotion ?? defaultValidateNotion)(args.connection); } diff --git a/packages/cli/src/skills/sigma_ingest/SKILL.md b/packages/cli/src/skills/sigma_ingest/SKILL.md new file mode 100644 index 00000000..a62921a0 --- /dev/null +++ b/packages/cli/src/skills/sigma_ingest/SKILL.md @@ -0,0 +1,189 @@ +--- +name: sigma_ingest +description: Extract durable ktx wiki knowledge from staged Sigma data model specs and workbook summaries. Load for WorkUnits with unitKey sigma-data-models or sigma-workbooks. +callers: [memory_agent] +--- + +# Sigma Ingest + +Sigma ingest turns staged data model specs and workbook summaries into durable ktx wiki knowledge. The deterministic `project()` step has already written semantic-layer YAML for every warehouse-table data model element whose `source.connectionId` is covered by `connectionMappings` before this skill runs — do not re-write those SL sources. + +## Work unit structure + +Sigma produces up to two kinds of work units per ingest run; a kind is omitted when it has no staged or changed files: + +- `sigma-data-models` or `sigma-data-models-N` + - `rawFiles`: `data-models/<id>.json` files (one per data model in this batch) + - `peerFileIndex`: `workbooks/<id>.json` files + `sigma-manifest.json` + `sigma-projection-config.json` + - When the workspace has more than 50 data models, the unit is split into batches: `sigma-data-models-0`, `sigma-data-models-1`, … with `displayLabel` like `"Sigma: data models (1/8)"`. When ≤50 data models, the unitKey is simply `sigma-data-models` with no suffix. +- `sigma-workbooks` or `sigma-workbooks-N` + - `rawFiles`: `workbooks/<id>.json` files (one per workbook in this batch) + - `peerFileIndex`: `data-models/<id>.json` files + `sigma-manifest.json` + `sigma-projection-config.json` + - When the workspace has more than 2000 workbooks, the unit is split into batches: `sigma-workbooks-0`, `sigma-workbooks-1`, … with `displayLabel` like `"Sigma: workbooks (1/4)"`.
When ≤2000 workbooks, the unitKey is simply `sigma-workbooks` with no suffix. + +`sigma-manifest.json` and `sigma-projection-config.json` are never in `rawFiles`. They live at the staged dir root and always appear in `peerFileIndex`. + +## Staged file shapes + +**`data-models/<id>.json`** — one per data model (in `rawFiles` for data-model units): +```json +{ + "sigmaId": "abc-123", + "name": "Revenue Model", + "path": "Finance/Revenue Model", + "latestVersion": 3, + "updatedAt": "2026-01-15T00:00:00Z", + "isArchived": false, + "spec": { + "name": "Revenue Model", + "pages": [{ + "id": "p1", + "name": "Main", + "elements": [{ + "id": "elem1", + "kind": "table", + "name": "Opportunities", + "hidden": false, + "source": { + "kind": "warehouse-table", + "connectionId": "<sigma-connection-uuid>", + "path": ["DATABASE", "SCHEMA", "OPPORTUNITIES"] + }, + "columns": [ + { "id": "c1", "name": "Deal Amount", "formula": "[OPPORTUNITIES/Amount]", "description": "Net contract value in USD" }, + { "id": "c2", "name": "Total ARR", "formula": "Sum([OPPORTUNITIES/ARR])", "description": "Annualised recurring revenue" } + ] + }] + }] + } +} +``` + +`source.kind` discriminates the element type: +- `warehouse-table` — element maps directly to a warehouse table. Has `connectionId` and `path` (array of path segments forming the fully-qualified table name). `project()` writes an SL source when `connectionMappings` covers this `connectionId`. +- `table` — element is a derived view layered on top of another element; identified by `source.elementId`. No warehouse path. Wiki-only.
+ +**`workbooks/.json`** — one per workbook, in `rawFiles` for workbook units (summary only; no spec endpoint exists): +```json +{ + "sigmaId": "wb-abc", + "name": "ARR Tracker", + "path": "Finance/Dashboards", + "latestVersion": 2, + "updatedAt": "2026-01-16T00:00:00Z", + "isArchived": false, + "workbookUrlId": "57a96EMo3G...", + "description": "Tracks ARR by segment and cohort for the finance team" +} +``` + +**Peer files (available via `peerFileIndex`, not `rawFiles`):** + +**`sigma-manifest.json`** — fetch summary; use for provenance only. + +**`sigma-projection-config.json`** — written by `fetch()`, contains two fields the skill must read: + +- `connectionMappings`: `{sigmaInternalUuid: ktxWarehouseConnectionId}`. Use the mapped warehouse connection ID for `entity_details` when verifying warehouse identifiers found in data model specs. +- `workbookFilter`: the filter settings that were active when workbooks were last fetched: + - `includeArchived` (default `false`) — when `false`, archived workbooks are not in `workbooks/`; `isArchived: true` files will only appear when this was `true`. + - `includeExplorations` (default `false`) — when `false`, exploration-type workbooks (unsaved analyses) are excluded; treat present workbooks as intentional, curated reports. + - `updatedSince` (optional ISO 8601 string) — when set, only workbooks updated on or after this date are staged; the set is a recent-changes slice, not the full workspace. Do not infer that absent workbooks were deleted. + +`sigma-manifest.json` also reflects any active `dataModelFilter`. When `dataModelFilter.updatedSince` was set during fetch, `dataModelCount` reflects only matching models, not the full workspace. Do not infer that absent data models were deleted. + +Read `sigma-projection-config.json` first and keep `workbookFilter` in scope while processing the WorkUnit. + +## Required workflow + +1. Read every `rawFiles` entry for the WorkUnit. +2. 
Read `sigma-projection-config.json` from the staged dir to get `connectionMappings` and `workbookFilter`. +3. For each data model file: extract business semantics from element names, column descriptions, and the domain context of the model. Skip hidden elements and hidden columns. +4. For each workbook file: extract business domain knowledge from the name and description. When `workbookFilter.updatedSince` is set, treat the staged set as a recent-changes slice — absent workbooks were not deleted; they were simply outside the filter window. +5. Use `discover_data` before writing to find existing wiki pages on the same topic. +6. Write wiki candidates with `context_candidate_write`. Do not call `wiki_write` directly from a Sigma WorkUnit; Stage 4 reconciliation promotes candidates. +7. Do not write or edit SL sources. The `project()` step owns all SL output for Sigma. + +## Identifier Verification Protocol + +Before writing a wiki candidate on any topic (Sigma WorkUnits never write SL sources): + +1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionId, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. Use the warehouse `connectionId` from `connectionMappings` in + `sigma-projection-config.json`, not the Sigma connection ID. If + `connectionMappings` has no entry for the element's `source.connectionId`, + skip `entity_details` — there is no mapped warehouse to verify against — + and wrap any identifier references with `[unverified - from <raw path>]`. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column.
If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection id: + `sql_execution({connectionId, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>` placeholder strings from these instructions + into output. + +## Data model elements + +### Warehouse-table elements (`source.kind === "warehouse-table"`) + +`project()` writes an SL source for a warehouse-table element **only when** the element's `source.connectionId` has an entry in `connectionMappings`. When no mapping exists, no SL source is written and the element is wiki-only. + +To determine whether an SL source exists: check whether `connectionMappings[element.source.connectionId]` resolves. If it does, use `sl_discover` to find the source by its slugified name (`_`), then: + +- Read the existing SL source with `sl_read_source` to understand what columns and measures are captured. +- Write a wiki candidate about the business domain if the element name, column descriptions, or data model description reveals durable knowledge not already in the wiki. +- `sl_refs` in the wiki candidate should point to the already-written SL source name. + +If `connectionMappings` has no entry for the element's `source.connectionId`, treat the element as wiki-only — do not attempt `sl_discover` or `sl_read_source` for it, as no source was written. + +### Joins within a data model + +Joins are not projected in v1; `joins: []` is always written by `project()`. `Lookup()` formulas may be described in wiki prose instead.
+ +### Non-warehouse elements (`source.kind === "table"`) + +These reference another element by `elementId` — they are derived views layered on top of a warehouse-table element. They have no warehouse path of their own. Do not attempt SL writes for these elements. They may produce wiki candidates if their column names or descriptions reveal business semantics not captured by the underlying warehouse-table element. + +## Workbooks + +Workbooks have summary metadata only. There is no spec endpoint. + +Extract business domain knowledge from: +- `name`: the workbook's primary topic (e.g. "ARR Tracker" → ARR tracking concepts) +- `description`: business context and intended audience +- `path`: team or functional area (e.g. `Finance/Dashboards`) + +Write wiki candidates when the name or description reveals a reusable business concept, metric definition, or domain convention. Write one candidate per distinct concept, not one per workbook. + +Skip workbooks whose name or description contains no durable business semantics (e.g. "Untitled Workbook", "Test Dashboard"). + +## Capture rules + +Write wiki candidates for: +- Metric definitions mentioned in element names or column descriptions (e.g. "Net ARR", "Churned MRR") +- Domain conventions such as cohort definitions, segment taxonomies, or fiscal calendar rules +- Relationships between business entities revealed by data model joins + +Skip: +- Visualization settings, layout, colors, chart types +- Owner names, folder paths, and version numbers as wiki narrative +- Hidden elements and hidden columns +- Data model names that are purely technical with no business meaning +- When `workbookFilter.includeExplorations` is `false` (the default), all staged workbooks are intentional reports — no extra exploration filter needed. When it is `true`, workbooks without a description or with a generic auto-generated name are likely ephemeral explorations; skip those. 
+ +## Usage signals + +Sigma workbooks carry `latestVersion` but no usage counts. Treat a higher `latestVersion` as weak evidence of continued maintenance; do not include version numbers in wiki prose. diff --git a/packages/cli/test/context/ingest/adapters/sigma/chunk.test.ts b/packages/cli/test/context/ingest/adapters/sigma/chunk.test.ts new file mode 100644 index 00000000..50b0c861 --- /dev/null +++ b/packages/cli/test/context/ingest/adapters/sigma/chunk.test.ts @@ -0,0 +1,325 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join, resolve } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { chunkSigmaStagedDir } from '../../../../../src/context/ingest/adapters/sigma/chunk.js'; + +// Keep in sync with constants in chunk.ts +const DATA_MODELS_PER_UNIT = 50; +const WORKBOOKS_PER_UNIT = 2000; + +const FIXTURES = resolve(import.meta.dirname, '../../../../fixtures/sigma'); +const SINGLE = join(FIXTURES, 'single-folder'); +const MULTI = join(FIXTURES, 'multi-folder'); +const EMPTY = join(FIXTURES, 'empty-manifest'); + +describe('chunkSigmaStagedDir — first run', () => { + it('single-folder fixture emits two WUs (data-models and workbooks)', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + expect(result.workUnits).toHaveLength(2); + }); + + it('data-models WU has correct unitKey and displayLabel', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-data-models')!; + expect(wu).toBeDefined(); + expect(wu.displayLabel).toBe('Sigma: data models'); + }); + + it('workbooks WU has correct unitKey and displayLabel', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks')!; + expect(wu).toBeDefined(); + expect(wu.displayLabel).toBe('Sigma: workbooks'); + }); + + it('data-models WU rawFiles 
contains data model files but not the manifest', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-data-models')!; + expect(wu.rawFiles).toContain('data-models/dm-aaa111.json'); + expect(wu.rawFiles).toContain('data-models/dm-bbb222.json'); + expect(wu.rawFiles).not.toContain('sigma-manifest.json'); + expect(wu.rawFiles).not.toContain('workbooks/wb-xxx111.json'); + }); + + it('manifest is in peerFileIndex so the LLM can read it without affecting the hash', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + const dmWu = result.workUnits.find((w) => w.unitKey === 'sigma-data-models')!; + const wbWu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks')!; + expect(dmWu.peerFileIndex).toContain('sigma-manifest.json'); + expect(wbWu.peerFileIndex).toContain('sigma-manifest.json'); + }); + + it('workbooks WU rawFiles contains workbook files but not the manifest', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks')!; + expect(wu.rawFiles).toContain('workbooks/wb-xxx111.json'); + expect(wu.rawFiles).not.toContain('sigma-manifest.json'); + expect(wu.rawFiles).not.toContain('data-models/dm-aaa111.json'); + }); + + it('data-models WU peerFileIndex contains workbook files', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-data-models')!; + expect(wu.peerFileIndex).toContain('workbooks/wb-xxx111.json'); + }); + + it('workbooks WU peerFileIndex contains data model files', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks')!; + expect(wu.peerFileIndex).toContain('data-models/dm-aaa111.json'); + expect(wu.peerFileIndex).toContain('data-models/dm-bbb222.json'); + }); + + it('data-models WU notes describes model count', async 
() => { + const result = await chunkSigmaStagedDir(SINGLE); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-data-models')!; + expect(wu.notes).toBe('2 data models'); + }); + + it('workbooks WU notes describes workbook count', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks')!; + expect(wu.notes).toBe('1 workbook'); + }); + + it('dependencyPaths is empty on first run for both WUs', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + for (const wu of result.workUnits) { + expect(wu.dependencyPaths).toEqual([]); + } + }); + + it('multi-folder fixture still emits two WUs (data-models and workbooks)', async () => { + const result = await chunkSigmaStagedDir(MULTI); + expect(result.workUnits).toHaveLength(2); + expect(result.workUnits.map((w) => w.unitKey).sort()).toEqual(['sigma-data-models', 'sigma-workbooks']); + }); + + it('multi-folder: data-models WU contains all data models regardless of folder', async () => { + const result = await chunkSigmaStagedDir(MULTI); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-data-models')!; + expect(wu.rawFiles).toContain('data-models/dm-aaa111.json'); + expect(wu.rawFiles).toContain('data-models/dm-bbb222.json'); + expect(wu.rawFiles).toContain('data-models/dm-ccc333.json'); + }); + + it('multi-folder: workbooks WU contains all workbooks regardless of folder', async () => { + const result = await chunkSigmaStagedDir(MULTI); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks')!; + expect(wu.rawFiles).toContain('workbooks/wb-yyy222.json'); + expect(wu.rawFiles).toContain('workbooks/wb-zzz333.json'); + }); + + it('unitKey is slug-safe (no slashes or spaces)', async () => { + const result = await chunkSigmaStagedDir(SINGLE); + for (const wu of result.workUnits) { + expect(wu.unitKey).toMatch(/^[a-zA-Z0-9_-]+$/); + } + }); + + it('empty-manifest fixture emits zero WUs', async () 
=> { + const result = await chunkSigmaStagedDir(EMPTY); + expect(result.workUnits).toHaveLength(0); + }); + + it('missing manifest directory emits zero WUs without crashing', async () => { + const result = await chunkSigmaStagedDir('/tmp/sigma-nonexistent-dir-ktx-test'); + expect(result.workUnits).toHaveLength(0); + }); + + it('is deterministic: two identical calls produce structurally equal output', async () => { + const r1 = await chunkSigmaStagedDir(SINGLE); + const r2 = await chunkSigmaStagedDir(SINGLE); + expect(JSON.stringify(r1)).toBe(JSON.stringify(r2)); + }); +}); + +describe('chunkSigmaStagedDir — data model batching', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'sigma-dm-batch-')); + await mkdir(join(stagedDir, 'data-models'), { recursive: true }); + const manifest = JSON.stringify({ + fetchedAt: new Date().toISOString(), + dataModelCount: DATA_MODELS_PER_UNIT + 1, + workbookCount: 0, + sigmaConnectionId: 'conn-1', + }); + await writeFile(join(stagedDir, 'sigma-manifest.json'), manifest); + for (let i = 0; i < DATA_MODELS_PER_UNIT + 1; i++) { + const dm = JSON.stringify({ + sigmaId: `dm-${i}`, + name: `Data Model ${i}`, + path: 'Engineering', + latestVersion: 1, + updatedAt: '2026-01-01T00:00:00Z', + isArchived: false, + dataModelUrlId: `url-${i}`, + spec: null, + }); + await writeFile(join(stagedDir, 'data-models', `dm-${String(i).padStart(6, '0')}.json`), dm); + } + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('splits into two data model WUs when count exceeds DATA_MODELS_PER_UNIT', async () => { + const result = await chunkSigmaStagedDir(stagedDir); + const dmUnits = result.workUnits.filter((w) => w.unitKey.startsWith('sigma-data-models')); + expect(dmUnits).toHaveLength(2); + }); + + it('batched data model WUs get indexed unitKeys (sigma-data-models-0, sigma-data-models-1)', async () => { + const result = await 
chunkSigmaStagedDir(stagedDir); + const keys = result.workUnits.map((w) => w.unitKey).filter((k) => k.startsWith('sigma-data-models')).sort(); + expect(keys).toEqual(['sigma-data-models-0', 'sigma-data-models-1']); + }); + + it('first batch has exactly DATA_MODELS_PER_UNIT files (manifest excluded from rawFiles)', async () => { + const result = await chunkSigmaStagedDir(stagedDir); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-data-models-0')!; + expect(wu.rawFiles).toHaveLength(DATA_MODELS_PER_UNIT); + }); + + it('displayLabel includes batch position when split', async () => { + const result = await chunkSigmaStagedDir(stagedDir); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-data-models-0')!; + expect(wu.displayLabel).toMatch(/\(1\/2\)/); + }); +}); + +describe('chunkSigmaStagedDir — workbook batching', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'sigma-batch-')); + await mkdir(join(stagedDir, 'workbooks'), { recursive: true }); + const manifest = JSON.stringify({ + fetchedAt: new Date().toISOString(), + dataModelCount: 0, + workbookCount: WORKBOOKS_PER_UNIT + 1, + sigmaConnectionId: 'conn-1', + }); + await writeFile(join(stagedDir, 'sigma-manifest.json'), manifest); + for (let i = 0; i < WORKBOOKS_PER_UNIT + 1; i++) { + const wb = JSON.stringify({ + sigmaId: `wb-${i}`, + name: `Workbook ${i}`, + path: 'Finance', + latestVersion: 1, + updatedAt: '2026-01-01T00:00:00Z', + isArchived: false, + workbookUrlId: `url-${i}`, + }); + await writeFile(join(stagedDir, 'workbooks', `wb-${String(i).padStart(6, '0')}.json`), wb); + } + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('splits into two workbook WUs when count exceeds WORKBOOKS_PER_UNIT', async () => { + const result = await chunkSigmaStagedDir(stagedDir); + const wbUnits = result.workUnits.filter((w) => w.unitKey.startsWith('sigma-workbooks')); + 
expect(wbUnits).toHaveLength(2); + }); + + it('batched WUs get indexed unitKeys (sigma-workbooks-0, sigma-workbooks-1)', async () => { + const result = await chunkSigmaStagedDir(stagedDir); + const keys = result.workUnits.map((w) => w.unitKey).filter((k) => k.startsWith('sigma-workbooks')).sort(); + expect(keys).toEqual(['sigma-workbooks-0', 'sigma-workbooks-1']); + }); + + it('first batch has exactly WORKBOOKS_PER_UNIT files (manifest excluded from rawFiles)', async () => { + const result = await chunkSigmaStagedDir(stagedDir); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks-0')!; + expect(wu.rawFiles).toHaveLength(WORKBOOKS_PER_UNIT); + }); + + it('second batch has the remainder only', async () => { + const result = await chunkSigmaStagedDir(stagedDir); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks-1')!; + expect(wu.rawFiles).toHaveLength(1); // 1 overflow workbook + }); + + it('displayLabel includes batch position when split', async () => { + const result = await chunkSigmaStagedDir(stagedDir); + const wu = result.workUnits.find((w) => w.unitKey === 'sigma-workbooks-0')!; + expect(wu.displayLabel).toMatch(/\(1\/2\)/); + }); +}); + +describe('chunkSigmaStagedDir — diffSet re-sync', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'sigma-chunk-diff-')); + await mkdir(join(stagedDir, 'data-models'), { recursive: true }); + const fs = await import('node:fs/promises'); + const manifestBody = await fs.readFile(join(SINGLE, 'sigma-manifest.json'), 'utf-8'); + await writeFile(join(stagedDir, 'sigma-manifest.json'), manifestBody); + for (const file of ['dm-aaa111.json', 'dm-bbb222.json']) { + const body = await fs.readFile(join(SINGLE, 'data-models', file), 'utf-8'); + await writeFile(join(stagedDir, 'data-models', file), body); + } + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('only the WU containing the 
modified file is kept', async () => { + const result = await chunkSigmaStagedDir(stagedDir, { + diffSet: { + added: [], + modified: ['data-models/dm-aaa111.json'], + deleted: [], + unchanged: ['data-models/dm-bbb222.json', 'sigma-manifest.json'], + }, + }); + expect(result.workUnits).toHaveLength(1); + expect(result.workUnits[0]!.rawFiles).toEqual(['data-models/dm-aaa111.json']); + }); + + it('unchanged sibling data-model moves to dependencyPaths', async () => { + const result = await chunkSigmaStagedDir(stagedDir, { + diffSet: { + added: [], + modified: ['data-models/dm-aaa111.json'], + deleted: [], + unchanged: ['data-models/dm-bbb222.json', 'sigma-manifest.json'], + }, + }); + expect(result.workUnits[0]!.dependencyPaths).toContain('data-models/dm-bbb222.json'); + }); + + it('all-unchanged diffSet produces zero WUs and no eviction', async () => { + const result = await chunkSigmaStagedDir(stagedDir, { + diffSet: { + added: [], + modified: [], + deleted: [], + unchanged: ['data-models/dm-aaa111.json', 'data-models/dm-bbb222.json', 'sigma-manifest.json'], + }, + }); + expect(result.workUnits).toHaveLength(0); + expect(result.eviction).toBeUndefined(); + }); + + it('deleted paths produce an eviction unit listing those paths', async () => { + const result = await chunkSigmaStagedDir(stagedDir, { + diffSet: { + added: [], + modified: [], + deleted: ['data-models/dm-aaa111.json'], + unchanged: ['data-models/dm-bbb222.json', 'sigma-manifest.json'], + }, + }); + expect(result.eviction?.deletedRawPaths).toContain('data-models/dm-aaa111.json'); + }); +}); diff --git a/packages/cli/test/context/ingest/adapters/sigma/client.test.ts b/packages/cli/test/context/ingest/adapters/sigma/client.test.ts new file mode 100644 index 00000000..e8680746 --- /dev/null +++ b/packages/cli/test/context/ingest/adapters/sigma/client.test.ts @@ -0,0 +1,309 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { DefaultSigmaClient } from 
'../../../../../src/context/ingest/adapters/sigma/client.js'; + +const BASE = 'https://api.sigmacomputing.com'; + +const TOKEN_RESPONSE = { + access_token: 'test-token', + token_type: 'Bearer', + expires_in: 3600, +}; + +function makeResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { 'Content-Type': 'application/json' }, + }); +} + +function makeClient(): DefaultSigmaClient { + return new DefaultSigmaClient( + { apiUrl: BASE, clientId: 'cid', clientSecret: 'csec' }, // pragma: allowlist secret + { maxRetries: 1, baseDelayMs: 0, maxDelayMs: 0, timeoutMs: 5000 }, + ); +} + +beforeEach(() => { + globalThis.fetch = vi.fn(); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('DefaultSigmaClient.testConnection', () => { + it('returns success:true when auth succeeds', async () => { + vi.mocked(fetch).mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)); + const client = makeClient(); + const result = await client.testConnection(); + expect(result.success).toBe(true); + }); + + it('returns success:false with error message when auth fails', async () => { + vi.mocked(fetch).mockResolvedValueOnce(makeResponse({ error: 'unauthorized' }, 401)); + const client = makeClient(); + const result = await client.testConnection(); + expect(result.success).toBe(false); + expect(result.error).toMatch(/401/); + }); +}); + +describe('DefaultSigmaClient.listDataModels', () => { + it('returns entries from a single page', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) // auth + .mockResolvedValueOnce( + makeResponse({ + entries: [ + { + dataModelId: 'dm-1', + dataModelUrlId: 'url-1', + name: 'Revenue Model', + path: 'Finance/Revenue', + latestVersion: 1, + ownerId: 'user-1', + createdAt: '2026-01-01T00:00:00Z', + updatedAt: '2026-01-01T00:00:00Z', + isArchived: false, + }, + ], + nextPage: null, + }), + ); + const client = makeClient(); + const models = await 
client.listDataModels(); + expect(models).toHaveLength(1); + expect(models[0]!.name).toBe('Revenue Model'); + }); + + it('paginates across multiple pages', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce( + makeResponse({ + entries: [ + { + dataModelId: 'dm-1', + dataModelUrlId: 'url-1', + name: 'Model A', + path: 'Finance/A', + latestVersion: 1, + ownerId: 'u1', + createdAt: '2026-01-01T00:00:00Z', + updatedAt: '2026-01-01T00:00:00Z', + }, + ], + nextPage: 'cursor-abc', + }), + ) + .mockResolvedValueOnce( + makeResponse({ + entries: [ + { + dataModelId: 'dm-2', + dataModelUrlId: 'url-2', + name: 'Model B', + path: 'Finance/B', + latestVersion: 1, + ownerId: 'u1', + createdAt: '2026-01-02T00:00:00Z', + updatedAt: '2026-01-02T00:00:00Z', + }, + ], + nextPage: null, + }), + ); + const client = makeClient(); + const models = await client.listDataModels(); + expect(models).toHaveLength(2); + expect(models.map((m) => m.name)).toEqual(['Model A', 'Model B']); + }); + + it('second page request includes cursor in query string', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce(makeResponse({ entries: [{ dataModelId: 'dm-1', dataModelUrlId: 'url-1', name: 'A', path: 'F/A', latestVersion: 1, ownerId: 'u', createdAt: '', updatedAt: '' }], nextPage: 'cursor-xyz' })) + .mockResolvedValueOnce(makeResponse({ entries: [], nextPage: null })); + const client = makeClient(); + await client.listDataModels(); + const calls = vi.mocked(fetch).mock.calls; + const pageCall = calls[calls.length - 1]!; + expect(String(pageCall[0])).toContain('cursor-xyz'); + }); +}); + +function makeWorkbook(overrides: Record<string, unknown> = {}) { + return { + workbookId: 'wb-1', + workbookUrlId: 'Sales-Dashboard-wb1', + name: 'Sales Dashboard', + url: 'https://app.sigmacomputing.com/workbooks/wb-1', + path: 'Finance', + latestVersion: 3, + ownerId: 'user-1', + createdAt: '2026-01-01T00:00:00Z', 
+ updatedAt: '2026-01-15T00:00:00Z', + createdBy: 'user-1', + updatedBy: 'user-1', + isArchived: false, + ...overrides, + }; +} + +describe('DefaultSigmaClient.listWorkbooks', () => { + it('returns entries from a single page', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce(makeResponse({ entries: [makeWorkbook()], nextPage: null })); + const client = makeClient(); + const workbooks = await client.listWorkbooks(); + expect(workbooks).toHaveLength(1); + expect(workbooks[0]!.name).toBe('Sales Dashboard'); + }); + + it('passes excludeExplorations=true by default', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce(makeResponse({ entries: [], nextPage: null })); + const client = makeClient(); + await client.listWorkbooks(); + const url = String(vi.mocked(fetch).mock.calls[1]![0]); + expect(url).toContain('excludeExplorations=true'); + }); + + it('omits excludeExplorations when includeExplorations=true', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce(makeResponse({ entries: [], nextPage: null })); + const client = makeClient(); + await client.listWorkbooks({ includeExplorations: true }); + const url = String(vi.mocked(fetch).mock.calls[1]![0]); + expect(url).not.toContain('excludeExplorations'); + }); + + it('filters out archived workbooks by default', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce( + makeResponse({ + entries: [makeWorkbook({ isArchived: false }), makeWorkbook({ workbookId: 'wb-2', name: 'Old', isArchived: true })], + nextPage: null, + }), + ); + const client = makeClient(); + const workbooks = await client.listWorkbooks(); + expect(workbooks).toHaveLength(1); + expect(workbooks[0]!.name).toBe('Sales Dashboard'); + }); + + it('includes archived workbooks when includeArchived=true', async () => { + 
vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce( + makeResponse({ + entries: [makeWorkbook({ isArchived: false }), makeWorkbook({ workbookId: 'wb-2', name: 'Old', isArchived: true })], + nextPage: null, + }), + ); + const client = makeClient(); + const workbooks = await client.listWorkbooks({ includeArchived: true }); + expect(workbooks).toHaveLength(2); + }); + + it('filters workbooks by updatedSince', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce( + makeResponse({ + entries: [ + makeWorkbook({ workbookId: 'wb-1', updatedAt: '2026-01-10T00:00:00Z' }), + makeWorkbook({ workbookId: 'wb-2', updatedAt: '2026-01-20T00:00:00Z' }), + ], + nextPage: null, + }), + ); + const client = makeClient(); + const workbooks = await client.listWorkbooks({ updatedSince: '2026-01-15T00:00:00Z' }); + expect(workbooks).toHaveLength(1); + expect(workbooks[0]!.workbookId).toBe('wb-2'); + }); +}); + +describe('DefaultSigmaClient.getDataModelSpec', () => { + it('calls the correct URL with encoded id', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce(makeResponse({ schemaVersion: 1 })); + const client = makeClient(); + const spec = await client.getDataModelSpec('dm/123'); + expect(spec).toEqual({ schemaVersion: 1 }); + const calls = vi.mocked(fetch).mock.calls; + expect(String(calls[1]![0])).toContain('/v2/dataModels/dm%2F123/spec'); + }); +}); + +describe('DefaultSigmaClient — error handling', () => { + it('retries on 500 and succeeds on retry', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) // auth + .mockResolvedValueOnce(makeResponse({ error: 'server error' }, 500)) // first attempt + .mockResolvedValueOnce(makeResponse({ entries: [], nextPage: null })); // retry + const client = makeClient(); + const models = await client.listDataModels(); + 
expect(models).toHaveLength(0); + }); + + it('throws after exhausting retries on 500', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockImplementation(async () => makeResponse({ error: 'server error' }, 500)); // fresh Response per attempt: a body can be consumed only once + const client = makeClient(); + await expect(client.listDataModels()).rejects.toThrow(/500/); + }); + + it('throws immediately on service_error 500 without retrying', async () => { + const serviceError = { requestId: 'abc', message: 'dataSource subtype not supported in data model read', code: 'service_error' }; + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce(makeResponse(serviceError, 500)); + const client = makeClient(); + await expect(client.getDataModelSpec('dm-1')).rejects.toThrow(/service_error/); + // Only 2 calls: auth + one request. No retries. + expect(vi.mocked(fetch)).toHaveBeenCalledTimes(2); + }); + + it('throws immediately on 404 (non-retryable)', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) + .mockResolvedValueOnce(makeResponse({ error: 'not found' }, 404)); + const client = makeClient(); + await expect(client.getDataModelSpec('dm-999')).rejects.toThrow(/404/); + }); + + it('re-authenticates and retries on 401', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) // initial auth + .mockResolvedValueOnce(makeResponse({ error: 'expired' }, 401)) // 401 on first request + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) // re-auth + .mockResolvedValueOnce(makeResponse({ entries: [], nextPage: null })); // retried request + const client = makeClient(); + const models = await client.listDataModels(); + expect(models).toHaveLength(0); + }); +}); + +describe('DefaultSigmaClient.cleanup', () => { + it('clears cached token so next call re-authenticates', async () => { + vi.mocked(fetch) + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) // first auth + 
.mockResolvedValueOnce(makeResponse({ entries: [], nextPage: null })) // first list + .mockResolvedValueOnce(makeResponse(TOKEN_RESPONSE)) // second auth after cleanup + .mockResolvedValueOnce(makeResponse({ entries: [], nextPage: null })); // second list + const client = makeClient(); + await client.listDataModels(); + await client.cleanup(); + await client.listDataModels(); + // 4 calls total: 2 auths + 2 lists + expect(vi.mocked(fetch)).toHaveBeenCalledTimes(4); + }); +}); diff --git a/packages/cli/test/context/ingest/adapters/sigma/detect.test.ts b/packages/cli/test/context/ingest/adapters/sigma/detect.test.ts new file mode 100644 index 00000000..e3ecf456 --- /dev/null +++ b/packages/cli/test/context/ingest/adapters/sigma/detect.test.ts @@ -0,0 +1,61 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { detectSigmaStagedDir } from '../../../../../src/context/ingest/adapters/sigma/detect.js'; + +async function touch(dir: string, relPath: string, body = '{}'): Promise<void> { + const abs = join(dir, relPath); + await mkdir(join(abs, '..'), { recursive: true }); + await writeFile(abs, body, 'utf-8'); +} + +describe('detectSigmaStagedDir', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'sigma-detect-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('returns true when manifest and at least one data-model file are present', async () => { + await touch(stagedDir, 'sigma-manifest.json'); + await touch(stagedDir, 'data-models/dm-aaa111.json'); + expect(await detectSigmaStagedDir(stagedDir)).toBe(true); + }); + + it('returns true when manifest and at least one workbook file are present', async () => { + await touch(stagedDir, 'sigma-manifest.json'); + await touch(stagedDir, 
'workbooks/wb-xxx111.json'); + expect(await detectSigmaStagedDir(stagedDir)).toBe(true); + }); + + it('returns false when sigma-manifest.json is absent', async () => { + await touch(stagedDir, 'data-models/dm-aaa111.json'); + expect(await detectSigmaStagedDir(stagedDir)).toBe(false); + }); + + it('returns false for a completely empty directory', async () => { + expect(await detectSigmaStagedDir(stagedDir)).toBe(false); + }); + + it('returns false when manifest is present but both entity dirs are empty', async () => { + await touch(stagedDir, 'sigma-manifest.json'); + await mkdir(join(stagedDir, 'data-models'), { recursive: true }); + await mkdir(join(stagedDir, 'workbooks'), { recursive: true }); + expect(await detectSigmaStagedDir(stagedDir)).toBe(false); + }); + + it('returns false when manifest is present but entity dirs are absent', async () => { + await touch(stagedDir, 'sigma-manifest.json'); + expect(await detectSigmaStagedDir(stagedDir)).toBe(false); + }); + + it('returns false when only unrelated files are present', async () => { + await touch(stagedDir, 'notes/README.md'); // neither the manifest nor a Sigma entity file + expect(await detectSigmaStagedDir(stagedDir)).toBe(false); + }); +}); diff --git a/packages/cli/test/context/ingest/adapters/sigma/fetch.test.ts b/packages/cli/test/context/ingest/adapters/sigma/fetch.test.ts new file mode 100644 index 00000000..b94eb480 --- /dev/null +++ b/packages/cli/test/context/ingest/adapters/sigma/fetch.test.ts @@ -0,0 +1,493 @@ +import { mkdtemp, readFile, rm, writeFile, mkdir } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { SigmaClientFactory, SigmaRuntimeClient } from '../../../../../src/context/ingest/adapters/sigma/client-port.js'; +import { fetchSigmaBundle } from '../../../../../src/context/ingest/adapters/sigma/fetch.js'; +import type { SigmaPullConfig } from 
'../../../../../src/context/ingest/adapters/sigma/types.js'; + +const TEST_PULL_CONFIG = { sigmaConnectionId: 'sigma-prod' }; + +function makeSummary(id: string, name: string, path: string, isArchived = false) { + return { + dataModelId: id, + dataModelUrlId: `${name.replace(/\s+/g, '-')}-${id}`, + name, + path, + latestVersion: 1, + ownerId: 'user-1', + createdAt: '2026-01-01T00:00:00Z', + updatedAt: '2026-01-15T00:00:00Z', + isArchived, + }; +} + +function makeFactory(client: Partial<SigmaRuntimeClient>): SigmaClientFactory { + const fullClient: SigmaRuntimeClient = { + testConnection: vi.fn().mockResolvedValue({ success: true }), + listDataModels: vi.fn().mockResolvedValue([]), + listWorkbooks: vi.fn().mockResolvedValue([]), + getDataModelSpec: vi.fn().mockResolvedValue(null), + cleanup: vi.fn().mockResolvedValue(undefined), + ...client, + }; + return { + createClient: vi.fn().mockResolvedValue(fullClient), + }; +} + +describe('fetchSigmaBundle', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'sigma-fetch-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('creates sigma-manifest.json after a successful fetch', async () => { + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([ + makeSummary('dm-1', 'Revenue Model', 'Finance/Revenue'), + ]), + getDataModelSpec: vi.fn().mockResolvedValue({ schemaVersion: 1 }), + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + const manifest = JSON.parse(await readFile(join(stagedDir, 'sigma-manifest.json'), 'utf-8')); + expect(manifest.sigmaConnectionId).toBe('sigma-prod'); + expect(manifest.dataModelCount).toBe(1); + expect(manifest.fetchedAt).toBeDefined(); + }); + + it('writes one data-model file per active model', async () => { + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([ + makeSummary('dm-1', 
'Revenue Model', 'Finance/Revenue'), + makeSummary('dm-2', 'ARR Model', 'Finance/ARR'), + ]), + getDataModelSpec: vi.fn().mockResolvedValue({ schemaVersion: 1 }), + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + const dm1 = JSON.parse(await readFile(join(stagedDir, 'data-models', 'dm-1.json'), 'utf-8')); + const dm2 = JSON.parse(await readFile(join(stagedDir, 'data-models', 'dm-2.json'), 'utf-8')); + expect(dm1.name).toBe('Revenue Model'); + expect(dm2.name).toBe('ARR Model'); + }); + + it('skips archived models and does not write their files', async () => { + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([ + makeSummary('dm-1', 'Active Model', 'Finance/Active', false), + makeSummary('dm-archived', 'Archived Model', 'Finance/Old', true), + ]), + getDataModelSpec: vi.fn().mockResolvedValue({ schemaVersion: 1 }), + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + const manifest = JSON.parse(await readFile(join(stagedDir, 'sigma-manifest.json'), 'utf-8')); + expect(manifest.dataModelCount).toBe(1); + await expect(readFile(join(stagedDir, 'data-models', 'dm-archived.json'), 'utf-8')).rejects.toThrow(); + }); + + it('logs a specific message for unsupported data source subtype (service_error)', async () => { + const warnMessages: string[] = []; + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([ + makeSummary('dm-1', 'CSV Upload Model', 'Finance/CSV'), + ]), + getDataModelSpec: vi.fn().mockRejectedValue( + new Error('Sigma API error (500): {"code":"service_error","message":"dataSource subtype not supported in data model read"}'), + ), + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + logger: { log: () => undefined, warn: (m) => warnMessages.push(m) }, + }); + 
expect(warnMessages[0]).toContain('data source type not supported'); + expect(warnMessages[0]).not.toContain('Sigma API error (500)'); + }); + + it('writes null spec when getDataModelSpec throws, and does not abort the whole fetch', async () => { + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([ + makeSummary('dm-1', 'Good Model', 'Finance/Good'), + makeSummary('dm-2', 'Broken Model', 'Finance/Broken'), + ]), + getDataModelSpec: vi + .fn() + .mockResolvedValueOnce({ schemaVersion: 1 }) + .mockRejectedValueOnce(new Error('Spec fetch failed')), + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + const dm2 = JSON.parse(await readFile(join(stagedDir, 'data-models', 'dm-2.json'), 'utf-8')); + expect(dm2.spec).toBeNull(); + const manifest = JSON.parse(await readFile(join(stagedDir, 'sigma-manifest.json'), 'utf-8')); + expect(manifest.dataModelCount).toBe(2); + }); + + it('calls cleanup on the client even when an error is thrown', async () => { + const cleanupMock = vi.fn().mockResolvedValue(undefined); + const factory = makeFactory({ + listDataModels: vi.fn().mockRejectedValue(new Error('Network failure')), + cleanup: cleanupMock, + }); + await expect( + fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }), + ).rejects.toThrow('Network failure'); + expect(cleanupMock).toHaveBeenCalledOnce(); + }); + + it('passes the resolved config to clientFactory.createClient', async () => { + const createClientMock = vi.fn().mockResolvedValue({ + testConnection: vi.fn(), + listDataModels: vi.fn().mockResolvedValue([]), + listWorkbooks: vi.fn().mockResolvedValue([]), + getDataModelSpec: vi.fn(), + cleanup: vi.fn().mockResolvedValue(undefined), + } satisfies SigmaRuntimeClient); + const factory: SigmaClientFactory = { createClient: createClientMock }; + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + 
stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + const calledConfig = createClientMock.mock.calls[0]![0] as SigmaPullConfig; + expect(calledConfig.sigmaConnectionId).toBe('sigma-prod'); + }); + + it('writes sigma-projection-config.json with connectionMappings from pullConfig', async () => { + const factory = makeFactory({}); + await fetchSigmaBundle({ + pullConfig: { sigmaConnectionId: 'sigma-prod', connectionMappings: { 'uuid-1': 'snowflake-prod' } }, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + const config = JSON.parse(await readFile(join(stagedDir, 'sigma-projection-config.json'), 'utf-8')); + expect(config.connectionMappings['uuid-1']).toBe('snowflake-prod'); + }); + + it('writes sigma-projection-config.json with empty mappings when none are provided', async () => { + const factory = makeFactory({}); + await fetchSigmaBundle({ pullConfig: TEST_PULL_CONFIG, stagedDir, ctx: {} as never, clientFactory: factory }); + const config = JSON.parse(await readFile(join(stagedDir, 'sigma-projection-config.json'), 'utf-8')); + expect(config.connectionMappings).toEqual({}); + }); + + it('writes workbookFilter defaults to projection config when not specified', async () => { + const factory = makeFactory({}); + await fetchSigmaBundle({ pullConfig: TEST_PULL_CONFIG, stagedDir, ctx: {} as never, clientFactory: factory }); + const config = JSON.parse(await readFile(join(stagedDir, 'sigma-projection-config.json'), 'utf-8')); + expect(config.workbookFilter.includeArchived).toBe(false); + expect(config.workbookFilter.includeExplorations).toBe(false); + expect(config.workbookFilter.updatedSince).toBeUndefined(); + }); + + it('writes explicit workbookFilter settings to projection config', async () => { + const factory = makeFactory({}); + await fetchSigmaBundle({ + pullConfig: { + sigmaConnectionId: 'sigma-prod', + workbookFilter: { includeArchived: true, includeExplorations: false, updatedSince: '2026-01-01T00:00:00Z' }, + }, + stagedDir, + ctx: 
{} as never, + clientFactory: factory, + }); + const config = JSON.parse(await readFile(join(stagedDir, 'sigma-projection-config.json'), 'utf-8')); + expect(config.workbookFilter.includeArchived).toBe(true); + expect(config.workbookFilter.updatedSince).toBe('2026-01-01T00:00:00Z'); + }); + + it('throws on invalid pullConfig', async () => { + const factory = makeFactory({}); + await expect( + fetchSigmaBundle({ + pullConfig: { sigmaConnectionId: 'invalid id with spaces' }, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }), + ).rejects.toThrow(); + }); + + it('handles zero active models gracefully', async () => { + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([]), + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + const manifest = JSON.parse(await readFile(join(stagedDir, 'sigma-manifest.json'), 'utf-8')); + expect(manifest.dataModelCount).toBe(0); + }); + + it('skips spec fetch for a model whose updatedAt matches the existing staged file', async () => { + const summary = makeSummary('dm-1', 'Revenue Model', 'Finance/Revenue'); + // Pre-populate a staged file with the same updatedAt. 
+ await mkdir(join(stagedDir, 'data-models'), { recursive: true }); + const existingStaged = { + sigmaId: 'dm-1', + name: 'Revenue Model', + path: 'Finance/Revenue', + latestVersion: 1, + updatedAt: summary.updatedAt, + isArchived: false, + spec: { schemaVersion: 1, name: 'old' }, + }; + await writeFile( + join(stagedDir, 'data-models', 'dm-1.json'), + JSON.stringify(existingStaged), + 'utf-8', + ); + const getSpecMock = vi.fn().mockResolvedValue({ schemaVersion: 1 }); + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([summary]), + getDataModelSpec: getSpecMock, + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + // Spec fetch must be skipped for the unchanged model. + expect(getSpecMock).not.toHaveBeenCalled(); + }); + + it('retries spec fetch for a model whose updatedAt matches but staged spec is null (transient failure)', async () => { + const summary = makeSummary('dm-1', 'Revenue Model', 'Finance/Revenue'); + await mkdir(join(stagedDir, 'data-models'), { recursive: true }); + const existingStaged = { + sigmaId: 'dm-1', + name: 'Revenue Model', + path: 'Finance/Revenue', + latestVersion: 1, + updatedAt: summary.updatedAt, + isArchived: false, + spec: null, + }; + await writeFile( + join(stagedDir, 'data-models', 'dm-1.json'), + JSON.stringify(existingStaged), + 'utf-8', + ); + const freshSpec = { schemaVersion: 1, name: 'Revenue Model' }; + const getSpecMock = vi.fn().mockResolvedValue(freshSpec); + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([summary]), + getDataModelSpec: getSpecMock, + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + expect(getSpecMock).toHaveBeenCalledWith('dm-1'); + const written = JSON.parse(await readFile(join(stagedDir, 'data-models', 'dm-1.json'), 'utf-8')); + expect(written.spec).toEqual(freshSpec); + }); + + it('writes 
workbook count to manifest', async () => { + const factory = makeFactory({ + listWorkbooks: vi.fn().mockResolvedValue([ + { workbookId: 'wb-1', workbookUrlId: 'wb-url-1', name: 'Sales Dashboard', path: 'Finance/Dashboards', latestVersion: 1, ownerId: 'u1', createdAt: '2026-01-01T00:00:00Z', updatedAt: '2026-01-15T00:00:00Z', isArchived: false }, + { workbookId: 'wb-2', workbookUrlId: 'wb-url-2', name: 'ARR Tracker', path: 'Finance/Dashboards', latestVersion: 2, ownerId: 'u1', createdAt: '2026-01-02T00:00:00Z', updatedAt: '2026-01-16T00:00:00Z', isArchived: false }, + ]), + }); + await fetchSigmaBundle({ pullConfig: TEST_PULL_CONFIG, stagedDir, ctx: {} as never, clientFactory: factory }); + const manifest = JSON.parse(await readFile(join(stagedDir, 'sigma-manifest.json'), 'utf-8')); + expect(manifest.workbookCount).toBe(2); + }); + + it('writes one staged file per active workbook', async () => { + const factory = makeFactory({ + listWorkbooks: vi.fn().mockResolvedValue([ + { workbookId: 'wb-1', workbookUrlId: 'wb-url-1', name: 'Sales Dashboard', path: 'Finance/Dashboards', latestVersion: 1, ownerId: 'u1', createdAt: '2026-01-01T00:00:00Z', updatedAt: '2026-01-15T00:00:00Z', isArchived: false, description: 'Finance overview' }, + ]), + }); + await fetchSigmaBundle({ pullConfig: TEST_PULL_CONFIG, stagedDir, ctx: {} as never, clientFactory: factory }); + const wb = JSON.parse(await readFile(join(stagedDir, 'workbooks', 'wb-1.json'), 'utf-8')); + expect(wb.name).toBe('Sales Dashboard'); + expect(wb.description).toBe('Finance overview'); + }); + + it('skips workbook re-staging when updatedAt is unchanged', async () => { + await mkdir(join(stagedDir, 'workbooks'), { recursive: true }); + const existing = { sigmaId: 'wb-1', name: 'Sales Dashboard', path: 'Finance', latestVersion: 1, updatedAt: '2026-01-15T00:00:00Z', isArchived: false }; + await writeFile(join(stagedDir, 'workbooks', 'wb-1.json'), JSON.stringify(existing), 'utf-8'); + const listWorkbooksMock = 
vi.fn().mockResolvedValue([ + { workbookId: 'wb-1', workbookUrlId: 'wb-url-1', name: 'Sales Dashboard', path: 'Finance', latestVersion: 1, ownerId: 'u1', createdAt: '2026-01-01T00:00:00Z', updatedAt: '2026-01-15T00:00:00Z', isArchived: false }, + ]); + const factory = makeFactory({ listWorkbooks: listWorkbooksMock }); + await fetchSigmaBundle({ pullConfig: TEST_PULL_CONFIG, stagedDir, ctx: {} as never, clientFactory: factory }); + // File should still contain the pre-existing content (not overwritten). + const wb = JSON.parse(await readFile(join(stagedDir, 'workbooks', 'wb-1.json'), 'utf-8')); + expect(wb.sigmaId).toBe('wb-1'); + }); + + it('removes the staged file when a model is no longer in the active list', async () => { + // Pre-populate a staged file for dm-stale. + await mkdir(join(stagedDir, 'data-models'), { recursive: true }); + const staleStaged = { + sigmaId: 'dm-stale', + name: 'Stale Model', + path: 'Old/Stale', + latestVersion: 1, + updatedAt: '2026-01-01T00:00:00Z', + isArchived: false, + spec: null, + }; + await writeFile( + join(stagedDir, 'data-models', 'dm-stale.json'), + JSON.stringify(staleStaged), + 'utf-8', + ); + // API now returns only dm-1 (dm-stale was archived or deleted). + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([makeSummary('dm-1', 'Active Model', 'Finance/Active')]), + getDataModelSpec: vi.fn().mockResolvedValue({ schemaVersion: 1 }), + }); + await fetchSigmaBundle({ + pullConfig: TEST_PULL_CONFIG, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + await expect( + readFile(join(stagedDir, 'data-models', 'dm-stale.json'), 'utf-8'), + ).rejects.toThrow(); + // The active model's file must still exist. 
+ await expect( + readFile(join(stagedDir, 'data-models', 'dm-1.json'), 'utf-8'), + ).resolves.toBeDefined(); + }); + + it('filters spec fetches by dataModelFilter.updatedSince but preserves existing staged files for filtered-out models', async () => { + // Pre-stage the old model from a previous full fetch. + await mkdir(join(stagedDir, 'data-models'), { recursive: true }); + const oldStaged = { + sigmaId: 'dm-old', name: 'Old Model', path: 'Finance/Old', + latestVersion: 1, updatedAt: '2026-06-20T00:00:00Z', isArchived: false, spec: { schemaVersion: 0 }, + }; + await writeFile(join(stagedDir, 'data-models', 'dm-old.json'), JSON.stringify(oldStaged), 'utf-8'); + const getSpecMock = vi.fn().mockResolvedValue({ schemaVersion: 1 }); + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([ + { ...makeSummary('dm-old', 'Old Model', 'Finance/Old'), updatedAt: '2026-06-20T00:00:00Z' }, + { ...makeSummary('dm-new', 'New Model', 'Finance/New'), updatedAt: '2026-06-26T00:00:00Z' }, + ]), + getDataModelSpec: getSpecMock, + }); + await fetchSigmaBundle({ + pullConfig: { sigmaConnectionId: 'sigma-prod', dataModelFilter: { updatedSince: '2026-06-25T00:00:00Z' } }, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + // Only the new model's spec is fetched (old one is outside the filter window). + expect(getSpecMock).toHaveBeenCalledTimes(1); + // Manifest reflects only the filtered count. + const manifest = JSON.parse(await readFile(join(stagedDir, 'sigma-manifest.json'), 'utf-8')); + expect(manifest.dataModelCount).toBe(1); + // New model is staged. + await expect(readFile(join(stagedDir, 'data-models', 'dm-new.json'), 'utf-8')).resolves.toBeDefined(); + // Old model's staged file is PRESERVED — it is still active, just outside the filter window. 
+ await expect(readFile(join(stagedDir, 'data-models', 'dm-old.json'), 'utf-8')).resolves.toBeDefined(); + }); + + it('includes all active models when dataModelFilter is not set', async () => { + const factory = makeFactory({ + listDataModels: vi.fn().mockResolvedValue([ + { ...makeSummary('dm-old', 'Old Model', 'Finance/Old'), updatedAt: '2026-01-01T00:00:00Z' }, + { ...makeSummary('dm-new', 'New Model', 'Finance/New'), updatedAt: '2026-06-26T00:00:00Z' }, + ]), + getDataModelSpec: vi.fn().mockResolvedValue({ schemaVersion: 1 }), + }); + await fetchSigmaBundle({ pullConfig: TEST_PULL_CONFIG, stagedDir, ctx: {} as never, clientFactory: factory }); + const manifest = JSON.parse(await readFile(join(stagedDir, 'sigma-manifest.json'), 'utf-8')); + expect(manifest.dataModelCount).toBe(2); + }); + + it('removes the staged file when a workbook is no longer returned by the API', async () => { + await mkdir(join(stagedDir, 'workbooks'), { recursive: true }); + const stale = { + sigmaId: 'wb-stale', + name: 'Old Dashboard', + path: 'Finance/Old', + latestVersion: 1, + updatedAt: '2026-01-01T00:00:00Z', + isArchived: false, + }; + await writeFile(join(stagedDir, 'workbooks', 'wb-stale.json'), JSON.stringify(stale), 'utf-8'); + const factory = makeFactory({ + listWorkbooks: vi.fn().mockResolvedValue([ + { workbookId: 'wb-active', workbookUrlId: 'wb-url-active', name: 'Active Dashboard', path: 'Finance/Active', latestVersion: 1, ownerId: 'u1', createdAt: '2026-01-01T00:00:00Z', updatedAt: '2026-01-16T00:00:00Z', isArchived: false }, + ]), + }); + await fetchSigmaBundle({ pullConfig: TEST_PULL_CONFIG, stagedDir, ctx: {} as never, clientFactory: factory }); + await expect(readFile(join(stagedDir, 'workbooks', 'wb-stale.json'), 'utf-8')).rejects.toThrow(); + await expect(readFile(join(stagedDir, 'workbooks', 'wb-active.json'), 'utf-8')).resolves.toBeDefined(); + }); + + it('workbookFilter.updatedSince filters fetch but preserves existing staged files for older workbooks', async () 
=> { + // Pre-stage an old workbook from a previous full fetch. + await mkdir(join(stagedDir, 'workbooks'), { recursive: true }); + const oldStaged = { + sigmaId: 'wb-old', name: 'Old Dashboard', path: 'Finance/Old', + latestVersion: 1, updatedAt: '2026-06-20T00:00:00Z', isArchived: false, workbookUrlId: 'wb-url-old', + }; + await writeFile(join(stagedDir, 'workbooks', 'wb-old.json'), JSON.stringify(oldStaged), 'utf-8'); + const listWorkbooksMock = vi.fn().mockResolvedValue([ + { workbookId: 'wb-old', workbookUrlId: 'wb-url-old', name: 'Old Dashboard', path: 'Finance/Old', latestVersion: 1, ownerId: 'u1', createdAt: '2026-01-01T00:00:00Z', updatedAt: '2026-06-20T00:00:00Z', isArchived: false }, + { workbookId: 'wb-new', workbookUrlId: 'wb-url-new', name: 'New Dashboard', path: 'Finance/New', latestVersion: 1, ownerId: 'u1', createdAt: '2026-01-01T00:00:00Z', updatedAt: '2026-06-26T00:00:00Z', isArchived: false }, + ]); + const factory = makeFactory({ listWorkbooks: listWorkbooksMock }); + await fetchSigmaBundle({ + pullConfig: { sigmaConnectionId: 'sigma-prod', workbookFilter: { updatedSince: '2026-06-25T00:00:00Z' } }, + stagedDir, + ctx: {} as never, + clientFactory: factory, + }); + // Only the new workbook is staged on this run. + await expect(readFile(join(stagedDir, 'workbooks', 'wb-new.json'), 'utf-8')).resolves.toBeDefined(); + // Old workbook's staged file is PRESERVED — it is still active, just outside the filter window. + await expect(readFile(join(stagedDir, 'workbooks', 'wb-old.json'), 'utf-8')).resolves.toBeDefined(); + // listWorkbooks is called without updatedSince to get the full universe for eviction. 
+ expect(listWorkbooksMock).toHaveBeenCalledWith(expect.not.objectContaining({ updatedSince: expect.anything() })); + }); +}); diff --git a/packages/cli/test/context/ingest/adapters/sigma/project.test.ts b/packages/cli/test/context/ingest/adapters/sigma/project.test.ts new file mode 100644 index 00000000..59e849bb --- /dev/null +++ b/packages/cli/test/context/ingest/adapters/sigma/project.test.ts @@ -0,0 +1,301 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { projectSigmaDataModels } from '../../../../../src/context/ingest/adapters/sigma/project.js'; +import type { DeterministicProjectionContext } from '../../../../../src/context/ingest/types.js'; +import type { SemanticLayerService } from '../../../../../src/context/sl/semantic-layer.service.js'; +import type { SemanticLayerSource } from '../../../../../src/context/sl/types.js'; + +function makeCtx( + stagedDir: string, + writeSource: (connectionId: string, source: SemanticLayerSource, ...rest: string[]) => Promise<{ warnings: string[] }>, +): DeterministicProjectionContext { + const svc = { + writeSource, + forWorktree: () => ({ writeSource }), + } as unknown as SemanticLayerService; + + return { + connectionId: 'sigma-prod', + sourceKey: 'sigma-prod', + syncId: 'sync-1', + jobId: 'job-1', + runId: 'run-1', + stagedDir, + workdir: '', + semanticLayerService: svc, + }; +} + +function makeSpec(elements: unknown[]) { + return { + schemaVersion: 1, + name: 'Test Model', + pages: [{ id: 'p1', name: 'Main', elements }], + }; +} + +function makeStagedModel(id: string, name: string, spec: unknown) { + return JSON.stringify({ + sigmaId: id, + name, + path: 'Finance/Models', + latestVersion: 1, + updatedAt: '2026-01-15T00:00:00Z', + isArchived: false, + spec, + }); +} + +/** Write a projection config that maps the given sigma connection IDs to 
'warehouse-main'. */ +async function writeProjectionConfig(stagedDir: string, sigmaConnectionIds: string[]): Promise<void> { + const mappings = Object.fromEntries(sigmaConnectionIds.map((id) => [id, 'warehouse-main'])); + await writeFile( + join(stagedDir, 'sigma-projection-config.json'), + JSON.stringify({ connectionMappings: mappings }), + 'utf-8', + ); +} + +describe('projectSigmaDataModels', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'sigma-project-')); + await mkdir(join(stagedDir, 'data-models'), { recursive: true }); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('returns empty result when data-models directory is missing', async () => { + const emptyDir = await mkdtemp(join(tmpdir(), 'sigma-project-empty-')); + try { + const writeSource = vi.fn().mockResolvedValue({ warnings: [] }); + const result = await projectSigmaDataModels(makeCtx(emptyDir, writeSource), makeCtx(emptyDir, writeSource).semanticLayerService as never); + expect(result.touchedSources).toHaveLength(0); + expect(writeSource).not.toHaveBeenCalled(); + } finally { + await rm(emptyDir, { recursive: true, force: true }); + } + }); + + it('converts a warehouse-table element to a semantic-layer source', async () => { + await writeProjectionConfig(stagedDir, ['sigma-conn-uuid']); + const spec = makeSpec([ + { + id: 'elem1', + kind: 'table', + name: 'Opportunities', + source: { kind: 'warehouse-table', connectionId: 'sigma-conn-uuid', path: ['FIVETRAN', 'SALESFORCE', 'OPPORTUNITIES'] }, + columns: [ + { id: 'c1', formula: '[OPPORTUNITIES/Amount]', name: 'Deal Amount' }, + { id: 'c2', formula: 'Sum([OPPORTUNITIES/Amount])', name: 'Total Amount' }, + ], + }, + ]); + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'Revenue Model', spec)); + + const written: Array<{ connectionId: string; source: SemanticLayerSource }> = []; + const writeSource = 
vi.fn().mockImplementation((connectionId: string, source: SemanticLayerSource) => { + written.push({ connectionId, source }); + return Promise.resolve({ warnings: [] }); + }); + + const result = await projectSigmaDataModels(makeCtx(stagedDir, writeSource), makeCtx(stagedDir, writeSource).semanticLayerService as never); + + expect(writeSource).toHaveBeenCalledOnce(); + expect(written[0]!.connectionId).toBe('warehouse-main'); + const source = written[0]!.source; + expect(source.table).toBe('FIVETRAN.SALESFORCE.OPPORTUNITIES'); + expect(source.columns.some((c) => c.name === 'deal_amount')).toBe(true); + expect(source.columns.some((c) => c.name === 'total_amount')).toBe(false); + expect(source.measures).toEqual([]); + expect(result.touchedSources).toHaveLength(1); + expect(result.errors).toHaveLength(0); + }); + + it('skips elements whose source kind is not warehouse-table', async () => { + const spec = makeSpec([ + { + id: 'elem1', + kind: 'table', + name: 'Derived', + source: { kind: 'data-model', dataModelId: 'dm-other', elementId: 'e1' }, + columns: [{ id: 'c1', formula: '[Derived/Revenue]' }], + }, + ]); + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'Derived Model', spec)); + + const writeSource = vi.fn().mockResolvedValue({ warnings: [] }); + const result = await projectSigmaDataModels(makeCtx(stagedDir, writeSource), makeCtx(stagedDir, writeSource).semanticLayerService as never); + + expect(writeSource).not.toHaveBeenCalled(); + expect(result.touchedSources).toHaveLength(0); + }); + + it('skips hidden elements', async () => { + const spec = makeSpec([ + { + id: 'elem1', + kind: 'table', + name: 'Hidden', + hidden: true, + source: { kind: 'warehouse-table', connectionId: 'c', path: ['DB', 'SCHEMA', 'TABLE'] }, + columns: [{ id: 'c1', formula: '[TABLE/Col]' }], + }, + ]); + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'Hidden Model', spec)); + + const writeSource = 
vi.fn().mockResolvedValue({ warnings: [] }); + const result = await projectSigmaDataModels(makeCtx(stagedDir, writeSource), makeCtx(stagedDir, writeSource).semanticLayerService as never); + expect(writeSource).not.toHaveBeenCalled(); + expect(result.touchedSources).toHaveLength(0); + }); + + it('skips hidden columns', async () => { + await writeProjectionConfig(stagedDir, ['c']); + const spec = makeSpec([ + { + id: 'elem1', + kind: 'table', + name: 'Revenue', + source: { kind: 'warehouse-table', connectionId: 'c', path: ['DB', 'S', 'T'] }, + columns: [ + { id: 'c1', formula: '[T/Visible]', name: 'Visible' }, + { id: 'c2', formula: '[T/Hidden]', name: 'Hidden', hidden: true }, + ], + }, + ]); + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'Revenue', spec)); + + const written: SemanticLayerSource[] = []; + const writeSource = vi.fn().mockImplementation((_: string, source: SemanticLayerSource) => { + written.push(source); + return Promise.resolve({ warnings: [] }); + }); + await projectSigmaDataModels(makeCtx(stagedDir, writeSource), makeCtx(stagedDir, writeSource).semanticLayerService as never); + const source = written[0]!; + expect(source.columns.some((c) => c.name === 'visible')).toBe(true); + expect(source.columns.some((c) => c.name === 'hidden')).toBe(false); + }); + + it('silently skips aggregation formula columns and never emits measures', async () => { + await writeProjectionConfig(stagedDir, ['c']); + const spec = makeSpec([ + { + id: 'e1', + kind: 'table', + name: 'Sales', + source: { kind: 'warehouse-table', connectionId: 'c', path: ['DB', 'S', 'ORDERS'] }, + columns: [ + { id: 'c1', formula: 'Sum([ORDERS/Revenue])', name: 'Total Revenue' }, + { id: 'c2', formula: 'CountDistinct([ORDERS/CustomerId])', name: 'Unique Customers' }, + { id: 'c3', formula: '[ORDERS/OrderDate]', name: 'Order Date' }, + ], + }, + ]); + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'Sales', spec)); + + 
const written: SemanticLayerSource[] = []; + const writeSource = vi.fn().mockImplementation((_: string, source: SemanticLayerSource) => { + written.push(source); + return Promise.resolve({ warnings: [] }); + }); + await projectSigmaDataModels(makeCtx(stagedDir, writeSource), makeCtx(stagedDir, writeSource).semanticLayerService as never); + + const source = written[0]!; + expect(source.measures).toEqual([]); + expect(source.columns.map((c) => c.name)).toContain('order_date'); + expect(source.columns.map((c) => c.name)).not.toContain('total_revenue'); + expect(source.columns.map((c) => c.name)).not.toContain('unique_customers'); + }); + + it('skips models with null spec', async () => { + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'No Spec Model', null)); + + const writeSource = vi.fn().mockResolvedValue({ warnings: [] }); + const result = await projectSigmaDataModels(makeCtx(stagedDir, writeSource), makeCtx(stagedDir, writeSource).semanticLayerService as never); + expect(writeSource).not.toHaveBeenCalled(); + expect(result.touchedSources).toHaveLength(0); + }); + + it('routes to the mapped warehouse connection when connectionMappings is set', async () => { + // Write a projection config that maps the Sigma internal connection UUID to a ktx warehouse. 
+ await writeFile( + join(stagedDir, 'sigma-projection-config.json'), + JSON.stringify({ connectionMappings: { 'sigma-internal-uuid': 'snowflake-prod' } }), + 'utf-8', + ); + + const spec = makeSpec([ + { + id: 'e1', + kind: 'table', + name: 'Accounts', + source: { kind: 'warehouse-table', connectionId: 'sigma-internal-uuid', path: ['PROD', 'SF', 'ACCOUNTS'] }, + columns: [{ id: 'c1', formula: '[ACCOUNTS/Name]', name: 'Account Name' }], + }, + ]); + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'Accounts', spec)); + + const written: Array<{ connectionId: string }> = []; + const writeSource = vi.fn().mockImplementation((connectionId: string) => { + written.push({ connectionId }); + return Promise.resolve({ warnings: [] }); + }); + + await projectSigmaDataModels(makeCtx(stagedDir, writeSource), makeCtx(stagedDir, writeSource).semanticLayerService as never); + + expect(written[0]!.connectionId).toBe('snowflake-prod'); + }); + + it('skips SL source and emits a warning when no connectionMappings entry exists for the element', async () => { + await writeFile( + join(stagedDir, 'sigma-projection-config.json'), + JSON.stringify({ connectionMappings: { 'other-uuid': 'snowflake-prod' } }), + 'utf-8', + ); + + const spec = makeSpec([ + { + id: 'e1', + kind: 'table', + name: 'Orders', + source: { kind: 'warehouse-table', connectionId: 'unmapped-uuid', path: ['DB', 'S', 'ORDERS'] }, + columns: [{ id: 'c1', formula: '[ORDERS/Id]', name: 'Order Id' }], + }, + ]); + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'Orders', spec)); + + const writeSource = vi.fn().mockResolvedValue({ warnings: [] }); + const result = await projectSigmaDataModels( + makeCtx(stagedDir, writeSource), + makeCtx(stagedDir, writeSource).semanticLayerService as never, + ); + + expect(writeSource).not.toHaveBeenCalled(); + expect(result.touchedSources).toHaveLength(0); + expect(result.warnings.some((w) => w.includes('no 
connectionMappings entry'))).toBe(true); + }); + + it('surfaces writeSource warnings in result', async () => { + await writeProjectionConfig(stagedDir, ['c']); + const spec = makeSpec([ + { + id: 'e1', + kind: 'table', + name: 'Revenue', + source: { kind: 'warehouse-table', connectionId: 'c', path: ['DB', 'S', 'T'] }, + columns: [{ id: 'c1', formula: '[T/Amount]', name: 'Amount' }], + }, + ]); + await writeFile(join(stagedDir, 'data-models', 'dm-1.json'), makeStagedModel('dm-1', 'Revenue', spec)); + + const writeSource = vi.fn().mockResolvedValue({ warnings: ['schema: some warning'] }); + const result = await projectSigmaDataModels(makeCtx(stagedDir, writeSource), makeCtx(stagedDir, writeSource).semanticLayerService as never); + expect(result.warnings).toContain('schema: some warning'); + }); +}); diff --git a/packages/cli/test/context/ingest/adapters/sigma/sigma.adapter.test.ts b/packages/cli/test/context/ingest/adapters/sigma/sigma.adapter.test.ts new file mode 100644 index 00000000..75ff03a7 --- /dev/null +++ b/packages/cli/test/context/ingest/adapters/sigma/sigma.adapter.test.ts @@ -0,0 +1,64 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { SigmaSourceAdapter } from '../../../../../src/context/ingest/adapters/sigma/sigma.adapter.js'; +import type { SigmaClientFactory } from '../../../../../src/context/ingest/adapters/sigma/client-port.js'; + +function makeFactory(): SigmaClientFactory { + return { createClient: vi.fn() }; +} + +describe('SigmaSourceAdapter.listTargetConnectionIds', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'sigma-adapter-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + async function writeProjectionConfig(mappings: Record<string, string>) { + await writeFile( + 
join(stagedDir, 'sigma-projection-config.json'), + JSON.stringify({ connectionMappings: mappings }), + 'utf-8', + ); + } + + it('returns mapped warehouse connection IDs when mappings are present', async () => { + await writeProjectionConfig({ 'uuid-a': 'snowflake-prod', 'uuid-b': 'snowflake-prod', 'uuid-c': 'bigquery-prod' }); + const adapter = new SigmaSourceAdapter({ clientFactory: makeFactory() }); + const ids = await adapter.listTargetConnectionIds(stagedDir); + expect(ids).toEqual(['bigquery-prod', 'snowflake-prod']); + }); + + it('returns empty array when connectionMappings is empty', async () => { + await writeProjectionConfig({}); + const adapter = new SigmaSourceAdapter({ clientFactory: makeFactory() }); + const ids = await adapter.listTargetConnectionIds(stagedDir); + expect(ids).toEqual([]); + }); + + it('returns empty array when the projection config file is missing', async () => { + const adapter = new SigmaSourceAdapter({ clientFactory: makeFactory() }); + const ids = await adapter.listTargetConnectionIds(stagedDir); + expect(ids).toEqual([]); + }); + + it('returns empty array when the projection config is malformed', async () => { + await mkdir(stagedDir, { recursive: true }); + await writeFile(join(stagedDir, 'sigma-projection-config.json'), 'not json', 'utf-8'); + const adapter = new SigmaSourceAdapter({ clientFactory: makeFactory() }); + const ids = await adapter.listTargetConnectionIds(stagedDir); + expect(ids).toEqual([]); + }); + + it('returns empty array when both projection config and manifest are missing', async () => { + const adapter = new SigmaSourceAdapter({ clientFactory: makeFactory() }); + const ids = await adapter.listTargetConnectionIds(stagedDir); + expect(ids).toEqual([]); + }); +}); diff --git a/packages/cli/test/context/ingest/adapters/sigma/types.test.ts b/packages/cli/test/context/ingest/adapters/sigma/types.test.ts new file mode 100644 index 00000000..7b9206b3 --- /dev/null +++ 
b/packages/cli/test/context/ingest/adapters/sigma/types.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, it } from 'vitest'; +import { + parseSigmaPullConfig, + sigmaManifestSchema, + stagedDataModelFileSchema, +} from '../../../../../src/context/ingest/adapters/sigma/types.js'; + +describe('parseSigmaPullConfig', () => { + it('accepts a simple alphanumeric connection ID', () => { + const result = parseSigmaPullConfig({ sigmaConnectionId: 'sigma-prod' }); + expect(result.sigmaConnectionId).toBe('sigma-prod'); + }); + + it('accepts IDs with underscores', () => { + const result = parseSigmaPullConfig({ sigmaConnectionId: 'sigma_prod_2' }); + expect(result.sigmaConnectionId).toBe('sigma_prod_2'); + }); + + it('rejects IDs starting with a special char', () => { + expect(() => parseSigmaPullConfig({ sigmaConnectionId: '../prod' })).toThrow(); + }); + + it('rejects IDs with spaces', () => { + expect(() => parseSigmaPullConfig({ sigmaConnectionId: 'sigma prod' })).toThrow(); + }); + + it('rejects missing sigmaConnectionId', () => { + expect(() => parseSigmaPullConfig({})).toThrow(); + }); + + it('rejects null', () => { + expect(() => parseSigmaPullConfig(null)).toThrow(); + }); +}); + +describe('stagedDataModelFileSchema', () => { + const minimal = { + sigmaId: 'dm-aaa111', + name: 'Revenue Model', + path: 'My Documents/Finance/Revenue Model', + latestVersion: 3, + updatedAt: '2026-01-15T10:00:00Z', + isArchived: false, + spec: { schemaVersion: 1, pages: [] }, + }; + + it('parses a fully-populated file', () => { + const result = stagedDataModelFileSchema.parse(minimal); + expect(result.sigmaId).toBe('dm-aaa111'); + expect(result.name).toBe('Revenue Model'); + expect(result.isArchived).toBe(false); + }); + + it('coerces absent isArchived to false', () => { + const { isArchived: _, ...rest } = minimal; + void _; + const result = stagedDataModelFileSchema.parse(rest); + expect(result.isArchived).toBe(false); + }); + + it('accepts null spec', () => { + const result = 
stagedDataModelFileSchema.parse({ ...minimal, spec: null }); + expect(result.spec).toBeNull(); + }); + + it('rejects missing sigmaId', () => { + const { sigmaId: _, ...rest } = minimal; + void _; + expect(() => stagedDataModelFileSchema.parse(rest)).toThrow(); + }); + + it('rejects missing name', () => { + const { name: _, ...rest } = minimal; + void _; + expect(() => stagedDataModelFileSchema.parse(rest)).toThrow(); + }); + + it('rejects missing path', () => { + const { path: _, ...rest } = minimal; + void _; + expect(() => stagedDataModelFileSchema.parse(rest)).toThrow(); + }); +}); + +describe('sigmaManifestSchema', () => { + const valid = { + sigmaConnectionId: 'sigma-prod', + fetchedAt: '2026-01-15T10:00:00Z', + dataModelCount: 2, + }; + + it('parses a valid manifest', () => { + const result = sigmaManifestSchema.parse(valid); + expect(result.sigmaConnectionId).toBe('sigma-prod'); + expect(result.dataModelCount).toBe(2); + }); + + it('rejects missing fetchedAt', () => { + const { fetchedAt: _, ...rest } = valid; + void _; + expect(() => sigmaManifestSchema.parse(rest)).toThrow(); + }); + + it('rejects missing dataModelCount', () => { + const { dataModelCount: _, ...rest } = valid; + void _; + expect(() => sigmaManifestSchema.parse(rest)).toThrow(); + }); + + it('rejects a non-integer dataModelCount', () => { + expect(() => sigmaManifestSchema.parse({ ...valid, dataModelCount: 2.5 })).toThrow(); + }); +}); diff --git a/packages/cli/test/context/ingest/local-adapters.test.ts b/packages/cli/test/context/ingest/local-adapters.test.ts index 0db65153..ec17ef7f 100644 --- a/packages/cli/test/context/ingest/local-adapters.test.ts +++ b/packages/cli/test/context/ingest/local-adapters.test.ts @@ -73,6 +73,7 @@ describe('local ingest adapters', () => { 'lookml', 'dbt', 'metabase', + 'sigma', 'gdrive', 'looker', 'metricflow', diff --git a/packages/cli/test/context/memory/memory-runtime-assets.test.ts b/packages/cli/test/context/memory/memory-runtime-assets.test.ts index 
db8c31c3..92f72f38 100644 --- a/packages/cli/test/context/memory/memory-runtime-assets.test.ts +++ b/packages/cli/test/context/memory/memory-runtime-assets.test.ts @@ -24,6 +24,7 @@ const expectedAdapterSkillHeadings: Record<string, string> = { lookml_ingest: '# LookML to ktx Semantic Layer', metabase_ingest: '# Metabase to ktx Semantic Layer', metricflow_ingest: '# MetricFlow to ktx Semantic Layer', + sigma_ingest: '# Sigma Ingest', }; const verificationWriterSkills = [ 'gdrive_synthesize', @@ -33,6 +34,7 @@ const verificationWriterSkills = [ 'looker_ingest', 'metabase_ingest', 'metricflow_ingest', + 'sigma_ingest', 'live_database_ingest', 'historic_sql_table_digest', 'historic_sql_patterns', diff --git a/packages/cli/test/fixtures/sigma/empty-manifest/sigma-manifest.json b/packages/cli/test/fixtures/sigma/empty-manifest/sigma-manifest.json new file mode 100644 index 00000000..823bbdc2 --- /dev/null +++ b/packages/cli/test/fixtures/sigma/empty-manifest/sigma-manifest.json @@ -0,0 +1,5 @@ +{ + "sigmaConnectionId": "sigma-prod", + "fetchedAt": "2026-01-15T10:00:00Z", + "dataModelCount": 0 +} diff --git a/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-aaa111.json b/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-aaa111.json new file mode 100644 index 00000000..32963afb --- /dev/null +++ b/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-aaa111.json @@ -0,0 +1,9 @@ +{ + "sigmaId": "dm-aaa111", + "name": "Revenue Model", + "path": "Finance/Revenue Model", + "latestVersion": 3, + "updatedAt": "2026-01-15T10:00:00Z", + "isArchived": false, + "spec": { "schemaVersion": 1, "pages": [] } +} diff --git a/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-bbb222.json b/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-bbb222.json new file mode 100644 index 00000000..c3943d46 --- /dev/null +++ b/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-bbb222.json @@ -0,0 +1,9 @@ +{ + "sigmaId": "dm-bbb222", + "name": "ARR Model", 
+ "path": "Finance/ARR Model", + "latestVersion": 1, + "updatedAt": "2026-01-10T08:00:00Z", + "isArchived": false, + "spec": { "schemaVersion": 1, "pages": [] } +} diff --git a/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-ccc333.json b/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-ccc333.json new file mode 100644 index 00000000..4404170b --- /dev/null +++ b/packages/cli/test/fixtures/sigma/multi-folder/data-models/dm-ccc333.json @@ -0,0 +1,9 @@ +{ + "sigmaId": "dm-ccc333", + "name": "Usage Model", + "path": "Product/Usage Model", + "latestVersion": 2, + "updatedAt": "2026-01-12T09:00:00Z", + "isArchived": false, + "spec": { "schemaVersion": 1, "pages": [] } +} diff --git a/packages/cli/test/fixtures/sigma/multi-folder/sigma-manifest.json b/packages/cli/test/fixtures/sigma/multi-folder/sigma-manifest.json new file mode 100644 index 00000000..022a4fdd --- /dev/null +++ b/packages/cli/test/fixtures/sigma/multi-folder/sigma-manifest.json @@ -0,0 +1,6 @@ +{ + "sigmaConnectionId": "sigma-prod", + "fetchedAt": "2026-01-15T10:00:00Z", + "dataModelCount": 3, + "workbookCount": 2 +} diff --git a/packages/cli/test/fixtures/sigma/multi-folder/workbooks/wb-yyy222.json b/packages/cli/test/fixtures/sigma/multi-folder/workbooks/wb-yyy222.json new file mode 100644 index 00000000..de7cb6de --- /dev/null +++ b/packages/cli/test/fixtures/sigma/multi-folder/workbooks/wb-yyy222.json @@ -0,0 +1,9 @@ +{ + "sigmaId": "wb-yyy222", + "name": "Revenue Tracker", + "path": "Finance/Revenue Tracker", + "latestVersion": 2, + "updatedAt": "2026-01-13T08:00:00Z", + "isArchived": false, + "workbookUrlId": "Revenue-Tracker-yyy222" +} diff --git a/packages/cli/test/fixtures/sigma/multi-folder/workbooks/wb-zzz333.json b/packages/cli/test/fixtures/sigma/multi-folder/workbooks/wb-zzz333.json new file mode 100644 index 00000000..620469e8 --- /dev/null +++ b/packages/cli/test/fixtures/sigma/multi-folder/workbooks/wb-zzz333.json @@ -0,0 +1,10 @@ +{ + "sigmaId": "wb-zzz333", + 
"name": "Usage Dashboard", + "path": "Product/Usage Dashboard", + "latestVersion": 8, + "updatedAt": "2026-01-12T11:00:00Z", + "isArchived": false, + "workbookUrlId": "Usage-Dashboard-zzz333", + "description": "Product usage metrics" +} diff --git a/packages/cli/test/fixtures/sigma/single-folder/data-models/dm-aaa111.json b/packages/cli/test/fixtures/sigma/single-folder/data-models/dm-aaa111.json new file mode 100644 index 00000000..32963afb --- /dev/null +++ b/packages/cli/test/fixtures/sigma/single-folder/data-models/dm-aaa111.json @@ -0,0 +1,9 @@ +{ + "sigmaId": "dm-aaa111", + "name": "Revenue Model", + "path": "Finance/Revenue Model", + "latestVersion": 3, + "updatedAt": "2026-01-15T10:00:00Z", + "isArchived": false, + "spec": { "schemaVersion": 1, "pages": [] } +} diff --git a/packages/cli/test/fixtures/sigma/single-folder/data-models/dm-bbb222.json b/packages/cli/test/fixtures/sigma/single-folder/data-models/dm-bbb222.json new file mode 100644 index 00000000..c3943d46 --- /dev/null +++ b/packages/cli/test/fixtures/sigma/single-folder/data-models/dm-bbb222.json @@ -0,0 +1,9 @@ +{ + "sigmaId": "dm-bbb222", + "name": "ARR Model", + "path": "Finance/ARR Model", + "latestVersion": 1, + "updatedAt": "2026-01-10T08:00:00Z", + "isArchived": false, + "spec": { "schemaVersion": 1, "pages": [] } +} diff --git a/packages/cli/test/fixtures/sigma/single-folder/sigma-manifest.json b/packages/cli/test/fixtures/sigma/single-folder/sigma-manifest.json new file mode 100644 index 00000000..d1c7d94b --- /dev/null +++ b/packages/cli/test/fixtures/sigma/single-folder/sigma-manifest.json @@ -0,0 +1,6 @@ +{ + "sigmaConnectionId": "sigma-prod", + "fetchedAt": "2026-01-15T10:00:00Z", + "dataModelCount": 2, + "workbookCount": 1 +} diff --git a/packages/cli/test/fixtures/sigma/single-folder/workbooks/wb-xxx111.json b/packages/cli/test/fixtures/sigma/single-folder/workbooks/wb-xxx111.json new file mode 100644 index 00000000..759d50af --- /dev/null +++ 
b/packages/cli/test/fixtures/sigma/single-folder/workbooks/wb-xxx111.json @@ -0,0 +1,10 @@ +{ + "sigmaId": "wb-xxx111", + "name": "Finance Overview", + "path": "Finance/Finance Overview", + "latestVersion": 5, + "updatedAt": "2026-01-14T09:00:00Z", + "isArchived": false, + "workbookUrlId": "Finance-Overview-xxx111", + "description": "Top-level finance dashboard" +} diff --git a/packages/cli/test/setup-sources.test.ts b/packages/cli/test/setup-sources.test.ts index 18e91bf4..5960e593 100644 --- a/packages/cli/test/setup-sources.test.ts +++ b/packages/cli/test/setup-sources.test.ts @@ -1364,6 +1364,18 @@ describe('setup sources step', () => { deps: { validateNotion: vi.fn(async () => ({ ok: true as const, detail: 'roots=0' })) }, expectedLabel: 'Notion', }, + { + source: 'sigma', + connectionId: 'sigma-main', + connection: { + driver: 'sigma', + api_url: 'https://api.sigmacomputing.com', + client_id: 'my-client-id', + client_secret_ref: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + }, + deps: { validateSigma: vi.fn(async () => ({ ok: true as const, detail: 'Sigma API connection verified' })) }, + expectedLabel: 'Sigma Computing', + }, ]; for (const testCase of cases) { @@ -2035,4 +2047,206 @@ describe('setup sources step', () => { path: 'staging', }); }); + + it('writes Sigma config in non-interactive mode', async () => { + await addPrimarySource(); + const validateSigma = vi.fn(async () => ({ ok: true as const, detail: 'Sigma API connection verified' })); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'sigma', + sourceConnectionId: 'sigma-prod', + sourceUrl: 'https://api.sigmacomputing.com', + sourceClientId: 'my-client-id', + sourceClientSecretRef: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + runInitialSourceIngest: false, + skipSources: false, + }, + makeIo().io, + { validateSigma }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['sigma-prod'] }); + + expect((await 
readConfig()).connections['sigma-prod']).toMatchObject({ + driver: 'sigma', + api_url: 'https://api.sigmacomputing.com', + client_id: 'my-client-id', + client_secret_ref: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + }); + expect(validateSigma).toHaveBeenCalledOnce(); + }); + + it('defaults Sigma api_url when --source-url is omitted', async () => { + await addPrimarySource(); + const validateSigma = vi.fn(async () => ({ ok: true as const, detail: 'Sigma API connection verified' })); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'sigma', + sourceConnectionId: 'sigma-main', + sourceClientId: 'my-client-id', + sourceClientSecretRef: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + runInitialSourceIngest: false, + skipSources: false, + }, + makeIo().io, + { validateSigma }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['sigma-main'] }); + + expect((await readConfig()).connections['sigma-main']).toMatchObject({ + driver: 'sigma', + api_url: 'https://api.sigmacomputing.com', + }); + }); + + it('rejects --source-auth-token-ref for Sigma and points at --source-client-secret-ref', async () => { + await addPrimarySource(); + const io = makeIo(); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'sigma', + sourceConnectionId: 'sigma-main', + sourceClientId: 'my-client-id', + sourceAuthTokenRef: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + {}, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(io.stderr()).toContain('--source-auth-token-ref does not apply to --source sigma; use --source-client-secret-ref.'); + }); + + it('runs interactive Sigma setup with API URL, client ID, and env credential', async () => { + await addPrimarySource(); + const validateSigma = vi.fn(async () => ({ ok: true as const, detail: 'Sigma API connection 
verified' })); + const testPrompts = prompts({ + multiselect: [['sigma']], + select: ['env', 'done'], + // connection name, API URL (default accepted), client ID + text: ['sigma-main', 'https://api.sigmacomputing.com', 'my-client-id'], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { prompts: testPrompts, validateSigma }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['sigma-main'] }); + + expect(testPrompts.text).toHaveBeenCalledWith( + expect.objectContaining({ + message: textInputPrompt(connectionNamePrompt('Sigma Computing')), + initialValue: 'sigma-main', + }), + ); + expect(testPrompts.text).toHaveBeenCalledWith( + expect.objectContaining({ message: textInputPrompt('Sigma API URL') }), + ); + expect(testPrompts.text).toHaveBeenCalledWith( + expect.objectContaining({ message: textInputPrompt('Sigma client ID') }), + ); + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'How should ktx find your Sigma client secret?', + options: [ + { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'env', label: 'Use SIGMA_CLIENT_SECRET from the environment' }, + { value: 'back', label: 'Back' }, + ], + }); + expect((await readConfig()).connections['sigma-main']).toMatchObject({ + driver: 'sigma', + api_url: 'https://api.sigmacomputing.com', + client_id: 'my-client-id', + client_secret_ref: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + }); + }); + + it('edits an existing Sigma source with current URL and client ID as defaults', async () => { /* Seeds a 'sigma-main' connection, answers the select prompts with 'edit:sigma-main' / 'keep' / 'done', then checks that the API URL and client ID prompts receive the stored values as initialValue, that a 'Keep existing credential' option is offered, and that the saved config carries the new client ID with the same client_secret_ref that was seeded. */ + await addPrimarySource(); + await addConnection('sigma-main', { + driver: 'sigma', + api_url: 'https://api.sigmacomputing.com', + client_id: 'old-client-id', + client_secret_ref: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + }); + const testPrompts = prompts({ + multiselect: [['sigma']], + select: ['edit:sigma-main', 'keep', 'done'], + 
// API URL and new client ID + text: ['https://api.sigmacomputing.com', 'new-client-id'], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + validateSigma: vi.fn(async () => ({ ok: true as const, detail: 'Sigma API connection verified' })), + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['sigma-main'] }); + + expect(testPrompts.text).toHaveBeenCalledWith( + expect.objectContaining({ + message: textInputPrompt('Sigma API URL'), + initialValue: 'https://api.sigmacomputing.com', + }), + ); + expect(testPrompts.text).toHaveBeenCalledWith( + expect.objectContaining({ + message: textInputPrompt('Sigma client ID'), + initialValue: 'old-client-id', // pre-filled from existing connection + }), + ); + expect(testPrompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'How should ktx find your Sigma client secret?', + options: expect.arrayContaining([{ value: 'keep', label: 'Keep existing credential' }]), + }), + ); + expect((await readConfig()).connections['sigma-main']).toMatchObject({ + driver: 'sigma', + client_id: 'new-client-id', + client_secret_ref: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + }); + }); + + it('fails Sigma setup when validation rejects the credentials', async () => { /* Runs the step with inputMode 'disabled' and the Sigma connection id, client id and secret ref passed as step options; validateSigma is stubbed to return ok: false, and the test expects status 'failed' with the stub's message present in stderr. */ + await addPrimarySource(); + const io = makeIo(); + + const result = await runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'sigma', + sourceConnectionId: 'sigma-main', + sourceClientId: 'bad-client-id', + sourceClientSecretRef: 'env:SIGMA_CLIENT_SECRET', // pragma: allowlist secret + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + { validateSigma: vi.fn(async () => ({ ok: false as const, message: 'Sigma auth failed (401): Unauthorized' })) }, + ); + + expect(result.status).toBe('failed'); + expect(io.stderr()).toContain('Sigma auth failed (401): 
Unauthorized'); + }); });