ktx/packages/cli/test/context/ingest/local-bundle-ingest.test.ts

948 lines
31 KiB
TypeScript
Raw Permalink Normal View History

2026-05-10 23:12:26 +02:00
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
2026-05-10 23:13:17 -07:00
import Database from 'better-sqlite3';
import YAML from 'yaml';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import type { AgentRunnerPort, RunLoopParams } from '../../../src/context/llm/runtime-port.js';
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../../../src/context/project/project.js';
2026-05-10 23:12:26 +02:00
import { makeLocalGitRepo } from '../test/make-local-git-repo.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { FakeSourceAdapter } from '../../../src/context/ingest/adapters/fake/fake.adapter.js';
import { projectHistoricSqlEvidence } from '../../../src/context/ingest/adapters/historic-sql/projection.js';
import { LocalLookerRuntimeStore } from '../../../src/context/ingest/adapters/looker/local-runtime-store.js';
import { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from '../../../src/context/ingest/local-adapters.js';
import { getLocalIngestStatus, runLocalIngest } from '../../../src/context/ingest/local-ingest.js';
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
import type {
ChunkResult,
DeterministicFinalizationContext,
DiffSet,
FinalizationResult,
SourceAdapter,
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
} from '../../../src/context/ingest/types.js';
2026-05-10 23:12:26 +02:00
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
class TestAgentRunner implements AgentRunnerPort {
runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
2026-05-10 23:12:26 +02:00
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
class LookerSlWritingAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
2026-05-10 23:12:26 +02:00
if (
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
) {
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
const ledger = params.toolSet.record_verification_ledger;
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
await ledger.execute({
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
unverifiedIdentifiers: [],
});
2026-05-10 23:12:26 +02:00
const slWrite = params.toolSet.sl_write_source;
if (!slWrite?.execute) {
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
const result = await slWrite.execute({
connectionId: 'prod-warehouse',
sourceName: 'looker__ecommerce__orders',
source: {
name: 'looker__ecommerce__orders',
table: 'public.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'number' },
{ name: 'revenue', type: 'number' },
],
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
2026-05-10 23:12:26 +02:00
},
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
2026-05-10 23:12:26 +02:00
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
class WikiWritingAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
2026-05-10 23:13:17 -07:00
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
const ledger = params.toolSet.record_verification_ledger;
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
await ledger.execute({
summary: 'Test fixture writes wiki-only context with no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
});
2026-05-10 23:13:17 -07:00
const wikiWrite = params.toolSet.wiki_write;
if (!wikiWrite?.execute) {
throw new Error('wiki_write tool was not available to the WorkUnit');
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
const result = await wikiWrite.execute({
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
2026-05-10 23:13:17 -07:00
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
class WikiWritingWithRawPathAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
const ledger = params.toolSet.record_verification_ledger;
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
await ledger.execute({
summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
});
const wikiWrite = params.toolSet.wiki_write;
if (!wikiWrite?.execute) {
throw new Error('wiki_write tool was not available to the WorkUnit');
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
const result = await wikiWrite.execute({
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
rawPaths: ['orders/orders.json'],
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
class HistoricSqlEvidenceAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
params.telemetryTags?.unitKey === 'historic-sql-table-public-orders'
) {
const emitEvidence = params.toolSet.emit_historic_sql_evidence;
if (!emitEvidence?.execute) {
throw new Error('emit_historic_sql_evidence tool was not available to the historic-SQL WorkUnit');
}
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.orders',
usage: {
narrative: 'Orders are repeatedly queried by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected historic-SQL evidence result: ${result.markdown}`);
}
}
return { stopReason: 'natural' as const };
});
}
class HistoricSqlEvidenceTestAdapter implements SourceAdapter {
readonly source = 'historic-sql';
readonly skillNames = ['historic_sql_table_digest'];
readonly reconcileSkillNames: string[] = [];
readonly triageSupported = false;
detect(): Promise<boolean> {
return Promise.resolve(true);
}
chunk(_stagedDir: string, _diffSet?: DiffSet): Promise<ChunkResult> {
return Promise.resolve({
workUnits: [
{
unitKey: 'historic-sql-table-public-orders',
displayLabel: 'public.orders',
rawFiles: ['tables/public.orders.json'],
peerFileIndex: [],
dependencyPaths: ['manifest.json'],
notes:
'Use historic_sql_table_digest. Read this table usage JSON and emit exactly one table_usage object with emit_historic_sql_evidence.',
},
],
});
}
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
async finalize(ctx: DeterministicFinalizationContext): Promise<FinalizationResult> {
const projection = await projectHistoricSqlEvidence({
workdir: ctx.workdir,
connectionId: ctx.connectionId,
syncId: ctx.syncId,
runId: ctx.runId,
overrideReplay: ctx.overrideReplay,
});
return {
result: projection,
warnings: projection.warnings,
errors: [],
touchedSources: projection.touchedSources,
changedWikiPageKeys: projection.changedWikiPageKeys,
actions: projection.actions,
};
}
}
2026-05-10 23:12:26 +02:00
function makeLookerRuntimeClient() {
const lookerModels = {
models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }],
};
const lookerExplore = {
modelName: 'ecommerce',
exploreName: 'orders',
label: 'Orders',
description: null,
connectionName: 'analytics',
viewName: 'orders',
rawSqlTableName: 'public.orders',
fields: {
dimensions: [{ name: 'orders.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'orders.revenue', label: null, type: null, sql: null, description: null }],
},
joins: [
{
name: 'users',
type: 'left_outer',
relationship: 'many_to_one',
rawSqlTableName: 'public.users',
sqlOn: '${orders.user_id} = ${users.id}',
from: null,
targetTable: null,
},
],
targetWarehouseConnectionId: null,
targetTable: null,
};
return {
listDashboards: vi.fn().mockResolvedValue([{ id: '10', updatedAt: '2026-05-05T08:00:00.000Z' }]),
getDashboard: vi.fn().mockResolvedValue({
lookerId: '10',
title: 'Revenue Overview',
description: 'Revenue dashboard',
folderId: '7',
ownerId: '3',
updatedAt: '2026-05-05T08:00:00.000Z',
tiles: [{ id: '100', title: 'Revenue', lookId: null, query: { model: 'ecommerce', view: 'orders' } }],
}),
listLooks: vi.fn().mockResolvedValue([{ id: '20', updatedAt: '2026-05-05T08:10:00.000Z' }]),
getLook: vi.fn().mockResolvedValue({
lookerId: '20',
title: 'Revenue Look',
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-05-05T08:10:00.000Z',
query: { model: 'ecommerce', view: 'orders', fields: ['orders.revenue'] },
}),
listFolders: vi.fn().mockResolvedValue({ folders: [{ id: '7', name: 'Shared', parentId: null, path: ['Shared'] }] }),
listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: 'ada@example.test' }]),
listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Analysts' }]),
listLookmlModels: vi.fn().mockResolvedValue(lookerModels),
getExplore: vi.fn().mockResolvedValue(lookerExplore),
getSignals: vi.fn().mockResolvedValue({
dashboardUsage: [{ contentId: '10', queryCount30d: 12, uniqueUsers30d: 3, lastRunAt: null, topUsers: ['3'] }],
lookUsage: [{ contentId: '20', queryCount30d: 4, uniqueUsers30d: 2, lastRunAt: null, topUsers: ['3'] }],
scheduledPlans: [
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 4 },
],
favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 2 }],
}),
cleanup: vi.fn().mockResolvedValue(undefined),
};
}
function makeLookerParser() {
return {
parse: vi.fn().mockResolvedValue({
'ecommerce.orders': {
ok: true,
catalog: null,
schema: 'public',
name: 'orders',
canonical_table: 'public.orders',
},
'ecommerce.orders.users': {
ok: true,
catalog: null,
schema: 'public',
name: 'users',
canonical_table: 'public.users',
},
}),
};
}
describe('canonical local ingest', () => {
let tempDir: string;
2026-05-10 23:51:24 +02:00
let project: KtxLocalProject;
2026-05-10 23:12:26 +02:00
beforeEach(async () => {
2026-05-10 23:51:24 +02:00
tempDir = await mkdtemp(join(tmpdir(), 'ktx-local-full-ingest-'));
2026-05-10 23:12:26 +02:00
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(projectDir, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: postgres',
'ingest:',
' adapters:',
' - fake',
' embeddings:',
' backend: none',
2026-05-10 23:12:26 +02:00
'',
].join('\n'),
'utf-8',
);
2026-05-10 23:51:24 +02:00
project = await loadKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('runs the full IngestBundleRunner through local ports and stores a bundle report', async () => {
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const agentRunner = new TestAgentRunner();
const result = await runLocalIngest({
project,
adapters: [new FakeSourceAdapter()],
adapter: 'fake',
connectionId: 'warehouse',
sourceDir,
jobId: 'full-local-1',
agentRunner,
});
expect(agentRunner.runLoop).toHaveBeenCalledTimes(1);
expect(result.result).toMatchObject({
jobId: 'full-local-1',
runId: expect.any(String),
workUnitCount: 1,
failedWorkUnits: [],
});
expect(result.report).toMatchObject({
jobId: 'full-local-1',
connectionId: 'warehouse',
sourceKey: 'fake',
body: {
diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
failedWorkUnits: [],
workUnits: [
expect.objectContaining({
unitKey: 'fake-orders',
status: 'success',
rawFiles: ['orders/orders.json'],
}),
],
},
});
expect(result.report.body.provenanceRows).toEqual([
{
rawPath: 'orders/orders.json',
artifactKind: null,
artifactKey: null,
targetConnectionId: null,
actionType: 'skipped',
},
]);
const stagedRawPath = join(
project.projectDir,
'raw-sources',
'warehouse',
'fake',
result.report.body.syncId,
'orders',
'orders.json',
);
await expect(readFile(stagedRawPath, 'utf-8')).resolves.toBe('{"name":"orders"}\n');
await expect(getLocalIngestStatus(project, result.report.id)).resolves.toMatchObject({
id: result.report.id,
jobId: 'full-local-1',
});
await expect(getLocalIngestStatus(project, result.report.runId)).resolves.toMatchObject({
id: result.report.id,
jobId: 'full-local-1',
});
await expect(getLocalIngestStatus(project, 'full-local-1')).resolves.toMatchObject({
id: result.report.id,
jobId: 'full-local-1',
});
});
2026-05-10 23:13:17 -07:00
it('indexes wiki pages written by local ingest into the SQLite knowledge tables', async () => {
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const agentRunner = new WikiWritingAgentRunner();
const result = await runLocalIngest({
project,
adapters: [new FakeSourceAdapter()],
adapter: 'fake',
connectionId: 'warehouse',
sourceDir,
jobId: 'wiki-local-1',
agentRunner,
});
expect(result.result.failedWorkUnits).toEqual([]);
const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true });
try {
expect(
db
.prepare('SELECT key, summary, embedding_json IS NOT NULL AS has_embedding FROM knowledge_pages ORDER BY key')
.all(),
).toEqual([{ key: 'orders_context', summary: 'Orders source context', has_embedding: 0 }]);
} finally {
db.close();
}
});
it('does not persist noop embedding vectors when local embeddings are disabled', async () => {
await writeFile(
join(project.projectDir, 'ktx.yaml'),
[
'connections:',
' warehouse:',
' driver: postgres',
'ingest:',
' adapters:',
' - fake',
' embeddings:',
' backend: none',
'',
].join('\n'),
'utf-8',
);
project = await loadKtxProject({ projectDir: project.projectDir });
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const agentRunner = new WikiWritingAgentRunner();
const result = await runLocalIngest({
project,
adapters: [new FakeSourceAdapter()],
adapter: 'fake',
connectionId: 'warehouse',
sourceDir,
jobId: 'wiki-local-no-embeddings-1',
agentRunner,
});
expect(result.result.failedWorkUnits).toEqual([]);
const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true });
try {
expect(db.prepare('SELECT key, summary, embedding_json IS NOT NULL AS has_embedding FROM knowledge_pages ORDER BY key').all()).toEqual([
{ key: 'orders_context', summary: 'Orders source context', has_embedding: 0 },
2026-05-10 23:13:17 -07:00
]);
} finally {
db.close();
}
});
it('uses explicit action raw paths to avoid over-attributing work-unit provenance', async () => {
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await writeFile(join(sourceDir, 'orders', 'unrelated.json'), '{"name":"unrelated"}\n', 'utf-8');
const agentRunner = new WikiWritingWithRawPathAgentRunner();
const result = await runLocalIngest({
project,
adapters: [new FakeSourceAdapter()],
adapter: 'fake',
connectionId: 'warehouse',
sourceDir,
jobId: 'wiki-raw-path-local-1',
agentRunner,
});
expect(result.result.failedWorkUnits).toEqual([]);
expect(result.report.body.provenanceRows).toEqual([
{
rawPath: 'orders/orders.json',
artifactKind: 'wiki',
artifactKey: 'orders_context',
targetConnectionId: null,
actionType: 'wiki_written',
},
{
rawPath: 'orders/unrelated.json',
artifactKind: null,
artifactKey: null,
targetConnectionId: null,
actionType: 'skipped',
},
]);
});
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
it('runs historic-SQL evidence projection through local bundle finalization', async () => {
const projectDir = join(tempDir, 'historic-sql-project');
await initKtxProject({ projectDir });
await writeFile(
join(projectDir, 'ktx.yaml'),
[
'connections:',
' warehouse:',
' driver: postgres',
'ingest:',
' adapters:',
' - historic-sql',
' embeddings:',
' backend: none',
'storage:',
' state: sqlite',
' search: sqlite-fts5',
' git:',
' auto_commit: false',
' author: KTX Test <system@ktx.local>',
'',
].join('\n'),
'utf-8',
);
const historicProject = await loadKtxProject({ projectDir });
await historicProject.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({ tables: { orders: { table: 'public.orders', columns: [{ name: 'id', type: 'string' }] } } }),
'KTX Test',
'system@ktx.local',
'Seed schema shard',
);
const sourceDir = join(tempDir, 'historic-sql-source');
await mkdir(join(sourceDir, 'tables'), { recursive: true });
await writeFile(
join(sourceDir, 'manifest.json'),
`${JSON.stringify(
{
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 1,
touchedTableCount: 1,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
},
null,
2,
)}\n`,
'utf-8',
);
await writeFile(join(sourceDir, 'tables/public.orders.json'), '{"table":"public.orders"}\n', 'utf-8');
await writeFile(join(sourceDir, 'patterns-input.json'), '{"templates":[]}\n', 'utf-8');
const agentRunner = new HistoricSqlEvidenceAgentRunner();
const result = await runLocalIngest({
project: historicProject,
adapters: [new HistoricSqlEvidenceTestAdapter()],
adapter: 'historic-sql',
connectionId: 'warehouse',
sourceDir,
jobId: 'historic-sql-local-projection',
agentRunner,
});
expect(result.result.failedWorkUnits).toEqual([]);
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
expect(result.report.body.finalization).toMatchObject({
sourceKey: 'historic-sql',
status: 'success',
result: { tableUsageMerged: 1 },
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
declaredTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
derivedTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
});
await expect(readFile(join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves.toContain(
'Orders are repeatedly queried by lifecycle status.',
);
});
2026-05-10 23:12:26 +02:00
it('rejects direct Metabase scheduled pulls before requiring a local ingest LLM provider', async () => {
const projectDir = join(tempDir, 'metabase-project');
await initKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(projectDir, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: postgres',
'ingest:',
' adapters:',
' - metabase',
' embeddings:',
' backend: none',
2026-05-10 23:12:26 +02:00
'',
].join('\n'),
'utf-8',
);
2026-05-10 23:51:24 +02:00
const metabaseProject = await loadKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
await expect(
runLocalIngest({
project: metabaseProject,
adapters: createDefaultLocalIngestAdapters(metabaseProject),
adapter: 'metabase',
connectionId: 'warehouse',
jobId: 'metabase-local',
}),
).rejects.toThrow('Metabase scheduled pulls fan out by mapping');
});
it('runs full MetricFlow local ingest from a dbt repo fixture through the canonical runner', async () => {
const projectDir = join(tempDir, 'metricflow-run-project');
await initKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
const fixtureDir = join(tempDir, 'metricflow-fixture');
await mkdir(join(fixtureDir, 'models'), { recursive: true });
await writeFile(
join(fixtureDir, 'dbt_project.yml'),
[
'name: analytics',
'version: "1.0.0"',
'config-version: 2',
'profile: analytics',
'model-paths: ["models"]',
'',
].join('\n'),
'utf-8',
);
await writeFile(
join(fixtureDir, 'models/orders.yml'),
[
'semantic_models:',
' - name: orders',
' model: ref("orders")',
' entities:',
' - name: order',
' type: primary',
' expr: order_id',
' dimensions:',
' - name: ordered_at',
' type: time',
' expr: ordered_at',
' measures:',
' - name: revenue',
' agg: sum',
' expr: revenue',
'metrics:',
' - name: total_revenue',
' type: simple',
' type_params:',
' measure: revenue',
'',
].join('\n'),
'utf-8',
);
const repo = await makeLocalGitRepo(fixtureDir, join(tempDir, 'metricflow-origin'));
await writeFile(
2026-05-10 23:51:24 +02:00
join(projectDir, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: postgres',
' metricflow:',
` repoUrl: ${repo.repoUrl}`,
' branch: main',
'ingest:',
' adapters:',
' - metricflow',
' embeddings:',
' backend: none',
2026-05-10 23:12:26 +02:00
'storage:',
' state: sqlite',
' search: sqlite-fts5',
' git:',
' auto_commit: false',
2026-05-10 23:51:24 +02:00
' author: KTX Test <system@ktx.local>',
2026-05-10 23:12:26 +02:00
'',
].join('\n'),
'utf-8',
);
2026-05-10 23:51:24 +02:00
const metricflowProject = await loadKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
const agentRunner = new TestAgentRunner();
const result = await runLocalIngest({
project: metricflowProject,
adapters: createDefaultLocalIngestAdapters(metricflowProject),
adapter: 'metricflow',
connectionId: 'warehouse',
jobId: 'metricflow-local-full',
agentRunner,
});
expect(agentRunner.runLoop).toHaveBeenCalledTimes(1);
expect(result.result).toMatchObject({
jobId: 'metricflow-local-full',
workUnitCount: 1,
failedWorkUnits: [],
});
expect(result.report).toMatchObject({
jobId: 'metricflow-local-full',
connectionId: 'warehouse',
sourceKey: 'metricflow',
body: {
failedWorkUnits: [],
workUnits: [
expect.objectContaining({
unitKey: 'metricflow-orders',
status: 'success',
rawFiles: ['models/orders.yml'],
}),
],
},
});
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
expect(result.report.body.isolatedDiff).toMatchObject({
enabled: true,
acceptedPatches: 0,
projectionSha: expect.any(String),
});
const projectedSourcePath = join(metricflowProject.projectDir, 'semantic-layer/warehouse/orders.yaml');
await expect(readFile(projectedSourcePath, 'utf-8')).resolves.toContain('name: orders');
2026-05-10 23:12:26 +02:00
const stagedRawPath = join(
metricflowProject.projectDir,
'raw-sources',
'warehouse',
'metricflow',
result.report.body.syncId,
'models',
'orders.yml',
);
await expect(readFile(stagedRawPath, 'utf-8')).resolves.toContain('semantic_models:');
});
it('local metricflow ingest can fetch from connection metricflow config without sourceDir', async () => {
2026-05-10 23:51:24 +02:00
const projectDir = await mkdtemp(join(tmpdir(), 'ktx-local-mf-fetch-'));
2026-05-10 23:12:26 +02:00
const fixtureDir = join(projectDir, 'fixture-src');
await mkdir(join(fixtureDir, 'models'), { recursive: true });
await writeFile(join(fixtureDir, 'dbt_project.yml'), 'name: analytics\n', 'utf-8');
await writeFile(
join(fixtureDir, 'models/orders.yml'),
'semantic_models:\n - name: orders\n model: ref("orders")\n',
'utf-8',
);
const repo = await makeLocalGitRepo(fixtureDir, join(projectDir, 'origin'));
await writeFile(
2026-05-10 23:51:24 +02:00
join(projectDir, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: postgres',
' metricflow:',
` repoUrl: ${repo.repoUrl}`,
' branch: main',
'storage:',
' state: sqlite',
' search: sqlite-fts5',
' git:',
' auto_commit: false',
2026-05-10 23:51:24 +02:00
' author: KTX Test <system@ktx.local>',
2026-05-10 23:12:26 +02:00
'',
].join('\n'),
'utf-8',
);
2026-05-10 23:51:24 +02:00
const metricflowProject = await loadKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
const adapters = createDefaultLocalIngestAdapters(metricflowProject);
const metricflow = adapters.find((adapter) => adapter.source === 'metricflow');
expect(metricflow?.fetch).toBeTypeOf('function');
await expect(localPullConfigForAdapter(metricflowProject, metricflow!, 'warehouse')).resolves.toMatchObject({
repoUrl: repo.repoUrl,
branch: 'main',
path: null,
authToken: null,
parsedTargetTables: {},
});
});
it('runs scheduled Looker ingest through the canonical local runner and records SL target evidence', async () => {
const projectDir = join(tempDir, 'looker-project');
await initKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(projectDir, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' prod-looker:',
' driver: looker',
' base_url: https://looker.example.test',
' client_id: client',
' prod-warehouse:',
' driver: postgres',
' url: postgresql://readonly@warehouse.example.test/analytics',
'ingest:',
' adapters:',
' - looker',
' embeddings:',
' backend: none',
2026-05-10 23:12:26 +02:00
'storage:',
' state: sqlite',
' search: sqlite-fts5',
' git:',
' auto_commit: false',
2026-05-10 23:51:24 +02:00
' author: KTX Test <system@ktx.local>',
2026-05-10 23:12:26 +02:00
'',
].join('\n'),
'utf-8',
);
2026-05-10 23:51:24 +02:00
const lookerProject = await loadKtxProject({ projectDir });
const localStore = new LocalLookerRuntimeStore({ dbPath: join(lookerProject.projectDir, '.ktx', 'db.sqlite') });
2026-05-10 23:12:26 +02:00
await localStore.setCursors('prod-looker', {
dashboardsLastSyncedAt: null,
looksLastSyncedAt: null,
});
await localStore.upsertConnectionMapping({
lookerConnectionId: 'prod-looker',
lookerConnectionName: 'analytics',
2026-05-10 23:51:24 +02:00
ktxConnectionId: 'prod-warehouse',
2026-05-10 23:12:26 +02:00
source: 'cli',
});
const runtimeClient = makeLookerRuntimeClient();
const parser = makeLookerParser();
const agentRunner = new LookerSlWritingAgentRunner();
const result = await runLocalIngest({
project: lookerProject,
adapters: createDefaultLocalIngestAdapters(lookerProject, { looker: { runtimeClient } }),
adapter: 'looker',
connectionId: 'prod-looker',
jobId: 'looker-local-report-parity',
agentRunner,
pullConfigOptions: {
looker: {
client: runtimeClient,
parser,
},
},
});
expect(runtimeClient.cleanup).toHaveBeenCalledTimes(1);
expect(parser.parse).toHaveBeenCalledWith([
{ key: 'ecommerce.orders', sql_table_name: 'public.orders', dialect: 'postgres' },
{ key: 'ecommerce.orders.users', sql_table_name: 'public.users', dialect: 'postgres' },
]);
expect(result.result).toMatchObject({
jobId: 'looker-local-report-parity',
workUnitCount: 3,
failedWorkUnits: [],
});
expect(result.report).toMatchObject({
jobId: 'looker-local-report-parity',
connectionId: 'prod-looker',
sourceKey: 'looker',
body: {
fetch: {
status: 'success',
retryRecommended: false,
skipped: [],
warnings: [],
},
failedWorkUnits: [],
},
});
const exploreWorkUnit = result.report.body.workUnits.find((wu) => wu.unitKey === 'looker-explore-ecommerce-orders');
expect(exploreWorkUnit).toMatchObject({
status: 'success',
rawFiles: expect.arrayContaining(['explores/ecommerce/orders.json']),
actions: [
expect.objectContaining({
target: 'sl',
type: 'created',
key: 'looker__ecommerce__orders',
targetConnectionId: 'prod-warehouse',
}),
],
touchedSlSources: [{ connectionId: 'prod-warehouse', sourceName: 'looker__ecommerce__orders' }],
});
expect(result.report.body.provenanceRows).toEqual(
expect.arrayContaining([
expect.objectContaining({
rawPath: 'explores/ecommerce/orders.json',
artifactKind: 'sl',
artifactKey: 'looker__ecommerce__orders',
targetConnectionId: 'prod-warehouse',
actionType: 'source_created',
}),
]),
);
const rawRoot = join(
lookerProject.projectDir,
'raw-sources',
'prod-looker',
'looker',
result.report.body.syncId,
);
const explore = JSON.parse(await readFile(join(rawRoot, 'explores/ecommerce/orders.json'), 'utf-8'));
expect(explore).toMatchObject({
targetWarehouseConnectionId: 'prod-warehouse',
targetTable: {
ok: true,
schema: 'public',
name: 'orders',
canonicalTable: 'public.orders',
},
joins: [
expect.objectContaining({
name: 'users',
targetTable: expect.objectContaining({
ok: true,
schema: 'public',
name: 'users',
canonicalTable: 'public.users',
}),
}),
],
});
const dashboard = JSON.parse(await readFile(join(rawRoot, 'dashboards/10.json'), 'utf-8'));
expect(dashboard.tiles[0].query).toMatchObject({
targetWarehouseConnectionId: 'prod-warehouse',
targetTable: expect.objectContaining({ ok: true, canonicalTable: 'public.orders' }),
});
const sourceYaml = await readFile(
join(lookerProject.projectDir, 'semantic-layer/prod-warehouse/looker__ecommerce__orders.yaml'),
'utf-8',
);
expect(sourceYaml).toContain('table: public.orders');
expect(sourceYaml).toContain('total_revenue');
});
});