ktx/packages/cli/test/context/ingest/local-adapters.test.ts

693 lines
23 KiB
TypeScript
Raw Permalink Normal View History

import { mkdtemp, rm, writeFile } from 'node:fs/promises';
2026-05-10 23:12:26 +02:00
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../../../src/context/project/project.js';
import type { SqlAnalysisPort } from '../../../src/context/sql-analysis/ports.js';
import type { HistoricSqlReader } from '../../../src/context/ingest/adapters/historic-sql/types.js';
import { LocalLookerRuntimeStore } from '../../../src/context/ingest/adapters/looker/local-runtime-store.js';
import { LocalNotionRuntimeStore } from '../../../src/context/ingest/adapters/notion/local-state-store.js';
import { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from '../../../src/context/ingest/local-adapters.js';
2026-05-10 23:12:26 +02:00
describe('local ingest adapters', () => {
// Scratch directory for the current test; assigned in beforeEach and removed in afterEach.
let tempDir: string;
// Project loaded in beforeEach from the project directory created under tempDir.
let project: KtxLocalProject;
2026-05-10 23:12:26 +02:00
// Before every test: create a uniquely named temp directory, initialise a KTX
// project in its `project` subdirectory, and load that project into the shared
// `project` fixture.
beforeEach(async () => {
  tempDir = await mkdtemp(join(tmpdir(), 'ktx-local-adapters-'));
  const projectDir = join(tempDir, 'project');
  await initKtxProject({ projectDir });
  project = await loadKtxProject({ projectDir });
});
// After every test: delete the temp directory and everything beneath it.
// `force` keeps the cleanup from failing when the directory is already gone.
afterEach(async () => {
  const removeOptions = { recursive: true, force: true };
  await rm(tempDir, removeOptions);
});
2026-05-10 23:51:24 +02:00
/**
 * Builds a variant of the shared `project` fixture with a different set of
 * connections.
 *
 * The result is a shallow copy: every top-level project field and every other
 * `config` field is carried over from `project`, and only `config.connections`
 * is replaced.
 *
 * @param connections - Connection map to place at `config.connections`.
 * @returns A new project object; the shared `project` fixture is not mutated.
 */
function projectWithConnections(connections: KtxLocalProject['config']['connections']): KtxLocalProject {
  return {
    ...project,
    config: {
      ...project.config,
      connections,
    },
  };
}
// The default adapter set must expose exactly these sources, in this order,
// and the Metabase entry must provide a fetch function.
it('registers Metabase locally as a staged-bundle adapter', () => {
  const registered = createDefaultLocalIngestAdapters(project);

  const sources = registered.map(({ source }) => source);
  expect(sources).toEqual(['fake', 'live-database', 'lookml', 'dbt', 'metabase', 'looker', 'metricflow', 'notion']);

  const metabase = registered.find(({ source }) => source === 'metabase');
  expect(metabase?.fetch).toBeTypeOf('function');
});
// Supplying a stubbed runtime client through the `looker.runtimeClient` option
// must still produce a Looker adapter that exposes a fetch function.
it('uses an explicit Looker runtime client seam for local adapter fetch tests', async () => {
  // Small factory for mocks that resolve to a fixed value.
  const resolvesTo = (value: unknown) => vi.fn().mockResolvedValue(value);
  const emptySignals = {
    dashboardUsage: [],
    lookUsage: [],
    scheduledPlans: [],
    favorites: [],
  };

  const stubClient = {
    cleanup: resolvesTo(undefined),
    listDashboards: resolvesTo([]),
    listLooks: resolvesTo([]),
    listFolders: resolvesTo({ folders: [] }),
    listUsers: resolvesTo([]),
    listGroups: resolvesTo([]),
    listLookmlModels: resolvesTo({ models: [] }),
    // Detail getters are plain mocks with no configured return value.
    getDashboard: vi.fn(),
    getLook: vi.fn(),
    getExplore: vi.fn(),
    getSignals: resolvesTo(emptySignals),
  };

  const lookerAdapter = createDefaultLocalIngestAdapters(project, {
    looker: { runtimeClient: stubClient },
  }).find(({ source }) => source === 'looker');

  expect(lookerAdapter).toBeDefined();
  expect(lookerAdapter?.fetch).toBeTypeOf('function');
});
// Requesting a pull config for the Metabase adapter on the `warehouse`
// connection must reject with the fan-out message.
it('returns the explicit Metabase fanout boundary before runner construction', async () => {
  const metabase = createDefaultLocalIngestAdapters(project).find((adapter) => adapter.source === 'metabase');
  await expect(localPullConfigForAdapter(project, metabase!, 'warehouse')).rejects.toThrow(
    'Metabase scheduled pulls fan out by mapping',
  );
});
// With `historicSql` deps supplied (a SQL-analysis port plus a Postgres query
// client), the adapter list must include `historic-sql` with a fetch function
// and the two expected skill names.
it('registers historic-sql locally when Postgres historic-SQL deps are provided', () => {
  // Stub port: every method returns a fixed, successful result.
  const sqlAnalysis: SqlAnalysisPort = {
    async analyzeForFingerprint(sql) {
      return {
        fingerprint: 'fp',
        normalizedSql: sql,
        tablesTouched: ['public.orders'],
        literalSlots: [],
      };
    },
    async analyzeBatch() {
      return new Map();
    },
    async validateReadOnly() {
      return { ok: true };
    },
  };
  const adapters = createDefaultLocalIngestAdapters(project, {
    historicSql: {
      sqlAnalysis,
      postgresQueryClient: {
        async executeQuery() {
          return { headers: [], rows: [] };
        },
      },
    },
  });

  expect(adapters.map((adapter) => adapter.source)).toContain('historic-sql');

  const historicSql = adapters.find((adapter) => adapter.source === 'historic-sql');
  expect(historicSql?.fetch).toBeTypeOf('function');
  expect(historicSql?.skillNames).toEqual(['historic_sql_table_digest', 'historic_sql_patterns']);
});
// Instead of the Postgres query client, an explicit `reader` and `queryClient`
// are injected; the `historic-sql` adapter must still be registered with a
// fetch function.
it('registers historic-sql with an injected non-Postgres reader and query client', () => {
  // Stub reader: the probe reports nothing and the aggregate stream yields no items.
  const reader: HistoricSqlReader = {
    async probe() {
      return { warnings: [], info: [] };
    },
    async *fetchAggregated() {},
  };
  const queryClient = { executeQuery: async () => ({ headers: [], rows: [], totalRows: 0 }) };
  const adapters = createDefaultLocalIngestAdapters(project, {
    historicSql: {
      sqlAnalysis: {
        async analyzeForFingerprint(sql) {
          return {
            fingerprint: 'fp',
            normalizedSql: sql,
            tablesTouched: [],
            literalSlots: [],
          };
        },
        async analyzeBatch() {
          return new Map();
        },
        async validateReadOnly() {
          return { ok: true };
        },
      },
      reader,
      queryClient,
    },
  });

  const adapter = adapters.find((candidate) => candidate.source === 'historic-sql');
  expect(adapter).toBeDefined();
  expect(adapter?.fetch).toBeTypeOf('function');
});
// For a Postgres connection whose `context.queryHistory` is enabled, the
// historic-sql pull config must equal the expected object below.
it('builds Postgres historic-sql pull config from a local connection', async () => {
  const historicSql = createDefaultLocalIngestAdapters(project, {
    historicSql: {
      // Stub port: every method returns a fixed, successful result.
      sqlAnalysis: {
        async analyzeForFingerprint(sql) {
          return {
            fingerprint: 'fp',
            normalizedSql: sql,
            tablesTouched: ['public.orders'],
            literalSlots: [],
          };
        },
        async analyzeBatch() {
          return new Map();
        },
        async validateReadOnly() {
          return { ok: true };
        },
      },
      postgresQueryClient: {
        async executeQuery() {
          return { headers: [], rows: [] };
        },
      },
    },
  }).find((adapter) => adapter.source === 'historic-sql');

  const postgresProject = projectWithConnections({
    warehouse: {
      driver: 'postgres',
      url: 'env:WAREHOUSE_DATABASE_URL',
      context: {
        queryHistory: {
          enabled: true,
          minExecutions: 7,
          maxTemplatesPerRun: 123,
          filters: {
            serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
            dropTrivialProbes: true,
          },
        },
      },
    },
  });

  // `toEqual` pins the full shape: `minExecutions` and `filters` match the
  // connection's values, alongside the remaining expected fields.
  await expect(localPullConfigForAdapter(postgresProject, historicSql!, 'warehouse')).resolves.toEqual({
    dialect: 'postgres',
    minExecutions: 7,
    enabledTables: [],
    filters: {
      serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
      dropTrivialProbes: true,
    },
    redactionPatterns: [],
    staleArchiveAfterDays: 90,
  });
});
feat: merge ingest and scan * docs: add CLI component reuse guidance * docs: add unified ingest ux design * Refine unified ingest UX design after adversarial review iteration 1 * Refine unified ingest UX design after adversarial review iteration 2 * Refine unified ingest UX design after adversarial review iteration 3 * feat(cli): route public connection ingest command * feat(cli): hide standalone scan from public help * feat(cli): plan public ingest depth and query history * feat(cli): execute public database ingest facets * feat(ingest): read connection query history config * fix(cli): use public ingest wording * fix(config): stop generating ingest adapter allow lists * docs: document public ingest command * test: align ingest surface expectations * docs: add unified ingest public CLI surface plan * feat(cli): preflight deep public ingest readiness * feat(setup): store query history in connection context * feat(setup): store database context depth * feat(setup): verify context readiness by database depth * fix(setup): keep context build foreground only * fix(config): reject reserved ingest connection ids * test: close unified ingest v1 expectations * docs: add unified ingest v1 closure plan * fix(ingest): bypass adapter allow-list for public source ingest * fix(ingest): honor query history window intent * fix(ingest): hide scan internals from public database ingest * feat(ingest): use foreground view for interactive public ingest * fix(setup): use schema context and query history wording * test(cli): verify unified ingest public output * docs: add unified ingest v1 public output closure plan * fix(setup): forward query history flags * fix(setup): prompt for postgres query history * fix(status): report query history readiness * fix(ingest): remove legacy public guidance * fix(ingest): polish foreground retry copy * docs(examples): use unified query history wording * chore(ingest): finish public query history cleanup * docs: add unified ingest v1 query history 
status cleanup plan * test(docs): cover unified ingest public docs * docs: align ingest CLI reference with unified UX * docs: update context build guides for unified ingest * docs: update setup and primary source ingest wording * docs: stop advertising adapter-backed example ingest * docs: close unified ingest public docs gaps * docs: add unified ingest v1 docs site closure plan * fix: render unified ingest foreground warnings * fix: explain query history schema order * fix: add public ingest retry guidance * fix: align setup next steps with unified ingest * fix: remove scan wording from demo progress * test: verify unified ingest ux closure * docs: add unified ingest v1 foreground and retry closure plan * fix(cli): preserve query-history pull config in public ingest * fix(cli): omit hidden commands from docs command tree * test(cli): close unified ingest final public surface checks * docs: add unified ingest v1 final public surface closure plan * fix(cli): use public source labels in ingest reports * fix(cli): suppress low-level public ingest output * test(cli): verify unified ingest public plain output * docs: add unified ingest v1 public plain output closure plan * fix(cli): add public ingest copy sanitizers * fix(cli): sanitize public ingest progress copy * fix(cli): rename setup schema scope prompt * docs(plan): add progress copy closure; test: align setup back-nav fixture Adds the iter9 plan and updates the setup back-navigation test fixture to pass disableQueryHistory plus listSchemas/listTables stubs that the unified ingest setup step now requires. 
* docs(plan): add final ux labels plan with narrowed label scans * fix(cli): aggregate unsupported query-history warnings * fix(cli): align setup database labels * test(cli): fix setup database test type-check * fix(cli): remove primary-source wording from setup output * test(cli): verify unified ingest setup closure * docs(plan): add unified ingest v1 verification copy closure plan * fix(cli): remove top-level scan command * fix(cli): remove legacy ingest and wiki commands * Merge scan into ingest flow * feat(cli): split ingest progress into per-phase rows, rename work units to tasks Each database target in the unified ingest dashboard now renders one row per real subprocess (Schema, then Query history when enabled) instead of a single combined bar. Each phase has its own monotonic 0-100% bar so the progress never snaps back to zero when historic-sql starts after scan completes. Completed phases keep their final bar, summary, and elapsed time visible as an inline audit trail; queued and skipped phases are shown explicitly. Also rename user-facing "work units" / "Failed work units" to "tasks" / "Failed tasks" in ingest output and parseIngestSummary. The parser still accepts the legacy "Work units:" wording in captured output for backward compat. Internal memory-flow event names and type fields are left alone. * Fix test harness failures * Fix CI smoke checks --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-14 01:43:06 +02:00
// A connection's `context.queryHistory` settings must surface in the
// historic-sql pull config (partial match on dialect, minExecutions, filters).
it('maps connection context.queryHistory to historic-sql pull config', async () => {
  const configuredProject = projectWithConnections({
    warehouse: {
      driver: 'postgres',
      context: {
        queryHistory: {
          enabled: true,
          windowDays: 45,
          minExecutions: 7,
          filters: { dropTrivialProbes: true },
        },
      },
    },
  });
  // Only the adapter's `source` is supplied here; the cast bypasses the full adapter type.
  const historicSqlStub = { source: 'historic-sql' } as never;

  const pendingConfig = localPullConfigForAdapter(configuredProject, historicSqlStub, 'warehouse');

  const expectedSubset = {
    dialect: 'postgres',
    minExecutions: 7,
    filters: { dropTrivialProbes: true },
  };
  await expect(pendingConfig).resolves.toMatchObject(expectedSubset);
});
2026-05-10 23:12:26 +02:00
it('rejects local historic-sql pulls when the connection has not enabled historic SQL', async () => {
const historicSql = createDefaultLocalIngestAdapters(project, {
historicSql: {
sqlAnalysis: {
async analyzeForFingerprint(sql) {
return {
fingerprint: 'fp',
normalizedSql: sql,
tablesTouched: [],
literalSlots: [],
};
},
2026-05-11 17:03:22 +02:00
async analyzeBatch() {
return new Map();
},
feat(mcp):added MCP server (#97) * docs(specs): design research-agent MCP tools and ktx mcp daemon Adds the 2026-05-14 design spec for exposing four new MCP tools (discover_data, entity_details, dictionary_search, sql_execution), shipping a ktx-research skill, and introducing an HTTP-only ktx mcp daemon so external agents can use KTX as a research-capable context layer. * Refine research-agent MCP tools spec after adversarial review iteration 1 * Refine research-agent MCP tools spec after adversarial review iteration 2 * Refine research-agent MCP tools spec after adversarial review iteration 3 * Refine spec: drop connectionName compat carve-out and ground summary/snippet provenance per kind * feat(daemon): validate read-only SQL with sqlglot * feat(context): expose read-only SQL validation port * feat(context): register MCP sql execution tool * feat(context): execute MCP SQL through validated connector path * test(context): update SQL analysis port fixtures * docs: add research-agent MCP sql execution foundation plan * feat(context): add scan-backed entity details service * feat(context): register MCP entity details tool * feat(context): expose local MCP entity details * test(context): align entity details scan fixtures * docs: add research-agent MCP entity_details plan * feat(context): add dictionary search service * feat(context): register MCP dictionary search tool * feat(context): expose local MCP dictionary search * docs: add research-agent MCP dictionary_search plan * feat: add MCP discover data service * feat: expose discover data MCP tool * feat: wire local discover data MCP port * docs: add research-agent MCP discover_data plan * feat(cli): add mcp http security helpers * feat(cli): host mcp over streamable http * feat(cli): manage mcp daemon lifecycle * feat(cli): add ktx mcp commands * fix(cli): stabilize mcp daemon verification * docs: add research-agent MCP http daemon plan * feat(cli): install KTX research skill * feat(cli): configure MCP clients in 
setup agents * feat(cli): support Claude local MCP setup scope * docs: add research-agent MCP setup-agents plan * refactor(context): use connectionId in warehouse verification tools * docs(context): update ingest verification prompts for connectionId * docs: add research-agent MCP ingest contract convergence plan * chore: build runtime artifacts in conductor setup --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-15 02:35:09 +02:00
async validateReadOnly() {
return { ok: true };
},
2026-05-10 23:12:26 +02:00
},
postgresQueryClient: {
async executeQuery() {
return { headers: [], rows: [] };
},
},
},
}).find((adapter) => adapter.source === 'historic-sql');
const postgresProject = projectWithConnections({
warehouse: {
driver: 'postgres',
url: 'env:WAREHOUSE_DATABASE_URL',
},
});
await expect(localPullConfigForAdapter(postgresProject, historicSql!, 'warehouse')).rejects.toThrow(
feat: merge ingest and scan * docs: add CLI component reuse guidance * docs: add unified ingest ux design * Refine unified ingest UX design after adversarial review iteration 1 * Refine unified ingest UX design after adversarial review iteration 2 * Refine unified ingest UX design after adversarial review iteration 3 * feat(cli): route public connection ingest command * feat(cli): hide standalone scan from public help * feat(cli): plan public ingest depth and query history * feat(cli): execute public database ingest facets * feat(ingest): read connection query history config * fix(cli): use public ingest wording * fix(config): stop generating ingest adapter allow lists * docs: document public ingest command * test: align ingest surface expectations * docs: add unified ingest public CLI surface plan * feat(cli): preflight deep public ingest readiness * feat(setup): store query history in connection context * feat(setup): store database context depth * feat(setup): verify context readiness by database depth * fix(setup): keep context build foreground only * fix(config): reject reserved ingest connection ids * test: close unified ingest v1 expectations * docs: add unified ingest v1 closure plan * fix(ingest): bypass adapter allow-list for public source ingest * fix(ingest): honor query history window intent * fix(ingest): hide scan internals from public database ingest * feat(ingest): use foreground view for interactive public ingest * fix(setup): use schema context and query history wording * test(cli): verify unified ingest public output * docs: add unified ingest v1 public output closure plan * fix(setup): forward query history flags * fix(setup): prompt for postgres query history * fix(status): report query history readiness * fix(ingest): remove legacy public guidance * fix(ingest): polish foreground retry copy * docs(examples): use unified query history wording * chore(ingest): finish public query history cleanup * docs: add unified ingest v1 query history 
status cleanup plan * test(docs): cover unified ingest public docs * docs: align ingest CLI reference with unified UX * docs: update context build guides for unified ingest * docs: update setup and primary source ingest wording * docs: stop advertising adapter-backed example ingest * docs: close unified ingest public docs gaps * docs: add unified ingest v1 docs site closure plan * fix: render unified ingest foreground warnings * fix: explain query history schema order * fix: add public ingest retry guidance * fix: align setup next steps with unified ingest * fix: remove scan wording from demo progress * test: verify unified ingest ux closure * docs: add unified ingest v1 foreground and retry closure plan * fix(cli): preserve query-history pull config in public ingest * fix(cli): omit hidden commands from docs command tree * test(cli): close unified ingest final public surface checks * docs: add unified ingest v1 final public surface closure plan * fix(cli): use public source labels in ingest reports * fix(cli): suppress low-level public ingest output * test(cli): verify unified ingest public plain output * docs: add unified ingest v1 public plain output closure plan * fix(cli): add public ingest copy sanitizers * fix(cli): sanitize public ingest progress copy * fix(cli): rename setup schema scope prompt * docs(plan): add progress copy closure; test: align setup back-nav fixture Adds the iter9 plan and updates the setup back-navigation test fixture to pass disableQueryHistory plus listSchemas/listTables stubs that the unified ingest setup step now requires. 
* docs(plan): add final ux labels plan with narrowed label scans * fix(cli): aggregate unsupported query-history warnings * fix(cli): align setup database labels * test(cli): fix setup database test type-check * fix(cli): remove primary-source wording from setup output * test(cli): verify unified ingest setup closure * docs(plan): add unified ingest v1 verification copy closure plan * fix(cli): remove top-level scan command * fix(cli): remove legacy ingest and wiki commands * Merge scan into ingest flow * feat(cli): split ingest progress into per-phase rows, rename work units to tasks Each database target in the unified ingest dashboard now renders one row per real subprocess (Schema, then Query history when enabled) instead of a single combined bar. Each phase has its own monotonic 0-100% bar so the progress never snaps back to zero when historic-sql starts after scan completes. Completed phases keep their final bar, summary, and elapsed time visible as an inline audit trail; queued and skipped phases are shown explicitly. Also rename user-facing "work units" / "Failed work units" to "tasks" / "Failed tasks" in ingest output and parseIngestSummary. The parser still accepts the legacy "Work units:" wording in captured output for backward compat. Internal memory-flow event names and type fields are left alone. * Fix test harness failures * Fix CI smoke checks --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-14 01:43:06 +02:00
'Connection "warehouse" does not have context.queryHistory.enabled: true',
2026-05-10 23:12:26 +02:00
);
});
// Verifies that the Looker adapter's pull config is assembled from mapping state
// persisted in the project's local SQLite store (.ktx/db.sqlite). The connection
// config below carries no `mappings` entry; the mapping is seeded through the store.
it('builds Looker pull config from local mapping state', async () => {
  // NOTE(review): created under the OS temp dir and not removed inside this test —
  // confirm that suite-level teardown (not visible here) cleans it up.
  const projectDir = await mkdtemp(join(tmpdir(), 'ktx-local-looker-'));
  const lookerProject = {
    projectDir,
    config: {
      connections: {
        'prod-looker': {
          driver: 'looker',
          base_url: 'https://looker.example.test',
          client_id: 'client',
        },
        'prod-warehouse': {
          driver: 'postgres',
          url: 'postgresql://readonly@db.example.test/analytics',
        },
      },
    },
  } as never;

  // Seed the runtime store: empty cursors plus one mapping from the Looker
  // connection name "analytics" to the KTX connection "prod-warehouse".
  const store = new LocalLookerRuntimeStore({ dbPath: join(projectDir, '.ktx/db.sqlite') });
  await store.setCursors('prod-looker', { dashboardsLastSyncedAt: null, looksLastSyncedAt: null });
  await store.upsertConnectionMapping({
    lookerConnectionId: 'prod-looker',
    lookerConnectionName: 'analytics',
    ktxConnectionId: 'prod-warehouse',
    source: 'cli',
  });

  // Stubbed Looker API client and table-name parser; no network access happens.
  const lookerDeps = {
    looker: {
      client: {
        listLookmlModels: async () => ({
          source: 'looker',
          fetchedAt: '2026-05-05T00:00:00.000Z',
          models: [{ name: 'ecommerce', label: null, explores: [{ name: 'orders', label: null }] }],
        }),
        getExplore: async () => ({
          source: 'looker',
          modelName: 'ecommerce',
          exploreName: 'orders',
          label: null,
          description: null,
          connectionName: 'analytics',
          viewName: null,
          rawSqlTableName: 'public.orders',
          fields: { dimensions: [], measures: [] },
          joins: [],
          targetWarehouseConnectionId: null,
          targetTable: null,
        }),
      },
      parser: {
        parse: async () => ({
          'ecommerce.orders': {
            ok: true,
            catalog: null,
            schema: 'public',
            name: 'orders',
            canonical_table: 'public.orders',
          },
        }),
      },
    },
  };

  const adapter = createDefaultLocalIngestAdapters(lookerProject, lookerDeps).find(
    (candidate) => candidate.source === 'looker',
  );

  // The parser stub answers with snake_case `canonical_table`; the pull config is
  // expected to expose it as camelCase `canonicalTable`.
  await expect(localPullConfigForAdapter(lookerProject, adapter!, 'prod-looker', lookerDeps)).resolves.toMatchObject({
    lookerConnectionId: 'prod-looker',
    connectionMappings: { analytics: 'prod-warehouse' },
    connectionTypes: { analytics: 'POSTGRESQL' },
    parsedTargetTables: {
      'ecommerce.orders': { ok: true, schema: 'public', name: 'orders', canonicalTable: 'public.orders' },
    },
  });
});
// Verifies the fallback path: nothing is written to the local runtime store here,
// and the only mapping is the `mappings.connectionMappings` entry on the Looker
// connection config. The pull config is still expected to carry that mapping.
it('builds Looker pull config from yaml mapping bootstrap when SQLite is empty', async () => {
  // NOTE(review): created under the OS temp dir and not removed inside this test —
  // confirm that suite-level teardown (not visible here) cleans it up.
  const projectDir = await mkdtemp(join(tmpdir(), 'ktx-local-looker-yaml-'));
  const lookerProject = {
    projectDir,
    config: {
      connections: {
        'prod-looker': {
          driver: 'looker',
          base_url: 'https://looker.example.test',
          client_id: 'client',
          mappings: { connectionMappings: { analytics: 'prod-warehouse' } },
        },
        'prod-warehouse': {
          driver: 'postgres',
          url: 'postgresql://readonly@db.example.test/analytics',
        },
      },
    },
  } as never;

  // Stubbed Looker API client and table-name parser; no network access happens.
  const lookerDeps = {
    looker: {
      client: {
        listLookmlModels: async () => ({
          source: 'looker',
          fetchedAt: '2026-05-05T00:00:00.000Z',
          models: [{ name: 'ecommerce', label: null, explores: [{ name: 'orders', label: null }] }],
        }),
        getExplore: async () => ({
          source: 'looker',
          modelName: 'ecommerce',
          exploreName: 'orders',
          label: null,
          description: null,
          connectionName: 'analytics',
          viewName: null,
          rawSqlTableName: 'public.orders',
          fields: { dimensions: [], measures: [] },
          joins: [],
          targetWarehouseConnectionId: null,
          targetTable: null,
        }),
      },
      parser: {
        parse: async () => ({
          'ecommerce.orders': {
            ok: true,
            catalog: null,
            schema: 'public',
            name: 'orders',
            canonical_table: 'public.orders',
          },
        }),
      },
    },
  };

  const adapter = createDefaultLocalIngestAdapters(lookerProject, lookerDeps).find(
    (candidate) => candidate.source === 'looker',
  );

  await expect(localPullConfigForAdapter(lookerProject, adapter!, 'prod-looker', lookerDeps)).resolves.toMatchObject({
    connectionMappings: { analytics: 'prod-warehouse' },
    connectionTypes: { analytics: 'POSTGRESQL' },
  });
});
2026-05-10 23:51:24 +02:00
// Verifies that a LookML connection whose repo settings sit directly on the
// connection object (no nested `lookml` key) yields a complete pull config, with
// `auth_token_ref: 'env:GITHUB_TOKEN'` resolved against the injected env map.
it('builds LookML pull config from flat ktx.yaml connection fields', async () => {
  const lookmlProject = {
    projectDir: tempDir,
    config: {
      connections: {
        'prod-lookml': {
          driver: 'lookml',
          repoUrl: 'https://github.com/acme/looker.git',
          branch: 'main',
          path: 'models',
          auth_token_ref: 'env:GITHUB_TOKEN',
          mappings: { expectedLookerConnectionName: 'bigquery_prod' },
        },
      },
    },
  } as never;

  const adapter = createDefaultLocalIngestAdapters(lookmlProject).find((candidate) => candidate.source === 'lookml');

  // toEqual (not toMatchObject): the resolved config must have exactly these keys.
  await expect(
    localPullConfigForAdapter(lookmlProject, adapter!, 'prod-lookml', {
      looker: { env: { GITHUB_TOKEN: 'ghp_test_token' } },
    }),
  ).resolves.toEqual({
    repoUrl: 'https://github.com/acme/looker.git',
    branch: 'main',
    path: 'models',
    authToken: 'ghp_test_token',
    expectedLookerConnectionName: 'bigquery_prod',
    parsedTargetTables: {},
  });
});
// Verifies that resolving a pull config for a LookML connection that declares
// only its driver rejects with the "missing repoUrl" error.
it('rejects local LookML scheduled pulls when repoUrl is missing', async () => {
  const lookmlProject = {
    projectDir: tempDir,
    config: { connections: { 'prod-lookml': { driver: 'lookml' } } },
  } as never;

  const adapter = createDefaultLocalIngestAdapters(lookmlProject).find((candidate) => candidate.source === 'lookml');

  await expect(localPullConfigForAdapter(lookmlProject, adapter!, 'prod-lookml')).rejects.toThrow(
    'lookml integration config missing repoUrl',
  );
});
// A dbt connection pointing at a local checkout: the snake_case connection fields
// are expected back as a camelCase pull config.
it('reads dbt source_dir from local connection config', async () => {
  const dbtProject = projectWithConnections({
    analytics_dbt: {
      driver: 'dbt',
      source_dir: '/repo/dbt',
      profiles_path: '/repo/profiles',
      target: 'prod',
      project_name: 'analytics',
    },
  });
  const dbtAdapter = createDefaultLocalIngestAdapters(dbtProject).find(({ source }) => source === 'dbt');

  const pullConfig = localPullConfigForAdapter(dbtProject, dbtAdapter!, 'analytics_dbt');

  await expect(pullConfig).resolves.toEqual({
    sourceDir: '/repo/dbt',
    profilesPath: '/repo/profiles',
    target: 'prod',
    projectName: 'analytics',
  });
});
// A dbt connection backed by a git repository: `auth_token_ref` names an env
// variable, and the injected env map supplies the value expected in `authToken`.
it('reads dbt git repo config from local connection config', async () => {
  const gitBackedProject = projectWithConnections({
    analytics_dbt: {
      driver: 'dbt',
      repo_url: 'https://github.com/acme/dbt.git',
      branch: 'main',
      path: 'analytics',
      auth_token_ref: 'env:DBT_REPO_TOKEN',
    },
  });
  const dbtAdapter = createDefaultLocalIngestAdapters(gitBackedProject).find(({ source }) => source === 'dbt');

  const pullConfig = localPullConfigForAdapter(gitBackedProject, dbtAdapter!, 'analytics_dbt', {
    looker: { env: { DBT_REPO_TOKEN: 'token-123' } as NodeJS.ProcessEnv },
  });

  await expect(pullConfig).resolves.toEqual({
    repoUrl: 'https://github.com/acme/dbt.git',
    branch: 'main',
    path: 'analytics',
    authToken: 'token-123',
  });
});
// With `setup.database_connection_ids` naming the warehouse, the dbt adapter is
// expected to list exactly that connection as its target.
it('exposes configured primary warehouses as dbt target connections', async () => {
  const baseProject = projectWithConnections({
    warehouse: {
      driver: 'postgres',
      url: 'postgresql://example/db',
    },
    analytics_dbt: {
      driver: 'dbt',
      source_dir: '/repo/dbt',
    },
  });
  const dbtProject: KtxLocalProject = {
    ...baseProject,
    config: {
      // `project` is not declared inside this test — presumably a suite-level
      // fixture defined outside this view; verify.
      ...project.config,
      setup: { database_connection_ids: ['warehouse'] },
      connections: {
        warehouse: {
          driver: 'postgres',
          url: 'postgresql://example/db',
        },
        analytics_dbt: {
          driver: 'dbt',
          source_dir: '/repo/dbt',
        },
      },
    },
  };
  const dbtAdapter = createDefaultLocalIngestAdapters(dbtProject).find(({ source }) => source === 'dbt');

  const targetIds = dbtAdapter?.listTargetConnectionIds?.('/tmp/staged-dbt');

  await expect(targetIds).resolves.toEqual(['warehouse']);
});
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
// Three connections are configured (notion, a postgres warehouse, a dbt source);
// the Notion adapter is expected to report only the warehouse as a target.
it('passes primary warehouse connection ids to the local Notion adapter', async () => {
  const mixedProject = projectWithConnections({
    notion: {
      driver: 'notion',
      auth_token: 'secret',
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
    },
    warehouse: {
      driver: 'postgres',
      url: 'postgresql://readonly@db.example.test/analytics',
    },
    docs: {
      driver: 'dbt',
      source_dir: './dbt',
    },
  } as never);
  const notionAdapter = createDefaultLocalIngestAdapters(mixedProject).find(({ source }) => source === 'notion');

  const targetIds = notionAdapter?.listTargetConnectionIds?.('/tmp/staged-notion');

  await expect(targetIds).resolves.toEqual(['warehouse']);
});
// The connection config carries a stale `last_successful_cursor`; the value stored
// in the local runtime store is the one expected in the pull config.
it('reads Notion cursors from local state instead of ktx.yaml', async () => {
  const storedCursor = '{"phase":"all_accessible_pages","cursor":"cursor-1"}';
  const notionProject = projectWithConnections({
    notion: {
      driver: 'notion',
      auth_token: 'secret',
      crawl_mode: 'all_accessible',
      last_successful_cursor: '{"phase":"all_accessible_pages","cursor":"stale-yaml"}',
    },
  } as never);

  // Write the cursor into the project's local SQLite state before resolving the config.
  const runtimeStore = new LocalNotionRuntimeStore({
    dbPath: join(notionProject.projectDir, '.ktx', 'db.sqlite'),
  });
  await runtimeStore.setCursor('notion', storedCursor);

  const notionAdapter = createDefaultLocalIngestAdapters(notionProject).find(({ source }) => source === 'notion');
  const pullConfig = localPullConfigForAdapter(notionProject, notionAdapter!, 'notion');

  await expect(pullConfig).resolves.toMatchObject({
    lastSuccessfulCursor: storedCursor,
  });
});
// Stages a manifest.json whose `nextSuccessfulCursor` is set, fires the adapter's
// onPullSucceeded hook, then reads the cursor back from the local runtime store.
it('persists Notion next cursors to local state after successful pulls', async () => {
  const nextCursor = '{"phase":"all_accessible_data_sources","cursor":"cursor-2"}';
  const notionProject = projectWithConnections({
    notion: {
      driver: 'notion',
      auth_token: 'secret',
      crawl_mode: 'all_accessible',
    },
  } as never);

  // Staged pull output lives in a fresh directory under the suite's tempDir.
  const stagedDir = await mkdtemp(join(tempDir, 'notion-staged-'));
  const manifest = {
    source: 'notion',
    apiVersion: '2026-03-11',
    crawlMode: 'all_accessible',
    rootPageIds: [],
    rootDatabaseIds: [],
    rootDataSourceIds: [],
    fetchedAt: '2026-05-13T10:00:00.000Z',
    pageCount: 1,
    databaseCount: 0,
    dataSourceCount: 0,
    capped: true,
    continuedFromCursor: false,
    partialSnapshot: true,
    maxPagesPerRun: 1,
    maxKnowledgeCreatesPerRun: 25,
    maxKnowledgeUpdatesPerRun: 20,
    nextSuccessfulCursor: nextCursor,
    skipped: [],
    warnings: [],
  };
  await writeFile(join(stagedDir, 'manifest.json'), JSON.stringify(manifest), 'utf-8');

  const notionAdapter = createDefaultLocalIngestAdapters(notionProject).find(({ source }) => source === 'notion');
  await notionAdapter?.onPullSucceeded?.({
    connectionId: 'notion',
    sourceKey: 'notion',
    syncId: 'sync-1',
    trigger: 'scheduled_pull',
    completedAt: new Date('2026-05-13T10:00:00.000Z'),
    stagedDir,
  });

  // A new store instance over the same db file must see the persisted cursor.
  const dbPath = join(notionProject.projectDir, '.ktx', 'db.sqlite');
  await expect(new LocalNotionRuntimeStore({ dbPath }).readCursor('notion')).resolves.toBe(nextCursor);
});
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
// Both repo-backed adapters (LookML and MetricFlow) are expected to report the
// postgres warehouse as their only target connection.
it('passes primary warehouse connection ids to local LookML and MetricFlow adapters', async () => {
  const repoProject = projectWithConnections({
    warehouse: {
      driver: 'postgres',
      url: 'postgresql://readonly@db.example.test/analytics',
    },
    lookml_docs: {
      driver: 'lookml',
      lookml: {
        repoUrl: 'https://github.com/acme/lookml.git',
      },
    },
    metrics_repo: {
      driver: 'metricflow',
      metricflow: {
        repoUrl: 'https://github.com/acme/metrics.git',
      },
    },
  } as never);
  const allAdapters = createDefaultLocalIngestAdapters(repoProject);

  const lookmlAdapter = allAdapters.find(({ source }) => source === 'lookml');
  const metricflowAdapter = allAdapters.find(({ source }) => source === 'metricflow');

  const lookmlTargets = lookmlAdapter?.listTargetConnectionIds?.('/tmp/staged-lookml');
  await expect(lookmlTargets).resolves.toEqual(['warehouse']);

  const metricflowTargets = metricflowAdapter?.listTargetConnectionIds?.('/tmp/staged-metricflow');
  await expect(metricflowTargets).resolves.toEqual(['warehouse']);
});
2026-05-10 23:12:26 +02:00
// The MetricFlow connection stores only an env reference (`env:METRICFLOW_REPO_TOKEN`);
// the literal token comes from the injected env map and appears as `authToken`.
it('resolves MetricFlow auth_token_ref without writing literal tokens to config', async () => {
  const metricflowProject = projectWithConnections({
    metricflow_main: {
      driver: 'metricflow',
      metricflow: {
        repoUrl: 'https://github.com/acme/metrics.git',
        branch: 'main',
        path: 'semantic_models',
        auth_token_ref: 'env:METRICFLOW_REPO_TOKEN',
      },
    },
  });
  const metricflowAdapter = createDefaultLocalIngestAdapters(metricflowProject).find(
    ({ source }) => source === 'metricflow',
  );

  const pullConfig = localPullConfigForAdapter(metricflowProject, metricflowAdapter!, 'metricflow_main', {
    looker: { env: { METRICFLOW_REPO_TOKEN: 'token-123' } as NodeJS.ProcessEnv },
  });

  await expect(pullConfig).resolves.toEqual({
    repoUrl: 'https://github.com/acme/metrics.git',
    branch: 'main',
    path: 'semantic_models',
    authToken: 'token-123',
    parsedTargetTables: {},
  });
});
});