// ktx/packages/cli/test/setup-sources-notion.test.ts

import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { initKtxProject } from '../src/context/project/project.js';
import { type KtxProjectConnectionConfig, parseKtxProjectConfig, serializeKtxProjectConfig } from '../src/context/project/config.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
  runKtxSetupSourcesStep,
  type KtxSetupSourcesPromptAdapter,
} from '../src/setup-sources.js';

/**
 * Shared Notion test doubles.
 *
 * Created through `vi.hoisted` so the object already exists when the
 * `vi.mock` factory for the Notion client module (which Vitest hoists above
 * the imports) references it.
 */
const notionMocks = vi.hoisted(() => ({
  // Every token handed to the mocked `NotionClient` constructor, in call order.
  tokens: [] as string[],
  // Stub for the client's `retrieveBotUser`; resolves to a fixed bot user.
  retrieveBotUser: vi.fn(async () => ({ name: 'Docs Bot' })),
  // Stub for the client's `retrievePage`; resolves to a fixed page.
  retrievePage: vi.fn(async () => ({ id: 'page-1' })),
}));

// Partially mock the Notion client module: every real export is kept via
// `...actual`, and only `NotionClient` is swapped for a recording stand-in.
vi.mock('../src/context/ingest/adapters/notion/notion-client.js', async (importOriginal) => {
  const actual = await importOriginal<typeof import('../src/context/ingest/adapters/notion/notion-client.js')>();
  return {
    ...actual,
    // NOTE(review): a regular `function` (not an arrow) is used here, presumably
    // because the code under test constructs the client with `new` — confirm.
    NotionClient: vi.fn().mockImplementation(function NotionClient(token: string) {
      // Record the token so tests can assert which credential was used.
      notionMocks.tokens.push(token);
      // The stand-in exposes only the two shared mocked methods.
      return {
        retrieveBotUser: notionMocks.retrieveBotUser,
        retrievePage: notionMocks.retrievePage,
      };
    }),
  };
});

/**
 * Builds an in-memory IO pair for driving the setup step in tests.
 *
 * @returns `io` — an object with `stdout` (flagged `isTTY: true`) and `stderr`
 *   writers that append each chunk to a private buffer — plus `stdout()` and
 *   `stderr()` accessors returning everything written so far to each stream.
 */
function makeIo() {
  const captured = { out: '', err: '' };

  const io = {
    stdout: {
      isTTY: true,
      write: (chunk: string) => {
        captured.out = captured.out + chunk;
      },
    },
    stderr: {
      write: (chunk: string) => {
        captured.err = captured.err + chunk;
      },
    },
  };

  return {
    io,
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}

/**
 * Creates a scripted prompt adapter backed by Vitest mocks.
 *
 * @param values Canned answers. `multiselect` entries are consumed one per
 *   `multiselect` call; `select` entries are consumed one per `select` or
 *   `autocomplete` call (both draw from the same queue).
 * @returns An adapter whose `multiselect` resolves to `[]` and whose
 *   `select`/`autocomplete` resolve to `'back'` once their queue is empty;
 *   `text` resolves to `''`, `password` to `undefined`, and `cancel`/`log`
 *   are bare mocks.
 */
function prompts(values: { multiselect?: string[][]; select?: string[] }): KtxSetupSourcesPromptAdapter {
  // Work on copies so the caller's arrays are never mutated by `shift()`.
  const pendingMultiselect = Array.from(values.multiselect ?? []);
  const pendingSelect = Array.from(values.select ?? []);

  return {
    multiselect: vi.fn(async () => {
      return pendingMultiselect.shift() ?? [];
    }),
    select: vi.fn(async () => {
      return pendingSelect.shift() ?? 'back';
    }),
    autocomplete: vi.fn(async () => {
      return pendingSelect.shift() ?? 'back';
    }),
    text: vi.fn(async () => ''),
    password: vi.fn(async () => undefined),
    cancel: vi.fn(),
    log: vi.fn(),
  };
}

/**
 * Checks that the setup sources step validates an already-configured Notion
 * connection against the mocked Notion client.
 */
describe('setup sources Notion validation', () => {
  let tempDir: string;
  let projectDir: string;

  /** Path of the `ktx.yaml` belonging to the current test's project. */
  const configPath = () => join(projectDir, 'ktx.yaml');

  beforeEach(async () => {
    // Drop tokens and call history recorded by a previous test.
    notionMocks.tokens.length = 0;
    notionMocks.retrieveBotUser.mockClear();
    notionMocks.retrievePage.mockClear();

    // Each test gets a freshly initialised project in its own temp directory.
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-sources-notion-'));
    projectDir = join(tempDir, 'project');
    await initKtxProject({ projectDir });
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  /** Reads and parses the project's `ktx.yaml`. */
  async function readConfig() {
    const raw = await readFile(configPath(), 'utf-8');
    return parseKtxProjectConfig(raw);
  }

  /**
   * Rewrites `ktx.yaml` so it contains a `warehouse` postgres connection
   * (listed as the only setup database connection) plus `connection` stored
   * under `connectionId`.
   */
  async function writeConfigConnection(connectionId: string, connection: KtxProjectConnectionConfig) {
    const current = await readConfig();
    const serialized = serializeKtxProjectConfig({
      ...current,
      connections: {
        ...current.connections,
        warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },
        [connectionId]: connection,
      },
      setup: {
        ...current.setup,
        database_connection_ids: ['warehouse'],
      },
    });
    await writeFile(configPath(), serialized, 'utf-8');
  }

  it('validates an existing Notion source that uses an inline auth token', async () => {
    await writeConfigConnection('notion', {
      driver: 'notion',
      auth_token: 'ntn_inline_token',
      crawl_mode: 'all_accessible',
    });
    const capture = makeIo();
    // Scripted answers: pick the Notion source, then reuse the existing connection.
    const promptAdapter = prompts({
      multiselect: [['notion']],
      select: ['existing:notion'],
    });

    const outcome = runKtxSetupSourcesStep(
      { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
      capture.io,
      { prompts: promptAdapter },
    );

    await expect(outcome).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion'] });

    // The inline token must reach the client, which is probed exactly once.
    expect(notionMocks.tokens).toEqual(['ntn_inline_token']);
    expect(notionMocks.retrieveBotUser).toHaveBeenCalledOnce();
    expect(capture.stderr()).toBe('');
  });
});
fix: improve setup wizard behavior (#127) * fix: improve setup wizard behavior * fix: derive runtime versions from release metadata * test: validate metabase source mapping requirements * Fix boundary check release identifiers 2026-05-17 19:15:09 +02:00			`import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';`
			`import { tmpdir } from 'node:os';`
			`import { join } from 'node:path';`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`import { initKtxProject } from '../src/context/project/project.js';`
			`import { type KtxProjectConnectionConfig, parseKtxProjectConfig, serializeKtxProjectConfig } from '../src/context/project/config.js';`
fix: improve setup wizard behavior (#127) * fix: improve setup wizard behavior * fix: derive runtime versions from release metadata * test: validate metabase source mapping requirements * Fix boundary check release identifiers 2026-05-17 19:15:09 +02:00			`import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';`
			`import {`
			`runKtxSetupSourcesStep,`
			`type KtxSetupSourcesPromptAdapter,`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`} from '../src/setup-sources.js';`
fix: improve setup wizard behavior (#127) * fix: improve setup wizard behavior * fix: derive runtime versions from release metadata * test: validate metabase source mapping requirements * Fix boundary check release identifiers 2026-05-17 19:15:09 +02:00
			`const notionMocks = vi.hoisted(() => ({`
			`tokens: [] as string[],`
			`retrieveBotUser: vi.fn(async () => ({ name: 'Docs Bot' })),`
			`retrievePage: vi.fn(async () => ({ id: 'page-1' })),`
			`}));`

test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`vi.mock('../src/context/ingest/adapters/notion/notion-client.js', async (importOriginal) => {`
			`const actual = await importOriginal<typeof import('../src/context/ingest/adapters/notion/notion-client.js')>();`
fix: improve setup wizard behavior (#127) * fix: improve setup wizard behavior * fix: derive runtime versions from release metadata * test: validate metabase source mapping requirements * Fix boundary check release identifiers 2026-05-17 19:15:09 +02:00			`return {`
			`...actual,`
			`NotionClient: vi.fn().mockImplementation(function NotionClient(token: string) {`
			`notionMocks.tokens.push(token);`
			`return {`
			`retrieveBotUser: notionMocks.retrieveBotUser,`
			`retrievePage: notionMocks.retrievePage,`
			`};`
			`}),`
			`};`
			`});`

			`function makeIo() {`
			`let stdout = '';`
			`let stderr = '';`
			`return {`
			`io: {`
			`stdout: {`
			`isTTY: true,`
			`write: (chunk: string) => {`
			`stdout += chunk;`
			`},`
			`},`
			`stderr: {`
			`write: (chunk: string) => {`
			`stderr += chunk;`
			`},`
			`},`
			`},`
			`stdout: () => stdout,`
			`stderr: () => stderr,`
			`};`
			`}`

			`function prompts(values: { multiselect?: string[][]; select?: string[] }): KtxSetupSourcesPromptAdapter {`
			`const multiselectValues = [...(values.multiselect ?? [])];`
			`const selectValues = [...(values.select ?? [])];`
			`return {`
			`multiselect: vi.fn(async () => multiselectValues.shift() ?? []),`
			`select: vi.fn(async () => selectValues.shift() ?? 'back'),`
feat(cli): redesign database scope picker for searchable schema-first setup (#203) * feat: add searchable setup prompt pickers * fix: make snowflake scope discovery single query * fix: make bigquery table discovery schema scoped * fix: honor mysql and clickhouse database scope * feat: wire schema scope discovery for all relational setup drivers * feat: add schema-first database scope picker * test: update setup prompt stubs for type-check * docs: document database scope picker fields * Fix database setup edit preservation --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> 2026-05-22 14:22:11 +02:00			`autocomplete: vi.fn(async () => selectValues.shift() ?? 'back'),`
fix: improve setup wizard behavior (#127) * fix: improve setup wizard behavior * fix: derive runtime versions from release metadata * test: validate metabase source mapping requirements * Fix boundary check release identifiers 2026-05-17 19:15:09 +02:00			`text: vi.fn(async () => ''),`
			`password: vi.fn(async () => undefined),`
			`cancel: vi.fn(),`
			`log: vi.fn(),`
			`};`
			`}`

			`describe('setup sources Notion validation', () => {`
			`let tempDir: string;`
			`let projectDir: string;`

			`beforeEach(async () => {`
			`notionMocks.tokens.length = 0;`
			`notionMocks.retrieveBotUser.mockClear();`
			`notionMocks.retrievePage.mockClear();`
			`tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-sources-notion-'));`
			`projectDir = join(tempDir, 'project');`
			`await initKtxProject({ projectDir });`
			`});`

			`afterEach(async () => {`
			`await rm(tempDir, { recursive: true, force: true });`
			`});`

			`async function readConfig() {`
			`return parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'));`
			`}`

			`async function writeConfigConnection(connectionId: string, connection: KtxProjectConnectionConfig) {`
			`const config = await readConfig();`
			`await writeFile(`
			`join(projectDir, 'ktx.yaml'),`
			`serializeKtxProjectConfig({`
			`...config,`
			`connections: {`
			`...config.connections,`
			`warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },`
			`[connectionId]: connection,`
			`},`
			`setup: {`
			`...config.setup,`
			`database_connection_ids: ['warehouse'],`
			`},`
			`}),`
			`'utf-8',`
			`);`
			`}`

			`it('validates an existing Notion source that uses an inline auth token', async () => {`
			`await writeConfigConnection('notion', {`
			`driver: 'notion',`
			`auth_token: 'ntn_inline_token',`
			`crawl_mode: 'all_accessible',`
			`});`
			`const io = makeIo();`

			`await expect(`
			`runKtxSetupSourcesStep(`
			`{ projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },`
			`io.io,`
			`{`
			`prompts: prompts({`
			`multiselect: [['notion']],`
			`select: ['existing:notion'],`
			`}),`
			`},`
			`),`
			`).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion'] });`

			`expect(notionMocks.tokens).toEqual(['ntn_inline_token']);`
			`expect(notionMocks.retrieveBotUser).toHaveBeenCalledOnce();`
			`expect(io.stderr()).toBe('');`
			`});`
			`});`