mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
Improve schema setup and Notion ingest UX
This commit is contained in:
parent
155613c794
commit
72a4ace13c
21 changed files with 540 additions and 118 deletions
|
|
@ -327,8 +327,19 @@ describe('createRepainter', () => {
|
|||
repainter.paint('hello');
|
||||
repainter.paint('bye');
|
||||
|
||||
expect(io.stdout()).toContain('\rbye');
|
||||
expect(io.stdout()).not.toContain('\u001b[1A\rbye');
|
||||
expect(io.stdout()).toContain('bye');
|
||||
expect(io.stdout()).not.toMatch(/\[\d+A/);
|
||||
});
|
||||
|
||||
it('does not undershoot cursor-up when a line is exactly the terminal width', () => {
  // The first painted line is 10 characters long, matching `columns: 10` exactly.
  const io = makeIo({ isTTY: true, columns: 10 });
  const repainter = createRepainter(io.io);

  // Paint the same two-line frame twice; only the second paint has a previous
  // frame to move back over.
  repainter.paint('0123456789\nsecond\n');
  repainter.paint('0123456789\nsecond\n');

  // Collect every cursor-up escape (`[<n>A`) written to stdout and check that
  // there is exactly one, moving up two rows.
  const cursorMoves = [...io.stdout().matchAll(/\[(\d+)A/g)].map((m) => Number(m[1]));
  expect(cursorMoves).toEqual([2]);
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -378,7 +378,8 @@ export function createRepainter(io: KtxCliIo) {
|
|||
}
|
||||
io.stdout.write('\r');
|
||||
}
|
||||
io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`));
|
||||
io.stdout.write(`${ESC}[2K`);
|
||||
io.stdout.write(content.replaceAll('\n', `\n${ESC}[2K`));
|
||||
io.stdout.write(`${ESC}[J`);
|
||||
hasPainted = true;
|
||||
lastCursorUpRows = cursorUpRowsAfterWrite(content);
|
||||
|
|
|
|||
|
|
@ -531,7 +531,7 @@ describe('setup databases step', () => {
|
|||
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -582,7 +582,7 @@ describe('setup databases step', () => {
|
|||
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -617,7 +617,7 @@ describe('setup databases step', () => {
|
|||
message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -652,7 +652,7 @@ describe('setup databases step', () => {
|
|||
message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -695,7 +695,7 @@ describe('setup databases step', () => {
|
|||
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -918,6 +918,10 @@ describe('setup databases step', () => {
|
|||
'│ ✓ Connection test passed',
|
||||
'│ Driver: PostgreSQL · Tables: 2',
|
||||
'│',
|
||||
].join('\n'),
|
||||
);
|
||||
expect(io.stdout()).toContain(
|
||||
[
|
||||
'◇ Scanning postgres-warehouse',
|
||||
'│ ✓ Structural scan completed',
|
||||
'│ Changes: 2 new tables',
|
||||
|
|
@ -1007,7 +1011,7 @@ describe('setup databases step', () => {
|
|||
expect(config.connections['postgres-warehouse']).toMatchObject({
|
||||
schemas: ['orbit_analytics', 'orbit_raw'],
|
||||
});
|
||||
expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw');
|
||||
expect(io.stdout()).toContain('✓ orbit_analytics, orbit_raw');
|
||||
});
|
||||
|
||||
it('auto-selects all discovered Postgres schemas in non-interactive setup', async () => {
|
||||
|
|
@ -1043,7 +1047,7 @@ describe('setup databases step', () => {
|
|||
expect(config.connections.warehouse).toMatchObject({
|
||||
schemas: ['orbit_analytics', 'orbit_raw', 'public'],
|
||||
});
|
||||
expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw, public');
|
||||
expect(io.stdout()).toContain('✓ orbit_analytics, orbit_raw, public');
|
||||
});
|
||||
|
||||
it('adds one non-interactive Postgres URL connection, tests it, scans it, and marks databases complete', async () => {
|
||||
|
|
|
|||
|
|
@ -112,6 +112,56 @@ const DEFAULT_CONNECTION_IDS: Record<KtxSetupDatabaseDriver, string> = {
|
|||
snowflake: 'snowflake-warehouse',
|
||||
};
|
||||
|
||||
/**
 * Per-driver description of how setup discovers, presents and stores the
 * scan scope (schemas, datasets, ...) of a database connection.
 */
interface ScopeDiscoverySpec {
  /** Singular noun used in user-facing messages, e.g. `'schema'` or `'dataset'`. */
  noun: string;
  /** Plural form of {@link ScopeDiscoverySpec.noun}, e.g. `'schemas'`. */
  nounPlural: string;
  /** Driver-specific label used in prompts and section titles, e.g. `'PostgreSQL schemas'`. */
  promptLabel: string;
  /** Key in the connection config that holds the list of selected values. */
  configArrayField: string;
  /** Key in the connection config that holds a single value instead of a list. */
  configSingleField: string;
  /** Chooses which of the discovered values are pre-selected in the interactive prompt. */
  defaultSelection: (values: string[]) => string[];
}
|
||||
|
||||
/**
 * Builds a default-selection function that leaves out `catchAllName` when
 * anything else was discovered, and otherwise returns the input unchanged
 * (so a lone catch-all schema, or an empty list, is passed straight through).
 * The comparison is exact and case-sensitive.
 */
function preferOtherThan(catchAllName: string): (values: string[]) => string[] {
  return (values) => {
    const others = values.filter((value) => value !== catchAllName);
    return others.length > 0 ? others : values;
  };
}

/**
 * Scope-discovery settings for every driver that supports it. Drivers without
 * an entry here have no discoverable scope.
 */
const SCOPE_DISCOVERY_SPECS: Partial<Record<KtxSetupDatabaseDriver, ScopeDiscoverySpec>> = {
  postgres: {
    noun: 'schema',
    nounPlural: 'schemas',
    promptLabel: 'PostgreSQL schemas',
    configArrayField: 'schemas',
    configSingleField: 'schema',
    defaultSelection: preferOtherThan('public'),
  },
  sqlserver: {
    noun: 'schema',
    nounPlural: 'schemas',
    promptLabel: 'SQL Server schemas',
    configArrayField: 'schemas',
    configSingleField: 'schema',
    defaultSelection: (schemas) => schemas,
  },
  bigquery: {
    noun: 'dataset',
    nounPlural: 'datasets',
    promptLabel: 'BigQuery datasets',
    configArrayField: 'dataset_ids',
    configSingleField: 'dataset_id',
    defaultSelection: (datasets) => datasets,
  },
  snowflake: {
    noun: 'schema',
    nounPlural: 'schemas',
    promptLabel: 'Snowflake schemas',
    configArrayField: 'schema_names',
    configSingleField: 'schema_name',
    defaultSelection: preferOtherThan('PUBLIC'),
  },
};
|
||||
|
||||
type UrlDriverType = Extract<KtxSetupDatabaseDriver, 'postgres' | 'mysql' | 'clickhouse' | 'sqlserver'>;
|
||||
|
||||
const DRIVER_CONNECTION_DEFAULTS: Record<UrlDriverType, { port: string }> = {
|
||||
|
|
@ -260,16 +310,53 @@ async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Pr
|
|||
/**
 * Runs `list` and then calls `connector.cleanup()`, whether listing succeeded
 * or threw. A failure in `list` still propagates to the caller.
 */
async function listThenCleanup(
  connector: { cleanup(): unknown },
  list: () => string[] | Promise<string[]>,
): Promise<string[]> {
  try {
    return await list();
  } finally {
    await connector.cleanup();
  }
}

/**
 * Default scope lister used by database setup: loads the project, looks up the
 * connection and asks the matching scan connector for its schemas (or, for
 * BigQuery, its datasets).
 *
 * Connector packages are imported lazily so only the driver in use is loaded.
 *
 * @param projectDir - Directory of the KTX project to load.
 * @param connectionId - Key of the connection inside the project config.
 * @returns The names reported by the connector, or `[]` when the driver has no
 *   lister here or the stored config does not pass the connector's own
 *   config check.
 */
async function defaultListSchemas(projectDir: string, connectionId: string): Promise<string[]> {
  const project = await loadKtxProject({ projectDir });
  const connection = project.config.connections[connectionId];
  const driver = normalizeDriver(connection?.driver);

  if (driver === 'postgres') {
    const { KtxPostgresScanConnector, isKtxPostgresConnectionConfig } = await import('@ktx/connector-postgres');
    if (!isKtxPostgresConnectionConfig(connection)) return [];
    const connector = new KtxPostgresScanConnector({ connectionId, connection });
    return listThenCleanup(connector, () => connector.listSchemas());
  }

  if (driver === 'sqlserver') {
    const { KtxSqlServerScanConnector, isKtxSqlServerConnectionConfig } = await import('@ktx/connector-sqlserver');
    if (!isKtxSqlServerConnectionConfig(connection)) return [];
    const connector = new KtxSqlServerScanConnector({ connectionId, connection });
    return listThenCleanup(connector, () => connector.listSchemas());
  }

  if (driver === 'bigquery') {
    const { KtxBigQueryScanConnector, isKtxBigQueryConnectionConfig } = await import('@ktx/connector-bigquery');
    if (!isKtxBigQueryConnectionConfig(connection)) return [];
    const connector = new KtxBigQueryScanConnector({ connectionId, connection });
    // BigQuery scopes are datasets rather than schemas.
    return listThenCleanup(connector, () => connector.listDatasets());
  }

  if (driver === 'snowflake') {
    const { KtxSnowflakeScanConnector, isKtxSnowflakeConnectionConfig } = await import('@ktx/connector-snowflake');
    if (!isKtxSnowflakeConnectionConfig(connection)) return [];
    const connector = new KtxSnowflakeScanConnector({ connectionId, connection });
    return listThenCleanup(connector, () => connector.listSchemas());
  }

  // Any other driver: nothing to discover.
  return [];
}
|
||||
|
||||
function existingConnectionIdsByDriver(
|
||||
|
|
@ -307,7 +394,7 @@ function configuredPrimarySourcesPrompt(connectionIds: string[]): {
|
|||
message: `Primary sources already configured: ${connectionIds.join(', ')}\nWhat would you like to do?`,
|
||||
options: [
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
};
|
||||
|
|
@ -831,41 +918,44 @@ async function writeConnectionConfig(input: {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Reads the scope values (schemas, datasets, ...) already stored on a connection.
 *
 * The list field named by `spec.configArrayField` is preferred: its non-blank
 * string entries are returned trimmed, and anything else in the list is
 * ignored. When that yields nothing (field missing, not an array, empty, or
 * only invalid entries) the single-value field named by
 * `spec.configSingleField` is used instead, so an empty list does not hide a
 * configured single value.
 *
 * @param connection - Connection config to inspect; `undefined` yields `[]`.
 * @param spec - Tells which config keys hold the scope for this driver.
 * @returns Trimmed, non-empty scope values in their stored order (not de-duplicated).
 */
function configuredScopeValues(
  connection: KtxProjectConnectionConfig | undefined,
  spec: ScopeDiscoverySpec,
): string[] {
  if (!connection) return [];

  const listed = connection[spec.configArrayField];
  if (Array.isArray(listed)) {
    const cleaned = listed
      .filter((value): value is string => typeof value === 'string' && value.trim().length > 0)
      .map((value) => value.trim());
    if (cleaned.length > 0) return cleaned;
  }

  const single = connection[spec.configSingleField];
  return typeof single === 'string' && single.trim().length > 0 ? [single.trim()] : [];
}
|
||||
|
||||
function defaultSchemaSelection(schemas: string[]): string[] {
|
||||
const nonPublic = schemas.filter((schema) => schema !== 'public');
|
||||
return nonPublic.length > 0 ? nonPublic : schemas;
|
||||
}
|
||||
|
||||
/**
 * Stores the selected scope values on a connection and writes the connection
 * back through `writeConnectionConfig`.
 *
 * The values are written under `spec.configArrayField` after being passed
 * through `unique`; the single-value field (`spec.configSingleField`) is
 * removed so only the list form remains. All other connection settings are
 * kept as they are.
 *
 * Does nothing when the project has no connection with the given id.
 *
 * @param input.projectDir - Directory of the KTX project to update.
 * @param input.connectionId - Key of the connection inside the project config.
 * @param input.values - Scope values (schemas, datasets, ...) to store.
 * @param input.spec - Tells which config keys hold the scope for this driver.
 */
async function writeScopeConfig(input: {
  projectDir: string;
  connectionId: string;
  values: string[];
  spec: ScopeDiscoverySpec;
}): Promise<void> {
  const project = await loadKtxProject({ projectDir: input.projectDir });
  const connection = project.config.connections[input.connectionId];
  if (!connection) return;

  // Copy every setting except the single-value field, whose key is only known at runtime.
  const cleaned = Object.fromEntries(
    Object.entries(connection).filter(([key]) => key !== input.spec.configSingleField),
  ) as KtxProjectConnectionConfig;

  await writeConnectionConfig({
    projectDir: input.projectDir,
    connectionId: input.connectionId,
    connection: {
      ...cleaned,
      [input.spec.configArrayField]: unique(input.values),
    },
  });
}
|
||||
|
||||
async function maybeConfigurePostgresSchemas(input: {
|
||||
async function maybeConfigureSchemaScope(input: {
|
||||
projectDir: string;
|
||||
connectionId: string;
|
||||
args: KtxSetupDatabasesArgs;
|
||||
|
|
@ -875,65 +965,77 @@ async function maybeConfigurePostgresSchemas(input: {
|
|||
}): Promise<boolean> {
|
||||
const project = await loadKtxProject({ projectDir: input.projectDir });
|
||||
const connection = project.config.connections[input.connectionId];
|
||||
if (normalizeDriver(connection?.driver) !== 'postgres') {
|
||||
return true;
|
||||
}
|
||||
const driver = normalizeDriver(connection?.driver);
|
||||
if (!driver) return true;
|
||||
|
||||
if (configuredSchemas(connection).length > 0) {
|
||||
const spec = SCOPE_DISCOVERY_SPECS[driver];
|
||||
if (!spec) return true;
|
||||
|
||||
const arrayVal = connection?.[spec.configArrayField];
|
||||
if (Array.isArray(arrayVal) && arrayVal.length > 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (input.args.databaseSchemas.length > 0) {
|
||||
await writeConnectionSchemas({
|
||||
await writeScopeConfig({
|
||||
projectDir: input.projectDir,
|
||||
connectionId: input.connectionId,
|
||||
schemas: input.args.databaseSchemas,
|
||||
values: input.args.databaseSchemas,
|
||||
spec,
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
let discoveredSchemas: string[];
|
||||
writeSetupSection(input.io, `Discovering ${spec.promptLabel.toLowerCase()}`, [
|
||||
`Connecting to ${input.connectionId}…`,
|
||||
]);
|
||||
|
||||
let discovered: string[];
|
||||
try {
|
||||
discoveredSchemas = unique(
|
||||
discovered = unique(
|
||||
await (input.deps.listSchemas ?? defaultListSchemas)(input.projectDir, input.connectionId),
|
||||
);
|
||||
} catch (error) {
|
||||
input.io.stderr.write(
|
||||
`Could not discover PostgreSQL schemas for ${input.connectionId}; continuing with existing schema scope. ` +
|
||||
`Could not discover ${spec.promptLabel.toLowerCase()} for ${input.connectionId}; continuing with existing ${spec.noun} scope. ` +
|
||||
`Pass --database-schema to set it explicitly. ${error instanceof Error ? error.message : String(error)}\n`,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (discoveredSchemas.length === 0) {
|
||||
if (discovered.length === 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
let selectedSchemas: string[];
|
||||
if (input.args.inputMode === 'disabled' || discoveredSchemas.length === 1) {
|
||||
selectedSchemas = discoveredSchemas;
|
||||
let selected: string[];
|
||||
if (input.args.inputMode === 'disabled' || discovered.length === 1) {
|
||||
selected = discovered;
|
||||
} else {
|
||||
const initialValues = defaultSchemaSelection(discoveredSchemas);
|
||||
const preconfigured = configuredScopeValues(connection, spec).filter((v) => discovered.includes(v));
|
||||
const initialValues = preconfigured.length > 0 ? preconfigured : spec.defaultSelection(discovered);
|
||||
const choices = await input.prompts.multiselect({
|
||||
message: withMultiselectNavigation(
|
||||
'PostgreSQL schemas to scan\nKTX found multiple non-system schemas. Select every schema agents should use.',
|
||||
`${spec.promptLabel} to scan\n` +
|
||||
`KTX found multiple ${spec.nounPlural}. Select every ${spec.noun} agents should use.`,
|
||||
),
|
||||
options: discoveredSchemas.map((schema) => ({ value: schema, label: schema })),
|
||||
options: discovered.map((v) => ({ value: v, label: v })),
|
||||
initialValues,
|
||||
required: true,
|
||||
});
|
||||
if (choices.includes('back')) {
|
||||
return false;
|
||||
}
|
||||
selectedSchemas = choices.length > 0 ? choices : initialValues;
|
||||
selected = choices.length > 0 ? choices : initialValues;
|
||||
}
|
||||
|
||||
await writeConnectionSchemas({
|
||||
await writeScopeConfig({
|
||||
projectDir: input.projectDir,
|
||||
connectionId: input.connectionId,
|
||||
schemas: selectedSchemas,
|
||||
values: selected,
|
||||
spec,
|
||||
});
|
||||
writeSetupSection(input.io, `Selecting schemas for ${input.connectionId}`, [
|
||||
`Schemas: ${selectedSchemas.join(', ')}`,
|
||||
const capitalNounPlural = spec.nounPlural[0]!.toUpperCase() + spec.nounPlural.slice(1);
|
||||
writeSetupSection(input.io, `${capitalNounPlural} saved for ${input.connectionId}`, [
|
||||
`✓ ${selected.join(', ')}`,
|
||||
]);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1049,7 +1151,7 @@ async function validateAndScanConnection(input: {
|
|||
testLines.push(`Driver: ${driverDisplay}${Number.isFinite(tableCount) ? ` · Tables: ${tableCount}` : ''}`);
|
||||
writeSetupSection(input.io, `Testing ${input.connectionId}`, testLines);
|
||||
|
||||
if (!(await maybeConfigurePostgresSchemas(input))) {
|
||||
if (!(await maybeConfigureSchemaScope(input))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -211,6 +211,37 @@ describe('setup sources step', () => {
|
|||
expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io);
|
||||
});
|
||||
|
||||
it('writes Notion config with the full default knowledge create budget', async () => {
  await addPrimarySource();
  const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=1' }));

  // Non-interactive run (`inputMode: 'disabled'`) with the initial ingest turned off.
  await expect(
    runKtxSetupSourcesStep(
      {
        projectDir,
        inputMode: 'disabled',
        source: 'notion',
        sourceConnectionId: 'notion-main',
        sourceApiKeyRef: 'env:NOTION_TOKEN',
        notionCrawlMode: 'selected_roots',
        notionRootPageIds: ['page-1'],
        runInitialSourceIngest: false,
        skipSources: false,
      },
      makeIo().io,
      { validateNotion },
    ),
  ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion-main'] });

  // The stored connection must carry the default per-run create and update limits.
  expect((await readConfig()).connections['notion-main']).toMatchObject({
    driver: 'notion',
    auth_token_ref: 'env:NOTION_TOKEN',
    root_page_ids: ['page-1'],
    max_knowledge_creates_per_run: 25,
    max_knowledge_updates_per_run: 20,
  });
});
|
||||
|
||||
it('defaults interactive Metabase and Looker source setup to the only warehouse connection', async () => {
|
||||
await addPrimarySource();
|
||||
const cases: Array<{
|
||||
|
|
|
|||
|
|
@ -36,6 +36,8 @@ import { writeProjectLocalSecretReference } from './setup-secrets.js';
|
|||
|
||||
export type KtxSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion';
|
||||
|
||||
const DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN = 25;
|
||||
|
||||
export interface KtxSetupSourcesArgs {
|
||||
projectDir: string;
|
||||
inputMode: 'auto' | 'disabled';
|
||||
|
|
@ -521,7 +523,7 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC
|
|||
root_database_ids: [],
|
||||
root_data_source_ids: [],
|
||||
max_pages_per_run: 1000,
|
||||
max_knowledge_creates_per_run: 5,
|
||||
max_knowledge_creates_per_run: DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
max_knowledge_updates_per_run: 20,
|
||||
last_successful_cursor: null,
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue