fix: clean up ktx yaml config parameters

This commit is contained in:
Andrey Avtomonov 2026-05-13 20:02:25 +02:00
parent 3fde4438b1
commit 2c5560112a
25 changed files with 473 additions and 62 deletions

View file

@ -316,6 +316,64 @@ describe('runKtxDoctor', () => {
expect(testIo.stdout()).not.toContain('Fix: Update the Postgres parameter group or config');
});
// Writes a ktx.yaml whose connections carry stale/unsupported fields, runs the
// doctor with stubbed setup and historic-SQL checks, and verifies that the run
// resolves to 0 while printing one WARN line per offending field.
it('warns about stale and unsupported per-driver connection fields', async () => {
  // Fixture: a postgres, a sqlite and a notion connection, each with fields the doctor flags.
  const configLines = [
    'project: warehouse',
    'connections:',
    ' warehouse:',
    ' driver: postgres',
    ' url: env:WAREHOUSE_DATABASE_URL',
    ' readonly: true',
    ' historicSql:',
    ' enabled: true',
    ' dialect: postgres',
    ' windowDays: 30',
    ' concurrency: 4',
    ' local:',
    ' driver: sqlite',
    ' file_path: ./warehouse.db',
    ' docs:',
    ' driver: notion',
    ' auth_token_ref: env:NOTION_TOKEN',
    ' crawl_mode: all_accessible',
    ' last_successful_cursor: \'{"phase":"all_accessible_pages","cursor":"cursor-1"}\'',
    'ingest:',
    ' adapters:',
    ' - live-database',
    '',
  ];
  await writeFile(join(tempDir, 'ktx.yaml'), configLines.join('\n'), 'utf-8');

  const testIo = makeIo();
  const doctorRun = runKtxDoctor(
    { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
    testIo.io,
    {
      runSetupChecks: async () => [
        { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' },
      ],
      runHistoricSqlDoctorChecks: async () => [],
    },
  );
  await expect(doctorRun).resolves.toBe(0);

  // Every flagged field must show up as its own WARN line in the plain output.
  const expectedWarnings = [
    'WARN Connection config (warehouse): connections.warehouse.readonly is no longer used.',
    'WARN Connection config (warehouse): connections.warehouse.historicSql.concurrency is no longer used.',
    'WARN Connection config (warehouse): connections.warehouse.historicSql.windowDays does not constrain pg_stat_statements.',
    'WARN Connection config (local): connections.local.file_path was removed.',
    'WARN Connection config (docs): connections.docs.last_successful_cursor is local sync state.',
  ];
  for (const expectedWarning of expectedWarnings) {
    expect(testIo.stdout()).toContain(expectedWarning);
  }
});
it('warns when semantic-search embeddings are not configured', async () => {
await writeProjectConfig(tempDir, ['backend: deterministic', 'model: deterministic', 'dimensions: 8']);
const testIo = makeIo();

View file

@ -118,6 +118,104 @@ function check(status: DoctorStatus, id: string, label: string, detail: string,
return fix ? { id, label, status, detail, fix } : { id, label, status, detail };
}
/**
 * One advisory about a stale or unsupported field on a single `connections.*`
 * entry of the project config. Instances are built by `connectionConfigWarning`
 * and turned into `warn`-status doctor checks.
 */
interface ConnectionConfigWarning {
/** Check identifier: `connection-config-<connectionId>-<key>` with unsafe characters replaced by `-`. */
id: string;
/** Key of the offending entry under `connections`; shown in the check label. */
connectionId: string;
/** Human-readable description of what is wrong with the field. */
detail: string;
/** Suggested remediation text shown alongside the warning. */
fix: string;
}
/**
 * Type guard for keyed objects.
 *
 * @param value - Any value to inspect.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  // `typeof null` is 'object' and arrays are objects too, so reject both up front.
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
/**
 * Reports whether `key` is an own (non-inherited) property of `value`.
 *
 * The lookup goes through `Object` rather than a method on `value`, so it is
 * unaffected by a config object that defines its own `hasOwnProperty` key.
 *
 * @param value - Object to inspect.
 * @param key - Property name to look for.
 * @returns `true` when the property exists directly on `value`, even if its value is `undefined`.
 */
function hasOwnField(value: Record<string, unknown>, key: string): boolean {
  const descriptor = Object.getOwnPropertyDescriptor(value, key);
  return descriptor !== undefined;
}
/**
 * Builds a warning record for one problematic field of a connection.
 *
 * @param connectionId - Key of the connection under `connections`.
 * @param key - Short slug naming the offending field; becomes part of the id.
 * @param detail - Description of the problem.
 * @param fix - Suggested remediation.
 * @returns The warning, with an id of the form `connection-config-<connectionId>-<key>`
 *   in which every character outside `[a-zA-Z0-9_-]` is replaced by `-`.
 */
function connectionConfigWarning(
  connectionId: string,
  key: string,
  detail: string,
  fix: string,
): ConnectionConfigWarning {
  // Anything that is not alphanumeric, underscore or dash is normalised to a dash.
  const disallowedIdChars = /[^a-zA-Z0-9_-]/g;
  const id = `connection-config-${connectionId}-${key}`.replace(disallowedIdChars, '-');
  return { id, connectionId, detail, fix };
}
/**
 * Scans every connection in the project config for stale or unsupported fields.
 *
 * Checks, in order, per connection:
 *  1. `readonly` on any driver.
 *  2. `file_path` on `sqlite` / `sqlite3` drivers.
 *  3. `last_successful_cursor` on `notion` drivers.
 *  4. `historicSql.concurrency` when `historicSql` is an object.
 *  5. `historicSql.windowDays` when the historic dialect (falling back to the
 *     driver) is `postgres` / `postgresql`.
 *
 * @param project - Loaded project whose `config.connections` is inspected.
 * @returns Warnings in connection order, then in the check order listed above.
 */
function connectionConfigWarnings(project: KtxLocalProject): ConnectionConfigWarning[] {
  const collected: ConnectionConfigWarning[] = [];

  Object.entries(project.config.connections).forEach(([connectionId, connection]) => {
    // Small local shorthand so each rule below reads as condition + message.
    const report = (key: string, detail: string, fix: string): void => {
      collected.push(connectionConfigWarning(connectionId, key, detail, fix));
    };

    // Driver comparison is case-insensitive; a missing driver becomes ''.
    const driver = String(connection.driver ?? '').toLowerCase();
    const isSqliteDriver = driver === 'sqlite' || driver === 'sqlite3';

    if (hasOwnField(connection, 'readonly')) {
      report(
        'readonly',
        `connections.${connectionId}.readonly is no longer used.`,
        `Remove connections.${connectionId}.readonly from ktx.yaml.`,
      );
    }

    if (isSqliteDriver && hasOwnField(connection, 'file_path')) {
      report(
        'file-path',
        `connections.${connectionId}.file_path was removed.`,
        `Rename connections.${connectionId}.file_path to path.`,
      );
    }

    if (driver === 'notion' && hasOwnField(connection, 'last_successful_cursor')) {
      report(
        'last-successful-cursor',
        `connections.${connectionId}.last_successful_cursor is local sync state.`,
        'Remove it from ktx.yaml. KTX stores the Notion cursor in .ktx/db.sqlite.',
      );
    }

    // The remaining rules only apply when historicSql is an object.
    const historicSql = connection.historicSql;
    if (!isRecord(historicSql)) {
      return;
    }

    if (hasOwnField(historicSql, 'concurrency')) {
      report(
        'historic-sql-concurrency',
        `connections.${connectionId}.historicSql.concurrency is no longer used.`,
        `Remove connections.${connectionId}.historicSql.concurrency from ktx.yaml.`,
      );
    }

    // An explicit historicSql.dialect wins; otherwise the connection driver is used.
    const historicDialect = String(historicSql.dialect ?? driver).toLowerCase();
    const isPostgresDialect = historicDialect === 'postgres' || historicDialect === 'postgresql';
    if (isPostgresDialect && hasOwnField(historicSql, 'windowDays')) {
      report(
        'historic-sql-window-days',
        `connections.${connectionId}.historicSql.windowDays does not constrain pg_stat_statements.`,
        `Remove connections.${connectionId}.historicSql.windowDays from ktx.yaml.`,
      );
    }
  });

  return collected;
}
// Fixed probe string for the semantic-search health check.
// NOTE(review): presumably the text sent to the embedding backend by the doctor — usage is outside this view, confirm.
const SEMANTIC_SEARCH_HEALTH_TEXT = 'KTX semantic search doctor probe';
// Health-check timeout in milliseconds (5 seconds).
const SEMANTIC_SEARCH_HEALTH_TIMEOUT_MS = 5_000;
// Longer health-check timeout in milliseconds (120 seconds).
// NOTE(review): the name suggests it applies to locally hosted embedding backends — confirm at the call site.
const SEMANTIC_SEARCH_LOCAL_HEALTH_TIMEOUT_MS = 120_000;
@ -325,6 +423,17 @@ async function runProjectChecks(projectDir: string, deps: KtxDoctorDeps = {}): P
'Add a connection to ktx.yaml or run `ktx setup`',
),
);
for (const warning of connectionConfigWarnings(project)) {
checks.push(
check(
'warn',
warning.id,
`Connection config (${warning.connectionId})`,
warning.detail,
warning.fix,
),
);
}
checks.push(check('pass', 'storage', 'Storage', `${project.config.storage.state}/${project.config.storage.search}`));
checks.push(check('pass', 'llm-provider', 'LLM provider', project.config.llm.provider.backend));
checks.push(await runSemanticSearchEmbeddingCheck(project.config.ingest.embeddings, projectDir, deps));

View file

@ -9,7 +9,6 @@ const bigQueryMock = vi.hoisted(() => ({
constructorInputs: [] as Array<{
connectionId: string;
connection: unknown;
maxBytesBilled?: number | string;
}>,
}));
@ -20,7 +19,7 @@ vi.mock('@ktx/connector-bigquery', () => ({
readonly id: string;
readonly driver = 'bigquery';
constructor(options: { connectionId: string; connection: unknown; maxBytesBilled?: number | string }) {
constructor(options: { connectionId: string; connection: unknown }) {
bigQueryMock.constructorInputs.push(options);
this.id = `bigquery:${options.connectionId}`;
}
@ -61,7 +60,7 @@ describe('createKtxCliScanConnector', () => {
expect(connector.driver).toBe('sqlite');
});
it('passes BigQuery max_bytes_billed from standalone config', async () => {
it('passes canonical BigQuery YAML scan limits through to the connector', async () => {
await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' });
await writeFile(
join(tempDir, 'ktx.yaml'),
@ -72,6 +71,7 @@ describe('createKtxCliScanConnector', () => {
' driver: bigquery',
' dataset_id: analytics',
' max_bytes_billed: "987654321"',
' job_timeout_ms: 30000',
'',
].join('\n'),
'utf-8',
@ -85,9 +85,13 @@ describe('createKtxCliScanConnector', () => {
expect(bigQueryMock.constructorInputs).toEqual([
expect.objectContaining({
connectionId: 'warehouse',
maxBytesBilled: '987654321',
connection: expect.objectContaining({
max_bytes_billed: '987654321',
job_timeout_ms: 30000,
}),
}),
]);
expect(bigQueryMock.constructorInputs[0]).not.toHaveProperty('maxBytesBilled');
});
it('throws for structural daemon-only fallback configs', async () => {

View file

@ -3,20 +3,6 @@ import type { KtxScanConnector } from '@ktx/context/scan';
const SUPPORTED_DRIVERS = 'sqlite, postgres, mysql, clickhouse, sqlserver, bigquery, snowflake';
/**
 * Reads the `max_bytes_billed` field from a connection config entry.
 *
 * @param connection - One entry of the project's `connections` map.
 * @returns A finite positive number as-is, a non-blank string with surrounding
 *   whitespace trimmed, or `undefined` for anything else (missing, blank,
 *   zero, negative, non-finite, or another type).
 */
function bigQueryMaxBytesBilled(
  connection: KtxLocalProject['config']['connections'][string],
): number | string | undefined {
  const limit = connection.max_bytes_billed;

  if (typeof limit === 'string') {
    // Whitespace-only strings count as "not configured".
    const text = limit.trim();
    return text.length === 0 ? undefined : text;
  }

  if (typeof limit === 'number' && Number.isFinite(limit) && limit > 0) {
    return limit;
  }

  return undefined;
}
export async function createKtxCliScanConnector(
project: KtxLocalProject,
connectionId: string,
@ -64,12 +50,7 @@ export async function createKtxCliScanConnector(
if (driver === 'bigquery') {
const { KtxBigQueryScanConnector, isKtxBigQueryConnectionConfig } = await import('@ktx/connector-bigquery');
if (isKtxBigQueryConnectionConfig(connection)) {
const maxBytesBilled = bigQueryMaxBytesBilled(connection);
return new KtxBigQueryScanConnector({
connectionId,
connection,
...(maxBytesBilled !== undefined ? { maxBytesBilled } : {}),
});
return new KtxBigQueryScanConnector({ connectionId, connection });
}
}
if (driver === 'snowflake') {

View file

@ -252,6 +252,7 @@ describe('setup sources step', () => {
max_knowledge_creates_per_run: 25,
max_knowledge_updates_per_run: 20,
});
expect((await readConfig()).connections['notion-main']?.last_successful_cursor).toBeUndefined();
});
it('uses selected Notion roots when root page ids are provided even if crawl mode says all accessible', async () => {

View file

@ -495,7 +495,6 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC
max_pages_per_run: 1000,
max_knowledge_creates_per_run: DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN,
max_knowledge_updates_per_run: 20,
last_successful_cursor: null,
};
}