mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-01 08:59:39 +02:00
fix: clean up ktx yaml config parameters
This commit is contained in:
parent
3fde4438b1
commit
2c5560112a
25 changed files with 473 additions and 62 deletions
|
|
@ -316,6 +316,64 @@ describe('runKtxDoctor', () => {
|
|||
expect(testIo.stdout()).not.toContain('Fix: Update the Postgres parameter group or config');
|
||||
});
|
||||
|
||||
it('warns about stale and unsupported per-driver connection fields', async () => {
  // Fixture with one connection per rule under test. YAML nesting is carried by
  // indentation, so every level uses an explicit two-space step: the fields
  // must sit under their connection id (and under `historicSql`) for paths
  // such as `connections.warehouse.historicSql.concurrency` to exist at all.
  const ktxYaml = [
    'project: warehouse',
    'connections:',
    '  warehouse:',
    '    driver: postgres',
    '    url: env:WAREHOUSE_DATABASE_URL',
    '    readonly: true',
    '    historicSql:',
    '      enabled: true',
    '      dialect: postgres',
    '      windowDays: 30',
    '      concurrency: 4',
    '  local:',
    '    driver: sqlite',
    '    file_path: ./warehouse.db',
    '  docs:',
    '    driver: notion',
    '    auth_token_ref: env:NOTION_TOKEN',
    '    crawl_mode: all_accessible',
    '    last_successful_cursor: \'{"phase":"all_accessible_pages","cursor":"cursor-1"}\'',
    'ingest:',
    '  adapters:',
    '    - live-database',
    '',
  ].join('\n');
  await writeFile(join(tempDir, 'ktx.yaml'), ktxYaml, 'utf-8');
  const testIo = makeIo();

  // Setup and historic-SQL checks are stubbed so only the config warnings
  // produced from ktx.yaml are under test; warnings must not fail the run.
  await expect(
    runKtxDoctor(
      { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
      testIo.io,
      {
        runSetupChecks: async () => [
          { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' },
        ],
        runHistoricSqlDoctorChecks: async () => [],
      },
    ),
  ).resolves.toBe(0);

  // One expected line per stale-field rule, in the order the rules are listed above.
  const expectedWarnings = [
    'WARN Connection config (warehouse): connections.warehouse.readonly is no longer used.',
    'WARN Connection config (warehouse): connections.warehouse.historicSql.concurrency is no longer used.',
    'WARN Connection config (warehouse): connections.warehouse.historicSql.windowDays does not constrain pg_stat_statements.',
    'WARN Connection config (local): connections.local.file_path was removed.',
    'WARN Connection config (docs): connections.docs.last_successful_cursor is local sync state.',
  ];
  for (const expectedWarning of expectedWarnings) {
    expect(testIo.stdout()).toContain(expectedWarning);
  }
});
|
||||
|
||||
it('warns when semantic-search embeddings are not configured', async () => {
|
||||
await writeProjectConfig(tempDir, ['backend: deterministic', 'model: deterministic', 'dimensions: 8']);
|
||||
const testIo = makeIo();
|
||||
|
|
|
|||
|
|
@ -118,6 +118,104 @@ function check(status: DoctorStatus, id: string, label: string, detail: string,
|
|||
return fix ? { id, label, status, detail, fix } : { id, label, status, detail };
|
||||
}
|
||||
|
||||
/**
 * A single stale or unsupported field found in one `connections.<id>` entry
 * of the project config, carrying everything needed to report it.
 */
interface ConnectionConfigWarning {
  /** Identifier of the resulting check. */
  id: string;
  /** Key of the connection entry the warning belongs to. */
  connectionId: string;
  /** Human-readable description of what is wrong with the field. */
  detail: string;
  /** Human-readable instruction for resolving the warning. */
  fix: string;
}
|
||||
|
||||
/**
 * Type guard for key/value objects.
 *
 * @param value - Any value.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return !Array.isArray(value);
}
|
||||
|
||||
/**
 * Reports whether `key` is an own property of `value`, regardless of the
 * property's value (a key set to `null` or `undefined` still counts).
 *
 * @param value - Object to inspect.
 * @param key - Property name to look for.
 * @returns `true` only for own properties; inherited ones are ignored.
 */
function hasOwnField(value: Record<string, unknown>, key: string): boolean {
  // Borrow the method from Object.prototype so objects that shadow or lack
  // `hasOwnProperty` are handled the same way.
  const ownPropertyCheck = Object.prototype.hasOwnProperty;
  return ownPropertyCheck.call(value, key);
}
|
||||
|
||||
/**
 * Builds a warning record for one field of one connection.
 *
 * @param connectionId - Key of the connection entry the warning is about.
 * @param key - Short slug naming the offending field; used only in the id.
 * @param detail - Description of the problem.
 * @param fix - Instruction for resolving it.
 * @returns The warning, with an id of the form
 *   `connection-config-<connectionId>-<key>` in which every character other
 *   than ASCII letters, digits, `_` and `-` is replaced by `-`.
 */
function connectionConfigWarning(
  connectionId: string,
  key: string,
  detail: string,
  fix: string,
): ConnectionConfigWarning {
  const rawId = `connection-config-${connectionId}-${key}`;
  const id = rawId.replace(/[^a-zA-Z0-9_-]/g, '-');
  return { id, connectionId, detail, fix };
}
|
||||
|
||||
/**
 * Scans every entry in `project.config.connections` for fields that are stale
 * or have no effect, and returns one warning per finding.
 *
 * Rules, applied per connection in this order:
 * - `readonly` present (any driver);
 * - `file_path` present on a `sqlite` / `sqlite3` connection;
 * - `last_successful_cursor` present on a `notion` connection;
 * - `historicSql.concurrency` present;
 * - `historicSql.windowDays` present while the historic SQL dialect
 *   (`historicSql.dialect`, falling back to the connection driver) is
 *   `postgres` / `postgresql`.
 *
 * Presence is tested by own-property check, so a field explicitly set to
 * `null` still triggers its warning. Driver and dialect are compared
 * case-insensitively.
 *
 * @param project - Loaded project whose connection config is inspected.
 * @returns Warnings in connection order, then rule order; empty when clean.
 */
function connectionConfigWarnings(project: KtxLocalProject): ConnectionConfigWarning[] {
  const collected: ConnectionConfigWarning[] = [];

  for (const [connectionId, connection] of Object.entries(project.config.connections)) {
    const report = (key: string, detail: string, fix: string): void => {
      collected.push(connectionConfigWarning(connectionId, key, detail, fix));
    };
    const driver = String(connection.driver ?? '').toLowerCase();
    const isSqlite = driver === 'sqlite' || driver === 'sqlite3';

    if (hasOwnField(connection, 'readonly')) {
      report(
        'readonly',
        `connections.${connectionId}.readonly is no longer used.`,
        `Remove connections.${connectionId}.readonly from ktx.yaml.`,
      );
    }

    if (isSqlite && hasOwnField(connection, 'file_path')) {
      report(
        'file-path',
        `connections.${connectionId}.file_path was removed.`,
        `Rename connections.${connectionId}.file_path to path.`,
      );
    }

    if (driver === 'notion' && hasOwnField(connection, 'last_successful_cursor')) {
      report(
        'last-successful-cursor',
        `connections.${connectionId}.last_successful_cursor is local sync state.`,
        'Remove it from ktx.yaml. KTX stores the Notion cursor in .ktx/db.sqlite.',
      );
    }

    // The remaining rules only apply when a historicSql mapping is present.
    const historicSql = connection.historicSql;
    if (isRecord(historicSql)) {
      if (hasOwnField(historicSql, 'concurrency')) {
        report(
          'historic-sql-concurrency',
          `connections.${connectionId}.historicSql.concurrency is no longer used.`,
          `Remove connections.${connectionId}.historicSql.concurrency from ktx.yaml.`,
        );
      }

      // An explicit dialect wins; otherwise the connection driver stands in.
      const historicDialect = String(historicSql.dialect ?? driver).toLowerCase();
      const isPostgresDialect = historicDialect === 'postgres' || historicDialect === 'postgresql';
      if (isPostgresDialect && hasOwnField(historicSql, 'windowDays')) {
        report(
          'historic-sql-window-days',
          `connections.${connectionId}.historicSql.windowDays does not constrain pg_stat_statements.`,
          `Remove connections.${connectionId}.historicSql.windowDays from ktx.yaml.`,
        );
      }
    }
  }

  return collected;
}
|
||||
|
||||
// Settings for the semantic-search health probe.
// NOTE(review): the code that consumes these is outside this view; the
// descriptions below follow the constant names only — confirm against usage.

/** Probe text (presumably the sample input sent during the health check). */
const SEMANTIC_SEARCH_HEALTH_TEXT = 'KTX semantic search doctor probe';

/** Probe timeout in milliseconds (5 seconds). */
const SEMANTIC_SEARCH_HEALTH_TIMEOUT_MS = 5_000;

/** Probe timeout in milliseconds for the "local" case (120 seconds). */
const SEMANTIC_SEARCH_LOCAL_HEALTH_TIMEOUT_MS = 120_000;
|
||||
|
|
@ -325,6 +423,17 @@ async function runProjectChecks(projectDir: string, deps: KtxDoctorDeps = {}): P
|
|||
'Add a connection to ktx.yaml or run `ktx setup`',
|
||||
),
|
||||
);
|
||||
for (const warning of connectionConfigWarnings(project)) {
|
||||
checks.push(
|
||||
check(
|
||||
'warn',
|
||||
warning.id,
|
||||
`Connection config (${warning.connectionId})`,
|
||||
warning.detail,
|
||||
warning.fix,
|
||||
),
|
||||
);
|
||||
}
|
||||
checks.push(check('pass', 'storage', 'Storage', `${project.config.storage.state}/${project.config.storage.search}`));
|
||||
checks.push(check('pass', 'llm-provider', 'LLM provider', project.config.llm.provider.backend));
|
||||
checks.push(await runSemanticSearchEmbeddingCheck(project.config.ingest.embeddings, projectDir, deps));
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ const bigQueryMock = vi.hoisted(() => ({
|
|||
constructorInputs: [] as Array<{
|
||||
connectionId: string;
|
||||
connection: unknown;
|
||||
maxBytesBilled?: number | string;
|
||||
}>,
|
||||
}));
|
||||
|
||||
|
|
@ -20,7 +19,7 @@ vi.mock('@ktx/connector-bigquery', () => ({
|
|||
readonly id: string;
|
||||
readonly driver = 'bigquery';
|
||||
|
||||
constructor(options: { connectionId: string; connection: unknown; maxBytesBilled?: number | string }) {
|
||||
constructor(options: { connectionId: string; connection: unknown }) {
|
||||
bigQueryMock.constructorInputs.push(options);
|
||||
this.id = `bigquery:${options.connectionId}`;
|
||||
}
|
||||
|
|
@ -61,7 +60,7 @@ describe('createKtxCliScanConnector', () => {
|
|||
expect(connector.driver).toBe('sqlite');
|
||||
});
|
||||
|
||||
it('passes BigQuery max_bytes_billed from standalone config', async () => {
|
||||
it('passes canonical BigQuery YAML scan limits through to the connector', async () => {
|
||||
await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' });
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
|
|
@ -72,6 +71,7 @@ describe('createKtxCliScanConnector', () => {
|
|||
' driver: bigquery',
|
||||
' dataset_id: analytics',
|
||||
' max_bytes_billed: "987654321"',
|
||||
' job_timeout_ms: 30000',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
|
|
@ -85,9 +85,13 @@ describe('createKtxCliScanConnector', () => {
|
|||
expect(bigQueryMock.constructorInputs).toEqual([
|
||||
expect.objectContaining({
|
||||
connectionId: 'warehouse',
|
||||
maxBytesBilled: '987654321',
|
||||
connection: expect.objectContaining({
|
||||
max_bytes_billed: '987654321',
|
||||
job_timeout_ms: 30000,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
expect(bigQueryMock.constructorInputs[0]).not.toHaveProperty('maxBytesBilled');
|
||||
});
|
||||
|
||||
it('throws for structural daemon-only fallback configs', async () => {
|
||||
|
|
|
|||
|
|
@ -3,20 +3,6 @@ import type { KtxScanConnector } from '@ktx/context/scan';
|
|||
|
||||
const SUPPORTED_DRIVERS = 'sqlite, postgres, mysql, clickhouse, sqlserver, bigquery, snowflake';
|
||||
|
||||
/**
 * Reads `max_bytes_billed` from a connection config and normalizes it.
 *
 * @param connection - Connection entry from the project config.
 * @returns The value when it is a finite number greater than zero, or the
 *   trimmed text when it is a non-blank string; `undefined` for anything else
 *   (missing, zero, negative, non-finite, blank, or another type).
 */
function bigQueryMaxBytesBilled(
  connection: KtxLocalProject['config']['connections'][string],
): number | string | undefined {
  const configured = connection.max_bytes_billed;
  switch (typeof configured) {
    case 'number':
      return Number.isFinite(configured) && configured > 0 ? configured : undefined;
    case 'string': {
      const text = configured.trim();
      return text.length > 0 ? text : undefined;
    }
    default:
      return undefined;
  }
}
|
||||
|
||||
export async function createKtxCliScanConnector(
|
||||
project: KtxLocalProject,
|
||||
connectionId: string,
|
||||
|
|
@ -64,12 +50,7 @@ export async function createKtxCliScanConnector(
|
|||
if (driver === 'bigquery') {
|
||||
const { KtxBigQueryScanConnector, isKtxBigQueryConnectionConfig } = await import('@ktx/connector-bigquery');
|
||||
if (isKtxBigQueryConnectionConfig(connection)) {
|
||||
const maxBytesBilled = bigQueryMaxBytesBilled(connection);
|
||||
return new KtxBigQueryScanConnector({
|
||||
connectionId,
|
||||
connection,
|
||||
...(maxBytesBilled !== undefined ? { maxBytesBilled } : {}),
|
||||
});
|
||||
return new KtxBigQueryScanConnector({ connectionId, connection });
|
||||
}
|
||||
}
|
||||
if (driver === 'snowflake') {
|
||||
|
|
|
|||
|
|
@ -252,6 +252,7 @@ describe('setup sources step', () => {
|
|||
max_knowledge_creates_per_run: 25,
|
||||
max_knowledge_updates_per_run: 20,
|
||||
});
|
||||
expect((await readConfig()).connections['notion-main']?.last_successful_cursor).toBeUndefined();
|
||||
});
|
||||
|
||||
it('uses selected Notion roots when root page ids are provided even if crawl mode says all accessible', async () => {
|
||||
|
|
|
|||
|
|
@ -495,7 +495,6 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC
|
|||
max_pages_per_run: 1000,
|
||||
max_knowledge_creates_per_run: DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
max_knowledge_updates_per_run: 20,
|
||||
last_successful_cursor: null,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue