Merge origin/main into merge-scan-into-ingest-v1

This commit is contained in:
Andrey Avtomonov 2026-05-14 01:40:11 +02:00
commit e501d1d81c
28 changed files with 432 additions and 71 deletions

View file

@ -418,6 +418,70 @@ describe('runKtxDoctor', () => {
expect(testIo.stdout()).toContain('ktx setup');
});
// Writes a ktx.yaml whose connections carry fields the doctor flags
// (readonly, historicSql.concurrency, historicSql.windowDays on a postgres
// dialect, sqlite file_path, notion last_successful_cursor) and checks that the
// project doctor exits 0 while printing one warning per field.
it('warns about stale and unsupported per-driver connection fields', async () => {
  process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
  process.env.WAREHOUSE_DATABASE_URL = 'postgresql://reader@example.test/warehouse';
  process.env.NOTION_TOKEN = 'notion-secret';
  // Cleanup lives in `finally` so a failing assertion or a rejected promise
  // cannot leak these variables into tests that run afterwards.
  try {
    await writeFile(
      join(tempDir, 'ktx.yaml'),
      [
        'project: warehouse',
        'connections:',
        ' warehouse:',
        ' driver: postgres',
        ' url: env:WAREHOUSE_DATABASE_URL',
        ' readonly: true',
        ' historicSql:',
        ' enabled: true',
        ' dialect: postgres',
        ' windowDays: 30',
        ' concurrency: 4',
        ' local:',
        ' driver: sqlite',
        ' file_path: ./warehouse.db',
        ' docs:',
        ' driver: notion',
        ' auth_token_ref: env:NOTION_TOKEN',
        ' crawl_mode: all_accessible',
        ' last_successful_cursor: \'{"phase":"all_accessible_pages","cursor":"cursor-1"}\'',
        'ingest:',
        ' adapters:',
        ' - live-database',
        'llm:',
        ' provider:',
        ' backend: anthropic',
        '',
      ].join('\n'),
      'utf-8',
    );
    const testIo = makeIo();
    await expect(
      runKtxDoctor(
        { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
        testIo.io,
        {
          // Stubbed probe: reports a server version with no warnings or info,
          // so the warnings asserted below come from the config checks.
          postgresQueryHistoryProbe: async () => ({
            pgServerVersion: 'PostgreSQL 16.4',
            warnings: [],
            info: [],
          }),
        },
      ),
    ).resolves.toBe(0);
    const out = testIo.stdout();
    expect(out).toContain('Warnings');
    expect(out).toContain('connections.warehouse.readonly is no longer used.');
    expect(out).toContain('connections.warehouse.historicSql.concurrency is no longer used.');
    expect(out).toContain('connections.warehouse.historicSql.windowDays does not constrain pg_stat_statements.');
    expect(out).toContain('connections.local.file_path was removed.');
    expect(out).toContain('connections.docs.last_successful_cursor is local sync state.');
  } finally {
    delete process.env.ANTHROPIC_API_KEY;
    delete process.env.WAREHOUSE_DATABASE_URL;
    delete process.env.NOTION_TOKEN;
  }
});
it('warns when semantic-search embeddings are not configured', async () => {
process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
await writeFile(

View file

@ -9,7 +9,6 @@ const bigQueryMock = vi.hoisted(() => ({
constructorInputs: [] as Array<{
connectionId: string;
connection: unknown;
maxBytesBilled?: number | string;
}>,
}));
@ -20,7 +19,7 @@ vi.mock('@ktx/connector-bigquery', () => ({
readonly id: string;
readonly driver = 'bigquery';
constructor(options: { connectionId: string; connection: unknown; maxBytesBilled?: number | string }) {
constructor(options: { connectionId: string; connection: unknown }) {
bigQueryMock.constructorInputs.push(options);
this.id = `bigquery:${options.connectionId}`;
}
@ -61,7 +60,7 @@ describe('createKtxCliScanConnector', () => {
expect(connector.driver).toBe('sqlite');
});
it('passes BigQuery max_bytes_billed from standalone config', async () => {
it('passes canonical BigQuery YAML scan limits through to the connector', async () => {
await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' });
await writeFile(
join(tempDir, 'ktx.yaml'),
@ -72,6 +71,7 @@ describe('createKtxCliScanConnector', () => {
' driver: bigquery',
' dataset_id: analytics',
' max_bytes_billed: "987654321"',
' job_timeout_ms: 30000',
'',
].join('\n'),
'utf-8',
@ -85,9 +85,13 @@ describe('createKtxCliScanConnector', () => {
expect(bigQueryMock.constructorInputs).toEqual([
expect.objectContaining({
connectionId: 'warehouse',
maxBytesBilled: '987654321',
connection: expect.objectContaining({
max_bytes_billed: '987654321',
job_timeout_ms: 30000,
}),
}),
]);
expect(bigQueryMock.constructorInputs[0]).not.toHaveProperty('maxBytesBilled');
});
it('throws for structural daemon-only fallback configs', async () => {

View file

@ -3,20 +3,6 @@ import type { KtxScanConnector } from '@ktx/context/scan';
const SUPPORTED_DRIVERS = 'sqlite, postgres, mysql, clickhouse, sqlserver, bigquery, snowflake';
/**
 * Reads `max_bytes_billed` from a connection entry and normalizes it.
 *
 * @param connection - A connection entry from the project config.
 * @returns The value when it is a finite number greater than zero, the
 *   trimmed text when it is a non-blank string, and `undefined` otherwise.
 */
function bigQueryMaxBytesBilled(
  connection: KtxLocalProject['config']['connections'][string],
): number | string | undefined {
  const configured = connection.max_bytes_billed;
  switch (typeof configured) {
    case 'number':
      // Rejects NaN, +/-Infinity, zero and negative values.
      return Number.isFinite(configured) && configured > 0 ? configured : undefined;
    case 'string': {
      const limit = configured.trim();
      return limit === '' ? undefined : limit;
    }
    default:
      return undefined;
  }
}
export async function createKtxCliScanConnector(
project: KtxLocalProject,
connectionId: string,
@ -64,12 +50,7 @@ export async function createKtxCliScanConnector(
if (driver === 'bigquery') {
const { KtxBigQueryScanConnector, isKtxBigQueryConnectionConfig } = await import('@ktx/connector-bigquery');
if (isKtxBigQueryConnectionConfig(connection)) {
const maxBytesBilled = bigQueryMaxBytesBilled(connection);
return new KtxBigQueryScanConnector({
connectionId,
connection,
...(maxBytesBilled !== undefined ? { maxBytesBilled } : {}),
});
return new KtxBigQueryScanConnector({ connectionId, connection });
}
}
if (driver === 'snowflake') {

View file

@ -252,6 +252,7 @@ describe('setup sources step', () => {
max_knowledge_creates_per_run: 25,
max_knowledge_updates_per_run: 20,
});
expect((await readConfig()).connections['notion-main']?.last_successful_cursor).toBeUndefined();
});
it('accepts former ingest subcommand names as interactive source connection ids', async () => {

View file

@ -501,7 +501,6 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC
max_pages_per_run: 1000,
max_knowledge_creates_per_run: DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN,
max_knowledge_updates_per_run: 20,
last_successful_cursor: null,
};
}

View file

@ -166,18 +166,18 @@ describe('standalone built ktx CLI smoke', () => {
expect(result.stderr).toContain("unknown command 'agent'");
});
it('runs doctor setup through the built binary', async () => {
const env = { ...process.env };
delete env.KTX_PROJECT_DIR;
const result = await runBuiltCli(['status', '--no-input'], { cwd: tempDir, env });
it('runs status setup checks through the built binary', async () => {
const result = await runBuiltCli(['status', '--verbose', '--no-input']);
expect(result.stdout).toMatch(/KTX (setup doctor|project doctor|status)/);
expect(result.stdout).toMatch(/KTX status/);
if (result.stdout.includes('No project here yet.')) {
expect(result.stdout).toContain('ktx setup');
} else {
expect(result.stdout).toContain('Node 22+');
expect(result.stdout).toContain('Workspace-local CLI');
}
expect(result.stdout).toContain('Node 22+');
expect(result.stdout).toContain('Workspace-local CLI');
expect(result.stderr === '' || result.stderr.startsWith('Project: ')).toBe(true);
expect([0, 1]).toContain(result.code);
});

View file

@ -61,6 +61,14 @@ interface WarningItem {
fix?: string;
}
/**
 * Type guard for plain keyed objects.
 *
 * @param value - Any value.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return !Array.isArray(value);
}
/**
 * Reports whether `key` is an own property of `value`.
 *
 * Goes through `Object.prototype.hasOwnProperty` rather than a method on
 * `value` itself, so objects without a prototype or with a shadowing
 * `hasOwnProperty` key are handled.
 *
 * @param value - Object to inspect.
 * @param key - Property name to look for.
 * @returns `true` when `value` has `key` as an own property, even if the
 *   stored value is `undefined`.
 */
function hasOwnField(value: Record<string, unknown>, key: string): boolean {
  const { hasOwnProperty } = Object.prototype;
  return hasOwnProperty.call(value, key);
}
export interface ProjectStatus {
projectName: string;
projectDir: string;
@ -471,6 +479,51 @@ function buildWarnings(
): WarningItem[] {
const warnings: WarningItem[] = [];
for (const [connectionId, connection] of Object.entries(config.connections)) {
const driver = String(connection.driver ?? '').toLowerCase();
if (hasOwnField(connection, 'readonly')) {
warnings.push({
message: `connections.${connectionId}.readonly is no longer used.`,
fix: `Remove connections.${connectionId}.readonly from ktx.yaml.`,
});
}
if ((driver === 'sqlite' || driver === 'sqlite3') && hasOwnField(connection, 'file_path')) {
warnings.push({
message: `connections.${connectionId}.file_path was removed.`,
fix: `Rename connections.${connectionId}.file_path to path.`,
});
}
if (driver === 'notion' && hasOwnField(connection, 'last_successful_cursor')) {
warnings.push({
message: `connections.${connectionId}.last_successful_cursor is local sync state.`,
fix: 'Remove it from ktx.yaml. KTX stores the Notion cursor in .ktx/db.sqlite.',
});
}
const historicSql = isRecord(connection.historicSql) ? connection.historicSql : null;
if (!historicSql) {
continue;
}
if (hasOwnField(historicSql, 'concurrency')) {
warnings.push({
message: `connections.${connectionId}.historicSql.concurrency is no longer used.`,
fix: `Remove connections.${connectionId}.historicSql.concurrency from ktx.yaml.`,
});
}
const historicDialect = String(historicSql.dialect ?? driver).toLowerCase();
if (
(historicDialect === 'postgres' || historicDialect === 'postgresql') &&
hasOwnField(historicSql, 'windowDays')
) {
warnings.push({
message: `connections.${connectionId}.historicSql.windowDays does not constrain pg_stat_statements.`,
fix: `Remove connections.${connectionId}.historicSql.windowDays from ktx.yaml.`,
});
}
}
for (const adapter of config.ingest.adapters) {
const requiredDrivers = ADAPTER_DRIVER_REQUIREMENT[adapter];
if (!requiredDrivers) continue;