Stabilize parallel ingest concurrency

This commit is contained in:
Andrey Avtomonov 2026-05-18 15:05:56 +02:00
parent e64da5a85d
commit 1db8a6debd
19 changed files with 1370 additions and 40 deletions

View file

@ -82,6 +82,16 @@ function deepReadyProject(
};
}
/**
 * Builds a promise whose settlement is controlled from the outside.
 *
 * @returns An object exposing the pending `promise` together with the
 * `resolve` and `reject` callbacks that settle it.
 */
function deferred<T = void>() {
  // The Promise executor runs synchronously during construction, so both
  // handles are assigned before this function returns (hence the `!`).
  let fulfill!: (value: T | PromiseLike<T>) => void;
  let fail!: (reason?: unknown) => void;
  const pending = new Promise<T>((onFulfill, onFail) => {
    [fulfill, fail] = [onFulfill, onFail];
  });
  return { promise: pending, resolve: fulfill, reject: fail };
}
describe('buildPublicIngestPlan', () => {
it('plans warehouse connections as scan targets and source connections as source ingest targets', () => {
const project = projectWithConnections({
@ -848,6 +858,62 @@ describe('runKtxPublicIngest', () => {
expect(io.stdout()).not.toContain('Debug:');
});
// Checks two things about runKtxPublicIngest with `ingest.sources.maxConcurrency: 2`:
//   1. both source targets are started before either one has finished, and
//   2. the rows written to stdout list `docs` before `prod_metabase` even though
//      `prod_metabase` is made to finish first.
it('runs public ingest targets concurrently up to ingest.sources.maxConcurrency and renders in plan order', async () => {
const io = makeIo();
const baseConfig = buildDefaultKtxProjectConfig();
const project: KtxPublicIngestProject = {
projectDir: '/tmp/project',
config: {
...baseConfig,
ingest: {
...baseConfig.ingest,
sources: { maxConcurrency: 2 },
// NOTE(review): the cast suggests `sources.maxConcurrency` is not part of the
// declared KtxProjectConfig['ingest'] shape — confirm against the config type.
} as KtxProjectConfig['ingest'],
connections: {
docs: { driver: 'notion' },
prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' },
},
},
};
// Records the order in which the stubbed runIngest is invoked per connection.
const starts: string[] = [];
// One externally-settled promise per target so the test decides when each "finishes".
const docs = deferred<number>();
const prodMetabase = deferred<number>();
const runIngest = vi.fn<NonNullable<KtxPublicIngestDeps['runIngest']>>(async (ingestArgs) => {
// Any command other than 'run' is unexpected here and reports exit code 1.
if (ingestArgs.command !== 'run') {
return 1;
}
starts.push(ingestArgs.connectionId);
if (ingestArgs.connectionId === 'docs') {
return docs.promise;
}
if (ingestArgs.connectionId === 'prod_metabase') {
return prodMetabase.promise;
}
// Unknown connection ids also report exit code 1.
return 1;
});
// Deliberately not awaited yet: the run must stay in flight while the targets are pending.
const run = runKtxPublicIngest(
{ command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled' },
io.io,
{
loadProject: vi.fn(async () => project),
runIngest,
},
);
// Neither deferred has been resolved at this point, so seeing both ids here
// means the second target was started while the first was still pending.
await vi.waitFor(() => expect(starts).toEqual(['docs', 'prod_metabase']));
// Complete the targets in reverse order to prove output order is not completion order.
prodMetabase.resolve(0);
docs.resolve(0);
await expect(run).resolves.toBe(0);
expect(runIngest).toHaveBeenCalledTimes(2);
// Keep only stdout lines that begin with one of the two connection ids,
// then compare the first whitespace-delimited token of each.
const sourceRows = io
.stdout()
.split('\n')
.filter((line) => line.startsWith('docs') || line.startsWith('prod_metabase'));
expect(sourceRows.map((line) => line.trim().split(/\s+/)[0])).toEqual(['docs', 'prod_metabase']);
});
it('prints query-history retry guidance for query-history facet failures', async () => {
const io = makeIo();
const project = deepReadyProject({

View file

@ -1,5 +1,6 @@
import { type KtxLocalProject, type KtxProjectConnectionConfig, loadKtxProject } from '@ktx/context/project';
import type { KtxProgressPort } from '@ktx/context/scan';
import pLimit from 'p-limit';
import type { KtxCliIo } from './index.js';
import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js';
import {
@ -924,9 +925,18 @@ export async function runKtxPublicIngest(
}
}
for (const target of plan.targets) {
results.push(await executePublicIngestTarget(target, args, io, deps));
}
const ingestConfig = project.config.ingest as { sources?: { maxConcurrency?: number } } | undefined;
const sourceMaxConcurrency = ingestConfig?.sources?.maxConcurrency ?? 1;
const limitTarget = pLimit(sourceMaxConcurrency);
const orderedResults = await Promise.all(
plan.targets.map((target, index) =>
limitTarget(async () => ({
index,
result: await executePublicIngestTarget(target, args, io, deps),
})),
),
);
results.push(...orderedResults.sort((left, right) => left.index - right.index).map((entry) => entry.result));
if (args.json) {
io.stdout.write(`${JSON.stringify({ plan, results }, null, 2)}\n`);