mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
Stabilize parallel ingest concurrency
This commit is contained in:
parent
e64da5a85d
commit
1db8a6debd
19 changed files with 1370 additions and 40 deletions
|
|
@ -82,6 +82,16 @@ function deepReadyProject(
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Creates a pending promise together with the callbacks that settle it, so the
 * caller can decide from the outside when and how the promise completes.
 *
 * @returns An object holding the pending `promise` plus the `resolve` and
 *   `reject` callbacks captured from that promise's executor.
 */
function deferred<T = void>(): {
  promise: Promise<T>;
  resolve: (value: T | PromiseLike<T>) => void;
  reject: (reason?: unknown) => void;
} {
  // Definite-assignment assertions: both callbacks are assigned inside the
  // Promise executor below, before they are returned.
  let resolve!: (value: T | PromiseLike<T>) => void;
  let reject!: (reason?: unknown) => void;
  const promise = new Promise<T>((promiseResolve, promiseReject) => {
    resolve = promiseResolve;
    reject = promiseReject;
  });
  return { promise, resolve, reject };
}
|
||||
|
||||
describe('buildPublicIngestPlan', () => {
|
||||
it('plans warehouse connections as scan targets and source connections as source ingest targets', () => {
|
||||
const project = projectWithConnections({
|
||||
|
|
@ -848,6 +858,62 @@ describe('runKtxPublicIngest', () => {
|
|||
expect(io.stdout()).not.toContain('Debug:');
|
||||
});
|
||||
|
||||
it('runs public ingest targets concurrently up to ingest.sources.maxConcurrency and renders in plan order', async () => {
  const io = makeIo();
  const baseConfig = buildDefaultKtxProjectConfig();
  // Project with two source connections and ingest.sources.maxConcurrency set to 2.
  const project: KtxPublicIngestProject = {
    projectDir: '/tmp/project',
    config: {
      ...baseConfig,
      ingest: {
        ...baseConfig.ingest,
        sources: { maxConcurrency: 2 },
      } as KtxProjectConfig['ingest'],
      connections: {
        docs: { driver: 'notion' },
        prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' },
      },
    },
  };
  // Records the connection ids in the order runIngest was invoked for them.
  const starts: string[] = [];
  // One externally controlled promise per connection, so the test decides when
  // each ingest "finishes".
  const docs = deferred<number>();
  const prodMetabase = deferred<number>();
  const runIngest = vi.fn<NonNullable<KtxPublicIngestDeps['runIngest']>>(async (ingestArgs) => {
    // Anything other than a `run` command is answered with exit code 1.
    if (ingestArgs.command !== 'run') {
      return 1;
    }
    starts.push(ingestArgs.connectionId);
    if (ingestArgs.connectionId === 'docs') {
      return docs.promise;
    }
    if (ingestArgs.connectionId === 'prod_metabase') {
      return prodMetabase.promise;
    }
    // Unknown connection ids are answered with exit code 1.
    return 1;
  });

  // Deliberately not awaited yet: both ingests stay pending until resolved below.
  const run = runKtxPublicIngest(
    { command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled' },
    io.io,
    {
      loadProject: vi.fn(async () => project),
      runIngest,
    },
  );

  // Both targets must have started while neither deferred has been resolved,
  // which shows they are in flight at the same time.
  await vi.waitFor(() => expect(starts).toEqual(['docs', 'prod_metabase']));
  // Complete them in the reverse of their start order.
  prodMetabase.resolve(0);
  docs.resolve(0);

  await expect(run).resolves.toBe(0);
  expect(runIngest).toHaveBeenCalledTimes(2);
  // Keep only the stdout lines that begin with one of the two connection ids and
  // check that `docs` is printed before `prod_metabase` despite finishing second.
  const sourceRows = io
    .stdout()
    .split('\n')
    .filter((line) => line.startsWith('docs') || line.startsWith('prod_metabase'));
  expect(sourceRows.map((line) => line.trim().split(/\s+/)[0])).toEqual(['docs', 'prod_metabase']);
});
|
||||
|
||||
it('prints query-history retry guidance for query-history facet failures', async () => {
|
||||
const io = makeIo();
|
||||
const project = deepReadyProject({
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { type KtxLocalProject, type KtxProjectConnectionConfig, loadKtxProject } from '@ktx/context/project';
|
||||
import type { KtxProgressPort } from '@ktx/context/scan';
|
||||
import pLimit from 'p-limit';
|
||||
import type { KtxCliIo } from './index.js';
|
||||
import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js';
|
||||
import {
|
||||
|
|
@ -924,9 +925,18 @@ export async function runKtxPublicIngest(
|
|||
}
|
||||
}
|
||||
|
||||
for (const target of plan.targets) {
|
||||
results.push(await executePublicIngestTarget(target, args, io, deps));
|
||||
}
|
||||
const ingestConfig = project.config.ingest as { sources?: { maxConcurrency?: number } } | undefined;
|
||||
const sourceMaxConcurrency = ingestConfig?.sources?.maxConcurrency ?? 1;
|
||||
const limitTarget = pLimit(sourceMaxConcurrency);
|
||||
const orderedResults = await Promise.all(
|
||||
plan.targets.map((target, index) =>
|
||||
limitTarget(async () => ({
|
||||
index,
|
||||
result: await executePublicIngestTarget(target, args, io, deps),
|
||||
})),
|
||||
),
|
||||
);
|
||||
results.push(...orderedResults.sort((left, right) => left.index - right.index).map((entry) => entry.result));
|
||||
|
||||
if (args.json) {
|
||||
io.stdout.write(`${JSON.stringify({ plan, results }, null, 2)}\n`);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue