mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
feat(ingest): default local ingest to isolated diffs (#128)
* docs: add isolated-diff ingestion design
* Refine isolated-diff ingestion design after adversarial review iteration 1
* Refine isolated-diff ingestion design after adversarial review iteration 2
* Refine isolated-diff ingestion design after adversarial review iteration 3
* feat: persist ingest trace events
* feat: add isolated ingest patch helpers
* feat: validate wiki body semantic references
* feat: add final ingest artifact gates
* feat: execute ingest work units in child worktrees
* feat: integrate isolated work unit patches
* feat: route selected ingest sources through isolated diffs
* test: cover isolated diff ingestion regressions
* feat: add isolated diff ingestion v1 core
* docs: document ingest trace inspection
* docs: add isolated diff ingestion v1 core plan
* fix(ingest): tighten final artifact gates
* fix(ingest): gate isolated final integration tree
* fix(ingest): persist postmortem failure traces
* fix(ingest): trace policy conflicts and cleanup child worktrees
* test(ingest): verify isolated diff postmortem coverage
* docs: add isolated diff ingestion gates and trace closure plan
* fix(ingest): gate provenance before isolated diff squash
* docs: add isolated diff ingestion provenance gate closure plan
* fix(ingest): gate final wiki references
* fix(ingest): enforce SL target connection scope
* fix(ingest): trace isolated SL target policy gates
* test(ingest): cover isolated diff reference and target gates
* chore(ingest): verify isolated diff gate closure
* docs: add isolated diff ingestion reference and target gate closure plan
* fix(ingest): gate global wiki references
* docs: add isolated diff ingestion global wiki reference gate closure plan
* fix(ingest): validate scan sources and wiki refs
* test(ingest): cover isolated diff textual conflict resolver
* test(ingest): cover isolated diff resolver integration
* feat(ingest): repair isolated diff textual conflicts
* feat(ingest): report isolated diff resolver outcomes
* test(ingest): verify isolated diff textual conflict repair
* test(ingest): align textual conflict failure coverage
* docs: add isolated diff textual conflict resolver plan
* test(ingest): cover isolated diff gate repair
* feat(ingest): add isolated diff gate repair agent
* feat(ingest): repair isolated diff semantic gate failures
* feat(ingest): wire isolated diff gate repair
* test(ingest): verify isolated diff final gate repair
* chore(ingest): verify isolated diff gate repair
* docs: add isolated diff gate repair plan
* Improve ingest progress updates
* feat(ingest): route direct-write connectors through isolated diffs
* test(ingest): cover non-metabase isolated diff routing
* feat(ingest): project metricflow semantic models before work units
* test(ingest): verify metricflow isolated projection path
* chore(ingest): verify isolated diff connector migration
* docs: add isolated diff connector migration plan
* feat(ingest): make isolated diff routing the private default
* feat(ingest): promote isolated diff to default runner path
* feat(ingest): default local ingest to isolated diffs
* chore(ingest): remove isolated diff allowlist references
* fix(ingest): preserve transient evidence for isolated work units
* docs: add isolated diff default promotion plan
* refactor(ingest): remove shared worktree WorkUnit path
* docs(ingest): align WorkUnit prompts with isolated diffs
* test(ingest): drop unused runner import
* docs: add isolated diff shared worktree removal plan
* docs: add isolated diff gate repair classification plan
* fix: restrict claude-code mcp servers
* docs: align ingest trace guidance with public CLI
---------
Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
parent
d1c84e5564
commit
e64da5a85d
66 changed files with 22346 additions and 514 deletions
|
|
@ -635,6 +635,117 @@ describe('runKtxIngest', () => {
|
|||
expect(io.stderr()).not.toContain('Metabase ingest: prod-metabase');
|
||||
});
|
||||
|
||||
it('emits structured child ingest progress during Metabase fan-out', async () => {
  // Fixture: a Metabase-configured project and a sink that records every structured progress update.
  const projectRoot = join(tempDir, 'project');
  await writeMetabaseConfig(projectRoot);
  const io = makeIo();
  const seenProgress: Array<{ percent: number; message: string; transient?: boolean }> = [];

  // Identity of the single fan-out child; reused by its started/completed notifications.
  const child = {
    metabaseConnectionId: 'prod-metabase',
    metabaseDatabaseId: 1,
    targetConnectionId: 'warehouse_a',
    jobId: 'metabase-child-1',
  };

  // Builds one planned work unit with no peers and no dependencies.
  const plannedUnit = (unitKey: string, rawFile: string) => ({
    unitKey,
    rawFiles: [rawFile],
    peerFileCount: 0,
    dependencyCount: 0,
  });

  const exitCode = await runKtxIngest(
    {
      command: 'run',
      projectDir: projectRoot,
      connectionId: 'prod-metabase',
      adapter: 'metabase',
      outputMode: 'json',
    },
    io.io,
    {
      progress: (update) => seenProgress.push(update),
      // Stubbed fan-out: replays the callbacks and memory-flow events of one child job.
      runLocalMetabaseIngest: async (fanout) => {
        fanout.progress?.onMetabaseFanoutPlanned?.({
          metabaseConnectionId: 'prod-metabase',
          children: [{ metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a' }],
        });
        fanout.progress?.onMetabaseChildStarted?.({ ...child });

        // First planning round: one work unit that starts, steps, hits an integration update and finishes.
        fanout.memoryFlow?.update({ plannedWorkUnits: [plannedUnit('metabase-col-6', 'cards/40.json')] });
        fanout.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
        fanout.memoryFlow?.emit({
          type: 'work_unit_started',
          unitKey: 'metabase-col-6',
          skills: ['sl_capture'],
          stepBudget: 40,
        });
        fanout.memoryFlow?.emit({
          type: 'work_unit_step',
          unitKey: 'metabase-col-6',
          stepIndex: 7,
          stepBudget: 40,
        });
        fanout.memoryFlow?.emit({
          type: 'stage_progress',
          stage: 'integration',
          percent: 81,
          message: 'Resolving text conflict for metabase-col-6',
        });
        fanout.memoryFlow?.emit({ type: 'work_unit_finished', unitKey: 'metabase-col-6', status: 'success' });

        // Second planning round: a different unit is planned and started after the first one finished.
        fanout.memoryFlow?.update({ plannedWorkUnits: [plannedUnit('metabase-col-7', 'cards/48.json')] });
        fanout.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
        fanout.memoryFlow?.emit({
          type: 'work_unit_started',
          unitKey: 'metabase-col-7',
          skills: ['sl_capture'],
          stepBudget: 40,
        });

        fanout.progress?.onMetabaseChildCompleted?.({ ...child, status: 'done' });
        return {
          metabaseConnectionId: 'prod-metabase',
          status: 'all_succeeded',
          totals: { workUnits: 1, failedWorkUnits: 0 },
          children: [],
        };
      },
    },
  );
  expect(exitCode).toBe(0);

  // The second round is reported as 1/1: its ordinal restarts instead of continuing from the first round.
  const expectedUpdates = [
    { percent: 45, message: 'Planned 1 task' },
    { percent: 55, message: 'Processing 1/1 tasks: metabase-col-6' },
    {
      percent: 60,
      message: 'Processing tasks: 0/1 complete, 1 active; latest metabase-col-6 step 7/40',
      transient: true,
    },
    { percent: 81, message: 'Resolving text conflict for metabase-col-6' },
    { percent: 81, message: 'Processing 1/1 tasks: metabase-col-7' },
  ];
  expect(seenProgress).toEqual(expect.arrayContaining(expectedUpdates));

  // JSON mode prints the fan-out result on stdout and keeps the plain-text banner off stderr.
  expect(io.stdout()).toContain('"status": "all_succeeded"');
  expect(io.stderr()).not.toContain('Metabase ingest: prod-metabase');
});
|
||||
|
||||
it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => {
|
||||
const projectDir = join(tempDir, 'metabase-cli-project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
@ -985,6 +1096,59 @@ describe('runKtxIngest', () => {
|
|||
expect(io.stdout()).toContain('Status: error\n');
|
||||
});
|
||||
|
||||
it('prints trace path and error status for stored failed ingest reports', async () => {
  const projectRoot = join(tempDir, 'project');
  await writeWarehouseConfig(projectRoot);
  const io = makeIo();

  // Body of a run that failed at the final gates: marked failed although no work unit is listed as failed.
  const failedBody = {
    status: 'failed',
    syncId: 'sync-failed',
    diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
    commitSha: null,
    tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl',
    failure: { phase: 'final_gates', message: 'final artifact gates failed' },
    workUnits: [],
    failedWorkUnits: [],
    reconciliationSkipped: true,
    conflictsResolved: [],
    evictionsApplied: [],
    unmappedFallbacks: [],
    evictionInputs: [],
    unresolvedCards: [],
    supersededBy: null,
    overrideOf: null,
    provenanceRows: [],
    toolTranscripts: [],
  };
  const storedReport = {
    id: 'report-failed',
    runId: 'run-failed',
    jobId: 'job-failed',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-05-17T12:00:00.000Z',
    body: failedBody,
  };
  // The report is served by a stubbed reader rather than read from disk.
  const readReportFile = vi.fn().mockResolvedValue(storedReport);

  await runKtxIngest(
    {
      command: 'status',
      projectDir: projectRoot,
      reportFile: '/project/report-failed.json',
      runId: 'run-failed',
      outputMode: 'plain',
      inputMode: 'disabled',
    },
    io.io,
    { readReportFile },
  );

  // Plain status output must surface the trace location, the error status and the failure message.
  const expectedLines = [
    'Trace: /project/.ktx/ingest-traces/job-failed/trace.jsonl',
    'Status: error',
    'Error: final artifact gates failed',
  ];
  for (const line of expectedLines) {
    expect(io.stdout()).toContain(line);
  }
});
|
||||
|
||||
it('prints a clear first failure reason when query-history work units fail', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ export interface KtxIngestDeps {
|
|||
}
|
||||
|
||||
/**
 * Reduces a stored ingest report to the value printed on its `Status:` line.
 *
 * @param report - Report snapshot whose `body.status` and `body.failedWorkUnits` are inspected.
 * @returns `'error'` when the body is marked `failed` or lists at least one failed work unit,
 *   otherwise `'done'`.
 */
function reportStatus(report: IngestReportSnapshot): 'done' | 'error' {
  // A run can fail as a whole (e.g. at the final gates) without any failed work unit
  // being recorded, so the run-level status must be consulted as well.
  const runFailed = report.body.status === 'failed';
  const hasFailedWorkUnits = report.body.failedWorkUnits.length > 0;
  return runFailed || hasFailedWorkUnits ? 'error' : 'done';
}
|
||||
|
||||
const REPORT_SOURCE_LABELS = new Map<string, string>([
|
||||
|
|
@ -174,6 +174,9 @@ function formatFailureReason(sourceKey: string, reason: string): string {
|
|||
}
|
||||
|
||||
function failedReportMessage(report: IngestReportSnapshot): string | null {
|
||||
if (report.body.status === 'failed' && report.body.failure?.message) {
|
||||
return sanitizeMemoryFlowError(report.body.failure.message);
|
||||
}
|
||||
const failedCount = report.body.failedWorkUnits.length;
|
||||
if (failedCount === 0) {
|
||||
return null;
|
||||
|
|
@ -195,6 +198,9 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void
|
|||
io.stdout.write(`Report: ${report.id}\n`);
|
||||
io.stdout.write(`Run: ${report.runId}\n`);
|
||||
io.stdout.write(`Job: ${report.jobId}\n`);
|
||||
if (report.body.tracePath) {
|
||||
io.stdout.write(`Trace: ${report.body.tracePath}\n`);
|
||||
}
|
||||
io.stdout.write(`Status: ${reportStatus(report)}\n`);
|
||||
io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`);
|
||||
io.stdout.write(`Connection: ${report.connectionId}\n`);
|
||||
|
|
@ -289,7 +295,11 @@ function formatDiffProgress(event: Extract<MemoryFlowEvent, { type: 'diff_comput
|
|||
}
|
||||
|
||||
/**
 * Returns the events of the most recent planning round, up to and including `eventIndex`.
 *
 * The window opens right after the latest `chunks_planned` event found at or before
 * `eventIndex`; when no such event exists it opens at the first event. The
 * `chunks_planned` event itself is never part of the result.
 *
 * @param snapshot - Memory-flow snapshot whose `events` list is read (not mutated).
 * @param eventIndex - Index of the last event to include.
 * @returns A new array holding the events of the current planning round.
 */
function workUnitEventsThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): MemoryFlowEvent[] {
  // Everything that has happened up to and including the event being rendered.
  const visibleEvents = snapshot.events.slice(0, eventIndex + 1);
  // `lastIndexOf` yields -1 when no plan event exists, which makes the window start at 0.
  const latestPlanIndex = visibleEvents.map((event) => event.type).lastIndexOf('chunks_planned');
  return visibleEvents.slice(latestPlanIndex + 1);
}
|
||||
|
||||
function completedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number {
|
||||
|
|
@ -313,7 +323,8 @@ function plannedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex
|
|||
if (snapshot.plannedWorkUnits.length > 0) {
|
||||
return snapshot.plannedWorkUnits.length;
|
||||
}
|
||||
const planEvent = workUnitEventsThrough(snapshot, eventIndex)
|
||||
const planEvent = snapshot.events
|
||||
.slice(0, eventIndex + 1)
|
||||
.filter((event) => event.type === 'chunks_planned')
|
||||
.at(-1);
|
||||
return planEvent?.workUnitCount ?? completedWorkUnitCountThrough(snapshot, eventIndex);
|
||||
|
|
@ -359,6 +370,12 @@ function plainIngestEventProgress(
|
|||
};
|
||||
case 'stage_skipped':
|
||||
return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` };
|
||||
case 'stage_progress':
|
||||
return {
|
||||
percent: event.percent,
|
||||
message: event.message,
|
||||
...(event.transient !== undefined ? { transient: event.transient } : {}),
|
||||
};
|
||||
case 'work_unit_started': {
|
||||
const total = plannedWorkUnitCountThrough(snapshot, eventIndex);
|
||||
const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey);
|
||||
|
|
@ -705,6 +722,25 @@ export async function runKtxIngest(
|
|||
}
|
||||
if (args.adapter === 'metabase') {
|
||||
const executeMetabaseFanout = deps.runLocalMetabaseIngest ?? runLocalMetabaseIngest;
|
||||
const runOutputMode = effectiveIngestOutputMode(args.outputMode, io, env, {
|
||||
requireInput: (args.inputMode ?? 'auto') === 'auto',
|
||||
});
|
||||
const plainProgress = shouldWritePlainIngestProgress(runOutputMode, io, env)
|
||||
? createPlainIngestProgressRenderer(args, io)
|
||||
: null;
|
||||
const structuredProgress = deps.progress
|
||||
? createPlainIngestProgressObserver(args, deps.progress)
|
||||
: null;
|
||||
const initialMemoryFlow =
|
||||
plainProgress || structuredProgress ? initialRunMemoryFlowInput(args, 'pending') : undefined;
|
||||
const memoryFlow = initialMemoryFlow
|
||||
? createMemoryFlowLiveBuffer(initialMemoryFlow, {
|
||||
onChange: (snapshot) => {
|
||||
plainProgress?.update(snapshot);
|
||||
structuredProgress?.update(snapshot);
|
||||
},
|
||||
})
|
||||
: undefined;
|
||||
const progress =
|
||||
args.outputMode === 'json' && !deps.progress
|
||||
? undefined
|
||||
|
|
@ -715,20 +751,29 @@ export async function runKtxIngest(
|
|||
: io,
|
||||
deps.progress,
|
||||
);
|
||||
const result = await executeMetabaseFanout({
|
||||
project: ingestProject,
|
||||
adapters: createAdapters(ingestProject, adapterOptions),
|
||||
metabaseConnectionId: args.connectionId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
trigger: 'manual_resync',
|
||||
jobIdFactory: deps.jobIdFactory,
|
||||
...(progress ? { progress } : {}),
|
||||
});
|
||||
if (args.outputMode === 'json') {
|
||||
io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
|
||||
} else {
|
||||
writeMetabaseFanoutStatus(result, io);
|
||||
plainProgress?.start();
|
||||
structuredProgress?.start();
|
||||
let result: LocalMetabaseFanoutResult;
|
||||
try {
|
||||
result = await executeMetabaseFanout({
|
||||
project: ingestProject,
|
||||
adapters: createAdapters(ingestProject, adapterOptions),
|
||||
metabaseConnectionId: args.connectionId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
trigger: 'manual_resync',
|
||||
jobIdFactory: deps.jobIdFactory,
|
||||
...(memoryFlow ? { memoryFlow } : {}),
|
||||
...(progress ? { progress } : {}),
|
||||
});
|
||||
plainProgress?.flush();
|
||||
if (args.outputMode === 'json') {
|
||||
io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
|
||||
} else {
|
||||
writeMetabaseFanoutStatus(result, io);
|
||||
}
|
||||
} finally {
|
||||
plainProgress?.flush();
|
||||
}
|
||||
return result.status === 'all_succeeded' ? 0 : 1;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue