feat(ingest): default local ingest to isolated diffs (#128)

* docs: add isolated-diff ingestion design

* Refine isolated-diff ingestion design after adversarial review iteration 1

* Refine isolated-diff ingestion design after adversarial review iteration 2

* Refine isolated-diff ingestion design after adversarial review iteration 3

* feat: persist ingest trace events

* feat: add isolated ingest patch helpers

* feat: validate wiki body semantic references

* feat: add final ingest artifact gates

* feat: execute ingest work units in child worktrees

* feat: integrate isolated work unit patches

* feat: route selected ingest sources through isolated diffs

* test: cover isolated diff ingestion regressions

* feat: add isolated diff ingestion v1 core

* docs: document ingest trace inspection

* docs: add isolated diff ingestion v1 core plan

* fix(ingest): tighten final artifact gates

* fix(ingest): gate isolated final integration tree

* fix(ingest): persist postmortem failure traces

* fix(ingest): trace policy conflicts and cleanup child worktrees

* test(ingest): verify isolated diff postmortem coverage

* docs: add isolated diff ingestion gates and trace closure plan

* fix(ingest): gate provenance before isolated diff squash

* docs: add isolated diff ingestion provenance gate closure plan

* fix(ingest): gate final wiki references

* fix(ingest): enforce SL target connection scope

* fix(ingest): trace isolated SL target policy gates

* test(ingest): cover isolated diff reference and target gates

* chore(ingest): verify isolated diff gate closure

* docs: add isolated diff ingestion reference and target gate closure plan

* fix(ingest): gate global wiki references

* docs: add isolated diff ingestion global wiki reference gate closure plan

* fix(ingest): validate scan sources and wiki refs

* test(ingest): cover isolated diff textual conflict resolver

* test(ingest): cover isolated diff resolver integration

* feat(ingest): repair isolated diff textual conflicts

* feat(ingest): report isolated diff resolver outcomes

* test(ingest): verify isolated diff textual conflict repair

* test(ingest): align textual conflict failure coverage

* docs: add isolated diff textual conflict resolver plan

* test(ingest): cover isolated diff gate repair

* feat(ingest): add isolated diff gate repair agent

* feat(ingest): repair isolated diff semantic gate failures

* feat(ingest): wire isolated diff gate repair

* test(ingest): verify isolated diff final gate repair

* chore(ingest): verify isolated diff gate repair

* docs: add isolated diff gate repair plan

* Improve ingest progress updates

* feat(ingest): route direct-write connectors through isolated diffs

* test(ingest): cover non-metabase isolated diff routing

* feat(ingest): project metricflow semantic models before work units

* test(ingest): verify metricflow isolated projection path

* chore(ingest): verify isolated diff connector migration

* docs: add isolated diff connector migration plan

* feat(ingest): make isolated diff routing the private default

* feat(ingest): promote isolated diff to default runner path

* feat(ingest): default local ingest to isolated diffs

* chore(ingest): remove isolated diff allowlist references

* fix(ingest): preserve transient evidence for isolated work units

* docs: add isolated diff default promotion plan

* refactor(ingest): remove shared worktree WorkUnit path

* docs(ingest): align WorkUnit prompts with isolated diffs

* test(ingest): drop unused runner import

* docs: add isolated diff shared worktree removal plan

* docs: add isolated diff gate repair classification plan

* fix: restrict claude-code mcp servers

* docs: align ingest trace guidance with public CLI

---------

Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
Andrey Avtomonov 2026-05-18 13:38:06 +02:00 committed by GitHub
parent d1c84e5564
commit e64da5a85d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 22346 additions and 514 deletions

View file

@ -24,7 +24,6 @@ import {
type KtxConnectionInfo,
type KtxQueryResult,
SemanticLayerService,
type SemanticLayerSource,
type SlConnectionCatalogPort,
SlDiscoverTool,
SlEditSourceTool,
@ -76,6 +75,7 @@ import { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evide
import { HistoricSqlProjectionPostProcessor } from './adapters/historic-sql/post-processor.js';
import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './context-evidence/index.js';
import { DiffSetService } from './diff-set.service.js';
import { ingestTracePathForJob, type IngestTraceLevel } from './ingest-trace.js';
import { IngestBundleRunner } from './ingest-bundle.runner.js';
import { PageTriageService } from './page-triage/index.js';
import { createWarehouseVerificationTools } from './tools/warehouse-verification/index.js';
@ -96,6 +96,12 @@ const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url));
// Filesystem path of the `../../skills` location, resolved relative to this module's URL.
const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url));
// Name/email identity for local runs.
// NOTE(review): presumably the author recorded on changes written by local ingest — confirm against usage.
const LOCAL_AUTHOR = { name: 'KTX Local', email: 'local@ktx.local' };
// Warning text attached to validation results that passed the shape-only schema check.
const LOCAL_SHAPE_WARNING = 'Local ingest validates semantic-layer YAML shape only.';
/** Trace levels accepted from the `KTX_INGEST_TRACE_LEVEL` environment variable. */
const INGEST_TRACE_LEVELS = new Set<IngestTraceLevel>(['error', 'info', 'debug', 'trace']);

/**
 * Resolves the ingest trace level from `KTX_INGEST_TRACE_LEVEL`.
 *
 * The value is trimmed and lower-cased before matching, so `TRACE` or ` info ` select the
 * intended level instead of silently falling back to the default.
 *
 * @param env Environment to read from; defaults to `process.env`.
 * @returns The matching trace level, or `'debug'` when the variable is unset, empty, or unrecognized.
 */
function ingestTraceLevelFromEnv(env: NodeJS.ProcessEnv = process.env): IngestTraceLevel {
  const candidate = env.KTX_INGEST_TRACE_LEVEL?.trim().toLowerCase() ?? '';
  // Returning the set member itself keeps the result typed without any `as` assertion.
  for (const level of INGEST_TRACE_LEVELS) {
    if (level === candidate) {
      return level;
    }
  }
  return 'debug';
}
export interface CreateLocalBundleIngestRuntimeOptions {
project: KtxLocalProject;
@ -151,6 +157,10 @@ class LocalIngestStorage implements IngestStoragePort {
/**
 * Builds the transcript directory for a job.
 *
 * @param jobId Identifier of the ingest job.
 * @returns `<projectDir>/.ktx/ingest-transcripts/<jobId>`.
 */
resolveTranscriptDir(jobId: string): string {
  const { projectDir } = this.project;
  return join(projectDir, '.ktx/ingest-transcripts', jobId);
}
/**
 * Resolves the trace path for a job by delegating to `ingestTracePathForJob`
 * with this storage's home directory.
 *
 * @param jobId Identifier of the ingest job.
 * @returns The path computed by `ingestTracePathForJob`.
 */
resolveTracePath(jobId: string): string {
  const tracePath = ingestTracePathForJob(this.homeDir, jobId);
  return tracePath;
}
}
class LocalIngestLock implements IngestLockPort {
@ -237,22 +247,63 @@ class LocalSlPythonPort implements SlPythonPort {
}
/**
 * Semantic-layer validator for local ingest that checks the shape of a source only.
 *
 * A source that declares neither `table` nor `sql` is checked against `sourceOverlaySchema`;
 * any other source is checked against `sourceDefinitionSchema`. A passing result always
 * carries `LOCAL_SHAPE_WARNING`.
 */
class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
  /** Renders a thrown value as a human-readable message. */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Runs the schema check on an already-parsed source.
   *
   * @param sourceName Name used to prefix every reported error.
   * @param parsed Parsed source mapping.
   * @returns No errors plus the shape-only warning on success; otherwise one error per schema issue.
   */
  private validateParsedSource(sourceName: string, parsed: Record<string, unknown>) {
    const isOverlay = parsed.table == null && parsed.sql == null;
    const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
    if (result.success) {
      return { errors: [], warnings: [LOCAL_SHAPE_WARNING] };
    }
    return {
      errors: result.error.issues.map(
        (issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`,
      ),
      warnings: [],
    };
  }

  /**
   * Fallback used when the source file itself could not be read: looks the source up among
   * all sources loaded for the connection and validates that instead.
   *
   * @param deps Validation dependencies providing the semantic-layer service.
   * @param connectionId Connection whose sources are loaded.
   * @param sourceName Source to look up by name.
   * @param readError The failure from the direct file read, reported when nothing better is known.
   * @returns The shape-check result for the loaded source, or a single error describing why
   *   it could not be found or loaded.
   */
  private async validateComposedSource(
    deps: SlValidationDeps,
    connectionId: string,
    sourceName: string,
    readError: unknown,
  ) {
    try {
      const { sources, loadErrors } = await deps.semanticLayerService.loadAllSources(connectionId);
      const source = sources.find((candidate) => candidate.name === sourceName);
      if (source) {
        return this.validateParsedSource(sourceName, source as unknown as Record<string, unknown>);
      }
      // Prefer the loader's own errors; fall back to the original read failure.
      const detail = loadErrors.length > 0 ? loadErrors.join('; ') : this.describeError(readError);
      return { errors: [`${sourceName}: ${detail}`], warnings: [] };
    } catch (fallbackError) {
      return { errors: [`${sourceName}: ${this.describeError(fallbackError)}`], warnings: [] };
    }
  }

  /**
   * Validates one source by name.
   *
   * Reads the source file and shape-checks its YAML. If the file cannot be read, falls back to
   * the source as loaded via `loadAllSources`. Failures are returned as errors, never thrown.
   *
   * @param deps Validation dependencies providing the semantic-layer service.
   * @param connectionId Connection that owns the source.
   * @param sourceName Name of the source to validate.
   * @returns `errors` and `warnings` for the source.
   */
  async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
    let content: string;
    try {
      const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
      content = file.content;
    } catch (error) {
      return this.validateComposedSource(deps, connectionId, sourceName, error);
    }

    let parsed: unknown;
    try {
      parsed = YAML.parse(content);
    } catch (error) {
      return {
        errors: [`${sourceName}: invalid YAML — ${this.describeError(error)}`],
        warnings: [],
      };
    }

    // A nullish document (e.g. an empty file) would make the `.table` lookup throw a TypeError;
    // report it explicitly rather than as a YAML syntax problem.
    if (parsed == null) {
      return {
        errors: [`${sourceName}: expected a YAML mapping but the document is empty`],
        warnings: [],
      };
    }
    return this.validateParsedSource(sourceName, parsed as Record<string, unknown>);
  }
}
@ -671,6 +722,7 @@ export function createLocalBundleIngestRuntime(
workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency,
workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget,
workUnitFailureMode: options.project.config.ingest.workUnits.failureMode,
ingestTraceLevel: ingestTraceLevelFromEnv(),
},
skillsRegistry: new SkillsRegistryService({ skillsDir, logger }),
promptService,