feat(ingest): default local ingest to isolated diffs (#128)

* docs: add isolated-diff ingestion design

* Refine isolated-diff ingestion design after adversarial review iteration 1

* Refine isolated-diff ingestion design after adversarial review iteration 2

* Refine isolated-diff ingestion design after adversarial review iteration 3

* feat: persist ingest trace events

* feat: add isolated ingest patch helpers

* feat: validate wiki body semantic references

* feat: add final ingest artifact gates

* feat: execute ingest work units in child worktrees

* feat: integrate isolated work unit patches

* feat: route selected ingest sources through isolated diffs

* test: cover isolated diff ingestion regressions

* feat: add isolated diff ingestion v1 core

* docs: document ingest trace inspection

* docs: add isolated diff ingestion v1 core plan

* fix(ingest): tighten final artifact gates

* fix(ingest): gate isolated final integration tree

* fix(ingest): persist postmortem failure traces

* fix(ingest): trace policy conflicts and cleanup child worktrees

* test(ingest): verify isolated diff postmortem coverage

* docs: add isolated diff ingestion gates and trace closure plan

* fix(ingest): gate provenance before isolated diff squash

* docs: add isolated diff ingestion provenance gate closure plan

* fix(ingest): gate final wiki references

* fix(ingest): enforce SL target connection scope

* fix(ingest): trace isolated SL target policy gates

* test(ingest): cover isolated diff reference and target gates

* chore(ingest): verify isolated diff gate closure

* docs: add isolated diff ingestion reference and target gate closure plan

* fix(ingest): gate global wiki references

* docs: add isolated diff ingestion global wiki reference gate closure plan

* fix(ingest): validate scan sources and wiki refs

* test(ingest): cover isolated diff textual conflict resolver

* test(ingest): cover isolated diff resolver integration

* feat(ingest): repair isolated diff textual conflicts

* feat(ingest): report isolated diff resolver outcomes

* test(ingest): verify isolated diff textual conflict repair

* test(ingest): align textual conflict failure coverage

* docs: add isolated diff textual conflict resolver plan

* test(ingest): cover isolated diff gate repair

* feat(ingest): add isolated diff gate repair agent

* feat(ingest): repair isolated diff semantic gate failures

* feat(ingest): wire isolated diff gate repair

* test(ingest): verify isolated diff final gate repair

* chore(ingest): verify isolated diff gate repair

* docs: add isolated diff gate repair plan

* Improve ingest progress updates

* feat(ingest): route direct-write connectors through isolated diffs

* test(ingest): cover non-metabase isolated diff routing

* feat(ingest): project metricflow semantic models before work units

* test(ingest): verify metricflow isolated projection path

* chore(ingest): verify isolated diff connector migration

* docs: add isolated diff connector migration plan

* feat(ingest): make isolated diff routing the private default

* feat(ingest): promote isolated diff to default runner path

* feat(ingest): default local ingest to isolated diffs

* chore(ingest): remove isolated diff allowlist references

* fix(ingest): preserve transient evidence for isolated work units

* docs: add isolated diff default promotion plan

* refactor(ingest): remove shared worktree WorkUnit path

* docs(ingest): align WorkUnit prompts with isolated diffs

* test(ingest): drop unused runner import

* docs: add isolated diff shared worktree removal plan

* docs: add isolated diff gate repair classification plan

* fix: restrict claude-code mcp servers

* docs: align ingest trace guidance with public CLI

---------

Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
Andrey Avtomonov 2026-05-18 13:38:06 +02:00 committed by GitHub
parent d1c84e5564
commit e64da5a85d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 22346 additions and 514 deletions

View file

@ -0,0 +1,45 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { GitService } from './git.service.js';
/**
 * Builds an initialised GitService backed by a fresh temporary home directory.
 *
 * The config directory is `<homeDir>/config`. Returns the two directories
 * together with the service so tests can write files and create worktrees.
 */
async function makeGit() {
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-'));
  const configDir = join(homeDir, 'config');
  const storage = { configDir, homeDir };
  const gitIdentity = {
    userName: 'System User',
    userEmail: 'system@example.com',
    bootstrapMessage: 'init',
    bootstrapAuthor: 'system',
    bootstrapAuthorEmail: 'system@example.com',
  };
  const git = new GitService({ storage, git: gitIdentity });
  await git.onModuleInit();
  return { homeDir, configDir, git };
}
// Round-trip scenario for the patch helpers: export the diff between two commits
// to a patch file, then apply and commit it in a separate worktree created at the
// base commit.
describe('GitService patch helpers', () => {
it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
const { homeDir, configDir, git } = await makeGit();
// Commit the initial page content and remember that commit as the patch base.
await mkdir(join(configDir, 'wiki/global'), { recursive: true });
await writeFile(join(configDir, 'wiki/global/page.md'), 'old\n');
await git.commitFiles(['wiki/global/page.md'], 'add page', 'System User', 'system@example.com');
const base = await git.revParseHead();
// Commit an edit on top of the base, then export base..HEAD as a patch file.
await writeFile(join(configDir, 'wiki/global/page.md'), 'new\n');
await git.commitFiles(['wiki/global/page.md'], 'edit page', 'System User', 'system@example.com');
const patchPath = join(homeDir, 'proposal.patch');
await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);
// Apply the patch in a worktree that starts from the base commit.
const targetDir = join(homeDir, 'target');
await git.addWorktree(targetDir, 'target', base);
const targetGit = git.forWorktree(targetDir);
await targetGit.applyPatchFile3WayIndex(patchPath);
await targetGit.commitStaged('apply proposal', 'System User', 'system@example.com');
// The worktree copy of the page must now carry the edited content.
await expect(readFile(join(targetDir, 'wiki/global/page.md'), 'utf-8')).resolves.toBe('new\n');
});
});

View file

@ -1,5 +1,5 @@
import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import { dirname, join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import { noopLogger, resolveConfigDir, type KtxCoreConfig, type KtxLogger } from './config.js';
import { createSimpleGit } from './git-env.js';
@ -747,6 +747,55 @@ export class GitService {
}
}
/**
 * Writes the output of `git diff --binary --no-renames <from>..<to>` to `patchPath`.
 *
 * The parent directory of `patchPath` is created if it does not exist. The whole
 * operation runs inside `withMutationQueue`.
 *
 * @param from - Revision on the left side of the `from..to` range.
 * @param to - Revision on the right side of the `from..to` range.
 * @param patchPath - Destination file for the patch text.
 */
async writeBinaryNoRenamePatch(from: string, to: string, patchPath: string): Promise<void> {
  const revisionRange = `${from}..${to}`;
  await this.withMutationQueue(async () => {
    const diffText = await this.git.raw(['diff', '--binary', '--no-renames', revisionRange]);
    const parentDir = dirname(patchPath);
    await fs.mkdir(parentDir, { recursive: true });
    await fs.writeFile(patchPath, diffText, 'utf-8');
  });
}
/**
 * Runs `git apply --3way --index <patchPath>` inside `withMutationQueue`.
 *
 * Any rejection from the underlying git call propagates to the caller.
 *
 * @param patchPath - Path of the patch file to apply.
 */
async applyPatchFile3WayIndex(patchPath: string): Promise<void> {
  const applyArgs = ['apply', '--3way', '--index', patchPath];
  await this.withMutationQueue(async () => {
    await this.git.raw(applyArgs);
  });
}
/**
 * Commits whatever is currently staged and describes the resulting HEAD.
 *
 * When `git diff --cached --name-only` reports nothing, no commit is made and the
 * existing HEAD is described with `created: false`. Otherwise a commit is created
 * with the given message and `--author`, and HEAD is described with `created: true`.
 *
 * Fields missing from the latest log entry fall back to empty strings and the
 * epoch timestamp when nothing was committed, and to the supplied arguments and
 * the current time when a commit was created.
 *
 * @param commitMessage - Message for the new commit.
 * @param author - Author name used in the `--author` option.
 * @param authorEmail - Author email used in the `--author` option.
 * @returns Information about HEAD after the operation.
 */
async commitStaged(commitMessage: string, author: string, authorEmail: string): Promise<GitCommitInfo> {
  return this.withMutationQueue(async () => {
    const stagedNames = await this.git.diff(['--cached', '--name-only']);
    const created = stagedNames.trim() !== '';
    if (created) {
      await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` });
    }

    const commitHash = (await this.git.revparse(['HEAD'])).trim();
    const { latest } = await this.git.log({ maxCount: 1 });
    // Evaluated lazily so a timestamp is only generated when the log entry lacks a date.
    const fallbackDate = (): string => (created ? new Date() : new Date(0)).toISOString();

    return {
      commitHash,
      shortHash: commitHash.substring(0, 8),
      message: latest?.message ?? (created ? commitMessage : ''),
      author: latest?.author_name ?? (created ? author : ''),
      authorEmail: latest?.author_email ?? (created ? authorEmail : ''),
      timestamp: latest?.date ?? fallbackDate(),
      committedDate: latest?.date ? new Date(latest.date).toISOString() : fallbackDate(),
      created,
    };
  });
}
private async fileExists(path: string): Promise<boolean> {
try {
await fs.access(path);

View file

@ -138,6 +138,52 @@ describe('fetchMetabaseBundle', () => {
expect(warn).not.toHaveBeenCalled();
});
// Captures every event passed to ctx.memoryFlow.emit during a fetch and checks that
// the four expected 'stage_progress' messages for stage 'source' are among them.
it('emits memory-flow progress while fetching Metabase cards', async () => {
const events: unknown[] = [];
await fetchMetabaseBundle({
pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 },
stagedDir,
ctx: {
...makeFetchContext(),
memoryFlow: {
// Record emitted events; the other memory-flow methods are inert mocks.
emit: (event) => events.push(event),
update: vi.fn(),
finish: vi.fn(),
snapshot: vi.fn(),
},
},
clientFactory,
sourceStateReader,
});
// arrayContaining: other events may be present; only these four are required.
expect(events).toEqual(
expect.arrayContaining([
expect.objectContaining({
type: 'stage_progress',
stage: 'source',
message: 'Fetching Metabase database 42 metadata',
}),
expect.objectContaining({
type: 'stage_progress',
stage: 'source',
message: 'Fetching 1 Metabase card for database 42',
}),
expect.objectContaining({
type: 'stage_progress',
stage: 'source',
message: 'Checked 1/1 Metabase cards for database 42; wrote 1',
transient: true,
}),
expect.objectContaining({
type: 'stage_progress',
stage: 'source',
message: 'Fetched Metabase database 42: 1 cards, 0 unresolved',
}),
]),
);
});
it('routes Metabase fetch warnings through the injected logger', async () => {
const logger = {
log: vi.fn(),

View file

@ -83,6 +83,15 @@ function resolvePath(index: Map<number | 'root', CollectionNode>, collectionId:
export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Promise<void> {
const pullConfig: MetabasePullConfig = parseMetabasePullConfig(params.pullConfig);
const logger = params.logger ?? noopMetabaseFetchLogger;
// Emits a 'stage_progress' event for the 'source' stage through the optional
// memory-flow sink on the fetch context; does nothing when no sink is present.
const emitFetchProgress = (percent: number, message: string, transient = false): void => {
  // The `transient` key is only included when it is true.
  const transientField = transient ? { transient } : {};
  params.ctx.memoryFlow?.emit({
    type: 'stage_progress',
    stage: 'source',
    percent,
    message,
    ...transientField,
  });
};
const syncState = await params.sourceStateReader.getSourceState(pullConfig.metabaseConnectionId);
const mapping = syncState.mappings.find(
(m) => m.metabaseDatabaseId === pullConfig.metabaseDatabaseId && m.syncEnabled,
@ -100,6 +109,7 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
const client = await params.clientFactory.createClient(pullConfig, params.ctx);
try {
emitFetchProgress(26, `Fetching Metabase database ${pullConfig.metabaseDatabaseId} metadata`);
let mappingDatabaseName = mapping.metabaseDatabaseName;
let mappingEngine = mapping.metabaseEngine;
if (mappingDatabaseName === null) {
@ -133,6 +143,12 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
await mkdir(join(params.stagedDir, STAGED_FILES.databasesDir), { recursive: true });
const cardIdsToFetch = await resolveCardIdsToFetch(client, scope, pullConfig.metabaseDatabaseId, logger);
emitFetchProgress(
28,
`Fetching ${cardIdsToFetch.length} Metabase card${cardIdsToFetch.length === 1 ? '' : 's'} for database ${
pullConfig.metabaseDatabaseId
}`,
);
const referencedCollectionIds = new Set<number>();
let writtenCards = 0;
@ -212,7 +228,19 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
}
}
}
const knownTotal = Math.max(cardIdsToFetch.length, fetched.size + queue.length);
if (fetched.size === 1 || fetched.size % 10 === 0 || queue.length === 0) {
emitFetchProgress(
30,
`Checked ${fetched.size}/${knownTotal} Metabase cards for database ${pullConfig.metabaseDatabaseId}; wrote ${writtenCards}`,
true,
);
}
}
emitFetchProgress(
32,
`Fetched Metabase database ${pullConfig.metabaseDatabaseId}: ${writtenCards} cards, ${unresolvedCards.length} unresolved`,
);
for (const colId of referencedCollectionIds) {
const node = collectionIndex.get(colId);

View file

@ -1,10 +1,12 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
import type { SourceAdapter } from '../../types.js';
import type { MetricFlowParseResult } from './deep-parse.js';
import { MetricflowSourceAdapter } from './metricflow.adapter.js';
import { readMetricflowProjectionConfig, writeMetricflowProjectionConfig } from './projection-config.js';
function compileOnlyRequiredDepsCheck(): void {
// @ts-expect-error MetricflowSourceAdapter requires an explicit cache home.
@ -22,6 +24,25 @@ async function makeRepo(tmpRoot: string, files: Record<string, string>) {
return makeLocalGitRepo(fixtureDir, join(tmpRoot, 'origin'));
}
/**
 * Test fixture: a parse result with a single `orders` semantic model (one
 * dimension, one measure, one entity), no cross-model metrics or relationships,
 * and one warning string.
 */
function metricflowParseResult(): MetricFlowParseResult {
return {
semanticModels: [
{
name: 'orders',
description: 'Orders',
modelRef: 'orders',
dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }],
measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }],
entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }],
defaultTimeDimension: null,
},
],
crossModelMetrics: [],
relationships: [],
warnings: ['parser warning'],
};
}
describe('MetricflowSourceAdapter', () => {
let tmpRoot: string;
let stagedDir: string;
@ -127,4 +148,119 @@ describe('MetricflowSourceAdapter', () => {
await expect(readFile(join(stagedDir, 'models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models');
expect(await adapter.detect(stagedDir)).toBe(true);
});
// fetch() is given `parsedTargetTables` in its pull config; afterwards the
// projection config read back from the staged directory must contain them.
it('persists parsed target tables for deterministic projection during fetch', async () => {
const repo = await makeRepo(tmpRoot, {
'dbt_project.yml': 'name: analytics\n',
'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n',
});
await adapter.fetch?.(
{
repoUrl: repo.repoUrl,
branch: 'main',
path: null,
authToken: null,
parsedTargetTables: {
orders: {
ok: true,
catalog: null,
schema: 'analytics',
name: 'orders',
canonicalTable: 'analytics.orders',
},
},
},
stagedDir,
{ connectionId: 'warehouse-1', sourceKey: 'metricflow' },
);
// toMatchObject: only the listed fields of the persisted entry are asserted.
await expect(readMetricflowProjectionConfig(stagedDir)).resolves.toMatchObject({
parsedTargetTables: {
orders: {
ok: true,
schema: 'analytics',
name: 'orders',
},
},
});
});
// project() must obtain a worktree-scoped semantic-layer service via forWorktree(workdir)
// and write the parsed `orders` source through it, then report the parser warning and
// the touched source in its result.
it('projects parsed MetricFlow semantic models in the integration worktree', async () => {
await writeMetricflowProjectionConfig(stagedDir, {
parsedTargetTables: {
orders: {
ok: true,
catalog: null,
schema: 'analytics',
name: 'orders',
canonicalTable: 'analytics.orders',
},
},
});
// Mock returned by forWorktree(); the writeSource assertion below targets this object.
const scoped = {
getManifestEntry: vi.fn().mockResolvedValue(null),
isManifestBacked: vi.fn().mockResolvedValue(false),
loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
loadSource: vi.fn().mockResolvedValue(null),
writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
};
// Top-level service mock: only forWorktree has a configured return value.
const semanticLayerService = {
forWorktree: vi.fn().mockReturnValue(scoped),
getManifestEntry: vi.fn(),
isManifestBacked: vi.fn(),
loadAllSources: vi.fn(),
loadSource: vi.fn(),
writeSource: vi.fn(),
};
const result = await adapter.project?.({
connectionId: 'warehouse-1',
sourceKey: 'metricflow',
syncId: 'sync-1',
jobId: 'job-1',
runId: 'run-1',
stagedDir,
workdir: '/tmp/metricflow-integration',
parseArtifacts: metricflowParseResult(),
semanticLayerService: semanticLayerService as never,
});
expect(semanticLayerService.forWorktree).toHaveBeenCalledWith('/tmp/metricflow-integration');
expect(scoped.writeSource).toHaveBeenCalledWith(
'warehouse-1',
expect.objectContaining({ name: 'orders' }),
'dbt MetricFlow',
expect.any(String),
'dbt MetricFlow sync: create source orders',
{ skipValidation: true },
);
expect(result).toMatchObject({
warnings: ['parser warning'],
errors: [],
touchedSources: [{ connectionId: 'warehouse-1', sourceName: 'orders' }],
changedWikiPageKeys: [],
});
});
// With `parseArtifacts: undefined`, project() must return a result carrying a single
// error message and no warnings, touched sources, or changed wiki pages.
it('returns a projection error when parse artifacts are missing', async () => {
const result = await adapter.project?.({
connectionId: 'warehouse-1',
sourceKey: 'metricflow',
syncId: 'sync-1',
jobId: 'job-1',
runId: 'run-1',
stagedDir,
workdir: '/tmp/metricflow-integration',
parseArtifacts: undefined,
semanticLayerService: {} as never,
});
expect(result).toMatchObject({
warnings: [],
errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
touchedSources: [],
changedWikiPageKeys: [],
});
});
});

View file

@ -1,10 +1,23 @@
import { join } from 'node:path';
import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js';
import type {
ChunkResult,
DeterministicProjectionContext,
DiffSet,
FetchContext,
ProjectionResult,
SourceAdapter,
} from '../../types.js';
import { chunkMetricFlowProject } from './chunk.js';
import { detectMetricFlowStagedDir } from './detect.js';
import { parseMetricflowFiles, type MetricFlowParseResult } from './deep-parse.js';
import { fetchMetricflowRepo } from './fetch.js';
import { importMetricflowSemanticModels } from './import-semantic-models.js';
import { parseMetricFlowStagedDir, type ParsedMetricFlowProject } from './parse.js';
import {
metricflowHostTablesFromParsedTargets,
readMetricflowProjectionConfig,
writeMetricflowProjectionConfig,
} from './projection-config.js';
import { parseMetricflowPullConfig } from './pull-config.js';
export interface MetricflowSourceAdapterDeps {
@ -33,6 +46,9 @@ export class MetricflowSourceAdapter implements SourceAdapter {
cacheDir: this.resolveCacheDir(ctx.connectionId),
stagedDir,
});
await writeMetricflowProjectionConfig(stagedDir, {
parsedTargetTables: config.parsedTargetTables,
});
}
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
@ -46,6 +62,37 @@ export class MetricflowSourceAdapter implements SourceAdapter {
return { ...chunk, parseArtifacts };
}
/**
 * Imports the parsed MetricFlow semantic models for one connection.
 *
 * If `ctx.parseArtifacts` does not have the shape of a MetricFlow parse result,
 * nothing is imported and a result with a single error message is returned.
 * Otherwise the projection config is read from `ctx.stagedDir`, converted to host
 * tables, and passed to `importMetricflowSemanticModels` together with `ctx.workdir`.
 *
 * @param ctx - Projection context supplying the parse artifacts, directories,
 *   connection id, and semantic-layer service.
 * @returns The import result plus its warnings, errors, and touched sources;
 *   `changedWikiPageKeys` is always empty.
 */
async project(ctx: DeterministicProjectionContext): Promise<ProjectionResult> {
  const { parseArtifacts } = ctx;
  if (!isMetricFlowParseResult(parseArtifacts)) {
    return {
      warnings: [],
      errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
      touchedSources: [],
      changedWikiPageKeys: [],
    };
  }

  const { parsedTargetTables } = await readMetricflowProjectionConfig(ctx.stagedDir);
  const hostTables = metricflowHostTablesFromParsedTargets(parsedTargetTables);
  const result = await importMetricflowSemanticModels(
    { semanticLayerService: ctx.semanticLayerService },
    {
      connectionId: ctx.connectionId,
      parseResult: parseArtifacts,
      targetSchema: null,
      hostTables,
      workdir: ctx.workdir,
    },
  );

  const { warnings, errors, touchedSources } = result;
  return { result, warnings, errors, touchedSources, changedWikiPageKeys: [] };
}
private resolveCacheDir(connectionId: string): string {
return join(this.deps.homeDir, 'ingest-metricflow-repos', connectionId);
}
@ -54,3 +101,16 @@ export class MetricflowSourceAdapter implements SourceAdapter {
function parseMetricflowStagedDirForImport(project: ParsedMetricFlowProject): MetricFlowParseResult {
return parseMetricflowFiles(project.files);
}
/**
 * Structural type guard for a MetricFlow parse result.
 *
 * Accepts any non-null object whose `semanticModels`, `crossModelMetrics`,
 * `relationships`, and `warnings` properties are all arrays. Array contents are
 * not inspected.
 */
function isMetricFlowParseResult(value: unknown): value is MetricFlowParseResult {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  const requiredArrayKeys = ['semanticModels', 'crossModelMetrics', 'relationships', 'warnings'];
  return requiredArrayKeys.every((key) => Array.isArray(candidate[key]));
}

View file

@ -0,0 +1,54 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { z } from 'zod';
import { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js';
import type { MetricflowHostTable } from './semantic-models.js';
// File name, relative to the staged directory, where the projection config is stored.
const METRICFLOW_PROJECTION_CONFIG_FILE = 'sync-config.json';
// Shape of the persisted projection config. `parsedTargetTables` is a string-keyed
// record of parsed target tables and defaults to an empty record when absent.
const metricflowProjectionConfigSchema = z.object({
parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}),
});
/** Projection config type as produced by parsing with the schema above. */
export type MetricflowProjectionConfig = z.infer<typeof metricflowProjectionConfigSchema>;
/**
 * Validates `config` against the projection-config schema and writes it as
 * pretty-printed JSON (two-space indent, trailing newline) into `stagedDir`.
 *
 * The staged directory is created if missing. Schema validation errors are thrown
 * before anything is written.
 *
 * @param stagedDir - Directory that receives the config file.
 * @param config - Projection config to persist.
 */
export async function writeMetricflowProjectionConfig(
  stagedDir: string,
  config: MetricflowProjectionConfig,
): Promise<void> {
  const validated = metricflowProjectionConfigSchema.parse(config);
  await mkdir(stagedDir, { recursive: true });
  const configPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);
  const serialized = `${JSON.stringify(validated, null, 2)}\n`;
  await writeFile(configPath, serialized, 'utf-8');
}
/**
 * Reads and validates the projection config stored in `stagedDir`.
 *
 * A missing file (error code `ENOENT`) yields an empty config. Every other failure
 * — unreadable file, invalid JSON, schema mismatch — is rethrown unchanged.
 *
 * @param stagedDir - Directory expected to contain the config file.
 * @returns The validated projection config.
 */
export async function readMetricflowProjectionConfig(stagedDir: string): Promise<MetricflowProjectionConfig> {
  const configPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);
  try {
    const rawJson = await readFile(configPath, 'utf-8');
    return metricflowProjectionConfigSchema.parse(JSON.parse(rawJson));
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return { parsedTargetTables: {} };
  }
}
/**
 * Converts parsed target tables into host-table descriptors.
 *
 * Entries whose `ok` flag is falsy are dropped. Each remaining entry becomes a host
 * table with the record key as `id`, the table's `schema` as `db`, and an empty
 * `columns` list. The result is ordered by `id` using `localeCompare`.
 *
 * @param parsedTargetTables - Parsed target tables keyed by id.
 * @returns Host tables for the successfully parsed entries, sorted by id.
 */
export function metricflowHostTablesFromParsedTargets(
  parsedTargetTables: Record<string, ParsedTargetTable>,
): MetricflowHostTable[] {
  const hostTables: MetricflowHostTable[] = [];
  for (const [id, table] of Object.entries(parsedTargetTables)) {
    if (!table.ok) {
      continue;
    }
    hostTables.push({
      id,
      name: table.name,
      catalog: table.catalog,
      db: table.schema,
      columns: [],
    });
  }
  hostTables.sort((a, b) => a.id.localeCompare(b.id));
  return hostTables;
}

View file

@ -0,0 +1,190 @@
import { describe, expect, it, vi } from 'vitest';
import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js';
/**
 * Builds a mock wiki service backed by an in-memory page table.
 *
 * `listPageKeys` resolves to the keys of `pages`. `readPage` ignores its scope
 * arguments and resolves to `null` for unknown keys; for known keys it resolves to
 * a page whose frontmatter uses the key as `summary`, `'auto'` as `usage_mode`, and
 * the supplied `refs` / `slRefs`, with `content` defaulting to an empty string.
 */
function wikiServiceWithPages(
  pages: Record<string, { refs?: string[]; content?: string; slRefs?: string[] }>,
) {
  const readPage = vi.fn().mockImplementation((_scope: string, _scopeId: string | null, pageKey: string) => {
    const page = pages[pageKey];
    if (page) {
      const frontmatter = {
        summary: pageKey,
        usage_mode: 'auto',
        refs: page.refs,
        sl_refs: page.slRefs,
      };
      return Promise.resolve({ pageKey, frontmatter, content: page.content ?? '' });
    }
    return Promise.resolve(null);
  });
  return {
    listPageKeys: vi.fn().mockResolvedValue(Object.keys(pages)),
    readPage,
  };
}
// Covers the final artifact gates: semantic-layer references in wiki bodies and
// frontmatter, provenance raw-path checks, join-neighbor expansion of touched
// sources, and dangling wiki page references.
describe('artifact gates', () => {
// The page body names a measure (`total_contract_arr_cents`) that the loaded source
// does not define (it defines `total_contract_arr`), so the gate must reject.
it('fails the final tree when wiki body references a stale semantic-layer measure', async () => {
const wikiService = wikiServiceWithPages({
'account-segments': {
slRefs: ['mart_account_segments'],
content: 'ARR is `mart_account_segments.total_contract_arr_cents`.',
},
});
const semanticLayerService = {
loadAllSources: vi.fn().mockResolvedValue({
sources: [
{
name: 'mart_account_segments',
grain: ['account_id'],
columns: [{ name: 'account_id', type: 'string' }],
joins: [],
measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
table: 'analytics.mart_account_segments',
},
],
loadErrors: [],
}),
};
await expect(
validateFinalIngestArtifacts({
connectionIds: ['warehouse'],
changedWikiPageKeys: ['account-segments'],
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
wikiService: wikiService as never,
semanticLayerService: semanticLayerService as never,
validateTouchedSources: async () => ({ invalidSources: [], validSources: ['mart_account_segments'] }),
tableExists: async () => true,
}),
).rejects.toThrow(/unknown semantic-layer entity mart_account_segments\.total_contract_arr_cents/);
});
// A row whose raw path is in neither the current set nor the deleted set must throw.
it('fails before provenance insertion when a raw path cannot be tied to the current snapshot or eviction set', () => {
expect(() =>
validateProvenanceRawPaths({
rows: [{ rawPath: 'cards/missing.json' }],
currentRawPaths: new Set(['cards/present.json']),
deletedRawPaths: new Set(['cards/deleted.json']),
}),
).toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/);
});
// Same stale measure as the first case, but referenced from frontmatter `sl_refs`
// rather than from the page body.
it('fails measure-level wiki frontmatter sl_refs that point at missing entities', async () => {
const wikiService = wikiServiceWithPages({
'account-segments': {
slRefs: ['mart_account_segments.total_contract_arr_cents'],
content: 'ARR uses a renamed measure.',
},
});
const semanticLayerService = {
loadAllSources: vi.fn().mockResolvedValue({
sources: [
{
name: 'mart_account_segments',
grain: ['account_id'],
columns: [{ name: 'account_id', type: 'string' }],
joins: [],
measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
table: 'analytics.mart_account_segments',
},
],
loadErrors: [],
}),
};
await expect(
validateFinalIngestArtifacts({
connectionIds: ['warehouse'],
changedWikiPageKeys: ['account-segments'],
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
wikiService: wikiService as never,
semanticLayerService: semanticLayerService as never,
validateTouchedSources: async () => ({ invalidSources: [], validSources: ['warehouse:mart_account_segments'] }),
tableExists: async () => true,
}),
).rejects.toThrow(/unknown sl_refs entity mart_account_segments\.total_contract_arr_cents/);
});
// Only `accounts` is touched; `orders` and `segments` both declare joins to it, so
// all three sources must be handed to validateTouchedSources.
it('validates direct declared-join neighbors of touched semantic-layer sources', async () => {
const semanticLayerService = {
loadAllSources: vi.fn().mockResolvedValue({
sources: [
{
name: 'orders',
grain: ['order_id'],
columns: [
{ name: 'order_id', type: 'string' },
{ name: 'account_id', type: 'string' },
],
joins: [{ to: 'accounts', on: 'orders.account_id = accounts.account_id', relationship: 'many_to_one' }],
measures: [{ name: 'order_count', expr: 'count(*)' }],
},
{
name: 'accounts',
grain: ['account_id'],
columns: [{ name: 'account_id', type: 'string' }],
joins: [],
measures: [{ name: 'account_count', expr: 'count(*)' }],
},
{
name: 'segments',
grain: ['segment_id'],
columns: [
{ name: 'segment_id', type: 'string' },
{ name: 'account_id', type: 'string' },
],
joins: [{ to: 'accounts', on: 'segments.account_id = accounts.account_id', relationship: 'many_to_one' }],
measures: [],
},
],
loadErrors: [],
}),
};
const validateTouchedSources = vi.fn().mockResolvedValue({ invalidSources: [], validSources: [] });
await validateFinalIngestArtifacts({
connectionIds: ['warehouse'],
changedWikiPageKeys: [],
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'accounts' }],
wikiService: { readPage: vi.fn() } as never,
semanticLayerService: semanticLayerService as never,
validateTouchedSources,
tableExists: async () => true,
});
// The expected argument is listed in connection-then-source-name order.
expect(validateTouchedSources).toHaveBeenCalledWith([
{ connectionId: 'warehouse', sourceName: 'accounts' },
{ connectionId: 'warehouse', sourceName: 'orders' },
{ connectionId: 'warehouse', sourceName: 'segments' },
]);
});
// The only existing page references two page keys that the mock wiki service does
// not contain: one via frontmatter `refs`, one via an inline [[...]] link.
it('fails final gates when a changed wiki page references a missing wiki page', async () => {
const wikiService = wikiServiceWithPages({
'account-segments': {
refs: ['missing-frontmatter-page'],
content: 'See [[missing-inline-page]] for the related process.',
},
});
const semanticLayerService = {
loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
};
await expect(
validateFinalIngestArtifacts({
connectionIds: ['warehouse'],
changedWikiPageKeys: ['account-segments'],
touchedSlSources: [],
wikiService: wikiService as never,
semanticLayerService: semanticLayerService as never,
validateTouchedSources: async () => ({ invalidSources: [], validSources: [] }),
tableExists: async () => true,
}),
).rejects.toThrow(
/wiki references target missing page\(s\): account-segments -> missing-frontmatter-page, account-segments -> missing-inline-page/,
);
});
});

View file

@ -0,0 +1,188 @@
import type { SemanticLayerService } from '../sl/index.js';
import type { TouchedSlSource } from '../tools/index.js';
import type { KnowledgeWikiService } from '../wiki/index.js';
import { findMissingWikiRefs } from '../wiki/wiki-ref-validation.js';
import { findInvalidWikiBodyRefs } from './wiki-body-refs.js';
/**
 * Outcome of validating a set of touched semantic-layer sources.
 * Each `invalidSources` entry is reported as a final-gate error.
 */
export interface TouchedValidationResult {
invalidSources: string[];
validSources: string[];
}
/** Everything the final artifact gates need to validate an ingest result. */
export interface FinalArtifactGateInput {
/** Connections whose semantic-layer sources are loaded for reference checks. */
connectionIds: string[];
/** Wiki pages (read from the GLOBAL scope) whose references are validated. */
changedWikiPageKeys: string[];
/** Semantic-layer sources touched by the ingest; join neighbors are added before validation. */
touchedSlSources: TouchedSlSource[];
wikiService: KnowledgeWikiService;
semanticLayerService: SemanticLayerService;
/** Caller-supplied validator for the expanded touched-source list. */
validateTouchedSources(touched: TouchedSlSource[]): Promise<TouchedValidationResult>;
/** Caller-supplied table existence check, forwarded to the wiki body reference check. */
tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
/**
 * Input for the provenance raw-path check: every row's `rawPath` must appear in
 * either `currentRawPaths` or `deletedRawPaths`.
 */
export interface ProvenanceRawPathValidationInput {
rows: Array<{ rawPath: string }>;
currentRawPaths: Set<string>;
deletedRawPaths: Set<string>;
}
/**
 * Splits a semantic-layer reference of the form `[connection/]source[.entity]`.
 *
 * The connection id is the text before the first `/` (or `null` when there is no
 * slash). Within the remainder, the source name is the text before the first `.`
 * and the entity name is everything after it (or `null` when there is no dot).
 *
 * The entity name keeps any further dots. `String.prototype.split` with a limit
 * discards the segments beyond the limit, so splitting with `split('.', 2)` would
 * turn `source.a.b` into entity `a` and let a malformed reference pass validation
 * whenever `a` happens to exist.
 *
 * @param ref - Raw reference string.
 * @returns The parsed connection id, source name, and entity name.
 */
function parseSlRef(ref: string): { connectionId: string | null; sourceName: string; entityName: string | null } {
  const slashIndex = ref.indexOf('/');
  const connectionId = slashIndex === -1 ? null : ref.slice(0, slashIndex);
  const sourcePath = slashIndex === -1 ? ref : ref.slice(slashIndex + 1);

  const dotIndex = sourcePath.indexOf('.');
  if (dotIndex === -1) {
    return { connectionId, sourceName: sourcePath, entityName: null };
  }
  return {
    connectionId,
    sourceName: sourcePath.slice(0, dotIndex),
    entityName: sourcePath.slice(dotIndex + 1),
  };
}
/**
 * Collects the names of every measure, column, and segment declared on a source.
 * Missing collections are treated as empty.
 */
function slEntityNames(source: Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'][number]): Set<string> {
  const names = new Set<string>();
  for (const group of [source.measures, source.columns, source.segments]) {
    for (const entry of group ?? []) {
      names.add(entry.name);
    }
  }
  return names;
}
/**
 * Removes duplicate (connectionId, sourceName) pairs, keeping the first occurrence,
 * and returns the survivors ordered by connection id and then source name
 * (both compared with `localeCompare`). The input array is not modified.
 */
function uniqueTouchedSources(sources: TouchedSlSource[]): TouchedSlSource[] {
  const firstByKey = new Map<string, TouchedSlSource>();
  for (const source of sources) {
    const key = `${source.connectionId}:${source.sourceName}`;
    if (!firstByKey.has(key)) {
      firstByKey.set(key, source);
    }
  }
  return [...firstByKey.values()].sort(
    (a, b) => a.connectionId.localeCompare(b.connectionId) || a.sourceName.localeCompare(b.sourceName),
  );
}
/**
 * Extends the touched sources with their direct join neighbors.
 *
 * For every connection in `input.connectionIds` that has at least one touched
 * source, all sources of that connection are loaded and two kinds of neighbors are
 * added: the join targets of each touched source, and every source that declares a
 * join to a touched source. The combined list is de-duplicated and sorted by
 * `uniqueTouchedSources`.
 */
async function expandTouchedSlSourcesWithDirectJoinNeighbors(input: FinalArtifactGateInput): Promise<TouchedSlSource[]> {
  const touchedNamesByConnection = new Map<string, Set<string>>();
  for (const { connectionId, sourceName } of input.touchedSlSources) {
    let names = touchedNamesByConnection.get(connectionId);
    if (!names) {
      names = new Set<string>();
      touchedNamesByConnection.set(connectionId, names);
    }
    names.add(sourceName);
  }

  const collected: TouchedSlSource[] = input.touchedSlSources.slice();
  for (const connectionId of input.connectionIds) {
    const touchedNames = touchedNamesByConnection.get(connectionId);
    if (touchedNames === undefined || touchedNames.size === 0) {
      continue;
    }
    const { sources } = await input.semanticLayerService.loadAllSources(connectionId);
    for (const source of sources) {
      const joins = source.joins ?? [];
      // Outgoing edges: a touched source pulls in everything it joins to.
      if (touchedNames.has(source.name)) {
        collected.push(...joins.map((join) => ({ connectionId, sourceName: join.to })));
      }
      // Incoming edges: any source joining to a touched source is pulled in too.
      if (joins.some((join) => touchedNames.has(join.to))) {
        collected.push({ connectionId, sourceName: source.name });
      }
    }
  }
  return uniqueTouchedSources(collected);
}
/**
 * Checks the frontmatter `sl_refs` of every changed wiki page against the loaded
 * semantic-layer sources.
 *
 * A ref with a connection prefix is looked up in that connection only; a ref
 * without one is looked up in each of `input.connectionIds` in order. An unknown
 * source yields an "unknown sl_refs entry" error; a known source with an unknown
 * entity name yields an "unknown sl_refs entity" error. Pages that cannot be read
 * are skipped.
 *
 * @returns One error message per failing reference, in page and ref order.
 */
async function validateWikiSlRefs(input: FinalArtifactGateInput): Promise<string[]> {
  type LoadedSources = Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'];

  const sourcesByConnection = new Map<string, LoadedSources>();
  for (const connectionId of input.connectionIds) {
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    sourcesByConnection.set(connectionId, loaded.sources);
  }

  // Returns the first source with the given name among the candidate connections.
  const findSource = (candidateConnections: string[], sourceName: string): LoadedSources[number] | undefined => {
    for (const connectionId of candidateConnections) {
      const match = sourcesByConnection.get(connectionId)?.find((candidate) => candidate.name === sourceName);
      if (match) {
        return match;
      }
    }
    return undefined;
  };

  const problems: string[] = [];
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    for (const ref of page.frontmatter.sl_refs ?? []) {
      const { connectionId, sourceName, entityName } = parseSlRef(ref);
      const source = findSource(connectionId ? [connectionId] : input.connectionIds, sourceName);
      if (!source) {
        problems.push(`${pageKey}: unknown sl_refs entry ${ref}`);
      } else if (entityName && !slEntityNames(source).has(entityName)) {
        problems.push(`${pageKey}: unknown sl_refs entity ${ref}`);
      }
    }
  }
  return problems;
}
/**
 * Finds wiki references on changed pages that point at missing pages.
 *
 * Each changed page is read from the GLOBAL scope and its frontmatter `refs` and
 * content are passed to `findMissingWikiRefs`. Pages that cannot be read are skipped.
 *
 * @returns Entries of the form `<pageKey> -> <missingRef>`.
 */
async function validateWikiRefs(input: FinalArtifactGateInput): Promise<string[]> {
  const { wikiService, changedWikiPageKeys } = input;
  const danglingRefs: string[] = [];
  for (const pageKey of changedWikiPageKeys) {
    const page = await wikiService.readPage('GLOBAL', null, pageKey);
    if (page) {
      const missingTargets = await findMissingWikiRefs({
        wikiService,
        scope: 'GLOBAL',
        scopeId: null,
        pageKey,
        refs: page.frontmatter.refs,
        content: page.content,
      });
      for (const target of missingTargets) {
        danglingRefs.push(`${pageKey} -> ${target}`);
      }
    }
  }
  return danglingRefs;
}
/**
 * Runs the final artifact gates and throws if any of them reports a problem.
 *
 * Gates run in this order, and all findings are accumulated before throwing:
 * 1. semantic-layer validation of the touched sources plus their direct join neighbors,
 * 2. `sl_refs` frontmatter references on changed wiki pages,
 * 3. wiki-to-wiki references that point at missing pages,
 * 4. semantic-layer references found in changed wiki page bodies.
 *
 * @param input - Services, changed page keys, connection ids and validators for the gates.
 * @throws Error whose message starts with `final artifact gates failed:` followed by one
 *   finding per line.
 */
export async function validateFinalIngestArtifacts(input: FinalArtifactGateInput): Promise<void> {
  const sourcesToValidate = await expandTouchedSlSourcesWithDirectJoinNeighbors(input);
  const validation = await input.validateTouchedSources(sourcesToValidate);

  const failures: string[] = [];
  for (const source of validation.invalidSources) {
    failures.push(`semantic-layer validation failed for ${source}`);
  }

  const slRefFailures = await validateWikiSlRefs(input);
  failures.push(...slRefFailures);

  const danglingWikiRefs = await validateWikiRefs(input);
  if (danglingWikiRefs.length > 0) {
    // All dangling references are reported together on a single line.
    failures.push(`wiki references target missing page(s): ${danglingWikiRefs.join(', ')}`);
  }

  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (page) {
      const bodyFailures = await findInvalidWikiBodyRefs({
        pageKey,
        body: page.content,
        visibleConnectionIds: input.connectionIds,
        loadSources: async (connectionId) => {
          const { sources } = await input.semanticLayerService.loadAllSources(connectionId);
          return sources;
        },
        tableExists: input.tableExists,
      });
      failures.push(...bodyFailures);
    }
  }

  if (failures.length === 0) {
    return;
  }
  throw new Error(`final artifact gates failed:\n${failures.join('\n')}`);
}
/**
 * Ensures every provenance row points at a raw path that belongs to this snapshot, i.e.
 * one that is either current or recorded as deleted.
 *
 * @param input - Rows to check plus the sets of current and deleted raw paths.
 * @throws Error naming the first row whose `rawPath` is in neither set.
 */
export function validateProvenanceRawPaths(input: ProvenanceRawPathValidationInput): void {
  for (const { rawPath } of input.rows) {
    const belongsToSnapshot = input.currentRawPaths.has(rawPath) || input.deletedRawPaths.has(rawPath);
    if (belongsToSnapshot) {
      continue;
    }
    throw new Error(`provenance row references raw path outside this snapshot: ${rawPath}`);
  }
}

View file

@ -0,0 +1,136 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js';
import { FileIngestTraceWriter } from './ingest-trace.js';
/**
 * Creates a throwaway worktree for the repair tests: one wiki page that references a
 * non-existent measure, the semantic-layer source it should reference, and a trace writer
 * that logs to `trace.jsonl` under the same temp root.
 */
async function makeHarness() {
  const root = await mkdtemp(join(tmpdir(), 'ktx-final-gate-repair-'));
  const workdir = join(root, 'workdir');
  const wikiDir = join(workdir, 'wiki/global');
  const semanticLayerDir = join(workdir, 'semantic-layer/warehouse');
  await mkdir(wikiDir, { recursive: true });
  await mkdir(semanticLayerDir, { recursive: true });

  // The page mentions `total_contract_arr_cents`, which the source below does not define.
  const wikiPage =
    '---\nsummary: Account segments\nusage_mode: auto\n---\n\nARR uses `mart_account_segments.total_contract_arr_cents`.\n';
  await writeFile(join(wikiDir, 'account-segments.md'), wikiPage, 'utf-8');

  const semanticLayerSource =
    'name: mart_account_segments\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n';
  await writeFile(join(semanticLayerDir, 'mart_account_segments.yaml'), semanticLayerSource, 'utf-8');

  const trace = new FileIngestTraceWriter({
    tracePath: join(root, 'trace.jsonl'),
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    runId: 'run-1',
    syncId: 'sync-1',
    level: 'trace',
  });
  return { root, workdir, trace };
}
describe('finalGateRepairPaths', () => {
  it('derives sorted wiki and semantic-layer file paths', () => {
    // Both inputs contain duplicates so the test also pins de-duplication.
    const changedWikiPageKeys = ['account-segments', 'overview', 'account-segments'];
    const touchedSlSources = [
      { connectionId: 'warehouse', sourceName: 'mart_account_segments' },
      { connectionId: 'warehouse', sourceName: 'orders' },
      { connectionId: 'warehouse', sourceName: 'orders' },
    ];

    const repairPaths = finalGateRepairPaths({ changedWikiPageKeys, touchedSlSources });

    expect(repairPaths).toEqual([
      'semantic-layer/warehouse/mart_account_segments.yaml',
      'semantic-layer/warehouse/orders.yaml',
      'wiki/global/account-segments.md',
      'wiki/global/overview.md',
    ]);
  });
});
// End-to-end tests for repairFinalGateFailure, using a stubbed agent runner that drives the
// repair tools directly instead of calling a model.
describe('repairFinalGateFailure', () => {
it('lets the repair agent read gate errors and edit only allowed files', async () => {
const { workdir, trace } = await makeHarness();
const agentRunner = {
// The stub plays the agent: read the gate error, read the page, attempt a forbidden
// write (must be rejected), then write the corrected page.
runLoop: vi.fn(async (params: any) => {
const error = await params.toolSet.read_gate_error.execute({});
expect(error.markdown).toContain('total_contract_arr_cents');
const page = await params.toolSet.read_repair_file.execute({
path: 'wiki/global/account-segments.md',
});
expect(page.markdown).toContain('total_contract_arr_cents');
// A path outside allowedPaths must be refused by the write tool.
await expect(
params.toolSet.write_repair_file.execute({
path: 'wiki/global/other.md',
content: 'not allowed',
}),
).rejects.toThrow(/gate repair path not allowed/);
await params.toolSet.write_repair_file.execute({
path: 'wiki/global/account-segments.md',
content: page.markdown.replace('total_contract_arr_cents', 'total_contract_arr'),
});
return { stopReason: 'natural' as const };
}),
};
const result = await repairFinalGateFailure({
agentRunner,
workdir,
gateError:
'final artifact gates failed:\naccount-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
allowedPaths: ['wiki/global/account-segments.md'],
trace,
repairKind: 'final_artifact_gate',
maxAttempts: 1,
stepBudget: 8,
});
expect(result).toEqual({
status: 'repaired',
attempts: 1,
changedPaths: ['wiki/global/account-segments.md'],
});
// The edit must have landed on disk and the success event must be in the trace file.
await expect(readFile(join(workdir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.toContain(
'total_contract_arr',
);
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_repaired');
// The agent loop must be invoked with the repair model role, the caller's step budget
// and the gate-repair telemetry tags.
expect(agentRunner.runLoop).toHaveBeenCalledWith(
expect.objectContaining({
modelRole: 'repair',
stepBudget: 8,
telemetryTags: expect.objectContaining({
operationName: 'ingest-isolated-diff-gate-repair',
repairKind: 'final_artifact_gate',
}),
}),
);
});
it('returns failed when the repair agent edits no allowed file', async () => {
const { workdir, trace } = await makeHarness();
const result = await repairFinalGateFailure({
// This stub finishes naturally without calling any tool, so no path is ever edited.
agentRunner: { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) },
workdir,
gateError: 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity',
allowedPaths: ['wiki/global/account-segments.md'],
trace,
repairKind: 'final_artifact_gate',
maxAttempts: 1,
stepBudget: 8,
});
expect(result).toEqual({
status: 'failed',
attempts: 1,
reason: 'gate repair completed without editing an allowed path',
});
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_failed');
});
});

View file

@ -0,0 +1,230 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import { z } from 'zod';
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js';
import type { TouchedSlSource } from '../tools/index.js';
import type { IngestTraceWriter } from './ingest-trace.js';
import { traceTimed } from './ingest-trace.js';
/** Which gate produced the failure being repaired; forwarded to prompts, traces and telemetry tags. */
type FinalGateRepairKind = 'patch_semantic_gate' | 'final_artifact_gate';
/**
 * Outcome of repairFinalGateFailure.
 * - `repaired`: an attempt ended with at least one allowed path written; `changedPaths` is sorted.
 * - `failed`: no attempt produced an edit; `reason` is the last recorded failure.
 */
export type FinalGateRepairResult =
| { status: 'repaired'; attempts: number; changedPaths: string[] }
| { status: 'failed'; attempts: number; reason: string };
/** Arguments for repairFinalGateFailure. */
export interface RepairFinalGateFailureInput {
/** Runner whose `runLoop` executes the repair agent. */
agentRunner: AgentRunnerPort;
/** Directory that allowed paths are resolved against when the tools read and write files. */
workdir: string;
/** Gate failure text shown to the agent via the prompt and the `read_gate_error` tool. */
gateError: string;
/** Repository-relative paths the agent may read and write; normalized before use. */
allowedPaths: string[];
/** Trace writer that receives the gate-repair events. */
trace: IngestTraceWriter;
repairKind: FinalGateRepairKind;
/** Maximum number of agent attempts; defaults to 1. */
maxAttempts?: number;
/** Step budget passed to each agent loop; defaults to 16. */
stepBudget?: number;
}
/** Input schema for the `read_repair_file` tool: a non-empty path. */
const readRepairFileSchema = z.object({
path: z.string().min(1),
});
/** Input schema for the `write_repair_file` tool: a non-empty path and the full replacement text. */
const writeRepairFileSchema = z.object({
path: z.string().min(1),
content: z.string(),
});
/**
 * Canonicalizes a tool-supplied path into a forward-slash, repository-relative form.
 *
 * Backslashes are treated as separators, and leading or repeated separators are dropped.
 *
 * @param path - Path as supplied by the caller or the agent.
 * @returns The path segments joined with `/`.
 * @throws Error when the path has no segments or contains a `.` or `..` segment.
 */
function normalizeRepoPath(path: string): string {
  const segments: string[] = [];
  for (const segment of path.split(/[\\/]/)) {
    if (segment.length === 0) {
      // Produced by leading, trailing or doubled separators.
      continue;
    }
    if (segment === '.' || segment === '..') {
      throw new Error(`gate repair path must be a repository-relative path: ${path}`);
    }
    segments.push(segment);
  }
  if (segments.length === 0) {
    throw new Error(`gate repair path must be a repository-relative path: ${path}`);
  }
  return segments.join('/');
}
/**
 * Normalizes `path` and verifies that it is one of the allow-listed repair paths.
 *
 * @param path - Path supplied to a repair tool.
 * @param allowedPaths - Normalized paths the agent may touch.
 * @returns The normalized path.
 * @throws Error when normalization fails or the normalized path is not allow-listed.
 */
function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string {
  const repoPath = normalizeRepoPath(path);
  if (allowedPaths.has(repoPath)) {
    return repoPath;
  }
  throw new Error(`gate repair path not allowed: ${repoPath}`);
}
/**
 * Reads a UTF-8 file, treating a missing file as a normal outcome.
 *
 * @param path - File to read.
 * @returns `{ exists: true, content }` on success, or `{ exists: false, content: '' }` when
 *   the read fails with `ENOENT`.
 * @throws Any other read error, unchanged.
 */
async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> {
  let content: string;
  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    const fileIsMissing =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!fileIsMissing) {
      throw error;
    }
    return { exists: false, content: '' };
  }
  return { exists: true, content };
}
/**
 * Builds the fixed system prompt for the gate-repair agent: a `<role>` section followed by
 * the `<rules>` the agent must follow.
 *
 * @returns The prompt text, one entry per line.
 */
function buildGateRepairSystemPrompt(): string {
  const roleSection = [
    '<role>',
    'You repair one KTX isolated-diff artifact gate failure inside the integration worktree.',
    '</role>',
  ];
  const rulesSection = [
    '<rules>',
    '- Use read_gate_error first.',
    '- Read only files exposed by read_repair_file.',
    '- Edit only paths exposed by write_repair_file.',
    '- Prefer the smallest text edit that makes the gate pass.',
    '- Preserve accepted work-unit, reconciliation, and deterministic projection content.',
    '- Do not invent warehouse facts, business definitions, or semantic-layer entities.',
    '- If the gate error requires choosing between conflicting facts without evidence, stop without editing.',
    '</rules>',
  ];
  return [...roleSection, ...rulesSection].join('\n');
}
/**
 * Builds the per-attempt user prompt: repair kind, attempt counter, the allow-listed files
 * as a bullet list, the gate error text and closing instructions.
 *
 * @param input - Values interpolated into the prompt.
 * @returns The prompt text.
 */
function buildGateRepairUserPrompt(input: {
  gateError: string;
  allowedPaths: string[];
  repairKind: FinalGateRepairKind;
  attempt: number;
  maxAttempts: number;
}): string {
  // An empty allow-list still contributes one (empty) line.
  const allowedFileBullets = input.allowedPaths.map((path) => `- ${path}`).join('\n');
  const promptLines = [
    'Repair isolated-diff artifact gates.',
    `Repair kind: ${input.repairKind}`,
    `Attempt: ${input.attempt} of ${input.maxAttempts}`,
    'Allowed files:',
    allowedFileBullets,
    'Gate error:',
    input.gateError,
    'Use read_gate_error first. Then inspect only the allowed files, write the',
    'minimal repaired content, and stop.',
  ];
  return promptLines.join('\n');
}
/**
 * Builds the three tools exposed to the gate-repair agent.
 *
 * - `read_gate_error` returns the gate failure text.
 * - `read_repair_file` reads an allow-listed file, reporting a missing file as a placeholder.
 * - `write_repair_file` overwrites an allow-listed file and records the path in `editedPaths`.
 *
 * @param input - Worktree root, gate error, allow-list and the set that collects edited paths.
 * @returns The tool set keyed by tool name.
 */
function buildToolSet(input: {
  workdir: string;
  gateError: string;
  allowedPaths: ReadonlySet<string>;
  editedPaths: Set<string>;
}): KtxRuntimeToolSet {
  const { workdir, gateError, allowedPaths, editedPaths } = input;

  // Validates a tool-supplied path and resolves it inside the worktree; throws when not allowed.
  const locate = (path: string): { repoPath: string; absolutePath: string } => {
    const repoPath = assertAllowedPath(path, allowedPaths);
    return { repoPath, absolutePath: join(workdir, repoPath) };
  };

  return {
    read_gate_error: {
      name: 'read_gate_error',
      description: 'Read the artifact gate failure that must be repaired.',
      inputSchema: z.object({}),
      execute: async () => ({
        markdown: gateError,
        structured: { gateError },
      }),
    },
    read_repair_file: {
      name: 'read_repair_file',
      description: 'Read one allowed file from the integration worktree.',
      inputSchema: readRepairFileSchema,
      execute: async ({ path }: z.infer<typeof readRepairFileSchema>) => {
        const { repoPath, absolutePath } = locate(path);
        const { exists, content } = await readOptionalFile(absolutePath);
        const markdown = exists ? content : `(missing file: ${repoPath})`;
        return {
          markdown,
          structured: { path: repoPath, exists },
        };
      },
    },
    write_repair_file: {
      name: 'write_repair_file',
      description: 'Replace one allowed integration worktree file with repaired text content.',
      inputSchema: writeRepairFileSchema,
      execute: async ({ path, content }: z.infer<typeof writeRepairFileSchema>) => {
        const { repoPath, absolutePath } = locate(path);
        // The allow-listed file may not exist yet, so make sure its directory does.
        await mkdir(dirname(absolutePath), { recursive: true });
        await writeFile(absolutePath, content, 'utf-8');
        editedPaths.add(repoPath);
        return {
          markdown: `Wrote ${repoPath}`,
          structured: { path: repoPath, bytes: Buffer.byteLength(content) },
        };
      },
    },
  };
}
/**
 * Derives the repository-relative files a final-gate repair may touch.
 *
 * @param input - Changed GLOBAL wiki page keys and touched semantic-layer sources.
 * @returns De-duplicated paths in default string sort order:
 *   `semantic-layer/<connectionId>/<sourceName>.yaml` and `wiki/global/<pageKey>.md`.
 */
export function finalGateRepairPaths(input: {
  changedWikiPageKeys: string[];
  touchedSlSources: TouchedSlSource[];
}): string[] {
  const uniquePaths = new Set<string>();
  for (const { connectionId, sourceName } of input.touchedSlSources) {
    uniquePaths.add(`semantic-layer/${connectionId}/${sourceName}.yaml`);
  }
  for (const pageKey of input.changedWikiPageKeys) {
    uniquePaths.add(`wiki/global/${pageKey}.md`);
  }
  return Array.from(uniquePaths).sort();
}
/**
 * Runs the gate-repair agent against a failed artifact gate, up to `maxAttempts` times.
 *
 * Each attempt gets a fresh tool set scoped to the allow-listed paths. The first attempt
 * that finishes without an agent error and wrote at least one allowed path wins.
 *
 * @param input - Agent runner, worktree, gate error, allow-list, trace writer and limits
 *   (`maxAttempts` defaults to 1, `stepBudget` to 16).
 * @returns `repaired` with the sorted edited paths, or `failed` with the last failure reason.
 * @throws Error when an allowed path is not a valid repository-relative path; errors thrown
 *   by the agent runner itself are traced and rethrown.
 */
export async function repairFinalGateFailure(
  input: RepairFinalGateFailureInput,
): Promise<FinalGateRepairResult> {
  const { agentRunner, trace, gateError, repairKind, workdir } = input;
  const allowedPaths = new Set(input.allowedPaths.map(normalizeRepoPath));
  const maxAttempts = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 16;

  let lastFailure = 'gate repair did not run';
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    // Edits are tracked per attempt: an earlier attempt's writes never count for a later one.
    const editedPaths = new Set<string>();
    const allowedPathList = Array.from(allowedPaths).sort();
    const traceData = {
      repairKind,
      attempt,
      maxAttempts,
      allowedPaths: allowedPathList,
      gateError,
    };

    const runAgent = async () =>
      agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt: buildGateRepairSystemPrompt(),
        userPrompt: buildGateRepairUserPrompt({
          gateError,
          allowedPaths: allowedPathList,
          repairKind,
          attempt,
          maxAttempts,
        }),
        toolSet: buildToolSet({
          workdir,
          gateError,
          allowedPaths,
          editedPaths,
        }),
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-gate-repair',
          source: trace.context.sourceKey,
          jobId: trace.context.jobId,
          repairKind,
        },
      });
    const outcome = await traceTimed(trace, 'gate_repair', 'gate_repair', traceData, runAgent);

    if (outcome.stopReason === 'error') {
      lastFailure = outcome.error?.message ?? 'gate repair agent loop errored';
      await trace.event('error', 'gate_repair', 'gate_repair_failed', traceData, outcome.error);
    } else if (editedPaths.size === 0) {
      // The agent stopped cleanly but changed nothing, so the gate cannot have been fixed.
      lastFailure = 'gate repair completed without editing an allowed path';
      await trace.event('error', 'gate_repair', 'gate_repair_failed', {
        ...traceData,
        reason: lastFailure,
      });
    } else {
      const changedPaths = Array.from(editedPaths).sort();
      await trace.event('debug', 'gate_repair', 'gate_repair_repaired', {
        ...traceData,
        changedPaths,
      });
      return { status: 'repaired', attempts: attempt, changedPaths };
    }
  }
  return { status: 'failed', attempts: maxAttempts, reason: lastFailure };
}

View file

@ -17,6 +17,11 @@ export {
buildLiveDatabaseTableNaturalKey,
ktxSchemaSnapshotToExtractedSchema,
} from './adapters/live-database/extracted-schema.js';
export {
assertSemanticLayerTargetPathsAllowed,
findDisallowedSemanticLayerTargetPaths,
semanticLayerConnectionIdFromPath,
} from './semantic-layer-target-policy.js';
export { LiveDatabaseSourceAdapter } from './adapters/live-database/live-database.adapter.js';
export type {
BuildLiveDatabaseManifestShardsInput,
@ -609,6 +614,11 @@ export {
} from './raw-sources-paths.js';
export { ingestReportSnapshotSchema, parseIngestReportSnapshot } from './report-snapshot.js';
export type { IngestReportBody, IngestReportSnapshot } from './reports.js';
export * from './artifact-gates.js';
export * from './ingest-trace.js';
export * from './isolated-diff/git-patch.js';
export * from './isolated-diff/patch-integrator.js';
export * from './isolated-diff/work-unit-executor.js';
export * from './reports.js';
export { SourceAdapterRegistry } from './source-adapter-registry.js';
export type { SqliteBundleIngestStoreOptions } from './sqlite-bundle-ingest-store.js';
@ -652,4 +662,7 @@ export type {
TriageSignals,
UnresolvedCardInfo,
WorkUnit,
DeterministicProjectionContext,
ProjectionResult,
} from './types.js';
export * from './wiki-body-refs.js';

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,7 @@
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { GitService } from '../core/index.js';
import { addTouchedSlSource } from '../tools/index.js';
import { IngestBundleRunner } from './ingest-bundle.runner.js';
import { createMemoryFlowLiveBuffer } from './memory-flow/live-buffer.js';
@ -123,9 +122,15 @@ const makeDeps = () => {
};
const scopedGit = {
revParseHead: vi.fn().mockResolvedValue('h'),
commitFiles: vi.fn(),
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockResolvedValue([]),
};
const sessionWorktreeService = {
create: vi.fn().mockResolvedValue({
@ -167,10 +172,12 @@ const makeDeps = () => {
loadPrompt: vi.fn().mockResolvedValue('base-framing'),
};
const wikiService = {
forWorktree: vi.fn().mockReturnValue({}),
forWorktree: vi.fn(),
listPageKeys: vi.fn().mockResolvedValue([]),
readPage: vi.fn().mockResolvedValue(null),
syncFromCommit: vi.fn().mockResolvedValue(undefined),
};
wikiService.forWorktree.mockReturnValue(wikiService);
const knowledgeSlRefs = {
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 1, deleted: 0 }),
};
@ -178,7 +185,7 @@ const makeDeps = () => {
listPagesForUser: vi.fn().mockResolvedValue([]),
};
const semanticLayerService = {
forWorktree: vi.fn().mockReturnValue({}),
forWorktree: vi.fn(),
listFilesForConnection: vi
.fn()
.mockImplementation((connectionId: string) =>
@ -193,6 +200,7 @@ const makeDeps = () => {
}),
),
};
semanticLayerService.forWorktree.mockReturnValue(semanticLayerService);
const slSearchService = {
indexSources: vi.fn().mockResolvedValue(undefined),
};
@ -255,8 +263,12 @@ const buildRunner = (deps: ReturnType<typeof makeDeps> = makeDeps(), overrides:
resolveUploadDir: (uploadId) => `/tmp/ktx-test/ingest-uploads/${uploadId}`,
resolvePullDir: (jobId) => `/tmp/ktx-test/ingest-pulls/${jobId}`,
resolveTranscriptDir: (jobId) => `/tmp/ktx-test/run/wu-transcripts/${jobId}`,
resolveTracePath: (jobId) => `/tmp/ktx-test/ingest-traces/${jobId}/trace.jsonl`,
},
settings: {
probeRowCount: 1,
memoryIngestionModel: 'test-model',
},
settings: { probeRowCount: 1, memoryIngestionModel: 'test-model' },
skillsRegistry: deps.skillsRegistry as any,
promptService: deps.promptService as any,
wikiService: deps.wikiService as any,
@ -1505,7 +1517,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h1']]),
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/looker-run/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
@ -1570,6 +1582,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
});
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
);
@ -1972,9 +1985,15 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
const assertError = new Error('Worktree has in-progress git operation (sequencer ...); refusing to proceed');
const sessionGit = {
revParseHead: vi.fn().mockResolvedValue('h'),
commitFiles: vi.fn(),
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockRejectedValue(assertError),
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockResolvedValue([]),
};
deps.sessionWorktreeService.create.mockResolvedValue({
chatId: 'j1',
@ -2005,135 +2024,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
});
it('squash-merges only successful WUs into main when one WU fails sl_validate', async () => {
const homeDir = await mkdtemp(join(tmpdir(), 'ingest-rollback-'));
try {
const configDir = join(homeDir, 'config');
const mainGit = new GitService({
storage: { configDir, homeDir },
git: {
userName: 'System User',
userEmail: 'system@example.com',
bootstrapMessage: 'Initialize test config repo',
bootstrapAuthor: 'test-system',
bootstrapAuthorEmail: 'system@example.com',
},
});
await mainGit.onModuleInit();
const baseSha = await mainGit.revParseHead();
if (!baseSha) {
throw new Error('no base sha');
}
const deps = makeDeps();
const sessionDir = join(homeDir, '.worktrees', 'session-j1');
const sessionBranch = 'session/j1';
let currentToolSession: any = null;
deps.gitService = mainGit as any;
deps.sessionWorktreeService.create.mockImplementation(async (_jobId: string, startSha: string) => {
await mkdir(join(homeDir, '.worktrees'), { recursive: true });
await mainGit.addWorktree(sessionDir, sessionBranch, startSha);
return {
chatId: 'j1',
workdir: sessionDir,
branch: sessionBranch,
baseSha: startSha,
createdAt: new Date(),
git: mainGit.forWorktree(sessionDir),
config: {},
};
});
deps.sessionWorktreeService.cleanup.mockResolvedValue(undefined);
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{ unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] },
{ unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] },
],
});
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.slValidator.validateSingleSource.mockImplementation(
(_validationDeps: unknown, _connectionId: string, sourceName: string) => ({
errors: sourceName === 'bad' ? [{ message: 'bad source rejected' }] : [],
warnings: [],
}),
);
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
const unitKey = params.telemetryTags?.unitKey;
if (unitKey === 'wu-good') {
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'good.yaml'), 'name: good\n');
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'good');
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'good', detail: '' });
await currentToolSession.gitService.commitFiles(
['semantic-layer/c1/good.yaml'],
'test: add good source',
'KTX Test',
'system@ktx.local',
);
}
if (unitKey === 'wu-bad') {
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'bad.yaml'), 'name: bad\n');
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'bad');
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'bad', detail: '' });
await currentToolSession.gitService.commitFiles(
['semantic-layer/c1/bad.yaml'],
'test: add bad source',
'KTX Test',
'system@ktx.local',
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockImplementation(async ({ worktreeRoot }: any) => {
const rawDir = join(worktreeRoot, 'raw-sources', 'c1', 'fake', 's');
await mkdir(rawDir, { recursive: true });
await writeFile(join(rawDir, 'good.raw'), 'good raw');
await writeFile(join(rawDir, 'bad.raw'), 'bad raw');
return {
currentHashes: new Map([
['good.raw', 'good-hash'],
['bad.raw', 'bad-hash'],
]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
};
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const result = await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(result.failedWorkUnits).toEqual(['wu-bad']);
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'good.yaml'), 'utf-8')).toContain('good');
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'bad.yaml'), 'utf-8').catch(() => null)).toBeNull();
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
failedWorkUnits: ['wu-bad'],
}),
}),
);
await expect(stat(join(configDir, '.git', 'sequencer'))).rejects.toThrow();
} finally {
await rm(homeDir, { recursive: true, force: true });
}
});
it('fails the run and rethrows when the adapter cannot detect the bundle', async () => {
const deps = makeDeps();
deps.adapter.detect.mockResolvedValue(false);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,85 @@
import { mkdtemp, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { FileIngestTraceWriter, ingestTracePathForJob, traceTimed } from './ingest-trace.js';
// Tests for the JSONL trace writer, the traceTimed helper and the trace path layout.
describe('FileIngestTraceWriter', () => {
  it('persists structured trace events as JSONL', async () => {
    const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
    const tracePath = ingestTracePathForJob(root, 'job-1');
    const trace = new FileIngestTraceWriter({
      tracePath,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
    });
    await trace.event('debug', 'snapshot', 'input_snapshot', {
      baseSha: 'abc123',
      rawFileCount: 2,
      diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
    });
    const lines = (await readFile(tracePath, 'utf-8'))
      .trim()
      .split('\n')
      .map((line) => JSON.parse(line));
    expect(lines).toHaveLength(1);
    expect(lines[0]).toMatchObject({
      schemaVersion: 1,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
      phase: 'snapshot',
      event: 'input_snapshot',
      data: {
        baseSha: 'abc123',
        rawFileCount: 2,
        diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
      },
    });
    expect(typeof lines[0].at).toBe('string');
  });

  it('records timing and error context for postmortem inspection', async () => {
    vi.useFakeTimers();
    // Restore real timers even when an assertion fails; otherwise the fake clock leaks into
    // every test that runs after this one in the same worker.
    try {
      vi.setSystemTime(new Date('2026-05-17T12:00:00.000Z'));
      const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
      const tracePath = ingestTracePathForJob(root, 'job-2');
      const trace = new FileIngestTraceWriter({
        tracePath,
        jobId: 'job-2',
        connectionId: 'c1',
        sourceKey: 'fake',
        level: 'trace',
      });
      await expect(
        traceTimed(trace, 'integration', 'apply_patch', { unitKey: 'wu-1' }, async () => {
          // Advance the fake clock so the recorded duration is deterministic.
          vi.advanceTimersByTime(17);
          throw new Error('patch conflict');
        }),
      ).rejects.toThrow('patch conflict');
      const lines = (await readFile(tracePath, 'utf-8'))
        .trim()
        .split('\n')
        .map((line) => JSON.parse(line));
      expect(lines.map((line) => line.event)).toEqual(['apply_patch_started', 'apply_patch_failed']);
      expect(lines[1]).toMatchObject({
        level: 'error',
        phase: 'integration',
        data: { unitKey: 'wu-1' },
        error: { name: 'Error', message: 'patch conflict' },
      });
      expect(lines[1].durationMs).toBe(17);
    } finally {
      vi.useRealTimers();
    }
  });

  it('uses the documented trace path layout', () => {
    expect(ingestTracePathForJob('/project/.ktx', 'job-3')).toBe('/project/.ktx/ingest-traces/job-3/trace.jsonl');
  });
});

View file

@ -0,0 +1,158 @@
import { appendFile, mkdir } from 'node:fs/promises';
import { dirname, join } from 'node:path';
/** Severity/verbosity levels accepted by trace writers. */
export type IngestTraceLevel = 'info' | 'debug' | 'trace' | 'error';
// Rank used for level filtering: an event is written when its rank is at most the rank of
// the writer's configured level, so `error` events always pass and `trace` is the most verbose.
const TRACE_LEVEL_RANK: Record<IngestTraceLevel, number> = {
error: 0,
info: 1,
debug: 2,
trace: 3,
};
/** Identity and configuration that a trace writer stamps onto the events it writes. */
export interface IngestTraceContext {
/** File the JSONL events are appended to. */
tracePath: string;
jobId: string;
connectionId: string;
sourceKey: string;
/** Included in events only when set to a non-empty value. */
runId?: string;
/** Included in events only when set to a non-empty value. */
syncId?: string;
/** Most verbose level that is still written; FileIngestTraceWriter defaults it to 'debug'. */
level?: IngestTraceLevel;
}
/** Shape of one JSONL record written by FileIngestTraceWriter. */
export interface IngestTraceEvent {
schemaVersion: 1;
/** ISO-8601 timestamp taken when the event is written. */
at: string;
level: IngestTraceLevel;
jobId: string;
connectionId: string;
sourceKey: string;
runId?: string;
syncId?: string;
/** Caller-chosen phase label, e.g. 'gate_repair'. */
phase: string;
/** Caller-chosen event name; traceTimed appends `_started`, `_finished` or `_failed`. */
event: string;
/** Elapsed milliseconds, present only when the caller supplies a duration. */
durationMs?: number;
data?: Record<string, unknown>;
/** Serialized form of the error passed to the writer, if any. */
error?: {
name: string;
message: string;
stack?: string;
};
}
/** Sink for structured ingest trace events. */
export interface IngestTraceWriter {
readonly tracePath: string;
readonly context: IngestTraceContext;
/** Returns a writer whose context has the given `runId` / `syncId` overrides applied. */
withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter;
/**
 * Records one event.
 *
 * @param level - Level of the event, used for filtering against the writer's configured level.
 * @param phase - Phase label for the event.
 * @param event - Event name.
 * @param data - Optional structured payload.
 * @param error - Optional error (or other thrown value) to attach.
 * @param durationMs - Optional elapsed time in milliseconds.
 */
event(
level: IngestTraceLevel,
phase: string,
event: string,
data?: Record<string, unknown>,
error?: unknown,
durationMs?: number,
): Promise<void>;
}
/**
 * Resolves the trace file location for a job: `<homeDir>/ingest-traces/<jobId>/trace.jsonl`.
 *
 * @param homeDir - Root directory under which traces are stored.
 * @param jobId - Job whose trace file is wanted.
 * @returns Path to the job's JSONL trace file.
 */
export function ingestTracePathForJob(homeDir: string, jobId: string): string {
  const jobTraceDir = join(homeDir, 'ingest-traces', jobId);
  return join(jobTraceDir, 'trace.jsonl');
}
/**
 * Renders a non-Error object as text for a trace record. Prefers the object's own string
 * conversion, and falls back to JSON when that yields the uninformative `[object Object]`.
 */
function describeThrownObject(value: object): string {
  let text = '[object Object]';
  try {
    text = String(value);
  } catch {
    // Objects without a usable toString (e.g. null-prototype objects) fall through to JSON.
  }
  if (text !== '[object Object]') {
    return text;
  }
  try {
    const json = JSON.stringify(value);
    return typeof json === 'string' ? json : text;
  } catch {
    // Circular structures or BigInt members cannot be stringified; keep the generic text.
    return text;
  }
}

/**
 * Converts a thrown value into the `error` shape stored in trace events.
 *
 * - `undefined` / `null` produce no error field.
 * - `Error` instances keep their name, message and (when present) stack.
 * - Error-like objects carrying a string `message` (plain result payloads, errors from
 *   another realm) keep that message plus any string `name` / `stack`.
 * - Other objects are described via their string form or JSON rather than `[object Object]`.
 * - Primitives are stringified.
 *
 * @param error - Value passed by the caller as the event's error.
 * @returns The serialized error, or `undefined` when there is nothing to record.
 */
function serializeError(error: unknown): IngestTraceEvent['error'] | undefined {
  if (error === undefined || error === null) {
    return undefined;
  }
  if (error instanceof Error) {
    return {
      name: error.name,
      message: error.message,
      ...(error.stack ? { stack: error.stack } : {}),
    };
  }
  if (typeof error === 'object') {
    const errorLike = error as { name?: unknown; message?: unknown; stack?: unknown };
    if (typeof errorLike.message === 'string') {
      const hasName = typeof errorLike.name === 'string' && errorLike.name.length > 0;
      const hasStack = typeof errorLike.stack === 'string' && errorLike.stack.length > 0;
      return {
        name: hasName ? (errorLike.name as string) : 'Error',
        message: errorLike.message,
        ...(hasStack ? { stack: errorLike.stack as string } : {}),
      };
    }
    return { name: 'Error', message: describeThrownObject(error) };
  }
  return { name: 'Error', message: String(error) };
}
/**
 * Decides whether an event passes the writer's level filter.
 *
 * @param configured - Level the writer is configured with.
 * @param incoming - Level of the event being written.
 * @returns `true` when the event's rank does not exceed the configured level's rank.
 */
function shouldWrite(configured: IngestTraceLevel, incoming: IngestTraceLevel): boolean {
  const threshold = TRACE_LEVEL_RANK[configured];
  const eventRank = TRACE_LEVEL_RANK[incoming];
  return eventRank <= threshold;
}
/**
 * Trace writer that appends one JSON object per line to `tracePath`, creating the parent
 * directory as needed. Events above the configured level are dropped.
 */
export class FileIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;

  /**
   * @param context - Identity and settings stamped onto events; `level` defaults to 'debug'.
   */
  constructor(context: IngestTraceContext) {
    this.tracePath = context.tracePath;
    this.context = { ...context, level: context.level ?? 'debug' };
  }

  /** Returns a new writer for the same file with `runId` / `syncId` overridden. */
  withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter {
    const merged = { ...this.context, ...context, tracePath: this.tracePath };
    return new FileIngestTraceWriter(merged);
  }

  /**
   * Appends one event to the trace file unless the level filter rejects it.
   *
   * Optional fields (`runId`, `syncId`, `durationMs`, `data`, `error`) are omitted from the
   * record when they are not provided.
   */
  async event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void> {
    const configuredLevel = this.context.level ?? 'debug';
    if (!shouldWrite(configuredLevel, level)) {
      return;
    }

    const serializedError = serializeError(error);
    const { jobId, connectionId, sourceKey, runId, syncId } = this.context;
    // Fragments are spread in a fixed position so the key order of each record stays stable.
    const correlationFields = {
      ...(runId ? { runId } : {}),
      ...(syncId ? { syncId } : {}),
    };
    const detailFields = {
      ...(durationMs !== undefined ? { durationMs } : {}),
      ...(data ? { data } : {}),
      ...(serializedError ? { error: serializedError } : {}),
    };
    const payload: IngestTraceEvent = {
      schemaVersion: 1,
      at: new Date().toISOString(),
      level,
      jobId,
      connectionId,
      sourceKey,
      ...correlationFields,
      phase,
      event,
      ...detailFields,
    };

    await mkdir(dirname(this.tracePath), { recursive: true });
    await appendFile(this.tracePath, `${JSON.stringify(payload)}\n`, 'utf-8');
  }
}
/**
 * Trace writer that discards every event. Its context carries empty identifiers and
 * `withContext` hands back the same instance.
 */
export class NoopIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath = '';
  readonly context: IngestTraceContext = {
    tracePath: '',
    jobId: '',
    connectionId: '',
    sourceKey: '',
    level: 'error',
  };

  /** Context overrides are ignored; the same writer is returned. */
  withContext(): IngestTraceWriter {
    return this;
  }

  /** Accepts and drops the event. */
  event(): Promise<void> {
    return Promise.resolve();
  }
}
/**
 * Runs `fn` bracketed by trace events: `<event>_started` before it, then either
 * `<event>_finished` (debug) or `<event>_failed` (error, with the thrown value), each
 * carrying the elapsed milliseconds.
 *
 * @param trace - Writer that receives the events.
 * @param phase - Phase label for all events.
 * @param event - Base event name that the suffixes are appended to.
 * @param data - Payload attached to every event.
 * @param fn - Operation to run and time.
 * @returns Whatever `fn` resolves to.
 * @throws Whatever `fn` (or writing the finished event) throws, after it has been traced.
 */
export async function traceTimed<T>(
  trace: IngestTraceWriter,
  phase: string,
  event: string,
  data: Record<string, unknown>,
  fn: () => Promise<T>,
): Promise<T> {
  await trace.event('debug', phase, `${event}_started`, data);
  // The clock starts after the started event is written, so its I/O is not counted.
  const startedAt = Date.now();
  const elapsedMs = (): number => Date.now() - startedAt;
  try {
    const value = await fn();
    await trace.event('debug', phase, `${event}_finished`, data, undefined, elapsedMs());
    return value;
  } catch (failure) {
    await trace.event('error', phase, `${event}_failed`, data, failure, elapsedMs());
    throw failure;
  }
}

View file

@ -0,0 +1,97 @@
import { describe, expect, it } from 'vitest';
import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths, textArtifactRoots } from './git-patch.js';
// Contract tests for the patch helpers: path parsing plus the per-work-unit policy checks
// (slDisallowed units, allowed target connections, executable/binary changes).
describe('isolated diff patch contract', () => {
it('parses touched paths from no-rename git patches', () => {
// Two file sections: an in-place modification and a newly added file.
const patch = [
'diff --git a/wiki/global/a.md b/wiki/global/a.md',
'index 1111111..2222222 100644',
'--- a/wiki/global/a.md',
'+++ b/wiki/global/a.md',
'@@ -1 +1 @@',
'-old',
'+new',
'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
'new file mode 100644',
'--- /dev/null',
'+++ b/semantic-layer/c1/orders.yaml',
'@@ -0,0 +1 @@',
'+name: orders',
'',
].join('\n');
// Paths are reported without the a/ and b/ prefixes; the added file still reports the
// same old and new path.
expect(parsePatchTouchedPaths(patch)).toEqual([
{
path: 'wiki/global/a.md',
oldPath: 'wiki/global/a.md',
newPath: 'wiki/global/a.md',
mode: '100644',
binary: false,
},
{
path: 'semantic-layer/c1/orders.yaml',
oldPath: 'semantic-layer/c1/orders.yaml',
newPath: 'semantic-layer/c1/orders.yaml',
mode: '100644',
binary: false,
},
]);
});
it('rejects semantic-layer paths for slDisallowed work units', () => {
const patch = 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml\nindex 1..2 100644\n';
expect(() =>
assertPatchAllowedForWorkUnit({
unitKey: 'lookml-mismatch',
patch,
slDisallowed: true,
}),
).toThrow(/slDisallowed WorkUnit lookml-mismatch touched semantic-layer\/c1\/orders.yaml/);
});
it('rejects semantic-layer paths outside allowed target connections', () => {
// The patch targets the `finance` connection while only `warehouse` is allowed.
const patch =
'diff --git a/semantic-layer/finance/orders.yaml b/semantic-layer/finance/orders.yaml\nindex 1..2 100644\n';
expect(() =>
assertPatchAllowedForWorkUnit({
unitKey: 'wu-finance',
patch,
slDisallowed: false,
allowedTargetConnectionIds: new Set(['warehouse']),
}),
).toThrow(
/semantic-layer target connection not allowed: semantic-layer\/finance\/orders.yaml \(finance\); allowed: warehouse/,
);
});
it('rejects executable and binary changes under known text artifact roots', () => {
expect(textArtifactRoots).toEqual(['wiki/', 'semantic-layer/']);
// A mode change to 100755 on a wiki page must be refused.
const executablePatch =
'diff --git a/wiki/global/a.md b/wiki/global/a.md\nold mode 100644\nnew mode 100755\nindex 1..2\n';
expect(() =>
assertPatchAllowedForWorkUnit({
unitKey: 'wu-1',
patch: executablePatch,
slDisallowed: false,
}),
).toThrow(/unexpected executable mode under wiki\/global\/a.md/);
// A "GIT binary patch" section on a semantic-layer file must be refused as well.
const binaryPatch = [
'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
'index 1111111..2222222 100644',
'GIT binary patch',
'literal 0',
'',
].join('\n');
expect(() =>
assertPatchAllowedForWorkUnit({
unitKey: 'wu-2',
patch: binaryPatch,
slDisallowed: false,
}),
).toThrow(/unexpected binary patch under semantic-layer\/c1\/orders.yaml/);
});
});

View file

@ -0,0 +1,101 @@
import { assertSemanticLayerTargetPathsAllowed } from '../semantic-layer-target-policy.js';
/** Repository path prefixes whose files are checked for binary content and non-100644 modes. */
export const textArtifactRoots = ['wiki/', 'semantic-layer/'] as const;

/** One file section of a git patch, as reported by `parsePatchTouchedPaths`. */
export interface PatchTouchedPath {
  /** The new path, or the old path when the new side is `/dev/null`. */
  path: string;
  /** Old-side path from the `diff --git` header with the `a/` prefix removed. */
  oldPath: string;
  /** New-side path from the `diff --git` header with the `b/` prefix removed. */
  newPath: string;
  /** Six-digit mode from the `index`, `new mode` or `new file mode` line; `null` if none was seen. */
  mode: string | null;
  /** True when the section contains a `GIT binary patch` or `Binary files ...` line. */
  binary: boolean;
}

/** Input to the per-WorkUnit patch policy check. */
export interface PatchPolicyInput {
  unitKey: string;
  /** Full patch text. */
  patch: string;
  /** When true, the patch may not touch anything under `semantic-layer/`. */
  slDisallowed: boolean;
  /** When provided, touched paths are additionally checked against these connection ids. */
  allowedTargetConnectionIds?: ReadonlySet<string>;
}

/** Drops a leading `a/` or `b/` diff prefix, if present. */
function stripPrefix(path: string): string {
  return path.replace(/^[ab]\//, '');
}

/** True when `path` lives under one of `textArtifactRoots`. */
function isTextArtifactPath(path: string): boolean {
  return textArtifactRoots.some((root) => path.startsWith(root));
}

const DIFF_GIT_PREFIX = 'diff --git ';
const INDEX_LINE = /^index [0-9a-f]+\.\.[0-9a-f]+(?: ([0-7]{6}))?$/;
const NEW_MODE_LINE = /^new mode ([0-7]{6})$/;
const NEW_FILE_MODE_LINE = /^new file mode ([0-7]{6})$/;
const LEGACY_HEADER_SPLIT = /^(.+) (.+)$/;
const OCTAL_ESCAPE = /^[0-7]{1,3}/;

/** Single-character escapes git uses inside C-style quoted path names. */
const QUOTED_PATH_ESCAPES = new Map<string, string>([
  ['a', '\x07'],
  ['b', '\b'],
  ['f', '\f'],
  ['n', '\n'],
  ['r', '\r'],
  ['t', '\t'],
  ['v', '\v'],
  ['"', '"'],
  ['\\', '\\'],
]);

/** Decodes raw bytes as UTF-8, falling back to one char per byte when they are not valid UTF-8. */
function decodeUtf8Bytes(bytes: readonly number[]): string {
  try {
    return decodeURIComponent(bytes.map((byte) => `%${byte.toString(16).padStart(2, '0')}`).join(''));
  } catch {
    return String.fromCharCode(...bytes);
  }
}

/**
 * Reads one C-style quoted path starting at the `"` at `text[start]`.
 * Returns the decoded path and the index just past the closing quote, or
 * `null` when the quoting is malformed or unterminated.
 */
function readQuotedGitPath(text: string, start: number): { value: string; end: number } | null {
  if (text.charAt(start) !== '"') {
    return null;
  }
  let value = '';
  // Octal escapes encode raw bytes; consecutive ones are decoded together so
  // multi-byte UTF-8 sequences come out as a single character.
  let pendingBytes: number[] = [];
  const flushBytes = (): void => {
    if (pendingBytes.length > 0) {
      value += decodeUtf8Bytes(pendingBytes);
      pendingBytes = [];
    }
  };
  let i = start + 1;
  while (i < text.length) {
    const ch = text.charAt(i);
    if (ch === '"') {
      flushBytes();
      return { value, end: i + 1 };
    }
    if (ch !== '\\') {
      flushBytes();
      value += ch;
      i += 1;
      continue;
    }
    const octal = OCTAL_ESCAPE.exec(text.slice(i + 1, i + 4));
    if (octal) {
      pendingBytes.push(Number.parseInt(octal[0], 8) & 0xff);
      i += 1 + octal[0].length;
      continue;
    }
    const escaped = QUOTED_PATH_ESCAPES.get(text.charAt(i + 1));
    if (escaped === undefined) {
      return null;
    }
    flushBytes();
    value += escaped;
    i += 2;
  }
  return null;
}

/**
 * Splits the text after `diff --git ` into its old and new path tokens
 * (diff prefixes still attached). Tries, in order:
 *  1. two C-style quoted tokens;
 *  2. the symmetric `a/P b/P` form of a no-rename diff, which stays
 *     unambiguous even when `P` contains spaces;
 *  3. the legacy split at the last space.
 */
function splitDiffGitHeader(rest: string): { oldPath: string; newPath: string } | null {
  if (rest.startsWith('"')) {
    const first = readQuotedGitPath(rest, 0);
    if (first && rest.charAt(first.end) === ' ') {
      const second = readQuotedGitPath(rest, first.end + 1);
      if (second && second.end === rest.length) {
        return { oldPath: first.value, newPath: second.value };
      }
    }
  }
  if (rest.length % 2 === 1) {
    const middle = (rest.length - 1) / 2;
    const left = rest.slice(0, middle);
    const right = rest.slice(middle + 1);
    if (rest.charAt(middle) === ' ' && left.length > 0 && stripPrefix(left) === stripPrefix(right)) {
      return { oldPath: left, newPath: right };
    }
  }
  const legacy = LEGACY_HEADER_SPLIT.exec(rest);
  return legacy ? { oldPath: legacy[1] ?? '', newPath: legacy[2] ?? '' } : null;
}

/**
 * Lists the file sections of a git patch.
 *
 * Each `diff --git` header starts a new entry; the `index`, `new mode`,
 * `new file mode`, `GIT binary patch` and `Binary files ...` lines that follow
 * fill in its mode and binary flag. Lines before the first header are ignored.
 *
 * Paths containing spaces and paths that git wrote in C-style quotes are
 * recovered in full, so callers that apply prefix-based policy see the real
 * repository path rather than a fragment of it.
 *
 * @param patch - Patch text with `\n` line endings.
 * @returns One entry per `diff --git` header, in patch order.
 */
export function parsePatchTouchedPaths(patch: string): PatchTouchedPath[] {
  const entries: PatchTouchedPath[] = [];
  let current: PatchTouchedPath | null = null;

  for (const line of patch.split('\n')) {
    const header = line.startsWith(DIFF_GIT_PREFIX)
      ? splitDiffGitHeader(line.slice(DIFF_GIT_PREFIX.length))
      : null;
    if (header) {
      if (current) {
        entries.push(current);
      }
      const oldPath = stripPrefix(header.oldPath);
      const newPath = stripPrefix(header.newPath);
      current = {
        path: newPath === '/dev/null' ? oldPath : newPath,
        oldPath,
        newPath,
        mode: null,
        binary: false,
      };
      continue;
    }
    if (!current) {
      continue;
    }
    const indexMode = INDEX_LINE.exec(line);
    if (indexMode?.[1]) {
      current.mode = indexMode[1];
    }
    const newMode = NEW_MODE_LINE.exec(line);
    if (newMode) {
      current.mode = newMode[1] ?? current.mode;
    }
    const newFileMode = NEW_FILE_MODE_LINE.exec(line);
    if (newFileMode) {
      current.mode = newFileMode[1] ?? current.mode;
    }
    if (line === 'GIT binary patch' || line.startsWith('Binary files ')) {
      current.binary = true;
    }
  }

  if (current) {
    entries.push(current);
  }
  return entries;
}
/**
 * Enforces the WorkUnit patch policy and returns the parsed file entries.
 *
 * Checks, in order:
 *  - semantic-layer target connections, only when `allowedTargetConnectionIds` is given;
 *  - no `semantic-layer/` path at all when `slDisallowed` is set;
 *  - under the text artifact roots: no binary sections and no mode other than `100644`.
 *
 * @throws Error on the first violated rule (or whatever the target-connection check throws).
 */
export function assertPatchAllowedForWorkUnit(input: PatchPolicyInput): PatchTouchedPath[] {
  const entries = parsePatchTouchedPaths(input.patch);
  const allowedConnectionIds = input.allowedTargetConnectionIds;

  if (allowedConnectionIds) {
    const paths = entries.map(({ path }) => path);
    assertSemanticLayerTargetPathsAllowed({ paths, allowedConnectionIds });
  }

  for (const { path, binary, mode } of entries) {
    const touchesSemanticLayer = path.startsWith('semantic-layer/');
    if (input.slDisallowed && touchesSemanticLayer) {
      throw new Error(`slDisallowed WorkUnit ${input.unitKey} touched ${path}`);
    }
    if (isTextArtifactPath(path)) {
      if (binary) {
        throw new Error(`unexpected binary patch under ${path}`);
      }
      // An entry without a recorded mode (null) is accepted as-is.
      if (mode && mode !== '100644') {
        throw new Error(`unexpected executable mode under ${path}: ${mode}`);
      }
    }
  }

  return entries;
}

View file

@ -0,0 +1,404 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { GitService } from '../../core/index.js';
import { FileIngestTraceWriter } from '../ingest-trace.js';
import { integrateWorkUnitPatch } from './patch-integrator.js';
/**
 * Creates a throwaway repository under the OS temp dir with one committed
 * file, `wiki/global/a.md` containing `old\n`.
 *
 * @returns The temp home dir, the repo config dir, the git service, and the HEAD sha after the seed commit.
 */
async function makeRepo() {
  const systemAuthor = { name: 'System User', email: 'system@example.com' };
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-integrate-'));
  const configDir = join(homeDir, 'config');

  const git = new GitService({
    storage: { configDir, homeDir },
    git: {
      userName: systemAuthor.name,
      userEmail: systemAuthor.email,
      bootstrapMessage: 'init',
      bootstrapAuthor: 'system',
      bootstrapAuthorEmail: systemAuthor.email,
    },
  });
  await git.onModuleInit();

  const seedFile = 'wiki/global/a.md';
  await mkdir(join(configDir, 'wiki/global'), { recursive: true });
  await writeFile(join(configDir, seedFile), 'old\n');
  await git.commitFiles([seedFile], 'base', systemAuthor.name, systemAuthor.email);

  const baseSha = await git.revParseHead();
  return { homeDir, configDir, git, baseSha };
}
// End-to-end tests for integrateWorkUnitPatch against a real temporary git
// repository. Each test builds a child worktree, commits an edit there, writes
// the resulting patch to disk and integrates it into the main worktree.
describe('integrateWorkUnitPatch', () => {
  it('applies a clean patch, runs semantic gates, and commits accepted changes', async () => {
    const { homeDir, configDir, git, baseSha } = await makeRepo();
    const childDir = join(homeDir, 'child');
    await git.addWorktree(childDir, 'child', baseSha);
    const childGit = git.forWorktree(childDir);
    await writeFile(join(childDir, 'wiki/global/a.md'), 'new\n');
    await childGit.commitFiles(['wiki/global/a.md'], 'edit', 'System User', 'system@example.com');
    const patchPath = join(homeDir, 'patches/wu.patch');
    await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'),
      jobId: 'job-1',
      connectionId: 'c1',
      sourceKey: 'fake',
      level: 'trace',
    });
    const result = await integrateWorkUnitPatch({
      unitKey: 'wu-1',
      patchPath,
      integrationGit: git,
      trace,
      author: { name: 'KTX Test', email: 'system@ktx.local' },
      validateAppliedTree: vi.fn().mockResolvedValue(undefined),
      slDisallowed: false,
      allowedTargetConnectionIds: new Set(['c1']),
    });
    expect(result.status).toBe('accepted');
    await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('new\n');
    await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_apply_finished');
  });

  // The gate rejects and no repair hook is supplied: the tree must be rolled back.
  it('rolls back and classifies semantic conflicts', async () => {
    const { homeDir, configDir, git, baseSha } = await makeRepo();
    const childDir = join(homeDir, 'child-semantic');
    await git.addWorktree(childDir, 'child-semantic', baseSha);
    const childGit = git.forWorktree(childDir);
    await writeFile(join(childDir, 'wiki/global/a.md'), 'bad\n');
    await childGit.commitFiles(['wiki/global/a.md'], 'bad edit', 'System User', 'system@example.com');
    const patchPath = join(homeDir, 'patches/bad.patch');
    await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-2/trace.jsonl'),
      jobId: 'job-2',
      connectionId: 'c1',
      sourceKey: 'fake',
      level: 'trace',
    });
    const result = await integrateWorkUnitPatch({
      unitKey: 'wu-bad',
      patchPath,
      integrationGit: git,
      trace,
      author: { name: 'KTX Test', email: 'system@ktx.local' },
      validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')),
      slDisallowed: false,
      allowedTargetConnectionIds: new Set(['c1']),
    });
    expect(result.status).toBe('semantic_conflict');
    await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('old\n');
  });

  it('classifies slDisallowed patch policy failures as traced textual conflicts', async () => {
    const { homeDir, configDir, git, baseSha } = await makeRepo();
    await mkdir(join(configDir, 'semantic-layer/c1'), { recursive: true });
    // NOTE(review): this commits a directory with no files in it; whether that
    // creates a commit depends on GitService.commitFiles - confirm it is intended.
    await git.commitFiles(['semantic-layer/c1'], 'empty sl dir', 'System User', 'system@example.com');
    const childDir = join(homeDir, 'child-policy');
    await git.addWorktree(childDir, 'child-policy', baseSha);
    const childGit = git.forWorktree(childDir);
    await mkdir(join(childDir, 'semantic-layer/c1'), { recursive: true });
    await writeFile(join(childDir, 'semantic-layer/c1/orders.yaml'), 'name: orders\ncolumns: []\njoins: []\nmeasures: []\n');
    await childGit.commitFiles(['semantic-layer/c1/orders.yaml'], 'forbidden sl', 'System User', 'system@example.com');
    const patchPath = join(homeDir, 'patches/forbidden.patch');
    await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-policy/trace.jsonl'),
      jobId: 'job-policy',
      connectionId: 'c1',
      sourceKey: 'fake',
      level: 'trace',
    });
    const result = await integrateWorkUnitPatch({
      unitKey: 'lookml-mismatch',
      patchPath,
      integrationGit: git,
      trace,
      author: { name: 'KTX Test', email: 'system@ktx.local' },
      validateAppliedTree: vi.fn().mockResolvedValue(undefined),
      slDisallowed: true,
      allowedTargetConnectionIds: new Set(['c1']),
    });
    expect(result).toMatchObject({
      status: 'textual_conflict',
      touchedPaths: ['semantic-layer/c1/orders.yaml'],
    });
    const rawTrace = await readFile(trace.tracePath, 'utf-8');
    expect(rawTrace).toContain('patch_policy_rejected');
    expect(rawTrace).toContain('slDisallowed WorkUnit lookml-mismatch touched semantic-layer/c1/orders.yaml');
  });

  it('classifies unauthorized semantic-layer targets as traced textual conflicts', async () => {
    const { homeDir, git, baseSha } = await makeRepo();
    const childDir = join(homeDir, 'child-target-policy');
    await git.addWorktree(childDir, 'child-target-policy', baseSha);
    const childGit = git.forWorktree(childDir);
    await mkdir(join(childDir, 'semantic-layer/finance'), { recursive: true });
    await writeFile(
      join(childDir, 'semantic-layer/finance/orders.yaml'),
      'name: orders\ncolumns: []\njoins: []\nmeasures: []\n',
    );
    await childGit.commitFiles(['semantic-layer/finance/orders.yaml'], 'unauthorized sl', 'System User', 'system@example.com');
    const patchPath = join(homeDir, 'patches/unauthorized.patch');
    await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-target-policy/trace.jsonl'),
      jobId: 'job-target-policy',
      connectionId: 'c1',
      sourceKey: 'fake',
      level: 'trace',
    });
    const result = await integrateWorkUnitPatch({
      unitKey: 'wu-finance',
      patchPath,
      integrationGit: git,
      trace,
      author: { name: 'KTX Test', email: 'system@ktx.local' },
      validateAppliedTree: vi.fn().mockResolvedValue(undefined),
      slDisallowed: false,
      allowedTargetConnectionIds: new Set(['warehouse']),
    });
    expect(result).toMatchObject({
      status: 'textual_conflict',
      touchedPaths: ['semantic-layer/finance/orders.yaml'],
    });
    const rawTrace = await readFile(trace.tracePath, 'utf-8');
    expect(rawTrace).toContain('patch_policy_rejected');
    expect(rawTrace).toContain('semantic-layer target connection not allowed');
    expect(rawTrace).toContain('allowedTargetConnectionIds');
  });

  // The integration branch and the child both edit the same line from a shared
  // base, so the patch cannot apply; the injected resolver writes the merge.
  it('repairs a textual conflict through the bounded resolver and commits repaired files', async () => {
    const { homeDir, configDir, git } = await makeRepo();
    await mkdir(join(configDir, 'wiki/global'), { recursive: true });
    await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8');
    await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 'system@example.com');
    const conflictBase = await git.revParseHead();
    await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8');
    await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com');
    // HEAD immediately before integration. Comparing against the makeRepo base
    // sha would pass trivially because of the two setup commits above.
    const acceptedHead = await git.revParseHead();
    const childDir = join(homeDir, 'child-conflict');
    await git.addWorktree(childDir, 'child-conflict', conflictBase);
    const childGit = git.forWorktree(childDir);
    await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8');
    await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com');
    const patchPath = join(homeDir, 'proposal.patch');
    await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath);
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver/trace.jsonl'),
      jobId: 'job-resolver',
      connectionId: 'warehouse',
      sourceKey: 'metabase',
      level: 'trace',
    });
    const validateAppliedTree = vi.fn(async (paths: string[]) => {
      expect(paths).toEqual(['wiki/global/a.md']);
      await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\nproposal\n');
    });
    const result = await integrateWorkUnitPatch({
      unitKey: 'wu-conflict',
      patchPath,
      integrationGit: git,
      trace,
      author: { name: 'System User', email: 'system@example.com' },
      slDisallowed: false,
      allowedTargetConnectionIds: new Set(['warehouse']),
      validateAppliedTree,
      resolveTextualConflict: vi.fn(async (context) => {
        expect(context).toMatchObject({
          unitKey: 'wu-conflict',
          patchPath,
          touchedPaths: ['wiki/global/a.md'],
        });
        await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\nproposal\n', 'utf-8');
        return {
          status: 'repaired' as const,
          attempts: 1,
          changedPaths: ['wiki/global/a.md'],
        };
      }),
    });
    expect(result).toMatchObject({
      status: 'accepted',
      touchedPaths: ['wiki/global/a.md'],
      textualResolution: {
        status: 'repaired',
        attempts: 1,
        changedPaths: ['wiki/global/a.md'],
      },
    });
    expect(validateAppliedTree).toHaveBeenCalledOnce();
    await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\nproposal\n');
    await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_textual_resolution');
    // Integration must have produced a new commit on top of the accepted edit.
    expect(await git.revParseHead()).not.toBe(acceptedHead);
  });

  it('keeps the pre-apply integration tree when the resolver cannot repair a textual conflict', async () => {
    const { homeDir, configDir, git } = await makeRepo();
    await mkdir(join(configDir, 'wiki/global'), { recursive: true });
    await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8');
    await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 'system@example.com');
    const conflictBase = await git.revParseHead();
    await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8');
    await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com');
    const acceptedHead = await git.revParseHead();
    const childDir = join(homeDir, 'child-conflict-fails');
    await git.addWorktree(childDir, 'child-conflict-fails', conflictBase);
    const childGit = git.forWorktree(childDir);
    await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8');
    await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com');
    const patchPath = join(homeDir, 'proposal-fails.patch');
    await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath);
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver-fails/trace.jsonl'),
      jobId: 'job-resolver-fails',
      connectionId: 'warehouse',
      sourceKey: 'metabase',
      level: 'trace',
    });
    const result = await integrateWorkUnitPatch({
      unitKey: 'wu-conflict',
      patchPath,
      integrationGit: git,
      trace,
      author: { name: 'System User', email: 'system@example.com' },
      slDisallowed: false,
      allowedTargetConnectionIds: new Set(['warehouse']),
      validateAppliedTree: vi.fn(async () => {}),
      resolveTextualConflict: vi.fn(async () => ({
        status: 'failed' as const,
        attempts: 1,
        reason: 'resolver completed without editing an allowed path',
      })),
    });
    expect(result).toMatchObject({
      status: 'textual_conflict',
      textualResolution: {
        status: 'failed',
        attempts: 1,
        reason: 'resolver completed without editing an allowed path',
      },
    });
    expect(await git.revParseHead()).toBe(acceptedHead);
    await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\n');
  });

  // The gate fails once, the repair hook rewrites the file, and the gate then passes.
  it('repairs semantic gate failures after a patch applies cleanly', async () => {
    const { homeDir, configDir, git, baseSha } = await makeRepo();
    const childDir = join(homeDir, 'child-semantic-repair');
    await git.addWorktree(childDir, 'child-semantic-repair', baseSha);
    const childGit = git.forWorktree(childDir);
    await writeFile(join(childDir, 'wiki/global/a.md'), 'bad semantic ref\n');
    await childGit.commitFiles(['wiki/global/a.md'], 'bad semantic edit', 'System User', 'system@example.com');
    const patchPath = join(homeDir, 'patches/semantic-repair.patch');
    await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair/trace.jsonl'),
      jobId: 'job-semantic-repair',
      connectionId: 'c1',
      sourceKey: 'fake',
      level: 'trace',
    });
    const validateAppliedTree = vi
      .fn()
      .mockRejectedValueOnce(new Error('final artifact gates failed:\na: unknown semantic-layer entity'))
      .mockResolvedValueOnce(undefined);
    const result = await integrateWorkUnitPatch({
      unitKey: 'wu-repairable',
      patchPath,
      integrationGit: git,
      trace,
      author: { name: 'KTX Test', email: 'system@ktx.local' },
      validateAppliedTree,
      slDisallowed: false,
      allowedTargetConnectionIds: new Set(['c1']),
      repairGateFailure: vi.fn(async (context) => {
        expect(context).toMatchObject({
          unitKey: 'wu-repairable',
          patchPath,
          touchedPaths: ['wiki/global/a.md'],
        });
        await writeFile(join(configDir, 'wiki/global/a.md'), 'repaired semantic ref\n', 'utf-8');
        return {
          status: 'repaired' as const,
          attempts: 1,
          changedPaths: ['wiki/global/a.md'],
        };
      }),
    });
    expect(result).toMatchObject({
      status: 'accepted',
      touchedPaths: ['wiki/global/a.md'],
      gateRepair: {
        status: 'repaired',
        attempts: 1,
        changedPaths: ['wiki/global/a.md'],
      },
    });
    expect(validateAppliedTree).toHaveBeenCalledTimes(2);
    await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('repaired semantic ref\n');
    await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_gate_repair');
  });

  it('keeps the pre-apply tree when semantic gate repair fails', async () => {
    const { homeDir, configDir, git, baseSha } = await makeRepo();
    const childDir = join(homeDir, 'child-semantic-repair-fails');
    await git.addWorktree(childDir, 'child-semantic-repair-fails', baseSha);
    const childGit = git.forWorktree(childDir);
    await writeFile(join(childDir, 'wiki/global/a.md'), 'bad semantic ref\n');
    await childGit.commitFiles(['wiki/global/a.md'], 'bad semantic edit', 'System User', 'system@example.com');
    const patchPath = join(homeDir, 'patches/semantic-repair-fails.patch');
    await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair-fails/trace.jsonl'),
      jobId: 'job-semantic-repair-fails',
      connectionId: 'c1',
      sourceKey: 'fake',
      level: 'trace',
    });
    const result = await integrateWorkUnitPatch({
      unitKey: 'wu-not-repaired',
      patchPath,
      integrationGit: git,
      trace,
      author: { name: 'KTX Test', email: 'system@ktx.local' },
      validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')),
      slDisallowed: false,
      allowedTargetConnectionIds: new Set(['c1']),
      repairGateFailure: vi.fn(async () => ({
        status: 'failed' as const,
        attempts: 1,
        reason: 'gate repair completed without editing an allowed path',
      })),
    });
    expect(result).toMatchObject({
      status: 'semantic_conflict',
      gateRepair: {
        status: 'failed',
        attempts: 1,
        reason: 'gate repair completed without editing an allowed path',
      },
    });
    await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('old\n');
  });
});

View file

@ -0,0 +1,321 @@
import { readFile } from 'node:fs/promises';
import type { GitService } from '../../core/index.js';
import type { FinalGateRepairResult } from '../final-gate-repair.js';
import type { IngestTraceWriter } from '../ingest-trace.js';
import { traceTimed } from '../ingest-trace.js';
import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths } from './git-patch.js';
import type { TextualConflictResolutionResult } from './textual-conflict-resolver.js';
/** Outcome reported by the textual-conflict resolver hook, carried on the integration result. */
export type PatchIntegrationTextualResolution =
| { status: 'repaired'; attempts: number; changedPaths: string[] }
| { status: 'failed'; attempts: number; reason: string };
/**
 * Result of integrating one WorkUnit patch.
 *
 * - `accepted`: a commit was created, or the patch touched no paths.
 * - `textual_conflict`: the patch was rejected by policy or failed to apply.
 * - `semantic_conflict`: `validateAppliedTree` rejected the resulting tree.
 *
 * `textualResolution` / `gateRepair` are set only when the matching hook ran.
 */
export type PatchIntegrationResult =
| {
status: 'accepted';
commitSha: string;
touchedPaths: string[];
textualResolution?: PatchIntegrationTextualResolution;
gateRepair?: FinalGateRepairResult;
}
| {
status: 'textual_conflict';
reason: string;
touchedPaths: string[];
textualResolution?: PatchIntegrationTextualResolution;
gateRepair?: FinalGateRepairResult;
}
| {
status: 'semantic_conflict';
reason: string;
touchedPaths: string[];
textualResolution?: PatchIntegrationTextualResolution;
gateRepair?: FinalGateRepairResult;
};
/** Everything `integrateWorkUnitPatch` needs to apply, validate and commit one patch. */
export interface IntegrateWorkUnitPatchInput {
unitKey: string;
/** Path of the patch file on disk; it is read as UTF-8 text. */
patchPath: string;
/** Git service for the integration worktree the patch is applied to. */
integrationGit: GitService;
trace: IngestTraceWriter;
/** Author identity used for the commits this integration creates. */
author: { name: string; email: string };
slDisallowed: boolean;
allowedTargetConnectionIds: ReadonlySet<string>;
/** Semantic gate; a rejection marks the patch as a semantic conflict. */
validateAppliedTree(touchedPaths: string[]): Promise<void>;
/** Optional hook invoked after the patch failed to apply and the tree was reset. */
resolveTextualConflict?(input: {
unitKey: string;
patchPath: string;
touchedPaths: string[];
reason: string;
}): Promise<TextualConflictResolutionResult>;
/** Optional hook invoked when the semantic gate rejects a cleanly applied patch. */
repairGateFailure?(input: {
unitKey: string;
patchPath: string;
touchedPaths: string[];
reason: string;
}): Promise<FinalGateRepairResult>;
}
/** Extracts a printable message: `Error.message` for errors, `String(value)` for anything else. */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
 * Integrates one WorkUnit patch into the integration worktree.
 *
 * Flow:
 *  1. Read the patch and list its touched paths; an empty list is accepted as a no-op.
 *  2. Enforce the patch policy; a rejection is reported as `textual_conflict`.
 *  3. Apply the patch. On failure, reset to the pre-apply HEAD and, if
 *     `resolveTextualConflict` is provided, let it repair the files, then gate
 *     and commit the paths it changed.
 *  4. Run `validateAppliedTree`. On failure, if `repairGateFailure` is provided,
 *     let it repair the files, then gate and commit the paths it changed.
 *  5. Otherwise commit the staged patch.
 *
 * Every non-accepted outcome past step 2 resets the worktree to the pre-apply
 * HEAD (when one exists) before returning.
 *
 * @returns The integration outcome; see `PatchIntegrationResult`.
 */
export async function integrateWorkUnitPatch(input: IntegrateWorkUnitPatchInput): Promise<PatchIntegrationResult> {
// Captured up front so every failure path can roll back to it.
const preApplyHead = await input.integrationGit.revParseHead();
const patch = await readFile(input.patchPath, 'utf-8');
const touchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path);
// A patch without any `diff --git` section changes nothing: accept without committing.
if (touchedPaths.length === 0) {
await input.trace.event('debug', 'integration', 'patch_noop_accepted', {
unitKey: input.unitKey,
patchPath: input.patchPath,
patchBytes: Buffer.byteLength(patch),
});
return { status: 'accepted', commitSha: preApplyHead ?? '', touchedPaths };
}
// Policy gate runs before anything is applied, so no rollback is needed here.
try {
assertPatchAllowedForWorkUnit({
unitKey: input.unitKey,
patch,
slDisallowed: input.slDisallowed,
allowedTargetConnectionIds: input.allowedTargetConnectionIds,
});
} catch (error) {
await input.trace.event('error', 'integration', 'patch_policy_rejected', {
unitKey: input.unitKey,
patchPath: input.patchPath,
touchedPaths,
allowedTargetConnectionIds: [...input.allowedTargetConnectionIds].sort(),
reason: errorMessage(error),
});
// Policy violations are reported under the `textual_conflict` status.
return {
status: 'textual_conflict',
reason: errorMessage(error),
touchedPaths,
};
}
try {
await traceTimed(
input.trace,
'integration',
'patch_apply',
{ unitKey: input.unitKey, patchPath: input.patchPath, touchedPaths },
async () => {
await input.integrationGit.applyPatchFile3WayIndex(input.patchPath);
await input.integrationGit.assertWorktreeClean();
},
);
} catch (error) {
// Apply failed: discard whatever the failed apply left behind.
if (preApplyHead) {
await input.integrationGit.resetHardTo(preApplyHead);
}
const reason = errorMessage(error);
await input.trace.event('error', 'integration', 'patch_textual_conflict', {
unitKey: input.unitKey,
patchPath: input.patchPath,
touchedPaths,
reason,
});
if (!input.resolveTextualConflict) {
return {
status: 'textual_conflict',
reason,
touchedPaths,
};
}
// NOTE(review): if the resolver hook itself rejects, the error propagates
// without a reset; confirm callers handle a possibly dirty worktree.
const textualResolution = await input.resolveTextualConflict({
unitKey: input.unitKey,
patchPath: input.patchPath,
touchedPaths,
reason,
});
if (textualResolution.status === 'failed') {
if (preApplyHead) {
await input.integrationGit.resetHardTo(preApplyHead);
}
return {
status: 'textual_conflict',
reason: textualResolution.reason,
touchedPaths,
textualResolution,
};
}
// The resolver edited files in place; gate exactly the paths it reports as changed.
try {
await traceTimed(
input.trace,
'integration',
'semantic_gate_after_textual_resolution',
{ unitKey: input.unitKey, touchedPaths: textualResolution.changedPaths },
async () => {
await input.validateAppliedTree(textualResolution.changedPaths);
},
);
} catch (semanticError) {
if (preApplyHead) {
await input.integrationGit.resetHardTo(preApplyHead);
}
await input.trace.event('error', 'integration', 'patch_semantic_conflict_after_textual_resolution', {
unitKey: input.unitKey,
patchPath: input.patchPath,
touchedPaths: textualResolution.changedPaths,
reason: errorMessage(semanticError),
});
return {
status: 'semantic_conflict',
reason: errorMessage(semanticError),
touchedPaths: textualResolution.changedPaths,
textualResolution,
};
}
const commit = await input.integrationGit.commitFiles(
textualResolution.changedPaths,
`ingest: resolve WorkUnit ${input.unitKey} conflict`,
input.author.name,
input.author.email,
);
// The resolver claimed a repair but nothing differs from HEAD.
if (!commit.created) {
if (preApplyHead) {
await input.integrationGit.resetHardTo(preApplyHead);
}
const noChangeReason = 'textual resolver produced no committable changes';
await input.trace.event('error', 'integration', 'textual_conflict_resolver_noop', {
unitKey: input.unitKey,
patchPath: input.patchPath,
touchedPaths: textualResolution.changedPaths,
});
return {
status: 'textual_conflict',
reason: noChangeReason,
touchedPaths: textualResolution.changedPaths,
textualResolution,
};
}
await input.trace.event('debug', 'integration', 'patch_accepted_after_textual_resolution', {
unitKey: input.unitKey,
commitSha: commit.commitHash,
touchedPaths: textualResolution.changedPaths,
attempts: textualResolution.attempts,
});
return {
status: 'accepted',
commitSha: commit.commitHash,
touchedPaths: textualResolution.changedPaths,
textualResolution,
};
}
// Patch applied cleanly: run the semantic gate over the touched paths.
try {
await traceTimed(input.trace, 'integration', 'semantic_gate', { unitKey: input.unitKey, touchedPaths }, async () => {
await input.validateAppliedTree(touchedPaths);
});
} catch (error) {
const reason = errorMessage(error);
await input.trace.event('error', 'integration', 'patch_semantic_conflict', {
unitKey: input.unitKey,
patchPath: input.patchPath,
touchedPaths,
reason,
});
if (input.repairGateFailure) {
// The applied patch is still in place while the repair hook runs.
const gateRepair = await input.repairGateFailure({
unitKey: input.unitKey,
patchPath: input.patchPath,
touchedPaths,
reason,
});
if (gateRepair.status === 'failed') {
if (preApplyHead) {
await input.integrationGit.resetHardTo(preApplyHead);
}
return {
status: 'semantic_conflict',
reason: gateRepair.reason,
touchedPaths,
gateRepair,
};
}
// NOTE(review): only the repaired paths are re-validated, not the patch's full
// touched set - confirm that is sufficient when the patch touches more files.
try {
await traceTimed(
input.trace,
'integration',
'semantic_gate_after_gate_repair',
{ unitKey: input.unitKey, touchedPaths: gateRepair.changedPaths },
async () => {
await input.validateAppliedTree(gateRepair.changedPaths);
},
);
} catch (repairValidationError) {
// NOTE(review): unlike the textual-resolution branch, this failure and the
// no-op commit case below emit no dedicated trace event.
if (preApplyHead) {
await input.integrationGit.resetHardTo(preApplyHead);
}
return {
status: 'semantic_conflict',
reason: errorMessage(repairValidationError),
touchedPaths: gateRepair.changedPaths,
gateRepair,
};
}
// NOTE(review): commits gateRepair.changedPaths only; presumably commitFiles also
// includes the patch changes already staged for other paths - verify against GitService.
const commit = await input.integrationGit.commitFiles(
gateRepair.changedPaths,
`ingest: repair WorkUnit ${input.unitKey} gates`,
input.author.name,
input.author.email,
);
if (!commit.created) {
if (preApplyHead) {
await input.integrationGit.resetHardTo(preApplyHead);
}
return {
status: 'semantic_conflict',
reason: 'gate repair produced no committable changes',
touchedPaths: gateRepair.changedPaths,
gateRepair,
};
}
await input.trace.event('debug', 'integration', 'patch_accepted_after_gate_repair', {
unitKey: input.unitKey,
commitSha: commit.commitHash,
touchedPaths: gateRepair.changedPaths,
attempts: gateRepair.attempts,
});
return {
status: 'accepted',
commitSha: commit.commitHash,
touchedPaths: gateRepair.changedPaths,
gateRepair,
};
}
// No repair hook: roll back and report the gate failure.
if (preApplyHead) {
await input.integrationGit.resetHardTo(preApplyHead);
}
return {
status: 'semantic_conflict',
reason,
touchedPaths,
};
}
// Clean apply and gate passed: commit what the apply step staged.
const commit = await input.integrationGit.commitStaged(
`ingest: accept WorkUnit ${input.unitKey}`,
input.author.name,
input.author.email,
);
await input.trace.event('debug', 'integration', 'patch_accepted', {
unitKey: input.unitKey,
commitSha: commit.commitHash,
touchedPaths,
});
return { status: 'accepted', commitSha: commit.commitHash, touchedPaths };
}

View file

@ -0,0 +1,120 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { FileIngestTraceWriter } from '../ingest-trace.js';
import { resolveTextualConflict } from './textual-conflict-resolver.js';
/**
 * Builds a scratch directory containing:
 *  - `workdir/wiki/global/account.md` with the already-accepted content,
 *  - `failed.patch`, a patch against an older base line that would replace it,
 *  - a trace writer targeting `trace.jsonl`.
 */
async function makeHarness() {
  const root = await mkdtemp(join(tmpdir(), 'ktx-textual-resolver-'));
  const workdir = join(root, 'workdir');
  const patchPath = join(root, 'failed.patch');

  const trace = new FileIngestTraceWriter({
    tracePath: join(root, 'trace.jsonl'),
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    runId: 'run-1',
    syncId: 'sync-1',
    level: 'trace',
  });

  const failedPatchLines = [
    'diff --git a/wiki/global/account.md b/wiki/global/account.md',
    'index 8877391..6f63f4d 100644',
    '--- a/wiki/global/account.md',
    '+++ b/wiki/global/account.md',
    '@@ -1 +1 @@',
    '-base line',
    '+proposal line',
    '',
  ];

  await mkdir(join(workdir, 'wiki/global'), { recursive: true });
  await writeFile(join(workdir, 'wiki/global/account.md'), 'accepted line\n', 'utf-8');
  await writeFile(patchPath, failedPatchLines.join('\n'), 'utf-8');

  return { root, workdir, patchPath, trace };
}
// Tests for resolveTextualConflict using a stub agent runner in place of a real model.
describe('resolveTextualConflict', () => {
// The stub drives the resolver tools directly: read the current file, read the
// failed patch, attempt a write outside the touched paths (must be rejected),
// then write the merged content to the one allowed path.
it('lets the repair agent read the failed patch and write only touched paths', async () => {
const { workdir, patchPath, trace } = await makeHarness();
const agentRunner = {
runLoop: vi.fn(async (params: any) => {
const current = await params.toolSet.read_integration_file.execute({ path: 'wiki/global/account.md' });
expect(current.structured).toEqual({ path: 'wiki/global/account.md', exists: true });
expect(current.markdown).toContain('accepted line');
const patch = await params.toolSet.read_failed_patch.execute({});
expect(patch.markdown).toContain('proposal line');
await expect(
params.toolSet.write_integration_file.execute({
path: 'wiki/global/not-allowed.md',
content: 'bad\n',
}),
).rejects.toThrow(/resolver path not allowed/);
await params.toolSet.write_integration_file.execute({
path: 'wiki/global/account.md',
content: 'accepted line\nproposal line\n',
});
return { stopReason: 'natural' as const };
}),
};
const result = await resolveTextualConflict({
agentRunner,
workdir,
unitKey: 'wu-a',
patchPath,
touchedPaths: ['wiki/global/account.md'],
trace,
reason: 'patch failed: wiki/global/account.md',
maxAttempts: 1,
stepBudget: 8,
});
expect(result).toEqual({
status: 'repaired',
attempts: 1,
changedPaths: ['wiki/global/account.md'],
});
await expect(readFile(join(workdir, 'wiki/global/account.md'), 'utf-8')).resolves.toBe(
'accepted line\nproposal line\n',
);
// The resolver must tag the agent run so it can be attributed in telemetry.
expect(agentRunner.runLoop).toHaveBeenCalledWith(
expect.objectContaining({
modelRole: 'repair',
stepBudget: 8,
telemetryTags: expect.objectContaining({
operationName: 'ingest-isolated-diff-textual-resolver',
jobId: 'job-1',
unitKey: 'wu-a',
}),
}),
);
});
// An agent that stops without writing anything counts as a failed attempt.
it('fails when the repair agent completes without editing any touched path', async () => {
const { workdir, patchPath, trace } = await makeHarness();
const result = await resolveTextualConflict({
agentRunner: { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) },
workdir,
unitKey: 'wu-a',
patchPath,
touchedPaths: ['wiki/global/account.md'],
trace,
reason: 'patch failed: wiki/global/account.md',
maxAttempts: 1,
stepBudget: 8,
});
expect(result).toEqual({
status: 'failed',
attempts: 1,
reason: 'resolver completed without editing an allowed path',
});
});
});

View file

@ -0,0 +1,238 @@
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import { z } from 'zod';
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../llm/index.js';
import type { IngestTraceWriter } from '../ingest-trace.js';
import { traceTimed } from '../ingest-trace.js';
/** Outcome of a textual-conflict repair: the paths that were changed, or why it failed. */
export type TextualConflictResolutionResult =
| { status: 'repaired'; attempts: number; changedPaths: string[] }
| { status: 'failed'; attempts: number; reason: string };
/** Arguments for resolving one failed patch inside a working directory. */
export interface ResolveTextualConflictInput {
agentRunner: AgentRunnerPort;
workdir: string;
unitKey: string;
/** Location of the patch that failed to apply. */
patchPath: string;
/** Paths the failed patch touches. */
touchedPaths: string[];
trace: IngestTraceWriter;
/** Failure message from the apply step. */
reason: string;
maxAttempts?: number;
stepBudget?: number;
}
// Tool argument schemas: every path must be a non-empty string.
const readIntegrationFileSchema = z.object({
path: z.string().min(1),
});
// `content` may be the empty string.
const writeIntegrationFileSchema = z.object({
path: z.string().min(1),
content: z.string(),
});
const deleteIntegrationFileSchema = z.object({
path: z.string().min(1),
});
/**
 * Canonicalises a path into forward-slash, repository-relative form.
 *
 * Backslashes become `/`, leading slashes are dropped and empty segments are
 * collapsed.
 *
 * @param path - Path as supplied by the caller or the agent.
 * @returns The cleaned path, segments joined with `/`.
 * @throws Error when nothing remains after cleaning, or when any segment is
 *   `.` or `..`.
 */
function normalizeRepoPath(path: string): string {
  const reject = (): never => {
    throw new Error(`resolver path must be a repository-relative path: ${path}`);
  };
  const segments: string[] = [];
  for (const segment of path.replace(/\\/g, '/').replace(/^\/+/, '').split('/')) {
    if (segment.length === 0) {
      continue;
    }
    if (segment === '.' || segment === '..') {
      reject();
    }
    segments.push(segment);
  }
  if (segments.length === 0) {
    reject();
  }
  return segments.join('/');
}
/**
 * Normalises `path` and checks it against the allowlist.
 *
 * @param path - Path supplied to a resolver tool.
 * @param allowedPaths - Normalised paths the resolver may touch.
 * @returns The normalised path when it is allowlisted.
 * @throws Error when normalisation fails or the path is not in `allowedPaths`.
 */
function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string {
  const repoPath = normalizeRepoPath(path);
  if (allowedPaths.has(repoPath)) {
    return repoPath;
  }
  throw new Error(`resolver path not allowed: ${repoPath}`);
}
/**
 * Reads a UTF-8 file, treating a missing file as a normal outcome.
 *
 * @param path - File to read.
 * @returns `{ exists: true, content }` on success, or
 *   `{ exists: false, content: '' }` when the read fails with `ENOENT`.
 * @throws Any read error whose `code` is not `ENOENT`.
 */
async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> {
  let content: string;
  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    const isMissing = Boolean(error) && typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissing) {
      throw error;
    }
    return { exists: false, content: '' };
  }
  return { exists: true, content };
}
/**
 * Builds the fixed system prompt for the repair agent: a `<role>` section
 * followed by a bulleted `<rules>` section, one line each, joined with `\n`.
 */
function buildResolverSystemPrompt(): string {
  const rules = [
    'Preserve accepted integration content that is unrelated to the failed patch.',
    'Incorporate the failed patch only when the patch evidence is compatible with the current file.',
    'Edit only paths exposed by the resolver tools.',
    'Prefer the smallest text edit that makes the composed artifact coherent.',
    'Do not create new facts that are absent from the current file or failed patch.',
    'Stop after writing the repaired file content.',
  ];
  const lines = [
    '<role>',
    'You repair one failed KTX isolated-diff patch inside the integration worktree.',
    '</role>',
    '<rules>',
    ...rules.map((rule) => `- ${rule}`),
    '</rules>',
  ];
  return lines.join('\n');
}
/**
 * Builds the per-attempt user prompt for the repair agent.
 *
 * The prompt names the WorkUnit, the attempt counter, the patch path, a
 * bulleted list of touched paths and the failure text, then tells the agent
 * which tools to use and in what order.
 *
 * @param input - Values interpolated into the prompt.
 * @returns The prompt text, lines joined with `\n`.
 */
function buildResolverUserPrompt(input: {
  unitKey: string;
  patchPath: string;
  touchedPaths: string[];
  reason: string;
  attempt: number;
  maxAttempts: number;
}): string {
  // Kept as a single joined element so an empty list still yields an empty line.
  const touchedList = input.touchedPaths.map((touched) => `- ${touched}`).join('\n');
  const lines = [
    'Repair isolated-diff textual conflict.',
    `WorkUnit: ${input.unitKey}`,
    `Attempt: ${input.attempt} of ${input.maxAttempts}`,
    `Patch path: ${input.patchPath}`,
    'Touched paths:',
    touchedList,
    'Git apply failure:',
    `${input.reason}`,
    'Use read_failed_patch first. Then read the touched integration files, write the',
    'repaired content, and stop.',
  ];
  return lines.join('\n');
}
/**
 * Builds the four tools exposed to the repair agent.
 *
 * - `read_failed_patch` returns the patch file at `patchPath`.
 * - `read_integration_file` returns an allowlisted file, or a placeholder when
 *   it is missing.
 * - `write_integration_file` replaces an allowlisted file, creating parent
 *   directories as needed.
 * - `delete_integration_file` removes an allowlisted file.
 *
 * The write and delete tools add the normalised path to `editedPaths`, which
 * the caller inspects after the agent run.
 *
 * @param input - Worktree root, patch location, allowlist and the set that
 *   collects edited paths.
 * @returns The tool set keyed by tool name.
 */
function buildToolSet(input: {
  workdir: string;
  patchPath: string;
  allowedPaths: ReadonlySet<string>;
  editedPaths: Set<string>;
}): KtxRuntimeToolSet {
  const { workdir, patchPath, allowedPaths, editedPaths } = input;

  // Allowlist check first, then resolve against the worktree root.
  const locate = (requested: string): { repoPath: string; absolutePath: string } => {
    const repoPath = assertAllowedPath(requested, allowedPaths);
    return { repoPath, absolutePath: join(workdir, repoPath) };
  };

  return {
    read_failed_patch: {
      name: 'read_failed_patch',
      description: 'Read the failed Git patch that could not be applied to the integration worktree.',
      inputSchema: z.object({}),
      execute: async () => {
        const patchText = await readFile(patchPath, 'utf-8');
        const bytes = Buffer.byteLength(patchText);
        return { markdown: patchText, structured: { patchPath, bytes } };
      },
    },
    read_integration_file: {
      name: 'read_integration_file',
      description: 'Read one allowed file from the current integration worktree.',
      inputSchema: readIntegrationFileSchema,
      execute: async (args: z.infer<typeof readIntegrationFileSchema>) => {
        const { repoPath, absolutePath } = locate(args.path);
        const { exists, content } = await readOptionalFile(absolutePath);
        const markdown = exists ? content : `(missing file: ${repoPath})`;
        return { markdown, structured: { path: repoPath, exists } };
      },
    },
    write_integration_file: {
      name: 'write_integration_file',
      description: 'Replace one allowed integration worktree file with repaired text content.',
      inputSchema: writeIntegrationFileSchema,
      execute: async (args: z.infer<typeof writeIntegrationFileSchema>) => {
        const { repoPath, absolutePath } = locate(args.path);
        await mkdir(dirname(absolutePath), { recursive: true });
        await writeFile(absolutePath, args.content, 'utf-8');
        editedPaths.add(repoPath);
        const bytes = Buffer.byteLength(args.content);
        return { markdown: `Wrote ${repoPath}`, structured: { path: repoPath, bytes } };
      },
    },
    delete_integration_file: {
      name: 'delete_integration_file',
      description: 'Delete one allowed integration worktree file when the failed patch proves the deletion is correct.',
      inputSchema: deleteIntegrationFileSchema,
      execute: async (args: z.infer<typeof deleteIntegrationFileSchema>) => {
        const { repoPath, absolutePath } = locate(args.path);
        // `force` makes removal of an already-missing file a no-op.
        await rm(absolutePath, { force: true });
        editedPaths.add(repoPath);
        return { markdown: `Deleted ${repoPath}`, structured: { path: repoPath } };
      },
    },
  };
}
/**
 * Asks the repair agent to resolve one failed isolated-diff patch by editing
 * files in the integration worktree.
 *
 * Each attempt runs the agent inside a timed `textual_conflict_resolver` trace
 * span with a fresh tool set. An attempt succeeds when the run does not end
 * with `stopReason === 'error'` and at least one allowed path was written or
 * deleted. Otherwise the failure is traced and the next attempt starts.
 *
 * @param input - Agent runner, worktree, patch details, trace writer and
 *   optional limits (`maxAttempts` defaults to 1, `stepBudget` to 12).
 * @returns `repaired` with the sorted edited paths, or `failed` with the last
 *   recorded reason. `attempts` is the number of agent runs actually started.
 * @throws Error when a touched path cannot be normalised; errors thrown by the
 *   agent runner or the trace writer propagate unchanged.
 */
export async function resolveTextualConflict(
  input: ResolveTextualConflictInput,
): Promise<TextualConflictResolutionResult> {
  const allowedPaths = new Set(input.touchedPaths.map(normalizeRepoPath));
  // Loop-invariant: the allowlist never changes between attempts.
  const sortedTouchedPaths = [...allowedPaths].sort();
  const maxAttempts = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 12;
  let lastFailure = 'resolver did not run';
  // Counts runs actually started, so a non-positive or fractional `maxAttempts`
  // is never echoed back as the attempt count.
  let attemptsRun = 0;

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    attemptsRun = attempt;
    // Fresh per attempt: edits from an earlier failed run do not count.
    const editedPaths = new Set<string>();
    const traceData = {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths: sortedTouchedPaths,
      attempt,
      maxAttempts,
      reason: input.reason,
    };
    const result = await traceTimed(input.trace, 'resolver', 'textual_conflict_resolver', traceData, async () =>
      input.agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt: buildResolverSystemPrompt(),
        userPrompt: buildResolverUserPrompt({
          unitKey: input.unitKey,
          patchPath: input.patchPath,
          touchedPaths: sortedTouchedPaths,
          reason: input.reason,
          attempt,
          maxAttempts,
        }),
        toolSet: buildToolSet({
          workdir: input.workdir,
          patchPath: input.patchPath,
          allowedPaths,
          editedPaths,
        }),
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-textual-resolver',
          source: input.trace.context.sourceKey,
          jobId: input.trace.context.jobId,
          unitKey: input.unitKey,
        },
      }),
    );

    if (result.stopReason === 'error') {
      lastFailure = result.error?.message ?? 'resolver agent loop errored';
      await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', traceData, result.error);
      continue;
    }

    const changedPaths = [...editedPaths].sort();
    if (changedPaths.length === 0) {
      lastFailure = 'resolver completed without editing an allowed path';
      await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', {
        ...traceData,
        reason: lastFailure,
      });
      continue;
    }

    await input.trace.event('debug', 'resolver', 'textual_conflict_resolver_repaired', {
      ...traceData,
      changedPaths,
    });
    return { status: 'repaired', attempts: attempt, changedPaths };
  }

  return { status: 'failed', attempts: attemptsRun, reason: lastFailure };
}

View file

@ -0,0 +1,144 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { GitService } from '../../core/index.js';
import { FileIngestTraceWriter } from '../ingest-trace.js';
import { runIsolatedWorkUnit } from './work-unit-executor.js';
/**
 * Test fixture: creates a temporary home directory, initialises a `GitService`
 * whose config directory lives under it, and commits one raw-source file.
 *
 * @returns The directories, the service, and `baseSha` — the HEAD commit after
 *   the raw snapshot commit.
 */
async function makeGit() {
const homeDir = await mkdtemp(join(tmpdir(), 'ktx-isolated-wu-'));
const configDir = join(homeDir, 'config');
const git = new GitService({
storage: { configDir, homeDir },
git: {
userName: 'System User',
userEmail: 'system@example.com',
bootstrapMessage: 'init',
bootstrapAuthor: 'system',
bootstrapAuthorEmail: 'system@example.com',
},
});
await git.onModuleInit();
// Seed one committed raw-source file so the tests have a base commit to branch from.
await mkdir(join(configDir, 'raw-sources/c1/fake/s'), { recursive: true });
await writeFile(join(configDir, 'raw-sources/c1/fake/s/a.json'), '{}\n');
await git.commitFiles(['raw-sources/c1/fake/s/a.json'], 'raw snapshot', 'System User', 'system@example.com');
return { homeDir, configDir, git, baseSha: await git.revParseHead() };
}
// Exercises runIsolatedWorkUnit against a real GitService with a stubbed
// session-worktree service that creates actual git worktrees.
describe('runIsolatedWorkUnit', () => {
// Happy path: the child worktree starts at the ingestion base, the run callback
// commits a wiki page, and the resulting patch and trace event are persisted.
it('creates a child worktree at the ingestion base and persists a patch proposal', async () => {
const { homeDir, git, baseSha } = await makeGit();
const childDir = join(homeDir, '.worktrees/session-job-1-wu-1');
const sessionWorktreeService = {
// Stub `create`: adds a real worktree at `startSha` and returns a session object for it.
create: vi.fn(async (_key: string, startSha: string) => {
await mkdir(join(homeDir, '.worktrees'), { recursive: true });
await git.addWorktree(childDir, 'session/job-1-wu-1', startSha);
const childGit = git.forWorktree(childDir);
return {
chatId: 'job-1-wu-1',
workdir: childDir,
branch: 'session/job-1-wu-1',
baseSha: startSha,
createdAt: new Date(),
git: childGit,
config: {},
};
}),
// Stub `cleanup`: only records the call; the worktree is left on disk.
cleanup: vi.fn(async () => undefined),
};
const tracePath = join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl');
const trace = new FileIngestTraceWriter({
tracePath,
jobId: 'job-1',
connectionId: 'c1',
sourceKey: 'fake',
level: 'trace',
});
const result = await runIsolatedWorkUnit({
unitIndex: 0,
ingestionBaseSha: baseSha,
sessionWorktreeService: sessionWorktreeService as never,
patchDir: join(homeDir, '.ktx/ingest-patches/job-1'),
trace,
// Simulated WorkUnit: write and commit one wiki page inside the child worktree.
run: async (child) => {
await mkdir(join(child.workdir, 'wiki/global'), { recursive: true });
await writeFile(join(child.workdir, 'wiki/global/a.md'), '---\nsummary: A\nusage_mode: auto\n---\n\nBody\n');
await child.git.commitFiles(['wiki/global/a.md'], 'test: write wiki', 'KTX Test', 'system@ktx.local');
return {
unitKey: 'wu-1',
status: 'success',
preSha: baseSha,
postSha: await child.git.revParseHead(),
actions: [{ target: 'wiki', type: 'created', key: 'a', detail: 'A' }],
touchedSlSources: [],
};
},
workUnit: { unitKey: 'wu-1', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] },
});
// The session key is `<jobId>-<unitKey>` and the child starts at the base SHA.
expect(sessionWorktreeService.create).toHaveBeenCalledWith('job-1-wu-1', baseSha);
expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
expect(result.status).toBe('success');
if (result.status !== 'success') {
throw new Error('expected successful work unit');
}
const patchPath = result.patchPath;
if (!patchPath) {
throw new Error('expected patch path');
}
// Patch file name is the zero-padded unit index followed by the unit key.
expect(patchPath).toContain('0000-wu-1.patch');
await expect(readFile(patchPath, 'utf-8')).resolves.toContain('wiki/global/a.md');
await expect(readFile(tracePath, 'utf-8')).resolves.toContain('work_unit_child_created');
});
// Failure path: a non-success outcome is returned as-is and cleanup is still
// invoked with the 'success' outcome.
it('removes child worktrees after failed WorkUnit outcomes are traced', async () => {
const { homeDir, git, baseSha } = await makeGit();
const childDir = join(homeDir, '.worktrees/session-job-1-wu-fail');
const sessionWorktreeService = {
create: vi.fn(async (_key: string, startSha: string) => {
await mkdir(join(homeDir, '.worktrees'), { recursive: true });
await git.addWorktree(childDir, 'session/job-1-wu-fail', startSha);
return {
chatId: 'job-1-wu-fail',
workdir: childDir,
branch: 'session/job-1-wu-fail',
baseSha: startSha,
createdAt: new Date(),
git: git.forWorktree(childDir),
config: {},
};
}),
cleanup: vi.fn(async () => undefined),
};
const trace = new FileIngestTraceWriter({
tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'),
jobId: 'job-1',
connectionId: 'c1',
sourceKey: 'fake',
level: 'trace',
});
const result = await runIsolatedWorkUnit({
unitIndex: 0,
ingestionBaseSha: baseSha,
sessionWorktreeService: sessionWorktreeService as never,
patchDir: join(homeDir, '.ktx/ingest-patches/job-1'),
trace,
// Simulated WorkUnit that reports failure without touching the worktree.
run: async () => ({
unitKey: 'wu-fail',
status: 'failed',
reason: 'agent loop errored',
preSha: baseSha,
postSha: baseSha,
actions: [],
touchedSlSources: [],
}),
workUnit: { unitKey: 'wu-fail', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] },
});
expect(result.status).toBe('failed');
expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
});
});

View file

@ -0,0 +1,85 @@
import { mkdir, readFile } from 'node:fs/promises';
import { join } from 'node:path';
import type { SessionOutcome } from '../../core/index.js';
import type { IngestSessionWorktree, IngestSessionWorktreePort } from '../ports.js';
import type { WorkUnit } from '../types.js';
import type { IngestTraceWriter } from '../ingest-trace.js';
import type { WorkUnitOutcome } from '../stages/stage-3-work-units.js';
import { parsePatchTouchedPaths } from './git-patch.js';
/** Arguments accepted by `runIsolatedWorkUnit`. */
export interface RunIsolatedWorkUnitInput {
/** Position of the unit; used as the zero-padded prefix of the patch file name. */
unitIndex: number;
/** Commit the child worktree is created at and the patch is diffed against. */
ingestionBaseSha: string;
/** Service that creates and cleans up the child worktree. */
sessionWorktreeService: IngestSessionWorktreePort;
/** Directory the patch file is written into (created if missing). */
patchDir: string;
/** Trace writer; its `context.jobId` also forms part of the session key. */
trace: IngestTraceWriter;
/** The unit being executed; its `unitKey` names the session, patch and trace events. */
workUnit: WorkUnit;
/** Executes the unit inside the child worktree and reports its outcome. */
run(child: IngestSessionWorktree): Promise<WorkUnitOutcome>;
/** Optional hook awaited after a successful `run`, before the patch is written. */
afterSuccess?(child: IngestSessionWorktree): Promise<void>;
}
/**
 * Derives the patch file name for a WorkUnit.
 *
 * @param unitIndex - Unit position, left-padded with zeros to four characters.
 * @param unitKey - Unit key; every run of characters outside
 *   `[a-zA-Z0-9_.-]` collapses to a single `-`.
 * @returns `<paddedIndex>-<sanitisedKey>.patch`.
 */
function patchFileName(unitIndex: number, unitKey: string): string {
  const indexPrefix = String(unitIndex).padStart(4, '0');
  const sanitizedKey = unitKey.replace(/[^a-zA-Z0-9_.-]+/g, '-');
  return [indexPrefix, '-', sanitizedKey, '.patch'].join('');
}
/**
 * Runs one WorkUnit inside its own child worktree and captures the result as a
 * patch against the ingestion base.
 *
 * Flow: create the child worktree at `ingestionBaseSha`, trace its creation,
 * call `run`, and on success call `afterSuccess`, write the patch for
 * `ingestionBaseSha..HEAD` into `patchDir` and parse its touched paths. The
 * child worktree is always handed back to `sessionWorktreeService.cleanup`
 * once it has been created.
 *
 * @param input - Worktree service, trace writer, unit description and callbacks.
 * @returns The outcome from `run`, extended with `childWorktreePath` and, on
 *   success, `patchPath` and `patchTouchedPaths`.
 * @throws Rethrows any error raised after creation, after tracing
 *   `work_unit_child_failed`; errors from `create` propagate directly.
 */
export async function runIsolatedWorkUnit(input: RunIsolatedWorkUnitInput): Promise<WorkUnitOutcome> {
  const { unitKey } = input.workUnit;
  const sessionKey = `${input.trace.context.jobId}-${unitKey}`;
  // Stays 'crash' unless a path below finishes its own bookkeeping.
  // NOTE(review): how the service treats 'crash' vs 'success' is not visible here — confirm.
  let cleanupOutcome: SessionOutcome = 'crash';
  const child = await input.sessionWorktreeService.create(sessionKey, input.ingestionBaseSha);
  try {
    // Traced inside the try so a failing trace write still reaches the cleanup
    // in `finally` instead of leaking the freshly created worktree.
    await input.trace.event('debug', 'work_unit', 'work_unit_child_created', {
      unitKey,
      unitIndex: input.unitIndex,
      worktreePath: child.workdir,
      baseSha: input.ingestionBaseSha,
    });

    const outcome = await input.run(child);
    if (outcome.status !== 'success') {
      // A reported failure is a handled outcome: no patch is collected.
      cleanupOutcome = 'success';
      await input.trace.event('error', 'work_unit', 'work_unit_failed_before_patch', {
        unitKey,
        reason: outcome.reason ?? 'unknown failure',
      });
      return { ...outcome, childWorktreePath: child.workdir };
    }

    await input.afterSuccess?.(child);
    await mkdir(input.patchDir, { recursive: true });
    const patchPath = join(input.patchDir, patchFileName(input.unitIndex, unitKey));
    await child.git.writeBinaryNoRenamePatch(input.ingestionBaseSha, 'HEAD', patchPath);
    const patch = await readFile(patchPath, 'utf-8');
    const patchTouchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path);
    cleanupOutcome = 'success';
    await input.trace.event('debug', 'work_unit', 'work_unit_patch_collected', {
      unitKey,
      patchPath,
      touchedPaths: patchTouchedPaths,
      patchBytes: Buffer.byteLength(patch),
    });
    return {
      ...outcome,
      patchPath,
      patchTouchedPaths,
      childWorktreePath: child.workdir,
    };
  } catch (error) {
    await input.trace.event(
      'error',
      'work_unit',
      'work_unit_child_failed',
      { unitKey, worktreePath: child.workdir },
      error,
    );
    // Only reached when the failure itself was traced successfully.
    cleanupOutcome = 'success';
    throw error;
  } finally {
    await input.sessionWorktreeService.cleanup(child, cleanupOutcome);
    await input.trace.event('trace', 'work_unit', 'work_unit_child_cleanup', {
      unitKey,
      outcome: cleanupOutcome,
      worktreePath: child.workdir,
    });
  }
}

View file

@ -694,6 +694,14 @@ describe('canonical local ingest', () => {
],
},
});
expect(result.report.body.isolatedDiff).toMatchObject({
enabled: true,
acceptedPatches: 0,
projectionSha: expect.any(String),
});
const projectedSourcePath = join(metricflowProject.projectDir, 'semantic-layer/warehouse/orders.yaml');
await expect(readFile(projectedSourcePath, 'utf-8')).resolves.toContain('name: orders');
const stagedRawPath = join(
metricflowProject.projectDir,

View file

@ -17,6 +17,24 @@ type RuntimeWithConnectionDeps = {
};
};
type RuntimeWithSlValidationDeps = {
deps: {
slValidator: {
validateSingleSource(
deps: unknown,
connectionId: string,
sourceName: string,
): Promise<{ errors: string[]; warnings: string[] }>;
};
};
};
type RuntimeWithSettingsDeps = {
deps: {
settings: Record<string, unknown>;
};
};
function testAgentRunner(): AgentRunnerPort {
return { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' as const }) };
}
@ -144,6 +162,77 @@ describe('createLocalBundleIngestRuntime', () => {
]);
});
it('validates manifest-backed scan sources during local ingest gates', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
[
'tables:',
' payments:',
' table: public.payments',
' columns:',
' - name: payment_id',
' type: string',
' - name: amount',
' type: number',
'',
].join('\n'),
'ktx',
'ktx@example.com',
'Add warehouse manifest',
);
const agentRunner = testAgentRunner();
const runtime = createLocalBundleIngestRuntime({
project,
adapters: [new FakeSourceAdapter()],
agentRunner,
});
const deps = (runtime.runner as unknown as RuntimeWithSlValidationDeps).deps;
await expect(deps.slValidator.validateSingleSource(deps, 'warehouse', 'payments')).resolves.toEqual({
errors: [],
warnings: expect.any(Array),
});
});
it('does not mask malformed direct overlays with manifest-backed fallback validation', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
[
'tables:',
' payments:',
' table: public.payments',
' columns:',
' - name: payment_id',
' type: string',
'',
].join('\n'),
'ktx',
'ktx@example.com',
'Add warehouse manifest',
);
await project.fileStore.writeFile(
'semantic-layer/warehouse/payments.yaml',
['name: payments', 'columns:', ' - [', ''].join('\n'),
'ktx',
'ktx@example.com',
'Add malformed overlay',
);
const agentRunner = testAgentRunner();
const runtime = createLocalBundleIngestRuntime({
project,
adapters: [new FakeSourceAdapter()],
agentRunner,
});
const deps = (runtime.runner as unknown as RuntimeWithSlValidationDeps).deps;
await expect(deps.slValidator.validateSingleSource(deps, 'warehouse', 'payments')).resolves.toEqual({
errors: [expect.stringContaining('invalid YAML')],
warnings: [],
});
});
it('passes project connection config to local ingest query executors', async () => {
const agentRunner = testAgentRunner();
const queryExecutor = {
@ -175,6 +264,27 @@ describe('createLocalBundleIngestRuntime', () => {
});
});
it('defaults local bundle ingest to isolated diffs without a shared-worktree fallback setting', () => {
const runtime = createLocalBundleIngestRuntime({
project,
adapters: [new FakeSourceAdapter()],
agentRunner: testAgentRunner(),
});
const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings;
const fallbackSettingKey = ['sharedWorktree', 'SourceKeys'].join('');
expect(settings).not.toHaveProperty(fallbackSettingKey);
expect(Object.keys(settings).sort()).toEqual([
'ingestTraceLevel',
'memoryIngestionModel',
'probeRowCount',
'workUnitFailureMode',
'workUnitMaxConcurrency',
'workUnitStepBudget',
]);
});
it('accepts a debug LLM request file when constructing the default agent runner', async () => {
await writeFile(
join(project.projectDir, 'ktx.yaml'),

View file

@ -24,7 +24,6 @@ import {
type KtxConnectionInfo,
type KtxQueryResult,
SemanticLayerService,
type SemanticLayerSource,
type SlConnectionCatalogPort,
SlDiscoverTool,
SlEditSourceTool,
@ -76,6 +75,7 @@ import { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evide
import { HistoricSqlProjectionPostProcessor } from './adapters/historic-sql/post-processor.js';
import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './context-evidence/index.js';
import { DiffSetService } from './diff-set.service.js';
import { ingestTracePathForJob, type IngestTraceLevel } from './ingest-trace.js';
import { IngestBundleRunner } from './ingest-bundle.runner.js';
import { PageTriageService } from './page-triage/index.js';
import { createWarehouseVerificationTools } from './tools/warehouse-verification/index.js';
@ -96,6 +96,12 @@ const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url));
const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url));
const LOCAL_AUTHOR = { name: 'KTX Local', email: 'local@ktx.local' };
const LOCAL_SHAPE_WARNING = 'Local ingest validates semantic-layer YAML shape only.';
const INGEST_TRACE_LEVELS = new Set<IngestTraceLevel>(['error', 'info', 'debug', 'trace']);
/**
 * Resolves the ingest trace level from `KTX_INGEST_TRACE_LEVEL`.
 *
 * The value is trimmed and lower-cased before matching, so `TRACE` or
 * ` trace ` select `trace` rather than silently falling back to the default.
 *
 * @param env - Environment to read; defaults to `process.env`.
 * @returns The matching level, or `'debug'` when the variable is unset, empty
 *   or not a known level.
 */
function ingestTraceLevelFromEnv(env: NodeJS.ProcessEnv = process.env): IngestTraceLevel {
  const requested = env.KTX_INGEST_TRACE_LEVEL?.trim().toLowerCase();
  if (!requested) {
    return 'debug';
  }
  // Returning the set member keeps the result typed without an `as` cast.
  for (const level of INGEST_TRACE_LEVELS) {
    if (level === requested) {
      return level;
    }
  }
  return 'debug';
}
export interface CreateLocalBundleIngestRuntimeOptions {
project: KtxLocalProject;
@ -151,6 +157,10 @@ class LocalIngestStorage implements IngestStoragePort {
resolveTranscriptDir(jobId: string): string {
return join(this.project.projectDir, '.ktx/ingest-transcripts', jobId);
}
/** Returns the trace file path for `jobId`, as computed by `ingestTracePathForJob` from `this.homeDir`. */
resolveTracePath(jobId: string): string {
return ingestTracePathForJob(this.homeDir, jobId);
}
}
class LocalIngestLock implements IngestLockPort {
@ -237,22 +247,63 @@ class LocalSlPythonPort implements SlPythonPort {
}
class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
/**
 * Shape-checks an already-parsed source document.
 *
 * A document with neither `table` nor `sql` is checked against the overlay
 * schema; anything else is checked against the full source-definition schema.
 *
 * @param sourceName - Name used as the prefix of every error message.
 * @param parsed - Parsed YAML. Callers cast the parser output, so at runtime
 *   this may be `null` (empty document) or a scalar despite the declared type.
 * @returns No errors plus the shape-only warning on success; otherwise one
 *   error per schema issue and no warnings.
 */
private validateParsedSource(sourceName: string, parsed: Record<string, unknown>) {
  // An empty YAML document parses to null; report that plainly instead of
  // letting the property access below throw a TypeError.
  const candidate: unknown = parsed;
  if (candidate === null || typeof candidate !== 'object') {
    return { errors: [`${sourceName}: source must be a YAML mapping`], warnings: [] };
  }
  const isOverlay = parsed.table == null && parsed.sql == null;
  const schema = isOverlay ? sourceOverlaySchema : sourceDefinitionSchema;
  const result = schema.safeParse(parsed);
  if (result.success) {
    return { errors: [], warnings: [LOCAL_SHAPE_WARNING] };
  }
  return {
    errors: result.error.issues.map(
      (issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`,
    ),
    warnings: [],
  };
}
/**
 * Fallback used when the source file itself could not be read: loads every
 * source for the connection and validates the one named `sourceName`.
 *
 * @param deps - Validation dependencies; only `semanticLayerService` is used.
 * @param connectionId - Connection whose sources are loaded.
 * @param sourceName - Source to find among the loaded sources.
 * @param readError - The original read failure, reported when the source is
 *   absent and there are no load errors.
 * @returns The shape-validation result for the loaded source, or a single
 *   error built from the load errors, the read error, or the fallback failure.
 */
private async validateComposedSource(
  deps: SlValidationDeps,
  connectionId: string,
  sourceName: string,
  readError: unknown,
) {
  const describeError = (error: unknown): string => (error instanceof Error ? error.message : String(error));
  const failure = (detail: string) => ({ errors: [`${sourceName}: ${detail}`], warnings: [] });
  try {
    const loaded = await deps.semanticLayerService.loadAllSources(connectionId);
    const composed = loaded.sources.find((candidate) => candidate.name === sourceName);
    if (composed) {
      return this.validateParsedSource(sourceName, composed as unknown as Record<string, unknown>);
    }
    // Load errors take precedence over the original read error.
    if (loaded.loadErrors.length > 0) {
      return failure(loaded.loadErrors.join('; '));
    }
    return failure(describeError(readError));
  } catch (fallbackError) {
    return failure(describeError(fallbackError));
  }
}
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
let content: string;
try {
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
const parsed = YAML.parse(file.content) as SemanticLayerSource;
const isOverlay = parsed.table == null && parsed.sql == null;
const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
return result.success
? { errors: [], warnings: [LOCAL_SHAPE_WARNING] }
: {
errors: result.error.issues.map(
(issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`,
),
warnings: [],
};
content = file.content;
} catch (error) {
return { errors: [`${sourceName}: ${error instanceof Error ? error.message : String(error)}`], warnings: [] };
return this.validateComposedSource(deps, connectionId, sourceName, error);
}
try {
const parsed = YAML.parse(content) as unknown as Record<string, unknown>;
return this.validateParsedSource(sourceName, parsed);
} catch (error) {
return {
errors: [`${sourceName}: invalid YAML — ${error instanceof Error ? error.message : String(error)}`],
warnings: [],
};
}
}
}
@ -671,6 +722,7 @@ export function createLocalBundleIngestRuntime(
workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency,
workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget,
workUnitFailureMode: options.project.config.ingest.workUnits.failureMode,
ingestTraceLevel: ingestTraceLevelFromEnv(),
},
skillsRegistry: new SkillsRegistryService({ skillsDir, logger }),
promptService,

View file

@ -21,6 +21,7 @@ function snapshot(overrides: Partial<MemoryFlowReplayInput> = {}): MemoryFlowRep
{ type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 },
{ type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 },
{ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 },
{ type: 'stage_progress', stage: 'integration', percent: 80, message: 'Integrating 1/1 patches: orders' },
{ type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 },
{ type: 'work_unit_step', unitKey: 'orders', stepIndex: 1, stepBudget: 40 },
{ type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/orders.md' },

View file

@ -53,6 +53,23 @@ export const memoryFlowEventSchema = z.discriminatedUnion('type', [
stage: z.enum(['source', 'chunks', 'workUnits', 'actions', 'gates', 'saved']),
reason: z.string().min(1),
}),
eventSchema({
type: z.literal('stage_progress'),
stage: z.enum([
'source',
'integration',
'reconciliation',
'post_processor',
'wiki_sl_ref_repair',
'final_gates',
'save',
'provenance',
'report',
]),
percent: z.number().min(0).max(100),
message: z.string().min(1),
transient: z.boolean().optional(),
}),
eventSchema({
type: z.literal('work_unit_started'),
unitKey: z.string().min(1),

View file

@ -44,6 +44,22 @@ type MemoryFlowEventPayload =
stage: MemoryFlowColumnId;
reason: string;
}
| {
type: 'stage_progress';
stage:
| 'source'
| 'integration'
| 'reconciliation'
| 'post_processor'
| 'wiki_sl_ref_repair'
| 'final_gates'
| 'save'
| 'provenance'
| 'report';
percent: number;
message: string;
transient?: boolean;
}
| {
type: 'work_unit_started';
unitKey: string;

View file

@ -16,6 +16,7 @@ import type {
import type { ToolContext, ToolSession, TouchedSlSource } from '../tools/index.js';
import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js';
import type { CanonicalPin } from './canonical-pins.js';
import type { IngestTraceLevel } from './ingest-trace.js';
import type { IngestReportSnapshot } from './reports.js';
import type {
ReconcileCandidateForPrompt,
@ -142,6 +143,7 @@ export interface IngestSettingsPort {
workUnitMaxConcurrency?: number;
workUnitStepBudget?: number;
workUnitFailureMode?: 'abort' | 'continue';
ingestTraceLevel?: IngestTraceLevel;
}
export interface IngestGitAuthor {
@ -155,6 +157,7 @@ export interface IngestStoragePort {
resolveUploadDir(uploadId: string): string;
resolvePullDir(jobId: string): string;
resolveTranscriptDir(jobId: string): string;
resolveTracePath(jobId: string): string;
}
export interface IngestCommitMessagePort {

View file

@ -206,6 +206,47 @@ describe('parseIngestReportSnapshot', () => {
expect(snapshot.body.toolTranscripts).toEqual([]);
});
it('parses failed ingest reports with trace and failure details', () => {
const snapshot = parseIngestReportSnapshot({
id: 'report-failed',
runId: 'run-failed',
jobId: 'job-failed',
connectionId: 'warehouse',
sourceKey: 'metabase',
createdAt: '2026-05-17T12:00:00.000Z',
body: {
status: 'failed',
syncId: 'sync-failed',
diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
commitSha: null,
tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl',
failure: {
phase: 'final_gates',
message: 'final artifact gates failed',
},
workUnits: [],
failedWorkUnits: [],
reconciliationSkipped: true,
conflictsResolved: [],
evictionsApplied: [],
unmappedFallbacks: [],
evictionInputs: [],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
provenanceRows: [],
toolTranscripts: [],
},
});
expect(snapshot.body.status).toBe('failed');
expect(snapshot.body.failure).toEqual({
phase: 'final_gates',
message: 'final artifact gates failed',
});
expect(snapshot.body.tracePath).toContain('trace.jsonl');
});
it('rejects malformed report snapshots with a concise message', () => {
const report = validReportSnapshot();
report.body.workUnits[0] = {
@ -215,4 +256,93 @@ describe('parseIngestReportSnapshot', () => {
expect(() => parseIngestReportSnapshot(report)).toThrow('Invalid ingest report snapshot');
});
it('parses isolated-diff textual resolver counters', () => {
const snapshot = parseIngestReportSnapshot({
id: 'report-1',
runId: 'run-1',
jobId: 'job-1',
connectionId: 'warehouse',
sourceKey: 'metabase',
createdAt: '2026-05-18T00:00:00.000Z',
body: {
status: 'completed',
syncId: 'sync-1',
diffSummary: { added: 0, modified: 1, deleted: 0, unchanged: 0 },
commitSha: 'abc123',
isolatedDiff: {
enabled: true,
acceptedPatches: 2,
textualConflicts: 1,
semanticConflicts: 0,
resolverAttempts: 1,
resolverRepairs: 1,
resolverFailures: 0,
},
workUnits: [],
failedWorkUnits: [],
reconciliationSkipped: true,
conflictsResolved: [],
evictionsApplied: [],
unmappedFallbacks: [],
artifactResolutions: [],
evictionInputs: [],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
provenanceRows: [],
toolTranscripts: [],
},
});
expect(snapshot.body.isolatedDiff).toMatchObject({
resolverAttempts: 1,
resolverRepairs: 1,
resolverFailures: 0,
});
});
it('parses isolated-diff gate repair counters', () => {
const snapshot = parseIngestReportSnapshot({
id: 'report-1',
runId: 'run-1',
jobId: 'job-1',
connectionId: 'warehouse',
sourceKey: 'metabase',
createdAt: '2026-05-18T00:00:00.000Z',
body: {
status: 'completed',
syncId: 'sync-1',
diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
commitSha: 'abc123',
isolatedDiff: {
enabled: true,
acceptedPatches: 1,
textualConflicts: 0,
semanticConflicts: 1,
gateRepairAttempts: 1,
gateRepairs: 1,
gateRepairFailures: 0,
},
workUnits: [],
failedWorkUnits: [],
reconciliationSkipped: true,
conflictsResolved: [],
evictionsApplied: [],
unmappedFallbacks: [],
evictionInputs: [],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
provenanceRows: [],
toolTranscripts: [],
},
});
expect(snapshot.body.isolatedDiff).toMatchObject({
gateRepairAttempts: 1,
gateRepairs: 1,
gateRepairFailures: 0,
});
});
});

View file

@ -123,6 +123,12 @@ const sourceFetchReportSchema = z.object({
warnings: z.array(sourceFetchIssueSchema).default([]),
});
// Shape of the optional `failure` entry in a report body: a non-empty phase and
// message, plus an optional bag of arbitrary details keyed by string.
const ingestReportFailureSchema = z.object({
phase: z.string().min(1),
message: z.string().min(1),
details: z.record(z.string(), z.unknown()).optional(),
});
export const ingestReportSnapshotSchema = z
.object({
id: z.string().min(1),
@ -133,10 +139,30 @@ export const ingestReportSnapshotSchema = z
createdAt: z.string().min(1),
body: z
.object({
status: z.enum(['completed', 'failed']).optional(),
syncId: z.string().min(1),
diffSummary: ingestDiffSummarySchema,
fetch: sourceFetchReportSchema.optional(),
commitSha: z.string().nullable(),
tracePath: z.string().optional(),
failure: ingestReportFailureSchema.optional(),
isolatedDiff: z
.object({
enabled: z.boolean(),
integrationWorktreePath: z.string().optional(),
ingestionBaseSha: z.string().optional(),
projectionSha: z.string().nullable().optional(),
acceptedPatches: z.number().int().min(0),
textualConflicts: z.number().int().min(0),
semanticConflicts: z.number().int().min(0),
resolverAttempts: z.number().int().min(0).default(0),
resolverRepairs: z.number().int().min(0).default(0),
resolverFailures: z.number().int().min(0).default(0),
gateRepairAttempts: z.number().int().min(0).default(0),
gateRepairs: z.number().int().min(0).default(0),
gateRepairFailures: z.number().int().min(0).default(0),
})
.optional(),
workUnits: z.array(
z.object({
unitKey: z.string().min(1),

View file

@ -48,11 +48,35 @@ export interface IngestReportPostProcessorOutcome {
touchedSources: TouchedSlSource[];
}
/** Failure description that an ingest report body may carry in its `failure` field. */
export interface IngestReportFailure {
/** Name of the phase in which the failure occurred. */
phase: string;
/** Failure message. */
message: string;
/** Optional additional data keyed by string. */
details?: Record<string, unknown>;
}
export interface IngestReportBody {
status?: 'completed' | 'failed';
syncId: string;
diffSummary: IngestDiffSummary;
fetch?: SourceFetchReport;
commitSha: string | null;
tracePath?: string;
failure?: IngestReportFailure;
isolatedDiff?: {
enabled: boolean;
integrationWorktreePath?: string;
ingestionBaseSha?: string;
projectionSha?: string | null;
acceptedPatches: number;
textualConflicts: number;
semanticConflicts: number;
resolverAttempts?: number;
resolverRepairs?: number;
resolverFailures?: number;
gateRepairAttempts?: number;
gateRepairs?: number;
gateRepairFailures?: number;
};
workUnits: IngestReportWorkUnit[];
failedWorkUnits: string[];
reconciliationSkipped: boolean;

View file

@ -0,0 +1,38 @@
import { describe, expect, it } from 'vitest';
import {
assertSemanticLayerTargetPathsAllowed,
findDisallowedSemanticLayerTargetPaths,
semanticLayerConnectionIdFromPath,
} from './semantic-layer-target-policy.js';
// Unit tests for the semantic-layer target policy helpers: connection-id extraction,
// violation discovery, and the throwing assertion wrapper.
describe('semantic-layer target policy', () => {
// The connection id is the path segment right after `semantic-layer/`; a leading `a/`
// prefix is tolerated, and paths outside `semantic-layer/` produce null.
it('extracts connection ids from semantic-layer paths', () => {
expect(semanticLayerConnectionIdFromPath('semantic-layer/warehouse/orders.yaml')).toBe('warehouse');
expect(semanticLayerConnectionIdFromPath('a/semantic-layer/finance/orders.yaml')).toBe('finance');
expect(semanticLayerConnectionIdFromPath('wiki/global/orders.md')).toBeNull();
});
// Only semantic-layer paths whose connection is not in the allow-set are reported;
// allowed connections and non-semantic-layer paths are left out of the result.
it('finds semantic-layer paths outside the allowed target connections', () => {
expect(
findDisallowedSemanticLayerTargetPaths({
paths: [
'semantic-layer/warehouse/orders.yaml',
'semantic-layer/finance/orders.yaml',
'wiki/global/orders.md',
],
allowedConnectionIds: new Set(['warehouse']),
}),
).toEqual([{ path: 'semantic-layer/finance/orders.yaml', connectionId: 'finance' }]);
});
// The error message lists every violation as `path (connection)` followed by the allowed set.
it('throws a deterministic error for unauthorized semantic-layer targets', () => {
expect(() =>
assertSemanticLayerTargetPathsAllowed({
paths: ['semantic-layer/finance/orders.yaml', 'semantic-layer/marketing/accounts.yaml'],
allowedConnectionIds: new Set(['warehouse']),
}),
).toThrow(
/semantic-layer target connection not allowed: semantic-layer\/finance\/orders\.yaml \(finance\), semantic-layer\/marketing\/accounts\.yaml \(marketing\); allowed: warehouse/,
);
});
});

View file

@ -0,0 +1,42 @@
/** Input shared by the semantic-layer target policy checks in this module. */
export interface SemanticLayerTargetPolicyInput {
/** Candidate file paths to check; paths that are not under `semantic-layer/<connection>/` are ignored by the checks. */
paths: readonly string[];
/** Connection ids that semantic-layer paths are permitted to target. */
allowedConnectionIds: ReadonlySet<string>;
}
/** One semantic-layer path whose connection id is not in the allowed set. */
export interface SemanticLayerTargetPolicyViolation {
/** The offending path, exactly as it was supplied in the input. */
path: string;
/** Connection id extracted from the path. */
connectionId: string;
}
/**
 * Reads the connection id out of a semantic-layer file path.
 *
 * A single leading `a/` or `b/` segment is dropped first, so prefixed paths such as
 * `a/semantic-layer/finance/orders.yaml` resolve the same way as unprefixed ones.
 *
 * @param path - File path to inspect.
 * @returns The segment directly after `semantic-layer/`, or `null` when the path is not
 *   of the form `semantic-layer/<connection>/...`.
 */
export function semanticLayerConnectionIdFromPath(path: string): string | null {
  const withoutSidePrefix = path.replace(/^[ab]\//, '');
  const captured = /^semantic-layer\/([^/]+)\//.exec(withoutSidePrefix);
  if (captured === null) {
    return null;
  }
  return captured[1] ?? null;
}
/**
 * Collects every semantic-layer path whose connection id is not in the allowed set.
 *
 * Paths that do not resolve to a connection id (i.e. are not under
 * `semantic-layer/<connection>/`) are skipped.
 *
 * @param input - Candidate paths plus the set of permitted connection ids.
 * @returns Violations ordered by connection id, then by path (both via `localeCompare`).
 */
export function findDisallowedSemanticLayerTargetPaths(
  input: SemanticLayerTargetPolicyInput,
): SemanticLayerTargetPolicyViolation[] {
  const violations: SemanticLayerTargetPolicyViolation[] = [];
  for (const path of input.paths) {
    const connectionId = semanticLayerConnectionIdFromPath(path);
    if (connectionId === null || input.allowedConnectionIds.has(connectionId)) {
      continue;
    }
    violations.push({ path, connectionId });
  }
  // A zero connection comparison falls through to the path comparison.
  violations.sort(
    (first, second) =>
      first.connectionId.localeCompare(second.connectionId) || first.path.localeCompare(second.path),
  );
  return violations;
}
/**
 * Throws when any supplied semantic-layer path targets a connection outside the allowed set.
 *
 * @param input - Candidate paths plus the set of permitted connection ids.
 * @throws Error whose message lists each violation as `path (connectionId)` followed by the
 *   sorted allowed connection ids, or `(none)` when the allowed set is empty.
 */
export function assertSemanticLayerTargetPathsAllowed(input: SemanticLayerTargetPolicyInput): void {
  const violations = findDisallowedSemanticLayerTargetPaths(input);
  if (violations.length > 0) {
    const offending = violations.map(({ path, connectionId }) => `${path} (${connectionId})`).join(', ');
    const permitted = [...input.allowedConnectionIds].sort();
    const permittedText = permitted.length > 0 ? permitted.join(', ') : '(none)';
    throw new Error(`semantic-layer target connection not allowed: ${offending}; allowed: ${permittedText}`);
  }
}

View file

@ -41,6 +41,9 @@ export interface WorkUnitOutcome {
touchedSlSources: TouchedSlSource[];
slDisallowed?: boolean;
slDisallowedReason?: 'lookml_connection_mismatch';
patchPath?: string;
patchTouchedPaths?: string[];
childWorktreePath?: string;
}
export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit): Promise<WorkUnitOutcome> {

View file

@ -1,4 +1,5 @@
import type { KtxEmbeddingPort } from '../core/embedding.js';
import type { SemanticLayerService } from '../sl/index.js';
import type { MemoryFlowEventSink } from './memory-flow/types.js';
/** Closed set of trigger labels for an ingest (presumably identifying what initiated the run — confirm with callers). */
export type IngestTrigger = 'upload' | 'scheduled_pull' | 'manual_resync' | 'manual_override';
@ -47,6 +48,7 @@ export interface ChunkResult {
/** Context object describing a fetch (presumably handed to a source adapter's fetch step — confirm with callers). */
export interface FetchContext {
/** Identifier of the connection being fetched. */
connectionId: string;
/** Key identifying the source within the ingest. */
sourceKey: string;
/** Optional memory-flow event sink; when omitted no sink is available to the fetch. */
memoryFlow?: MemoryFlowEventSink;
}
type SourceFetchIssueKind =
@ -96,6 +98,26 @@ export interface ClusterWorkUnitsContext {
embedding: KtxEmbeddingPort;
}
/** Argument type of the optional `SourceAdapter.project` hook. */
export interface DeterministicProjectionContext {
/** Identifier of the connection the projection runs for. */
connectionId: string;
/** Key identifying the source within the ingest. */
sourceKey: string;
/** Identifier of the sync this projection belongs to. */
syncId: string;
/** Identifier of the enclosing job. */
jobId: string;
/** Identifier of the enclosing run. */
runId: string;
/** Directory holding the staged source files (assumed from the name — confirm with callers). */
stagedDir: string;
/** Working directory for the projection (assumed from the name — confirm with callers). */
workdir: string;
/** Optional adapter-specific parse output; deliberately untyped here, so consumers must narrow it. */
parseArtifacts?: unknown;
/** Semantic-layer service instance made available to the projection. */
semanticLayerService: SemanticLayerService;
}
/** Value resolved by the optional `SourceAdapter.project` hook. */
export interface ProjectionResult {
/** Warning messages produced by the projection. */
warnings: string[];
/** Error messages produced by the projection. */
errors: string[];
/** Semantic-layer sources the projection touched, identified by connection id and source name. */
touchedSources: Array<{ connectionId: string; sourceName: string }>;
/** Keys of wiki pages the projection changed. */
changedWikiPageKeys: string[];
/** Optional adapter-specific payload; untyped, so consumers must narrow it. */
result?: unknown;
}
export interface SourceAdapter {
readonly source: string;
readonly skillNames: string[];
@ -109,6 +131,7 @@ export interface SourceAdapter {
listTargetConnectionIds?(stagedDir: string): Promise<string[]>;
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult>;
clusterWorkUnits?(ctx: ClusterWorkUnitsContext): Promise<WorkUnit[]>;
project?(ctx: DeterministicProjectionContext): Promise<ProjectionResult>;
describeScope?(stagedDir: string): Promise<ScopeDescriptor>;
onPullSucceeded?(ctx: {
connectionId: string;

View file

@ -0,0 +1,153 @@
import { describe, expect, it } from 'vitest';
import { findInvalidWikiBodyRefs, parseWikiBodyRefs } from './wiki-body-refs.js';
// Shared fixture: one semantic-layer source exposing two columns, one measure and one segment.
const sources = [
{
name: 'mart_account_segments',
grain: ['account_id'],
columns: [
{ name: 'account_id', type: 'string' },
{ name: 'segment', type: 'string' },
],
joins: [],
measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
segments: [{ name: 'enterprise', expr: "segment = 'enterprise'" }],
table: 'analytics.mart_account_segments',
},
];
// Tests for parsing inline-code references out of wiki bodies and validating them
// against semantic-layer sources and raw tables.
describe('wiki body refs', () => {
// Only inline-code tokens outside fenced blocks are parsed; plain prose, single tokens,
// wildcard patterns and SQL predicates yield no reference.
it('parses only explicit inline-code body references outside fenced blocks', () => {
const body = [
'Valid `mart_account_segments.total_contract_arr` and `source:mart_account_segments`.',
'Also `warehouse/mart_account_segments.segment` and `table:analytics.mart_account_segments`.',
'Ignore prose mart_account_segments.total_contract_arr_cents.',
'Ignore `single_token`.',
'Ignore wildcard pattern `mart_nrr_quarterly.*_arr_cents`.',
'Ignore condition `users.is_internal = false`.',
'```sql',
'select `mart_account_segments.total_contract_arr_cents`',
'```',
].join('\n');
expect(parseWikiBodyRefs(body)).toEqual([
{ kind: 'sl_entity', connectionId: null, sourceName: 'mart_account_segments', entityName: 'total_contract_arr' },
{ kind: 'sl_source', connectionId: null, sourceName: 'mart_account_segments' },
{ kind: 'sl_entity', connectionId: 'warehouse', sourceName: 'mart_account_segments', entityName: 'segment' },
{ kind: 'table', connectionId: null, tableRef: 'analytics.mart_account_segments' },
]);
});
// A `source.entity` token whose source exists but whose entity does not is reported.
it('rejects stale inline-code semantic-layer references', async () => {
const invalid = await findInvalidWikiBodyRefs({
pageKey: 'account-segments',
body: 'ARR is documented as `mart_account_segments.total_contract_arr_cents`.',
visibleConnectionIds: ['warehouse'],
loadSources: async () => sources,
tableExists: async () => true,
});
expect(invalid).toEqual([
'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
]);
});
// Tokens containing `*` are not identifier pairs, so they are never validated as entities.
it('does not treat wildcard inline-code patterns as exact semantic-layer entity references', async () => {
const invalid = await findInvalidWikiBodyRefs({
pageKey: 'revenue-metrics-encoding',
body: 'Cents columns include `mart_nrr_quarterly.*_arr_cents` and `mart_retention_movement_breakout.*_arr_cents`.',
visibleConnectionIds: ['warehouse'],
loadSources: async () => [
{ name: 'mart_nrr_quarterly', grain: [], columns: [], joins: [], measures: [], table: 'analytics.mart_nrr_quarterly' },
{
name: 'mart_retention_movement_breakout',
grain: [],
columns: [],
joins: [],
measures: [],
table: 'analytics.mart_retention_movement_breakout',
},
],
tableExists: async () => true,
});
expect(invalid).toEqual([]);
});
// Predicates such as `users.is_internal = false` contain spaces and operators, so they are skipped.
it('does not treat inline-code SQL predicates as exact semantic-layer entity references', async () => {
const invalid = await findInvalidWikiBodyRefs({
pageKey: 'account-reporting-exclusions',
body: 'Exclude internal users with `users.is_internal = false` and test users with `users.is_test = false`.',
visibleConnectionIds: ['warehouse'],
loadSources: async () => [
{
name: 'users',
grain: [],
columns: [
{ name: 'is_internal', type: 'boolean' },
{ name: 'is_test', type: 'boolean' },
],
joins: [],
measures: [],
table: 'analytics.users',
},
],
tableExists: async () => true,
});
expect(invalid).toEqual([]);
});
// Happy path: measure, column, segment, explicit source and explicit table references all resolve.
it('validates source, dimension, segment, measure, and table references', async () => {
const invalid = await findInvalidWikiBodyRefs({
pageKey: 'account-segments',
body: [
'`mart_account_segments.total_contract_arr`',
'`mart_account_segments.segment`',
'`mart_account_segments.enterprise`',
'`source:mart_account_segments`',
'`table:analytics.mart_account_segments`',
].join('\n'),
visibleConnectionIds: ['warehouse'],
loadSources: async () => sources,
tableExists: async (_connectionId, tableRef) => tableRef === 'analytics.mart_account_segments',
});
expect(invalid).toEqual([]);
});
// A two-part token whose first part matches no loaded source (e.g. `node.v22`) produces no error.
it('ignores two-part inline code when the source is not visible', async () => {
const invalid = await findInvalidWikiBodyRefs({
pageKey: 'engineering-notes',
body: [
'A version token like `node.v22` is not a semantic-layer reference.',
'A raw table must use `table:analytics.mart_account_segments`.',
].join('\n'),
visibleConnectionIds: ['warehouse'],
loadSources: async () => sources,
tableExists: async (_connectionId, tableRef) => tableRef === 'analytics.mart_account_segments',
});
expect(invalid).toEqual([]);
});
// Explicit `source:` and `table:` references are always reported when they cannot be resolved;
// a connection-scoped reference keeps its `connection/` prefix in the message.
it('still rejects explicit missing source and table references', async () => {
const invalid = await findInvalidWikiBodyRefs({
pageKey: 'account-segments',
body: [
'`source:missing_source`',
'`warehouse/source:missing_source`',
'`table:analytics.missing_table`',
].join('\n'),
visibleConnectionIds: ['warehouse'],
loadSources: async () => sources,
tableExists: async () => false,
});
expect(invalid).toEqual([
'account-segments: unknown semantic-layer source missing_source',
'account-segments: unknown semantic-layer source warehouse/missing_source',
'account-segments: unknown raw table analytics.missing_table',
]);
});
});

View file

@ -0,0 +1,141 @@
import type { SemanticLayerSource } from '../sl/index.js';
/**
 * A reference extracted from an inline-code token in a wiki body, discriminated by `kind`:
 * - `sl_entity`: a `source.entity` identifier pair;
 * - `sl_source`: a `source:<name>` token;
 * - `table`: a `table:<ref>` token.
 * `connectionId` is the text before the first `/` in the token, or `null` when the token has no such prefix.
 */
export type WikiBodyRef =
| { kind: 'sl_entity'; connectionId: string | null; sourceName: string; entityName: string }
| { kind: 'sl_source'; connectionId: string | null; sourceName: string }
| { kind: 'table'; connectionId: string | null; tableRef: string };
/** Input to `findInvalidWikiBodyRefs`. */
export interface WikiBodyRefValidationInput {
/** Key of the wiki page being validated; used as the prefix of every error message. */
pageKey: string;
/** Wiki page body text to scan for references. */
body: string;
/** Connections searched when a reference carries no explicit `connection/` prefix. */
visibleConnectionIds: string[];
/** Loads the semantic-layer sources of one connection; the validator caches the result per connection within a call. */
loadSources(connectionId: string): Promise<SemanticLayerSource[]>;
/** Reports whether a raw table reference exists on the given connection. */
tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
// Matches a single-backtick inline-code span on one line; capture group 1 is the text between the backticks.
const inlineCodePattern = /`([^`\n]+)`/g;
/**
 * Splits a body on `\n` and returns only the lines that are outside fenced code blocks.
 *
 * A line that starts (after optional whitespace) with three backticks toggles the fenced
 * state and is itself never returned.
 *
 * @param body - Text to split.
 * @returns The lines outside fences, in their original order.
 */
function visibleLinesOutsideFences(body: string): string[] {
  let insideFence = false;
  return body.split('\n').filter((line) => {
    if (/^\s*```/.test(line)) {
      insideFence = !insideFence;
      return false;
    }
    return !insideFence;
  });
}
/**
 * Splits an optional `connection/` prefix off a token.
 *
 * The split happens at the first `/`, and only when at least one character precedes it;
 * otherwise the whole value is returned as the body with a `null` connection id.
 *
 * @param value - Token text to split.
 * @returns The connection id (or `null`) and the remaining body.
 */
function parseConnectionScoped(value: string): { connectionId: string | null; body: string } {
  const separatorIndex = value.indexOf('/');
  const hasConnectionPrefix = separatorIndex > 0;
  return hasConnectionPrefix
    ? { connectionId: value.slice(0, separatorIndex), body: value.slice(separatorIndex + 1) }
    : { connectionId: null, body: value };
}
/**
 * Tells whether a value is a bare identifier: a letter or underscore followed by any
 * number of letters, digits or underscores (ASCII only).
 *
 * @param value - Text to test.
 * @returns `true` when the whole value is such an identifier.
 */
function isIdentifierToken(value: string): boolean {
  const identifierShape = /^[A-Za-z_][A-Za-z0-9_]*$/;
  return identifierShape.test(value);
}
/**
 * Extracts explicit references from the inline-code spans of a wiki body.
 *
 * Lines inside fenced code blocks are ignored. Each remaining inline-code token is trimmed,
 * an optional `connection/` prefix is split off, and the rest is classified:
 * - `source:<name>` becomes an `sl_source` reference (dropped when the name is empty);
 * - `table:<ref>` becomes a `table` reference (dropped when the ref is empty);
 * - exactly two dot-separated bare identifiers become an `sl_entity` reference;
 * - anything else is ignored.
 *
 * @param body - Wiki page body text.
 * @returns The references in the order they appear in the body.
 */
export function parseWikiBodyRefs(body: string): WikiBodyRef[] {
  const collected: WikiBodyRef[] = [];
  const tokens = visibleLinesOutsideFences(body).flatMap((line) =>
    Array.from(line.matchAll(inlineCodePattern), (hit) => (hit[1] ?? '').trim()),
  );

  for (const token of tokens) {
    if (token === '') {
      continue;
    }
    const { connectionId, body: unscoped } = parseConnectionScoped(token);

    if (unscoped.startsWith('source:')) {
      const sourceName = unscoped.slice('source:'.length).trim();
      if (sourceName) {
        collected.push({ kind: 'sl_source', connectionId, sourceName });
      }
      continue;
    }

    if (unscoped.startsWith('table:')) {
      const tableRef = unscoped.slice('table:'.length).trim();
      if (tableRef) {
        collected.push({ kind: 'table', connectionId, tableRef });
      }
      continue;
    }

    // Only an exact `<identifier>.<identifier>` pair counts as an entity reference.
    const [sourceName, entityName, ...extra] = unscoped.split('.');
    const isPair = sourceName !== undefined && entityName !== undefined && extra.length === 0;
    if (isPair && isIdentifierToken(sourceName) && isIdentifierToken(entityName)) {
      collected.push({ kind: 'sl_entity', connectionId, sourceName, entityName });
    }
  }

  return collected;
}
/**
 * Gathers the names of every measure, column and segment declared on a source.
 *
 * Missing `measures`, `columns` or `segments` lists are treated as empty.
 *
 * @param source - Semantic-layer source to inspect.
 * @returns The set of entity names the source exposes.
 */
function entityNames(source: SemanticLayerSource): Set<string> {
  const names = new Set<string>();
  for (const measure of source.measures ?? []) {
    names.add(measure.name);
  }
  for (const column of source.columns ?? []) {
    names.add(column.name);
  }
  for (const segment of source.segments ?? []) {
    names.add(segment.name);
  }
  return names;
}
/**
 * Validates the explicit references found in a wiki body and returns one message per
 * reference that cannot be resolved.
 *
 * Resolution rules:
 * - A reference with a `connection/` prefix is checked against that connection only;
 *   otherwise every id in `input.visibleConnectionIds` is tried.
 * - `table` references are checked with `input.tableExists` on all candidate connections
 *   at once and fail when none reports the table.
 * - `sl_source` references fail when no candidate connection has a source of that name.
 * - `sl_entity` references fail only when the source is found but has no measure, column
 *   or segment of that name; when the source itself is not found the token is ignored.
 *
 * Sources are loaded at most once per connection within a single call.
 *
 * @param input - Page key, body text and lookup callbacks.
 * @returns Error messages, each prefixed with `<pageKey>: `, in reference order.
 */
export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> {
  const problems: string[] = [];
  const sourcesByConnection = new Map<string, SemanticLayerSource[]>();

  // Per-call memoisation of input.loadSources.
  async function sourcesFor(connectionId: string): Promise<SemanticLayerSource[]> {
    const known = sourcesByConnection.get(connectionId);
    if (known) {
      return known;
    }
    const loaded = await input.loadSources(connectionId);
    sourcesByConnection.set(connectionId, loaded);
    return loaded;
  }

  // Connections are tried in order, one at a time; the first match wins.
  async function locateSource(candidates: string[], sourceName: string): Promise<SemanticLayerSource | null> {
    for (const connectionId of candidates) {
      const available = await sourcesFor(connectionId);
      const hit = available.find((candidate) => candidate.name === sourceName);
      if (hit) {
        return hit;
      }
    }
    return null;
  }

  for (const ref of parseWikiBodyRefs(input.body)) {
    const candidates = ref.connectionId ? [ref.connectionId] : input.visibleConnectionIds;

    if (ref.kind === 'table') {
      const checks = await Promise.all(
        candidates.map((connectionId) => input.tableExists(connectionId, ref.tableRef)),
      );
      if (!checks.some(Boolean)) {
        problems.push(`${input.pageKey}: unknown raw table ${ref.connectionId ? `${ref.connectionId}/` : ''}${ref.tableRef}`);
      }
      continue;
    }

    const source = await locateSource(candidates, ref.sourceName);
    if (source === null) {
      // An unresolved entity token is not an error; only explicit `source:` references are.
      if (ref.kind === 'sl_source') {
        problems.push(
          `${input.pageKey}: unknown semantic-layer source ${ref.connectionId ? `${ref.connectionId}/` : ''}${ref.sourceName}`,
        );
      }
      continue;
    }

    if (ref.kind === 'sl_entity' && !entityNames(source).has(ref.entityName)) {
      problems.push(`${input.pageKey}: unknown semantic-layer entity ${ref.sourceName}.${ref.entityName}`);
    }
  }

  return problems;
}

View file

@ -78,6 +78,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
skills: [],
plugins: [],
tools: [],
managedSettings: {
allowManagedMcpServersOnly: true,
allowedMcpServers: [],
},
strictMcpConfig: true,
allowedTools: [],
permissionMode: 'dontAsk',
persistSession: false,
@ -144,6 +149,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
const options = query.mock.calls[0][0].options;
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
expect(options.managedSettings).toEqual({
allowManagedMcpServersOnly: true,
allowedMcpServers: [{ serverName: 'ktx' }],
});
expect(options.strictMcpConfig).toBe(true);
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
behavior: 'allow',
toolUseID: '1',
@ -176,6 +186,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
skills: [],
plugins: [],
tools: [],
managedSettings: {
allowManagedMcpServersOnly: true,
allowedMcpServers: [],
},
strictMcpConfig: true,
allowedTools: [],
permissionMode: 'dontAsk',
persistSession: false,
@ -268,6 +283,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
const options = query.mock.calls[0][0].options;
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
expect(options.managedSettings).toEqual({
allowManagedMcpServersOnly: true,
allowedMcpServers: [{ serverName: 'ktx' }],
});
expect(options.strictMcpConfig).toBe(true);
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
behavior: 'allow',
toolUseID: '1',
@ -334,6 +354,10 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
answer: 'yes',
});
expect(objectQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ PATH: '/usr/bin' }));
expect(objectQuery.mock.calls[0][0].options.managedSettings).toEqual({
allowManagedMcpServersOnly: true,
allowedMcpServers: [],
});
expect(objectQuery.mock.calls[0][0].options.env).not.toEqual(
expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }), // pragma: allowlist secret
);
@ -374,6 +398,10 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
telemetryTags: { operationName: 'test' },
});
expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
expect(agentQuery.mock.calls[0][0].options.managedSettings).toEqual({
allowManagedMcpServersOnly: true,
allowedMcpServers: [{ serverName: 'ktx' }],
});
expect(agentQuery.mock.calls[0][0].options.env).not.toEqual(
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }),
);
@ -442,6 +470,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
skills: [],
plugins: [],
tools: [],
managedSettings: {
allowManagedMcpServersOnly: true,
allowedMcpServers: [],
},
strictMcpConfig: true,
allowedTools: [],
persistSession: false,
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),

View file

@ -45,6 +45,8 @@ const BUILTIN_TOOLS = [
'TodoWrite',
];
// Name of the SDK MCP server that exposes the runtime tools; used both as the `mcpServers` key
// and as the expected/allowed server name.
const KTX_MCP_SERVER_NAME = 'ktx';
/**
 * Type guard for result messages.
 *
 * @param message - SDK message to test.
 * @returns `true` when the message's `type` is `'result'`.
 */
function isResult(message: SDKMessage): message is SDKResultMessage {
  const { type } = message;
  return type === 'result';
}
@ -113,7 +115,14 @@ function assertInitIsolation(
}
/**
 * Names of the MCP servers expected for a given tool set.
 *
 * @param tools - Runtime tools, if any.
 * @returns A set holding only `KTX_MCP_SERVER_NAME` when at least one tool is supplied,
 *   otherwise an empty set.
 */
function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
  // Single return path: the previous body had a second, unreachable `return` after one that
  // hard-coded the server name instead of using the shared constant.
  if (tools && Object.keys(tools).length > 0) {
    return new Set([KTX_MCP_SERVER_NAME]);
  }
  return new Set();
}
/**
 * Builds the managed-settings object that restricts MCP servers to an explicit allow list.
 *
 * @param serverNames - Server names to allow; may be empty.
 * @returns Settings with `allowManagedMcpServersOnly` set to `true` and one
 *   `{ serverName }` entry per supplied name, in the same order.
 */
function managedMcpSettings(serverNames: string[]): NonNullable<Options['managedSettings']> {
  const allowedMcpServers = serverNames.map((serverName) => ({ serverName }));
  return { allowManagedMcpServersOnly: true, allowedMcpServers };
}
function baseOptions(input: {
@ -125,6 +134,7 @@ function baseOptions(input: {
}): Options {
const toolIds = mcpToolIds(input.tools ?? {});
const allowedToolIds = new Set(toolIds);
const expectedServerNames = [...expectedMcpServerNames(input.tools)];
return {
cwd: input.projectDir,
model: input.model,
@ -133,6 +143,8 @@ function baseOptions(input: {
skills: [],
plugins: [],
tools: [],
managedSettings: managedMcpSettings(expectedServerNames),
strictMcpConfig: true,
allowedTools: toolIds,
disallowedTools: BUILTIN_TOOLS,
canUseTool: async (toolName, _toolInput, options) =>
@ -147,7 +159,14 @@ function baseOptions(input: {
persistSession: false,
env: createKtxClaudeCodeEnv(input.env),
...(input.tools && Object.keys(input.tools).length > 0
? { mcpServers: { ktx: createSdkMcpServer({ name: 'ktx', tools: createClaudeSdkTools(input.tools) }) } }
? {
mcpServers: {
[KTX_MCP_SERVER_NAME]: createSdkMcpServer({
name: KTX_MCP_SERVER_NAME,
tools: createClaudeSdkTools(input.tools),
}),
},
}
: {}),
};
}

View file

@ -99,6 +99,27 @@ describe('SlEditSourceTool — session gating', () => {
);
});
// With a session that only allows `warehouse`, an edit aimed at `finance` must fail,
// mention the out-of-session connection in the markdown, and record no session action.
it('rejects session-scoped edits outside allowed target connections', async () => {
const { tool } = makeTool();
const session = makeSession({
allowedConnectionNames: new Set(['warehouse']),
});
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: 'finance',
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect(result.structured.success).toBe(false);
expect(result.markdown).toContain('connectionId "finance" is outside this ingest session');
expect(session.actions).toEqual([]);
});
it('indexes normally when no session is present', async () => {
const { tool, slSearchService } = makeTool();
const result = await tool.call(

View file

@ -1,6 +1,12 @@
import YAML from 'yaml';
import { z } from 'zod';
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
import {
addTouchedSlSource,
type ToolContext,
type ToolOutput,
validateActionRawPaths,
validateActionTargetConnection,
} from '../../tools/index.js';
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
import type { SemanticLayerSource } from '../types.js';
@ -79,6 +85,10 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
const skipIndex = context.session?.isWorktreeScoped === true;
const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId);
if (!targetConnectionValidation.ok) {
return this.buildOutput(false, [targetConnectionValidation.error], sourceName);
}
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
if (!rawPathValidation.ok) {
return this.buildOutput(false, [rawPathValidation.error], sourceName);

View file

@ -133,6 +133,34 @@ describe('SlWriteSourceTool — session gating', () => {
);
});
// With a session that only allows `warehouse`, a write aimed at `finance` must fail,
// mention the out-of-session connection in the markdown, and record no session action.
it('rejects session-scoped writes outside allowed target connections', async () => {
const { tool } = makeTool();
const session = makeSession({
allowedConnectionNames: new Set(['warehouse']),
});
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: 'finance',
sourceName: 'finance_orders',
source: {
name: 'finance_orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'string' }],
measures: [],
joins: [],
} as any,
} as any,
context,
);
expect(result.structured.success).toBe(false);
expect(result.markdown).toContain('connectionId "finance" is outside this ingest session');
expect(session.actions).toEqual([]);
});
it('indexes normally when no session is present', async () => {
const { tool, slSearchService } = makeTool();
const result = await tool.call(

View file

@ -1,6 +1,12 @@
import YAML from 'yaml';
import { z } from 'zod';
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
import {
addTouchedSlSource,
type ToolContext,
type ToolOutput,
validateActionRawPaths,
validateActionTargetConnection,
} from '../../tools/index.js';
import { sourceOverlaySchema } from '../schemas.js';
import type { SemanticLayerService } from '../semantic-layer.service.js';
import type { SemanticLayerSource } from '../types.js';
@ -106,6 +112,10 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
const skipIndex = context.session?.isWorktreeScoped === true;
const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId);
if (!targetConnectionValidation.ok) {
return this.buildOutput(false, [targetConnectionValidation.error], sourceName);
}
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
if (!rawPathValidation.ok) {
return this.buildOutput(false, [rawPathValidation.error], sourceName);

View file

@ -0,0 +1,23 @@
import type { ToolSession } from './tool-session.js';
/** Outcome of `validateActionTargetConnection`: either success, or failure carrying an error message. */
type ActionTargetConnectionValidation = { ok: true } | { ok: false; error: string };
/**
 * Checks that an action's target connection is permitted by the current tool session.
 *
 * The check passes when there is no session, when the session declares no
 * `allowedConnectionNames`, or when the connection id is in that collection.
 *
 * @param session - Current tool session, if any.
 * @param connectionId - Connection the action wants to target.
 * @returns `{ ok: true }` when permitted; otherwise `{ ok: false, error }` where the message
 *   names the connection and lists the sorted allowed connections (or `(none)` when empty).
 */
export function validateActionTargetConnection(
  session: ToolSession | undefined,
  connectionId: string,
): ActionTargetConnectionValidation {
  const permitted = session?.allowedConnectionNames;
  if (!permitted || permitted.has(connectionId)) {
    return { ok: true };
  }

  const permittedNames = [...permitted].sort();
  const listing = permittedNames.length > 0 ? permittedNames.join(', ') : '(none)';
  return {
    ok: false,
    error: `connectionId "${connectionId}" is outside this ingest session's allowed target connections: ${listing}`,
  };
}

View file

@ -32,6 +32,7 @@ export type { SqlEdit } from './sql-edit-replacer.js';
export { applySqlEdits } from './sql-edit-replacer.js';
export type { IngestToolMetadata, MemoryAction, ToolSession } from './tool-session.js';
export { validateActionRawPaths } from './action-raw-paths.js';
export { validateActionTargetConnection } from './action-target-connection.js';
export type { TouchedSlSource, TouchedSlSourceSet } from './touched-sl-sources.js';
export {
addTouchedSlSource,