mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
feat(ingest): default local ingest to isolated diffs (#128)
* docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify 
isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
parent
d1c84e5564
commit
e64da5a85d
66 changed files with 22346 additions and 514 deletions
45
packages/context/src/core/git.service.patch.test.ts
Normal file
45
packages/context/src/core/git.service.patch.test.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
/**
 * Creates a GitService rooted in a fresh temporary home directory and runs its
 * `onModuleInit()` hook.
 *
 * @returns the temporary home directory, the `config` directory inside it, and
 *   the initialised service.
 */
async function makeGit() {
  const tempPrefix = join(tmpdir(), 'ktx-git-patch-');
  const homeDir = await mkdtemp(tempPrefix);
  const configDir = join(homeDir, 'config');

  const git = new GitService({
    storage: { configDir, homeDir },
    git: {
      userName: 'System User',
      userEmail: 'system@example.com',
      bootstrapMessage: 'init',
      bootstrapAuthor: 'system',
      bootstrapAuthorEmail: 'system@example.com',
    },
  });
  await git.onModuleInit();

  return { homeDir, configDir, git };
}
|
||||
|
||||
describe('GitService patch helpers', () => {
  it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
    const authorName = 'System User';
    const authorEmail = 'system@example.com';
    const pagePath = 'wiki/global/page.md';

    const { homeDir, configDir, git } = await makeGit();
    const sourcePage = join(configDir, 'wiki/global/page.md');

    // First commit: the page in its "old" state becomes the patch base.
    await mkdir(join(configDir, 'wiki/global'), { recursive: true });
    await writeFile(sourcePage, 'old\n');
    await git.commitFiles([pagePath], 'add page', authorName, authorEmail);
    const base = await git.revParseHead();

    // Second commit: the edit that the patch has to carry.
    await writeFile(sourcePage, 'new\n');
    await git.commitFiles([pagePath], 'edit page', authorName, authorEmail);
    const patchPath = join(homeDir, 'proposal.patch');
    await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);

    // Replay the patch in a separate worktree created at the base commit.
    const targetDir = join(homeDir, 'target');
    await git.addWorktree(targetDir, 'target', base);
    const targetGit = git.forWorktree(targetDir);
    await targetGit.applyPatchFile3WayIndex(patchPath);
    await targetGit.commitStaged('apply proposal', authorName, authorEmail);

    const appliedPage = readFile(join(targetDir, 'wiki/global/page.md'), 'utf-8');
    await expect(appliedPage).resolves.toBe('new\n');
  });
});
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
import { promises as fs } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { dirname, join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import { noopLogger, resolveConfigDir, type KtxCoreConfig, type KtxLogger } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
|
|
@ -747,6 +747,55 @@ export class GitService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Writes the output of `git diff --binary --no-renames <from>..<to>` to a file.
 *
 * The parent directory of `patchPath` is created if needed. The whole
 * operation runs inside `withMutationQueue`.
 *
 * @param from - revision on the left side of the `..` range
 * @param to - revision on the right side of the `..` range
 * @param patchPath - destination file for the patch text (written as UTF-8)
 */
async writeBinaryNoRenamePatch(from: string, to: string, patchPath: string): Promise<void> {
  await this.withMutationQueue(async () => {
    const revisionRange = `${from}..${to}`;
    const patchText = await this.git.raw(['diff', '--binary', '--no-renames', revisionRange]);

    const patchDir = dirname(patchPath);
    await fs.mkdir(patchDir, { recursive: true });
    await fs.writeFile(patchPath, patchText, 'utf-8');
  });
}
|
||||
|
||||
/**
 * Runs `git apply --3way --index <patchPath>` inside `withMutationQueue`.
 *
 * @param patchPath - path of the patch file handed to `git apply`
 */
async applyPatchFile3WayIndex(patchPath: string): Promise<void> {
  const applyArgs = ['apply', '--3way', '--index', patchPath];
  await this.withMutationQueue(async () => {
    await this.git.raw(applyArgs);
  });
}
|
||||
|
||||
/**
 * Commits whatever is currently staged and reports the resulting HEAD commit.
 *
 * When `git diff --cached --name-only` lists nothing, no commit is made and
 * the current HEAD is described with `created: false`. Otherwise a commit is
 * created with the given message and `--author`, and HEAD is described with
 * `created: true`.
 *
 * @param commitMessage - message for the new commit
 * @param author - author name used in the `--author` option
 * @param authorEmail - author email used in the `--author` option
 * @returns details of HEAD after the call, read from `rev-parse HEAD` and the
 *   latest log entry
 */
async commitStaged(commitMessage: string, author: string, authorEmail: string): Promise<GitCommitInfo> {
  return this.withMutationQueue(async () => {
    const stagedNames = await this.git.diff(['--cached', '--name-only']);
    const created = stagedNames.trim().length > 0;
    if (created) {
      await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` });
    }

    const commitHash = (await this.git.revparse(['HEAD'])).trim();
    const { latest } = await this.git.log({ maxCount: 1 });

    // Used only when the log entry has no date: "now" for a fresh commit,
    // the epoch when nothing was committed.
    const fallbackDate = (): string => (created ? new Date() : new Date(0)).toISOString();

    return {
      commitHash,
      shortHash: commitHash.substring(0, 8),
      message: latest?.message ?? (created ? commitMessage : ''),
      author: latest?.author_name ?? (created ? author : ''),
      authorEmail: latest?.author_email ?? (created ? authorEmail : ''),
      timestamp: latest?.date ?? fallbackDate(),
      committedDate: latest?.date ? new Date(latest.date).toISOString() : fallbackDate(),
      created,
    };
  });
}
|
||||
|
||||
private async fileExists(path: string): Promise<boolean> {
|
||||
try {
|
||||
await fs.access(path);
|
||||
|
|
|
|||
|
|
@ -138,6 +138,52 @@ describe('fetchMetabaseBundle', () => {
|
|||
expect(warn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('emits memory-flow progress while fetching Metabase cards', async () => {
  // Every event handed to memoryFlow.emit is captured for the assertions below.
  const events: unknown[] = [];

  // Matcher for one `source`-stage progress event carrying the given message.
  const sourceProgress = (message: string, extra: Record<string, unknown> = {}) =>
    expect.objectContaining({ type: 'stage_progress', stage: 'source', message, ...extra });

  await fetchMetabaseBundle({
    pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 },
    stagedDir,
    ctx: {
      ...makeFetchContext(),
      memoryFlow: {
        emit: (event) => events.push(event),
        update: vi.fn(),
        finish: vi.fn(),
        snapshot: vi.fn(),
      },
    },
    clientFactory,
    sourceStateReader,
  });

  const expectedEvents = [
    sourceProgress('Fetching Metabase database 42 metadata'),
    sourceProgress('Fetching 1 Metabase card for database 42'),
    sourceProgress('Checked 1/1 Metabase cards for database 42; wrote 1', { transient: true }),
    sourceProgress('Fetched Metabase database 42: 1 cards, 0 unresolved'),
  ];
  expect(events).toEqual(expect.arrayContaining(expectedEvents));
});
|
||||
|
||||
it('routes Metabase fetch warnings through the injected logger', async () => {
|
||||
const logger = {
|
||||
log: vi.fn(),
|
||||
|
|
|
|||
|
|
@ -83,6 +83,15 @@ function resolvePath(index: Map<number | 'root', CollectionNode>, collectionId:
|
|||
export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Promise<void> {
|
||||
const pullConfig: MetabasePullConfig = parseMetabasePullConfig(params.pullConfig);
|
||||
const logger = params.logger ?? noopMetabaseFetchLogger;
|
||||
// Emits a `source`-stage progress event on the optional memory flow of the fetch
// context; does nothing when no memory flow is attached. The `transient` key is
// only included in the event when the flag is true.
const emitFetchProgress = (percent: number, message: string, transient = false): void => {
  const flow = params.ctx.memoryFlow;
  if (flow == null) {
    return;
  }
  const transientFlag = transient ? { transient } : {};
  flow.emit({
    type: 'stage_progress',
    stage: 'source',
    percent,
    message,
    ...transientFlag,
  });
};
|
||||
const syncState = await params.sourceStateReader.getSourceState(pullConfig.metabaseConnectionId);
|
||||
const mapping = syncState.mappings.find(
|
||||
(m) => m.metabaseDatabaseId === pullConfig.metabaseDatabaseId && m.syncEnabled,
|
||||
|
|
@ -100,6 +109,7 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
|
||||
const client = await params.clientFactory.createClient(pullConfig, params.ctx);
|
||||
try {
|
||||
emitFetchProgress(26, `Fetching Metabase database ${pullConfig.metabaseDatabaseId} metadata`);
|
||||
let mappingDatabaseName = mapping.metabaseDatabaseName;
|
||||
let mappingEngine = mapping.metabaseEngine;
|
||||
if (mappingDatabaseName === null) {
|
||||
|
|
@ -133,6 +143,12 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
await mkdir(join(params.stagedDir, STAGED_FILES.databasesDir), { recursive: true });
|
||||
|
||||
const cardIdsToFetch = await resolveCardIdsToFetch(client, scope, pullConfig.metabaseDatabaseId, logger);
|
||||
emitFetchProgress(
|
||||
28,
|
||||
`Fetching ${cardIdsToFetch.length} Metabase card${cardIdsToFetch.length === 1 ? '' : 's'} for database ${
|
||||
pullConfig.metabaseDatabaseId
|
||||
}`,
|
||||
);
|
||||
|
||||
const referencedCollectionIds = new Set<number>();
|
||||
let writtenCards = 0;
|
||||
|
|
@ -212,7 +228,19 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
}
|
||||
}
|
||||
}
|
||||
const knownTotal = Math.max(cardIdsToFetch.length, fetched.size + queue.length);
|
||||
if (fetched.size === 1 || fetched.size % 10 === 0 || queue.length === 0) {
|
||||
emitFetchProgress(
|
||||
30,
|
||||
`Checked ${fetched.size}/${knownTotal} Metabase cards for database ${pullConfig.metabaseDatabaseId}; wrote ${writtenCards}`,
|
||||
true,
|
||||
);
|
||||
}
|
||||
}
|
||||
emitFetchProgress(
|
||||
32,
|
||||
`Fetched Metabase database ${pullConfig.metabaseDatabaseId}: ${writtenCards} cards, ${unresolvedCards.length} unresolved`,
|
||||
);
|
||||
|
||||
for (const colId of referencedCollectionIds) {
|
||||
const node = collectionIndex.get(colId);
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
|
||||
import type { SourceAdapter } from '../../types.js';
|
||||
import type { MetricFlowParseResult } from './deep-parse.js';
|
||||
import { MetricflowSourceAdapter } from './metricflow.adapter.js';
|
||||
import { readMetricflowProjectionConfig, writeMetricflowProjectionConfig } from './projection-config.js';
|
||||
|
||||
function compileOnlyRequiredDepsCheck(): void {
|
||||
// @ts-expect-error MetricflowSourceAdapter requires an explicit cache home.
|
||||
|
|
@ -22,6 +24,25 @@ async function makeRepo(tmpRoot: string, files: Record<string, string>) {
|
|||
return makeLocalGitRepo(fixtureDir, join(tmpRoot, 'origin'));
|
||||
}
|
||||
|
||||
/**
 * Builds the parse-result fixture used by the projection tests: a single
 * `orders` semantic model, no cross-model metrics or relationships, and one
 * parser warning.
 */
function metricflowParseResult(): MetricFlowParseResult {
  const ordersModel: MetricFlowParseResult['semanticModels'][number] = {
    name: 'orders',
    description: 'Orders',
    modelRef: 'orders',
    dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }],
    measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }],
    entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }],
    defaultTimeDimension: null,
  };

  return {
    semanticModels: [ordersModel],
    crossModelMetrics: [],
    relationships: [],
    warnings: ['parser warning'],
  };
}
|
||||
|
||||
describe('MetricflowSourceAdapter', () => {
|
||||
let tmpRoot: string;
|
||||
let stagedDir: string;
|
||||
|
|
@ -127,4 +148,119 @@ describe('MetricflowSourceAdapter', () => {
|
|||
await expect(readFile(join(stagedDir, 'models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models');
|
||||
expect(await adapter.detect(stagedDir)).toBe(true);
|
||||
});
|
||||
|
||||
it('persists parsed target tables for deterministic projection during fetch', async () => {
  const repo = await makeRepo(tmpRoot, {
    'dbt_project.yml': 'name: analytics\n',
    'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n',
  });
  const ordersTarget = {
    ok: true,
    catalog: null,
    schema: 'analytics',
    name: 'orders',
    canonicalTable: 'analytics.orders',
  } as const;

  await adapter.fetch?.(
    {
      repoUrl: repo.repoUrl,
      branch: 'main',
      path: null,
      authToken: null,
      parsedTargetTables: { orders: ordersTarget },
    },
    stagedDir,
    { connectionId: 'warehouse-1', sourceKey: 'metricflow' },
  );

  // The projection config read back from the staged dir must carry the target through.
  const { ok, schema, name } = ordersTarget;
  const persistedConfig = readMetricflowProjectionConfig(stagedDir);
  await expect(persistedConfig).resolves.toMatchObject({
    parsedTargetTables: {
      orders: { ok, schema, name },
    },
  });
});
|
||||
|
||||
it('projects parsed MetricFlow semantic models in the integration worktree', async () => {
  const integrationWorkdir = '/tmp/metricflow-integration';

  await writeMetricflowProjectionConfig(stagedDir, {
    parsedTargetTables: {
      orders: {
        ok: true,
        catalog: null,
        schema: 'analytics',
        name: 'orders',
        canonicalTable: 'analytics.orders',
      },
    },
  });

  // Service handed back by forWorktree(); the write is asserted on this object.
  const worktreeService = {
    getManifestEntry: vi.fn().mockResolvedValue(null),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
    loadSource: vi.fn().mockResolvedValue(null),
    writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
  };
  const semanticLayerService = {
    forWorktree: vi.fn().mockReturnValue(worktreeService),
    getManifestEntry: vi.fn(),
    isManifestBacked: vi.fn(),
    loadAllSources: vi.fn(),
    loadSource: vi.fn(),
    writeSource: vi.fn(),
  };

  const projection = await adapter.project?.({
    connectionId: 'warehouse-1',
    sourceKey: 'metricflow',
    syncId: 'sync-1',
    jobId: 'job-1',
    runId: 'run-1',
    stagedDir,
    workdir: integrationWorkdir,
    parseArtifacts: metricflowParseResult(),
    semanticLayerService: semanticLayerService as never,
  });

  expect(semanticLayerService.forWorktree).toHaveBeenCalledWith(integrationWorkdir);
  expect(worktreeService.writeSource).toHaveBeenCalledWith(
    'warehouse-1',
    expect.objectContaining({ name: 'orders' }),
    'dbt MetricFlow',
    expect.any(String),
    'dbt MetricFlow sync: create source orders',
    { skipValidation: true },
  );
  expect(projection).toMatchObject({
    warnings: ['parser warning'],
    errors: [],
    touchedSources: [{ connectionId: 'warehouse-1', sourceName: 'orders' }],
    changedWikiPageKeys: [],
  });
});
|
||||
|
||||
it('returns a projection error when parse artifacts are missing', async () => {
  const expectedFailure = {
    warnings: [],
    errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
    touchedSources: [],
    changedWikiPageKeys: [],
  };

  // parseArtifacts is deliberately undefined, so the adapter must bail out
  // before touching the (empty) semantic layer service stub.
  const projection = await adapter.project?.({
    connectionId: 'warehouse-1',
    sourceKey: 'metricflow',
    syncId: 'sync-1',
    jobId: 'job-1',
    runId: 'run-1',
    stagedDir,
    workdir: '/tmp/metricflow-integration',
    parseArtifacts: undefined,
    semanticLayerService: {} as never,
  });

  expect(projection).toMatchObject(expectedFailure);
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,10 +1,23 @@
|
|||
import { join } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js';
|
||||
import type {
|
||||
ChunkResult,
|
||||
DeterministicProjectionContext,
|
||||
DiffSet,
|
||||
FetchContext,
|
||||
ProjectionResult,
|
||||
SourceAdapter,
|
||||
} from '../../types.js';
|
||||
import { chunkMetricFlowProject } from './chunk.js';
|
||||
import { detectMetricFlowStagedDir } from './detect.js';
|
||||
import { parseMetricflowFiles, type MetricFlowParseResult } from './deep-parse.js';
|
||||
import { fetchMetricflowRepo } from './fetch.js';
|
||||
import { importMetricflowSemanticModels } from './import-semantic-models.js';
|
||||
import { parseMetricFlowStagedDir, type ParsedMetricFlowProject } from './parse.js';
|
||||
import {
|
||||
metricflowHostTablesFromParsedTargets,
|
||||
readMetricflowProjectionConfig,
|
||||
writeMetricflowProjectionConfig,
|
||||
} from './projection-config.js';
|
||||
import { parseMetricflowPullConfig } from './pull-config.js';
|
||||
|
||||
export interface MetricflowSourceAdapterDeps {
|
||||
|
|
@ -33,6 +46,9 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
cacheDir: this.resolveCacheDir(ctx.connectionId),
|
||||
stagedDir,
|
||||
});
|
||||
await writeMetricflowProjectionConfig(stagedDir, {
|
||||
parsedTargetTables: config.parsedTargetTables,
|
||||
});
|
||||
}
|
||||
|
||||
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
|
||||
|
|
@ -46,6 +62,37 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
return { ...chunk, parseArtifacts };
|
||||
}
|
||||
|
||||
/**
 * Deterministically projects the parsed MetricFlow semantic models by calling
 * `importMetricflowSemanticModels` for `ctx.workdir`.
 *
 * Host tables are derived from the projection config stored in
 * `ctx.stagedDir`. When `ctx.parseArtifacts` does not look like a MetricFlow
 * parse result, nothing is imported and a single error is returned instead.
 *
 * @param ctx - projection context (connection, staged dir, workdir, parse artifacts, services)
 * @returns warnings, errors and touched sources reported by the import; `changedWikiPageKeys` is always empty
 */
async project(ctx: DeterministicProjectionContext): Promise<ProjectionResult> {
  const { parseArtifacts } = ctx;
  if (!isMetricFlowParseResult(parseArtifacts)) {
    return {
      warnings: [],
      errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
      touchedSources: [],
      changedWikiPageKeys: [],
    };
  }

  const { parsedTargetTables } = await readMetricflowProjectionConfig(ctx.stagedDir);
  const hostTables = metricflowHostTablesFromParsedTargets(parsedTargetTables);

  const result = await importMetricflowSemanticModels(
    { semanticLayerService: ctx.semanticLayerService },
    {
      connectionId: ctx.connectionId,
      parseResult: parseArtifacts,
      targetSchema: null,
      hostTables,
      workdir: ctx.workdir,
    },
  );

  const { warnings, errors, touchedSources } = result;
  return {
    result,
    warnings,
    errors,
    touchedSources,
    changedWikiPageKeys: [],
  };
}
|
||||
|
||||
private resolveCacheDir(connectionId: string): string {
|
||||
return join(this.deps.homeDir, 'ingest-metricflow-repos', connectionId);
|
||||
}
|
||||
|
|
@ -54,3 +101,16 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
/** Runs the deep MetricFlow parser over the files of an already-parsed staged project. */
function parseMetricflowStagedDirForImport(project: ParsedMetricFlowProject): MetricFlowParseResult {
  const { files } = project;
  return parseMetricflowFiles(files);
}
|
||||
|
||||
/**
 * Structural type guard for MetricFlow parse results.
 *
 * Accepts any non-null object whose `semanticModels`, `crossModelMetrics`,
 * `relationships` and `warnings` properties are all arrays; array contents are
 * not inspected.
 */
function isMetricFlowParseResult(value: unknown): value is MetricFlowParseResult {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  const candidate = value as Record<string, unknown>;
  const requiredArrayKeys = ['semanticModels', 'crossModelMetrics', 'relationships', 'warnings'];
  return requiredArrayKeys.every((key) => Array.isArray(candidate[key]));
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,54 @@
|
|||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js';
|
||||
import type { MetricflowHostTable } from './semantic-models.js';
|
||||
|
||||
/** Name of the projection config file stored directly inside the staged directory. */
const METRICFLOW_PROJECTION_CONFIG_FILE = 'sync-config.json';

/** String-keyed map of parsed target tables; defaults to an empty map when absent. */
const parsedTargetTablesSchema = z.record(z.string(), parsedTargetTableSchema).default({});

/** Schema of the projection config persisted next to the staged MetricFlow files. */
const metricflowProjectionConfigSchema = z.object({
  parsedTargetTables: parsedTargetTablesSchema,
});

export type MetricflowProjectionConfig = z.infer<typeof metricflowProjectionConfigSchema>;
|
||||
|
||||
/**
 * Validates `config` against the projection config schema and writes it as
 * pretty-printed JSON (2-space indent, trailing newline) into `stagedDir`.
 *
 * The staged directory is created if it does not exist yet.
 *
 * @param stagedDir - directory that receives the config file
 * @param config - projection config to persist
 * @throws whatever the schema `parse` call throws when `config` is invalid
 */
export async function writeMetricflowProjectionConfig(
  stagedDir: string,
  config: MetricflowProjectionConfig,
): Promise<void> {
  const validated = metricflowProjectionConfigSchema.parse(config);
  await mkdir(stagedDir, { recursive: true });

  const targetPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);
  const serialized = `${JSON.stringify(validated, null, 2)}\n`;
  await writeFile(targetPath, serialized, 'utf-8');
}
|
||||
|
||||
/**
 * Reads and validates the projection config stored in `stagedDir`.
 *
 * A missing file (an error whose `code` is `ENOENT`) yields an empty config;
 * every other failure, including invalid JSON or a schema mismatch, is rethrown.
 *
 * @param stagedDir - directory that contains the config file
 * @returns the validated projection config
 */
export async function readMetricflowProjectionConfig(stagedDir: string): Promise<MetricflowProjectionConfig> {
  const configPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);
  try {
    const raw = await readFile(configPath, 'utf-8');
    const json: unknown = JSON.parse(raw);
    return metricflowProjectionConfigSchema.parse(json);
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return { parsedTargetTables: {} };
  }
}
|
||||
|
||||
/**
 * Converts successfully parsed target tables into host-table entries.
 *
 * Entries whose `ok` flag is falsy are dropped. Each remaining entry keeps its
 * map key as `id`, maps `schema` to `db`, and starts with an empty column
 * list. The result is ordered by `id` using `localeCompare`.
 *
 * @param parsedTargetTables - parsed target tables keyed by id
 * @returns host tables for the successfully parsed targets, sorted by id
 */
export function metricflowHostTablesFromParsedTargets(
  parsedTargetTables: Record<string, ParsedTargetTable>,
): MetricflowHostTable[] {
  const hostTables: MetricflowHostTable[] = [];

  for (const [id, table] of Object.entries(parsedTargetTables)) {
    if (!table.ok) {
      continue;
    }
    hostTables.push({
      id,
      name: table.name,
      catalog: table.catalog,
      db: table.schema,
      columns: [],
    });
  }

  hostTables.sort((left, right) => left.id.localeCompare(right.id));
  return hostTables;
}
|
||||
190
packages/context/src/ingest/artifact-gates.test.ts
Normal file
190
packages/context/src/ingest/artifact-gates.test.ts
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js';
|
||||
|
||||
/**
 * Builds a mocked wiki service backed by an in-memory page map.
 *
 * `listPageKeys` resolves to the keys of `pages`; `readPage` resolves to a page
 * object for known keys (ignoring its first two arguments) and to `null` for
 * unknown ones.
 */
function wikiServiceWithPages(
  pages: Record<string, { refs?: string[]; content?: string; slRefs?: string[] }>,
) {
  const listPageKeys = vi.fn().mockResolvedValue(Object.keys(pages));

  const readPage = vi
    .fn()
    .mockImplementation(async (_scope: string, _scopeId: string | null, pageKey: string) => {
      const page = pages[pageKey];
      if (!page) {
        return null;
      }
      const frontmatter = {
        summary: pageKey,
        usage_mode: 'auto',
        refs: page.refs,
        sl_refs: page.slRefs,
      };
      return { pageKey, frontmatter, content: page.content ?? '' };
    });

  return { listPageKeys, readPage };
}
|
||||
|
||||
describe('artifact gates', () => {
  // Semantic layer stub exposing a single `mart_account_segments` source whose
  // only measure is `total_contract_arr`; a fresh mock is created per call.
  const accountSegmentsSemanticLayer = () => ({
    loadAllSources: vi.fn().mockResolvedValue({
      sources: [
        {
          name: 'mart_account_segments',
          grain: ['account_id'],
          columns: [{ name: 'account_id', type: 'string' }],
          joins: [],
          measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
          table: 'analytics.mart_account_segments',
        },
      ],
      loadErrors: [],
    }),
  });

  it('fails the final tree when wiki body references a stale semantic-layer measure', async () => {
    const wikiService = wikiServiceWithPages({
      'account-segments': {
        slRefs: ['mart_account_segments'],
        content: 'ARR is `mart_account_segments.total_contract_arr_cents`.',
      },
    });
    const semanticLayerService = accountSegmentsSemanticLayer();

    const gateRun = validateFinalIngestArtifacts({
      connectionIds: ['warehouse'],
      changedWikiPageKeys: ['account-segments'],
      touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
      wikiService: wikiService as never,
      semanticLayerService: semanticLayerService as never,
      validateTouchedSources: async () => ({ invalidSources: [], validSources: ['mart_account_segments'] }),
      tableExists: async () => true,
    });

    await expect(gateRun).rejects.toThrow(
      /unknown semantic-layer entity mart_account_segments\.total_contract_arr_cents/,
    );
  });

  it('fails before provenance insertion when a raw path cannot be tied to the current snapshot or eviction set', () => {
    const validateRows = () =>
      validateProvenanceRawPaths({
        rows: [{ rawPath: 'cards/missing.json' }],
        currentRawPaths: new Set(['cards/present.json']),
        deletedRawPaths: new Set(['cards/deleted.json']),
      });

    expect(validateRows).toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/);
  });

  it('fails measure-level wiki frontmatter sl_refs that point at missing entities', async () => {
    const wikiService = wikiServiceWithPages({
      'account-segments': {
        slRefs: ['mart_account_segments.total_contract_arr_cents'],
        content: 'ARR uses a renamed measure.',
      },
    });
    const semanticLayerService = accountSegmentsSemanticLayer();

    const gateRun = validateFinalIngestArtifacts({
      connectionIds: ['warehouse'],
      changedWikiPageKeys: ['account-segments'],
      touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
      wikiService: wikiService as never,
      semanticLayerService: semanticLayerService as never,
      validateTouchedSources: async () => ({ invalidSources: [], validSources: ['warehouse:mart_account_segments'] }),
      tableExists: async () => true,
    });

    await expect(gateRun).rejects.toThrow(/unknown sl_refs entity mart_account_segments\.total_contract_arr_cents/);
  });

  it('validates direct declared-join neighbors of touched semantic-layer sources', async () => {
    // `orders` and `segments` both declare a join to `accounts`; only `accounts` is touched.
    const ordersSource = {
      name: 'orders',
      grain: ['order_id'],
      columns: [
        { name: 'order_id', type: 'string' },
        { name: 'account_id', type: 'string' },
      ],
      joins: [{ to: 'accounts', on: 'orders.account_id = accounts.account_id', relationship: 'many_to_one' }],
      measures: [{ name: 'order_count', expr: 'count(*)' }],
    };
    const accountsSource = {
      name: 'accounts',
      grain: ['account_id'],
      columns: [{ name: 'account_id', type: 'string' }],
      joins: [],
      measures: [{ name: 'account_count', expr: 'count(*)' }],
    };
    const segmentsSource = {
      name: 'segments',
      grain: ['segment_id'],
      columns: [
        { name: 'segment_id', type: 'string' },
        { name: 'account_id', type: 'string' },
      ],
      joins: [{ to: 'accounts', on: 'segments.account_id = accounts.account_id', relationship: 'many_to_one' }],
      measures: [],
    };
    const semanticLayerService = {
      loadAllSources: vi.fn().mockResolvedValue({
        sources: [ordersSource, accountsSource, segmentsSource],
        loadErrors: [],
      }),
    };
    const validateTouchedSources = vi.fn().mockResolvedValue({ invalidSources: [], validSources: [] });

    await validateFinalIngestArtifacts({
      connectionIds: ['warehouse'],
      changedWikiPageKeys: [],
      touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'accounts' }],
      wikiService: { readPage: vi.fn() } as never,
      semanticLayerService: semanticLayerService as never,
      validateTouchedSources,
      tableExists: async () => true,
    });

    const expectedValidated = ['accounts', 'orders', 'segments'].map((sourceName) => ({
      connectionId: 'warehouse',
      sourceName,
    }));
    expect(validateTouchedSources).toHaveBeenCalledWith(expectedValidated);
  });

  it('fails final gates when a changed wiki page references a missing wiki page', async () => {
    const wikiService = wikiServiceWithPages({
      'account-segments': {
        refs: ['missing-frontmatter-page'],
        content: 'See [[missing-inline-page]] for the related process.',
      },
    });
    const semanticLayerService = {
      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
    };

    const gateRun = validateFinalIngestArtifacts({
      connectionIds: ['warehouse'],
      changedWikiPageKeys: ['account-segments'],
      touchedSlSources: [],
      wikiService: wikiService as never,
      semanticLayerService: semanticLayerService as never,
      validateTouchedSources: async () => ({ invalidSources: [], validSources: [] }),
      tableExists: async () => true,
    });

    await expect(gateRun).rejects.toThrow(
      /wiki references target missing page\(s\): account-segments -> missing-frontmatter-page, account-segments -> missing-inline-page/,
    );
  });
});
|
||||
188
packages/context/src/ingest/artifact-gates.ts
Normal file
188
packages/context/src/ingest/artifact-gates.ts
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
import type { SemanticLayerService } from '../sl/index.js';
|
||||
import type { TouchedSlSource } from '../tools/index.js';
|
||||
import type { KnowledgeWikiService } from '../wiki/index.js';
|
||||
import { findMissingWikiRefs } from '../wiki/wiki-ref-validation.js';
|
||||
import { findInvalidWikiBodyRefs } from './wiki-body-refs.js';
|
||||
|
||||
/**
 * Outcome of validating a set of touched semantic-layer sources.
 *
 * Every entry of `invalidSources` becomes one
 * `semantic-layer validation failed for …` gate error.
 */
export interface TouchedValidationResult {
  /** Identifiers of the sources that failed validation. */
  invalidSources: string[];
  /** Identifiers of the sources that passed validation. */
  validSources: string[];
}
|
||||
|
||||
/** Everything the final artifact gates need to validate an ingest result. */
export interface FinalArtifactGateInput {
  /**
   * Connections whose semantic-layer sources are loaded for `sl_refs`
   * validation and join-neighbor expansion; also handed to the wiki body
   * reference check as the visible connections.
   */
  connectionIds: string[];
  /** Keys of the GLOBAL-scope wiki pages whose references are validated. */
  changedWikiPageKeys: string[];
  /** Semantic-layer sources touched by the ingest, before neighbor expansion. */
  touchedSlSources: TouchedSlSource[];
  /** Wiki service used to read the changed pages. */
  wikiService: KnowledgeWikiService;
  /** Semantic-layer service used to load the sources of each connection. */
  semanticLayerService: SemanticLayerService;
  /** Validates the touched sources (already expanded with direct join neighbors). */
  validateTouchedSources(touched: TouchedSlSource[]): Promise<TouchedValidationResult>;
  /** Table existence probe, passed through to the wiki body reference check. */
  tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
|
||||
|
||||
/** Input for checking that provenance rows only reference known raw paths. */
export interface ProvenanceRawPathValidationInput {
  /** Provenance rows to check; only `rawPath` is inspected. */
  rows: Array<{ rawPath: string }>;
  /** Raw paths accepted as current. */
  currentRawPaths: Set<string>;
  /** Raw paths accepted as deleted. */
  deletedRawPaths: Set<string>;
}
|
||||
|
||||
/**
 * Splits a semantic-layer reference of the form `[connection/]source[.entity]`.
 *
 * The connection is everything before the first `/` (or `null` when there is
 * no slash). The remainder is split on `.`: the first segment is the source
 * name, the second (if any) is the entity name, and further segments are
 * dropped.
 */
function parseSlRef(ref: string): { connectionId: string | null; sourceName: string; entityName: string | null } {
  const slashIndex = ref.indexOf('/');
  const hasConnection = slashIndex !== -1;
  const connectionId = hasConnection ? ref.slice(0, slashIndex) : null;
  const remainder = hasConnection ? ref.slice(slashIndex + 1) : ref;

  const segments = remainder.split('.', 2);
  const sourceName = segments[0] ?? '';
  const entityName = segments[1] ?? null;
  return { connectionId, sourceName, entityName };
}
|
||||
|
||||
/**
 * Collects the names of every measure, column, and segment declared on a
 * semantic-layer source. Missing collections are treated as empty.
 */
function slEntityNames(source: Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'][number]): Set<string> {
  const names = new Set<string>();
  for (const group of [source.measures, source.columns, source.segments]) {
    for (const entity of group ?? []) {
      names.add(entity.name);
    }
  }
  return names;
}
|
||||
|
||||
/**
 * Removes duplicate (connectionId, sourceName) pairs and returns the survivors
 * sorted by connection id, then by source name.
 *
 * The first occurrence of each pair is the one that is kept. The input array
 * is not modified.
 */
function uniqueTouchedSources(sources: TouchedSlSource[]): TouchedSlSource[] {
  const firstByKey = new Map<string, TouchedSlSource>();
  for (const source of sources) {
    // A JSON-encoded tuple is unambiguous even when an id or name contains the
    // separator character; a plain `a:b` style key would merge distinct pairs
    // such as ('a:b', 'c') and ('a', 'b:c').
    const key = JSON.stringify([source.connectionId, source.sourceName]);
    if (!firstByKey.has(key)) {
      firstByKey.set(key, source);
    }
  }
  return [...firstByKey.values()].sort((left, right) => {
    const byConnection = left.connectionId.localeCompare(right.connectionId);
    return byConnection === 0 ? left.sourceName.localeCompare(right.sourceName) : byConnection;
  });
}
|
||||
|
||||
/**
 * Extends the touched semantic-layer sources with their direct join neighbors.
 *
 * For each connection in `input.connectionIds` that has at least one touched
 * source, all sources are loaded and two kinds of neighbors are added:
 * the targets of joins declared on a touched source, and any source that
 * declares a join to a touched source. Touched sources on connections not
 * listed in `input.connectionIds` are kept but not expanded.
 *
 * @returns The de-duplicated, sorted list of touched sources plus neighbors.
 */
async function expandTouchedSlSourcesWithDirectJoinNeighbors(input: FinalArtifactGateInput): Promise<TouchedSlSource[]> {
  const touchedNamesByConnection = new Map<string, Set<string>>();
  for (const { connectionId, sourceName } of input.touchedSlSources) {
    let names = touchedNamesByConnection.get(connectionId);
    if (!names) {
      names = new Set<string>();
      touchedNamesByConnection.set(connectionId, names);
    }
    names.add(sourceName);
  }

  // Start from the caller's entries so they win over synthesized duplicates.
  const withNeighbors: TouchedSlSource[] = input.touchedSlSources.slice();
  for (const connectionId of input.connectionIds) {
    const touchedNames = touchedNamesByConnection.get(connectionId);
    if (touchedNames === undefined || touchedNames.size === 0) {
      continue;
    }
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    for (const candidate of loaded.sources) {
      const joins = candidate.joins ?? [];
      if (touchedNames.has(candidate.name)) {
        // Outgoing edges: everything a touched source joins to.
        for (const join of joins) {
          withNeighbors.push({ connectionId, sourceName: join.to });
        }
      }
      // Incoming edges: a source that joins to something touched.
      if (joins.some((join) => touchedNames.has(join.to))) {
        withNeighbors.push({ connectionId, sourceName: candidate.name });
      }
    }
  }

  return uniqueTouchedSources(withNeighbors);
}
|
||||
|
||||
/**
 * Validates the frontmatter `sl_refs` of every changed wiki page.
 *
 * A reference with an explicit connection is looked up only in that
 * connection; otherwise the connections in `input.connectionIds` are tried in
 * order and the first source with a matching name is used. Pages that cannot
 * be read are skipped.
 *
 * @returns One message per reference whose source is unknown, or whose entity
 *          is not a measure, column, or segment of the resolved source.
 */
async function validateWikiSlRefs(input: FinalArtifactGateInput): Promise<string[]> {
  type LoadedSources = Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'];

  const loadedByConnection = new Map<string, LoadedSources>();
  for (const connectionId of input.connectionIds) {
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    loadedByConnection.set(connectionId, loaded.sources);
  }

  // First source named `sourceName` across the given connections, in order.
  const findSource = (connections: string[], sourceName: string): LoadedSources[number] | undefined => {
    for (const connectionId of connections) {
      const match = loadedByConnection.get(connectionId)?.find((candidate) => candidate.name === sourceName);
      if (match) {
        return match;
      }
    }
    return undefined;
  };

  const problems: string[] = [];
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    for (const ref of page.frontmatter.sl_refs ?? []) {
      const { connectionId, sourceName, entityName } = parseSlRef(ref);
      const source = findSource(connectionId ? [connectionId] : input.connectionIds, sourceName);
      if (!source) {
        problems.push(`${pageKey}: unknown sl_refs entry ${ref}`);
      } else if (entityName && !slEntityNames(source).has(entityName)) {
        problems.push(`${pageKey}: unknown sl_refs entity ${ref}`);
      }
    }
  }
  return problems;
}
|
||||
|
||||
/**
 * Finds wiki references on changed pages that `findMissingWikiRefs` reports
 * as missing, checking both frontmatter `refs` and the page content.
 *
 * Pages that cannot be read are skipped.
 *
 * @returns Entries formatted as `<pageKey> -> <missingRef>`.
 */
async function validateWikiRefs(input: FinalArtifactGateInput): Promise<string[]> {
  const { wikiService, changedWikiPageKeys } = input;
  const danglingRefs: string[] = [];

  for (const pageKey of changedWikiPageKeys) {
    const page = await wikiService.readPage('GLOBAL', null, pageKey);
    if (page) {
      const missing = await findMissingWikiRefs({
        wikiService,
        scope: 'GLOBAL',
        scopeId: null,
        pageKey,
        refs: page.frontmatter.refs,
        content: page.content,
      });
      for (const target of missing) {
        danglingRefs.push(`${pageKey} -> ${target}`);
      }
    }
  }

  return danglingRefs;
}
|
||||
|
||||
/**
 * Runs every final artifact gate and fails if any of them reports a problem.
 *
 * Gates, in order:
 * 1. semantic-layer validation of the touched sources plus direct join neighbors;
 * 2. frontmatter `sl_refs` of changed wiki pages;
 * 3. wiki-to-wiki references of changed wiki pages (reported as a single line);
 * 4. semantic references found in the body of changed wiki pages.
 *
 * @throws Error with message `final artifact gates failed:` followed by one
 *         line per problem, when at least one gate reports a problem.
 */
export async function validateFinalIngestArtifacts(input: FinalArtifactGateInput): Promise<void> {
  const sourcesToValidate = await expandTouchedSlSourcesWithDirectJoinNeighbors(input);
  const { invalidSources } = await input.validateTouchedSources(sourcesToValidate);

  const failures: string[] = invalidSources.map((source) => `semantic-layer validation failed for ${source}`);

  const slRefFailures = await validateWikiSlRefs(input);
  failures.push(...slRefFailures);

  const danglingWikiRefs = await validateWikiRefs(input);
  if (danglingWikiRefs.length > 0) {
    failures.push(`wiki references target missing page(s): ${danglingWikiRefs.join(', ')}`);
  }

  const loadSources = async (connectionId: string) => {
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    return loaded.sources;
  };
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    const bodyFailures = await findInvalidWikiBodyRefs({
      pageKey,
      body: page.content,
      visibleConnectionIds: input.connectionIds,
      loadSources,
      tableExists: input.tableExists,
    });
    failures.push(...bodyFailures);
  }

  if (failures.length === 0) {
    return;
  }
  throw new Error(`final artifact gates failed:\n${failures.join('\n')}`);
}
|
||||
|
||||
/**
 * Ensures every provenance row points at a raw path that is either current or
 * deleted according to the given sets.
 *
 * @throws Error naming the first row whose raw path is in neither set.
 */
export function validateProvenanceRawPaths(input: ProvenanceRawPathValidationInput): void {
  const { rows, currentRawPaths, deletedRawPaths } = input;
  const stray = rows.find(({ rawPath }) => !(currentRawPaths.has(rawPath) || deletedRawPaths.has(rawPath)));
  if (stray !== undefined) {
    throw new Error(`provenance row references raw path outside this snapshot: ${stray.rawPath}`);
  }
}
|
||||
136
packages/context/src/ingest/final-gate-repair.test.ts
Normal file
136
packages/context/src/ingest/final-gate-repair.test.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js';
|
||||
import { FileIngestTraceWriter } from './ingest-trace.js';
|
||||
|
||||
/**
 * Creates a throwaway workdir containing one wiki page and one semantic-layer
 * source file, plus a trace writer whose output lands next to the workdir.
 *
 * The wiki page mentions `total_contract_arr_cents` while the source file
 * declares the measure `total_contract_arr`.
 */
async function makeHarness() {
  const root = await mkdtemp(join(tmpdir(), 'ktx-final-gate-repair-'));
  const workdir = join(root, 'workdir');
  const wikiDir = join(workdir, 'wiki/global');
  const slDir = join(workdir, 'semantic-layer/warehouse');

  await mkdir(wikiDir, { recursive: true });
  await mkdir(slDir, { recursive: true });

  const wikiPage =
    '---\nsummary: Account segments\nusage_mode: auto\n---\n\nARR uses `mart_account_segments.total_contract_arr_cents`.\n';
  await writeFile(join(workdir, 'wiki/global/account-segments.md'), wikiPage, 'utf-8');

  const slSource =
    'name: mart_account_segments\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n';
  await writeFile(join(workdir, 'semantic-layer/warehouse/mart_account_segments.yaml'), slSource, 'utf-8');

  const trace = new FileIngestTraceWriter({
    tracePath: join(root, 'trace.jsonl'),
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    runId: 'run-1',
    syncId: 'sync-1',
    level: 'trace',
  });

  return { root, workdir, trace };
}
|
||||
|
||||
describe('finalGateRepairPaths', () => {
  it('derives sorted wiki and semantic-layer file paths', () => {
    // Both inputs contain duplicates; the result must list each path once.
    const changedWikiPageKeys = ['account-segments', 'overview', 'account-segments'];
    const touchedSlSources = [
      { connectionId: 'warehouse', sourceName: 'mart_account_segments' },
      { connectionId: 'warehouse', sourceName: 'orders' },
      { connectionId: 'warehouse', sourceName: 'orders' },
    ];

    const paths = finalGateRepairPaths({ changedWikiPageKeys, touchedSlSources });

    expect(paths).toEqual([
      'semantic-layer/warehouse/mart_account_segments.yaml',
      'semantic-layer/warehouse/orders.yaml',
      'wiki/global/account-segments.md',
      'wiki/global/overview.md',
    ]);
  });
});
|
||||
|
||||
describe('repairFinalGateFailure', () => {
  it('lets the repair agent read gate errors and edit only allowed files', async () => {
    const harness = await makeHarness();

    // Scripted agent: reads the error and the page, tries one forbidden
    // write, then writes the corrected page.
    const runLoop = vi.fn(async (params: any) => {
      const tools = params.toolSet;

      const gateError = await tools.read_gate_error.execute({});
      expect(gateError.markdown).toContain('total_contract_arr_cents');

      const wikiPage = await tools.read_repair_file.execute({
        path: 'wiki/global/account-segments.md',
      });
      expect(wikiPage.markdown).toContain('total_contract_arr_cents');

      const forbiddenWrite = tools.write_repair_file.execute({
        path: 'wiki/global/other.md',
        content: 'not allowed',
      });
      await expect(forbiddenWrite).rejects.toThrow(/gate repair path not allowed/);

      await tools.write_repair_file.execute({
        path: 'wiki/global/account-segments.md',
        content: wikiPage.markdown.replace('total_contract_arr_cents', 'total_contract_arr'),
      });
      return { stopReason: 'natural' as const };
    });
    const agentRunner = { runLoop };

    const outcome = await repairFinalGateFailure({
      agentRunner,
      workdir: harness.workdir,
      gateError:
        'final artifact gates failed:\naccount-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
      allowedPaths: ['wiki/global/account-segments.md'],
      trace: harness.trace,
      repairKind: 'final_artifact_gate',
      maxAttempts: 1,
      stepBudget: 8,
    });

    expect(outcome).toEqual({
      status: 'repaired',
      attempts: 1,
      changedPaths: ['wiki/global/account-segments.md'],
    });

    const repairedPage = readFile(join(harness.workdir, 'wiki/global/account-segments.md'), 'utf-8');
    await expect(repairedPage).resolves.toContain('total_contract_arr');

    const traceLog = readFile(harness.trace.tracePath, 'utf-8');
    await expect(traceLog).resolves.toContain('gate_repair_repaired');

    expect(agentRunner.runLoop).toHaveBeenCalledWith(
      expect.objectContaining({
        modelRole: 'repair',
        stepBudget: 8,
        telemetryTags: expect.objectContaining({
          operationName: 'ingest-isolated-diff-gate-repair',
          repairKind: 'final_artifact_gate',
        }),
      }),
    );
  });

  it('returns failed when the repair agent edits no allowed file', async () => {
    const harness = await makeHarness();
    // Agent that stops immediately without calling any tool.
    const idleAgent = { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) };

    const outcome = await repairFinalGateFailure({
      agentRunner: idleAgent,
      workdir: harness.workdir,
      gateError: 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity',
      allowedPaths: ['wiki/global/account-segments.md'],
      trace: harness.trace,
      repairKind: 'final_artifact_gate',
      maxAttempts: 1,
      stepBudget: 8,
    });

    expect(outcome).toEqual({
      status: 'failed',
      attempts: 1,
      reason: 'gate repair completed without editing an allowed path',
    });

    const traceLog = readFile(harness.trace.tracePath, 'utf-8');
    await expect(traceLog).resolves.toContain('gate_repair_failed');
  });
});
|
||||
230
packages/context/src/ingest/final-gate-repair.ts
Normal file
230
packages/context/src/ingest/final-gate-repair.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js';
|
||||
import type { TouchedSlSource } from '../tools/index.js';
|
||||
import type { IngestTraceWriter } from './ingest-trace.js';
|
||||
import { traceTimed } from './ingest-trace.js';
|
||||
|
||||
/** Which gate produced the failure being repaired; echoed into prompts, traces, and telemetry tags. */
type FinalGateRepairKind =
  | 'patch_semantic_gate'
  | 'final_artifact_gate';
|
||||
|
||||
/**
 * Result of a gate repair run.
 *
 * - `repaired`: an attempt finished without an agent error and wrote at least
 *   one allowed path; `changedPaths` lists those paths, sorted.
 * - `failed`: no attempt succeeded; `reason` describes the last failure.
 */
export type FinalGateRepairResult =
  | {
      status: 'repaired';
      attempts: number;
      changedPaths: string[];
    }
  | {
      status: 'failed';
      attempts: number;
      reason: string;
    };
|
||||
|
||||
/** Arguments for {@link repairFinalGateFailure}. */
export interface RepairFinalGateFailureInput {
  /** Runs the repair agent loop. */
  agentRunner: AgentRunnerPort;
  /** Directory that the repository-relative repair paths are resolved against. */
  workdir: string;
  /** Gate failure text shown to the agent. */
  gateError: string;
  /** Repository-relative paths the agent may read and write. */
  allowedPaths: string[];
  /** Trace writer that receives timing and outcome events. */
  trace: IngestTraceWriter;
  /** Which gate produced the failure. */
  repairKind: FinalGateRepairKind;
  /** Maximum number of agent runs; defaults to 1. */
  maxAttempts?: number;
  /** Step budget handed to each agent run; defaults to 16. */
  stepBudget?: number;
}
|
||||
|
||||
/** Input of the `read_repair_file` tool: a non-empty path. */
const readRepairFileSchema = z.object({ path: z.string().min(1) });
|
||||
|
||||
/** Input of the `write_repair_file` tool: a non-empty path and the full replacement text. */
const writeRepairFileSchema = z.object({ path: z.string().min(1), content: z.string() });
|
||||
|
||||
/**
 * Canonicalizes a repository-relative path: backslashes become `/`, leading
 * slashes are dropped, and empty segments are removed.
 *
 * @throws Error when nothing is left, or when any segment is `.` or `..`.
 */
function normalizeRepoPath(path: string): string {
  const segments = path
    .replace(/\\/g, '/')
    .replace(/^\/+/, '')
    .split('/')
    .filter((segment) => segment !== '');

  const isDotSegment = (segment: string): boolean => segment === '.' || segment === '..';
  if (segments.length === 0 || segments.some(isDotSegment)) {
    throw new Error(`gate repair path must be a repository-relative path: ${path}`);
  }
  return segments.join('/');
}
|
||||
|
||||
/**
 * Normalizes `path` and checks it against the allow-list.
 *
 * @returns The normalized path.
 * @throws Error when the path is not repository-relative or is not in `allowedPaths`.
 */
function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string {
  const repoPath = normalizeRepoPath(path);
  if (allowedPaths.has(repoPath)) {
    return repoPath;
  }
  throw new Error(`gate repair path not allowed: ${repoPath}`);
}
|
||||
|
||||
/**
 * Reads a UTF-8 file, treating a missing file as an expected outcome.
 *
 * @returns `{ exists: true, content }` on success, or
 *          `{ exists: false, content: '' }` when the read fails with `ENOENT`.
 * @throws Any other read error, unchanged.
 */
async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> {
  let content: string;
  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    const code = typeof error === 'object' && error !== null && 'code' in error ? error.code : undefined;
    if (code === 'ENOENT') {
      return { exists: false, content: '' };
    }
    throw error;
  }
  return { exists: true, content };
}
|
||||
|
||||
/** Returns the fixed system prompt (role and rules) for the gate repair agent. */
function buildGateRepairSystemPrompt(): string {
  const role = [
    '<role>',
    'You repair one KTX isolated-diff artifact gate failure inside the integration worktree.',
    '</role>',
  ];
  const rules = [
    '<rules>',
    '- Use read_gate_error first.',
    '- Read only files exposed by read_repair_file.',
    '- Edit only paths exposed by write_repair_file.',
    '- Prefer the smallest text edit that makes the gate pass.',
    '- Preserve accepted work-unit, reconciliation, and deterministic projection content.',
    '- Do not invent warehouse facts, business definitions, or semantic-layer entities.',
    '- If the gate error requires choosing between conflicting facts without evidence, stop without editing.',
    '</rules>',
  ];
  // The two sections are separated by a single blank line.
  return [...role, '', ...rules].join('\n');
}
|
||||
|
||||
/**
 * Renders the per-attempt user prompt: repair kind, attempt counter, the
 * allowed files as a bullet list, the gate error, and closing instructions.
 */
function buildGateRepairUserPrompt(input: {
  gateError: string;
  allowedPaths: string[];
  repairKind: FinalGateRepairKind;
  attempt: number;
  maxAttempts: number;
}): string {
  const { gateError, allowedPaths, repairKind, attempt, maxAttempts } = input;
  const allowedFileList = allowedPaths.map((path) => `- ${path}`).join('\n');

  return [
    'Repair isolated-diff artifact gates.',
    '',
    `Repair kind: ${repairKind}`,
    `Attempt: ${attempt} of ${maxAttempts}`,
    '',
    'Allowed files:',
    allowedFileList,
    '',
    'Gate error:',
    gateError,
    '',
    'Use read_gate_error first. Then inspect only the allowed files, write the',
    'minimal repaired content, and stop.',
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the three tools exposed to the repair agent.
 *
 * - `read_gate_error` returns the gate failure text.
 * - `read_repair_file` reads one allowed file under `workdir`; a missing file
 *   yields a placeholder message instead of an error.
 * - `write_repair_file` replaces one allowed file under `workdir` (creating
 *   parent directories) and records the path in `editedPaths`.
 *
 * Both file tools reject paths that are not in `allowedPaths`.
 */
function buildToolSet(input: {
  workdir: string;
  gateError: string;
  allowedPaths: ReadonlySet<string>;
  editedPaths: Set<string>;
}): KtxRuntimeToolSet {
  const readGateError = async () => ({
    markdown: input.gateError,
    structured: { gateError: input.gateError },
  });

  const readRepairFile = async ({ path }: z.infer<typeof readRepairFileSchema>) => {
    const repoPath = assertAllowedPath(path, input.allowedPaths);
    const { exists, content } = await readOptionalFile(join(input.workdir, repoPath));
    const markdown = exists ? content : `(missing file: ${repoPath})`;
    return { markdown, structured: { path: repoPath, exists } };
  };

  const writeRepairFile = async ({ path, content }: z.infer<typeof writeRepairFileSchema>) => {
    const repoPath = assertAllowedPath(path, input.allowedPaths);
    const target = join(input.workdir, repoPath);
    await mkdir(dirname(target), { recursive: true });
    await writeFile(target, content, 'utf-8');
    // Recorded only after the write succeeded.
    input.editedPaths.add(repoPath);
    return {
      markdown: `Wrote ${repoPath}`,
      structured: { path: repoPath, bytes: Buffer.byteLength(content) },
    };
  };

  return {
    read_gate_error: {
      name: 'read_gate_error',
      description: 'Read the artifact gate failure that must be repaired.',
      inputSchema: z.object({}),
      execute: readGateError,
    },
    read_repair_file: {
      name: 'read_repair_file',
      description: 'Read one allowed file from the integration worktree.',
      inputSchema: readRepairFileSchema,
      execute: readRepairFile,
    },
    write_repair_file: {
      name: 'write_repair_file',
      description: 'Replace one allowed integration worktree file with repaired text content.',
      inputSchema: writeRepairFileSchema,
      execute: writeRepairFile,
    },
  };
}
|
||||
|
||||
/**
 * Derives the repository-relative files a gate repair may touch:
 * `semantic-layer/<connectionId>/<sourceName>.yaml` for each touched source
 * and `wiki/global/<pageKey>.md` for each changed wiki page.
 *
 * @returns The de-duplicated paths in default string sort order.
 */
export function finalGateRepairPaths(input: {
  changedWikiPageKeys: string[];
  touchedSlSources: TouchedSlSource[];
}): string[] {
  const slPaths = input.touchedSlSources.map(
    ({ connectionId, sourceName }) => `semantic-layer/${connectionId}/${sourceName}.yaml`,
  );
  const wikiPaths = input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`);

  const distinct = new Set<string>(slPaths.concat(wikiPaths));
  return Array.from(distinct).sort();
}
|
||||
|
||||
/**
 * Asks the repair agent to fix a gate failure by editing only allowed files.
 *
 * Up to `maxAttempts` (default 1) agent runs are made, each with a step
 * budget of `stepBudget` (default 16) and a fresh record of edited paths.
 * An attempt succeeds when the agent loop does not stop with an error and at
 * least one allowed path was written. Each failed attempt emits a
 * `gate_repair_failed` trace event; success emits `gate_repair_repaired`.
 *
 * This function does not re-run the gates itself.
 *
 * @returns `repaired` with the sorted edited paths, or `failed` with the last
 *          failure reason once all attempts are used up.
 * @throws Error when an entry of `allowedPaths` is not a repository-relative path.
 */
export async function repairFinalGateFailure(
  input: RepairFinalGateFailureInput,
): Promise<FinalGateRepairResult> {
  const { trace, repairKind, gateError } = input;
  const allowedPaths = new Set(input.allowedPaths.map(normalizeRepoPath));
  const maxAttempts = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 16;
  let lastFailure = 'gate repair did not run';

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    // Edits are tracked per attempt; earlier attempts do not count.
    const editedPaths = new Set<string>();
    const sortedAllowedPaths = [...allowedPaths].sort();
    const traceData = {
      repairKind,
      attempt,
      maxAttempts,
      allowedPaths: sortedAllowedPaths,
      gateError,
    };

    const runAgent = async () => {
      const systemPrompt = buildGateRepairSystemPrompt();
      const userPrompt = buildGateRepairUserPrompt({
        gateError,
        allowedPaths: sortedAllowedPaths,
        repairKind,
        attempt,
        maxAttempts,
      });
      const toolSet = buildToolSet({
        workdir: input.workdir,
        gateError,
        allowedPaths,
        editedPaths,
      });
      return input.agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt,
        userPrompt,
        toolSet,
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-gate-repair',
          source: trace.context.sourceKey,
          jobId: trace.context.jobId,
          repairKind,
        },
      });
    };
    const outcome = await traceTimed(trace, 'gate_repair', 'gate_repair', traceData, runAgent);

    if (outcome.stopReason === 'error') {
      lastFailure = outcome.error?.message ?? 'gate repair agent loop errored';
      await trace.event('error', 'gate_repair', 'gate_repair_failed', traceData, outcome.error);
      continue;
    }

    if (editedPaths.size === 0) {
      lastFailure = 'gate repair completed without editing an allowed path';
      await trace.event('error', 'gate_repair', 'gate_repair_failed', {
        ...traceData,
        reason: lastFailure,
      });
      continue;
    }

    const changedPaths = [...editedPaths].sort();
    await trace.event('debug', 'gate_repair', 'gate_repair_repaired', {
      ...traceData,
      changedPaths,
    });
    return { status: 'repaired', attempts: attempt, changedPaths };
  }

  return { status: 'failed', attempts: maxAttempts, reason: lastFailure };
}
|
||||
|
|
@ -17,6 +17,11 @@ export {
|
|||
buildLiveDatabaseTableNaturalKey,
|
||||
ktxSchemaSnapshotToExtractedSchema,
|
||||
} from './adapters/live-database/extracted-schema.js';
|
||||
export {
|
||||
assertSemanticLayerTargetPathsAllowed,
|
||||
findDisallowedSemanticLayerTargetPaths,
|
||||
semanticLayerConnectionIdFromPath,
|
||||
} from './semantic-layer-target-policy.js';
|
||||
export { LiveDatabaseSourceAdapter } from './adapters/live-database/live-database.adapter.js';
|
||||
export type {
|
||||
BuildLiveDatabaseManifestShardsInput,
|
||||
|
|
@ -609,6 +614,11 @@ export {
|
|||
} from './raw-sources-paths.js';
|
||||
export { ingestReportSnapshotSchema, parseIngestReportSnapshot } from './report-snapshot.js';
|
||||
export type { IngestReportBody, IngestReportSnapshot } from './reports.js';
|
||||
export * from './artifact-gates.js';
|
||||
export * from './ingest-trace.js';
|
||||
export * from './isolated-diff/git-patch.js';
|
||||
export * from './isolated-diff/patch-integrator.js';
|
||||
export * from './isolated-diff/work-unit-executor.js';
|
||||
export * from './reports.js';
|
||||
export { SourceAdapterRegistry } from './source-adapter-registry.js';
|
||||
export type { SqliteBundleIngestStoreOptions } from './sqlite-bundle-ingest-store.js';
|
||||
|
|
@ -652,4 +662,7 @@ export type {
|
|||
TriageSignals,
|
||||
UnresolvedCardInfo,
|
||||
WorkUnit,
|
||||
DeterministicProjectionContext,
|
||||
ProjectionResult,
|
||||
} from './types.js';
|
||||
export * from './wiki-body-refs.js';
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,7 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
|
||||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { GitService } from '../core/index.js';
|
||||
import { addTouchedSlSource } from '../tools/index.js';
|
||||
import { IngestBundleRunner } from './ingest-bundle.runner.js';
|
||||
import { createMemoryFlowLiveBuffer } from './memory-flow/live-buffer.js';
|
||||
|
|
@ -123,9 +122,15 @@ const makeDeps = () => {
|
|||
};
|
||||
const scopedGit = {
|
||||
revParseHead: vi.fn().mockResolvedValue('h'),
|
||||
commitFiles: vi.fn(),
|
||||
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
const sessionWorktreeService = {
|
||||
create: vi.fn().mockResolvedValue({
|
||||
|
|
@ -167,10 +172,12 @@ const makeDeps = () => {
|
|||
loadPrompt: vi.fn().mockResolvedValue('base-framing'),
|
||||
};
|
||||
const wikiService = {
|
||||
forWorktree: vi.fn().mockReturnValue({}),
|
||||
forWorktree: vi.fn(),
|
||||
listPageKeys: vi.fn().mockResolvedValue([]),
|
||||
readPage: vi.fn().mockResolvedValue(null),
|
||||
syncFromCommit: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
wikiService.forWorktree.mockReturnValue(wikiService);
|
||||
const knowledgeSlRefs = {
|
||||
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 1, deleted: 0 }),
|
||||
};
|
||||
|
|
@ -178,7 +185,7 @@ const makeDeps = () => {
|
|||
listPagesForUser: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
const semanticLayerService = {
|
||||
forWorktree: vi.fn().mockReturnValue({}),
|
||||
forWorktree: vi.fn(),
|
||||
listFilesForConnection: vi
|
||||
.fn()
|
||||
.mockImplementation((connectionId: string) =>
|
||||
|
|
@ -193,6 +200,7 @@ const makeDeps = () => {
|
|||
}),
|
||||
),
|
||||
};
|
||||
semanticLayerService.forWorktree.mockReturnValue(semanticLayerService);
|
||||
const slSearchService = {
|
||||
indexSources: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
|
@ -255,8 +263,12 @@ const buildRunner = (deps: ReturnType<typeof makeDeps> = makeDeps(), overrides:
|
|||
resolveUploadDir: (uploadId) => `/tmp/ktx-test/ingest-uploads/${uploadId}`,
|
||||
resolvePullDir: (jobId) => `/tmp/ktx-test/ingest-pulls/${jobId}`,
|
||||
resolveTranscriptDir: (jobId) => `/tmp/ktx-test/run/wu-transcripts/${jobId}`,
|
||||
resolveTracePath: (jobId) => `/tmp/ktx-test/ingest-traces/${jobId}/trace.jsonl`,
|
||||
},
|
||||
settings: {
|
||||
probeRowCount: 1,
|
||||
memoryIngestionModel: 'test-model',
|
||||
},
|
||||
settings: { probeRowCount: 1, memoryIngestionModel: 'test-model' },
|
||||
skillsRegistry: deps.skillsRegistry as any,
|
||||
promptService: deps.promptService as any,
|
||||
wikiService: deps.wikiService as any,
|
||||
|
|
@ -1505,7 +1517,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h1']]),
|
||||
currentHashes: new Map([['a.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/looker-run/fake/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
|
@ -1570,6 +1582,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
|
||||
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
|
||||
});
|
||||
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
|
||||
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
|
||||
);
|
||||
|
|
@ -1972,9 +1985,15 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
const assertError = new Error('Worktree has in-progress git operation (sequencer ...); refusing to proceed');
|
||||
const sessionGit = {
|
||||
revParseHead: vi.fn().mockResolvedValue('h'),
|
||||
commitFiles: vi.fn(),
|
||||
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockRejectedValue(assertError),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
deps.sessionWorktreeService.create.mockResolvedValue({
|
||||
chatId: 'j1',
|
||||
|
|
@ -2005,135 +2024,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('squash-merges only successful WUs into main when one WU fails sl_validate', async () => {
|
||||
const homeDir = await mkdtemp(join(tmpdir(), 'ingest-rollback-'));
|
||||
try {
|
||||
const configDir = join(homeDir, 'config');
|
||||
const mainGit = new GitService({
|
||||
storage: { configDir, homeDir },
|
||||
git: {
|
||||
userName: 'System User',
|
||||
userEmail: 'system@example.com',
|
||||
bootstrapMessage: 'Initialize test config repo',
|
||||
bootstrapAuthor: 'test-system',
|
||||
bootstrapAuthorEmail: 'system@example.com',
|
||||
},
|
||||
});
|
||||
await mainGit.onModuleInit();
|
||||
const baseSha = await mainGit.revParseHead();
|
||||
if (!baseSha) {
|
||||
throw new Error('no base sha');
|
||||
}
|
||||
|
||||
const deps = makeDeps();
|
||||
const sessionDir = join(homeDir, '.worktrees', 'session-j1');
|
||||
const sessionBranch = 'session/j1';
|
||||
let currentToolSession: any = null;
|
||||
|
||||
deps.gitService = mainGit as any;
|
||||
deps.sessionWorktreeService.create.mockImplementation(async (_jobId: string, startSha: string) => {
|
||||
await mkdir(join(homeDir, '.worktrees'), { recursive: true });
|
||||
await mainGit.addWorktree(sessionDir, sessionBranch, startSha);
|
||||
return {
|
||||
chatId: 'j1',
|
||||
workdir: sessionDir,
|
||||
branch: sessionBranch,
|
||||
baseSha: startSha,
|
||||
createdAt: new Date(),
|
||||
git: mainGit.forWorktree(sessionDir),
|
||||
config: {},
|
||||
};
|
||||
});
|
||||
deps.sessionWorktreeService.cleanup.mockResolvedValue(undefined);
|
||||
deps.adapter.chunk.mockResolvedValue({
|
||||
workUnits: [
|
||||
{ unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] },
|
||||
{ unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] },
|
||||
],
|
||||
});
|
||||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
});
|
||||
deps.slValidator.validateSingleSource.mockImplementation(
|
||||
(_validationDeps: unknown, _connectionId: string, sourceName: string) => ({
|
||||
errors: sourceName === 'bad' ? [{ message: 'bad source rejected' }] : [],
|
||||
warnings: [],
|
||||
}),
|
||||
);
|
||||
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
|
||||
const unitKey = params.telemetryTags?.unitKey;
|
||||
if (unitKey === 'wu-good') {
|
||||
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
|
||||
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'good.yaml'), 'name: good\n');
|
||||
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'good');
|
||||
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'good', detail: '' });
|
||||
await currentToolSession.gitService.commitFiles(
|
||||
['semantic-layer/c1/good.yaml'],
|
||||
'test: add good source',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
}
|
||||
if (unitKey === 'wu-bad') {
|
||||
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
|
||||
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'bad.yaml'), 'name: bad\n');
|
||||
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'bad');
|
||||
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'bad', detail: '' });
|
||||
await currentToolSession.gitService.commitFiles(
|
||||
['semantic-layer/c1/bad.yaml'],
|
||||
'test: add bad source',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
}
|
||||
return { stopReason: 'natural' };
|
||||
});
|
||||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockImplementation(async ({ worktreeRoot }: any) => {
|
||||
const rawDir = join(worktreeRoot, 'raw-sources', 'c1', 'fake', 's');
|
||||
await mkdir(rawDir, { recursive: true });
|
||||
await writeFile(join(rawDir, 'good.raw'), 'good raw');
|
||||
await writeFile(join(rawDir, 'bad.raw'), 'bad raw');
|
||||
return {
|
||||
currentHashes: new Map([
|
||||
['good.raw', 'good-hash'],
|
||||
['bad.raw', 'bad-hash'],
|
||||
]),
|
||||
rawDirInWorktree: 'raw-sources/c1/fake/s',
|
||||
};
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
const result = await runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
expect(result.failedWorkUnits).toEqual(['wu-bad']);
|
||||
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'good.yaml'), 'utf-8')).toContain('good');
|
||||
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'bad.yaml'), 'utf-8').catch(() => null)).toBeNull();
|
||||
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
body: expect.objectContaining({
|
||||
failedWorkUnits: ['wu-bad'],
|
||||
}),
|
||||
}),
|
||||
);
|
||||
await expect(stat(join(configDir, '.git', 'sequencer'))).rejects.toThrow();
|
||||
} finally {
|
||||
await rm(homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('fails the run and rethrows when the adapter cannot detect the bundle', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.detect.mockResolvedValue(false);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
85
packages/context/src/ingest/ingest-trace.test.ts
Normal file
85
packages/context/src/ingest/ingest-trace.test.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import { mkdtemp, readFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { FileIngestTraceWriter, ingestTracePathForJob, traceTimed } from './ingest-trace.js';
|
||||
|
||||
describe('FileIngestTraceWriter', () => {
  // Writes one debug event and checks that the trace file holds exactly one JSON line
  // carrying the writer context plus the event payload.
  it('persists structured trace events as JSONL', async () => {
    const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
    const tracePath = ingestTracePathForJob(root, 'job-1');
    const trace = new FileIngestTraceWriter({
      tracePath,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
    });

    await trace.event('debug', 'snapshot', 'input_snapshot', {
      baseSha: 'abc123',
      rawFileCount: 2,
      diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
    });

    const lines = (await readFile(tracePath, 'utf-8'))
      .trim()
      .split('\n')
      .map((line) => JSON.parse(line));
    expect(lines).toHaveLength(1);
    expect(lines[0]).toMatchObject({
      schemaVersion: 1,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
      phase: 'snapshot',
      event: 'input_snapshot',
      data: {
        baseSha: 'abc123',
        rawFileCount: 2,
        diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
      },
    });
    expect(typeof lines[0].at).toBe('string');
  });

  // Drives traceTimed through a failing callback under fake timers so the recorded
  // duration is deterministic.
  it('records timing and error context for postmortem inspection', async () => {
    vi.useFakeTimers();
    try {
      vi.setSystemTime(new Date('2026-05-17T12:00:00.000Z'));
      const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
      const tracePath = ingestTracePathForJob(root, 'job-2');
      const trace = new FileIngestTraceWriter({
        tracePath,
        jobId: 'job-2',
        connectionId: 'c1',
        sourceKey: 'fake',
        level: 'trace',
      });

      await expect(
        traceTimed(trace, 'integration', 'apply_patch', { unitKey: 'wu-1' }, async () => {
          vi.advanceTimersByTime(17);
          throw new Error('patch conflict');
        }),
      ).rejects.toThrow('patch conflict');

      const lines = (await readFile(tracePath, 'utf-8'))
        .trim()
        .split('\n')
        .map((line) => JSON.parse(line));
      expect(lines.map((line) => line.event)).toEqual(['apply_patch_started', 'apply_patch_failed']);
      expect(lines[1]).toMatchObject({
        level: 'error',
        phase: 'integration',
        data: { unitKey: 'wu-1' },
        error: { name: 'Error', message: 'patch conflict' },
      });
      expect(lines[1].durationMs).toBe(17);
    } finally {
      // Restore real timers even when an assertion above throws; otherwise the fake
      // clock would leak into every test that runs after this one.
      vi.useRealTimers();
    }
  });

  it('uses the documented trace path layout', () => {
    expect(ingestTracePathForJob('/project/.ktx', 'job-3')).toBe('/project/.ktx/ingest-traces/job-3/trace.jsonl');
  });
});
|
||||
158
packages/context/src/ingest/ingest-trace.ts
Normal file
158
packages/context/src/ingest/ingest-trace.ts
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
import { appendFile, mkdir } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
|
||||
/** Severity/verbosity levels an ingest trace event can carry, from least to most verbose. */
export type IngestTraceLevel = 'error' | 'info' | 'debug' | 'trace';

/**
 * Numeric verbosity rank for each trace level.
 * A lower rank means the level is more important; `error` is the lowest and `trace` the highest.
 */
const TRACE_LEVEL_RANK: Record<IngestTraceLevel, number> = {
  error: 0,
  info: 1,
  debug: 2,
  trace: 3,
};
|
||||
|
||||
/** Static context a trace writer is created with; its identifiers are copied onto every event. */
export interface IngestTraceContext {
  /** File the trace events are appended to. */
  tracePath: string;
  /** Identifier of the ingest job being traced. */
  jobId: string;
  /** Identifier of the connection the job runs for. */
  connectionId: string;
  /** Key of the source being ingested. */
  sourceKey: string;
  /** Optional run identifier; written on events only when set. */
  runId?: string;
  /** Optional sync identifier; written on events only when set. */
  syncId?: string;
  /** Most verbose level that should be written; the file writer falls back to `'debug'` when omitted. */
  level?: IngestTraceLevel;
}
|
||||
|
||||
/** Shape of one serialized trace record (one JSON object per line of the trace file). */
export interface IngestTraceEvent {
  /** Version of this record layout. */
  schemaVersion: 1;
  /** Timestamp of the event as an ISO-8601 string. */
  at: string;
  /** Level the event was emitted at. */
  level: IngestTraceLevel;
  jobId: string;
  connectionId: string;
  sourceKey: string;
  runId?: string;
  syncId?: string;
  /** Pipeline phase the event belongs to (for example `'snapshot'` or `'integration'`). */
  phase: string;
  /** Event name within the phase. */
  event: string;
  /** Elapsed time in milliseconds, present on timed events. */
  durationMs?: number;
  /** Free-form structured payload attached by the caller. */
  data?: Record<string, unknown>;
  /** Serialized failure details, present when an error was supplied. */
  error?: {
    name: string;
    message: string;
    stack?: string;
  };
}
|
||||
|
||||
/** Contract implemented by every ingest trace sink. */
export interface IngestTraceWriter {
  /** Location of the trace output this writer targets. */
  readonly tracePath: string;
  /** Context stamped onto the events produced by this writer. */
  readonly context: IngestTraceContext;

  /**
   * Returns a writer whose context has the given `runId` / `syncId` overrides applied.
   *
   * @param context - Subset of run/sync identifiers to override.
   */
  withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter;

  /**
   * Records a single trace event.
   *
   * @param level - Level of the event.
   * @param phase - Pipeline phase the event belongs to.
   * @param event - Event name.
   * @param data - Optional structured payload.
   * @param error - Optional thrown value to attach to the event.
   * @param durationMs - Optional elapsed time in milliseconds.
   */
  event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void>;
}
|
||||
|
||||
/**
 * Builds the trace file location for a job: `<homeDir>/ingest-traces/<jobId>/trace.jsonl`.
 *
 * @param homeDir - Directory that contains the `ingest-traces` folder.
 * @param jobId - Identifier of the ingest job.
 * @returns Path of the job's JSONL trace file.
 */
export function ingestTracePathForJob(homeDir: string, jobId: string): string {
  const relativeSegments = ['ingest-traces', jobId, 'trace.jsonl'];
  return join(homeDir, ...relativeSegments);
}
|
||||
|
||||
/**
 * Converts an arbitrary thrown value into the JSON-friendly `error` shape of a trace event.
 *
 * - `undefined` / `null` produce `undefined` (no error recorded).
 * - `Error` instances keep their `name`, `message` and, when present, `stack`.
 * - Error-like objects (anything with a string `message`) keep their `message` and any
 *   string `name` / `stack` they carry.
 * - Other objects are JSON-encoded so their contents stay readable, instead of collapsing
 *   to `"[object Object]"`.
 * - Everything else is stringified with `String(...)`.
 *
 * @param error - The value that was thrown or passed as the event error.
 * @returns The serialized error, or `undefined` when there is nothing to record.
 */
function serializeError(error: unknown): IngestTraceEvent['error'] | undefined {
  if (error === undefined || error === null) {
    return undefined;
  }
  if (error instanceof Error) {
    return {
      name: error.name,
      message: error.message,
      ...(error.stack ? { stack: error.stack } : {}),
    };
  }
  if (typeof error === 'object') {
    const candidate = error as { name?: unknown; message?: unknown; stack?: unknown };
    if (typeof candidate.message === 'string') {
      return {
        name: typeof candidate.name === 'string' && candidate.name ? candidate.name : 'Error',
        message: candidate.message,
        ...(typeof candidate.stack === 'string' && candidate.stack ? { stack: candidate.stack } : {}),
      };
    }
    let encoded: string | undefined;
    try {
      encoded = JSON.stringify(error);
    } catch {
      // Circular structures or BigInt members cannot be JSON-encoded; use String() below.
      encoded = undefined;
    }
    return { name: 'Error', message: typeof encoded === 'string' ? encoded : String(error) };
  }
  return { name: 'Error', message: String(error) };
}
|
||||
|
||||
/**
 * Decides whether an event passes the writer's verbosity filter.
 *
 * @param configured - Level the writer is configured for.
 * @param incoming - Level of the event being emitted.
 * @returns `true` when the event's rank does not exceed the configured rank.
 */
function shouldWrite(configured: IngestTraceLevel, incoming: IngestTraceLevel): boolean {
  const threshold = TRACE_LEVEL_RANK[configured];
  const candidate = TRACE_LEVEL_RANK[incoming];
  return candidate <= threshold;
}
|
||||
|
||||
/** Trace writer that appends each accepted event as one JSON line to `tracePath`. */
export class FileIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;

  /**
   * @param context - Identifiers and target path for the trace; `level` defaults to `'debug'`.
   */
  constructor(context: IngestTraceContext) {
    const level = context.level ?? 'debug';
    this.context = { ...context, level };
    this.tracePath = context.tracePath;
  }

  /**
   * Creates a sibling writer for the same trace file with `runId` / `syncId` overridden.
   *
   * @param context - Run/sync identifiers to layer over the current context.
   */
  withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter {
    const merged: IngestTraceContext = { ...this.context, ...context, tracePath: this.tracePath };
    return new FileIngestTraceWriter(merged);
  }

  /**
   * Appends one event to the trace file, unless the event level is filtered out.
   * The parent directory of the trace file is created on demand.
   *
   * @param level - Level of the event.
   * @param phase - Pipeline phase the event belongs to.
   * @param event - Event name.
   * @param data - Optional structured payload.
   * @param error - Optional thrown value; stored in serialized form.
   * @param durationMs - Optional elapsed time in milliseconds.
   */
  async event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void> {
    const { jobId, connectionId, sourceKey, runId, syncId, level: configuredLevel } = this.context;
    if (!shouldWrite(configuredLevel ?? 'debug', level)) {
      return;
    }

    const serializedError = serializeError(error);
    // Key order below is the order of the fields in every emitted JSON line.
    const payload: IngestTraceEvent = {
      schemaVersion: 1,
      at: new Date().toISOString(),
      level,
      jobId,
      connectionId,
      sourceKey,
      ...(runId ? { runId } : {}),
      ...(syncId ? { syncId } : {}),
      phase,
      event,
      ...(durationMs !== undefined ? { durationMs } : {}),
      ...(data ? { data } : {}),
      ...(serializedError ? { error: serializedError } : {}),
    };

    const line = `${JSON.stringify(payload)}\n`;
    await mkdir(dirname(this.tracePath), { recursive: true });
    await appendFile(this.tracePath, line, 'utf-8');
  }
}
|
||||
|
||||
/** Trace writer that discards every event; it has an empty path and empty identifiers. */
export class NoopIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath = '';
  readonly context: IngestTraceContext = {
    tracePath: '',
    jobId: '',
    connectionId: '',
    sourceKey: '',
    level: 'error',
  };

  /** Returns this same instance; context overrides are ignored. */
  withContext(): IngestTraceWriter {
    return this;
  }

  /** Accepts any event and does nothing with it. */
  async event(): Promise<void> {
    return;
  }
}
|
||||
|
||||
/**
 * Runs `fn` and brackets it with trace events: `<event>_started` before, then either
 * `<event>_finished` (level `debug`) or `<event>_failed` (level `error`), each carrying
 * the elapsed wall-clock time in milliseconds.
 *
 * Only a failure of `fn` itself is recorded as `<event>_failed`. A failure while writing
 * the `_finished` event propagates as-is and is not mislabelled as an operation failure.
 * When `fn` fails, its error is always the one rethrown, even if the `_failed` event
 * cannot be written.
 *
 * @param trace - Writer that receives the events.
 * @param phase - Pipeline phase the events belong to.
 * @param event - Base event name; suffixes are appended to it.
 * @param data - Payload attached to all emitted events.
 * @param fn - The operation to time.
 * @returns Whatever `fn` resolves to.
 * @throws The error thrown by `fn`, or a trace write error from the `_started` /
 *   `_finished` events.
 */
export async function traceTimed<T>(
  trace: IngestTraceWriter,
  phase: string,
  event: string,
  data: Record<string, unknown>,
  fn: () => Promise<T>,
): Promise<T> {
  await trace.event('debug', phase, `${event}_started`, data);
  const started = Date.now();

  let result: T;
  try {
    result = await fn();
  } catch (error) {
    const durationMs = Date.now() - started;
    try {
      await trace.event('error', phase, `${event}_failed`, data, error, durationMs);
    } catch {
      // Best effort: a broken trace sink must not replace the root-cause error from fn.
    }
    throw error;
  }

  // Deliberately outside the try block: a failed trace write here is not a failure of fn.
  await trace.event('debug', phase, `${event}_finished`, data, undefined, Date.now() - started);
  return result;
}
|
||||
97
packages/context/src/ingest/isolated-diff/git-patch.test.ts
Normal file
97
packages/context/src/ingest/isolated-diff/git-patch.test.ts
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths, textArtifactRoots } from './git-patch.js';
|
||||
|
||||
describe('isolated diff patch contract', () => {
  // A modified file and a newly added file should each yield one touched-path entry.
  it('parses touched paths from no-rename git patches', () => {
    const patch = [
      'diff --git a/wiki/global/a.md b/wiki/global/a.md',
      'index 1111111..2222222 100644',
      '--- a/wiki/global/a.md',
      '+++ b/wiki/global/a.md',
      '@@ -1 +1 @@',
      '-old',
      '+new',
      'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
      'new file mode 100644',
      '--- /dev/null',
      '+++ b/semantic-layer/c1/orders.yaml',
      '@@ -0,0 +1 @@',
      '+name: orders',
      '',
    ].join('\n');

    expect(parsePatchTouchedPaths(patch)).toEqual([
      {
        path: 'wiki/global/a.md',
        oldPath: 'wiki/global/a.md',
        newPath: 'wiki/global/a.md',
        mode: '100644',
        binary: false,
      },
      {
        path: 'semantic-layer/c1/orders.yaml',
        oldPath: 'semantic-layer/c1/orders.yaml',
        newPath: 'semantic-layer/c1/orders.yaml',
        mode: '100644',
        binary: false,
      },
    ]);
  });

  // Note: dots in the expected messages are escaped so the patterns match file names literally.
  it('rejects semantic-layer paths for slDisallowed work units', () => {
    const patch = 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml\nindex 1..2 100644\n';

    expect(() =>
      assertPatchAllowedForWorkUnit({
        unitKey: 'lookml-mismatch',
        patch,
        slDisallowed: true,
      }),
    ).toThrow(/slDisallowed WorkUnit lookml-mismatch touched semantic-layer\/c1\/orders\.yaml/);
  });

  it('rejects semantic-layer paths outside allowed target connections', () => {
    const patch =
      'diff --git a/semantic-layer/finance/orders.yaml b/semantic-layer/finance/orders.yaml\nindex 1..2 100644\n';

    expect(() =>
      assertPatchAllowedForWorkUnit({
        unitKey: 'wu-finance',
        patch,
        slDisallowed: false,
        allowedTargetConnectionIds: new Set(['warehouse']),
      }),
    ).toThrow(
      /semantic-layer target connection not allowed: semantic-layer\/finance\/orders\.yaml \(finance\); allowed: warehouse/,
    );
  });

  it('rejects executable and binary changes under known text artifact roots', () => {
    expect(textArtifactRoots).toEqual(['wiki/', 'semantic-layer/']);

    const executablePatch =
      'diff --git a/wiki/global/a.md b/wiki/global/a.md\nold mode 100644\nnew mode 100755\nindex 1..2\n';
    expect(() =>
      assertPatchAllowedForWorkUnit({
        unitKey: 'wu-1',
        patch: executablePatch,
        slDisallowed: false,
      }),
    ).toThrow(/unexpected executable mode under wiki\/global\/a\.md/);

    const binaryPatch = [
      'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
      'index 1111111..2222222 100644',
      'GIT binary patch',
      'literal 0',
      '',
    ].join('\n');
    expect(() =>
      assertPatchAllowedForWorkUnit({
        unitKey: 'wu-2',
        patch: binaryPatch,
        slDisallowed: false,
      }),
    ).toThrow(/unexpected binary patch under semantic-layer\/c1\/orders\.yaml/);
  });
});
|
||||
101
packages/context/src/ingest/isolated-diff/git-patch.ts
Normal file
101
packages/context/src/ingest/isolated-diff/git-patch.ts
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import { assertSemanticLayerTargetPathsAllowed } from '../semantic-layer-target-policy.js';
|
||||
|
||||
/** Repository path prefixes whose files are expected to be plain, non-executable text. */
export const textArtifactRoots = ['wiki/', 'semantic-layer/'] as const;

/** One file touched by a patch, as described by its `diff --git` section. */
export interface PatchTouchedPath {
  /** Effective path of the file (the new path unless that is `/dev/null`). */
  path: string;
  /** Path on the "a/" side of the header, prefix removed. */
  oldPath: string;
  /** Path on the "b/" side of the header, prefix removed. */
  newPath: string;
  /** Last file mode seen in the section's extended headers, or `null` when none was present. */
  mode: string | null;
  /** `true` when the section carries binary content. */
  binary: boolean;
}

/** Input to the work-unit patch policy check. */
export interface PatchPolicyInput {
  /** Key of the work unit that produced the patch (used in error messages). */
  unitKey: string;
  /** Full patch text. */
  patch: string;
  /** When `true`, the patch must not touch anything under `semantic-layer/`. */
  slDisallowed: boolean;
  /** Optional allow-list of target connection ids for semantic-layer paths. */
  allowedTargetConnectionIds?: ReadonlySet<string>;
}

const DIFF_HEADER_PREFIX = 'diff --git ';
// Both sides double-quoted (git C-style quoting, used for paths with special characters).
const QUOTED_HEADER_PATHS = /^("(?:[^"\\]|\\.)*") ("(?:[^"\\]|\\.)*")$/;
// Same path on both sides; the backreference keeps paths that contain spaces intact.
const SAME_HEADER_PATHS = /^a\/(.+) b\/\1$/;
// Last-resort split on the final space (original behaviour).
const GENERIC_HEADER_PATHS = /^(.+) (.+)$/;
const INDEX_MODE_LINE = /^index [0-9a-f]+\.\.[0-9a-f]+(?: ([0-7]{6}))?$/;
const NEW_MODE_LINE = /^new mode ([0-7]{6})$/;
const NEW_FILE_MODE_LINE = /^new file mode ([0-7]{6})$/;
const QUOTED_PATH_TOKEN = /\\([0-7]{1,3}|[\s\S])|([^\\]+)/g;
const SIMPLE_ESCAPES: Readonly<Record<string, string>> = {
  a: '\x07',
  b: '\b',
  f: '\f',
  n: '\n',
  r: '\r',
  t: '\t',
  v: '\v',
};

/** Removes a leading `a/` or `b/` diff prefix from a header path. */
function stripPrefix(path: string): string {
  return path.replace(/^[ab]\//, '');
}

/** Reports whether `path` lives under one of the known text artifact roots. */
function isTextArtifactPath(path: string): boolean {
  return textArtifactRoots.some((root) => path.startsWith(root));
}

/**
 * Decodes a double-quoted git path (C-style escapes, octal bytes interpreted as UTF-8).
 * Tokens that are not wrapped in double quotes are returned unchanged.
 */
function unquoteGitPath(token: string): string {
  if (token.length < 2 || !token.startsWith('"') || !token.endsWith('"')) {
    return token;
  }
  const encoder = new TextEncoder();
  const bytes: number[] = [];
  for (const match of token.slice(1, -1).matchAll(QUOTED_PATH_TOKEN)) {
    const escape = match[1];
    const literal = match[2];
    if (literal !== undefined) {
      bytes.push(...Array.from(encoder.encode(literal)));
    } else if (escape !== undefined && /^[0-7]/.test(escape)) {
      // Octal escapes are raw bytes; several of them may form one UTF-8 character.
      bytes.push(parseInt(escape, 8) & 0xff);
    } else if (escape !== undefined) {
      bytes.push(...Array.from(encoder.encode(SIMPLE_ESCAPES[escape] ?? escape)));
    }
  }
  return new TextDecoder().decode(Uint8Array.from(bytes));
}

/**
 * Extracts the old/new paths from a `diff --git` header line.
 * Returns `null` when the line is not such a header.
 */
function parseDiffHeaderPaths(line: string): { oldPath: string; newPath: string } | null {
  if (!line.startsWith(DIFF_HEADER_PREFIX)) {
    return null;
  }
  const rest = line.slice(DIFF_HEADER_PREFIX.length);

  const quoted = QUOTED_HEADER_PATHS.exec(rest);
  if (quoted) {
    return {
      oldPath: stripPrefix(unquoteGitPath(quoted[1] ?? '')),
      newPath: stripPrefix(unquoteGitPath(quoted[2] ?? '')),
    };
  }

  const same = SAME_HEADER_PATHS.exec(rest);
  if (same) {
    const path = same[1] ?? '';
    return { oldPath: path, newPath: path };
  }

  const generic = GENERIC_HEADER_PATHS.exec(rest);
  if (generic) {
    return { oldPath: stripPrefix(generic[1] ?? ''), newPath: stripPrefix(generic[2] ?? '') };
  }
  return null;
}

/**
 * Lists the files a git patch touches, one entry per `diff --git` section, in patch order.
 *
 * For each section the parser records the header paths, the last file mode announced by an
 * `index`, `new mode` or `new file mode` line, and whether the section is binary
 * (`GIT binary patch` or a `Binary files ...` line). Lines before the first header are ignored.
 *
 * Header paths containing spaces are kept whole when both sides name the same file, and
 * double-quoted header paths are decoded, so policy checks see the real repository path.
 *
 * @param patch - Patch text as produced by `git diff` / `git format-patch`.
 * @returns The touched paths; empty when the patch has no `diff --git` sections.
 */
export function parsePatchTouchedPaths(patch: string): PatchTouchedPath[] {
  const entries: PatchTouchedPath[] = [];
  let current: PatchTouchedPath | null = null;

  for (const line of patch.split('\n')) {
    const header = parseDiffHeaderPaths(line);
    if (header) {
      if (current) {
        entries.push(current);
      }
      current = {
        path: header.newPath === '/dev/null' ? header.oldPath : header.newPath,
        oldPath: header.oldPath,
        newPath: header.newPath,
        mode: null,
        binary: false,
      };
      continue;
    }
    if (!current) {
      continue;
    }

    const indexMode = INDEX_MODE_LINE.exec(line);
    if (indexMode?.[1]) {
      current.mode = indexMode[1];
    }
    const newMode = NEW_MODE_LINE.exec(line);
    if (newMode) {
      current.mode = newMode[1] ?? current.mode;
    }
    const newFileMode = NEW_FILE_MODE_LINE.exec(line);
    if (newFileMode) {
      current.mode = newFileMode[1] ?? current.mode;
    }
    if (line === 'GIT binary patch' || line.startsWith('Binary files ')) {
      current.binary = true;
    }
  }

  if (current) {
    entries.push(current);
  }
  return entries;
}
|
||||
|
||||
/**
 * Validates a work unit's patch against the ingest patch policy and returns the paths it touches.
 *
 * Checks, in order:
 * 1. when an allow-list is supplied, every touched path is passed to the semantic-layer
 *    target-connection policy;
 * 2. per touched file: `slDisallowed` units may not touch `semantic-layer/`, and files under
 *    a text artifact root may be neither binary nor carry a mode other than `100644`.
 *
 * @param input - Work unit key, patch text and policy switches.
 * @returns The parsed touched paths when the patch is acceptable.
 * @throws Error describing the first policy violation found.
 */
export function assertPatchAllowedForWorkUnit(input: PatchPolicyInput): PatchTouchedPath[] {
  const { unitKey, patch, slDisallowed, allowedTargetConnectionIds } = input;
  const touched = parsePatchTouchedPaths(patch);

  if (allowedTargetConnectionIds) {
    assertSemanticLayerTargetPathsAllowed({
      paths: touched.map(({ path }) => path),
      allowedConnectionIds: allowedTargetConnectionIds,
    });
  }

  for (const { path, binary, mode } of touched) {
    const touchesSemanticLayer = path.startsWith('semantic-layer/');
    if (slDisallowed && touchesSemanticLayer) {
      throw new Error(`slDisallowed WorkUnit ${unitKey} touched ${path}`);
    }

    // Binary/mode restrictions only apply below the known text artifact roots.
    if (isTextArtifactPath(path)) {
      if (binary) {
        throw new Error(`unexpected binary patch under ${path}`);
      }
      if (mode && mode !== '100644') {
        throw new Error(`unexpected executable mode under ${path}: ${mode}`);
      }
    }
  }

  return touched;
}
|
||||
|
|
@ -0,0 +1,404 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { GitService } from '../../core/index.js';
|
||||
import { FileIngestTraceWriter } from '../ingest-trace.js';
|
||||
import { integrateWorkUnitPatch } from './patch-integrator.js';
|
||||
|
||||
/**
 * Test fixture: creates a temporary home directory with an initialized git-backed config
 * repository containing one committed page, `wiki/global/a.md`, with content `old\n`.
 *
 * @returns The temp home dir, the config dir, the git service, and the HEAD sha after the seed commit.
 */
async function makeRepo() {
  const authorName = 'System User';
  const authorEmail = 'system@example.com';
  const seedPage = 'wiki/global/a.md';

  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-integrate-'));
  const configDir = join(homeDir, 'config');

  const git = new GitService({
    storage: { configDir, homeDir },
    git: {
      userName: authorName,
      userEmail: authorEmail,
      bootstrapMessage: 'init',
      bootstrapAuthor: 'system',
      bootstrapAuthorEmail: authorEmail,
    },
  });
  await git.onModuleInit();

  // Seed the repository with a single wiki page so tests have a base commit to diff against.
  await mkdir(join(configDir, 'wiki/global'), { recursive: true });
  await writeFile(join(configDir, seedPage), 'old\n');
  await git.commitFiles([seedPage], 'base', authorName, authorEmail);

  const baseSha = await git.revParseHead();
  return { homeDir, configDir, git, baseSha };
}
|
||||
|
||||
describe('integrateWorkUnitPatch', () => {
|
||||
it('applies a clean patch, runs semantic gates, and commits accepted changes', async () => {
|
||||
const { homeDir, configDir, git, baseSha } = await makeRepo();
|
||||
const childDir = join(homeDir, 'child');
|
||||
await git.addWorktree(childDir, 'child', baseSha);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
await writeFile(join(childDir, 'wiki/global/a.md'), 'new\n');
|
||||
await childGit.commitFiles(['wiki/global/a.md'], 'edit', 'System User', 'system@example.com');
|
||||
const patchPath = join(homeDir, 'patches/wu.patch');
|
||||
await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'),
|
||||
jobId: 'job-1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const result = await integrateWorkUnitPatch({
|
||||
unitKey: 'wu-1',
|
||||
patchPath,
|
||||
integrationGit: git,
|
||||
trace,
|
||||
author: { name: 'KTX Test', email: 'system@ktx.local' },
|
||||
validateAppliedTree: vi.fn().mockResolvedValue(undefined),
|
||||
slDisallowed: false,
|
||||
allowedTargetConnectionIds: new Set(['c1']),
|
||||
});
|
||||
|
||||
expect(result.status).toBe('accepted');
|
||||
await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('new\n');
|
||||
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_apply_finished');
|
||||
});
|
||||
|
||||
it('rolls back and classifies semantic conflicts', async () => {
|
||||
const { homeDir, configDir, git, baseSha } = await makeRepo();
|
||||
const childDir = join(homeDir, 'child-semantic');
|
||||
await git.addWorktree(childDir, 'child-semantic', baseSha);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
await writeFile(join(childDir, 'wiki/global/a.md'), 'bad\n');
|
||||
await childGit.commitFiles(['wiki/global/a.md'], 'bad edit', 'System User', 'system@example.com');
|
||||
const patchPath = join(homeDir, 'patches/bad.patch');
|
||||
await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath: join(homeDir, '.ktx/ingest-traces/job-2/trace.jsonl'),
|
||||
jobId: 'job-2',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const result = await integrateWorkUnitPatch({
|
||||
unitKey: 'wu-bad',
|
||||
patchPath,
|
||||
integrationGit: git,
|
||||
trace,
|
||||
author: { name: 'KTX Test', email: 'system@ktx.local' },
|
||||
validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')),
|
||||
slDisallowed: false,
|
||||
allowedTargetConnectionIds: new Set(['c1']),
|
||||
});
|
||||
|
||||
expect(result.status).toBe('semantic_conflict');
|
||||
await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('old\n');
|
||||
});
|
||||
|
||||
it('classifies slDisallowed patch policy failures as traced textual conflicts', async () => {
|
||||
const { homeDir, configDir, git, baseSha } = await makeRepo();
|
||||
await mkdir(join(configDir, 'semantic-layer/c1'), { recursive: true });
|
||||
await git.commitFiles(['semantic-layer/c1'], 'empty sl dir', 'System User', 'system@example.com');
|
||||
const childDir = join(homeDir, 'child-policy');
|
||||
await git.addWorktree(childDir, 'child-policy', baseSha);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
await mkdir(join(childDir, 'semantic-layer/c1'), { recursive: true });
|
||||
await writeFile(join(childDir, 'semantic-layer/c1/orders.yaml'), 'name: orders\ncolumns: []\njoins: []\nmeasures: []\n');
|
||||
await childGit.commitFiles(['semantic-layer/c1/orders.yaml'], 'forbidden sl', 'System User', 'system@example.com');
|
||||
const patchPath = join(homeDir, 'patches/forbidden.patch');
|
||||
await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath: join(homeDir, '.ktx/ingest-traces/job-policy/trace.jsonl'),
|
||||
jobId: 'job-policy',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const result = await integrateWorkUnitPatch({
|
||||
unitKey: 'lookml-mismatch',
|
||||
patchPath,
|
||||
integrationGit: git,
|
||||
trace,
|
||||
author: { name: 'KTX Test', email: 'system@ktx.local' },
|
||||
validateAppliedTree: vi.fn().mockResolvedValue(undefined),
|
||||
slDisallowed: true,
|
||||
allowedTargetConnectionIds: new Set(['c1']),
|
||||
});
|
||||
|
||||
expect(result).toMatchObject({
|
||||
status: 'textual_conflict',
|
||||
touchedPaths: ['semantic-layer/c1/orders.yaml'],
|
||||
});
|
||||
const rawTrace = await readFile(trace.tracePath, 'utf-8');
|
||||
expect(rawTrace).toContain('patch_policy_rejected');
|
||||
expect(rawTrace).toContain('slDisallowed WorkUnit lookml-mismatch touched semantic-layer/c1/orders.yaml');
|
||||
});
|
||||
|
||||
it('classifies unauthorized semantic-layer targets as traced textual conflicts', async () => {
|
||||
const { homeDir, git, baseSha } = await makeRepo();
|
||||
const childDir = join(homeDir, 'child-target-policy');
|
||||
await git.addWorktree(childDir, 'child-target-policy', baseSha);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
await mkdir(join(childDir, 'semantic-layer/finance'), { recursive: true });
|
||||
await writeFile(
|
||||
join(childDir, 'semantic-layer/finance/orders.yaml'),
|
||||
'name: orders\ncolumns: []\njoins: []\nmeasures: []\n',
|
||||
);
|
||||
await childGit.commitFiles(['semantic-layer/finance/orders.yaml'], 'unauthorized sl', 'System User', 'system@example.com');
|
||||
const patchPath = join(homeDir, 'patches/unauthorized.patch');
|
||||
await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath: join(homeDir, '.ktx/ingest-traces/job-target-policy/trace.jsonl'),
|
||||
jobId: 'job-target-policy',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const result = await integrateWorkUnitPatch({
|
||||
unitKey: 'wu-finance',
|
||||
patchPath,
|
||||
integrationGit: git,
|
||||
trace,
|
||||
author: { name: 'KTX Test', email: 'system@ktx.local' },
|
||||
validateAppliedTree: vi.fn().mockResolvedValue(undefined),
|
||||
slDisallowed: false,
|
||||
allowedTargetConnectionIds: new Set(['warehouse']),
|
||||
});
|
||||
|
||||
expect(result).toMatchObject({
|
||||
status: 'textual_conflict',
|
||||
touchedPaths: ['semantic-layer/finance/orders.yaml'],
|
||||
});
|
||||
const rawTrace = await readFile(trace.tracePath, 'utf-8');
|
||||
expect(rawTrace).toContain('patch_policy_rejected');
|
||||
expect(rawTrace).toContain('semantic-layer target connection not allowed');
|
||||
expect(rawTrace).toContain('allowedTargetConnectionIds');
|
||||
});
|
||||
|
||||
it('repairs a textual conflict through the bounded resolver and commits repaired files', async () => {
|
||||
const { homeDir, configDir, git, baseSha } = await makeRepo();
|
||||
await mkdir(join(configDir, 'wiki/global'), { recursive: true });
|
||||
await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8');
|
||||
await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 'system@example.com');
|
||||
const conflictBase = await git.revParseHead();
|
||||
|
||||
await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8');
|
||||
await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com');
|
||||
|
||||
const childDir = join(homeDir, 'child-conflict');
|
||||
await git.addWorktree(childDir, 'child-conflict', conflictBase);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8');
|
||||
await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com');
|
||||
const patchPath = join(homeDir, 'proposal.patch');
|
||||
await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath);
|
||||
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver/trace.jsonl'),
|
||||
jobId: 'job-resolver',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey: 'metabase',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const validateAppliedTree = vi.fn(async (paths: string[]) => {
|
||||
expect(paths).toEqual(['wiki/global/a.md']);
|
||||
await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\nproposal\n');
|
||||
});
|
||||
|
||||
const result = await integrateWorkUnitPatch({
|
||||
unitKey: 'wu-conflict',
|
||||
patchPath,
|
||||
integrationGit: git,
|
||||
trace,
|
||||
author: { name: 'System User', email: 'system@example.com' },
|
||||
slDisallowed: false,
|
||||
allowedTargetConnectionIds: new Set(['warehouse']),
|
||||
validateAppliedTree,
|
||||
resolveTextualConflict: vi.fn(async (context) => {
|
||||
expect(context).toMatchObject({
|
||||
unitKey: 'wu-conflict',
|
||||
patchPath,
|
||||
touchedPaths: ['wiki/global/a.md'],
|
||||
});
|
||||
await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\nproposal\n', 'utf-8');
|
||||
return {
|
||||
status: 'repaired' as const,
|
||||
attempts: 1,
|
||||
changedPaths: ['wiki/global/a.md'],
|
||||
};
|
||||
}),
|
||||
});
|
||||
|
||||
expect(result).toMatchObject({
|
||||
status: 'accepted',
|
||||
touchedPaths: ['wiki/global/a.md'],
|
||||
textualResolution: {
|
||||
status: 'repaired',
|
||||
attempts: 1,
|
||||
changedPaths: ['wiki/global/a.md'],
|
||||
},
|
||||
});
|
||||
expect(validateAppliedTree).toHaveBeenCalledOnce();
|
||||
await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\nproposal\n');
|
||||
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_textual_resolution');
|
||||
expect(await git.revParseHead()).not.toBe(baseSha);
|
||||
});
|
||||
|
||||
it('keeps the pre-apply integration tree when the resolver cannot repair a textual conflict', async () => {
|
||||
const { homeDir, configDir, git } = await makeRepo();
|
||||
await mkdir(join(configDir, 'wiki/global'), { recursive: true });
|
||||
await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8');
|
||||
await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 'system@example.com');
|
||||
const conflictBase = await git.revParseHead();
|
||||
|
||||
await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8');
|
||||
await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com');
|
||||
const acceptedHead = await git.revParseHead();
|
||||
|
||||
const childDir = join(homeDir, 'child-conflict-fails');
|
||||
await git.addWorktree(childDir, 'child-conflict-fails', conflictBase);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8');
|
||||
await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com');
|
||||
const patchPath = join(homeDir, 'proposal-fails.patch');
|
||||
await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath);
|
||||
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver-fails/trace.jsonl'),
|
||||
jobId: 'job-resolver-fails',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey: 'metabase',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const result = await integrateWorkUnitPatch({
|
||||
unitKey: 'wu-conflict',
|
||||
patchPath,
|
||||
integrationGit: git,
|
||||
trace,
|
||||
author: { name: 'System User', email: 'system@example.com' },
|
||||
slDisallowed: false,
|
||||
allowedTargetConnectionIds: new Set(['warehouse']),
|
||||
validateAppliedTree: vi.fn(async () => {}),
|
||||
resolveTextualConflict: vi.fn(async () => ({
|
||||
status: 'failed' as const,
|
||||
attempts: 1,
|
||||
reason: 'resolver completed without editing an allowed path',
|
||||
})),
|
||||
});
|
||||
|
||||
expect(result).toMatchObject({
|
||||
status: 'textual_conflict',
|
||||
textualResolution: {
|
||||
status: 'failed',
|
||||
attempts: 1,
|
||||
reason: 'resolver completed without editing an allowed path',
|
||||
},
|
||||
});
|
||||
expect(await git.revParseHead()).toBe(acceptedHead);
|
||||
await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\n');
|
||||
});
|
||||
|
||||
it('repairs semantic gate failures after a patch applies cleanly', async () => {
  const wikiFile = 'wiki/global/a.md';
  const { homeDir, configDir, git, baseSha } = await makeRepo();

  // Build a proposal patch from a child worktree branched at the base commit.
  const proposalDir = join(homeDir, 'child-semantic-repair');
  await git.addWorktree(proposalDir, 'child-semantic-repair', baseSha);
  const proposalGit = git.forWorktree(proposalDir);
  await writeFile(join(proposalDir, wikiFile), 'bad semantic ref\n');
  await proposalGit.commitFiles([wikiFile], 'bad semantic edit', 'System User', 'system@example.com');
  const patchPath = join(homeDir, 'patches/semantic-repair.patch');
  await proposalGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);

  const trace = new FileIngestTraceWriter({
    tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair/trace.jsonl'),
    jobId: 'job-semantic-repair',
    connectionId: 'c1',
    sourceKey: 'fake',
    level: 'trace',
  });

  // The gate rejects the first validation and accepts the one that follows the repair.
  const validateAppliedTree = vi.fn();
  validateAppliedTree.mockRejectedValueOnce(
    new Error('final artifact gates failed:\na: unknown semantic-layer entity'),
  );
  validateAppliedTree.mockResolvedValueOnce(undefined);

  const result = await integrateWorkUnitPatch({
    unitKey: 'wu-repairable',
    patchPath,
    integrationGit: git,
    trace,
    author: { name: 'KTX Test', email: 'system@ktx.local' },
    validateAppliedTree,
    slDisallowed: false,
    allowedTargetConnectionIds: new Set(['c1']),
    repairGateFailure: vi.fn(async (context) => {
      // The repair hook must receive the failing unit, its patch and the touched paths.
      expect(context).toMatchObject({
        unitKey: 'wu-repairable',
        patchPath,
        touchedPaths: [wikiFile],
      });
      await writeFile(join(configDir, wikiFile), 'repaired semantic ref\n', 'utf-8');
      return {
        status: 'repaired' as const,
        attempts: 1,
        changedPaths: [wikiFile],
      };
    }),
  });

  expect(result).toMatchObject({
    status: 'accepted',
    touchedPaths: [wikiFile],
    gateRepair: {
      status: 'repaired',
      attempts: 1,
      changedPaths: [wikiFile],
    },
  });
  expect(validateAppliedTree).toHaveBeenCalledTimes(2);

  const integratedContent = readFile(join(configDir, wikiFile), 'utf-8');
  await expect(integratedContent).resolves.toBe('repaired semantic ref\n');
  const traceLog = readFile(trace.tracePath, 'utf-8');
  await expect(traceLog).resolves.toContain('patch_accepted_after_gate_repair');
});
|
||||
|
||||
it('keeps the pre-apply tree when semantic gate repair fails', async () => {
  const wikiFile = 'wiki/global/a.md';
  const repairFailure = 'gate repair completed without editing an allowed path';
  const { homeDir, configDir, git, baseSha } = await makeRepo();

  // Build a proposal patch from a child worktree branched at the base commit.
  const proposalDir = join(homeDir, 'child-semantic-repair-fails');
  await git.addWorktree(proposalDir, 'child-semantic-repair-fails', baseSha);
  const proposalGit = git.forWorktree(proposalDir);
  await writeFile(join(proposalDir, wikiFile), 'bad semantic ref\n');
  await proposalGit.commitFiles([wikiFile], 'bad semantic edit', 'System User', 'system@example.com');
  const patchPath = join(homeDir, 'patches/semantic-repair-fails.patch');
  await proposalGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);

  const trace = new FileIngestTraceWriter({
    tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair-fails/trace.jsonl'),
    jobId: 'job-semantic-repair-fails',
    connectionId: 'c1',
    sourceKey: 'fake',
    level: 'trace',
  });

  // The gate always rejects and the repair hook reports that it could not fix anything.
  const result = await integrateWorkUnitPatch({
    unitKey: 'wu-not-repaired',
    patchPath,
    integrationGit: git,
    trace,
    author: { name: 'KTX Test', email: 'system@ktx.local' },
    validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')),
    slDisallowed: false,
    allowedTargetConnectionIds: new Set(['c1']),
    repairGateFailure: vi.fn(async () => ({
      status: 'failed' as const,
      attempts: 1,
      reason: 'gate repair completed without editing an allowed path',
    })),
  });

  expect(result).toMatchObject({
    status: 'semantic_conflict',
    gateRepair: {
      status: 'failed',
      attempts: 1,
      reason: repairFailure,
    },
  });

  // The integration worktree must still hold the content from before the patch was applied.
  const integratedContent = readFile(join(configDir, wikiFile), 'utf-8');
  await expect(integratedContent).resolves.toBe('old\n');
});
|
||||
});
|
||||
321
packages/context/src/ingest/isolated-diff/patch-integrator.ts
Normal file
321
packages/context/src/ingest/isolated-diff/patch-integrator.ts
Normal file
|
|
@ -0,0 +1,321 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import type { GitService } from '../../core/index.js';
|
||||
import type { FinalGateRepairResult } from '../final-gate-repair.js';
|
||||
import type { IngestTraceWriter } from '../ingest-trace.js';
|
||||
import { traceTimed } from '../ingest-trace.js';
|
||||
import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths } from './git-patch.js';
|
||||
import type { TextualConflictResolutionResult } from './textual-conflict-resolver.js';
|
||||
|
||||
/**
 * Outcome of a textual-conflict resolution attempt as carried on an integration result.
 *
 * - `failed`: no usable resolution was produced; `reason` explains why.
 * - `repaired`: `changedPaths` lists the paths the resolver edited.
 *
 * `attempts` is the attempt count reported by the resolver in both cases.
 */
export type PatchIntegrationTextualResolution =
  | { status: 'failed'; attempts: number; reason: string }
  | { status: 'repaired'; attempts: number; changedPaths: string[] };
|
||||
|
||||
/** Fields present on every integration outcome, whatever its status. */
type PatchIntegrationReport = {
  /** Repository paths the outcome refers to. */
  touchedPaths: string[];
  /** Set when the textual-conflict resolver ran for this patch. */
  textualResolution?: PatchIntegrationTextualResolution;
  /** Set when the gate-repair hook ran for this patch. */
  gateRepair?: FinalGateRepairResult;
};

/**
 * Result of integrating one WorkUnit patch, discriminated by `status`.
 *
 * - `accepted`: carries the `commitSha` reported for the integration.
 * - `textual_conflict` / `semantic_conflict`: carry a human-readable `reason`.
 */
export type PatchIntegrationResult = PatchIntegrationReport &
  (
    | { status: 'accepted'; commitSha: string }
    | { status: 'textual_conflict'; reason: string }
    | { status: 'semantic_conflict'; reason: string }
  );
|
||||
|
||||
/** Context handed to the optional repair hooks when a patch cannot be integrated as-is. */
type PatchRepairRequest = {
  unitKey: string;
  patchPath: string;
  touchedPaths: string[];
  /** Message of the failure that triggered the hook. */
  reason: string;
};

/** Everything `integrateWorkUnitPatch` needs to integrate one WorkUnit patch. */
export interface IntegrateWorkUnitPatchInput {
  /** WorkUnit identifier; used in trace events and commit messages. */
  unitKey: string;
  /** Location of the patch file to integrate. */
  patchPath: string;
  /** Git handle of the integration worktree the patch is applied to. */
  integrationGit: GitService;
  /** Sink for integration trace events. */
  trace: IngestTraceWriter;
  /** Author recorded on commits created by the integration. */
  author: { name: string; email: string };
  /** Forwarded to the patch policy check. */
  slDisallowed: boolean;
  /** Forwarded to the patch policy check and listed in policy-rejection traces. */
  allowedTargetConnectionIds: ReadonlySet<string>;
  /** Gate run against the applied tree; a rejection marks the patch as failing the gate. */
  validateAppliedTree(touchedPaths: string[]): Promise<void>;
  /** Optional hook invoked after the patch fails to apply. */
  resolveTextualConflict?(input: PatchRepairRequest): Promise<TextualConflictResolutionResult>;
  /** Optional hook invoked after the applied patch fails `validateAppliedTree`. */
  repairGateFailure?(input: PatchRepairRequest): Promise<FinalGateRepairResult>;
}
|
||||
|
||||
/**
 * Extracts a printable message from an arbitrary thrown value.
 *
 * @param error Any caught value.
 * @returns `error.message` for `Error` instances, otherwise the value converted with `String`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Integrates one WorkUnit patch into the integration worktree.
 *
 * Flow:
 * 1. A patch with no touched paths is accepted without creating a commit.
 * 2. The patch is checked against the WorkUnit policy; a rejection is reported as
 *    `textual_conflict`.
 * 3. The patch is applied with a 3-way index apply. If that fails and
 *    `resolveTextualConflict` is provided, the resolver's edits are gated and committed instead.
 * 4. The applied tree is gated with `validateAppliedTree`. If that fails and
 *    `repairGateFailure` is provided, the repair's edits are gated again and committed.
 * 5. Otherwise the staged patch is committed.
 *
 * Every non-accepted outcome after the apply step resets the worktree to the HEAD recorded
 * before the apply (when one exists). The same reset is attempted when a resolver or repair
 * hook throws, after which the hook's error is rethrown.
 *
 * @param input Patch location, git handle, trace writer, policy inputs and optional hooks.
 * @returns The integration outcome; apply and gate failures are reported as values.
 */
export async function integrateWorkUnitPatch(input: IntegrateWorkUnitPatchInput): Promise<PatchIntegrationResult> {
  const preApplyHead = await input.integrationGit.revParseHead();
  const patch = await readFile(input.patchPath, 'utf-8');
  const touchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path);

  // Restores the commit recorded before the apply; a no-op when there was no HEAD.
  const rollback = async (): Promise<void> => {
    if (preApplyHead) {
      await input.integrationGit.resetHardTo(preApplyHead);
    }
  };

  // Used while a hook error is already propagating: a failing reset must not mask it.
  const rollbackBeforeRethrow = async (): Promise<void> => {
    try {
      await rollback();
    } catch {
      // Intentionally ignored; the caller rethrows the original hook error.
    }
  };

  if (touchedPaths.length === 0) {
    await input.trace.event('debug', 'integration', 'patch_noop_accepted', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      patchBytes: Buffer.byteLength(patch),
    });
    return { status: 'accepted', commitSha: preApplyHead ?? '', touchedPaths };
  }

  try {
    assertPatchAllowedForWorkUnit({
      unitKey: input.unitKey,
      patch,
      slDisallowed: input.slDisallowed,
      allowedTargetConnectionIds: input.allowedTargetConnectionIds,
    });
  } catch (error) {
    const reason = errorMessage(error);
    await input.trace.event('error', 'integration', 'patch_policy_rejected', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      allowedTargetConnectionIds: [...input.allowedTargetConnectionIds].sort(),
      reason,
    });
    // Nothing has been applied yet, so no rollback is needed here.
    return { status: 'textual_conflict', reason, touchedPaths };
  }

  try {
    await traceTimed(
      input.trace,
      'integration',
      'patch_apply',
      { unitKey: input.unitKey, patchPath: input.patchPath, touchedPaths },
      async () => {
        await input.integrationGit.applyPatchFile3WayIndex(input.patchPath);
        await input.integrationGit.assertWorktreeClean();
      },
    );
  } catch (error) {
    await rollback();
    const reason = errorMessage(error);
    await input.trace.event('error', 'integration', 'patch_textual_conflict', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      reason,
    });

    if (!input.resolveTextualConflict) {
      return { status: 'textual_conflict', reason, touchedPaths };
    }

    let resolverOutcome: TextualConflictResolutionResult;
    try {
      resolverOutcome = await input.resolveTextualConflict({
        unitKey: input.unitKey,
        patchPath: input.patchPath,
        touchedPaths,
        reason,
      });
    } catch (resolverError) {
      // The resolver may have edited files before throwing; do not leave them behind.
      await rollbackBeforeRethrow();
      throw resolverError;
    }
    const textualResolution = resolverOutcome;

    if (textualResolution.status === 'failed') {
      await rollback();
      return {
        status: 'textual_conflict',
        reason: textualResolution.reason,
        touchedPaths,
        textualResolution,
      };
    }

    try {
      await traceTimed(
        input.trace,
        'integration',
        'semantic_gate_after_textual_resolution',
        { unitKey: input.unitKey, touchedPaths: textualResolution.changedPaths },
        async () => {
          await input.validateAppliedTree(textualResolution.changedPaths);
        },
      );
    } catch (semanticError) {
      await rollback();
      const semanticReason = errorMessage(semanticError);
      await input.trace.event('error', 'integration', 'patch_semantic_conflict_after_textual_resolution', {
        unitKey: input.unitKey,
        patchPath: input.patchPath,
        touchedPaths: textualResolution.changedPaths,
        reason: semanticReason,
      });
      return {
        status: 'semantic_conflict',
        reason: semanticReason,
        touchedPaths: textualResolution.changedPaths,
        textualResolution,
      };
    }

    const resolutionCommit = await input.integrationGit.commitFiles(
      textualResolution.changedPaths,
      `ingest: resolve WorkUnit ${input.unitKey} conflict`,
      input.author.name,
      input.author.email,
    );
    if (!resolutionCommit.created) {
      await rollback();
      const noChangeReason = 'textual resolver produced no committable changes';
      await input.trace.event('error', 'integration', 'textual_conflict_resolver_noop', {
        unitKey: input.unitKey,
        patchPath: input.patchPath,
        touchedPaths: textualResolution.changedPaths,
        // Recorded so the trace explains the rejection like the other error events do.
        reason: noChangeReason,
      });
      return {
        status: 'textual_conflict',
        reason: noChangeReason,
        touchedPaths: textualResolution.changedPaths,
        textualResolution,
      };
    }

    await input.trace.event('debug', 'integration', 'patch_accepted_after_textual_resolution', {
      unitKey: input.unitKey,
      commitSha: resolutionCommit.commitHash,
      touchedPaths: textualResolution.changedPaths,
      attempts: textualResolution.attempts,
    });
    return {
      status: 'accepted',
      commitSha: resolutionCommit.commitHash,
      touchedPaths: textualResolution.changedPaths,
      textualResolution,
    };
  }

  try {
    await traceTimed(input.trace, 'integration', 'semantic_gate', { unitKey: input.unitKey, touchedPaths }, async () => {
      await input.validateAppliedTree(touchedPaths);
    });
  } catch (error) {
    const reason = errorMessage(error);
    await input.trace.event('error', 'integration', 'patch_semantic_conflict', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      reason,
    });

    if (!input.repairGateFailure) {
      await rollback();
      return { status: 'semantic_conflict', reason, touchedPaths };
    }

    let repairOutcome: FinalGateRepairResult;
    try {
      repairOutcome = await input.repairGateFailure({
        unitKey: input.unitKey,
        patchPath: input.patchPath,
        touchedPaths,
        reason,
      });
    } catch (repairError) {
      // The applied patch is still staged at this point; do not leave it behind.
      await rollbackBeforeRethrow();
      throw repairError;
    }
    const gateRepair = repairOutcome;

    if (gateRepair.status === 'failed') {
      await rollback();
      return {
        status: 'semantic_conflict',
        reason: gateRepair.reason,
        touchedPaths,
        gateRepair,
      };
    }

    try {
      await traceTimed(
        input.trace,
        'integration',
        'semantic_gate_after_gate_repair',
        { unitKey: input.unitKey, touchedPaths: gateRepair.changedPaths },
        async () => {
          await input.validateAppliedTree(gateRepair.changedPaths);
        },
      );
    } catch (repairValidationError) {
      await rollback();
      return {
        status: 'semantic_conflict',
        reason: errorMessage(repairValidationError),
        touchedPaths: gateRepair.changedPaths,
        gateRepair,
      };
    }

    const repairCommit = await input.integrationGit.commitFiles(
      gateRepair.changedPaths,
      `ingest: repair WorkUnit ${input.unitKey} gates`,
      input.author.name,
      input.author.email,
    );
    if (!repairCommit.created) {
      await rollback();
      return {
        status: 'semantic_conflict',
        reason: 'gate repair produced no committable changes',
        touchedPaths: gateRepair.changedPaths,
        gateRepair,
      };
    }

    await input.trace.event('debug', 'integration', 'patch_accepted_after_gate_repair', {
      unitKey: input.unitKey,
      commitSha: repairCommit.commitHash,
      touchedPaths: gateRepair.changedPaths,
      attempts: gateRepair.attempts,
    });
    return {
      status: 'accepted',
      commitSha: repairCommit.commitHash,
      touchedPaths: gateRepair.changedPaths,
      gateRepair,
    };
  }

  const commit = await input.integrationGit.commitStaged(
    `ingest: accept WorkUnit ${input.unitKey}`,
    input.author.name,
    input.author.email,
  );
  await input.trace.event('debug', 'integration', 'patch_accepted', {
    unitKey: input.unitKey,
    commitSha: commit.commitHash,
    touchedPaths,
  });
  return { status: 'accepted', commitSha: commit.commitHash, touchedPaths };
}
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { FileIngestTraceWriter } from '../ingest-trace.js';
|
||||
import { resolveTextualConflict } from './textual-conflict-resolver.js';
|
||||
|
||||
/**
 * Creates a throwaway resolver fixture under the OS temp directory: a work directory holding
 * `wiki/global/account.md`, a failed patch against that file, and a trace writer.
 */
async function makeHarness() {
  const accountFile = 'wiki/global/account.md';
  const failedPatch =
    'diff --git a/wiki/global/account.md b/wiki/global/account.md\n' +
    'index 8877391..6f63f4d 100644\n' +
    '--- a/wiki/global/account.md\n' +
    '+++ b/wiki/global/account.md\n' +
    '@@ -1 +1 @@\n' +
    '-base line\n' +
    '+proposal line\n';

  const root = await mkdtemp(join(tmpdir(), 'ktx-textual-resolver-'));
  const workdir = join(root, 'workdir');
  const patchPath = join(root, 'failed.patch');
  const trace = new FileIngestTraceWriter({
    tracePath: join(root, 'trace.jsonl'),
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    runId: 'run-1',
    syncId: 'sync-1',
    level: 'trace',
  });

  // The integration file holds already-accepted content that differs from the patch's base line.
  await mkdir(join(workdir, 'wiki/global'), { recursive: true });
  await writeFile(join(workdir, accountFile), 'accepted line\n', 'utf-8');
  await writeFile(patchPath, failedPatch, 'utf-8');

  return { root, workdir, patchPath, trace };
}
|
||||
|
||||
describe('resolveTextualConflict', () => {
  const accountFile = 'wiki/global/account.md';
  const applyFailure = 'patch failed: wiki/global/account.md';

  it('lets the repair agent read the failed patch and write only touched paths', async () => {
    const { workdir, patchPath, trace } = await makeHarness();
    const mergedContent = 'accepted line\nproposal line\n';

    // Fake agent: exercises each tool once and finishes with a valid write.
    const agentRunner = {
      runLoop: vi.fn(async (params: any) => {
        const tools = params.toolSet;

        const current = await tools.read_integration_file.execute({ path: 'wiki/global/account.md' });
        expect(current.structured).toEqual({ path: 'wiki/global/account.md', exists: true });
        expect(current.markdown).toContain('accepted line');

        const patch = await tools.read_failed_patch.execute({});
        expect(patch.markdown).toContain('proposal line');

        // Paths outside the touched set must be refused.
        const forbiddenWrite = tools.write_integration_file.execute({
          path: 'wiki/global/not-allowed.md',
          content: 'bad\n',
        });
        await expect(forbiddenWrite).rejects.toThrow(/resolver path not allowed/);

        await tools.write_integration_file.execute({
          path: 'wiki/global/account.md',
          content: 'accepted line\nproposal line\n',
        });
        return { stopReason: 'natural' as const };
      }),
    };

    const result = await resolveTextualConflict({
      agentRunner,
      workdir,
      unitKey: 'wu-a',
      patchPath,
      touchedPaths: [accountFile],
      trace,
      reason: applyFailure,
      maxAttempts: 1,
      stepBudget: 8,
    });

    expect(result).toEqual({
      status: 'repaired',
      attempts: 1,
      changedPaths: [accountFile],
    });
    await expect(readFile(join(workdir, accountFile), 'utf-8')).resolves.toBe(mergedContent);

    const expectedTags = expect.objectContaining({
      operationName: 'ingest-isolated-diff-textual-resolver',
      jobId: 'job-1',
      unitKey: 'wu-a',
    });
    expect(agentRunner.runLoop).toHaveBeenCalledWith(
      expect.objectContaining({
        modelRole: 'repair',
        stepBudget: 8,
        telemetryTags: expectedTags,
      }),
    );
  });

  it('fails when the repair agent completes without editing any touched path', async () => {
    const { workdir, patchPath, trace } = await makeHarness();

    // Fake agent that stops immediately without calling any tool.
    const idleRunner = { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) };

    const result = await resolveTextualConflict({
      agentRunner: idleRunner,
      workdir,
      unitKey: 'wu-a',
      patchPath,
      touchedPaths: [accountFile],
      trace,
      reason: applyFailure,
      maxAttempts: 1,
      stepBudget: 8,
    });

    expect(result).toEqual({
      status: 'failed',
      attempts: 1,
      reason: 'resolver completed without editing an allowed path',
    });
  });
});
|
||||
|
|
@ -0,0 +1,238 @@
|
|||
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../llm/index.js';
|
||||
import type { IngestTraceWriter } from '../ingest-trace.js';
|
||||
import { traceTimed } from '../ingest-trace.js';
|
||||
|
||||
/**
 * Result of running the textual-conflict resolver.
 *
 * - `failed`: no attempt produced an edit; `reason` is the last recorded failure.
 * - `repaired`: `changedPaths` lists the paths edited through the resolver tools.
 *
 * `attempts` is the number of the attempt that succeeded, or the attempt limit on failure.
 */
export type TextualConflictResolutionResult =
  | { status: 'failed'; attempts: number; reason: string }
  | { status: 'repaired'; attempts: number; changedPaths: string[] };
|
||||
|
||||
/** Inputs for one `resolveTextualConflict` run. */
export interface ResolveTextualConflictInput {
  /** Runs the repair agent loop. */
  agentRunner: AgentRunnerPort;
  /** Sink for resolver trace events; its context also feeds the telemetry tags. */
  trace: IngestTraceWriter;
  /** Directory the resolver tools read from and write to. */
  workdir: string;
  /** WorkUnit identifier, shown in the prompt, traces and telemetry tags. */
  unitKey: string;
  /** Location of the patch that failed to apply. */
  patchPath: string;
  /** Paths touched by the patch; the only paths the tools accept. */
  touchedPaths: string[];
  /** Failure message shown to the agent. */
  reason: string;
  /** Maximum number of agent runs; defaults to 1. */
  maxAttempts?: number;
  /** Step budget passed to each agent run; defaults to 12. */
  stepBudget?: number;
}
|
||||
|
||||
/** Non-empty path argument shared by every tool that targets a single file. */
const repoPathArgument = z.string().min(1);

/** Arguments of the `read_integration_file` tool. */
const readIntegrationFileSchema = z.object({ path: repoPathArgument });

/** Arguments of the `write_integration_file` tool. */
const writeIntegrationFileSchema = z.object({ path: repoPathArgument, content: z.string() });

/** Arguments of the `delete_integration_file` tool. */
const deleteIntegrationFileSchema = z.object({ path: repoPathArgument });
|
||||
|
||||
/**
 * Canonicalises a tool-supplied path into a `/`-separated repository-relative path.
 *
 * Backslashes count as separators, and leading, trailing and repeated separators are dropped.
 *
 * @param path Path as supplied by the caller.
 * @returns The cleaned path.
 * @throws Error when nothing is left or when any segment is `.` or `..`.
 */
function normalizeRepoPath(path: string): string {
  const segments: string[] = [];
  for (const segment of path.split(/[\\/]/)) {
    if (segment === '') {
      continue;
    }
    if (segment === '.' || segment === '..') {
      throw new Error(`resolver path must be a repository-relative path: ${path}`);
    }
    segments.push(segment);
  }
  if (segments.length === 0) {
    throw new Error(`resolver path must be a repository-relative path: ${path}`);
  }
  return segments.join('/');
}
|
||||
|
||||
/**
 * Normalises `path` and checks that it is one of the paths the resolver may touch.
 *
 * @param path Path as supplied to a resolver tool.
 * @param allowedPaths Normalised paths the resolver is allowed to access.
 * @returns The normalised path.
 * @throws Error when the path is malformed or not in `allowedPaths`.
 */
function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string {
  const repoPath = normalizeRepoPath(path);
  if (allowedPaths.has(repoPath)) {
    return repoPath;
  }
  throw new Error(`resolver path not allowed: ${repoPath}`);
}
|
||||
|
||||
/**
 * Reads a UTF-8 file and treats a missing file as a normal outcome.
 *
 * @param path File to read.
 * @returns `{ exists: true, content }` on success, `{ exists: false, content: '' }` when the
 *   read fails with `ENOENT`.
 * @throws Any read error other than `ENOENT`.
 */
async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> {
  let content: string;
  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    const isMissing =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissing) {
      throw error;
    }
    return { exists: false, content: '' };
  }
  return { exists: true, content };
}
|
||||
|
||||
/**
 * Builds the fixed system prompt for the textual-conflict repair agent: a role section
 * followed by the editing rules.
 */
function buildResolverSystemPrompt(): string {
  const role = [
    '<role>',
    'You repair one failed KTX isolated-diff patch inside the integration worktree.',
    '</role>',
  ];
  const rules = [
    '<rules>',
    '- Preserve accepted integration content that is unrelated to the failed patch.',
    '- Incorporate the failed patch only when the patch evidence is compatible with the current file.',
    '- Edit only paths exposed by the resolver tools.',
    '- Prefer the smallest text edit that makes the composed artifact coherent.',
    '- Do not create new facts that are absent from the current file or failed patch.',
    '- Stop after writing the repaired file content.',
    '</rules>',
  ];
  // The two sections are separated by a single blank line.
  return [...role, '', ...rules].join('\n');
}
|
||||
|
||||
/**
 * Builds the per-attempt user prompt: the WorkUnit, the attempt counter, the patch location,
 * a bullet list of touched paths, the apply failure, and the instructions to follow.
 */
function buildResolverUserPrompt(input: {
  unitKey: string;
  patchPath: string;
  touchedPaths: string[];
  reason: string;
  attempt: number;
  maxAttempts: number;
}): string {
  const touchedPathList = input.touchedPaths.map((path) => `- ${path}`).join('\n');
  const promptLines = [
    'Repair isolated-diff textual conflict.',
    '',
    `WorkUnit: ${input.unitKey}`,
    `Attempt: ${input.attempt} of ${input.maxAttempts}`,
    `Patch path: ${input.patchPath}`,
    'Touched paths:',
    touchedPathList,
    '',
    'Git apply failure:',
    `${input.reason}`,
    '',
    'Use read_failed_patch first. Then read the touched integration files, write the',
    'repaired content, and stop.',
  ];
  return promptLines.join('\n');
}
|
||||
|
||||
/**
 * Builds the tools offered to the repair agent.
 *
 * - `read_failed_patch` returns the patch file's text.
 * - `read_integration_file`, `write_integration_file` and `delete_integration_file` act on
 *   one file under `workdir`, and only for paths contained in `allowedPaths`.
 *
 * Successful writes and deletes add the normalised path to `editedPaths`, which the caller
 * inspects after the agent run.
 */
function buildToolSet(input: {
  workdir: string;
  patchPath: string;
  allowedPaths: ReadonlySet<string>;
  editedPaths: Set<string>;
}): KtxRuntimeToolSet {
  const { workdir, patchPath, allowedPaths, editedPaths } = input;

  const readFailedPatch = async () => {
    const patch = await readFile(patchPath, 'utf-8');
    return {
      markdown: patch,
      structured: { patchPath, bytes: Buffer.byteLength(patch) },
    };
  };

  const readIntegrationFile = async ({ path }: z.infer<typeof readIntegrationFileSchema>) => {
    const normalized = assertAllowedPath(path, allowedPaths);
    const file = await readOptionalFile(join(workdir, normalized));
    // A missing file is reported to the agent rather than raised as an error.
    const markdown = file.exists ? file.content : `(missing file: ${normalized})`;
    return {
      markdown,
      structured: { path: normalized, exists: file.exists },
    };
  };

  const writeIntegrationFile = async ({ path, content }: z.infer<typeof writeIntegrationFileSchema>) => {
    const normalized = assertAllowedPath(path, allowedPaths);
    const fullPath = join(workdir, normalized);
    await mkdir(dirname(fullPath), { recursive: true });
    await writeFile(fullPath, content, 'utf-8');
    editedPaths.add(normalized);
    return {
      markdown: `Wrote ${normalized}`,
      structured: { path: normalized, bytes: Buffer.byteLength(content) },
    };
  };

  const deleteIntegrationFile = async ({ path }: z.infer<typeof deleteIntegrationFileSchema>) => {
    const normalized = assertAllowedPath(path, allowedPaths);
    // `force` makes deleting an already-missing file succeed.
    await rm(join(workdir, normalized), { force: true });
    editedPaths.add(normalized);
    return {
      markdown: `Deleted ${normalized}`,
      structured: { path: normalized },
    };
  };

  return {
    read_failed_patch: {
      name: 'read_failed_patch',
      description: 'Read the failed Git patch that could not be applied to the integration worktree.',
      inputSchema: z.object({}),
      execute: readFailedPatch,
    },
    read_integration_file: {
      name: 'read_integration_file',
      description: 'Read one allowed file from the current integration worktree.',
      inputSchema: readIntegrationFileSchema,
      execute: readIntegrationFile,
    },
    write_integration_file: {
      name: 'write_integration_file',
      description: 'Replace one allowed integration worktree file with repaired text content.',
      inputSchema: writeIntegrationFileSchema,
      execute: writeIntegrationFile,
    },
    delete_integration_file: {
      name: 'delete_integration_file',
      description: 'Delete one allowed integration worktree file when the failed patch proves the deletion is correct.',
      inputSchema: deleteIntegrationFileSchema,
      execute: deleteIntegrationFile,
    },
  };
}
|
||||
|
||||
/**
 * Runs the repair agent on a patch that failed to apply, up to `maxAttempts` times.
 *
 * An attempt succeeds when the agent loop does not stop with `'error'` and at least one
 * allowed path was written or deleted through the resolver tools. Every failed attempt is
 * traced and the next attempt starts with an empty edit set.
 *
 * @param input Agent runner, work directory, patch details, trace writer and limits
 *   (`maxAttempts` defaults to 1, `stepBudget` to 12).
 * @returns `repaired` with the sorted edited paths and the successful attempt number, or
 *   `failed` with the last failure reason.
 */
export async function resolveTextualConflict(
  input: ResolveTextualConflictInput,
): Promise<TextualConflictResolutionResult> {
  const allowedPaths = new Set(input.touchedPaths.map(normalizeRepoPath));
  const attemptLimit = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 12;
  let failureReason = 'resolver did not run';

  let attempt = 0;
  while (attempt < attemptLimit) {
    attempt += 1;

    // Fresh per attempt, so edits from an earlier failed attempt are not counted.
    const editedPaths = new Set<string>();
    const traceData = {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths: [...allowedPaths].sort(),
      attempt,
      maxAttempts: attemptLimit,
      reason: input.reason,
    };

    const userPrompt = buildResolverUserPrompt({
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths: [...allowedPaths].sort(),
      reason: input.reason,
      attempt,
      maxAttempts: attemptLimit,
    });
    const toolSet = buildToolSet({
      workdir: input.workdir,
      patchPath: input.patchPath,
      allowedPaths,
      editedPaths,
    });

    const loopResult = await traceTimed(input.trace, 'resolver', 'textual_conflict_resolver', traceData, async () =>
      input.agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt: buildResolverSystemPrompt(),
        userPrompt,
        toolSet,
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-textual-resolver',
          source: input.trace.context.sourceKey,
          jobId: input.trace.context.jobId,
          unitKey: input.unitKey,
        },
      }),
    );

    if (loopResult.stopReason === 'error') {
      failureReason = loopResult.error?.message ?? 'resolver agent loop errored';
      await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', traceData, loopResult.error);
      continue;
    }

    const changedPaths = [...editedPaths].sort();
    if (changedPaths.length > 0) {
      await input.trace.event('debug', 'resolver', 'textual_conflict_resolver_repaired', {
        ...traceData,
        changedPaths,
      });
      return { status: 'repaired', attempts: attempt, changedPaths };
    }

    // The agent finished cleanly but never wrote or deleted an allowed path.
    failureReason = 'resolver completed without editing an allowed path';
    await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', {
      ...traceData,
      reason: failureReason,
    });
  }

  return { status: 'failed', attempts: attemptLimit, reason: failureReason };
}
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { GitService } from '../../core/index.js';
|
||||
import { FileIngestTraceWriter } from '../ingest-trace.js';
|
||||
import { runIsolatedWorkUnit } from './work-unit-executor.js';
|
||||
|
||||
/**
 * Creates a throwaway repository under the OS temp directory with one committed raw-source
 * snapshot, and returns its directories, the git service and the resulting HEAD as `baseSha`.
 */
async function makeGit() {
  const rawSnapshot = 'raw-sources/c1/fake/s/a.json';
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-isolated-wu-'));
  const configDir = join(homeDir, 'config');

  const gitSettings = {
    userName: 'System User',
    userEmail: 'system@example.com',
    bootstrapMessage: 'init',
    bootstrapAuthor: 'system',
    bootstrapAuthorEmail: 'system@example.com',
  };
  const git = new GitService({ storage: { configDir, homeDir }, git: gitSettings });
  await git.onModuleInit();

  // Commit the raw snapshot so child worktrees have a base that contains it.
  await mkdir(join(configDir, 'raw-sources/c1/fake/s'), { recursive: true });
  await writeFile(join(configDir, rawSnapshot), '{}\n');
  await git.commitFiles([rawSnapshot], 'raw snapshot', 'System User', 'system@example.com');

  const baseSha = await git.revParseHead();
  return { homeDir, configDir, git, baseSha };
}
|
||||
|
||||
describe('runIsolatedWorkUnit', () => {
|
||||
it('creates a child worktree at the ingestion base and persists a patch proposal', async () => {
|
||||
const { homeDir, git, baseSha } = await makeGit();
|
||||
const childDir = join(homeDir, '.worktrees/session-job-1-wu-1');
|
||||
const sessionWorktreeService = {
|
||||
create: vi.fn(async (_key: string, startSha: string) => {
|
||||
await mkdir(join(homeDir, '.worktrees'), { recursive: true });
|
||||
await git.addWorktree(childDir, 'session/job-1-wu-1', startSha);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
return {
|
||||
chatId: 'job-1-wu-1',
|
||||
workdir: childDir,
|
||||
branch: 'session/job-1-wu-1',
|
||||
baseSha: startSha,
|
||||
createdAt: new Date(),
|
||||
git: childGit,
|
||||
config: {},
|
||||
};
|
||||
}),
|
||||
cleanup: vi.fn(async () => undefined),
|
||||
};
|
||||
const tracePath = join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl');
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath,
|
||||
jobId: 'job-1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const result = await runIsolatedWorkUnit({
|
||||
unitIndex: 0,
|
||||
ingestionBaseSha: baseSha,
|
||||
sessionWorktreeService: sessionWorktreeService as never,
|
||||
patchDir: join(homeDir, '.ktx/ingest-patches/job-1'),
|
||||
trace,
|
||||
run: async (child) => {
|
||||
await mkdir(join(child.workdir, 'wiki/global'), { recursive: true });
|
||||
await writeFile(join(child.workdir, 'wiki/global/a.md'), '---\nsummary: A\nusage_mode: auto\n---\n\nBody\n');
|
||||
await child.git.commitFiles(['wiki/global/a.md'], 'test: write wiki', 'KTX Test', 'system@ktx.local');
|
||||
return {
|
||||
unitKey: 'wu-1',
|
||||
status: 'success',
|
||||
preSha: baseSha,
|
||||
postSha: await child.git.revParseHead(),
|
||||
actions: [{ target: 'wiki', type: 'created', key: 'a', detail: 'A' }],
|
||||
touchedSlSources: [],
|
||||
};
|
||||
},
|
||||
workUnit: { unitKey: 'wu-1', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] },
|
||||
});
|
||||
|
||||
expect(sessionWorktreeService.create).toHaveBeenCalledWith('job-1-wu-1', baseSha);
|
||||
expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
|
||||
expect(result.status).toBe('success');
|
||||
if (result.status !== 'success') {
|
||||
throw new Error('expected successful work unit');
|
||||
}
|
||||
const patchPath = result.patchPath;
|
||||
if (!patchPath) {
|
||||
throw new Error('expected patch path');
|
||||
}
|
||||
expect(patchPath).toContain('0000-wu-1.patch');
|
||||
await expect(readFile(patchPath, 'utf-8')).resolves.toContain('wiki/global/a.md');
|
||||
await expect(readFile(tracePath, 'utf-8')).resolves.toContain('work_unit_child_created');
|
||||
});
|
||||
|
||||
it('removes child worktrees after failed WorkUnit outcomes are traced', async () => {
  const { homeDir, git, baseSha } = await makeGit();
  const childDir = join(homeDir, '.worktrees/session-job-1-wu-fail');
  // Stub worktree service: `create` adds a real child worktree, `cleanup` only records its arguments.
  const sessionWorktreeService = {
    create: vi.fn(async (_key: string, startSha: string) => {
      await mkdir(join(homeDir, '.worktrees'), { recursive: true });
      await git.addWorktree(childDir, 'session/job-1-wu-fail', startSha);
      return {
        chatId: 'job-1-wu-fail',
        workdir: childDir,
        branch: 'session/job-1-wu-fail',
        baseSha: startSha,
        createdAt: new Date(),
        git: git.forWorktree(childDir),
        config: {},
      };
    }),
    cleanup: vi.fn(async () => undefined),
  };
  const tracePath = join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl');
  const trace = new FileIngestTraceWriter({
    tracePath,
    jobId: 'job-1',
    connectionId: 'c1',
    sourceKey: 'fake',
    level: 'trace',
  });

  const result = await runIsolatedWorkUnit({
    unitIndex: 0,
    ingestionBaseSha: baseSha,
    sessionWorktreeService: sessionWorktreeService as never,
    patchDir: join(homeDir, '.ktx/ingest-patches/job-1'),
    trace,
    // The unit reports a failure without throwing and without touching the worktree.
    run: async () => ({
      unitKey: 'wu-fail',
      status: 'failed',
      reason: 'agent loop errored',
      preSha: baseSha,
      postSha: baseSha,
      actions: [],
      touchedSlSources: [],
    }),
    workUnit: { unitKey: 'wu-fail', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] },
  });

  expect(result.status).toBe('failed');
  // A failed outcome is still cleaned up with the 'success' outcome.
  expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
  // The title promises the failure is traced, so check the failure event reached the trace file.
  await expect(readFile(tracePath, 'utf-8')).resolves.toContain('work_unit_failed_before_patch');
});
|
||||
});
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
import { mkdir, readFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import type { SessionOutcome } from '../../core/index.js';
|
||||
import type { IngestSessionWorktree, IngestSessionWorktreePort } from '../ports.js';
|
||||
import type { WorkUnit } from '../types.js';
|
||||
import type { IngestTraceWriter } from '../ingest-trace.js';
|
||||
import type { WorkUnitOutcome } from '../stages/stage-3-work-units.js';
|
||||
import { parsePatchTouchedPaths } from './git-patch.js';
|
||||
|
||||
/** Arguments accepted by `runIsolatedWorkUnit`. */
export interface RunIsolatedWorkUnitInput {
  /** Position of the unit; zero-padded into the patch file name and recorded in the trace. */
  unitIndex: number;
  /** Commit the child worktree is created from; also the base side of the collected patch. */
  ingestionBaseSha: string;
  /** Creates the child worktree and cleans it up once the unit has finished. */
  sessionWorktreeService: IngestSessionWorktreePort;
  /** Directory the patch file is written to; created recursively when missing. */
  patchDir: string;
  /** Receives the work-unit lifecycle events; its `context.jobId` prefixes the session key. */
  trace: IngestTraceWriter;
  /** The unit being executed; its `unitKey` names the session, the patch file and the trace events. */
  workUnit: WorkUnit;
  /** Executes the unit inside the child worktree and reports its outcome. */
  run(child: IngestSessionWorktree): Promise<WorkUnitOutcome>;
  /** Optional hook awaited after a `'success'` outcome, before the patch is written. */
  afterSuccess?(child: IngestSessionWorktree): Promise<void>;
}
|
||||
|
||||
/**
 * Builds the patch file name for a work unit.
 *
 * @param unitIndex Index of the unit, left-padded with zeros to at least four characters.
 * @param unitKey Unit key; every run of characters outside `[a-zA-Z0-9_.-]` becomes a single `-`.
 * @returns A name of the form `<index>-<sanitized key>.patch`.
 */
function patchFileName(unitIndex: number, unitKey: string): string {
  const safeKey = unitKey.replace(/[^a-zA-Z0-9_.-]+/g, '-');
  return `${String(unitIndex).padStart(4, '0')}-${safeKey}.patch`;
}
|
||||
|
||||
/**
 * Runs one work unit inside its own child worktree and collects its changes as a patch file.
 *
 * Flow:
 * 1. Create a child worktree at `ingestionBaseSha` under the session key `<jobId>-<unitKey>`.
 * 2. Await `input.run(child)`. A non-`'success'` outcome is traced and returned without a patch.
 * 3. On success, await `afterSuccess`, write the `ingestionBaseSha..HEAD` patch into `patchDir`,
 *    and return the outcome extended with `patchPath` and `patchTouchedPaths`.
 * 4. Always hand the child back to `sessionWorktreeService.cleanup` and trace the cleanup.
 *
 * @param input Services, identifiers and callbacks for the unit (see `RunIsolatedWorkUnitInput`).
 * @returns The unit's outcome with `childWorktreePath` set, plus patch details on success.
 * @throws Rethrows any error raised while running the unit or collecting its patch, after tracing it.
 */
export async function runIsolatedWorkUnit(input: RunIsolatedWorkUnitInput): Promise<WorkUnitOutcome> {
  const { unitKey } = input.workUnit;
  const sessionKey = `${input.trace.context.jobId}-${unitKey}`;
  // Stays 'crash' unless one of the handled paths below completes.
  let cleanupOutcome: SessionOutcome = 'crash';
  const child = await input.sessionWorktreeService.create(sessionKey, input.ingestionBaseSha);

  try {
    // Traced inside the try so a failing trace write cannot leak the freshly created worktree.
    await input.trace.event('debug', 'work_unit', 'work_unit_child_created', {
      unitKey,
      unitIndex: input.unitIndex,
      worktreePath: child.workdir,
      baseSha: input.ingestionBaseSha,
    });

    const outcome = await input.run(child);
    if (outcome.status !== 'success') {
      // A reported (non-thrown) failure produces no patch; the child is cleaned up as 'success'.
      cleanupOutcome = 'success';
      await input.trace.event('error', 'work_unit', 'work_unit_failed_before_patch', {
        unitKey,
        reason: outcome.reason ?? 'unknown failure',
      });
      return { ...outcome, childWorktreePath: child.workdir };
    }

    await input.afterSuccess?.(child);
    await mkdir(input.patchDir, { recursive: true });
    const patchPath = join(input.patchDir, patchFileName(input.unitIndex, unitKey));
    await child.git.writeBinaryNoRenamePatch(input.ingestionBaseSha, 'HEAD', patchPath);
    const patch = await readFile(patchPath, 'utf-8');
    const touchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path);
    cleanupOutcome = 'success';
    await input.trace.event('debug', 'work_unit', 'work_unit_patch_collected', {
      unitKey,
      patchPath,
      touchedPaths,
      patchBytes: Buffer.byteLength(patch),
    });
    return {
      ...outcome,
      patchPath,
      patchTouchedPaths: touchedPaths,
      childWorktreePath: child.workdir,
    };
  } catch (error) {
    await input.trace.event(
      'error',
      'work_unit',
      'work_unit_child_failed',
      { unitKey, worktreePath: child.workdir },
      error,
    );
    cleanupOutcome = 'success';
    throw error;
  } finally {
    await input.sessionWorktreeService.cleanup(child, cleanupOutcome);
    await input.trace.event('trace', 'work_unit', 'work_unit_child_cleanup', {
      unitKey,
      outcome: cleanupOutcome,
      worktreePath: child.workdir,
    });
  }
}
|
||||
|
|
@ -694,6 +694,14 @@ describe('canonical local ingest', () => {
|
|||
],
|
||||
},
|
||||
});
|
||||
expect(result.report.body.isolatedDiff).toMatchObject({
|
||||
enabled: true,
|
||||
acceptedPatches: 0,
|
||||
projectionSha: expect.any(String),
|
||||
});
|
||||
|
||||
const projectedSourcePath = join(metricflowProject.projectDir, 'semantic-layer/warehouse/orders.yaml');
|
||||
await expect(readFile(projectedSourcePath, 'utf-8')).resolves.toContain('name: orders');
|
||||
|
||||
const stagedRawPath = join(
|
||||
metricflowProject.projectDir,
|
||||
|
|
|
|||
|
|
@ -17,6 +17,24 @@ type RuntimeWithConnectionDeps = {
|
|||
};
|
||||
};
|
||||
|
||||
/** Test-only view of a runner that exposes the semantic-layer validator through `deps`. */
type RuntimeWithSlValidationDeps = {
  deps: {
    slValidator: {
      validateSingleSource(
        deps: unknown,
        connectionId: string,
        sourceName: string,
      ): Promise<{ errors: string[]; warnings: string[] }>;
    };
  };
};
|
||||
|
||||
/** Test-only view of a runner that exposes its settings record through `deps`. */
type RuntimeWithSettingsDeps = {
  deps: {
    settings: Record<string, unknown>;
  };
};
|
||||
|
||||
/** Returns an agent runner whose mocked `runLoop` resolves with a `'natural'` stop reason. */
function testAgentRunner(): AgentRunnerPort {
  return { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' as const }) };
}
|
||||
|
|
@ -144,6 +162,77 @@ describe('createLocalBundleIngestRuntime', () => {
|
|||
]);
|
||||
});
|
||||
|
||||
it('validates manifest-backed scan sources during local ingest gates', async () => {
  // `payments` is declared only in the schema manifest; there is no payments.yaml source file.
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    [
      'tables:',
      '  payments:',
      '    table: public.payments',
      '    columns:',
      '      - name: payment_id',
      '        type: string',
      '      - name: amount',
      '        type: number',
      '',
    ].join('\n'),
    'ktx',
    'ktx@example.com',
    'Add warehouse manifest',
  );
  const agentRunner = testAgentRunner();

  const runtime = createLocalBundleIngestRuntime({
    project,
    adapters: [new FakeSourceAdapter()],
    agentRunner,
  });
  const deps = (runtime.runner as unknown as RuntimeWithSlValidationDeps).deps;

  await expect(deps.slValidator.validateSingleSource(deps, 'warehouse', 'payments')).resolves.toEqual({
    errors: [],
    warnings: expect.any(Array),
  });
});
|
||||
|
||||
it('does not mask malformed direct overlays with manifest-backed fallback validation', async () => {
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    [
      'tables:',
      '  payments:',
      '    table: public.payments',
      '    columns:',
      '      - name: payment_id',
      '        type: string',
      '',
    ].join('\n'),
    'ktx',
    'ktx@example.com',
    'Add warehouse manifest',
  );
  // The direct overlay exists but is not parseable YAML (unterminated flow sequence).
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/payments.yaml',
    ['name: payments', 'columns:', '  - [', ''].join('\n'),
    'ktx',
    'ktx@example.com',
    'Add malformed overlay',
  );
  const agentRunner = testAgentRunner();

  const runtime = createLocalBundleIngestRuntime({
    project,
    adapters: [new FakeSourceAdapter()],
    agentRunner,
  });
  const deps = (runtime.runner as unknown as RuntimeWithSlValidationDeps).deps;

  // The parse failure must surface even though the manifest alone would validate.
  await expect(deps.slValidator.validateSingleSource(deps, 'warehouse', 'payments')).resolves.toEqual({
    errors: [expect.stringContaining('invalid YAML')],
    warnings: [],
  });
});
|
||||
|
||||
it('passes project connection config to local ingest query executors', async () => {
|
||||
const agentRunner = testAgentRunner();
|
||||
const queryExecutor = {
|
||||
|
|
@ -175,6 +264,27 @@ describe('createLocalBundleIngestRuntime', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('defaults local bundle ingest to isolated diffs without a shared-worktree fallback setting', () => {
  const runtime = createLocalBundleIngestRuntime({
    project,
    adapters: [new FakeSourceAdapter()],
    agentRunner: testAgentRunner(),
  });

  const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings;
  // The key is assembled from two parts rather than written as one literal.
  const fallbackSettingKey = ['sharedWorktree', 'SourceKeys'].join('');

  expect(settings).not.toHaveProperty(fallbackSettingKey);
  // Pin the exact set of settings the runtime exposes.
  expect(Object.keys(settings).sort()).toEqual([
    'ingestTraceLevel',
    'memoryIngestionModel',
    'probeRowCount',
    'workUnitFailureMode',
    'workUnitMaxConcurrency',
    'workUnitStepBudget',
  ]);
});
|
||||
|
||||
it('accepts a debug LLM request file when constructing the default agent runner', async () => {
|
||||
await writeFile(
|
||||
join(project.projectDir, 'ktx.yaml'),
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ import {
|
|||
type KtxConnectionInfo,
|
||||
type KtxQueryResult,
|
||||
SemanticLayerService,
|
||||
type SemanticLayerSource,
|
||||
type SlConnectionCatalogPort,
|
||||
SlDiscoverTool,
|
||||
SlEditSourceTool,
|
||||
|
|
@ -76,6 +75,7 @@ import { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evide
|
|||
import { HistoricSqlProjectionPostProcessor } from './adapters/historic-sql/post-processor.js';
|
||||
import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './context-evidence/index.js';
|
||||
import { DiffSetService } from './diff-set.service.js';
|
||||
import { ingestTracePathForJob, type IngestTraceLevel } from './ingest-trace.js';
|
||||
import { IngestBundleRunner } from './ingest-bundle.runner.js';
|
||||
import { PageTriageService } from './page-triage/index.js';
|
||||
import { createWarehouseVerificationTools } from './tools/warehouse-verification/index.js';
|
||||
|
|
@ -96,6 +96,12 @@ const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url));
|
|||
const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url));
|
||||
const LOCAL_AUTHOR = { name: 'KTX Local', email: 'local@ktx.local' };
|
||||
const LOCAL_SHAPE_WARNING = 'Local ingest validates semantic-layer YAML shape only.';
|
||||
/** Trace levels accepted from the `KTX_INGEST_TRACE_LEVEL` environment variable. */
const INGEST_TRACE_LEVELS = new Set<IngestTraceLevel>(['error', 'info', 'debug', 'trace']);

/**
 * Resolves the ingest trace level from the environment.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns The level named by `KTX_INGEST_TRACE_LEVEL` (surrounding whitespace and letter case
 *   are ignored), or `'debug'` when the variable is unset, empty or not a known level.
 */
function ingestTraceLevelFromEnv(env: NodeJS.ProcessEnv = process.env): IngestTraceLevel {
  // Normalise so values such as `DEBUG` or ` trace ` are honoured instead of silently ignored.
  const raw = env.KTX_INGEST_TRACE_LEVEL?.trim().toLowerCase();
  return raw && INGEST_TRACE_LEVELS.has(raw as IngestTraceLevel) ? (raw as IngestTraceLevel) : 'debug';
}
|
||||
|
||||
export interface CreateLocalBundleIngestRuntimeOptions {
|
||||
project: KtxLocalProject;
|
||||
|
|
@ -151,6 +157,10 @@ class LocalIngestStorage implements IngestStoragePort {
|
|||
resolveTranscriptDir(jobId: string): string {
|
||||
return join(this.project.projectDir, '.ktx/ingest-transcripts', jobId);
|
||||
}
|
||||
|
||||
/** Returns the trace file path for `jobId`, as computed by `ingestTracePathForJob` from `this.homeDir`. */
resolveTracePath(jobId: string): string {
  return ingestTracePathForJob(this.homeDir, jobId);
}
|
||||
}
|
||||
|
||||
class LocalIngestLock implements IngestLockPort {
|
||||
|
|
@ -237,22 +247,63 @@ class LocalSlPythonPort implements SlPythonPort {
|
|||
}
|
||||
|
||||
/**
 * Semantic-layer validator for local ingest that checks only the shape of a source
 * against the overlay / definition schemas. Successful validations carry
 * `LOCAL_SHAPE_WARNING` as their single warning.
 */
class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
  /**
   * Validates an already-parsed source object.
   * A source with neither `table` nor `sql` is treated as an overlay; anything else
   * must satisfy the full source definition schema.
   */
  private validateParsedSource(sourceName: string, parsed: Record<string, unknown>) {
    const isOverlay = parsed.table == null && parsed.sql == null;
    const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
    return result.success
      ? { errors: [], warnings: [LOCAL_SHAPE_WARNING] }
      : {
          errors: result.error.issues.map(
            (issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`,
          ),
          warnings: [],
        };
  }

  /**
   * Fallback used when the source file could not be read directly: loads every source
   * of the connection and validates the one named `sourceName`. When no such source is
   * found, reports the load errors if there are any, otherwise the original read error.
   */
  private async validateComposedSource(
    deps: SlValidationDeps,
    connectionId: string,
    sourceName: string,
    readError: unknown,
  ) {
    try {
      const { sources, loadErrors } = await deps.semanticLayerService.loadAllSources(connectionId);
      const source = sources.find((candidate) => candidate.name === sourceName);
      if (source) {
        return this.validateParsedSource(sourceName, source as unknown as Record<string, unknown>);
      }
      const detail =
        loadErrors.length > 0
          ? loadErrors.join('; ')
          : readError instanceof Error
            ? readError.message
            : String(readError);
      return { errors: [`${sourceName}: ${detail}`], warnings: [] };
    } catch (fallbackError) {
      return {
        errors: [`${sourceName}: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`],
        warnings: [],
      };
    }
  }

  /**
   * Validates one source of a connection.
   *
   * A readable source file is parsed and shape-checked on its own; a parse failure is
   * reported as `invalid YAML` and never falls back to the composed view. Only when the
   * file cannot be read is the composed source (via `loadAllSources`) validated instead.
   *
   * @returns `errors` and `warnings`; this method reports problems instead of throwing.
   */
  async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
    let content: string;
    try {
      const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
      content = file.content;
    } catch (error) {
      return this.validateComposedSource(deps, connectionId, sourceName, error);
    }

    try {
      const parsed: unknown = YAML.parse(content);
      if (parsed == null) {
        // An empty document would otherwise surface as an opaque TypeError message.
        return { errors: [`${sourceName}: invalid YAML — document is empty`], warnings: [] };
      }
      return this.validateParsedSource(sourceName, parsed as Record<string, unknown>);
    } catch (error) {
      return {
        errors: [`${sourceName}: invalid YAML — ${error instanceof Error ? error.message : String(error)}`],
        warnings: [],
      };
    }
  }
}
|
||||
|
|
@ -671,6 +722,7 @@ export function createLocalBundleIngestRuntime(
|
|||
workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency,
|
||||
workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget,
|
||||
workUnitFailureMode: options.project.config.ingest.workUnits.failureMode,
|
||||
ingestTraceLevel: ingestTraceLevelFromEnv(),
|
||||
},
|
||||
skillsRegistry: new SkillsRegistryService({ skillsDir, logger }),
|
||||
promptService,
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ function snapshot(overrides: Partial<MemoryFlowReplayInput> = {}): MemoryFlowRep
|
|||
{ type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 },
|
||||
{ type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 },
|
||||
{ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 },
|
||||
{ type: 'stage_progress', stage: 'integration', percent: 80, message: 'Integrating 1/1 patches: orders' },
|
||||
{ type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 },
|
||||
{ type: 'work_unit_step', unitKey: 'orders', stepIndex: 1, stepBudget: 40 },
|
||||
{ type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/orders.md' },
|
||||
|
|
|
|||
|
|
@ -53,6 +53,23 @@ export const memoryFlowEventSchema = z.discriminatedUnion('type', [
|
|||
stage: z.enum(['source', 'chunks', 'workUnits', 'actions', 'gates', 'saved']),
|
||||
reason: z.string().min(1),
|
||||
}),
|
||||
eventSchema({
|
||||
type: z.literal('stage_progress'),
|
||||
stage: z.enum([
|
||||
'source',
|
||||
'integration',
|
||||
'reconciliation',
|
||||
'post_processor',
|
||||
'wiki_sl_ref_repair',
|
||||
'final_gates',
|
||||
'save',
|
||||
'provenance',
|
||||
'report',
|
||||
]),
|
||||
percent: z.number().min(0).max(100),
|
||||
message: z.string().min(1),
|
||||
transient: z.boolean().optional(),
|
||||
}),
|
||||
eventSchema({
|
||||
type: z.literal('work_unit_started'),
|
||||
unitKey: z.string().min(1),
|
||||
|
|
|
|||
|
|
@ -44,6 +44,22 @@ type MemoryFlowEventPayload =
|
|||
stage: MemoryFlowColumnId;
|
||||
reason: string;
|
||||
}
|
||||
| {
|
||||
type: 'stage_progress';
|
||||
stage:
|
||||
| 'source'
|
||||
| 'integration'
|
||||
| 'reconciliation'
|
||||
| 'post_processor'
|
||||
| 'wiki_sl_ref_repair'
|
||||
| 'final_gates'
|
||||
| 'save'
|
||||
| 'provenance'
|
||||
| 'report';
|
||||
percent: number;
|
||||
message: string;
|
||||
transient?: boolean;
|
||||
}
|
||||
| {
|
||||
type: 'work_unit_started';
|
||||
unitKey: string;
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import type {
|
|||
import type { ToolContext, ToolSession, TouchedSlSource } from '../tools/index.js';
|
||||
import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js';
|
||||
import type { CanonicalPin } from './canonical-pins.js';
|
||||
import type { IngestTraceLevel } from './ingest-trace.js';
|
||||
import type { IngestReportSnapshot } from './reports.js';
|
||||
import type {
|
||||
ReconcileCandidateForPrompt,
|
||||
|
|
@ -142,6 +143,7 @@ export interface IngestSettingsPort {
|
|||
workUnitMaxConcurrency?: number;
|
||||
workUnitStepBudget?: number;
|
||||
workUnitFailureMode?: 'abort' | 'continue';
|
||||
ingestTraceLevel?: IngestTraceLevel;
|
||||
}
|
||||
|
||||
export interface IngestGitAuthor {
|
||||
|
|
@ -155,6 +157,7 @@ export interface IngestStoragePort {
|
|||
resolveUploadDir(uploadId: string): string;
|
||||
resolvePullDir(jobId: string): string;
|
||||
resolveTranscriptDir(jobId: string): string;
|
||||
resolveTracePath(jobId: string): string;
|
||||
}
|
||||
|
||||
export interface IngestCommitMessagePort {
|
||||
|
|
|
|||
|
|
@ -206,6 +206,47 @@ describe('parseIngestReportSnapshot', () => {
|
|||
expect(snapshot.body.toolTranscripts).toEqual([]);
|
||||
});
|
||||
|
||||
it('parses failed ingest reports with trace and failure details', () => {
  const snapshot = parseIngestReportSnapshot({
    id: 'report-failed',
    runId: 'run-failed',
    jobId: 'job-failed',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-05-17T12:00:00.000Z',
    body: {
      status: 'failed',
      syncId: 'sync-failed',
      diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
      commitSha: null,
      tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl',
      failure: {
        phase: 'final_gates',
        message: 'final artifact gates failed',
      },
      workUnits: [],
      failedWorkUnits: [],
      reconciliationSkipped: true,
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
      evictionInputs: [],
      unresolvedCards: [],
      supersededBy: null,
      overrideOf: null,
      provenanceRows: [],
      toolTranscripts: [],
    },
  });

  // Status, failure details and the trace path must all survive parsing.
  expect(snapshot.body.status).toBe('failed');
  expect(snapshot.body.failure).toEqual({
    phase: 'final_gates',
    message: 'final artifact gates failed',
  });
  expect(snapshot.body.tracePath).toContain('trace.jsonl');
});
|
||||
|
||||
it('rejects malformed report snapshots with a concise message', () => {
|
||||
const report = validReportSnapshot();
|
||||
report.body.workUnits[0] = {
|
||||
|
|
@ -215,4 +256,93 @@ describe('parseIngestReportSnapshot', () => {
|
|||
|
||||
expect(() => parseIngestReportSnapshot(report)).toThrow('Invalid ingest report snapshot');
|
||||
});
|
||||
|
||||
it('parses isolated-diff textual resolver counters', () => {
  const snapshot = parseIngestReportSnapshot({
    id: 'report-1',
    runId: 'run-1',
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-05-18T00:00:00.000Z',
    body: {
      status: 'completed',
      syncId: 'sync-1',
      diffSummary: { added: 0, modified: 1, deleted: 0, unchanged: 0 },
      commitSha: 'abc123',
      isolatedDiff: {
        enabled: true,
        acceptedPatches: 2,
        textualConflicts: 1,
        semanticConflicts: 0,
        resolverAttempts: 1,
        resolverRepairs: 1,
        resolverFailures: 0,
      },
      workUnits: [],
      failedWorkUnits: [],
      reconciliationSkipped: true,
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
      artifactResolutions: [],
      evictionInputs: [],
      unresolvedCards: [],
      supersededBy: null,
      overrideOf: null,
      provenanceRows: [],
      toolTranscripts: [],
    },
  });

  // The resolver counters supplied above must be preserved by the parser.
  expect(snapshot.body.isolatedDiff).toMatchObject({
    resolverAttempts: 1,
    resolverRepairs: 1,
    resolverFailures: 0,
  });
});
|
||||
|
||||
it('parses isolated-diff gate repair counters', () => {
  const snapshot = parseIngestReportSnapshot({
    id: 'report-1',
    runId: 'run-1',
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-05-18T00:00:00.000Z',
    body: {
      status: 'completed',
      syncId: 'sync-1',
      diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
      commitSha: 'abc123',
      isolatedDiff: {
        enabled: true,
        acceptedPatches: 1,
        textualConflicts: 0,
        semanticConflicts: 1,
        gateRepairAttempts: 1,
        gateRepairs: 1,
        gateRepairFailures: 0,
      },
      workUnits: [],
      failedWorkUnits: [],
      reconciliationSkipped: true,
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
      evictionInputs: [],
      unresolvedCards: [],
      supersededBy: null,
      overrideOf: null,
      provenanceRows: [],
      toolTranscripts: [],
    },
  });

  // The gate-repair counters supplied above must be preserved by the parser.
  expect(snapshot.body.isolatedDiff).toMatchObject({
    gateRepairAttempts: 1,
    gateRepairs: 1,
    gateRepairFailures: 0,
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -123,6 +123,12 @@ const sourceFetchReportSchema = z.object({
|
|||
warnings: z.array(sourceFetchIssueSchema).default([]),
|
||||
});
|
||||
|
||||
/** Shape of a report's `failure` entry: non-empty phase and message, optional free-form details. */
const ingestReportFailureSchema = z.object({
  phase: z.string().min(1),
  message: z.string().min(1),
  details: z.record(z.string(), z.unknown()).optional(),
});
|
||||
|
||||
export const ingestReportSnapshotSchema = z
|
||||
.object({
|
||||
id: z.string().min(1),
|
||||
|
|
@ -133,10 +139,30 @@ export const ingestReportSnapshotSchema = z
|
|||
createdAt: z.string().min(1),
|
||||
body: z
|
||||
.object({
|
||||
status: z.enum(['completed', 'failed']).optional(),
|
||||
syncId: z.string().min(1),
|
||||
diffSummary: ingestDiffSummarySchema,
|
||||
fetch: sourceFetchReportSchema.optional(),
|
||||
commitSha: z.string().nullable(),
|
||||
tracePath: z.string().optional(),
|
||||
failure: ingestReportFailureSchema.optional(),
|
||||
isolatedDiff: z
|
||||
.object({
|
||||
enabled: z.boolean(),
|
||||
integrationWorktreePath: z.string().optional(),
|
||||
ingestionBaseSha: z.string().optional(),
|
||||
projectionSha: z.string().nullable().optional(),
|
||||
acceptedPatches: z.number().int().min(0),
|
||||
textualConflicts: z.number().int().min(0),
|
||||
semanticConflicts: z.number().int().min(0),
|
||||
resolverAttempts: z.number().int().min(0).default(0),
|
||||
resolverRepairs: z.number().int().min(0).default(0),
|
||||
resolverFailures: z.number().int().min(0).default(0),
|
||||
gateRepairAttempts: z.number().int().min(0).default(0),
|
||||
gateRepairs: z.number().int().min(0).default(0),
|
||||
gateRepairFailures: z.number().int().min(0).default(0),
|
||||
})
|
||||
.optional(),
|
||||
workUnits: z.array(
|
||||
z.object({
|
||||
unitKey: z.string().min(1),
|
||||
|
|
|
|||
|
|
@ -48,11 +48,35 @@ export interface IngestReportPostProcessorOutcome {
|
|||
touchedSources: TouchedSlSource[];
|
||||
}
|
||||
|
||||
/** Failure details recorded on an ingest report body (`IngestReportBody.failure`). */
export interface IngestReportFailure {
  /** Name of the phase the failure is attributed to. */
  phase: string;
  /** Human-readable description of the failure. */
  message: string;
  /** Optional free-form structured context. */
  details?: Record<string, unknown>;
}
|
||||
|
||||
export interface IngestReportBody {
|
||||
status?: 'completed' | 'failed';
|
||||
syncId: string;
|
||||
diffSummary: IngestDiffSummary;
|
||||
fetch?: SourceFetchReport;
|
||||
commitSha: string | null;
|
||||
tracePath?: string;
|
||||
failure?: IngestReportFailure;
|
||||
isolatedDiff?: {
|
||||
enabled: boolean;
|
||||
integrationWorktreePath?: string;
|
||||
ingestionBaseSha?: string;
|
||||
projectionSha?: string | null;
|
||||
acceptedPatches: number;
|
||||
textualConflicts: number;
|
||||
semanticConflicts: number;
|
||||
resolverAttempts?: number;
|
||||
resolverRepairs?: number;
|
||||
resolverFailures?: number;
|
||||
gateRepairAttempts?: number;
|
||||
gateRepairs?: number;
|
||||
gateRepairFailures?: number;
|
||||
};
|
||||
workUnits: IngestReportWorkUnit[];
|
||||
failedWorkUnits: string[];
|
||||
reconciliationSkipped: boolean;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,38 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
assertSemanticLayerTargetPathsAllowed,
|
||||
findDisallowedSemanticLayerTargetPaths,
|
||||
semanticLayerConnectionIdFromPath,
|
||||
} from './semantic-layer-target-policy.js';
|
||||
|
||||
describe('semantic-layer target policy', () => {
  it('extracts connection ids from semantic-layer paths', () => {
    expect(semanticLayerConnectionIdFromPath('semantic-layer/warehouse/orders.yaml')).toBe('warehouse');
    // A leading diff-style `a/` prefix is ignored.
    expect(semanticLayerConnectionIdFromPath('a/semantic-layer/finance/orders.yaml')).toBe('finance');
    expect(semanticLayerConnectionIdFromPath('wiki/global/orders.md')).toBeNull();
  });

  it('finds semantic-layer paths outside the allowed target connections', () => {
    expect(
      findDisallowedSemanticLayerTargetPaths({
        paths: [
          'semantic-layer/warehouse/orders.yaml',
          'semantic-layer/finance/orders.yaml',
          'wiki/global/orders.md',
        ],
        allowedConnectionIds: new Set(['warehouse']),
      }),
    ).toEqual([{ path: 'semantic-layer/finance/orders.yaml', connectionId: 'finance' }]);
  });

  it('throws a deterministic error for unauthorized semantic-layer targets', () => {
    expect(() =>
      assertSemanticLayerTargetPathsAllowed({
        paths: ['semantic-layer/finance/orders.yaml', 'semantic-layer/marketing/accounts.yaml'],
        allowedConnectionIds: new Set(['warehouse']),
      }),
    ).toThrow(
      /semantic-layer target connection not allowed: semantic-layer\/finance\/orders\.yaml \(finance\), semantic-layer\/marketing\/accounts\.yaml \(marketing\); allowed: warehouse/,
    );
  });
});
|
||||
42
packages/context/src/ingest/semantic-layer-target-policy.ts
Normal file
42
packages/context/src/ingest/semantic-layer-target-policy.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
/** Input to the semantic-layer target policy checks. */
export interface SemanticLayerTargetPolicyInput {
  /** Paths to check; an optional leading `a/` or `b/` prefix is ignored. */
  paths: readonly string[];
  /** Connection ids whose `semantic-layer/<connectionId>/` subtree may be touched. */
  allowedConnectionIds: ReadonlySet<string>;
}

/** A path that lies under a semantic-layer connection outside the allowed set. */
export interface SemanticLayerTargetPolicyViolation {
  path: string;
  connectionId: string;
}

/**
 * Locale-independent string comparison (UTF-16 code-unit order, the same order as
 * `Array.prototype.sort()` without a comparator), so output does not vary with the host locale.
 */
function compareOrdinal(left: string, right: string): number {
  if (left < right) {
    return -1;
  }
  return left > right ? 1 : 0;
}

/**
 * Extracts the connection id from a `semantic-layer/<connectionId>/...` path.
 *
 * @param path Path to inspect; one leading `a/` or `b/` prefix is stripped first.
 * @returns The connection id, or `null` when the path is not under `semantic-layer/<id>/`.
 */
export function semanticLayerConnectionIdFromPath(path: string): string | null {
  const normalized = path.replace(/^[ab]\//, '');
  const match = /^semantic-layer\/([^/]+)\//.exec(normalized);
  return match?.[1] ?? null;
}

/**
 * Lists every path that targets a semantic-layer connection outside `allowedConnectionIds`.
 * Paths that are not under `semantic-layer/<id>/` are ignored.
 *
 * @returns Violations ordered by connection id, then by path.
 */
export function findDisallowedSemanticLayerTargetPaths(
  input: SemanticLayerTargetPolicyInput,
): SemanticLayerTargetPolicyViolation[] {
  return input.paths
    .map((path) => ({ path, connectionId: semanticLayerConnectionIdFromPath(path) }))
    .filter((entry): entry is SemanticLayerTargetPolicyViolation => {
      return entry.connectionId !== null && !input.allowedConnectionIds.has(entry.connectionId);
    })
    .sort((left, right) => {
      const byConnection = compareOrdinal(left.connectionId, right.connectionId);
      return byConnection === 0 ? compareOrdinal(left.path, right.path) : byConnection;
    });
}

/**
 * Throws when any path targets a semantic-layer connection that is not allowed.
 *
 * @throws Error listing every violation as `<path> (<connectionId>)`, followed by the sorted
 *   allowed connection ids, or `(none)` when the allowed set is empty.
 */
export function assertSemanticLayerTargetPathsAllowed(input: SemanticLayerTargetPolicyInput): void {
  const violations = findDisallowedSemanticLayerTargetPaths(input);
  if (violations.length === 0) {
    return;
  }
  const allowed = [...input.allowedConnectionIds].sort(compareOrdinal);
  throw new Error(
    `semantic-layer target connection not allowed: ${violations
      .map((violation) => `${violation.path} (${violation.connectionId})`)
      .join(', ')}; allowed: ${allowed.length > 0 ? allowed.join(', ') : '(none)'}`,
  );
}
|
||||
|
|
@ -41,6 +41,9 @@ export interface WorkUnitOutcome {
|
|||
touchedSlSources: TouchedSlSource[];
|
||||
slDisallowed?: boolean;
|
||||
slDisallowedReason?: 'lookml_connection_mismatch';
|
||||
patchPath?: string;
|
||||
patchTouchedPaths?: string[];
|
||||
childWorktreePath?: string;
|
||||
}
|
||||
|
||||
export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit): Promise<WorkUnitOutcome> {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { KtxEmbeddingPort } from '../core/embedding.js';
|
||||
import type { SemanticLayerService } from '../sl/index.js';
|
||||
import type { MemoryFlowEventSink } from './memory-flow/types.js';
|
||||
|
||||
export type IngestTrigger = 'upload' | 'scheduled_pull' | 'manual_resync' | 'manual_override';
|
||||
|
|
@ -47,6 +48,7 @@ export interface ChunkResult {
|
|||
export interface FetchContext {
|
||||
connectionId: string;
|
||||
sourceKey: string;
|
||||
memoryFlow?: MemoryFlowEventSink;
|
||||
}
|
||||
|
||||
type SourceFetchIssueKind =
|
||||
|
|
@ -96,6 +98,26 @@ export interface ClusterWorkUnitsContext {
|
|||
embedding: KtxEmbeddingPort;
|
||||
}
|
||||
|
||||
/**
 * Context passed to a source adapter's optional `project` hook.
 * NOTE(review): field meanings below are read from the names only — confirm against the caller.
 */
export interface DeterministicProjectionContext {
  connectionId: string;
  sourceKey: string;
  syncId: string;
  jobId: string;
  runId: string;
  /** Presumably the directory holding the staged raw files — verify. */
  stagedDir: string;
  /** Presumably the worktree the projection writes into — verify. */
  workdir: string;
  /** Adapter-specific parse output; untyped at this boundary. */
  parseArtifacts?: unknown;
  semanticLayerService: SemanticLayerService;
}
|
||||
|
||||
/**
 * Value resolved by a source adapter's optional `project` hook.
 */
export interface ProjectionResult {
  /** Warning messages collected during projection. */
  warnings: string[];
  /** Error messages collected during projection. */
  errors: string[];
  /** Semantic-layer sources touched by the projection, identified by connection and source name. */
  touchedSources: Array<{ connectionId: string; sourceName: string }>;
  /** Keys of the wiki pages the projection changed. */
  changedWikiPageKeys: string[];
  /** Optional adapter-specific payload; untyped, so consumers must narrow it. */
  result?: unknown;
}
|
||||
|
||||
export interface SourceAdapter {
|
||||
readonly source: string;
|
||||
readonly skillNames: string[];
|
||||
|
|
@ -109,6 +131,7 @@ export interface SourceAdapter {
|
|||
listTargetConnectionIds?(stagedDir: string): Promise<string[]>;
|
||||
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult>;
|
||||
clusterWorkUnits?(ctx: ClusterWorkUnitsContext): Promise<WorkUnit[]>;
|
||||
project?(ctx: DeterministicProjectionContext): Promise<ProjectionResult>;
|
||||
describeScope?(stagedDir: string): Promise<ScopeDescriptor>;
|
||||
onPullSucceeded?(ctx: {
|
||||
connectionId: string;
|
||||
|
|
|
|||
153
packages/context/src/ingest/wiki-body-refs.test.ts
Normal file
153
packages/context/src/ingest/wiki-body-refs.test.ts
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { findInvalidWikiBodyRefs, parseWikiBodyRefs } from './wiki-body-refs.js';
|
||||
|
||||
// Fixture: a single semantic-layer source with two columns, one measure and one segment.
const sources = [
  {
    name: 'mart_account_segments',
    grain: ['account_id'],
    columns: [
      { name: 'account_id', type: 'string' },
      { name: 'segment', type: 'string' },
    ],
    joins: [],
    measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
    segments: [{ name: 'enterprise', expr: "segment = 'enterprise'" }],
    table: 'analytics.mart_account_segments',
  },
];

describe('wiki body refs', () => {
  type ValidationInput = Parameters<typeof findInvalidWikiBodyRefs>[0];

  // Only the fixture's backing table is reported as existing.
  const onlyAccountSegmentsTable = async (_connectionId: string, tableRef: string) =>
    tableRef === 'analytics.mart_account_segments';

  // A source that declares no columns, measures, joins or grain.
  const bareSource = (name: string) => ({
    name,
    grain: [],
    columns: [],
    joins: [],
    measures: [],
    table: `analytics.${name}`,
  });

  // Runs the validator with the defaults shared by every case: the `warehouse`
  // connection is visible, the fixture sources are loaded, every table exists.
  const validate = (pageKey: string, body: string, overrides: Partial<ValidationInput> = {}) =>
    findInvalidWikiBodyRefs({
      pageKey,
      body,
      visibleConnectionIds: ['warehouse'],
      loadSources: async () => sources,
      tableExists: async () => true,
      ...overrides,
    });

  it('parses only explicit inline-code body references outside fenced blocks', () => {
    const markdown = [
      'Valid `mart_account_segments.total_contract_arr` and `source:mart_account_segments`.',
      'Also `warehouse/mart_account_segments.segment` and `table:analytics.mart_account_segments`.',
      'Ignore prose mart_account_segments.total_contract_arr_cents.',
      'Ignore `single_token`.',
      'Ignore wildcard pattern `mart_nrr_quarterly.*_arr_cents`.',
      'Ignore condition `users.is_internal = false`.',
      '```sql',
      'select `mart_account_segments.total_contract_arr_cents`',
      '```',
    ].join('\n');

    const expectedRefs = [
      { kind: 'sl_entity', connectionId: null, sourceName: 'mart_account_segments', entityName: 'total_contract_arr' },
      { kind: 'sl_source', connectionId: null, sourceName: 'mart_account_segments' },
      { kind: 'sl_entity', connectionId: 'warehouse', sourceName: 'mart_account_segments', entityName: 'segment' },
      { kind: 'table', connectionId: null, tableRef: 'analytics.mart_account_segments' },
    ];

    expect(parseWikiBodyRefs(markdown)).toEqual(expectedRefs);
  });

  it('rejects stale inline-code semantic-layer references', async () => {
    const invalid = await validate(
      'account-segments',
      'ARR is documented as `mart_account_segments.total_contract_arr_cents`.',
    );

    expect(invalid).toEqual([
      'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
    ]);
  });

  it('does not treat wildcard inline-code patterns as exact semantic-layer entity references', async () => {
    const invalid = await validate(
      'revenue-metrics-encoding',
      'Cents columns include `mart_nrr_quarterly.*_arr_cents` and `mart_retention_movement_breakout.*_arr_cents`.',
      {
        loadSources: async () => [bareSource('mart_nrr_quarterly'), bareSource('mart_retention_movement_breakout')],
      },
    );

    expect(invalid).toEqual([]);
  });

  it('does not treat inline-code SQL predicates as exact semantic-layer entity references', async () => {
    const invalid = await validate(
      'account-reporting-exclusions',
      'Exclude internal users with `users.is_internal = false` and test users with `users.is_test = false`.',
      {
        loadSources: async () => [
          {
            name: 'users',
            grain: [],
            columns: [
              { name: 'is_internal', type: 'boolean' },
              { name: 'is_test', type: 'boolean' },
            ],
            joins: [],
            measures: [],
            table: 'analytics.users',
          },
        ],
      },
    );

    expect(invalid).toEqual([]);
  });

  it('validates source, dimension, segment, measure, and table references', async () => {
    const body = [
      '`mart_account_segments.total_contract_arr`',
      '`mart_account_segments.segment`',
      '`mart_account_segments.enterprise`',
      '`source:mart_account_segments`',
      '`table:analytics.mart_account_segments`',
    ].join('\n');

    const invalid = await validate('account-segments', body, { tableExists: onlyAccountSegmentsTable });

    expect(invalid).toEqual([]);
  });

  it('ignores two-part inline code when the source is not visible', async () => {
    const body = [
      'A version token like `node.v22` is not a semantic-layer reference.',
      'A raw table must use `table:analytics.mart_account_segments`.',
    ].join('\n');

    const invalid = await validate('engineering-notes', body, { tableExists: onlyAccountSegmentsTable });

    expect(invalid).toEqual([]);
  });

  it('still rejects explicit missing source and table references', async () => {
    const body = ['`source:missing_source`', '`warehouse/source:missing_source`', '`table:analytics.missing_table`'].join(
      '\n',
    );

    const invalid = await validate('account-segments', body, { tableExists: async () => false });

    expect(invalid).toEqual([
      'account-segments: unknown semantic-layer source missing_source',
      'account-segments: unknown semantic-layer source warehouse/missing_source',
      'account-segments: unknown raw table analytics.missing_table',
    ]);
  });
});
|
||||
141
packages/context/src/ingest/wiki-body-refs.ts
Normal file
141
packages/context/src/ingest/wiki-body-refs.ts
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
import type { SemanticLayerSource } from '../sl/index.js';
|
||||
|
||||
/**
 * A reference extracted from inline code in a wiki page body.
 *
 * - `sl_entity`: a `source.entity` pair naming something inside a semantic-layer source.
 * - `sl_source`: a `source:<name>` reference to a whole semantic-layer source.
 * - `table`: a `table:<ref>` reference to a raw table.
 *
 * `connectionId` is `null` when the inline code carried no `<connection>/` prefix.
 */
export type WikiBodyRef =
  | { kind: 'sl_entity'; connectionId: string | null; sourceName: string; entityName: string }
  | { kind: 'sl_source'; connectionId: string | null; sourceName: string }
  | { kind: 'table'; connectionId: string | null; tableRef: string };

/** Everything `findInvalidWikiBodyRefs` needs to validate one wiki page body. */
export interface WikiBodyRefValidationInput {
  /** Page key; used as the prefix of every reported error message. */
  pageKey: string;
  /** Markdown body whose inline-code references are validated. */
  body: string;
  /** Connections searched when a reference carries no explicit connection prefix. */
  visibleConnectionIds: string[];
  /** Loads the semantic-layer sources of one connection. */
  loadSources(connectionId: string): Promise<SemanticLayerSource[]>;
  /** Resolves to whether `tableRef` exists on the given connection. */
  tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
|
||||
|
||||
const inlineCodePattern = /`([^`\n]+)`/g;
|
||||
|
||||
/**
 * Returns the lines of `body` that lie outside fenced code blocks.
 *
 * A fence is a line whose first non-whitespace characters are three backticks
 * or three tildes (CommonMark allows either). Fence lines themselves are
 * dropped. A fence is closed only by a line using the same fence character, so
 * a `~~~` line inside a backtick fence (or vice versa) is treated as content
 * of the open block rather than as a delimiter.
 *
 * @param body Markdown text, split on `\n`.
 * @returns The non-fenced lines in their original order.
 */
function visibleLinesOutsideFences(body: string): string[] {
  const fenceStart = /^\s*(`{3}|~{3})/;
  const lines: string[] = [];
  // Fence character ('`' or '~') of the block currently open; null when outside any fence.
  let openFence: string | null = null;
  for (const line of body.split('\n')) {
    const marker = fenceStart.exec(line)?.[1]?.charAt(0);
    if (marker !== undefined && (openFence === null || openFence === marker)) {
      // Opening or matching closing delimiter: toggle the state and drop the line.
      openFence = openFence === null ? marker : null;
      continue;
    }
    if (openFence === null) {
      lines.push(line);
    }
  }
  return lines;
}
|
||||
|
||||
/**
 * Splits an optional `<connectionId>/` prefix off an inline-code token.
 *
 * The split happens at the first `/`. When there is no slash, or the slash is
 * the very first character, the token is returned whole with a `null`
 * connection id.
 *
 * @param value Trimmed inline-code token.
 * @returns The connection id (or `null`) and the remainder of the token.
 */
function parseConnectionScoped(value: string): { connectionId: string | null; body: string } {
  const separatorAt = value.indexOf('/');
  const hasPrefix = separatorAt > 0;
  return hasPrefix
    ? { connectionId: value.substring(0, separatorAt), body: value.substring(separatorAt + 1) }
    : { connectionId: null, body: value };
}
|
||||
|
||||
/**
 * Tells whether `value` is a bare identifier: an ASCII letter or underscore
 * followed by any number of ASCII letters, digits or underscores.
 */
function isIdentifierToken(value: string): boolean {
  const identifierShape = /^[A-Za-z_][A-Za-z0-9_]*$/;
  return identifierShape.test(value);
}
|
||||
|
||||
/**
 * Extracts explicit references from the inline code spans of a wiki body.
 *
 * Only lines outside fenced code blocks are scanned. Each inline-code token is
 * trimmed, stripped of an optional `<connectionId>/` prefix, and classified:
 *
 * - `source:<name>` becomes an `sl_source` reference (skipped when the name is empty);
 * - `table:<ref>` becomes a `table` reference (skipped when the ref is empty);
 * - exactly two identifier tokens joined by a dot become an `sl_entity` reference;
 * - anything else is ignored.
 *
 * @param body Markdown body of the wiki page.
 * @returns References in document order.
 */
export function parseWikiBodyRefs(body: string): WikiBodyRef[] {
  const sourcePrefix = 'source:';
  const tablePrefix = 'table:';
  const collected: WikiBodyRef[] = [];

  for (const line of visibleLinesOutsideFences(body)) {
    for (const [, captured] of line.matchAll(inlineCodePattern)) {
      const token = (captured ?? '').trim();
      if (token.length === 0) {
        continue;
      }

      const { connectionId, body: target } = parseConnectionScoped(token);

      if (target.startsWith(sourcePrefix)) {
        const sourceName = target.slice(sourcePrefix.length).trim();
        if (sourceName.length > 0) {
          collected.push({ kind: 'sl_source', connectionId, sourceName });
        }
      } else if (target.startsWith(tablePrefix)) {
        const tableRef = target.slice(tablePrefix.length).trim();
        if (tableRef.length > 0) {
          collected.push({ kind: 'table', connectionId, tableRef });
        }
      } else {
        // An entity reference is exactly `<identifier>.<identifier>`; more or fewer segments are ignored.
        const [sourceName, entityName, ...extra] = target.split('.');
        if (
          sourceName !== undefined &&
          entityName !== undefined &&
          extra.length === 0 &&
          isIdentifierToken(sourceName) &&
          isIdentifierToken(entityName)
        ) {
          collected.push({ kind: 'sl_entity', connectionId, sourceName, entityName });
        }
      }
    }
  }

  return collected;
}
|
||||
|
||||
/**
 * Collects the names of a source's measures, columns and segments into one
 * set. A missing collection is treated as empty.
 */
function entityNames(source: SemanticLayerSource): Set<string> {
  const names = new Set<string>();
  for (const group of [source.measures, source.columns, source.segments]) {
    for (const entry of group ?? []) {
      names.add(entry.name);
    }
  }
  return names;
}
|
||||
|
||||
/**
 * Validates every explicit inline-code reference in a wiki page body and
 * returns one message per reference that cannot be resolved.
 *
 * Resolution rules:
 * - A reference with a `<connectionId>/` prefix is checked against that
 *   connection only; otherwise every id in `input.visibleConnectionIds` is
 *   tried.
 * - `table` references are invalid when `input.tableExists` is false for all
 *   candidate connections.
 * - `sl_source` references are invalid when no candidate connection has a
 *   source with that name.
 * - `sl_entity` references are silently ignored when the source is not found
 *   (two-part inline code is not necessarily a semantic-layer reference), and
 *   invalid when the source exists but has no measure, column or segment with
 *   that name.
 *
 * Every message has the form `<pageKey>: <problem> <scope><target>`, where
 * `<scope>` is `<connectionId>/` for connection-scoped references and empty
 * otherwise. The same scope is applied to all three kinds of message.
 *
 * Sources are loaded at most once per connection for the duration of the call.
 *
 * @param input Page key, body, visible connections and lookup callbacks.
 * @returns Error messages in document order; empty when all references resolve.
 */
export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> {
  const errors: string[] = [];
  const sourceCache = new Map<string, SemanticLayerSource[]>();

  // Memoised wrapper around input.loadSources, keyed by connection id.
  const loadSources = async (connectionId: string): Promise<SemanticLayerSource[]> => {
    const cached = sourceCache.get(connectionId);
    if (cached !== undefined) {
      return cached;
    }
    const sources = await input.loadSources(connectionId);
    sourceCache.set(connectionId, sources);
    return sources;
  };

  // Returns the first connection (in the given order) that defines `sourceName`.
  const findSource = async (
    connectionIds: string[],
    sourceName: string,
  ): Promise<{ connectionId: string; source: SemanticLayerSource } | null> => {
    for (const connectionId of connectionIds) {
      const source = (await loadSources(connectionId)).find((candidate) => candidate.name === sourceName);
      if (source) {
        return { connectionId, source };
      }
    }
    return null;
  };

  for (const ref of parseWikiBodyRefs(input.body)) {
    const connectionIds = ref.connectionId ? [ref.connectionId] : input.visibleConnectionIds;
    // Shared by every message so connection-scoped references are reported with their scope.
    const scope = ref.connectionId ? `${ref.connectionId}/` : '';

    if (ref.kind === 'table') {
      const found = await Promise.all(connectionIds.map((connectionId) => input.tableExists(connectionId, ref.tableRef)));
      if (!found.some(Boolean)) {
        errors.push(`${input.pageKey}: unknown raw table ${scope}${ref.tableRef}`);
      }
      continue;
    }

    const found = await findSource(connectionIds, ref.sourceName);
    if (!found) {
      // Only explicit `source:` references are errors when the source is missing.
      if (ref.kind === 'sl_source') {
        errors.push(`${input.pageKey}: unknown semantic-layer source ${scope}${ref.sourceName}`);
      }
      continue;
    }

    if (ref.kind === 'sl_entity' && !entityNames(found.source).has(ref.entityName)) {
      errors.push(`${input.pageKey}: unknown semantic-layer entity ${scope}${ref.sourceName}.${ref.entityName}`);
    }
  }

  return errors;
}
|
||||
|
|
@ -78,6 +78,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
managedSettings: {
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [],
|
||||
},
|
||||
strictMcpConfig: true,
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
|
|
@ -144,6 +149,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
|
||||
const options = query.mock.calls[0][0].options;
|
||||
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
|
||||
expect(options.managedSettings).toEqual({
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [{ serverName: 'ktx' }],
|
||||
});
|
||||
expect(options.strictMcpConfig).toBe(true);
|
||||
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
|
||||
behavior: 'allow',
|
||||
toolUseID: '1',
|
||||
|
|
@ -176,6 +186,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
managedSettings: {
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [],
|
||||
},
|
||||
strictMcpConfig: true,
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
|
|
@ -268,6 +283,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
|
||||
const options = query.mock.calls[0][0].options;
|
||||
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
|
||||
expect(options.managedSettings).toEqual({
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [{ serverName: 'ktx' }],
|
||||
});
|
||||
expect(options.strictMcpConfig).toBe(true);
|
||||
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
|
||||
behavior: 'allow',
|
||||
toolUseID: '1',
|
||||
|
|
@ -334,6 +354,10 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
answer: 'yes',
|
||||
});
|
||||
expect(objectQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ PATH: '/usr/bin' }));
|
||||
expect(objectQuery.mock.calls[0][0].options.managedSettings).toEqual({
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [],
|
||||
});
|
||||
expect(objectQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }), // pragma: allowlist secret
|
||||
);
|
||||
|
|
@ -374,6 +398,10 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
telemetryTags: { operationName: 'test' },
|
||||
});
|
||||
expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
|
||||
expect(agentQuery.mock.calls[0][0].options.managedSettings).toEqual({
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [{ serverName: 'ktx' }],
|
||||
});
|
||||
expect(agentQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }),
|
||||
);
|
||||
|
|
@ -442,6 +470,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
managedSettings: {
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [],
|
||||
},
|
||||
strictMcpConfig: true,
|
||||
allowedTools: [],
|
||||
persistSession: false,
|
||||
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
|
||||
|
|
|
|||
|
|
@ -45,6 +45,8 @@ const BUILTIN_TOOLS = [
|
|||
'TodoWrite',
|
||||
];
|
||||
|
||||
const KTX_MCP_SERVER_NAME = 'ktx';
|
||||
|
||||
/** Type guard: true when the message's `type` discriminant is `'result'`. */
function isResult(message: SDKMessage): message is SDKResultMessage {
  const { type } = message;
  return type === 'result';
}
|
||||
|
|
@ -113,7 +115,14 @@ function assertInitIsolation(
|
|||
}
|
||||
|
||||
/**
 * Names of the MCP servers a run is expected to expose: the single ktx server
 * when at least one runtime tool is supplied, otherwise none.
 *
 * @param tools Runtime tool set; `undefined` or an empty object means no tools.
 * @returns A set containing `KTX_MCP_SERVER_NAME`, or an empty set.
 */
function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
  return tools && Object.keys(tools).length > 0 ? new Set([KTX_MCP_SERVER_NAME]) : new Set();
}
|
||||
|
||||
/**
 * Builds the managed settings that restrict a session to managed MCP servers
 * only, allow-listing exactly the given server names (in the given order).
 *
 * @param serverNames Names of the MCP servers to allow; may be empty.
 */
function managedMcpSettings(serverNames: string[]): NonNullable<Options['managedSettings']> {
  const allowedMcpServers = serverNames.map((name) => ({ serverName: name }));
  return { allowManagedMcpServersOnly: true, allowedMcpServers };
}
|
||||
|
||||
function baseOptions(input: {
|
||||
|
|
@ -125,6 +134,7 @@ function baseOptions(input: {
|
|||
}): Options {
|
||||
const toolIds = mcpToolIds(input.tools ?? {});
|
||||
const allowedToolIds = new Set(toolIds);
|
||||
const expectedServerNames = [...expectedMcpServerNames(input.tools)];
|
||||
return {
|
||||
cwd: input.projectDir,
|
||||
model: input.model,
|
||||
|
|
@ -133,6 +143,8 @@ function baseOptions(input: {
|
|||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
managedSettings: managedMcpSettings(expectedServerNames),
|
||||
strictMcpConfig: true,
|
||||
allowedTools: toolIds,
|
||||
disallowedTools: BUILTIN_TOOLS,
|
||||
canUseTool: async (toolName, _toolInput, options) =>
|
||||
|
|
@ -147,7 +159,14 @@ function baseOptions(input: {
|
|||
persistSession: false,
|
||||
env: createKtxClaudeCodeEnv(input.env),
|
||||
...(input.tools && Object.keys(input.tools).length > 0
|
||||
? { mcpServers: { ktx: createSdkMcpServer({ name: 'ktx', tools: createClaudeSdkTools(input.tools) }) } }
|
||||
? {
|
||||
mcpServers: {
|
||||
[KTX_MCP_SERVER_NAME]: createSdkMcpServer({
|
||||
name: KTX_MCP_SERVER_NAME,
|
||||
tools: createClaudeSdkTools(input.tools),
|
||||
}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,6 +99,27 @@ describe('SlEditSourceTool — session gating', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('rejects session-scoped edits outside allowed target connections', async () => {
|
||||
const { tool } = makeTool();
|
||||
const session = makeSession({
|
||||
allowedConnectionNames: new Set(['warehouse']),
|
||||
});
|
||||
const context: ToolContext = { ...baseContext, session };
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: 'finance',
|
||||
sourceName: 'orders',
|
||||
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
|
||||
} as any,
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(false);
|
||||
expect(result.markdown).toContain('connectionId "finance" is outside this ingest session');
|
||||
expect(session.actions).toEqual([]);
|
||||
});
|
||||
|
||||
it('indexes normally when no session is present', async () => {
|
||||
const { tool, slSearchService } = makeTool();
|
||||
const result = await tool.call(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,12 @@
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
import {
|
||||
addTouchedSlSource,
|
||||
type ToolContext,
|
||||
type ToolOutput,
|
||||
validateActionRawPaths,
|
||||
validateActionTargetConnection,
|
||||
} from '../../tools/index.js';
|
||||
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
|
||||
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
|
|
@ -79,6 +85,10 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
|
||||
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId);
|
||||
if (!targetConnectionValidation.ok) {
|
||||
return this.buildOutput(false, [targetConnectionValidation.error], sourceName);
|
||||
}
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return this.buildOutput(false, [rawPathValidation.error], sourceName);
|
||||
|
|
|
|||
|
|
@ -133,6 +133,34 @@ describe('SlWriteSourceTool — session gating', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('rejects session-scoped writes outside allowed target connections', async () => {
|
||||
const { tool } = makeTool();
|
||||
const session = makeSession({
|
||||
allowedConnectionNames: new Set(['warehouse']),
|
||||
});
|
||||
const context: ToolContext = { ...baseContext, session };
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: 'finance',
|
||||
sourceName: 'finance_orders',
|
||||
source: {
|
||||
name: 'finance_orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'string' }],
|
||||
measures: [],
|
||||
joins: [],
|
||||
} as any,
|
||||
} as any,
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(false);
|
||||
expect(result.markdown).toContain('connectionId "finance" is outside this ingest session');
|
||||
expect(session.actions).toEqual([]);
|
||||
});
|
||||
|
||||
it('indexes normally when no session is present', async () => {
|
||||
const { tool, slSearchService } = makeTool();
|
||||
const result = await tool.call(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,12 @@
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
import {
|
||||
addTouchedSlSource,
|
||||
type ToolContext,
|
||||
type ToolOutput,
|
||||
validateActionRawPaths,
|
||||
validateActionTargetConnection,
|
||||
} from '../../tools/index.js';
|
||||
import { sourceOverlaySchema } from '../schemas.js';
|
||||
import type { SemanticLayerService } from '../semantic-layer.service.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
|
|
@ -106,6 +112,10 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
|
||||
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId);
|
||||
if (!targetConnectionValidation.ok) {
|
||||
return this.buildOutput(false, [targetConnectionValidation.error], sourceName);
|
||||
}
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return this.buildOutput(false, [rawPathValidation.error], sourceName);
|
||||
|
|
|
|||
23
packages/context/src/tools/action-target-connection.ts
Normal file
23
packages/context/src/tools/action-target-connection.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import type { ToolSession } from './tool-session.js';
|
||||
|
||||
/** Outcome of a target-connection check: success, or failure with a message for the caller. */
type ActionTargetConnectionValidation = { ok: true } | { ok: false; error: string };

/**
 * Checks that a tool action targets a connection the session is allowed to write to.
 *
 * The check passes when there is no session, when the session defines no
 * `allowedConnectionNames`, or when `connectionId` is in that collection.
 * Otherwise the result carries an error naming the rejected connection and the
 * sorted allowed ones (`(none)` when the collection is empty).
 *
 * @param session Current tool session, if any.
 * @param connectionId Connection the action wants to target.
 */
export function validateActionTargetConnection(
  session: ToolSession | undefined,
  connectionId: string,
): ActionTargetConnectionValidation {
  const allowedNames = session?.allowedConnectionNames;
  if (!allowedNames || allowedNames.has(connectionId)) {
    return { ok: true };
  }

  // Sort a copy so the message is stable regardless of insertion order.
  const sortedNames = [...allowedNames].sort();
  const listing = sortedNames.length > 0 ? sortedNames.join(', ') : '(none)';
  return {
    ok: false,
    error: `connectionId "${connectionId}" is outside this ingest session's allowed target connections: ${listing}`,
  };
}
|
||||
|
|
@ -32,6 +32,7 @@ export type { SqlEdit } from './sql-edit-replacer.js';
|
|||
export { applySqlEdits } from './sql-edit-replacer.js';
|
||||
export type { IngestToolMetadata, MemoryAction, ToolSession } from './tool-session.js';
|
||||
export { validateActionRawPaths } from './action-raw-paths.js';
|
||||
export { validateActionTargetConnection } from './action-target-connection.js';
|
||||
export type { TouchedSlSource, TouchedSlSourceSet } from './touched-sl-sources.js';
|
||||
export {
|
||||
addTouchedSlSource,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue