mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-19 08:28:06 +02:00
feat(ingest): default local ingest to isolated diffs (#128)
* docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify 
isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
parent
d1c84e5564
commit
e64da5a85d
66 changed files with 22346 additions and 514 deletions
|
|
@ -1,5 +1,12 @@
|
|||
<role>
|
||||
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`.
|
||||
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit
|
||||
gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs,
|
||||
Metabase card JSONs, Notion pages, or similar) and you must translate that
|
||||
slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass.
|
||||
You run in an isolated WorkUnit worktree. Deterministic projection output,
|
||||
existing project memory, and listed dependency paths are visible; sibling
|
||||
WorkUnit edits from this same job are not visible until the runner integrates
|
||||
accepted patches.
|
||||
</role>
|
||||
|
||||
<stance>
|
||||
|
|
@ -8,9 +15,19 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing
|
|||
|
||||
<workflow>
|
||||
1. Read this WorkUnit's section at the end of the user prompt. It lists your `rawFiles`, any unchanged `dependencyPaths` you may need to resolve references, the `peerFileIndex` (paths only; you CANNOT read them), the source's `skillNames`, and any `priorProvenance` rows telling you what earlier syncs produced from these files.
|
||||
2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `wiki_capture`, and `ingest_triage` last. The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping.
|
||||
2. Load the per-source review skill first (for example `lookml_ingest`,
|
||||
`metricflow_ingest`, or `dbt_ingest`), then `sl_capture` and
|
||||
`wiki_capture`, and `ingest_triage` last. The triage skill tells you how to
|
||||
react when existing project memory, deterministic projection output, or
|
||||
prior provenance overlaps with what this WorkUnit is about to write.
|
||||
3. If the system prompt includes `<canonical_pins>`, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain.
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip.
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large
|
||||
files) to load content. Before writing a new SL source or wiki page, call
|
||||
`discover_data` for each candidate source, table, metric, or topic name to
|
||||
find existing wiki pages, SL sources, deterministic projection output, prior
|
||||
sync artifacts, and raw warehouse matches; apply `ingest_triage` when you hit
|
||||
one, and apply any matching canonical pin before deciding whether to edit,
|
||||
rename, or skip.
|
||||
5. For every `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call, include `rawPaths` with only the raw file paths that directly support that action. If one artifact synthesizes several files, list each contributing raw file. Do not include unrelated files from the same WorkUnit.
|
||||
6. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit_source` over `sl_write_source` for that artifact: the re-ingest change rule says expression-only changes replace silently, while grain/column/filter changes replace and flag.
|
||||
7. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written.
|
||||
|
|
@ -28,5 +45,7 @@ Wiki keys must be flat slugs like `paid-order-lifecycle`, not directory paths li
|
|||
- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source.
|
||||
- Do not write context-source overlays into the context source connection just because that is the current WorkUnit connection. Use `sl_discover` across data sources and write the SL artifact to the warehouse/data-source connection that owns the matching manifest. If there is no confirmed target connection, use `emit_unmapped_fallback` and wiki capture.
|
||||
- Do not duplicate an artifact that prior provenance says you already produced; update it.
|
||||
- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`.
|
||||
- Do not silently accept a name collision with visible existing memory,
|
||||
deterministic projection output, or prior provenance when the formula differs.
|
||||
Trigger `ingest_triage`.
|
||||
</do_not>
|
||||
|
|
|
|||
|
|
@ -7,8 +7,11 @@ callers: [memory_agent]
|
|||
# Ingest Triage - conflict classification and resolution
|
||||
|
||||
This skill is loaded in two contexts:
|
||||
- By a Stage 3 WorkUnit agent when `sl_discover` reveals that a prior WU (or a prior sync) already wrote something that overlaps with what the current WU is about to write.
|
||||
- By the Stage 4 reconciliation agent for cross-WU sweeps and for eviction decisions.
|
||||
- By a Stage 3 WorkUnit agent when `sl_discover`, deterministic projection
|
||||
output, existing project memory, or prior provenance overlaps with what the
|
||||
current WorkUnit is about to write.
|
||||
- By the Stage 4 reconciliation agent for cross-WorkUnit sweeps, accepted patch
|
||||
overlap, and eviction decisions.
|
||||
|
||||
Apply the rules below before every write that could collide with an existing artifact.
|
||||
|
||||
|
|
@ -23,7 +26,8 @@ Apply the rules below before every write that could collide with an existing art
|
|||
3. **If the difference is structural - grain, columns, filter, join shape - is the current bundle the re-ingest of a previously-ingested bundle (i.e. `priorProvenance` has a row for this raw file and artifact)?**
|
||||
Re-ingest change (semantic break): replace + flag. Record in the IngestReport's `conflicts_resolved` list with `flagged_for_human: true`.
|
||||
|
||||
4. **If there's no prior-sync row (both are from THIS job), check for same-ingest contradictions:**
|
||||
4. **If reconciliation sees accepted patches from this same job with no
|
||||
prior-sync row, check for same-ingest contradictions:**
|
||||
|
||||
| Kind | Detection | Resolution |
|
||||
|---|---|---|
|
||||
|
|
|
|||
45
packages/context/src/core/git.service.patch.test.ts
Normal file
45
packages/context/src/core/git.service.patch.test.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
/**
 * Builds a GitService backed by a fresh temporary home directory.
 *
 * The config directory is `<homeDir>/config`. `onModuleInit()` is awaited
 * before returning, so callers receive a service that has completed its own
 * start-up hook.
 *
 * @returns the temp home directory, the config directory and the service.
 */
async function makeGit() {
  const systemEmail = 'system@example.com';
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-'));
  const configDir = join(homeDir, 'config');

  const git = new GitService({
    storage: { configDir, homeDir },
    git: {
      userName: 'System User',
      userEmail: systemEmail,
      bootstrapMessage: 'init',
      bootstrapAuthor: 'system',
      bootstrapAuthorEmail: systemEmail,
    },
  });
  await git.onModuleInit();

  return { homeDir, configDir, git };
}
|
||||
|
||||
describe('GitService patch helpers', () => {
  it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
    const authorName = 'System User';
    const authorEmail = 'system@example.com';
    const pagePath = 'wiki/global/page.md';

    const { homeDir, configDir, git } = await makeGit();
    const sourcePage = join(configDir, pagePath);

    // Baseline commit: this revision is both the patch base and the
    // starting point of the target worktree.
    await mkdir(join(configDir, 'wiki/global'), { recursive: true });
    await writeFile(sourcePage, 'old\n');
    await git.commitFiles([pagePath], 'add page', authorName, authorEmail);
    const base = await git.revParseHead();

    // Second commit: the change the patch must carry.
    await writeFile(sourcePage, 'new\n');
    await git.commitFiles([pagePath], 'edit page', authorName, authorEmail);
    const patchPath = join(homeDir, 'proposal.patch');
    await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);

    // Apply the patch in a separate worktree created at the base revision.
    const targetDir = join(homeDir, 'target');
    await git.addWorktree(targetDir, 'target', base);
    const targetGit = git.forWorktree(targetDir);
    await targetGit.applyPatchFile3WayIndex(patchPath);
    await targetGit.commitStaged('apply proposal', authorName, authorEmail);

    const appliedContent = await readFile(join(targetDir, pagePath), 'utf-8');
    expect(appliedContent).toBe('new\n');
  });
});
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
import { promises as fs } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { dirname, join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import { noopLogger, resolveConfigDir, type KtxCoreConfig, type KtxLogger } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
|
|
@ -747,6 +747,55 @@ export class GitService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Writes the diff between two revisions to `patchPath` in a form intended to
 * be consumed by `git apply`.
 *
 * `git diff` is a porcelain command and honours user/repository config, so
 * the flags below pin the output format:
 * - `--binary` emits applicable binary hunks;
 * - `--no-renames` keeps renames as delete + add;
 * - `--no-color`, `--no-ext-diff` and `--no-textconv` stop colour codes,
 *   external diff drivers and textconv filters from producing output that
 *   cannot be applied;
 * - `--src-prefix` / `--dst-prefix` force the `a/` and `b/` prefixes even
 *   when `diff.noprefix` or `diff.mnemonicPrefix` is set;
 * - the trailing `--` marks `from..to` as a revision range, never a path.
 *
 * The parent directory of `patchPath` is created if it does not exist.
 *
 * @param from revision the patch starts from
 * @param to revision the patch leads to
 * @param patchPath file the patch text is written to
 */
async writeBinaryNoRenamePatch(from: string, to: string, patchPath: string): Promise<void> {
  await this.withMutationQueue(async () => {
    const patch = await this.git.raw([
      'diff',
      '--binary',
      '--no-renames',
      '--no-color',
      '--no-ext-diff',
      '--no-textconv',
      '--src-prefix=a/',
      '--dst-prefix=b/',
      `${from}..${to}`,
      '--',
    ]);
    await fs.mkdir(dirname(patchPath), { recursive: true });
    await fs.writeFile(patchPath, patch, 'utf-8');
  });
}
|
||||
|
||||
/**
 * Runs `git apply --3way --index <patchPath>` inside the mutation queue.
 *
 * Any error raised by the git invocation propagates to the caller.
 *
 * @param patchPath path of the patch file handed to `git apply`
 */
async applyPatchFile3WayIndex(patchPath: string): Promise<void> {
  const applyArgs = ['apply', '--3way', '--index', patchPath];
  await this.withMutationQueue(async () => {
    await this.git.raw(applyArgs);
  });
}
|
||||
|
||||
/**
 * Commits whatever is currently staged in the index.
 *
 * When `git diff --cached --name-only` reports nothing, no commit is made and
 * the current HEAD is described with `created: false`. Otherwise a commit is
 * created with the given message and author, and the new HEAD is described
 * with `created: true`.
 *
 * @param commitMessage message for the new commit
 * @param author author name used in `--author`
 * @param authorEmail author e-mail used in `--author`
 * @returns information about HEAD after the call
 */
async commitStaged(commitMessage: string, author: string, authorEmail: string): Promise<GitCommitInfo> {
  return this.withMutationQueue(async () => {
    // Describes HEAD from `rev-parse` plus the latest log entry; `fallback`
    // supplies values for fields the log entry does not provide.
    const describeHead = async (
      created: boolean,
      fallback: { message: string; author: string; authorEmail: string; isoDate: () => string },
    ): Promise<GitCommitInfo> => {
      const commitHash = (await this.git.revparse(['HEAD'])).trim();
      const { latest } = await this.git.log({ maxCount: 1 });
      return {
        commitHash,
        shortHash: commitHash.substring(0, 8),
        message: latest?.message ?? fallback.message,
        author: latest?.author_name ?? fallback.author,
        authorEmail: latest?.author_email ?? fallback.authorEmail,
        timestamp: latest?.date ?? fallback.isoDate(),
        committedDate: latest?.date ? new Date(latest.date).toISOString() : fallback.isoDate(),
        created,
      };
    };

    const stagedNames = await this.git.diff(['--cached', '--name-only']);
    const nothingStaged = stagedNames.trim().length === 0;
    if (nothingStaged) {
      return describeHead(false, {
        message: '',
        author: '',
        authorEmail: '',
        isoDate: () => new Date(0).toISOString(),
      });
    }

    await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` });
    return describeHead(true, {
      message: commitMessage,
      author,
      authorEmail,
      isoDate: () => new Date().toISOString(),
    });
  });
}
|
||||
|
||||
private async fileExists(path: string): Promise<boolean> {
|
||||
try {
|
||||
await fs.access(path);
|
||||
|
|
|
|||
|
|
@ -138,6 +138,52 @@ describe('fetchMetabaseBundle', () => {
|
|||
expect(warn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('emits memory-flow progress while fetching Metabase cards', async () => {
  const events: unknown[] = [];

  // Matcher for one `stage_progress` event of the `source` stage.
  const sourceProgress = (message: string, extra: Record<string, unknown> = {}) =>
    expect.objectContaining({ type: 'stage_progress', stage: 'source', message, ...extra });

  await fetchMetabaseBundle({
    pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 },
    stagedDir,
    ctx: {
      ...makeFetchContext(),
      memoryFlow: {
        // Only `emit` is recorded; the remaining hooks are inert mocks.
        emit: (event) => events.push(event),
        update: vi.fn(),
        finish: vi.fn(),
        snapshot: vi.fn(),
      },
    },
    clientFactory,
    sourceStateReader,
  });

  expect(events).toEqual(
    expect.arrayContaining([
      sourceProgress('Fetching Metabase database 42 metadata'),
      sourceProgress('Fetching 1 Metabase card for database 42'),
      sourceProgress('Checked 1/1 Metabase cards for database 42; wrote 1', { transient: true }),
      sourceProgress('Fetched Metabase database 42: 1 cards, 0 unresolved'),
    ]),
  );
});
|
||||
|
||||
it('routes Metabase fetch warnings through the injected logger', async () => {
|
||||
const logger = {
|
||||
log: vi.fn(),
|
||||
|
|
|
|||
|
|
@ -83,6 +83,15 @@ function resolvePath(index: Map<number | 'root', CollectionNode>, collectionId:
|
|||
export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Promise<void> {
|
||||
const pullConfig: MetabasePullConfig = parseMetabasePullConfig(params.pullConfig);
|
||||
const logger = params.logger ?? noopMetabaseFetchLogger;
|
||||
// Emits a `stage_progress` event for the `source` stage on the optional
// memory-flow sink. The `transient` key is attached only when it is true.
const emitFetchProgress = (percent: number, message: string, transient = false): void => {
  const flow = params.ctx.memoryFlow;
  if (flow == null) {
    return;
  }
  const transientFlag = transient ? { transient } : {};
  flow.emit({
    type: 'stage_progress',
    stage: 'source',
    percent,
    message,
    ...transientFlag,
  });
};
|
||||
const syncState = await params.sourceStateReader.getSourceState(pullConfig.metabaseConnectionId);
|
||||
const mapping = syncState.mappings.find(
|
||||
(m) => m.metabaseDatabaseId === pullConfig.metabaseDatabaseId && m.syncEnabled,
|
||||
|
|
@ -100,6 +109,7 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
|
||||
const client = await params.clientFactory.createClient(pullConfig, params.ctx);
|
||||
try {
|
||||
emitFetchProgress(26, `Fetching Metabase database ${pullConfig.metabaseDatabaseId} metadata`);
|
||||
let mappingDatabaseName = mapping.metabaseDatabaseName;
|
||||
let mappingEngine = mapping.metabaseEngine;
|
||||
if (mappingDatabaseName === null) {
|
||||
|
|
@ -133,6 +143,12 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
await mkdir(join(params.stagedDir, STAGED_FILES.databasesDir), { recursive: true });
|
||||
|
||||
const cardIdsToFetch = await resolveCardIdsToFetch(client, scope, pullConfig.metabaseDatabaseId, logger);
|
||||
emitFetchProgress(
|
||||
28,
|
||||
`Fetching ${cardIdsToFetch.length} Metabase card${cardIdsToFetch.length === 1 ? '' : 's'} for database ${
|
||||
pullConfig.metabaseDatabaseId
|
||||
}`,
|
||||
);
|
||||
|
||||
const referencedCollectionIds = new Set<number>();
|
||||
let writtenCards = 0;
|
||||
|
|
@ -212,7 +228,19 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
}
|
||||
}
|
||||
}
|
||||
const knownTotal = Math.max(cardIdsToFetch.length, fetched.size + queue.length);
|
||||
if (fetched.size === 1 || fetched.size % 10 === 0 || queue.length === 0) {
|
||||
emitFetchProgress(
|
||||
30,
|
||||
`Checked ${fetched.size}/${knownTotal} Metabase cards for database ${pullConfig.metabaseDatabaseId}; wrote ${writtenCards}`,
|
||||
true,
|
||||
);
|
||||
}
|
||||
}
|
||||
emitFetchProgress(
|
||||
32,
|
||||
`Fetched Metabase database ${pullConfig.metabaseDatabaseId}: ${writtenCards} cards, ${unresolvedCards.length} unresolved`,
|
||||
);
|
||||
|
||||
for (const colId of referencedCollectionIds) {
|
||||
const node = collectionIndex.get(colId);
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
|
||||
import type { SourceAdapter } from '../../types.js';
|
||||
import type { MetricFlowParseResult } from './deep-parse.js';
|
||||
import { MetricflowSourceAdapter } from './metricflow.adapter.js';
|
||||
import { readMetricflowProjectionConfig, writeMetricflowProjectionConfig } from './projection-config.js';
|
||||
|
||||
function compileOnlyRequiredDepsCheck(): void {
|
||||
// @ts-expect-error MetricflowSourceAdapter requires an explicit cache home.
|
||||
|
|
@ -22,6 +24,25 @@ async function makeRepo(tmpRoot: string, files: Record<string, string>) {
|
|||
return makeLocalGitRepo(fixtureDir, join(tmpRoot, 'origin'));
|
||||
}
|
||||
|
||||
/**
 * Fixture: a parse result holding a single `orders` semantic model, no
 * cross-model metrics or relationships, and one parser warning.
 */
function metricflowParseResult(): MetricFlowParseResult {
  const ordersModel: MetricFlowParseResult['semanticModels'][number] = {
    name: 'orders',
    description: 'Orders',
    modelRef: 'orders',
    dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }],
    measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }],
    entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }],
    defaultTimeDimension: null,
  };

  const fixture: MetricFlowParseResult = {
    semanticModels: [ordersModel],
    crossModelMetrics: [],
    relationships: [],
    warnings: ['parser warning'],
  };
  return fixture;
}
|
||||
|
||||
describe('MetricflowSourceAdapter', () => {
|
||||
let tmpRoot: string;
|
||||
let stagedDir: string;
|
||||
|
|
@ -127,4 +148,119 @@ describe('MetricflowSourceAdapter', () => {
|
|||
await expect(readFile(join(stagedDir, 'models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models');
|
||||
expect(await adapter.detect(stagedDir)).toBe(true);
|
||||
});
|
||||
|
||||
it('persists parsed target tables for deterministic projection during fetch', async () => {
  const ordersTarget = {
    ok: true,
    catalog: null,
    schema: 'analytics',
    name: 'orders',
    canonicalTable: 'analytics.orders',
  } as const;

  const repo = await makeRepo(tmpRoot, {
    'dbt_project.yml': 'name: analytics\n',
    'models/orders.yml': 'semantic_models:\n  - name: orders\n    model: ref("orders")\n',
  });

  await adapter.fetch?.(
    {
      repoUrl: repo.repoUrl,
      branch: 'main',
      path: null,
      authToken: null,
      parsedTargetTables: { orders: ordersTarget },
    },
    stagedDir,
    { connectionId: 'warehouse-1', sourceKey: 'metricflow' },
  );

  // The parsed targets handed to fetch must be readable back from the staged dir.
  const persisted = await readMetricflowProjectionConfig(stagedDir);
  expect(persisted).toMatchObject({
    parsedTargetTables: {
      orders: { ok: true, schema: 'analytics', name: 'orders' },
    },
  });
});
|
||||
|
||||
it('projects parsed MetricFlow semantic models in the integration worktree', async () => {
  const integrationWorkdir = '/tmp/metricflow-integration';
  const ordersTarget = {
    ok: true,
    catalog: null,
    schema: 'analytics',
    name: 'orders',
    canonicalTable: 'analytics.orders',
  } as const;

  await writeMetricflowProjectionConfig(stagedDir, {
    parsedTargetTables: { orders: ordersTarget },
  });

  // Service returned by `forWorktree`; its mocks report that no source exists yet.
  const scoped = {
    getManifestEntry: vi.fn().mockResolvedValue(null),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
    loadSource: vi.fn().mockResolvedValue(null),
    writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
  };
  // Root service: only `forWorktree` is given behaviour; the other methods are bare mocks.
  const semanticLayerService = {
    forWorktree: vi.fn().mockReturnValue(scoped),
    getManifestEntry: vi.fn(),
    isManifestBacked: vi.fn(),
    loadAllSources: vi.fn(),
    loadSource: vi.fn(),
    writeSource: vi.fn(),
  };

  const result = await adapter.project?.({
    connectionId: 'warehouse-1',
    sourceKey: 'metricflow',
    syncId: 'sync-1',
    jobId: 'job-1',
    runId: 'run-1',
    stagedDir,
    workdir: integrationWorkdir,
    parseArtifacts: metricflowParseResult(),
    semanticLayerService: semanticLayerService as never,
  });

  expect(semanticLayerService.forWorktree).toHaveBeenCalledWith(integrationWorkdir);
  expect(scoped.writeSource).toHaveBeenCalledWith(
    'warehouse-1',
    expect.objectContaining({ name: 'orders' }),
    'dbt MetricFlow',
    expect.any(String),
    'dbt MetricFlow sync: create source orders',
    { skipValidation: true },
  );
  expect(result).toMatchObject({
    warnings: ['parser warning'],
    errors: [],
    touchedSources: [{ connectionId: 'warehouse-1', sourceName: 'orders' }],
    changedWikiPageKeys: [],
  });
});
|
||||
|
||||
it('returns a projection error when parse artifacts are missing', async () => {
  const expectedError = 'MetricFlow deterministic projection requires parseArtifacts from chunk()';

  // `parseArtifacts` is deliberately undefined; the service is an empty stub.
  const result = await adapter.project?.({
    connectionId: 'warehouse-1',
    sourceKey: 'metricflow',
    syncId: 'sync-1',
    jobId: 'job-1',
    runId: 'run-1',
    stagedDir,
    workdir: '/tmp/metricflow-integration',
    parseArtifacts: undefined,
    semanticLayerService: {} as never,
  });

  expect(result).toMatchObject({
    warnings: [],
    errors: [expectedError],
    touchedSources: [],
    changedWikiPageKeys: [],
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,10 +1,23 @@
|
|||
import { join } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js';
|
||||
import type {
|
||||
ChunkResult,
|
||||
DeterministicProjectionContext,
|
||||
DiffSet,
|
||||
FetchContext,
|
||||
ProjectionResult,
|
||||
SourceAdapter,
|
||||
} from '../../types.js';
|
||||
import { chunkMetricFlowProject } from './chunk.js';
|
||||
import { detectMetricFlowStagedDir } from './detect.js';
|
||||
import { parseMetricflowFiles, type MetricFlowParseResult } from './deep-parse.js';
|
||||
import { fetchMetricflowRepo } from './fetch.js';
|
||||
import { importMetricflowSemanticModels } from './import-semantic-models.js';
|
||||
import { parseMetricFlowStagedDir, type ParsedMetricFlowProject } from './parse.js';
|
||||
import {
|
||||
metricflowHostTablesFromParsedTargets,
|
||||
readMetricflowProjectionConfig,
|
||||
writeMetricflowProjectionConfig,
|
||||
} from './projection-config.js';
|
||||
import { parseMetricflowPullConfig } from './pull-config.js';
|
||||
|
||||
export interface MetricflowSourceAdapterDeps {
|
||||
|
|
@ -33,6 +46,9 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
cacheDir: this.resolveCacheDir(ctx.connectionId),
|
||||
stagedDir,
|
||||
});
|
||||
await writeMetricflowProjectionConfig(stagedDir, {
|
||||
parsedTargetTables: config.parsedTargetTables,
|
||||
});
|
||||
}
|
||||
|
||||
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
|
||||
|
|
@ -46,6 +62,37 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
return { ...chunk, parseArtifacts };
|
||||
}
|
||||
|
||||
/**
 * Deterministically projects parsed MetricFlow semantic models by delegating
 * to `importMetricflowSemanticModels`.
 *
 * If `ctx.parseArtifacts` does not have the MetricFlow parse-result shape, an
 * error result is returned and nothing is imported. Otherwise host tables are
 * derived from the projection config stored in `ctx.stagedDir`, and the
 * import runs against `ctx.workdir`.
 *
 * @param ctx projection context supplied by the caller
 * @returns the import outcome; `changedWikiPageKeys` is always empty
 */
async project(ctx: DeterministicProjectionContext): Promise<ProjectionResult> {
  const { parseArtifacts } = ctx;
  if (!isMetricFlowParseResult(parseArtifacts)) {
    return {
      warnings: [],
      errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
      touchedSources: [],
      changedWikiPageKeys: [],
    };
  }

  const { parsedTargetTables } = await readMetricflowProjectionConfig(ctx.stagedDir);
  const hostTables = metricflowHostTablesFromParsedTargets(parsedTargetTables);

  const result = await importMetricflowSemanticModels(
    { semanticLayerService: ctx.semanticLayerService },
    {
      connectionId: ctx.connectionId,
      parseResult: parseArtifacts,
      targetSchema: null,
      hostTables,
      workdir: ctx.workdir,
    },
  );

  const { warnings, errors, touchedSources } = result;
  return { result, warnings, errors, touchedSources, changedWikiPageKeys: [] };
}
|
||||
|
||||
private resolveCacheDir(connectionId: string): string {
|
||||
return join(this.deps.homeDir, 'ingest-metricflow-repos', connectionId);
|
||||
}
|
||||
|
|
@ -54,3 +101,16 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
function parseMetricflowStagedDirForImport(project: ParsedMetricFlowProject): MetricFlowParseResult {
|
||||
return parseMetricflowFiles(project.files);
|
||||
}
|
||||
|
||||
/**
 * Structural guard for a MetricFlow parse result.
 *
 * Accepts any non-null object whose `semanticModels`, `crossModelMetrics`,
 * `relationships` and `warnings` properties are all arrays. Array elements
 * are not inspected.
 */
function isMetricFlowParseResult(value: unknown): value is MetricFlowParseResult {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const record = value as Record<string, unknown>;
  const requiredArrayFields = ['semanticModels', 'crossModelMetrics', 'relationships', 'warnings'];
  return requiredArrayFields.every((field) => Array.isArray(record[field]));
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,54 @@
|
|||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js';
|
||||
import type { MetricflowHostTable } from './semantic-models.js';
|
||||
|
||||
/** File name, inside the staged directory, that holds the projection config. */
const METRICFLOW_PROJECTION_CONFIG_FILE = 'sync-config.json';

/** Parsed target tables keyed by string; defaults to an empty record when absent. */
const parsedTargetTablesField = z.record(z.string(), parsedTargetTableSchema).default({});

/** Schema applied to the projection config on both write and read. */
const metricflowProjectionConfigSchema = z.object({
  parsedTargetTables: parsedTargetTablesField,
});

/** Parsed (output) shape of the projection config. */
export type MetricflowProjectionConfig = z.infer<typeof metricflowProjectionConfigSchema>;
|
||||
|
||||
/**
 * Validates `config` against the projection-config schema and writes it as
 * pretty-printed JSON (two-space indent, trailing newline) into `stagedDir`.
 *
 * The directory is created if missing. Schema validation errors propagate.
 *
 * @param stagedDir directory that receives the config file
 * @param config projection config to persist
 */
export async function writeMetricflowProjectionConfig(
  stagedDir: string,
  config: MetricflowProjectionConfig,
): Promise<void> {
  const validated = metricflowProjectionConfigSchema.parse(config);
  await mkdir(stagedDir, { recursive: true });

  const targetFile = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);
  const serialized = `${JSON.stringify(validated, null, 2)}\n`;
  await writeFile(targetFile, serialized, 'utf-8');
}
|
||||
|
||||
/**
 * Reads and validates the projection config stored in `stagedDir`.
 *
 * An error carrying `code === 'ENOENT'` is treated as "no config written" and
 * yields an empty `parsedTargetTables` record. Every other error (including
 * malformed JSON or a schema violation) is rethrown unchanged.
 *
 * @param stagedDir directory expected to contain the config file
 * @returns the validated projection config
 */
export async function readMetricflowProjectionConfig(stagedDir: string): Promise<MetricflowProjectionConfig> {
  const configPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);
  try {
    const raw = await readFile(configPath, 'utf-8');
    const json: unknown = JSON.parse(raw);
    return metricflowProjectionConfigSchema.parse(json);
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return { parsedTargetTables: {} };
  }
}
|
||||
|
||||
/**
 * Converts parsed target tables into host-table descriptors.
 *
 * Entries whose `ok` flag is falsy are dropped. Each remaining entry becomes a
 * host table whose `id` is the record key, whose `db` is the parsed `schema`,
 * and whose `columns` list is empty. The result is ordered by `id` using
 * `localeCompare`.
 *
 * @param parsedTargetTables Parsed target tables keyed by id.
 * @returns Host tables for the successfully parsed targets, sorted by id.
 */
export function metricflowHostTablesFromParsedTargets(
  parsedTargetTables: Record<string, ParsedTargetTable>,
): MetricflowHostTable[] {
  const hostTables: MetricflowHostTable[] = [];
  for (const [id, table] of Object.entries(parsedTargetTables)) {
    if (!table.ok) {
      continue;
    }
    hostTables.push({
      id,
      name: table.name,
      catalog: table.catalog,
      db: table.schema,
      columns: [],
    });
  }
  hostTables.sort((left, right) => left.id.localeCompare(right.id));
  return hostTables;
}
|
||||
190
packages/context/src/ingest/artifact-gates.test.ts
Normal file
190
packages/context/src/ingest/artifact-gates.test.ts
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js';
|
||||
|
||||
/**
 * Builds a mocked wiki service backed by an in-memory page map.
 *
 * `listPageKeys` resolves to the keys of `pages`. `readPage` ignores its scope
 * arguments and resolves to `null` for unknown keys; otherwise it resolves to
 * a page whose frontmatter carries the key as `summary`, `usage_mode: 'auto'`,
 * and the configured `refs` / `sl_refs`, with `content` defaulting to `''`.
 */
function wikiServiceWithPages(
  pages: Record<string, { refs?: string[]; content?: string; slRefs?: string[] }>,
) {
  const readPage = vi.fn().mockImplementation((_scope: string, _scopeId: string | null, pageKey: string) => {
    const page = pages[pageKey];
    if (!page) {
      return Promise.resolve(null);
    }
    const frontmatter = {
      summary: pageKey,
      usage_mode: 'auto',
      refs: page.refs,
      sl_refs: page.slRefs,
    };
    return Promise.resolve({ pageKey, frontmatter, content: page.content ?? '' });
  });

  return {
    listPageKeys: vi.fn().mockResolvedValue(Object.keys(pages)),
    readPage,
  };
}
|
||||
|
||||
describe('artifact gates', () => {
|
||||
it('fails the final tree when wiki body references a stale semantic-layer measure', async () => {
|
||||
const wikiService = wikiServiceWithPages({
|
||||
'account-segments': {
|
||||
slRefs: ['mart_account_segments'],
|
||||
content: 'ARR is `mart_account_segments.total_contract_arr_cents`.',
|
||||
},
|
||||
});
|
||||
const semanticLayerService = {
|
||||
loadAllSources: vi.fn().mockResolvedValue({
|
||||
sources: [
|
||||
{
|
||||
name: 'mart_account_segments',
|
||||
grain: ['account_id'],
|
||||
columns: [{ name: 'account_id', type: 'string' }],
|
||||
joins: [],
|
||||
measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
|
||||
table: 'analytics.mart_account_segments',
|
||||
},
|
||||
],
|
||||
loadErrors: [],
|
||||
}),
|
||||
};
|
||||
|
||||
await expect(
|
||||
validateFinalIngestArtifacts({
|
||||
connectionIds: ['warehouse'],
|
||||
changedWikiPageKeys: ['account-segments'],
|
||||
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
|
||||
wikiService: wikiService as never,
|
||||
semanticLayerService: semanticLayerService as never,
|
||||
validateTouchedSources: async () => ({ invalidSources: [], validSources: ['mart_account_segments'] }),
|
||||
tableExists: async () => true,
|
||||
}),
|
||||
).rejects.toThrow(/unknown semantic-layer entity mart_account_segments\.total_contract_arr_cents/);
|
||||
});
|
||||
|
||||
it('fails before provenance insertion when a raw path cannot be tied to the current snapshot or eviction set', () => {
|
||||
expect(() =>
|
||||
validateProvenanceRawPaths({
|
||||
rows: [{ rawPath: 'cards/missing.json' }],
|
||||
currentRawPaths: new Set(['cards/present.json']),
|
||||
deletedRawPaths: new Set(['cards/deleted.json']),
|
||||
}),
|
||||
).toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/);
|
||||
});
|
||||
|
||||
it('fails measure-level wiki frontmatter sl_refs that point at missing entities', async () => {
|
||||
const wikiService = wikiServiceWithPages({
|
||||
'account-segments': {
|
||||
slRefs: ['mart_account_segments.total_contract_arr_cents'],
|
||||
content: 'ARR uses a renamed measure.',
|
||||
},
|
||||
});
|
||||
const semanticLayerService = {
|
||||
loadAllSources: vi.fn().mockResolvedValue({
|
||||
sources: [
|
||||
{
|
||||
name: 'mart_account_segments',
|
||||
grain: ['account_id'],
|
||||
columns: [{ name: 'account_id', type: 'string' }],
|
||||
joins: [],
|
||||
measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
|
||||
table: 'analytics.mart_account_segments',
|
||||
},
|
||||
],
|
||||
loadErrors: [],
|
||||
}),
|
||||
};
|
||||
|
||||
await expect(
|
||||
validateFinalIngestArtifacts({
|
||||
connectionIds: ['warehouse'],
|
||||
changedWikiPageKeys: ['account-segments'],
|
||||
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
|
||||
wikiService: wikiService as never,
|
||||
semanticLayerService: semanticLayerService as never,
|
||||
validateTouchedSources: async () => ({ invalidSources: [], validSources: ['warehouse:mart_account_segments'] }),
|
||||
tableExists: async () => true,
|
||||
}),
|
||||
).rejects.toThrow(/unknown sl_refs entity mart_account_segments\.total_contract_arr_cents/);
|
||||
});
|
||||
|
||||
it('validates direct declared-join neighbors of touched semantic-layer sources', async () => {
|
||||
const semanticLayerService = {
|
||||
loadAllSources: vi.fn().mockResolvedValue({
|
||||
sources: [
|
||||
{
|
||||
name: 'orders',
|
||||
grain: ['order_id'],
|
||||
columns: [
|
||||
{ name: 'order_id', type: 'string' },
|
||||
{ name: 'account_id', type: 'string' },
|
||||
],
|
||||
joins: [{ to: 'accounts', on: 'orders.account_id = accounts.account_id', relationship: 'many_to_one' }],
|
||||
measures: [{ name: 'order_count', expr: 'count(*)' }],
|
||||
},
|
||||
{
|
||||
name: 'accounts',
|
||||
grain: ['account_id'],
|
||||
columns: [{ name: 'account_id', type: 'string' }],
|
||||
joins: [],
|
||||
measures: [{ name: 'account_count', expr: 'count(*)' }],
|
||||
},
|
||||
{
|
||||
name: 'segments',
|
||||
grain: ['segment_id'],
|
||||
columns: [
|
||||
{ name: 'segment_id', type: 'string' },
|
||||
{ name: 'account_id', type: 'string' },
|
||||
],
|
||||
joins: [{ to: 'accounts', on: 'segments.account_id = accounts.account_id', relationship: 'many_to_one' }],
|
||||
measures: [],
|
||||
},
|
||||
],
|
||||
loadErrors: [],
|
||||
}),
|
||||
};
|
||||
const validateTouchedSources = vi.fn().mockResolvedValue({ invalidSources: [], validSources: [] });
|
||||
|
||||
await validateFinalIngestArtifacts({
|
||||
connectionIds: ['warehouse'],
|
||||
changedWikiPageKeys: [],
|
||||
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'accounts' }],
|
||||
wikiService: { readPage: vi.fn() } as never,
|
||||
semanticLayerService: semanticLayerService as never,
|
||||
validateTouchedSources,
|
||||
tableExists: async () => true,
|
||||
});
|
||||
|
||||
expect(validateTouchedSources).toHaveBeenCalledWith([
|
||||
{ connectionId: 'warehouse', sourceName: 'accounts' },
|
||||
{ connectionId: 'warehouse', sourceName: 'orders' },
|
||||
{ connectionId: 'warehouse', sourceName: 'segments' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('fails final gates when a changed wiki page references a missing wiki page', async () => {
|
||||
const wikiService = wikiServiceWithPages({
|
||||
'account-segments': {
|
||||
refs: ['missing-frontmatter-page'],
|
||||
content: 'See [[missing-inline-page]] for the related process.',
|
||||
},
|
||||
});
|
||||
const semanticLayerService = {
|
||||
loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
|
||||
};
|
||||
|
||||
await expect(
|
||||
validateFinalIngestArtifacts({
|
||||
connectionIds: ['warehouse'],
|
||||
changedWikiPageKeys: ['account-segments'],
|
||||
touchedSlSources: [],
|
||||
wikiService: wikiService as never,
|
||||
semanticLayerService: semanticLayerService as never,
|
||||
validateTouchedSources: async () => ({ invalidSources: [], validSources: [] }),
|
||||
tableExists: async () => true,
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
/wiki references target missing page\(s\): account-segments -> missing-frontmatter-page, account-segments -> missing-inline-page/,
|
||||
);
|
||||
});
|
||||
});
|
||||
188
packages/context/src/ingest/artifact-gates.ts
Normal file
188
packages/context/src/ingest/artifact-gates.ts
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
import type { SemanticLayerService } from '../sl/index.js';
|
||||
import type { TouchedSlSource } from '../tools/index.js';
|
||||
import type { KnowledgeWikiService } from '../wiki/index.js';
|
||||
import { findMissingWikiRefs } from '../wiki/wiki-ref-validation.js';
|
||||
import { findInvalidWikiBodyRefs } from './wiki-body-refs.js';
|
||||
|
||||
/** Result returned by the touched-source validation callback. */
export interface TouchedValidationResult {
  /** Sources that failed validation. */
  invalidSources: string[];
  /** Sources that passed validation. */
  validSources: string[];
}
|
||||
|
||||
/** Everything the final artifact gates need to validate an ingest result. */
export interface FinalArtifactGateInput {
  /** Connections whose semantic-layer sources are loaded during validation. */
  connectionIds: string[];
  /** Keys of the wiki pages whose references are checked. */
  changedWikiPageKeys: string[];
  /** Semantic-layer sources touched by the ingest. */
  touchedSlSources: TouchedSlSource[];
  /** Wiki service used to read the changed pages. */
  wikiService: KnowledgeWikiService;
  /** Semantic-layer service used to load sources per connection. */
  semanticLayerService: SemanticLayerService;
  /** Validates the given touched sources and reports which are invalid. */
  validateTouchedSources(touched: TouchedSlSource[]): Promise<TouchedValidationResult>;
  /** Resolves whether `tableRef` exists on the given connection. */
  tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
|
||||
|
||||
/** Input for checking that provenance rows only reference known raw paths. */
export interface ProvenanceRawPathValidationInput {
  /** Provenance rows to check; only `rawPath` is inspected. */
  rows: { rawPath: string }[];
  /** Raw paths accepted as part of the current snapshot. */
  currentRawPaths: Set<string>;
  /** Raw paths accepted as deleted. */
  deletedRawPaths: Set<string>;
}
|
||||
|
||||
/**
 * Splits a semantic-layer reference into its parts.
 *
 * Text before the first `/` (when present) is the connection id. The rest is
 * split on `.`: the first piece is the source name and the second piece, if
 * any, is the entity name. Pieces after the second are discarded.
 *
 * @param ref Reference of the form `[connection/]source[.entity]`.
 * @returns The parsed parts; absent connection or entity are `null`.
 */
function parseSlRef(ref: string): { connectionId: string | null; sourceName: string; entityName: string | null } {
  const slashIndex = ref.indexOf('/');
  const hasConnection = slashIndex !== -1;
  const connectionId = hasConnection ? ref.slice(0, slashIndex) : null;
  const qualifiedName = hasConnection ? ref.slice(slashIndex + 1) : ref;

  // Limit of 2: only the source and the first entity segment are kept.
  const segments = qualifiedName.split('.', 2);
  const sourceName = segments[0] ?? '';
  const entityName = segments[1] ?? null;
  return { connectionId, sourceName, entityName };
}
|
||||
|
||||
/**
 * Collects the names of every measure, column, and segment declared on a
 * semantic-layer source. Missing lists are treated as empty.
 *
 * @param source A loaded semantic-layer source.
 * @returns The set of entity names declared on the source.
 */
function slEntityNames(source: Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'][number]): Set<string> {
  const names = new Set<string>();
  for (const measure of source.measures ?? []) {
    names.add(measure.name);
  }
  for (const column of source.columns ?? []) {
    names.add(column.name);
  }
  for (const segment of source.segments ?? []) {
    names.add(segment.name);
  }
  return names;
}
|
||||
|
||||
/**
 * Removes duplicate touched sources and returns them in a stable order.
 *
 * Two entries are duplicates when they share `connectionId` and `sourceName`;
 * the first occurrence wins. The result is a new array sorted by connection
 * id, then source name (both via `localeCompare`). The input is not mutated.
 */
function uniqueTouchedSources(sources: TouchedSlSource[]): TouchedSlSource[] {
  const firstByKey = new Map<string, TouchedSlSource>();
  for (const candidate of sources) {
    const key = `${candidate.connectionId}:${candidate.sourceName}`;
    if (!firstByKey.has(key)) {
      firstByKey.set(key, candidate);
    }
  }
  return Array.from(firstByKey.values()).sort(
    (left, right) =>
      left.connectionId.localeCompare(right.connectionId) || left.sourceName.localeCompare(right.sourceName),
  );
}
|
||||
|
||||
/**
 * Extends the touched sources with their direct declared-join neighbors.
 *
 * For every connection in `input.connectionIds` that has at least one touched
 * source, all sources of that connection are loaded. A source is added when a
 * touched source declares a join to it, or when it declares a join to a
 * touched source. Touched sources on connections not listed in
 * `input.connectionIds` are kept but not expanded.
 *
 * @returns The de-duplicated, sorted union of touched sources and neighbors.
 */
async function expandTouchedSlSourcesWithDirectJoinNeighbors(input: FinalArtifactGateInput): Promise<TouchedSlSource[]> {
  const touchedNamesByConnection = new Map<string, Set<string>>();
  for (const { connectionId, sourceName } of input.touchedSlSources) {
    let names = touchedNamesByConnection.get(connectionId);
    if (!names) {
      names = new Set<string>();
      touchedNamesByConnection.set(connectionId, names);
    }
    names.add(sourceName);
  }

  const expanded = [...input.touchedSlSources];
  for (const connectionId of input.connectionIds) {
    const touchedNames = touchedNamesByConnection.get(connectionId);
    if (touchedNames === undefined || touchedNames.size === 0) {
      continue;
    }

    const { sources } = await input.semanticLayerService.loadAllSources(connectionId);
    for (const source of sources) {
      const joinTargets = (source.joins ?? []).map((join) => join.to);

      // Outgoing edges: everything a touched source joins to.
      if (touchedNames.has(source.name)) {
        for (const target of joinTargets) {
          expanded.push({ connectionId, sourceName: target });
        }
      }

      // Incoming edges: any source that joins to a touched source.
      if (joinTargets.some((target) => touchedNames.has(target))) {
        expanded.push({ connectionId, sourceName: source.name });
      }
    }
  }

  return uniqueTouchedSources(expanded);
}
|
||||
|
||||
/**
 * Checks the frontmatter `sl_refs` of every changed wiki page against the
 * loaded semantic-layer sources.
 *
 * A reference with a connection prefix is looked up on that connection only;
 * otherwise every connection in `input.connectionIds` is searched in order and
 * the first source with a matching name is used. Pages that cannot be read
 * are skipped.
 *
 * @returns One message per reference whose source is unknown
 *   (`unknown sl_refs entry`) or whose entity is not declared on the source
 *   (`unknown sl_refs entity`).
 */
async function validateWikiSlRefs(input: FinalArtifactGateInput): Promise<string[]> {
  type LoadedSources = Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'];

  const sourcesByConnection = new Map<string, LoadedSources>();
  for (const connectionId of input.connectionIds) {
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    sourcesByConnection.set(connectionId, loaded.sources);
  }

  const resolveSource = (connectionIds: string[], sourceName: string): LoadedSources[number] | undefined => {
    for (const connectionId of connectionIds) {
      const match = sourcesByConnection.get(connectionId)?.find((candidate) => candidate.name === sourceName);
      if (match) {
        return match;
      }
    }
    return undefined;
  };

  const problems: string[] = [];
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    for (const ref of page.frontmatter.sl_refs ?? []) {
      const { connectionId, sourceName, entityName } = parseSlRef(ref);
      const searchIn = connectionId ? [connectionId] : input.connectionIds;
      const source = resolveSource(searchIn, sourceName);
      if (!source) {
        problems.push(`${pageKey}: unknown sl_refs entry ${ref}`);
      } else if (entityName && !slEntityNames(source).has(entityName)) {
        problems.push(`${pageKey}: unknown sl_refs entity ${ref}`);
      }
    }
  }
  return problems;
}
|
||||
|
||||
/**
 * Finds wiki references on changed pages that point at missing pages.
 *
 * Each changed page is read from the `GLOBAL` scope; its frontmatter `refs`
 * and its content are handed to `findMissingWikiRefs`. Pages that cannot be
 * read are skipped.
 *
 * @returns One `"<pageKey> -> <missingRef>"` entry per dangling reference.
 */
async function validateWikiRefs(input: FinalArtifactGateInput): Promise<string[]> {
  const danglingEdges: string[] = [];
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (page) {
      const missingTargets = await findMissingWikiRefs({
        wikiService: input.wikiService,
        scope: 'GLOBAL',
        scopeId: null,
        pageKey,
        refs: page.frontmatter.refs,
        content: page.content,
      });
      for (const target of missingTargets) {
        danglingEdges.push(`${pageKey} -> ${target}`);
      }
    }
  }
  return danglingEdges;
}
|
||||
|
||||
/**
 * Runs every final artifact gate and fails if any of them reports a problem.
 *
 * Gates, in order:
 * 1. touched semantic-layer sources plus their direct join neighbors are
 *    passed to `input.validateTouchedSources`;
 * 2. frontmatter `sl_refs` of changed wiki pages are checked;
 * 3. wiki-to-wiki references of changed pages are checked for missing targets;
 * 4. the body of each changed page is checked with `findInvalidWikiBodyRefs`.
 *
 * All problems are collected before failing, so the error lists every one.
 *
 * @throws Error whose message starts with `final artifact gates failed:`
 *   followed by one problem per line.
 */
export async function validateFinalIngestArtifacts(input: FinalArtifactGateInput): Promise<void> {
  const touchedWithNeighbors = await expandTouchedSlSourcesWithDirectJoinNeighbors(input);
  const { invalidSources } = await input.validateTouchedSources(touchedWithNeighbors);

  const gateErrors: string[] = [];
  for (const source of invalidSources) {
    gateErrors.push(`semantic-layer validation failed for ${source}`);
  }

  gateErrors.push(...(await validateWikiSlRefs(input)));

  const danglingWikiRefs = await validateWikiRefs(input);
  if (danglingWikiRefs.length > 0) {
    gateErrors.push(`wiki references target missing page(s): ${danglingWikiRefs.join(', ')}`);
  }

  const loadSources = async (connectionId: string) => {
    const { sources } = await input.semanticLayerService.loadAllSources(connectionId);
    return sources;
  };
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    const bodyErrors = await findInvalidWikiBodyRefs({
      pageKey,
      body: page.content,
      visibleConnectionIds: input.connectionIds,
      loadSources,
      tableExists: input.tableExists,
    });
    gateErrors.push(...bodyErrors);
  }

  if (gateErrors.length === 0) {
    return;
  }
  throw new Error(`final artifact gates failed:\n${gateErrors.join('\n')}`);
}
|
||||
|
||||
/**
 * Ensures every provenance row points at a raw path that is either in the
 * current set or in the deleted set.
 *
 * @throws Error naming the first row (in input order) whose raw path is in
 *   neither set.
 */
export function validateProvenanceRawPaths(input: ProvenanceRawPathValidationInput): void {
  const { rows, currentRawPaths, deletedRawPaths } = input;
  const offending = rows.find(({ rawPath }) => !(currentRawPaths.has(rawPath) || deletedRawPaths.has(rawPath)));
  if (offending) {
    throw new Error(`provenance row references raw path outside this snapshot: ${offending.rawPath}`);
  }
}
|
||||
136
packages/context/src/ingest/final-gate-repair.test.ts
Normal file
136
packages/context/src/ingest/final-gate-repair.test.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js';
|
||||
import { FileIngestTraceWriter } from './ingest-trace.js';
|
||||
|
||||
/**
 * Creates a throwaway repair fixture under the OS temp directory.
 *
 * The workdir contains one global wiki page that mentions
 * `mart_account_segments.total_contract_arr_cents` and one semantic-layer
 * YAML file for `mart_account_segments` whose only measure is
 * `total_contract_arr`. A file-backed trace writer targeting `trace.jsonl` in
 * the fixture root is returned alongside the paths.
 */
async function makeHarness() {
  const root = await mkdtemp(join(tmpdir(), 'ktx-final-gate-repair-'));
  const workdir = join(root, 'workdir');

  await mkdir(join(workdir, 'wiki/global'), { recursive: true });
  await mkdir(join(workdir, 'semantic-layer/warehouse'), { recursive: true });

  const wikiPagePath = join(workdir, 'wiki/global/account-segments.md');
  const wikiPageText =
    '---\nsummary: Account segments\nusage_mode: auto\n---\n\nARR uses `mart_account_segments.total_contract_arr_cents`.\n';
  await writeFile(wikiPagePath, wikiPageText, 'utf-8');

  const sourcePath = join(workdir, 'semantic-layer/warehouse/mart_account_segments.yaml');
  const sourceYaml =
    'name: mart_account_segments\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n';
  await writeFile(sourcePath, sourceYaml, 'utf-8');

  const trace = new FileIngestTraceWriter({
    tracePath: join(root, 'trace.jsonl'),
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    runId: 'run-1',
    syncId: 'sync-1',
    level: 'trace',
  });

  return { root, workdir, trace };
}
|
||||
|
||||
describe('finalGateRepairPaths', () => {
|
||||
it('derives sorted wiki and semantic-layer file paths', () => {
|
||||
expect(
|
||||
finalGateRepairPaths({
|
||||
changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'],
|
||||
touchedSlSources: [
|
||||
{ connectionId: 'warehouse', sourceName: 'mart_account_segments' },
|
||||
{ connectionId: 'warehouse', sourceName: 'orders' },
|
||||
{ connectionId: 'warehouse', sourceName: 'orders' },
|
||||
],
|
||||
}),
|
||||
).toEqual([
|
||||
'semantic-layer/warehouse/mart_account_segments.yaml',
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
'wiki/global/account-segments.md',
|
||||
'wiki/global/overview.md',
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('repairFinalGateFailure', () => {
|
||||
it('lets the repair agent read gate errors and edit only allowed files', async () => {
|
||||
const { workdir, trace } = await makeHarness();
|
||||
const agentRunner = {
|
||||
runLoop: vi.fn(async (params: any) => {
|
||||
const error = await params.toolSet.read_gate_error.execute({});
|
||||
expect(error.markdown).toContain('total_contract_arr_cents');
|
||||
|
||||
const page = await params.toolSet.read_repair_file.execute({
|
||||
path: 'wiki/global/account-segments.md',
|
||||
});
|
||||
expect(page.markdown).toContain('total_contract_arr_cents');
|
||||
|
||||
await expect(
|
||||
params.toolSet.write_repair_file.execute({
|
||||
path: 'wiki/global/other.md',
|
||||
content: 'not allowed',
|
||||
}),
|
||||
).rejects.toThrow(/gate repair path not allowed/);
|
||||
|
||||
await params.toolSet.write_repair_file.execute({
|
||||
path: 'wiki/global/account-segments.md',
|
||||
content: page.markdown.replace('total_contract_arr_cents', 'total_contract_arr'),
|
||||
});
|
||||
return { stopReason: 'natural' as const };
|
||||
}),
|
||||
};
|
||||
|
||||
const result = await repairFinalGateFailure({
|
||||
agentRunner,
|
||||
workdir,
|
||||
gateError:
|
||||
'final artifact gates failed:\naccount-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
|
||||
allowedPaths: ['wiki/global/account-segments.md'],
|
||||
trace,
|
||||
repairKind: 'final_artifact_gate',
|
||||
maxAttempts: 1,
|
||||
stepBudget: 8,
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
status: 'repaired',
|
||||
attempts: 1,
|
||||
changedPaths: ['wiki/global/account-segments.md'],
|
||||
});
|
||||
await expect(readFile(join(workdir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.toContain(
|
||||
'total_contract_arr',
|
||||
);
|
||||
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_repaired');
|
||||
expect(agentRunner.runLoop).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
modelRole: 'repair',
|
||||
stepBudget: 8,
|
||||
telemetryTags: expect.objectContaining({
|
||||
operationName: 'ingest-isolated-diff-gate-repair',
|
||||
repairKind: 'final_artifact_gate',
|
||||
}),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('returns failed when the repair agent edits no allowed file', async () => {
|
||||
const { workdir, trace } = await makeHarness();
|
||||
const result = await repairFinalGateFailure({
|
||||
agentRunner: { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) },
|
||||
workdir,
|
||||
gateError: 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity',
|
||||
allowedPaths: ['wiki/global/account-segments.md'],
|
||||
trace,
|
||||
repairKind: 'final_artifact_gate',
|
||||
maxAttempts: 1,
|
||||
stepBudget: 8,
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
status: 'failed',
|
||||
attempts: 1,
|
||||
reason: 'gate repair completed without editing an allowed path',
|
||||
});
|
||||
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_failed');
|
||||
});
|
||||
});
|
||||
230
packages/context/src/ingest/final-gate-repair.ts
Normal file
230
packages/context/src/ingest/final-gate-repair.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js';
|
||||
import type { TouchedSlSource } from '../tools/index.js';
|
||||
import type { IngestTraceWriter } from './ingest-trace.js';
|
||||
import { traceTimed } from './ingest-trace.js';
|
||||
|
||||
/** Which gate produced the failure that is being repaired. */
type FinalGateRepairKind = 'patch_semantic_gate' | 'final_artifact_gate';

/**
 * Outcome of a gate repair run, discriminated by `status`.
 *
 * - `repaired`: an attempt wrote at least one allowed path; `changedPaths`
 *   lists the written paths.
 * - `failed`: no attempt succeeded; `reason` describes the last failure.
 */
export type FinalGateRepairResult =
  | { status: 'repaired'; attempts: number; changedPaths: string[] }
  | { status: 'failed'; attempts: number; reason: string };
|
||||
|
||||
/** Input for one gate repair run. */
export interface RepairFinalGateFailureInput {
  /** Runner that executes the repair agent loop. */
  agentRunner: AgentRunnerPort;
  /** Root directory against which repair paths are resolved. */
  workdir: string;
  /** Gate failure text shown to the repair agent. */
  gateError: string;
  /** Repository-relative paths the agent may read and write. */
  allowedPaths: string[];
  /** Trace writer that records repair events. */
  trace: IngestTraceWriter;
  /** Which gate produced the failure. */
  repairKind: FinalGateRepairKind;
  /** Maximum number of repair attempts; defaults to 1. */
  maxAttempts?: number;
  /** Step budget handed to each agent loop; defaults to 16. */
  stepBudget?: number;
}
|
||||
|
||||
/** Arguments of the `read_repair_file` tool: a non-empty path. */
const readRepairFileSchema = z.object({
  path: z.string().min(1),
});

/** Arguments of the `write_repair_file` tool: a non-empty path and the full new content. */
const writeRepairFileSchema = z.object({
  path: z.string().min(1),
  content: z.string(),
});
|
||||
|
||||
/**
 * Normalizes a path to repository-relative, forward-slash form.
 *
 * Backslashes become `/`, leading slashes are dropped, and empty segments are
 * removed.
 *
 * @throws Error when nothing remains after normalization or when any segment
 *   is `.` or `..`.
 */
function normalizeRepoPath(path: string): string {
  const withoutLeadingSlash = path.replace(/\\/g, '/').replace(/^\/+/, '');

  const segments: string[] = [];
  let hasDotSegment = false;
  for (const segment of withoutLeadingSlash.split('/')) {
    if (segment.length === 0) {
      continue;
    }
    if (segment === '.' || segment === '..') {
      hasDotSegment = true;
    }
    segments.push(segment);
  }

  if (segments.length === 0 || hasDotSegment) {
    throw new Error(`gate repair path must be a repository-relative path: ${path}`);
  }
  return segments.join('/');
}
|
||||
|
||||
/**
 * Normalizes `path` and verifies that it is one of the allowed repair paths.
 *
 * @returns The normalized path.
 * @throws Error when the path cannot be normalized or is not in `allowedPaths`.
 */
function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string {
  const repoPath = normalizeRepoPath(path);
  if (allowedPaths.has(repoPath)) {
    return repoPath;
  }
  throw new Error(`gate repair path not allowed: ${repoPath}`);
}
|
||||
|
||||
/**
 * Reads a UTF-8 file, treating a missing file as an expected outcome.
 *
 * @returns `{ exists: true, content }` when the file was read, or
 *   `{ exists: false, content: '' }` when the read fails with `ENOENT`.
 * @throws Any other read error, unchanged.
 */
async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> {
  let content: string;
  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (isMissingFile) {
      return { exists: false, content: '' };
    }
    throw error;
  }
  return { exists: true, content };
}
|
||||
|
||||
/**
 * Builds the fixed system prompt for the gate repair agent: a `<role>`
 * section followed by a `<rules>` section.
 */
function buildGateRepairSystemPrompt(): string {
  const role = [
    '<role>',
    'You repair one KTX isolated-diff artifact gate failure inside the integration worktree.',
    '</role>',
  ];
  const rules = [
    '<rules>',
    '- Use read_gate_error first.',
    '- Read only files exposed by read_repair_file.',
    '- Edit only paths exposed by write_repair_file.',
    '- Prefer the smallest text edit that makes the gate pass.',
    '- Preserve accepted work-unit, reconciliation, and deterministic projection content.',
    '- Do not invent warehouse facts, business definitions, or semantic-layer entities.',
    '- If the gate error requires choosing between conflicting facts without evidence, stop without editing.',
    '</rules>',
  ];
  // A single blank line separates the two sections.
  return [...role, '', ...rules].join('\n');
}
|
||||
|
||||
/**
 * Builds the per-attempt user prompt for the gate repair agent.
 *
 * The prompt states the repair kind and attempt counter, lists the allowed
 * files as a `- path` bullet list, embeds the gate error verbatim, and ends
 * with the working instructions.
 */
function buildGateRepairUserPrompt(input: {
  gateError: string;
  allowedPaths: string[];
  repairKind: FinalGateRepairKind;
  attempt: number;
  maxAttempts: number;
}): string {
  const allowedFileList = input.allowedPaths.map((path) => `- ${path}`).join('\n');
  const sections = [
    'Repair isolated-diff artifact gates.',
    '',
    `Repair kind: ${input.repairKind}`,
    `Attempt: ${input.attempt} of ${input.maxAttempts}`,
    '',
    'Allowed files:',
    `${allowedFileList}`,
    '',
    'Gate error:',
    `${input.gateError}`,
    '',
    'Use read_gate_error first. Then inspect only the allowed files, write the',
    'minimal repaired content, and stop.',
  ];
  return sections.join('\n');
}
|
||||
|
||||
/**
 * Builds the three tools exposed to the gate repair agent.
 *
 * - `read_gate_error` returns the gate failure text.
 * - `read_repair_file` returns the content of an allowed file, or a
 *   `(missing file: …)` placeholder when it does not exist.
 * - `write_repair_file` overwrites an allowed file (creating parent
 *   directories) and records the path in `input.editedPaths`.
 *
 * Both file tools reject any path that is not in `input.allowedPaths`.
 */
function buildToolSet(input: {
  workdir: string;
  gateError: string;
  allowedPaths: ReadonlySet<string>;
  editedPaths: Set<string>;
}): KtxRuntimeToolSet {
  const { workdir, gateError, allowedPaths, editedPaths } = input;

  const readRepairFile = async ({ path }: z.infer<typeof readRepairFileSchema>) => {
    const repoPath = assertAllowedPath(path, allowedPaths);
    const { exists, content } = await readOptionalFile(join(workdir, repoPath));
    return {
      markdown: exists ? content : `(missing file: ${repoPath})`,
      structured: { path: repoPath, exists },
    };
  };

  const writeRepairFile = async ({ path, content }: z.infer<typeof writeRepairFileSchema>) => {
    const repoPath = assertAllowedPath(path, allowedPaths);
    const absolutePath = join(workdir, repoPath);
    await mkdir(dirname(absolutePath), { recursive: true });
    await writeFile(absolutePath, content, 'utf-8');
    // Recorded only after the write succeeded, so failed writes never count as edits.
    editedPaths.add(repoPath);
    return {
      markdown: `Wrote ${repoPath}`,
      structured: { path: repoPath, bytes: Buffer.byteLength(content) },
    };
  };

  return {
    read_gate_error: {
      name: 'read_gate_error',
      description: 'Read the artifact gate failure that must be repaired.',
      inputSchema: z.object({}),
      execute: async () => ({
        markdown: gateError,
        structured: { gateError },
      }),
    },
    read_repair_file: {
      name: 'read_repair_file',
      description: 'Read one allowed file from the integration worktree.',
      inputSchema: readRepairFileSchema,
      execute: readRepairFile,
    },
    write_repair_file: {
      name: 'write_repair_file',
      description: 'Replace one allowed integration worktree file with repaired text content.',
      inputSchema: writeRepairFileSchema,
      execute: writeRepairFile,
    },
  };
}
|
||||
|
||||
/**
 * Derives the repository-relative files a gate repair may touch.
 *
 * Touched semantic-layer sources map to
 * `semantic-layer/<connectionId>/<sourceName>.yaml` and changed wiki pages map
 * to `wiki/global/<pageKey>.md`.
 *
 * @returns The de-duplicated paths in default string sort order.
 */
export function finalGateRepairPaths(input: {
  changedWikiPageKeys: string[];
  touchedSlSources: TouchedSlSource[];
}): string[] {
  const paths = new Set<string>();
  for (const { connectionId, sourceName } of input.touchedSlSources) {
    paths.add(`semantic-layer/${connectionId}/${sourceName}.yaml`);
  }
  for (const pageKey of input.changedWikiPageKeys) {
    paths.add(`wiki/global/${pageKey}.md`);
  }
  return Array.from(paths).sort();
}
|
||||
|
||||
/**
 * Runs the repair agent against a gate failure, retrying up to `maxAttempts`
 * times (default 1) with a step budget of `stepBudget` (default 16).
 *
 * Each attempt runs one agent loop with a fresh tool set and is wrapped in a
 * `gate_repair` timed trace. An attempt succeeds when the loop does not stop
 * with `error` and at least one allowed path was written. Otherwise a
 * `gate_repair_failed` event is traced and the next attempt starts.
 *
 * @returns `repaired` with the sorted written paths for the first successful
 *   attempt, or `failed` with the reason of the last failed attempt.
 * @throws When an allowed path cannot be normalized.
 */
export async function repairFinalGateFailure(
  input: RepairFinalGateFailureInput,
): Promise<FinalGateRepairResult> {
  const { agentRunner, workdir, gateError, trace, repairKind } = input;
  const allowedPaths = new Set(input.allowedPaths.map(normalizeRepoPath));
  const maxAttempts = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 16;

  let failureReason = 'gate repair did not run';
  let attempt = 0;
  while (attempt < maxAttempts) {
    attempt += 1;

    // Fresh per attempt so edits from a failed attempt are not counted later.
    const editedPaths = new Set<string>();
    const sortedAllowedPaths = [...allowedPaths].sort();
    const traceData = {
      repairKind,
      attempt,
      maxAttempts,
      allowedPaths: sortedAllowedPaths,
      gateError,
    };

    const userPrompt = buildGateRepairUserPrompt({
      gateError,
      allowedPaths: sortedAllowedPaths,
      repairKind,
      attempt,
      maxAttempts,
    });
    const toolSet = buildToolSet({ workdir, gateError, allowedPaths, editedPaths });

    const result = await traceTimed(trace, 'gate_repair', 'gate_repair', traceData, async () =>
      agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt: buildGateRepairSystemPrompt(),
        userPrompt,
        toolSet,
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-gate-repair',
          source: trace.context.sourceKey,
          jobId: trace.context.jobId,
          repairKind,
        },
      }),
    );

    if (result.stopReason === 'error') {
      failureReason = result.error?.message ?? 'gate repair agent loop errored';
      await trace.event('error', 'gate_repair', 'gate_repair_failed', traceData, result.error);
      continue;
    }

    const changedPaths = [...editedPaths].sort();
    if (changedPaths.length > 0) {
      await trace.event('debug', 'gate_repair', 'gate_repair_repaired', {
        ...traceData,
        changedPaths,
      });
      return { status: 'repaired', attempts: attempt, changedPaths };
    }

    failureReason = 'gate repair completed without editing an allowed path';
    await trace.event('error', 'gate_repair', 'gate_repair_failed', {
      ...traceData,
      reason: failureReason,
    });
  }

  return { status: 'failed', attempts: maxAttempts, reason: failureReason };
}
|
||||
|
|
@ -17,6 +17,11 @@ export {
|
|||
buildLiveDatabaseTableNaturalKey,
|
||||
ktxSchemaSnapshotToExtractedSchema,
|
||||
} from './adapters/live-database/extracted-schema.js';
|
||||
export {
|
||||
assertSemanticLayerTargetPathsAllowed,
|
||||
findDisallowedSemanticLayerTargetPaths,
|
||||
semanticLayerConnectionIdFromPath,
|
||||
} from './semantic-layer-target-policy.js';
|
||||
export { LiveDatabaseSourceAdapter } from './adapters/live-database/live-database.adapter.js';
|
||||
export type {
|
||||
BuildLiveDatabaseManifestShardsInput,
|
||||
|
|
@ -609,6 +614,11 @@ export {
|
|||
} from './raw-sources-paths.js';
|
||||
export { ingestReportSnapshotSchema, parseIngestReportSnapshot } from './report-snapshot.js';
|
||||
export type { IngestReportBody, IngestReportSnapshot } from './reports.js';
|
||||
export * from './artifact-gates.js';
|
||||
export * from './ingest-trace.js';
|
||||
export * from './isolated-diff/git-patch.js';
|
||||
export * from './isolated-diff/patch-integrator.js';
|
||||
export * from './isolated-diff/work-unit-executor.js';
|
||||
export * from './reports.js';
|
||||
export { SourceAdapterRegistry } from './source-adapter-registry.js';
|
||||
export type { SqliteBundleIngestStoreOptions } from './sqlite-bundle-ingest-store.js';
|
||||
|
|
@ -652,4 +662,7 @@ export type {
|
|||
TriageSignals,
|
||||
UnresolvedCardInfo,
|
||||
WorkUnit,
|
||||
DeterministicProjectionContext,
|
||||
ProjectionResult,
|
||||
} from './types.js';
|
||||
export * from './wiki-body-refs.js';
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,7 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
|
||||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { GitService } from '../core/index.js';
|
||||
import { addTouchedSlSource } from '../tools/index.js';
|
||||
import { IngestBundleRunner } from './ingest-bundle.runner.js';
|
||||
import { createMemoryFlowLiveBuffer } from './memory-flow/live-buffer.js';
|
||||
|
|
@ -123,9 +122,15 @@ const makeDeps = () => {
|
|||
};
|
||||
const scopedGit = {
|
||||
revParseHead: vi.fn().mockResolvedValue('h'),
|
||||
commitFiles: vi.fn(),
|
||||
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
const sessionWorktreeService = {
|
||||
create: vi.fn().mockResolvedValue({
|
||||
|
|
@ -167,10 +172,12 @@ const makeDeps = () => {
|
|||
loadPrompt: vi.fn().mockResolvedValue('base-framing'),
|
||||
};
|
||||
const wikiService = {
|
||||
forWorktree: vi.fn().mockReturnValue({}),
|
||||
forWorktree: vi.fn(),
|
||||
listPageKeys: vi.fn().mockResolvedValue([]),
|
||||
readPage: vi.fn().mockResolvedValue(null),
|
||||
syncFromCommit: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
wikiService.forWorktree.mockReturnValue(wikiService);
|
||||
const knowledgeSlRefs = {
|
||||
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 1, deleted: 0 }),
|
||||
};
|
||||
|
|
@ -178,7 +185,7 @@ const makeDeps = () => {
|
|||
listPagesForUser: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
const semanticLayerService = {
|
||||
forWorktree: vi.fn().mockReturnValue({}),
|
||||
forWorktree: vi.fn(),
|
||||
listFilesForConnection: vi
|
||||
.fn()
|
||||
.mockImplementation((connectionId: string) =>
|
||||
|
|
@ -193,6 +200,7 @@ const makeDeps = () => {
|
|||
}),
|
||||
),
|
||||
};
|
||||
semanticLayerService.forWorktree.mockReturnValue(semanticLayerService);
|
||||
const slSearchService = {
|
||||
indexSources: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
|
@ -255,8 +263,12 @@ const buildRunner = (deps: ReturnType<typeof makeDeps> = makeDeps(), overrides:
|
|||
resolveUploadDir: (uploadId) => `/tmp/ktx-test/ingest-uploads/${uploadId}`,
|
||||
resolvePullDir: (jobId) => `/tmp/ktx-test/ingest-pulls/${jobId}`,
|
||||
resolveTranscriptDir: (jobId) => `/tmp/ktx-test/run/wu-transcripts/${jobId}`,
|
||||
resolveTracePath: (jobId) => `/tmp/ktx-test/ingest-traces/${jobId}/trace.jsonl`,
|
||||
},
|
||||
settings: {
|
||||
probeRowCount: 1,
|
||||
memoryIngestionModel: 'test-model',
|
||||
},
|
||||
settings: { probeRowCount: 1, memoryIngestionModel: 'test-model' },
|
||||
skillsRegistry: deps.skillsRegistry as any,
|
||||
promptService: deps.promptService as any,
|
||||
wikiService: deps.wikiService as any,
|
||||
|
|
@ -1505,7 +1517,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h1']]),
|
||||
currentHashes: new Map([['a.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/looker-run/fake/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
|
@ -1570,6 +1582,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
|
||||
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
|
||||
});
|
||||
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
|
||||
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
|
||||
);
|
||||
|
|
@ -1972,9 +1985,15 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
const assertError = new Error('Worktree has in-progress git operation (sequencer ...); refusing to proceed');
|
||||
const sessionGit = {
|
||||
revParseHead: vi.fn().mockResolvedValue('h'),
|
||||
commitFiles: vi.fn(),
|
||||
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockRejectedValue(assertError),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
deps.sessionWorktreeService.create.mockResolvedValue({
|
||||
chatId: 'j1',
|
||||
|
|
@ -2005,135 +2024,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('squash-merges only successful WUs into main when one WU fails sl_validate', async () => {
|
||||
const homeDir = await mkdtemp(join(tmpdir(), 'ingest-rollback-'));
|
||||
try {
|
||||
const configDir = join(homeDir, 'config');
|
||||
const mainGit = new GitService({
|
||||
storage: { configDir, homeDir },
|
||||
git: {
|
||||
userName: 'System User',
|
||||
userEmail: 'system@example.com',
|
||||
bootstrapMessage: 'Initialize test config repo',
|
||||
bootstrapAuthor: 'test-system',
|
||||
bootstrapAuthorEmail: 'system@example.com',
|
||||
},
|
||||
});
|
||||
await mainGit.onModuleInit();
|
||||
const baseSha = await mainGit.revParseHead();
|
||||
if (!baseSha) {
|
||||
throw new Error('no base sha');
|
||||
}
|
||||
|
||||
const deps = makeDeps();
|
||||
const sessionDir = join(homeDir, '.worktrees', 'session-j1');
|
||||
const sessionBranch = 'session/j1';
|
||||
let currentToolSession: any = null;
|
||||
|
||||
deps.gitService = mainGit as any;
|
||||
deps.sessionWorktreeService.create.mockImplementation(async (_jobId: string, startSha: string) => {
|
||||
await mkdir(join(homeDir, '.worktrees'), { recursive: true });
|
||||
await mainGit.addWorktree(sessionDir, sessionBranch, startSha);
|
||||
return {
|
||||
chatId: 'j1',
|
||||
workdir: sessionDir,
|
||||
branch: sessionBranch,
|
||||
baseSha: startSha,
|
||||
createdAt: new Date(),
|
||||
git: mainGit.forWorktree(sessionDir),
|
||||
config: {},
|
||||
};
|
||||
});
|
||||
deps.sessionWorktreeService.cleanup.mockResolvedValue(undefined);
|
||||
deps.adapter.chunk.mockResolvedValue({
|
||||
workUnits: [
|
||||
{ unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] },
|
||||
{ unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] },
|
||||
],
|
||||
});
|
||||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
});
|
||||
deps.slValidator.validateSingleSource.mockImplementation(
|
||||
(_validationDeps: unknown, _connectionId: string, sourceName: string) => ({
|
||||
errors: sourceName === 'bad' ? [{ message: 'bad source rejected' }] : [],
|
||||
warnings: [],
|
||||
}),
|
||||
);
|
||||
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
|
||||
const unitKey = params.telemetryTags?.unitKey;
|
||||
if (unitKey === 'wu-good') {
|
||||
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
|
||||
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'good.yaml'), 'name: good\n');
|
||||
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'good');
|
||||
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'good', detail: '' });
|
||||
await currentToolSession.gitService.commitFiles(
|
||||
['semantic-layer/c1/good.yaml'],
|
||||
'test: add good source',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
}
|
||||
if (unitKey === 'wu-bad') {
|
||||
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
|
||||
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'bad.yaml'), 'name: bad\n');
|
||||
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'bad');
|
||||
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'bad', detail: '' });
|
||||
await currentToolSession.gitService.commitFiles(
|
||||
['semantic-layer/c1/bad.yaml'],
|
||||
'test: add bad source',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
}
|
||||
return { stopReason: 'natural' };
|
||||
});
|
||||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockImplementation(async ({ worktreeRoot }: any) => {
|
||||
const rawDir = join(worktreeRoot, 'raw-sources', 'c1', 'fake', 's');
|
||||
await mkdir(rawDir, { recursive: true });
|
||||
await writeFile(join(rawDir, 'good.raw'), 'good raw');
|
||||
await writeFile(join(rawDir, 'bad.raw'), 'bad raw');
|
||||
return {
|
||||
currentHashes: new Map([
|
||||
['good.raw', 'good-hash'],
|
||||
['bad.raw', 'bad-hash'],
|
||||
]),
|
||||
rawDirInWorktree: 'raw-sources/c1/fake/s',
|
||||
};
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
const result = await runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
expect(result.failedWorkUnits).toEqual(['wu-bad']);
|
||||
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'good.yaml'), 'utf-8')).toContain('good');
|
||||
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'bad.yaml'), 'utf-8').catch(() => null)).toBeNull();
|
||||
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
body: expect.objectContaining({
|
||||
failedWorkUnits: ['wu-bad'],
|
||||
}),
|
||||
}),
|
||||
);
|
||||
await expect(stat(join(configDir, '.git', 'sequencer'))).rejects.toThrow();
|
||||
} finally {
|
||||
await rm(homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('fails the run and rethrows when the adapter cannot detect the bundle', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.detect.mockResolvedValue(false);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
85
packages/context/src/ingest/ingest-trace.test.ts
Normal file
85
packages/context/src/ingest/ingest-trace.test.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import { mkdtemp, readFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { FileIngestTraceWriter, ingestTracePathForJob, traceTimed } from './ingest-trace.js';
|
||||
|
||||
describe('FileIngestTraceWriter', () => {
  /** Reads a JSONL trace file back into one parsed object per line. */
  const readTraceLines = async (tracePath: string) =>
    (await readFile(tracePath, 'utf-8'))
      .trim()
      .split('\n')
      .map((line) => JSON.parse(line));

  it('persists structured trace events as JSONL', async () => {
    const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
    const tracePath = ingestTracePathForJob(root, 'job-1');
    const trace = new FileIngestTraceWriter({
      tracePath,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
    });

    await trace.event('debug', 'snapshot', 'input_snapshot', {
      baseSha: 'abc123',
      rawFileCount: 2,
      diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
    });

    const lines = await readTraceLines(tracePath);
    expect(lines).toHaveLength(1);
    expect(lines[0]).toMatchObject({
      schemaVersion: 1,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
      phase: 'snapshot',
      event: 'input_snapshot',
      data: {
        baseSha: 'abc123',
        rawFileCount: 2,
        diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
      },
    });
    expect(typeof lines[0].at).toBe('string');
  });

  it('records timing and error context for postmortem inspection', async () => {
    vi.useFakeTimers();
    // Restore real timers even when an assertion below fails; otherwise the
    // fake clock leaks into every test that runs afterwards in this worker.
    try {
      vi.setSystemTime(new Date('2026-05-17T12:00:00.000Z'));
      const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
      const tracePath = ingestTracePathForJob(root, 'job-2');
      const trace = new FileIngestTraceWriter({
        tracePath,
        jobId: 'job-2',
        connectionId: 'c1',
        sourceKey: 'fake',
        level: 'trace',
      });

      await expect(
        traceTimed(trace, 'integration', 'apply_patch', { unitKey: 'wu-1' }, async () => {
          vi.advanceTimersByTime(17);
          throw new Error('patch conflict');
        }),
      ).rejects.toThrow('patch conflict');

      const lines = await readTraceLines(tracePath);
      expect(lines.map((line) => line.event)).toEqual(['apply_patch_started', 'apply_patch_failed']);
      expect(lines[1]).toMatchObject({
        level: 'error',
        phase: 'integration',
        data: { unitKey: 'wu-1' },
        error: { name: 'Error', message: 'patch conflict' },
      });
      expect(lines[1].durationMs).toBe(17);
    } finally {
      vi.useRealTimers();
    }
  });

  it('uses the documented trace path layout', () => {
    expect(ingestTracePathForJob('/project/.ktx', 'job-3')).toBe('/project/.ktx/ingest-traces/job-3/trace.jsonl');
  });
});
|
||||
158
packages/context/src/ingest/ingest-trace.ts
Normal file
158
packages/context/src/ingest/ingest-trace.ts
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
import { appendFile, mkdir } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
|
||||
/** Level attached to a trace event and used as the verbosity threshold of a writer. */
export type IngestTraceLevel = 'error' | 'info' | 'debug' | 'trace';

/**
 * Numeric rank of each level. An event is recorded when its rank does not
 * exceed the rank of the writer's configured level, so `error` (rank 0)
 * passes every threshold and `trace` (rank 3) only passes the `trace` one.
 */
const TRACE_LEVEL_RANK: Record<IngestTraceLevel, number> = {
  error: 0,
  info: 1,
  debug: 2,
  trace: 3,
};
|
||||
|
||||
/** Identity and configuration a trace writer stamps onto the events it emits. */
export interface IngestTraceContext {
  /** File that `FileIngestTraceWriter` appends JSONL events to. */
  tracePath: string;
  jobId: string;
  connectionId: string;
  sourceKey: string;
  /** Optional run identifier; copied onto events only when non-empty. */
  runId?: string;
  /** Optional sync identifier; copied onto events only when non-empty. */
  syncId?: string;
  /** Verbosity threshold; `FileIngestTraceWriter` defaults it to `'debug'`. */
  level?: IngestTraceLevel;
}
|
||||
|
||||
/** Shape of one JSON line written to the trace file. */
export interface IngestTraceEvent {
  schemaVersion: 1;
  /** ISO-8601 timestamp taken when the event is written. */
  at: string;
  level: IngestTraceLevel;
  jobId: string;
  connectionId: string;
  sourceKey: string;
  runId?: string;
  syncId?: string;
  phase: string;
  event: string;
  /** Elapsed milliseconds, present on events that report a duration. */
  durationMs?: number;
  data?: Record<string, unknown>;
  /** Serialized failure attached to the event, if any. */
  error?: {
    name: string;
    message: string;
    stack?: string;
  };
}
|
||||
|
||||
/** Sink for structured ingest trace events. */
export interface IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;

  /** Returns a writer whose context carries the given `runId` / `syncId` overrides. */
  withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter;

  /**
   * Records one event.
   *
   * @param level - Level of the event.
   * @param phase - Ingest phase the event belongs to.
   * @param event - Event name within the phase.
   * @param data - Optional structured payload.
   * @param error - Optional failure to attach to the event.
   * @param durationMs - Optional elapsed time in milliseconds.
   */
  event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void>;
}
|
||||
|
||||
/**
 * Builds the trace file location for a job:
 * `<homeDir>/ingest-traces/<jobId>/trace.jsonl`.
 */
export function ingestTracePathForJob(homeDir: string, jobId: string): string {
  const jobTraceDir = join(homeDir, 'ingest-traces', jobId);
  return join(jobTraceDir, 'trace.jsonl');
}
|
||||
|
||||
/**
 * Converts an arbitrary thrown value into the `error` payload of a trace event.
 *
 * - `undefined` / `null` mean "no error" and yield `undefined`.
 * - `Error` instances keep their name, message and (when present) stack.
 * - Error-like objects (a string `message`, optionally a string `name`) keep
 *   those fields instead of collapsing into `"[object Object]"`.
 * - Any other object is rendered as JSON when possible; everything else falls
 *   back to its string form.
 *
 * This function never throws, so writing a failure trace cannot itself fail
 * on an unusual thrown value (e.g. a null-prototype or circular object).
 */
function serializeError(error: unknown): IngestTraceEvent['error'] | undefined {
  if (error === undefined || error === null) {
    return undefined;
  }
  if (error instanceof Error) {
    return {
      name: error.name,
      message: error.message,
      ...(error.stack ? { stack: error.stack } : {}),
    };
  }

  let name = 'Error';
  if (typeof error === 'object') {
    const candidate = error as { name?: unknown; message?: unknown };
    if (typeof candidate.name === 'string' && candidate.name) {
      name = candidate.name;
    }
    if (typeof candidate.message === 'string') {
      return { name, message: candidate.message };
    }
    try {
      // JSON keeps the fields of plain thrown objects readable in the trace.
      const json = JSON.stringify(error);
      if (typeof json === 'string') {
        return { name, message: json };
      }
    } catch {
      // Circular or otherwise unserializable: fall through to the string form.
    }
  }

  let message: string;
  try {
    message = String(error);
  } catch {
    // String() throws for objects without a usable toString/valueOf.
    message = Object.prototype.toString.call(error);
  }
  return { name, message };
}
|
||||
|
||||
/**
 * Decides whether an event at `incoming` level passes a writer configured at
 * `configured` level: it does when its rank is not above the configured rank.
 */
function shouldWrite(configured: IngestTraceLevel, incoming: IngestTraceLevel): boolean {
  const threshold = TRACE_LEVEL_RANK[configured];
  const rank = TRACE_LEVEL_RANK[incoming];
  return threshold >= rank;
}
|
||||
|
||||
/**
 * Trace writer that appends each accepted event as one JSON line to
 * `tracePath`, creating the parent directory on demand.
 */
export class FileIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;

  /** Stores the context, defaulting the verbosity threshold to `'debug'`. */
  constructor(context: IngestTraceContext) {
    const level = context.level ?? 'debug';
    this.tracePath = context.tracePath;
    this.context = { ...context, level };
  }

  /** Creates a sibling writer on the same file with `runId` / `syncId` overridden. */
  withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter {
    const merged: IngestTraceContext = { ...this.context, ...context, tracePath: this.tracePath };
    return new FileIngestTraceWriter(merged);
  }

  /**
   * Appends one event to the trace file unless its level is filtered out by
   * the configured threshold. Optional fields are omitted from the JSON line
   * when they carry no value.
   */
  async event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void> {
    const threshold = this.context.level ?? 'debug';
    if (!shouldWrite(threshold, level)) {
      return;
    }

    const failure = serializeError(error);
    const { jobId, connectionId, sourceKey, runId, syncId } = this.context;

    // Optional fragments are spread in place so the key order of the emitted
    // line stays: identity, run/sync scope, phase/event, timing, data, error.
    const runScope = runId ? { runId } : {};
    const syncScope = syncId ? { syncId } : {};
    const timing = durationMs !== undefined ? { durationMs } : {};
    const details = data ? { data } : {};
    const failureDetails = failure ? { error: failure } : {};

    const record: IngestTraceEvent = {
      schemaVersion: 1,
      at: new Date().toISOString(),
      level,
      jobId,
      connectionId,
      sourceKey,
      ...runScope,
      ...syncScope,
      phase,
      event,
      ...timing,
      ...details,
      ...failureDetails,
    };

    await mkdir(dirname(this.tracePath), { recursive: true });
    await appendFile(this.tracePath, `${JSON.stringify(record)}\n`, 'utf-8');
  }
}
|
||||
|
||||
/** Trace writer that discards every event; its context holds only empty identifiers. */
export class NoopIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath = '';

  readonly context: IngestTraceContext = {
    tracePath: '',
    jobId: '',
    connectionId: '',
    sourceKey: '',
    level: 'error',
  };

  /** Context overrides are ignored; the same instance is returned. */
  withContext(): IngestTraceWriter {
    return this;
  }

  /** Intentionally does nothing. */
  async event(): Promise<void> {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Runs `fn` bracketed by trace events: `<event>_started` before it, then
 * either `<event>_finished` (debug) or `<event>_failed` (error) carrying the
 * elapsed milliseconds.
 *
 * @param trace - Writer that receives the events.
 * @param phase - Phase recorded on every emitted event.
 * @param event - Base event name; suffixes are appended to it.
 * @param data - Payload attached to every emitted event.
 * @param fn - The operation to time.
 * @returns Whatever `fn` resolves to.
 * @throws The error thrown by `fn`, unchanged. A failure while writing the
 *   `_failed` event is suppressed so it cannot replace that original error.
 */
export async function traceTimed<T>(
  trace: IngestTraceWriter,
  phase: string,
  event: string,
  data: Record<string, unknown>,
  fn: () => Promise<T>,
): Promise<T> {
  await trace.event('debug', phase, `${event}_started`, data);
  const started = Date.now();
  let result: T;
  try {
    result = await fn();
  } catch (error) {
    try {
      await trace.event('error', phase, `${event}_failed`, data, error, Date.now() - started);
    } catch {
      // The caller needs the real failure; a broken trace sink must not mask it.
    }
    throw error;
  }
  await trace.event('debug', phase, `${event}_finished`, data, undefined, Date.now() - started);
  return result;
}
|
||||
97
packages/context/src/ingest/isolated-diff/git-patch.test.ts
Normal file
97
packages/context/src/ingest/isolated-diff/git-patch.test.ts
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths, textArtifactRoots } from './git-patch.js';
|
||||
|
||||
describe('isolated diff patch contract', () => {
  it('parses touched paths from no-rename git patches', () => {
    const wikiPage = 'wiki/global/a.md';
    const slSource = 'semantic-layer/c1/orders.yaml';
    // One modified wiki page followed by one newly created semantic-layer source.
    const patchLines = [
      `diff --git a/${wikiPage} b/${wikiPage}`,
      'index 1111111..2222222 100644',
      `--- a/${wikiPage}`,
      `+++ b/${wikiPage}`,
      '@@ -1 +1 @@',
      '-old',
      '+new',
      `diff --git a/${slSource} b/${slSource}`,
      'new file mode 100644',
      '--- /dev/null',
      `+++ b/${slSource}`,
      '@@ -0,0 +1 @@',
      '+name: orders',
      '',
    ];
    const regularTextEntry = (path: string) => ({
      path,
      oldPath: path,
      newPath: path,
      mode: '100644',
      binary: false,
    });

    const parsed = parsePatchTouchedPaths(patchLines.join('\n'));

    expect(parsed).toEqual([regularTextEntry(wikiPage), regularTextEntry(slSource)]);
  });

  it('rejects semantic-layer paths for slDisallowed work units', () => {
    const patch = 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml\nindex 1..2 100644\n';
    const check = () => assertPatchAllowedForWorkUnit({ unitKey: 'lookml-mismatch', patch, slDisallowed: true });

    expect(check).toThrow(/slDisallowed WorkUnit lookml-mismatch touched semantic-layer\/c1\/orders.yaml/);
  });

  it('rejects semantic-layer paths outside allowed target connections', () => {
    const patch =
      'diff --git a/semantic-layer/finance/orders.yaml b/semantic-layer/finance/orders.yaml\nindex 1..2 100644\n';
    const check = () =>
      assertPatchAllowedForWorkUnit({
        unitKey: 'wu-finance',
        patch,
        slDisallowed: false,
        allowedTargetConnectionIds: new Set(['warehouse']),
      });

    expect(check).toThrow(
      /semantic-layer target connection not allowed: semantic-layer\/finance\/orders.yaml \(finance\); allowed: warehouse/,
    );
  });

  it('rejects executable and binary changes under known text artifact roots', () => {
    expect(textArtifactRoots).toEqual(['wiki/', 'semantic-layer/']);

    // A mode flip to 100755 on a wiki page must be refused.
    const executablePatch =
      'diff --git a/wiki/global/a.md b/wiki/global/a.md\nold mode 100644\nnew mode 100755\nindex 1..2\n';
    const applyExecutable = () =>
      assertPatchAllowedForWorkUnit({ unitKey: 'wu-1', patch: executablePatch, slDisallowed: false });
    expect(applyExecutable).toThrow(/unexpected executable mode under wiki\/global\/a.md/);

    // A binary payload under semantic-layer/ must be refused as well.
    const binaryPatch = [
      'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
      'index 1111111..2222222 100644',
      'GIT binary patch',
      'literal 0',
      '',
    ].join('\n');
    const applyBinary = () =>
      assertPatchAllowedForWorkUnit({ unitKey: 'wu-2', patch: binaryPatch, slDisallowed: false });
    expect(applyBinary).toThrow(/unexpected binary patch under semantic-layer\/c1\/orders.yaml/);
  });
});
|
||||
101
packages/context/src/ingest/isolated-diff/git-patch.ts
Normal file
101
packages/context/src/ingest/isolated-diff/git-patch.ts
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import { assertSemanticLayerTargetPathsAllowed } from '../semantic-layer-target-policy.js';
|
||||
|
||||
/**
 * Path prefixes whose files are expected to be plain text: binary payloads
 * and file modes other than `100644` are rejected beneath them.
 */
export const textArtifactRoots = ['wiki/', 'semantic-layer/'] as const;
|
||||
|
||||
/** One file entry parsed out of a git patch. */
export interface PatchTouchedPath {
  /** Effective path of the entry (the new side, unless that side is `/dev/null`). */
  path: string;
  oldPath: string;
  newPath: string;
  /** Six-digit file mode seen in the entry's headers, or `null` when none appeared. */
  mode: string | null;
  /** True when the entry carries a binary payload marker. */
  binary: boolean;
}
|
||||
|
||||
/** Arguments for checking a work unit's patch against the patch policy. */
export interface PatchPolicyInput {
  /** Work unit key, used in rejection messages. */
  unitKey: string;
  /** Raw git patch text. */
  patch: string;
  /** When true, any touched path under `semantic-layer/` is rejected. */
  slDisallowed: boolean;
  /** When provided, touched semantic-layer paths are checked against these connection ids. */
  allowedTargetConnectionIds?: ReadonlySet<string>;
}
|
||||
|
||||
/** Drops a single leading `a/` or `b/` diff prefix, if there is one. */
function stripPrefix(path: string): string {
  const hasDiffPrefix = path.startsWith('a/') || path.startsWith('b/');
  return hasDiffPrefix ? path.slice(2) : path;
}
|
||||
|
||||
/** Reports whether `path` lies under one of the text artifact roots. */
function isTextArtifactPath(path: string): boolean {
  for (const root of textArtifactRoots) {
    if (path.startsWith(root)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/** Single-character C-style escapes git may emit inside a double-quoted path. */
const GIT_QUOTED_PATH_ESCAPES: Readonly<Record<string, string>> = {
  a: '\x07',
  b: '\b',
  t: '\t',
  n: '\n',
  v: '\v',
  f: '\f',
  r: '\r',
};

/**
 * Decodes the inside of a git double-quoted path: `\NNN` octal escapes are
 * bytes of the UTF-8 encoded name, other backslash escapes are C-style.
 * Returns the input unchanged when it cannot be decoded.
 */
function decodeGitQuotedPath(inner: string): string {
  try {
    // Percent-encode everything so decodeURIComponent reassembles the UTF-8 bytes.
    const percentEncoded = inner.replace(/\\([0-3][0-7]{2}|[\s\S])|[^\\]+/g, (match: string, escape?: string) => {
      if (escape === undefined) {
        return encodeURIComponent(match);
      }
      if (escape.length === 3) {
        return `%${parseInt(escape, 8).toString(16).padStart(2, '0')}`;
      }
      return encodeURIComponent(GIT_QUOTED_PATH_ESCAPES[escape] ?? escape);
    });
    return decodeURIComponent(percentEncoded);
  } catch {
    return inner;
  }
}

/**
 * Extracts the old/new paths from a `diff --git` header line, or returns
 * `null` when the line is not such a header.
 */
function parseDiffGitHeader(line: string): { oldPath: string; newPath: string } | null {
  if (!line.startsWith('diff --git ')) {
    return null;
  }
  // No-rename patches repeat one path on both sides. Matching that symmetry is
  // the only reliable way to keep paths that contain spaces in one piece.
  const plain = /^diff --git a\/(.+) b\/\1$/.exec(line);
  if (plain?.[1]) {
    return { oldPath: plain[1], newPath: plain[1] };
  }
  // Paths with special characters are double-quoted and C-escaped by git.
  const quoted = /^diff --git "a\/(.+)" "b\/\1"$/.exec(line);
  if (quoted?.[1]) {
    const decoded = decodeGitQuotedPath(quoted[1]);
    return { oldPath: decoded, newPath: decoded };
  }
  // Fallback for headers that are not symmetric `a/X b/X` pairs.
  const generic = /^diff --git (.+) (.+)$/.exec(line);
  if (!generic) {
    return null;
  }
  return { oldPath: stripPrefix(generic[1] ?? ''), newPath: stripPrefix(generic[2] ?? '') };
}

/**
 * Lists the file entries of a git patch in order of appearance.
 *
 * Each `diff --git` header opens an entry; the header lines that follow it set
 * the entry's `mode` (from `index … <mode>`, `new mode` or `new file mode`)
 * and `binary` flag (`GIT binary patch` / `Binary files …`). Lines before the
 * first header are ignored.
 *
 * Paths containing spaces and git's double-quoted paths are resolved to the
 * real repository-relative path, so path-prefix policy checks performed on the
 * result cannot be sidestepped by such names.
 *
 * @param patch - Raw patch text.
 * @returns One entry per `diff --git` header; empty for an empty patch.
 */
export function parsePatchTouchedPaths(patch: string): PatchTouchedPath[] {
  const entries: PatchTouchedPath[] = [];
  let current: PatchTouchedPath | null = null;

  for (const line of patch.split('\n')) {
    const header = parseDiffGitHeader(line);
    if (header) {
      if (current) {
        entries.push(current);
      }
      current = {
        path: header.newPath === '/dev/null' ? header.oldPath : header.newPath,
        oldPath: header.oldPath,
        newPath: header.newPath,
        mode: null,
        binary: false,
      };
      continue;
    }
    if (!current) {
      continue;
    }
    const indexMode = /^index [0-9a-f]+\.\.[0-9a-f]+(?: ([0-7]{6}))?$/.exec(line);
    if (indexMode?.[1]) {
      current.mode = indexMode[1];
    }
    const newMode = /^new mode ([0-7]{6})$/.exec(line);
    if (newMode) {
      current.mode = newMode[1] ?? current.mode;
    }
    const newFileMode = /^new file mode ([0-7]{6})$/.exec(line);
    if (newFileMode) {
      current.mode = newFileMode[1] ?? current.mode;
    }
    if (line === 'GIT binary patch' || line.startsWith('Binary files ')) {
      current.binary = true;
    }
  }

  if (current) {
    entries.push(current);
  }
  return entries;
}
|
||||
|
||||
/**
 * Validates a work unit's patch against the patch policy and returns the
 * parsed entries when it passes.
 *
 * Checks, in order: the semantic-layer target-connection policy (only when
 * `allowedTargetConnectionIds` is provided), then per entry the `slDisallowed`
 * ban on `semantic-layer/` paths, and for entries under a text artifact root
 * the absence of binary payloads and of any mode other than `100644`.
 *
 * @throws Error describing the first violation found.
 */
export function assertPatchAllowedForWorkUnit(input: PatchPolicyInput): PatchTouchedPath[] {
  const { unitKey, patch, slDisallowed, allowedTargetConnectionIds } = input;
  const touched = parsePatchTouchedPaths(patch);

  if (allowedTargetConnectionIds) {
    assertSemanticLayerTargetPathsAllowed({
      paths: touched.map(({ path }) => path),
      allowedConnectionIds: allowedTargetConnectionIds,
    });
  }

  touched.forEach(({ path, binary, mode }) => {
    if (slDisallowed && path.startsWith('semantic-layer/')) {
      throw new Error(`slDisallowed WorkUnit ${unitKey} touched ${path}`);
    }
    // Binary and mode restrictions only apply beneath the text artifact roots.
    if (!isTextArtifactPath(path)) {
      return;
    }
    if (binary) {
      throw new Error(`unexpected binary patch under ${path}`);
    }
    if (mode && mode !== '100644') {
      throw new Error(`unexpected executable mode under ${path}: ${mode}`);
    }
  });

  return touched;
}
|
||||
|
|
@ -0,0 +1,404 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { GitService } from '../../core/index.js';
|
||||
import { FileIngestTraceWriter } from '../ingest-trace.js';
|
||||
import { integrateWorkUnitPatch } from './patch-integrator.js';
|
||||
|
||||
/**
 * Creates a throwaway config repository under a fresh temp directory, with a
 * single committed page `wiki/global/a.md` containing `old\n`, and returns the
 * directories, the git service and the resulting HEAD sha.
 */
async function makeRepo() {
  const systemName = 'System User';
  const systemEmail = 'system@example.com';
  const seedPage = 'wiki/global/a.md';

  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-integrate-'));
  const configDir = join(homeDir, 'config');
  const git = new GitService({
    storage: { configDir, homeDir },
    git: {
      userName: systemName,
      userEmail: systemEmail,
      bootstrapMessage: 'init',
      bootstrapAuthor: 'system',
      bootstrapAuthorEmail: systemEmail,
    },
  });
  await git.onModuleInit();

  // Seed the base commit that child worktrees branch from.
  await mkdir(join(configDir, 'wiki/global'), { recursive: true });
  await writeFile(join(configDir, seedPage), 'old\n');
  await git.commitFiles([seedPage], 'base', systemName, systemEmail);

  const baseSha = await git.revParseHead();
  return { homeDir, configDir, git, baseSha };
}
|
||||
|
||||
describe('integrateWorkUnitPatch', () => {
|
||||
it('applies a clean patch, runs semantic gates, and commits accepted changes', async () => {
|
||||
const { homeDir, configDir, git, baseSha } = await makeRepo();
|
||||
const childDir = join(homeDir, 'child');
|
||||
await git.addWorktree(childDir, 'child', baseSha);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
await writeFile(join(childDir, 'wiki/global/a.md'), 'new\n');
|
||||
await childGit.commitFiles(['wiki/global/a.md'], 'edit', 'System User', 'system@example.com');
|
||||
const patchPath = join(homeDir, 'patches/wu.patch');
|
||||
await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'),
|
||||
jobId: 'job-1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const result = await integrateWorkUnitPatch({
|
||||
unitKey: 'wu-1',
|
||||
patchPath,
|
||||
integrationGit: git,
|
||||
trace,
|
||||
author: { name: 'KTX Test', email: 'system@ktx.local' },
|
||||
validateAppliedTree: vi.fn().mockResolvedValue(undefined),
|
||||
slDisallowed: false,
|
||||
allowedTargetConnectionIds: new Set(['c1']),
|
||||
});
|
||||
|
||||
expect(result.status).toBe('accepted');
|
||||
await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('new\n');
|
||||
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_apply_finished');
|
||||
});
|
||||
|
||||
it('rolls back and classifies semantic conflicts', async () => {
|
||||
const { homeDir, configDir, git, baseSha } = await makeRepo();
|
||||
const childDir = join(homeDir, 'child-semantic');
|
||||
await git.addWorktree(childDir, 'child-semantic', baseSha);
|
||||
const childGit = git.forWorktree(childDir);
|
||||
await writeFile(join(childDir, 'wiki/global/a.md'), 'bad\n');
|
||||
await childGit.commitFiles(['wiki/global/a.md'], 'bad edit', 'System User', 'system@example.com');
|
||||
const patchPath = join(homeDir, 'patches/bad.patch');
|
||||
await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);
|
||||
const trace = new FileIngestTraceWriter({
|
||||
tracePath: join(homeDir, '.ktx/ingest-traces/job-2/trace.jsonl'),
|
||||
jobId: 'job-2',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
level: 'trace',
|
||||
});
|
||||
|
||||
const result = await integrateWorkUnitPatch({
|
||||
unitKey: 'wu-bad',
|
||||
patchPath,
|
||||
integrationGit: git,
|
||||
trace,
|
||||
author: { name: 'KTX Test', email: 'system@ktx.local' },
|
||||
validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')),
|
||||
slDisallowed: false,
|
||||
allowedTargetConnectionIds: new Set(['c1']),
|
||||
});
|
||||
|
||||
expect(result.status).toBe('semantic_conflict');
|
||||
await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('old\n');
|
||||
});
|
||||
|
||||
// A WorkUnit flagged slDisallowed must not touch semantic-layer files: the
// patch is rejected before it is applied and the rejection is traced.
it('classifies slDisallowed patch policy failures as traced textual conflicts', async () => {
  const forbiddenFile = 'semantic-layer/c1/orders.yaml';
  const { homeDir, configDir, git, baseSha } = await makeRepo();
  await mkdir(join(configDir, 'semantic-layer/c1'), { recursive: true });
  await git.commitFiles(['semantic-layer/c1'], 'empty sl dir', 'System User', 'system@example.com');

  // Produce the offending patch from a child worktree rooted at the base commit.
  const worktreeDir = join(homeDir, 'child-policy');
  await git.addWorktree(worktreeDir, 'child-policy', baseSha);
  const worktreeGit = git.forWorktree(worktreeDir);
  await mkdir(join(worktreeDir, 'semantic-layer/c1'), { recursive: true });
  await writeFile(join(worktreeDir, forbiddenFile), 'name: orders\ncolumns: []\njoins: []\nmeasures: []\n');
  await worktreeGit.commitFiles([forbiddenFile], 'forbidden sl', 'System User', 'system@example.com');
  const patchPath = join(homeDir, 'patches/forbidden.patch');
  await worktreeGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);

  const traceWriter = new FileIngestTraceWriter({
    tracePath: join(homeDir, '.ktx/ingest-traces/job-policy/trace.jsonl'),
    jobId: 'job-policy',
    connectionId: 'c1',
    sourceKey: 'fake',
    level: 'trace',
  });

  const outcome = await integrateWorkUnitPatch({
    unitKey: 'lookml-mismatch',
    patchPath,
    integrationGit: git,
    trace: traceWriter,
    author: { name: 'KTX Test', email: 'system@ktx.local' },
    validateAppliedTree: vi.fn().mockResolvedValue(undefined),
    slDisallowed: true,
    allowedTargetConnectionIds: new Set(['c1']),
  });

  expect(outcome).toMatchObject({
    status: 'textual_conflict',
    touchedPaths: [forbiddenFile],
  });
  const traceText = await readFile(traceWriter.tracePath, 'utf-8');
  expect(traceText).toContain('patch_policy_rejected');
  expect(traceText).toContain('slDisallowed WorkUnit lookml-mismatch touched semantic-layer/c1/orders.yaml');
});
|
||||
|
||||
// A patch writing under semantic-layer/<connection> for a connection that is
// not in allowedTargetConnectionIds is rejected by policy and traced.
it('classifies unauthorized semantic-layer targets as traced textual conflicts', async () => {
  const unauthorizedFile = 'semantic-layer/finance/orders.yaml';
  const { homeDir, git, baseSha } = await makeRepo();

  const worktreeDir = join(homeDir, 'child-target-policy');
  await git.addWorktree(worktreeDir, 'child-target-policy', baseSha);
  const worktreeGit = git.forWorktree(worktreeDir);
  await mkdir(join(worktreeDir, 'semantic-layer/finance'), { recursive: true });
  await writeFile(join(worktreeDir, unauthorizedFile), 'name: orders\ncolumns: []\njoins: []\nmeasures: []\n');
  await worktreeGit.commitFiles([unauthorizedFile], 'unauthorized sl', 'System User', 'system@example.com');
  const patchPath = join(homeDir, 'patches/unauthorized.patch');
  await worktreeGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);

  const traceWriter = new FileIngestTraceWriter({
    tracePath: join(homeDir, '.ktx/ingest-traces/job-target-policy/trace.jsonl'),
    jobId: 'job-target-policy',
    connectionId: 'c1',
    sourceKey: 'fake',
    level: 'trace',
  });

  const outcome = await integrateWorkUnitPatch({
    unitKey: 'wu-finance',
    patchPath,
    integrationGit: git,
    trace: traceWriter,
    author: { name: 'KTX Test', email: 'system@ktx.local' },
    validateAppliedTree: vi.fn().mockResolvedValue(undefined),
    slDisallowed: false,
    // Only "warehouse" is allowed, so the "finance" target must be refused.
    allowedTargetConnectionIds: new Set(['warehouse']),
  });

  expect(outcome).toMatchObject({
    status: 'textual_conflict',
    touchedPaths: [unauthorizedFile],
  });
  const traceText = await readFile(traceWriter.tracePath, 'utf-8');
  expect(traceText).toContain('patch_policy_rejected');
  expect(traceText).toContain('semantic-layer target connection not allowed');
  expect(traceText).toContain('allowedTargetConnectionIds');
});
|
||||
|
||||
// The patch is built against an older base than the integration HEAD, so it
// cannot apply; the resolver callback rewrites the file and the integrator is
// expected to validate and commit the repaired content.
it('repairs a textual conflict through the bounded resolver and commits repaired files', async () => {
  const { homeDir, configDir, git } = await makeRepo();
  await mkdir(join(configDir, 'wiki/global'), { recursive: true });
  await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8');
  await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 'system@example.com');
  const conflictBase = await git.revParseHead();

  await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8');
  await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com');
  // HEAD immediately before integration; the repaired commit must move past it.
  const acceptedHead = await git.revParseHead();

  const childDir = join(homeDir, 'child-conflict');
  await git.addWorktree(childDir, 'child-conflict', conflictBase);
  const childGit = git.forWorktree(childDir);
  await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8');
  await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com');
  const patchPath = join(homeDir, 'proposal.patch');
  await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath);

  const trace = new FileIngestTraceWriter({
    tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver/trace.jsonl'),
    jobId: 'job-resolver',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    level: 'trace',
  });

  // The gate must see the resolver's output, not the pre-conflict content.
  const validateAppliedTree = vi.fn(async (paths: string[]) => {
    expect(paths).toEqual(['wiki/global/a.md']);
    await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\nproposal\n');
  });

  const result = await integrateWorkUnitPatch({
    unitKey: 'wu-conflict',
    patchPath,
    integrationGit: git,
    trace,
    author: { name: 'System User', email: 'system@example.com' },
    slDisallowed: false,
    allowedTargetConnectionIds: new Set(['warehouse']),
    validateAppliedTree,
    resolveTextualConflict: vi.fn(async (context) => {
      expect(context).toMatchObject({
        unitKey: 'wu-conflict',
        patchPath,
        touchedPaths: ['wiki/global/a.md'],
      });
      await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\nproposal\n', 'utf-8');
      return {
        status: 'repaired' as const,
        attempts: 1,
        changedPaths: ['wiki/global/a.md'],
      };
    }),
  });

  expect(result).toMatchObject({
    status: 'accepted',
    touchedPaths: ['wiki/global/a.md'],
    textualResolution: {
      status: 'repaired',
      attempts: 1,
      changedPaths: ['wiki/global/a.md'],
    },
  });
  expect(validateAppliedTree).toHaveBeenCalledOnce();
  await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\nproposal\n');
  await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_textual_resolution');
  // Comparing against the pre-integration HEAD (rather than the repo's initial
  // commit, which the setup commits above already moved away from) proves that
  // the integrator itself created a new commit.
  expect(await git.revParseHead()).not.toBe(acceptedHead);
});
|
||||
|
||||
// When the resolver reports failure, neither HEAD nor the file content of the
// integration worktree may change.
it('keeps the pre-apply integration tree when the resolver cannot repair a textual conflict', async () => {
  const pagePath = 'wiki/global/a.md';
  const { homeDir, configDir, git } = await makeRepo();
  const integrationPage = join(configDir, pagePath);

  await mkdir(join(configDir, 'wiki/global'), { recursive: true });
  await writeFile(integrationPage, 'base\n', 'utf-8');
  await git.commitFiles([pagePath], 'base page', 'System User', 'system@example.com');
  const conflictBase = await git.revParseHead();

  await writeFile(integrationPage, 'accepted\n', 'utf-8');
  await git.commitFiles([pagePath], 'accepted edit', 'System User', 'system@example.com');
  const acceptedHead = await git.revParseHead();

  // Conflicting proposal authored against the older base commit.
  const worktreeDir = join(homeDir, 'child-conflict-fails');
  await git.addWorktree(worktreeDir, 'child-conflict-fails', conflictBase);
  const worktreeGit = git.forWorktree(worktreeDir);
  await writeFile(join(worktreeDir, pagePath), 'proposal\n', 'utf-8');
  await worktreeGit.commitFiles([pagePath], 'proposal edit', 'System User', 'system@example.com');
  const patchPath = join(homeDir, 'proposal-fails.patch');
  await worktreeGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath);

  const traceWriter = new FileIngestTraceWriter({
    tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver-fails/trace.jsonl'),
    jobId: 'job-resolver-fails',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    level: 'trace',
  });

  const outcome = await integrateWorkUnitPatch({
    unitKey: 'wu-conflict',
    patchPath,
    integrationGit: git,
    trace: traceWriter,
    author: { name: 'System User', email: 'system@example.com' },
    slDisallowed: false,
    allowedTargetConnectionIds: new Set(['warehouse']),
    validateAppliedTree: vi.fn(async () => {}),
    resolveTextualConflict: vi.fn(async () => ({
      status: 'failed' as const,
      attempts: 1,
      reason: 'resolver completed without editing an allowed path',
    })),
  });

  expect(outcome).toMatchObject({
    status: 'textual_conflict',
    textualResolution: {
      status: 'failed',
      attempts: 1,
      reason: 'resolver completed without editing an allowed path',
    },
  });
  expect(await git.revParseHead()).toBe(acceptedHead);
  await expect(readFile(integrationPage, 'utf-8')).resolves.toBe('accepted\n');
});
|
||||
|
||||
// The patch applies cleanly but the first gate run rejects it; the repair
// callback rewrites the file, the second gate run passes, and the repaired
// content is accepted.
it('repairs semantic gate failures after a patch applies cleanly', async () => {
  const pagePath = 'wiki/global/a.md';
  const { homeDir, configDir, git, baseSha } = await makeRepo();

  const worktreeDir = join(homeDir, 'child-semantic-repair');
  await git.addWorktree(worktreeDir, 'child-semantic-repair', baseSha);
  const worktreeGit = git.forWorktree(worktreeDir);
  await writeFile(join(worktreeDir, pagePath), 'bad semantic ref\n');
  await worktreeGit.commitFiles([pagePath], 'bad semantic edit', 'System User', 'system@example.com');
  const patchPath = join(homeDir, 'patches/semantic-repair.patch');
  await worktreeGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);

  const traceWriter = new FileIngestTraceWriter({
    tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair/trace.jsonl'),
    jobId: 'job-semantic-repair',
    connectionId: 'c1',
    sourceKey: 'fake',
    level: 'trace',
  });

  // First call rejects (initial gate), second call resolves (post-repair gate).
  const gateError = new Error('final artifact gates failed:\na: unknown semantic-layer entity');
  const validateAppliedTree = vi.fn().mockRejectedValueOnce(gateError).mockResolvedValueOnce(undefined);

  const outcome = await integrateWorkUnitPatch({
    unitKey: 'wu-repairable',
    patchPath,
    integrationGit: git,
    trace: traceWriter,
    author: { name: 'KTX Test', email: 'system@ktx.local' },
    validateAppliedTree,
    slDisallowed: false,
    allowedTargetConnectionIds: new Set(['c1']),
    repairGateFailure: vi.fn(async (context) => {
      expect(context).toMatchObject({
        unitKey: 'wu-repairable',
        patchPath,
        touchedPaths: [pagePath],
      });
      await writeFile(join(configDir, pagePath), 'repaired semantic ref\n', 'utf-8');
      return {
        status: 'repaired' as const,
        attempts: 1,
        changedPaths: [pagePath],
      };
    }),
  });

  expect(outcome).toMatchObject({
    status: 'accepted',
    touchedPaths: [pagePath],
    gateRepair: {
      status: 'repaired',
      attempts: 1,
      changedPaths: [pagePath],
    },
  });
  expect(validateAppliedTree).toHaveBeenCalledTimes(2);
  await expect(readFile(join(configDir, pagePath), 'utf-8')).resolves.toBe('repaired semantic ref\n');
  await expect(readFile(traceWriter.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_gate_repair');
});
|
||||
|
||||
// A failed gate repair must surface as a semantic conflict and leave the
// integration copy of the page with its pre-apply content.
it('keeps the pre-apply tree when semantic gate repair fails', async () => {
  const pagePath = 'wiki/global/a.md';
  const { homeDir, configDir, git, baseSha } = await makeRepo();

  const worktreeDir = join(homeDir, 'child-semantic-repair-fails');
  await git.addWorktree(worktreeDir, 'child-semantic-repair-fails', baseSha);
  const worktreeGit = git.forWorktree(worktreeDir);
  await writeFile(join(worktreeDir, pagePath), 'bad semantic ref\n');
  await worktreeGit.commitFiles([pagePath], 'bad semantic edit', 'System User', 'system@example.com');
  const patchPath = join(homeDir, 'patches/semantic-repair-fails.patch');
  await worktreeGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath);

  const traceWriter = new FileIngestTraceWriter({
    tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair-fails/trace.jsonl'),
    jobId: 'job-semantic-repair-fails',
    connectionId: 'c1',
    sourceKey: 'fake',
    level: 'trace',
  });

  const outcome = await integrateWorkUnitPatch({
    unitKey: 'wu-not-repaired',
    patchPath,
    integrationGit: git,
    trace: traceWriter,
    author: { name: 'KTX Test', email: 'system@ktx.local' },
    validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')),
    slDisallowed: false,
    allowedTargetConnectionIds: new Set(['c1']),
    repairGateFailure: vi.fn(async () => ({
      status: 'failed' as const,
      attempts: 1,
      reason: 'gate repair completed without editing an allowed path',
    })),
  });

  expect(outcome).toMatchObject({
    status: 'semantic_conflict',
    gateRepair: {
      status: 'failed',
      attempts: 1,
      reason: 'gate repair completed without editing an allowed path',
    },
  });
  await expect(readFile(join(configDir, pagePath), 'utf-8')).resolves.toBe('old\n');
});
|
||||
});
|
||||
321
packages/context/src/ingest/isolated-diff/patch-integrator.ts
Normal file
321
packages/context/src/ingest/isolated-diff/patch-integrator.ts
Normal file
|
|
@ -0,0 +1,321 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import type { GitService } from '../../core/index.js';
|
||||
import type { FinalGateRepairResult } from '../final-gate-repair.js';
|
||||
import type { IngestTraceWriter } from '../ingest-trace.js';
|
||||
import { traceTimed } from '../ingest-trace.js';
|
||||
import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths } from './git-patch.js';
|
||||
import type { TextualConflictResolutionResult } from './textual-conflict-resolver.js';
|
||||
|
||||
/** Resolver outcome where the conflicted files were rewritten. */
type PatchTextualResolutionRepaired = {
  status: 'repaired';
  attempts: number;
  changedPaths: string[];
};

/** Resolver outcome where no usable repair was produced. */
type PatchTextualResolutionFailed = {
  status: 'failed';
  attempts: number;
  reason: string;
};

/** Textual-conflict resolver outcome attached to a patch integration result. */
export type PatchIntegrationTextualResolution = PatchTextualResolutionRepaired | PatchTextualResolutionFailed;
|
||||
|
||||
/** The patch (or its repaired form) was committed to the integration branch. */
type PatchIntegrationAccepted = {
  status: 'accepted';
  commitSha: string;
  touchedPaths: string[];
  textualResolution?: PatchIntegrationTextualResolution;
  gateRepair?: FinalGateRepairResult;
};

/** The patch was refused by policy, failed to apply, or could not be textually repaired. */
type PatchIntegrationTextualConflict = {
  status: 'textual_conflict';
  reason: string;
  touchedPaths: string[];
  textualResolution?: PatchIntegrationTextualResolution;
  gateRepair?: FinalGateRepairResult;
};

/** The tree validation callback rejected the applied or repaired tree. */
type PatchIntegrationSemanticConflict = {
  status: 'semantic_conflict';
  reason: string;
  touchedPaths: string[];
  textualResolution?: PatchIntegrationTextualResolution;
  gateRepair?: FinalGateRepairResult;
};

/** Outcome of integrating one WorkUnit patch, discriminated by `status`. */
export type PatchIntegrationResult =
  | PatchIntegrationAccepted
  | PatchIntegrationTextualConflict
  | PatchIntegrationSemanticConflict;
|
||||
|
||||
/** Context handed to the optional repair callbacks when integration hits a failure. */
type PatchRepairRequest = {
  unitKey: string;
  patchPath: string;
  touchedPaths: string[];
  /** Message of the failure that triggered the repair attempt. */
  reason: string;
};

/** Everything `integrateWorkUnitPatch` needs to apply one WorkUnit patch. */
export interface IntegrateWorkUnitPatchInput {
  /** Identifier of the WorkUnit; used in trace events and commit messages. */
  unitKey: string;
  /** Path of the patch file to read and apply. */
  patchPath: string;
  /** Git service bound to the integration worktree. */
  integrationGit: GitService;
  trace: IngestTraceWriter;
  /** Author recorded on commits created by the integration. */
  author: { name: string; email: string };
  /** Forwarded to the patch policy check. */
  slDisallowed: boolean;
  /** Forwarded to the patch policy check; also recorded (sorted) when the policy rejects. */
  allowedTargetConnectionIds: ReadonlySet<string>;
  /** Gate run against the applied tree; a rejection is reported as a semantic conflict. */
  validateAppliedTree(touchedPaths: string[]): Promise<void>;
  /** Optional repair hook invoked after the patch fails to apply. */
  resolveTextualConflict?(input: PatchRepairRequest): Promise<TextualConflictResolutionResult>;
  /** Optional repair hook invoked after `validateAppliedTree` rejects a cleanly applied patch. */
  repairGateFailure?(input: PatchRepairRequest): Promise<FinalGateRepairResult>;
}
|
||||
|
||||
/**
 * Converts any thrown value into a printable message.
 *
 * @param error - The caught value, which may or may not be an `Error`.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Continues integration after the patch failed to apply and the worktree has
 * already been reset: optionally runs the textual resolver, gates the files it
 * changed, and commits them.
 *
 * @param input - The original integration input.
 * @param touchedPaths - Paths parsed from the patch.
 * @param reason - Message of the apply failure.
 * @param restorePreApplyTree - Resets the integration worktree to the pre-apply HEAD.
 * @returns `accepted` when the repaired files were gated and committed,
 *   otherwise a textual or semantic conflict.
 * @throws Rethrows any error raised by the resolver callback, after restoring the tree.
 */
async function integrateAfterTextualConflict(
  input: IntegrateWorkUnitPatchInput,
  touchedPaths: string[],
  reason: string,
  restorePreApplyTree: () => Promise<void>,
): Promise<PatchIntegrationResult> {
  if (!input.resolveTextualConflict) {
    return { status: 'textual_conflict', reason, touchedPaths };
  }

  let textualResolution: TextualConflictResolutionResult;
  try {
    textualResolution = await input.resolveTextualConflict({
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      reason,
    });
  } catch (resolverError) {
    // A resolver that throws may already have edited files in the integration
    // worktree; restore the pre-apply tree before propagating, as every other
    // failure path does.
    await restorePreApplyTree();
    throw resolverError;
  }

  if (textualResolution.status === 'failed') {
    await restorePreApplyTree();
    return {
      status: 'textual_conflict',
      reason: textualResolution.reason,
      touchedPaths,
      textualResolution,
    };
  }

  // From here on only the paths the resolver reports as changed are gated and committed.
  const changedPaths = textualResolution.changedPaths;

  try {
    await traceTimed(
      input.trace,
      'integration',
      'semantic_gate_after_textual_resolution',
      { unitKey: input.unitKey, touchedPaths: changedPaths },
      async () => {
        await input.validateAppliedTree(changedPaths);
      },
    );
  } catch (semanticError) {
    await restorePreApplyTree();
    const semanticReason = errorMessage(semanticError);
    await input.trace.event('error', 'integration', 'patch_semantic_conflict_after_textual_resolution', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths: changedPaths,
      reason: semanticReason,
    });
    return {
      status: 'semantic_conflict',
      reason: semanticReason,
      touchedPaths: changedPaths,
      textualResolution,
    };
  }

  const commit = await input.integrationGit.commitFiles(
    changedPaths,
    `ingest: resolve WorkUnit ${input.unitKey} conflict`,
    input.author.name,
    input.author.email,
  );
  if (!commit.created) {
    // The resolver claimed a repair but nothing was committable.
    await restorePreApplyTree();
    await input.trace.event('error', 'integration', 'textual_conflict_resolver_noop', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths: changedPaths,
    });
    return {
      status: 'textual_conflict',
      reason: 'textual resolver produced no committable changes',
      touchedPaths: changedPaths,
      textualResolution,
    };
  }

  await input.trace.event('debug', 'integration', 'patch_accepted_after_textual_resolution', {
    unitKey: input.unitKey,
    commitSha: commit.commitHash,
    touchedPaths: changedPaths,
    attempts: textualResolution.attempts,
  });
  return {
    status: 'accepted',
    commitSha: commit.commitHash,
    touchedPaths: changedPaths,
    textualResolution,
  };
}

/**
 * Continues integration after the patch applied but the semantic gate rejected
 * it: optionally runs the gate repair callback, re-runs the gate on the files
 * it changed, and commits them.
 *
 * @param input - The original integration input.
 * @param touchedPaths - Paths parsed from the patch.
 * @param reason - Message of the gate failure.
 * @param restorePreApplyTree - Resets the integration worktree to the pre-apply HEAD.
 * @returns `accepted` when the repaired files were gated and committed,
 *   otherwise a semantic conflict.
 * @throws Rethrows any error raised by the repair callback, after restoring the tree.
 */
async function integrateAfterGateFailure(
  input: IntegrateWorkUnitPatchInput,
  touchedPaths: string[],
  reason: string,
  restorePreApplyTree: () => Promise<void>,
): Promise<PatchIntegrationResult> {
  if (!input.repairGateFailure) {
    await restorePreApplyTree();
    return { status: 'semantic_conflict', reason, touchedPaths };
  }

  let gateRepair: FinalGateRepairResult;
  try {
    gateRepair = await input.repairGateFailure({
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      reason,
    });
  } catch (repairError) {
    // The rejected patch is still applied at this point; do not leave it behind
    // for the next WorkUnit when the repair callback itself blows up.
    await restorePreApplyTree();
    throw repairError;
  }

  if (gateRepair.status === 'failed') {
    await restorePreApplyTree();
    return {
      status: 'semantic_conflict',
      reason: gateRepair.reason,
      touchedPaths,
      gateRepair,
    };
  }

  const changedPaths = gateRepair.changedPaths;

  try {
    await traceTimed(
      input.trace,
      'integration',
      'semantic_gate_after_gate_repair',
      { unitKey: input.unitKey, touchedPaths: changedPaths },
      async () => {
        await input.validateAppliedTree(changedPaths);
      },
    );
  } catch (repairValidationError) {
    await restorePreApplyTree();
    return {
      status: 'semantic_conflict',
      reason: errorMessage(repairValidationError),
      touchedPaths: changedPaths,
      gateRepair,
    };
  }

  const commit = await input.integrationGit.commitFiles(
    changedPaths,
    `ingest: repair WorkUnit ${input.unitKey} gates`,
    input.author.name,
    input.author.email,
  );
  if (!commit.created) {
    await restorePreApplyTree();
    return {
      status: 'semantic_conflict',
      reason: 'gate repair produced no committable changes',
      touchedPaths: changedPaths,
      gateRepair,
    };
  }

  await input.trace.event('debug', 'integration', 'patch_accepted_after_gate_repair', {
    unitKey: input.unitKey,
    commitSha: commit.commitHash,
    touchedPaths: changedPaths,
    attempts: gateRepair.attempts,
  });
  return {
    status: 'accepted',
    commitSha: commit.commitHash,
    touchedPaths: changedPaths,
    gateRepair,
  };
}

/**
 * Integrates one WorkUnit patch into the integration worktree.
 *
 * Steps, in order:
 * 1. A patch that touches no paths is accepted without a commit.
 * 2. The patch policy is checked; a rejection is traced as
 *    `patch_policy_rejected` and reported as a `textual_conflict`.
 * 3. The patch is applied. On failure the tree is reset to the pre-apply HEAD
 *    and, if provided, `resolveTextualConflict` may repair the files.
 * 4. `validateAppliedTree` gates the result. On failure, if provided,
 *    `repairGateFailure` may repair the files; otherwise the tree is reset.
 * 5. The staged patch is committed and `patch_accepted` is traced.
 *
 * Every non-accepted outcome, and any error thrown by a repair callback,
 * resets the worktree to the HEAD observed on entry (when that HEAD is known).
 *
 * @param input - Patch location, git service, trace writer, policy flags and callbacks.
 * @returns The integration outcome, discriminated by `status`.
 * @throws Errors from reading the patch file, from git operations outside the
 *   guarded apply step, from trace writes, and from the repair callbacks.
 */
export async function integrateWorkUnitPatch(input: IntegrateWorkUnitPatchInput): Promise<PatchIntegrationResult> {
  const preApplyHead = await input.integrationGit.revParseHead();
  // Single definition of "undo everything this call did"; a no-op when the
  // repository had no resolvable HEAD on entry.
  const restorePreApplyTree = async (): Promise<void> => {
    if (preApplyHead) {
      await input.integrationGit.resetHardTo(preApplyHead);
    }
  };

  const patch = await readFile(input.patchPath, 'utf-8');
  const touchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path);

  if (touchedPaths.length === 0) {
    await input.trace.event('debug', 'integration', 'patch_noop_accepted', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      patchBytes: Buffer.byteLength(patch),
    });
    return { status: 'accepted', commitSha: preApplyHead ?? '', touchedPaths };
  }

  try {
    assertPatchAllowedForWorkUnit({
      unitKey: input.unitKey,
      patch,
      slDisallowed: input.slDisallowed,
      allowedTargetConnectionIds: input.allowedTargetConnectionIds,
    });
  } catch (error) {
    const policyReason = errorMessage(error);
    await input.trace.event('error', 'integration', 'patch_policy_rejected', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      allowedTargetConnectionIds: [...input.allowedTargetConnectionIds].sort(),
      reason: policyReason,
    });
    // Nothing has been applied yet, so there is nothing to reset.
    return { status: 'textual_conflict', reason: policyReason, touchedPaths };
  }

  try {
    await traceTimed(
      input.trace,
      'integration',
      'patch_apply',
      { unitKey: input.unitKey, patchPath: input.patchPath, touchedPaths },
      async () => {
        await input.integrationGit.applyPatchFile3WayIndex(input.patchPath);
        await input.integrationGit.assertWorktreeClean();
      },
    );
  } catch (error) {
    await restorePreApplyTree();
    const reason = errorMessage(error);
    await input.trace.event('error', 'integration', 'patch_textual_conflict', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      reason,
    });
    return await integrateAfterTextualConflict(input, touchedPaths, reason, restorePreApplyTree);
  }

  try {
    await traceTimed(input.trace, 'integration', 'semantic_gate', { unitKey: input.unitKey, touchedPaths }, async () => {
      await input.validateAppliedTree(touchedPaths);
    });
  } catch (error) {
    const reason = errorMessage(error);
    await input.trace.event('error', 'integration', 'patch_semantic_conflict', {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      reason,
    });
    return await integrateAfterGateFailure(input, touchedPaths, reason, restorePreApplyTree);
  }

  const commit = await input.integrationGit.commitStaged(
    `ingest: accept WorkUnit ${input.unitKey}`,
    input.author.name,
    input.author.email,
  );
  await input.trace.event('debug', 'integration', 'patch_accepted', {
    unitKey: input.unitKey,
    commitSha: commit.commitHash,
    touchedPaths,
  });
  return { status: 'accepted', commitSha: commit.commitHash, touchedPaths };
}
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { FileIngestTraceWriter } from '../ingest-trace.js';
|
||||
import { resolveTextualConflict } from './textual-conflict-resolver.js';
|
||||
|
||||
/**
 * Builds a throwaway fixture for resolver tests: a temp root containing a
 * `workdir` with one wiki page, a hand-written failed patch for that page, and
 * a trace writer pointed at `<root>/trace.jsonl`.
 */
async function makeHarness() {
  const root = await mkdtemp(join(tmpdir(), 'ktx-textual-resolver-'));
  const workdir = join(root, 'workdir');
  const patchPath = join(root, 'failed.patch');
  const trace = new FileIngestTraceWriter({
    tracePath: join(root, 'trace.jsonl'),
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    runId: 'run-1',
    syncId: 'sync-1',
    level: 'trace',
  });

  // Current integration content: it no longer matches the patch's "-base line".
  await mkdir(join(workdir, 'wiki/global'), { recursive: true });
  await writeFile(join(workdir, 'wiki/global/account.md'), 'accepted line\n', 'utf-8');

  const patchLines = [
    'diff --git a/wiki/global/account.md b/wiki/global/account.md',
    'index 8877391..6f63f4d 100644',
    '--- a/wiki/global/account.md',
    '+++ b/wiki/global/account.md',
    '@@ -1 +1 @@',
    '-base line',
    '+proposal line',
  ];
  // The patch text ends with a newline after the last line.
  await writeFile(patchPath, `${patchLines.join('\n')}\n`, 'utf-8');

  return { root, workdir, patchPath, trace };
}
|
||||
|
||||
describe('resolveTextualConflict', () => {
  // The fake agent drives the tool set directly: it reads the current file and
  // the failed patch, is refused a write outside the touched paths, and then
  // writes the merged content.
  it('lets the repair agent read the failed patch and write only touched paths', async () => {
    const accountPage = 'wiki/global/account.md';
    const mergedContent = 'accepted line\nproposal line\n';
    const { workdir, patchPath, trace } = await makeHarness();

    const agentRunner = {
      runLoop: vi.fn(async (params: any) => {
        const { toolSet } = params;

        const currentFile = await toolSet.read_integration_file.execute({ path: accountPage });
        expect(currentFile.structured).toEqual({ path: accountPage, exists: true });
        expect(currentFile.markdown).toContain('accepted line');

        const failedPatch = await toolSet.read_failed_patch.execute({});
        expect(failedPatch.markdown).toContain('proposal line');

        const forbiddenWrite = toolSet.write_integration_file.execute({
          path: 'wiki/global/not-allowed.md',
          content: 'bad\n',
        });
        await expect(forbiddenWrite).rejects.toThrow(/resolver path not allowed/);

        await toolSet.write_integration_file.execute({ path: accountPage, content: mergedContent });
        return { stopReason: 'natural' as const };
      }),
    };

    const outcome = await resolveTextualConflict({
      agentRunner,
      workdir,
      unitKey: 'wu-a',
      patchPath,
      touchedPaths: [accountPage],
      trace,
      reason: 'patch failed: wiki/global/account.md',
      maxAttempts: 1,
      stepBudget: 8,
    });

    expect(outcome).toEqual({
      status: 'repaired',
      attempts: 1,
      changedPaths: [accountPage],
    });
    await expect(readFile(join(workdir, accountPage), 'utf-8')).resolves.toBe(mergedContent);

    const expectedTags = expect.objectContaining({
      operationName: 'ingest-isolated-diff-textual-resolver',
      jobId: 'job-1',
      unitKey: 'wu-a',
    });
    expect(agentRunner.runLoop).toHaveBeenCalledWith(
      expect.objectContaining({
        modelRole: 'repair',
        stepBudget: 8,
        telemetryTags: expectedTags,
      }),
    );
  });

  // An agent that stops without calling any write tool yields a failed result.
  it('fails when the repair agent completes without editing any touched path', async () => {
    const { workdir, patchPath, trace } = await makeHarness();
    const idleAgent = { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) };

    const outcome = await resolveTextualConflict({
      agentRunner: idleAgent,
      workdir,
      unitKey: 'wu-a',
      patchPath,
      touchedPaths: ['wiki/global/account.md'],
      trace,
      reason: 'patch failed: wiki/global/account.md',
      maxAttempts: 1,
      stepBudget: 8,
    });

    expect(outcome).toEqual({
      status: 'failed',
      attempts: 1,
      reason: 'resolver completed without editing an allowed path',
    });
  });
});
|
||||
|
|
@ -0,0 +1,238 @@
|
|||
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../llm/index.js';
|
||||
import type { IngestTraceWriter } from '../ingest-trace.js';
|
||||
import { traceTimed } from '../ingest-trace.js';
|
||||
|
||||
/** The resolver edited at least the listed paths. */
type TextualConflictRepaired = {
  status: 'repaired';
  attempts: number;
  changedPaths: string[];
};

/** The resolver gave up; `reason` says why. */
type TextualConflictUnresolved = {
  status: 'failed';
  attempts: number;
  reason: string;
};

/** Result of one textual-conflict resolution run, discriminated by `status`. */
export type TextualConflictResolutionResult = TextualConflictRepaired | TextualConflictUnresolved;
|
||||
|
||||
/** Arguments for `resolveTextualConflict`. */
export interface ResolveTextualConflictInput {
  /** WorkUnit whose patch failed to apply. */
  unitKey: string;
  /** Path of the failed patch file. */
  patchPath: string;
  /** Repository-relative paths touched by the failed patch. */
  touchedPaths: string[];
  /** Failure message describing why the patch did not apply. */
  reason: string;
  /** Directory of the integration worktree. */
  workdir: string;
  /** Agent runner used to drive the repair. */
  agentRunner: AgentRunnerPort;
  trace: IngestTraceWriter;
  /** Optional cap on repair attempts. */
  maxAttempts?: number;
  /** Optional step budget passed to the agent run. */
  stepBudget?: number;
}
|
||||
|
||||
// Every file tool takes a non-empty `path`; the field definition is shared so
// the three schemas cannot drift apart.
const integrationFilePathField = z.string().min(1);

/** Input of the `read_integration_file` tool. */
const readIntegrationFileSchema = z.object({ path: integrationFilePathField });

/** Input of the `write_integration_file` tool; `content` may be empty. */
const writeIntegrationFileSchema = z.object({
  path: integrationFilePathField,
  content: z.string(),
});

/** Input of the `delete_integration_file` tool. */
const deleteIntegrationFileSchema = z.object({ path: integrationFilePathField });
|
||||
|
||||
/**
 * Canonicalizes a tool-supplied path into a forward-slash, repository-relative form.
 *
 * Backslashes are treated as separators, and leading, trailing and repeated
 * separators are dropped.
 *
 * @param path - Path as provided by the caller.
 * @returns The path segments joined with `/`.
 * @throws Error when nothing remains after normalization or when any segment
 *   is `.` or `..`.
 */
function normalizeRepoPath(path: string): string {
  // Splitting on either separator and discarding empty segments removes
  // leading, trailing and duplicate separators in one pass.
  const segments = path.split(/[\\/]/).filter((segment) => segment !== '');

  let valid = segments.length > 0;
  for (const segment of segments) {
    if (segment === '.' || segment === '..') {
      valid = false;
      break;
    }
  }
  if (!valid) {
    throw new Error(`resolver path must be a repository-relative path: ${path}`);
  }

  return segments.join('/');
}
|
||||
|
||||
/**
 * Normalizes `path` and verifies that it is one of the paths the resolver may touch.
 *
 * @param path - Path as provided by the tool caller.
 * @param allowedPaths - Set of permitted paths, compared against the normalized form.
 * @returns The normalized path.
 * @throws Error when the path cannot be normalized or is not in `allowedPaths`.
 */
function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string {
  const repoPath = normalizeRepoPath(path);
  if (allowedPaths.has(repoPath)) {
    return repoPath;
  }
  throw new Error(`resolver path not allowed: ${repoPath}`);
}
|
||||
|
||||
/**
 * Reads a UTF-8 file, treating a missing file as an ordinary result.
 *
 * @param path - File to read.
 * @returns `{ exists: true, content }` when the file was read, or
 *   `{ exists: false, content: '' }` when the read failed with `ENOENT`.
 * @throws Any read error whose `code` is not `ENOENT`.
 */
async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> {
  let content: string;
  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return { exists: false, content: '' };
  }
  return { exists: true, content };
}
|
||||
|
||||
/**
 * Returns the fixed system prompt for the textual-conflict repair agent: a
 * `<role>` section followed by a `<rules>` section, separated by a blank line.
 */
function buildResolverSystemPrompt(): string {
  const roleSection = [
    '<role>',
    'You repair one failed KTX isolated-diff patch inside the integration worktree.',
    '</role>',
  ];
  const rulesSection = [
    '<rules>',
    '- Preserve accepted integration content that is unrelated to the failed patch.',
    '- Incorporate the failed patch only when the patch evidence is compatible with the current file.',
    '- Edit only paths exposed by the resolver tools.',
    '- Prefer the smallest text edit that makes the composed artifact coherent.',
    '- Do not create new facts that are absent from the current file or failed patch.',
    '- Stop after writing the repaired file content.',
    '</rules>',
  ];
  return [...roleSection, '', ...rulesSection].join('\n');
}
|
||||
|
||||
/**
 * Renders the per-attempt user prompt for the repair agent.
 *
 * @param input - WorkUnit key, patch path, touched paths (rendered as a `- `
 *   bullet list, one per line), the apply failure text, and the attempt counters.
 * @returns The prompt text, with no trailing newline.
 */
function buildResolverUserPrompt(input: {
  unitKey: string;
  patchPath: string;
  touchedPaths: string[];
  reason: string;
  attempt: number;
  maxAttempts: number;
}): string {
  const { unitKey, patchPath, touchedPaths, reason, attempt, maxAttempts } = input;
  const touchedList = touchedPaths.map((path) => `- ${path}`).join('\n');

  return [
    'Repair isolated-diff textual conflict.',
    '',
    `WorkUnit: ${unitKey}`,
    `Attempt: ${attempt} of ${maxAttempts}`,
    `Patch path: ${patchPath}`,
    'Touched paths:',
    touchedList,
    '',
    'Git apply failure:',
    reason,
    '',
    'Use read_failed_patch first. Then read the touched integration files, write the',
    'repaired content, and stop.',
  ].join('\n');
}
|
||||
|
||||
/**
 * Build the tools exposed to the textual-conflict resolver agent.
 *
 * Every file tool passes the requested path through `assertAllowedPath` with
 * `allowedPaths` before touching disk, and resolves the returned path against
 * `workdir`. Write and delete record the path in `editedPaths`, which the
 * caller reads afterwards to decide whether a repair actually happened.
 *
 * @param input.workdir      Root of the integration worktree.
 * @param input.patchPath    Failed patch file returned by `read_failed_patch`.
 * @param input.allowedPaths Paths the agent may read, write or delete.
 * @param input.editedPaths  Mutated in place: receives every written/deleted path.
 * @returns The tool set keyed by tool name.
 */
function buildToolSet(input: {
  workdir: string;
  patchPath: string;
  allowedPaths: ReadonlySet<string>;
  editedPaths: Set<string>;
}): KtxRuntimeToolSet {
  return {
    read_failed_patch: {
      name: 'read_failed_patch',
      description: 'Read the failed Git patch that could not be applied to the integration worktree.',
      inputSchema: z.object({}),
      execute: async () => {
        const patch = await readFile(input.patchPath, 'utf-8');
        return {
          markdown: patch,
          structured: { patchPath: input.patchPath, bytes: Buffer.byteLength(patch) },
        };
      },
    },
    read_integration_file: {
      name: 'read_integration_file',
      description: 'Read one allowed file from the current integration worktree.',
      inputSchema: readIntegrationFileSchema,
      execute: async ({ path }: z.infer<typeof readIntegrationFileSchema>) => {
        const normalized = assertAllowedPath(path, input.allowedPaths);
        const file = await readOptionalFile(join(input.workdir, normalized));
        return {
          // A missing file is reported as text rather than as an error.
          markdown: file.exists ? file.content : `(missing file: ${normalized})`,
          structured: { path: normalized, exists: file.exists },
        };
      },
    },
    write_integration_file: {
      name: 'write_integration_file',
      description: 'Replace one allowed integration worktree file with repaired text content.',
      inputSchema: writeIntegrationFileSchema,
      execute: async ({ path, content }: z.infer<typeof writeIntegrationFileSchema>) => {
        const normalized = assertAllowedPath(path, input.allowedPaths);
        const fullPath = join(input.workdir, normalized);
        // The parent directory may not exist yet in the integration worktree.
        await mkdir(dirname(fullPath), { recursive: true });
        await writeFile(fullPath, content, 'utf-8');
        input.editedPaths.add(normalized);
        return {
          markdown: `Wrote ${normalized}`,
          structured: { path: normalized, bytes: Buffer.byteLength(content) },
        };
      },
    },
    delete_integration_file: {
      name: 'delete_integration_file',
      description: 'Delete one allowed integration worktree file when the failed patch proves the deletion is correct.',
      inputSchema: deleteIntegrationFileSchema,
      execute: async ({ path }: z.infer<typeof deleteIntegrationFileSchema>) => {
        const normalized = assertAllowedPath(path, input.allowedPaths);
        // `force: true` makes deleting an already-missing file a no-op.
        await rm(join(input.workdir, normalized), { force: true });
        input.editedPaths.add(normalized);
        return {
          markdown: `Deleted ${normalized}`,
          structured: { path: normalized },
        };
      },
    },
  };
}
|
||||
|
||||
/**
 * Run the resolver agent to repair a patch that failed to apply textually.
 *
 * Each attempt runs one agent loop with a fresh `editedPaths` set. An attempt
 * counts as a repair only when the loop did not stop with `'error'` and at
 * least one allowed path was written or deleted. Failed attempts emit a
 * `textual_conflict_resolver_failed` trace event and the loop moves on.
 *
 * @param input Resolver inputs; `maxAttempts` defaults to 1 and `stepBudget` to 12.
 * @returns `{ status: 'repaired', attempts, changedPaths }` on the first
 *          successful attempt, otherwise `{ status: 'failed', attempts, reason }`
 *          carrying the last failure message.
 */
export async function resolveTextualConflict(
  input: ResolveTextualConflictInput,
): Promise<TextualConflictResolutionResult> {
  const allowedPaths = new Set(input.touchedPaths.map(normalizeRepoPath));
  // The sorted path list is the same for every attempt, so build it once.
  const touchedPaths = [...allowedPaths].sort();
  const maxAttempts = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 12;
  let lastFailure = 'resolver did not run';

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    const editedPaths = new Set<string>();
    const traceData = {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths,
      attempt,
      maxAttempts,
      reason: input.reason,
    };
    const result = await traceTimed(input.trace, 'resolver', 'textual_conflict_resolver', traceData, async () =>
      input.agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt: buildResolverSystemPrompt(),
        userPrompt: buildResolverUserPrompt({
          unitKey: input.unitKey,
          patchPath: input.patchPath,
          touchedPaths,
          reason: input.reason,
          attempt,
          maxAttempts,
        }),
        toolSet: buildToolSet({
          workdir: input.workdir,
          patchPath: input.patchPath,
          allowedPaths,
          editedPaths,
        }),
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-textual-resolver',
          source: input.trace.context.sourceKey,
          jobId: input.trace.context.jobId,
          unitKey: input.unitKey,
        },
      }),
    );

    if (result.stopReason === 'error') {
      lastFailure = result.error?.message ?? 'resolver agent loop errored';
      await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', traceData, result.error);
      continue;
    }

    const changedPaths = [...editedPaths].sort();
    if (changedPaths.length === 0) {
      // The loop ended cleanly but nothing was edited, so nothing was repaired.
      lastFailure = 'resolver completed without editing an allowed path';
      await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', {
        ...traceData,
        reason: lastFailure,
      });
      continue;
    }

    await input.trace.event('debug', 'resolver', 'textual_conflict_resolver_repaired', {
      ...traceData,
      changedPaths,
    });
    return { status: 'repaired', attempts: attempt, changedPaths };
  }

  return { status: 'failed', attempts: maxAttempts, reason: lastFailure };
}
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { GitService } from '../../core/index.js';
|
||||
import { FileIngestTraceWriter } from '../ingest-trace.js';
|
||||
import { runIsolatedWorkUnit } from './work-unit-executor.js';
|
||||
|
||||
/**
 * Create a throwaway Git-backed config repo for a test.
 *
 * Makes a temp home directory, initialises a `GitService` whose config dir is
 * `<home>/config`, and commits a single raw-source file so that HEAD exists.
 *
 * @returns The temp dirs, the service, and the HEAD sha after the raw snapshot commit.
 */
async function makeGit() {
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-isolated-wu-'));
  const configDir = join(homeDir, 'config');
  const git = new GitService({
    storage: { configDir, homeDir },
    git: {
      userName: 'System User',
      userEmail: 'system@example.com',
      bootstrapMessage: 'init',
      bootstrapAuthor: 'system',
      bootstrapAuthorEmail: 'system@example.com',
    },
  });
  await git.onModuleInit();
  await mkdir(join(configDir, 'raw-sources/c1/fake/s'), { recursive: true });
  await writeFile(join(configDir, 'raw-sources/c1/fake/s/a.json'), '{}\n');
  await git.commitFiles(['raw-sources/c1/fake/s/a.json'], 'raw snapshot', 'System User', 'system@example.com');
  return { homeDir, configDir, git, baseSha: await git.revParseHead() };
}
|
||||
|
||||
// Exercises runIsolatedWorkUnit against a real temp Git repo with a stubbed
// session-worktree service that creates the child worktree itself.
describe('runIsolatedWorkUnit', () => {
  it('creates a child worktree at the ingestion base and persists a patch proposal', async () => {
    const { homeDir, git, baseSha } = await makeGit();
    const childDir = join(homeDir, '.worktrees/session-job-1-wu-1');
    const sessionWorktreeService = {
      create: vi.fn(async (_key: string, startSha: string) => {
        await mkdir(join(homeDir, '.worktrees'), { recursive: true });
        await git.addWorktree(childDir, 'session/job-1-wu-1', startSha);
        const childGit = git.forWorktree(childDir);
        return {
          chatId: 'job-1-wu-1',
          workdir: childDir,
          branch: 'session/job-1-wu-1',
          baseSha: startSha,
          createdAt: new Date(),
          git: childGit,
          config: {},
        };
      }),
      cleanup: vi.fn(async () => undefined),
    };
    const tracePath = join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl');
    const trace = new FileIngestTraceWriter({
      tracePath,
      jobId: 'job-1',
      connectionId: 'c1',
      sourceKey: 'fake',
      level: 'trace',
    });

    const result = await runIsolatedWorkUnit({
      unitIndex: 0,
      ingestionBaseSha: baseSha,
      sessionWorktreeService: sessionWorktreeService as never,
      patchDir: join(homeDir, '.ktx/ingest-patches/job-1'),
      trace,
      run: async (child) => {
        // Commit one wiki page in the child so the collected patch is non-empty.
        await mkdir(join(child.workdir, 'wiki/global'), { recursive: true });
        await writeFile(join(child.workdir, 'wiki/global/a.md'), '---\nsummary: A\nusage_mode: auto\n---\n\nBody\n');
        await child.git.commitFiles(['wiki/global/a.md'], 'test: write wiki', 'KTX Test', 'system@ktx.local');
        return {
          unitKey: 'wu-1',
          status: 'success',
          preSha: baseSha,
          postSha: await child.git.revParseHead(),
          actions: [{ target: 'wiki', type: 'created', key: 'a', detail: 'A' }],
          touchedSlSources: [],
        };
      },
      workUnit: { unitKey: 'wu-1', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] },
    });

    expect(sessionWorktreeService.create).toHaveBeenCalledWith('job-1-wu-1', baseSha);
    expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
    expect(result.status).toBe('success');
    if (result.status !== 'success') {
      throw new Error('expected successful work unit');
    }
    const patchPath = result.patchPath;
    if (!patchPath) {
      throw new Error('expected patch path');
    }
    expect(patchPath).toContain('0000-wu-1.patch');
    await expect(readFile(patchPath, 'utf-8')).resolves.toContain('wiki/global/a.md');
    await expect(readFile(tracePath, 'utf-8')).resolves.toContain('work_unit_child_created');
  });

  it('removes child worktrees after failed WorkUnit outcomes are traced', async () => {
    const { homeDir, git, baseSha } = await makeGit();
    const childDir = join(homeDir, '.worktrees/session-job-1-wu-fail');
    const sessionWorktreeService = {
      create: vi.fn(async (_key: string, startSha: string) => {
        await mkdir(join(homeDir, '.worktrees'), { recursive: true });
        await git.addWorktree(childDir, 'session/job-1-wu-fail', startSha);
        return {
          chatId: 'job-1-wu-fail',
          workdir: childDir,
          branch: 'session/job-1-wu-fail',
          baseSha: startSha,
          createdAt: new Date(),
          git: git.forWorktree(childDir),
          config: {},
        };
      }),
      cleanup: vi.fn(async () => undefined),
    };
    const trace = new FileIngestTraceWriter({
      tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'),
      jobId: 'job-1',
      connectionId: 'c1',
      sourceKey: 'fake',
      level: 'trace',
    });

    const result = await runIsolatedWorkUnit({
      unitIndex: 0,
      ingestionBaseSha: baseSha,
      sessionWorktreeService: sessionWorktreeService as never,
      patchDir: join(homeDir, '.ktx/ingest-patches/job-1'),
      trace,
      run: async () => ({
        unitKey: 'wu-fail',
        status: 'failed',
        reason: 'agent loop errored',
        preSha: baseSha,
        postSha: baseSha,
        actions: [],
        touchedSlSources: [],
      }),
      workUnit: { unitKey: 'wu-fail', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] },
    });

    expect(result.status).toBe('failed');
    // A failed outcome still cleans up with 'success' so the child worktree is removed.
    expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
  });
});
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
import { mkdir, readFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import type { SessionOutcome } from '../../core/index.js';
|
||||
import type { IngestSessionWorktree, IngestSessionWorktreePort } from '../ports.js';
|
||||
import type { WorkUnit } from '../types.js';
|
||||
import type { IngestTraceWriter } from '../ingest-trace.js';
|
||||
import type { WorkUnitOutcome } from '../stages/stage-3-work-units.js';
|
||||
import { parsePatchTouchedPaths } from './git-patch.js';
|
||||
|
||||
/** Inputs for running one WorkUnit inside its own child worktree. */
export interface RunIsolatedWorkUnitInput {
  /** Index of the unit; zero-padded into the patch file name and included in trace data. */
  unitIndex: number;
  /** Commit the child worktree is created at and the collected patch is diffed against. */
  ingestionBaseSha: string;
  /** Creates and cleans up the child worktree. */
  sessionWorktreeService: IngestSessionWorktreePort;
  /** Directory the patch file is written to; created if missing. */
  patchDir: string;
  /** Trace writer; its `context.jobId` also prefixes the child session key. */
  trace: IngestTraceWriter;
  /** The unit being executed; `unitKey` names the session and the patch file. */
  workUnit: WorkUnit;
  /** Executes the unit inside the child worktree and reports its outcome. */
  run(child: IngestSessionWorktree): Promise<WorkUnitOutcome>;
  /** Optional hook invoked after a successful `run`, before the patch is collected. */
  afterSuccess?(child: IngestSessionWorktree): Promise<void>;
}
|
||||
|
||||
/**
 * Derive the patch file name for a WorkUnit.
 *
 * The index is zero-padded to at least four digits. Every run of characters in
 * the key outside `[a-zA-Z0-9_.-]` collapses to a single `-`.
 *
 * @param unitIndex Index of the unit.
 * @param unitKey   Raw unit key.
 * @returns A name of the form `NNNN-<safeKey>.patch`.
 */
function patchFileName(unitIndex: number, unitKey: string): string {
  const safeKey = unitKey.replace(/[^a-zA-Z0-9_.-]+/g, '-');
  return `${String(unitIndex).padStart(4, '0')}-${safeKey}.patch`;
}
|
||||
|
||||
/**
 * Run one WorkUnit in a dedicated child worktree and collect its changes as a patch.
 *
 * Flow: create the child at `ingestionBaseSha`, run the unit, and on success
 * write a patch of `ingestionBaseSha..HEAD` into `patchDir` and report the
 * paths it touches. The child is always passed to `cleanup` in `finally`.
 *
 * The cleanup outcome starts as `'crash'` and becomes `'success'` once the unit
 * result (success or failure) or the thrown error has been traced. It stays
 * `'crash'` only if tracing the thrown error itself fails.
 *
 * @param input See {@link RunIsolatedWorkUnitInput}.
 * @returns The unit outcome extended with `childWorktreePath`, plus `patchPath`
 *          and `patchTouchedPaths` when the unit succeeded.
 * @throws Rethrows any error raised while running the unit or collecting the patch.
 */
export async function runIsolatedWorkUnit(input: RunIsolatedWorkUnitInput): Promise<WorkUnitOutcome> {
  const { unitKey } = input.workUnit;
  const sessionKey = `${input.trace.context.jobId}-${unitKey}`;
  let cleanupOutcome: SessionOutcome = 'crash';
  const child = await input.sessionWorktreeService.create(sessionKey, input.ingestionBaseSha);

  try {
    // Traced inside the try so that a failing trace write still reaches the
    // `finally` cleanup instead of leaking the freshly created child worktree.
    await input.trace.event('debug', 'work_unit', 'work_unit_child_created', {
      unitKey,
      unitIndex: input.unitIndex,
      worktreePath: child.workdir,
      baseSha: input.ingestionBaseSha,
    });

    const outcome = await input.run(child);
    if (outcome.status !== 'success') {
      cleanupOutcome = 'success';
      await input.trace.event('error', 'work_unit', 'work_unit_failed_before_patch', {
        unitKey,
        reason: outcome.reason ?? 'unknown failure',
      });
      return { ...outcome, childWorktreePath: child.workdir };
    }

    await input.afterSuccess?.(child);
    await mkdir(input.patchDir, { recursive: true });
    const patchPath = join(input.patchDir, patchFileName(input.unitIndex, unitKey));
    await child.git.writeBinaryNoRenamePatch(input.ingestionBaseSha, 'HEAD', patchPath);
    const patch = await readFile(patchPath, 'utf-8');
    const patchTouchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path);
    cleanupOutcome = 'success';
    await input.trace.event('debug', 'work_unit', 'work_unit_patch_collected', {
      unitKey,
      patchPath,
      touchedPaths: patchTouchedPaths,
      patchBytes: Buffer.byteLength(patch),
    });
    return {
      ...outcome,
      patchPath,
      patchTouchedPaths,
      childWorktreePath: child.workdir,
    };
  } catch (error) {
    await input.trace.event(
      'error',
      'work_unit',
      'work_unit_child_failed',
      { unitKey, worktreePath: child.workdir },
      error,
    );
    // Set only after the failure has been traced.
    cleanupOutcome = 'success';
    throw error;
  } finally {
    await input.sessionWorktreeService.cleanup(child, cleanupOutcome);
    await input.trace.event('trace', 'work_unit', 'work_unit_child_cleanup', {
      unitKey,
      outcome: cleanupOutcome,
      worktreePath: child.workdir,
    });
  }
}
|
||||
|
|
@ -694,6 +694,14 @@ describe('canonical local ingest', () => {
|
|||
],
|
||||
},
|
||||
});
|
||||
expect(result.report.body.isolatedDiff).toMatchObject({
|
||||
enabled: true,
|
||||
acceptedPatches: 0,
|
||||
projectionSha: expect.any(String),
|
||||
});
|
||||
|
||||
const projectedSourcePath = join(metricflowProject.projectDir, 'semantic-layer/warehouse/orders.yaml');
|
||||
await expect(readFile(projectedSourcePath, 'utf-8')).resolves.toContain('name: orders');
|
||||
|
||||
const stagedRawPath = join(
|
||||
metricflowProject.projectDir,
|
||||
|
|
|
|||
|
|
@ -17,6 +17,24 @@ type RuntimeWithConnectionDeps = {
|
|||
};
|
||||
};
|
||||
|
||||
/** Test-only view of a runner that exposes its `slValidator` dependency for direct calls. */
type RuntimeWithSlValidationDeps = {
  deps: {
    slValidator: {
      validateSingleSource(
        deps: unknown,
        connectionId: string,
        sourceName: string,
      ): Promise<{ errors: string[]; warnings: string[] }>;
    };
  };
};
|
||||
|
||||
/** Test-only view of a runner that exposes its `settings` dependency as a plain record. */
type RuntimeWithSettingsDeps = {
  deps: {
    settings: Record<string, unknown>;
  };
};
|
||||
|
||||
/** Stub agent runner whose `runLoop` always resolves with a `'natural'` stop reason. */
function testAgentRunner(): AgentRunnerPort {
  return { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' as const }) };
}
|
||||
|
|
@ -144,6 +162,77 @@ describe('createLocalBundleIngestRuntime', () => {
|
|||
]);
|
||||
});
|
||||
|
||||
// A source that exists only in the schema manifest (no direct source file)
// must still validate without errors.
it('validates manifest-backed scan sources during local ingest gates', async () => {
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    // Manifest YAML: the two-space nesting is significant.
    [
      'tables:',
      '  payments:',
      '    table: public.payments',
      '    columns:',
      '      - name: payment_id',
      '        type: string',
      '      - name: amount',
      '        type: number',
      '',
    ].join('\n'),
    'ktx',
    'ktx@example.com',
    'Add warehouse manifest',
  );
  const agentRunner = testAgentRunner();

  const runtime = createLocalBundleIngestRuntime({
    project,
    adapters: [new FakeSourceAdapter()],
    agentRunner,
  });
  const deps = (runtime.runner as unknown as RuntimeWithSlValidationDeps).deps;

  await expect(deps.slValidator.validateSingleSource(deps, 'warehouse', 'payments')).resolves.toEqual({
    errors: [],
    warnings: expect.any(Array),
  });
});
|
||||
|
||||
// When a direct overlay file exists but is malformed, its YAML error must be
// reported even though the manifest could supply a valid source.
it('does not mask malformed direct overlays with manifest-backed fallback validation', async () => {
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    // Manifest YAML: the two-space nesting is significant.
    [
      'tables:',
      '  payments:',
      '    table: public.payments',
      '    columns:',
      '      - name: payment_id',
      '        type: string',
      '',
    ].join('\n'),
    'ktx',
    'ktx@example.com',
    'Add warehouse manifest',
  );
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/payments.yaml',
    // The unterminated flow sequence makes this overlay invalid YAML.
    ['name: payments', 'columns:', '  - [', ''].join('\n'),
    'ktx',
    'ktx@example.com',
    'Add malformed overlay',
  );
  const agentRunner = testAgentRunner();

  const runtime = createLocalBundleIngestRuntime({
    project,
    adapters: [new FakeSourceAdapter()],
    agentRunner,
  });
  const deps = (runtime.runner as unknown as RuntimeWithSlValidationDeps).deps;

  await expect(deps.slValidator.validateSingleSource(deps, 'warehouse', 'payments')).resolves.toEqual({
    errors: [expect.stringContaining('invalid YAML')],
    warnings: [],
  });
});
|
||||
|
||||
it('passes project connection config to local ingest query executors', async () => {
|
||||
const agentRunner = testAgentRunner();
|
||||
const queryExecutor = {
|
||||
|
|
@ -175,6 +264,27 @@ describe('createLocalBundleIngestRuntime', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Pins the exact set of runner settings keys and checks that the
// shared-worktree fallback key is absent.
it('defaults local bundle ingest to isolated diffs without a shared-worktree fallback setting', () => {
  const runtime = createLocalBundleIngestRuntime({
    project,
    adapters: [new FakeSourceAdapter()],
    agentRunner: testAgentRunner(),
  });

  const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings;
  // The key is assembled from two parts rather than written as one literal.
  const fallbackSettingKey = ['sharedWorktree', 'SourceKeys'].join('');

  expect(settings).not.toHaveProperty(fallbackSettingKey);
  expect(Object.keys(settings).sort()).toEqual([
    'ingestTraceLevel',
    'memoryIngestionModel',
    'probeRowCount',
    'workUnitFailureMode',
    'workUnitMaxConcurrency',
    'workUnitStepBudget',
  ]);
});
|
||||
|
||||
it('accepts a debug LLM request file when constructing the default agent runner', async () => {
|
||||
await writeFile(
|
||||
join(project.projectDir, 'ktx.yaml'),
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ import {
|
|||
type KtxConnectionInfo,
|
||||
type KtxQueryResult,
|
||||
SemanticLayerService,
|
||||
type SemanticLayerSource,
|
||||
type SlConnectionCatalogPort,
|
||||
SlDiscoverTool,
|
||||
SlEditSourceTool,
|
||||
|
|
@ -76,6 +75,7 @@ import { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evide
|
|||
import { HistoricSqlProjectionPostProcessor } from './adapters/historic-sql/post-processor.js';
|
||||
import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './context-evidence/index.js';
|
||||
import { DiffSetService } from './diff-set.service.js';
|
||||
import { ingestTracePathForJob, type IngestTraceLevel } from './ingest-trace.js';
|
||||
import { IngestBundleRunner } from './ingest-bundle.runner.js';
|
||||
import { PageTriageService } from './page-triage/index.js';
|
||||
import { createWarehouseVerificationTools } from './tools/warehouse-verification/index.js';
|
||||
|
|
@ -96,6 +96,12 @@ const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url));
|
|||
const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url));
|
||||
const LOCAL_AUTHOR = { name: 'KTX Local', email: 'local@ktx.local' };
|
||||
const LOCAL_SHAPE_WARNING = 'Local ingest validates semantic-layer YAML shape only.';
|
||||
/** Trace levels accepted from the environment. */
const INGEST_TRACE_LEVELS = new Set<IngestTraceLevel>(['error', 'info', 'debug', 'trace']);

/**
 * Read the ingest trace level from `KTX_INGEST_TRACE_LEVEL`.
 *
 * The value is trimmed and lower-cased before matching, so `TRACE` or
 * ` info ` are accepted. A missing, empty or unrecognised value yields `'debug'`.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns The configured trace level, or `'debug'`.
 */
function ingestTraceLevelFromEnv(env: NodeJS.ProcessEnv = process.env): IngestTraceLevel {
  const raw = env.KTX_INGEST_TRACE_LEVEL?.trim().toLowerCase();
  return raw && INGEST_TRACE_LEVELS.has(raw as IngestTraceLevel) ? (raw as IngestTraceLevel) : 'debug';
}
|
||||
|
||||
export interface CreateLocalBundleIngestRuntimeOptions {
|
||||
project: KtxLocalProject;
|
||||
|
|
@ -151,6 +157,10 @@ class LocalIngestStorage implements IngestStoragePort {
|
|||
/** Transcript directory for a job: `<projectDir>/.ktx/ingest-transcripts/<jobId>`. */
resolveTranscriptDir(jobId: string): string {
  return join(this.project.projectDir, '.ktx/ingest-transcripts', jobId);
}
|
||||
|
||||
/**
 * Trace file path for a job, delegated to `ingestTracePathForJob`.
 * NOTE(review): rooted at `this.homeDir`, while `resolveTranscriptDir` uses
 * `this.project.projectDir` — confirm the two roots are meant to differ.
 */
resolveTracePath(jobId: string): string {
  return ingestTracePathForJob(this.homeDir, jobId);
}
|
||||
}
|
||||
|
||||
class LocalIngestLock implements IngestLockPort {
|
||||
|
|
@ -237,22 +247,63 @@ class LocalSlPythonPort implements SlPythonPort {
|
|||
}
|
||||
|
||||
/**
 * Semantic-layer validator for local ingest that checks YAML shape only.
 *
 * A source is validated against the overlay schema when it declares neither
 * `table` nor `sql`, and against the full definition schema otherwise. A
 * successful validation always carries `LOCAL_SHAPE_WARNING`.
 */
class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
  /**
   * Validate an already-parsed source object against the matching schema.
   *
   * @returns No errors plus the shape-only warning on success; otherwise one
   *          `<sourceName>: <path> <message>` error per schema issue.
   */
  private validateParsedSource(sourceName: string, parsed: Record<string, unknown>) {
    const isOverlay = parsed.table == null && parsed.sql == null;
    const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
    return result.success
      ? { errors: [], warnings: [LOCAL_SHAPE_WARNING] }
      : {
          errors: result.error.issues.map(
            (issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`,
          ),
          warnings: [],
        };
  }

  /**
   * Fallback used when the direct source file could not be read: load every
   * source for the connection and validate the one named `sourceName`.
   *
   * If the source is not found, the error detail is the joined load errors
   * when there are any, otherwise the original read error.
   */
  private async validateComposedSource(
    deps: SlValidationDeps,
    connectionId: string,
    sourceName: string,
    readError: unknown,
  ) {
    try {
      const { sources, loadErrors } = await deps.semanticLayerService.loadAllSources(connectionId);
      const source = sources.find((candidate) => candidate.name === sourceName);
      if (source) {
        return this.validateParsedSource(sourceName, source as unknown as Record<string, unknown>);
      }
      const detail =
        loadErrors.length > 0
          ? loadErrors.join('; ')
          : readError instanceof Error
            ? readError.message
            : String(readError);
      return { errors: [`${sourceName}: ${detail}`], warnings: [] };
    } catch (fallbackError) {
      return {
        errors: [`${sourceName}: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`],
        warnings: [],
      };
    }
  }

  /**
   * Validate one source by name.
   *
   * A read failure falls back to the composed sources. Once the direct file
   * has been read, a parse failure is reported as `invalid YAML` and is never
   * replaced by the fallback result.
   */
  async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
    let content: string;
    try {
      const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
      content = file.content;
    } catch (error) {
      return this.validateComposedSource(deps, connectionId, sourceName, error);
    }

    try {
      const parsed = YAML.parse(content) as unknown as Record<string, unknown>;
      return this.validateParsedSource(sourceName, parsed);
    } catch (error) {
      return {
        errors: [`${sourceName}: invalid YAML — ${error instanceof Error ? error.message : String(error)}`],
        warnings: [],
      };
    }
  }
}
|
||||
|
|
@ -671,6 +722,7 @@ export function createLocalBundleIngestRuntime(
|
|||
workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency,
|
||||
workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget,
|
||||
workUnitFailureMode: options.project.config.ingest.workUnits.failureMode,
|
||||
ingestTraceLevel: ingestTraceLevelFromEnv(),
|
||||
},
|
||||
skillsRegistry: new SkillsRegistryService({ skillsDir, logger }),
|
||||
promptService,
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ function snapshot(overrides: Partial<MemoryFlowReplayInput> = {}): MemoryFlowRep
|
|||
{ type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 },
|
||||
{ type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 },
|
||||
{ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 },
|
||||
{ type: 'stage_progress', stage: 'integration', percent: 80, message: 'Integrating 1/1 patches: orders' },
|
||||
{ type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 },
|
||||
{ type: 'work_unit_step', unitKey: 'orders', stepIndex: 1, stepBudget: 40 },
|
||||
{ type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/orders.md' },
|
||||
|
|
|
|||
|
|
@ -53,6 +53,23 @@ export const memoryFlowEventSchema = z.discriminatedUnion('type', [
|
|||
stage: z.enum(['source', 'chunks', 'workUnits', 'actions', 'gates', 'saved']),
|
||||
reason: z.string().min(1),
|
||||
}),
|
||||
eventSchema({
|
||||
type: z.literal('stage_progress'),
|
||||
stage: z.enum([
|
||||
'source',
|
||||
'integration',
|
||||
'reconciliation',
|
||||
'post_processor',
|
||||
'wiki_sl_ref_repair',
|
||||
'final_gates',
|
||||
'save',
|
||||
'provenance',
|
||||
'report',
|
||||
]),
|
||||
percent: z.number().min(0).max(100),
|
||||
message: z.string().min(1),
|
||||
transient: z.boolean().optional(),
|
||||
}),
|
||||
eventSchema({
|
||||
type: z.literal('work_unit_started'),
|
||||
unitKey: z.string().min(1),
|
||||
|
|
|
|||
|
|
@ -44,6 +44,22 @@ type MemoryFlowEventPayload =
|
|||
stage: MemoryFlowColumnId;
|
||||
reason: string;
|
||||
}
|
||||
| {
|
||||
type: 'stage_progress';
|
||||
stage:
|
||||
| 'source'
|
||||
| 'integration'
|
||||
| 'reconciliation'
|
||||
| 'post_processor'
|
||||
| 'wiki_sl_ref_repair'
|
||||
| 'final_gates'
|
||||
| 'save'
|
||||
| 'provenance'
|
||||
| 'report';
|
||||
percent: number;
|
||||
message: string;
|
||||
transient?: boolean;
|
||||
}
|
||||
| {
|
||||
type: 'work_unit_started';
|
||||
unitKey: string;
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import type {
|
|||
import type { ToolContext, ToolSession, TouchedSlSource } from '../tools/index.js';
|
||||
import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js';
|
||||
import type { CanonicalPin } from './canonical-pins.js';
|
||||
import type { IngestTraceLevel } from './ingest-trace.js';
|
||||
import type { IngestReportSnapshot } from './reports.js';
|
||||
import type {
|
||||
ReconcileCandidateForPrompt,
|
||||
|
|
@ -142,6 +143,7 @@ export interface IngestSettingsPort {
|
|||
workUnitMaxConcurrency?: number;
|
||||
workUnitStepBudget?: number;
|
||||
workUnitFailureMode?: 'abort' | 'continue';
|
||||
ingestTraceLevel?: IngestTraceLevel;
|
||||
}
|
||||
|
||||
export interface IngestGitAuthor {
|
||||
|
|
@ -155,6 +157,7 @@ export interface IngestStoragePort {
|
|||
resolveUploadDir(uploadId: string): string;
|
||||
resolvePullDir(jobId: string): string;
|
||||
resolveTranscriptDir(jobId: string): string;
|
||||
resolveTracePath(jobId: string): string;
|
||||
}
|
||||
|
||||
export interface IngestCommitMessagePort {
|
||||
|
|
|
|||
|
|
@ -206,6 +206,47 @@ describe('parseIngestReportSnapshot', () => {
|
|||
expect(snapshot.body.toolTranscripts).toEqual([]);
|
||||
});
|
||||
|
||||
// A failed report carries `status: 'failed'`, a null commit, a trace path and
// a failure object; all must survive parsing unchanged.
it('parses failed ingest reports with trace and failure details', () => {
  const snapshot = parseIngestReportSnapshot({
    id: 'report-failed',
    runId: 'run-failed',
    jobId: 'job-failed',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-05-17T12:00:00.000Z',
    body: {
      status: 'failed',
      syncId: 'sync-failed',
      diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
      commitSha: null,
      tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl',
      failure: {
        phase: 'final_gates',
        message: 'final artifact gates failed',
      },
      workUnits: [],
      failedWorkUnits: [],
      reconciliationSkipped: true,
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
      evictionInputs: [],
      unresolvedCards: [],
      supersededBy: null,
      overrideOf: null,
      provenanceRows: [],
      toolTranscripts: [],
    },
  });

  expect(snapshot.body.status).toBe('failed');
  expect(snapshot.body.failure).toEqual({
    phase: 'final_gates',
    message: 'final artifact gates failed',
  });
  expect(snapshot.body.tracePath).toContain('trace.jsonl');
});
|
||||
|
||||
it('rejects malformed report snapshots with a concise message', () => {
|
||||
const report = validReportSnapshot();
|
||||
report.body.workUnits[0] = {
|
||||
|
|
@ -215,4 +256,93 @@ describe('parseIngestReportSnapshot', () => {
|
|||
|
||||
expect(() => parseIngestReportSnapshot(report)).toThrow('Invalid ingest report snapshot');
|
||||
});
|
||||
|
||||
// The resolver counters on `isolatedDiff` must round-trip through the parser.
it('parses isolated-diff textual resolver counters', () => {
  const snapshot = parseIngestReportSnapshot({
    id: 'report-1',
    runId: 'run-1',
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-05-18T00:00:00.000Z',
    body: {
      status: 'completed',
      syncId: 'sync-1',
      diffSummary: { added: 0, modified: 1, deleted: 0, unchanged: 0 },
      commitSha: 'abc123',
      isolatedDiff: {
        enabled: true,
        acceptedPatches: 2,
        textualConflicts: 1,
        semanticConflicts: 0,
        resolverAttempts: 1,
        resolverRepairs: 1,
        resolverFailures: 0,
      },
      workUnits: [],
      failedWorkUnits: [],
      reconciliationSkipped: true,
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
      artifactResolutions: [],
      evictionInputs: [],
      unresolvedCards: [],
      supersededBy: null,
      overrideOf: null,
      provenanceRows: [],
      toolTranscripts: [],
    },
  });

  expect(snapshot.body.isolatedDiff).toMatchObject({
    resolverAttempts: 1,
    resolverRepairs: 1,
    resolverFailures: 0,
  });
});
|
||||
|
||||
// The gate-repair counters on `isolatedDiff` must round-trip through the parser.
it('parses isolated-diff gate repair counters', () => {
  const snapshot = parseIngestReportSnapshot({
    id: 'report-1',
    runId: 'run-1',
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-05-18T00:00:00.000Z',
    body: {
      status: 'completed',
      syncId: 'sync-1',
      diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
      commitSha: 'abc123',
      isolatedDiff: {
        enabled: true,
        acceptedPatches: 1,
        textualConflicts: 0,
        semanticConflicts: 1,
        gateRepairAttempts: 1,
        gateRepairs: 1,
        gateRepairFailures: 0,
      },
      workUnits: [],
      failedWorkUnits: [],
      reconciliationSkipped: true,
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
      evictionInputs: [],
      unresolvedCards: [],
      supersededBy: null,
      overrideOf: null,
      provenanceRows: [],
      toolTranscripts: [],
    },
  });

  expect(snapshot.body.isolatedDiff).toMatchObject({
    gateRepairAttempts: 1,
    gateRepairs: 1,
    gateRepairFailures: 0,
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -123,6 +123,12 @@ const sourceFetchReportSchema = z.object({
|
|||
warnings: z.array(sourceFetchIssueSchema).default([]),
|
||||
});
|
||||
|
||||
// Schema for the optional `failure` entry of an ingest report body.
const ingestReportFailureSchema = z.object({
  // Both text fields must be non-empty strings.
  phase: z.string().min(1),
  message: z.string().min(1),
  // Optional free-form key/value payload; values are not validated further.
  details: z.record(z.string(), z.unknown()).optional(),
});
|
||||
|
||||
export const ingestReportSnapshotSchema = z
|
||||
.object({
|
||||
id: z.string().min(1),
|
||||
|
|
@ -133,10 +139,30 @@ export const ingestReportSnapshotSchema = z
|
|||
createdAt: z.string().min(1),
|
||||
body: z
|
||||
.object({
|
||||
status: z.enum(['completed', 'failed']).optional(),
|
||||
syncId: z.string().min(1),
|
||||
diffSummary: ingestDiffSummarySchema,
|
||||
fetch: sourceFetchReportSchema.optional(),
|
||||
commitSha: z.string().nullable(),
|
||||
tracePath: z.string().optional(),
|
||||
failure: ingestReportFailureSchema.optional(),
|
||||
isolatedDiff: z
|
||||
.object({
|
||||
enabled: z.boolean(),
|
||||
integrationWorktreePath: z.string().optional(),
|
||||
ingestionBaseSha: z.string().optional(),
|
||||
projectionSha: z.string().nullable().optional(),
|
||||
acceptedPatches: z.number().int().min(0),
|
||||
textualConflicts: z.number().int().min(0),
|
||||
semanticConflicts: z.number().int().min(0),
|
||||
resolverAttempts: z.number().int().min(0).default(0),
|
||||
resolverRepairs: z.number().int().min(0).default(0),
|
||||
resolverFailures: z.number().int().min(0).default(0),
|
||||
gateRepairAttempts: z.number().int().min(0).default(0),
|
||||
gateRepairs: z.number().int().min(0).default(0),
|
||||
gateRepairFailures: z.number().int().min(0).default(0),
|
||||
})
|
||||
.optional(),
|
||||
workUnits: z.array(
|
||||
z.object({
|
||||
unitKey: z.string().min(1),
|
||||
|
|
|
|||
|
|
@ -48,11 +48,35 @@ export interface IngestReportPostProcessorOutcome {
|
|||
touchedSources: TouchedSlSource[];
|
||||
}
|
||||
|
||||
/** Failure record carried on the optional `failure` field of `IngestReportBody`. */
export interface IngestReportFailure {
  /** Name of the phase the failure is attributed to. */
  phase: string;
  /** Failure message text. */
  message: string;
  /** Optional free-form structured details. */
  details?: Record<string, unknown>;
}
|
||||
|
||||
export interface IngestReportBody {
|
||||
status?: 'completed' | 'failed';
|
||||
syncId: string;
|
||||
diffSummary: IngestDiffSummary;
|
||||
fetch?: SourceFetchReport;
|
||||
commitSha: string | null;
|
||||
tracePath?: string;
|
||||
failure?: IngestReportFailure;
|
||||
isolatedDiff?: {
|
||||
enabled: boolean;
|
||||
integrationWorktreePath?: string;
|
||||
ingestionBaseSha?: string;
|
||||
projectionSha?: string | null;
|
||||
acceptedPatches: number;
|
||||
textualConflicts: number;
|
||||
semanticConflicts: number;
|
||||
resolverAttempts?: number;
|
||||
resolverRepairs?: number;
|
||||
resolverFailures?: number;
|
||||
gateRepairAttempts?: number;
|
||||
gateRepairs?: number;
|
||||
gateRepairFailures?: number;
|
||||
};
|
||||
workUnits: IngestReportWorkUnit[];
|
||||
failedWorkUnits: string[];
|
||||
reconciliationSkipped: boolean;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,38 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
assertSemanticLayerTargetPathsAllowed,
|
||||
findDisallowedSemanticLayerTargetPaths,
|
||||
semanticLayerConnectionIdFromPath,
|
||||
} from './semantic-layer-target-policy.js';
|
||||
|
||||
// Unit tests for the semantic-layer target policy helpers.
describe('semantic-layer target policy', () => {
  // Fresh allow-list per case so no state is shared between tests.
  const onlyWarehouse = (): Set<string> => new Set(['warehouse']);

  it('extracts connection ids from semantic-layer paths', () => {
    const cases: Array<[path: string, expected: string | null]> = [
      ['semantic-layer/warehouse/orders.yaml', 'warehouse'],
      ['a/semantic-layer/finance/orders.yaml', 'finance'],
      ['wiki/global/orders.md', null],
    ];

    for (const [path, expected] of cases) {
      expect(semanticLayerConnectionIdFromPath(path)).toBe(expected);
    }
  });

  it('finds semantic-layer paths outside the allowed target connections', () => {
    const paths = [
      'semantic-layer/warehouse/orders.yaml',
      'semantic-layer/finance/orders.yaml',
      'wiki/global/orders.md',
    ];

    const violations = findDisallowedSemanticLayerTargetPaths({
      paths,
      allowedConnectionIds: onlyWarehouse(),
    });

    expect(violations).toEqual([{ path: 'semantic-layer/finance/orders.yaml', connectionId: 'finance' }]);
  });

  it('throws a deterministic error for unauthorized semantic-layer targets', () => {
    const attempt = (): void =>
      assertSemanticLayerTargetPathsAllowed({
        paths: ['semantic-layer/finance/orders.yaml', 'semantic-layer/marketing/accounts.yaml'],
        allowedConnectionIds: onlyWarehouse(),
      });

    expect(attempt).toThrow(
      /semantic-layer target connection not allowed: semantic-layer\/finance\/orders\.yaml \(finance\), semantic-layer\/marketing\/accounts\.yaml \(marketing\); allowed: warehouse/,
    );
  });
});
|
||||
42
packages/context/src/ingest/semantic-layer-target-policy.ts
Normal file
42
packages/context/src/ingest/semantic-layer-target-policy.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
/** Input shared by the semantic-layer target policy checks in this module. */
export interface SemanticLayerTargetPolicyInput {
  /** Candidate paths; only paths that resolve to a semantic-layer connection id are checked. */
  paths: ReadonlyArray<string>;
  /** Connection ids that semantic-layer paths are permitted to target. */
  allowedConnectionIds: ReadonlySet<string>;
}
|
||||
|
||||
/** A path whose semantic-layer connection id is not in the allowed set. */
export interface SemanticLayerTargetPolicyViolation {
  /** The offending path, exactly as supplied by the caller. */
  path: string;
  /** Connection id extracted from `path`. */
  connectionId: string;
}
|
||||
|
||||
/**
 * Extracts the connection id from a `semantic-layer/<connectionId>/...` path.
 *
 * A single leading `a/` or `b/` segment is removed first (presumably the
 * prefixes used in unified-diff paths — confirm against callers).
 *
 * @param path - Path to inspect.
 * @returns The segment directly after `semantic-layer/`, or `null` when the
 *   path does not start with `semantic-layer/<segment>/`.
 */
export function semanticLayerConnectionIdFromPath(path: string): string | null {
  const withoutPrefix = path.replace(/^[ab]\//, '');
  const captured = withoutPrefix.match(/^semantic-layer\/([^/]+)\//);
  if (captured === null) {
    return null;
  }
  return captured[1] ?? null;
}
|
||||
|
||||
/**
 * Lists the input paths that target a semantic-layer connection outside the allowed set.
 *
 * Paths for which `semanticLayerConnectionIdFromPath` returns `null` are ignored.
 *
 * @param input - Paths to check and the allowed connection ids.
 * @returns Violations ordered by connection id, then by path (both compared with `localeCompare`).
 */
export function findDisallowedSemanticLayerTargetPaths(
  input: SemanticLayerTargetPolicyInput,
): SemanticLayerTargetPolicyViolation[] {
  const violations: SemanticLayerTargetPolicyViolation[] = [];

  for (const path of input.paths) {
    const connectionId = semanticLayerConnectionIdFromPath(path);
    if (connectionId === null || input.allowedConnectionIds.has(connectionId)) {
      continue;
    }
    violations.push({ path, connectionId });
  }

  // Connection id is the primary sort key; path breaks ties.
  violations.sort(
    (left, right) =>
      left.connectionId.localeCompare(right.connectionId) || left.path.localeCompare(right.path),
  );
  return violations;
}
|
||||
|
||||
/**
 * Throws when any input path targets a semantic-layer connection outside the allowed set.
 *
 * @param input - Paths to check and the allowed connection ids.
 * @throws Error listing each violation as `path (connectionId)` followed by the
 *   sorted allowed connection ids, or `(none)` when the allowed set is empty.
 */
export function assertSemanticLayerTargetPathsAllowed(input: SemanticLayerTargetPolicyInput): void {
  const violations = findDisallowedSemanticLayerTargetPaths(input);
  if (violations.length > 0) {
    const allowedIds = Array.from(input.allowedConnectionIds).sort();
    const offending = violations.map(({ path, connectionId }) => `${path} (${connectionId})`).join(', ');
    const allowedText = allowedIds.length === 0 ? '(none)' : allowedIds.join(', ');
    throw new Error(`semantic-layer target connection not allowed: ${offending}; allowed: ${allowedText}`);
  }
}
|
||||
|
|
@ -41,6 +41,9 @@ export interface WorkUnitOutcome {
|
|||
touchedSlSources: TouchedSlSource[];
|
||||
slDisallowed?: boolean;
|
||||
slDisallowedReason?: 'lookml_connection_mismatch';
|
||||
patchPath?: string;
|
||||
patchTouchedPaths?: string[];
|
||||
childWorktreePath?: string;
|
||||
}
|
||||
|
||||
export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit): Promise<WorkUnitOutcome> {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { KtxEmbeddingPort } from '../core/embedding.js';
|
||||
import type { SemanticLayerService } from '../sl/index.js';
|
||||
import type { MemoryFlowEventSink } from './memory-flow/types.js';
|
||||
|
||||
/** String-literal union of the ingest trigger kinds. */
export type IngestTrigger =
  | 'upload'
  | 'scheduled_pull'
  | 'manual_resync'
  | 'manual_override';
|
||||
|
|
@ -47,6 +48,7 @@ export interface ChunkResult {
|
|||
/** Context object identifying the connection and source key, with an optional memory-flow sink. */
export interface FetchContext {
  /** Connection id. */
  connectionId: string;
  /** Source key. */
  sourceKey: string;
  /** Optional memory-flow event sink. */
  memoryFlow?: MemoryFlowEventSink;
}
|
||||
|
||||
type SourceFetchIssueKind =
|
||||
|
|
@ -96,6 +98,26 @@ export interface ClusterWorkUnitsContext {
|
|||
embedding: KtxEmbeddingPort;
|
||||
}
|
||||
|
||||
/** Argument passed to the optional `SourceAdapter.project` hook. */
export interface DeterministicProjectionContext {
  // Identifiers for the connection, source and run.
  connectionId: string;
  sourceKey: string;
  syncId: string;
  jobId: string;
  runId: string;

  // Filesystem locations.
  stagedDir: string;
  workdir: string;

  /** Opaque adapter-specific parse output; typed `unknown`, so consumers must narrow it. */
  parseArtifacts?: unknown;
  /** Semantic-layer service made available to the projection. */
  semanticLayerService: SemanticLayerService;
}
|
||||
|
||||
/** Value resolved by the optional `SourceAdapter.project` hook. */
export interface ProjectionResult {
  /** Warning messages. */
  warnings: string[];
  /** Error messages. */
  errors: string[];
  /** Semantic-layer sources touched, identified by connection id and source name. */
  touchedSources: { connectionId: string; sourceName: string }[];
  /** Keys of wiki pages that changed. */
  changedWikiPageKeys: string[];
  /** Optional opaque payload; typed `unknown`. */
  result?: unknown;
}
|
||||
|
||||
export interface SourceAdapter {
|
||||
readonly source: string;
|
||||
readonly skillNames: string[];
|
||||
|
|
@ -109,6 +131,7 @@ export interface SourceAdapter {
|
|||
listTargetConnectionIds?(stagedDir: string): Promise<string[]>;
|
||||
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult>;
|
||||
clusterWorkUnits?(ctx: ClusterWorkUnitsContext): Promise<WorkUnit[]>;
|
||||
project?(ctx: DeterministicProjectionContext): Promise<ProjectionResult>;
|
||||
describeScope?(stagedDir: string): Promise<ScopeDescriptor>;
|
||||
onPullSucceeded?(ctx: {
|
||||
connectionId: string;
|
||||
|
|
|
|||
153
packages/context/src/ingest/wiki-body-refs.test.ts
Normal file
153
packages/context/src/ingest/wiki-body-refs.test.ts
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { findInvalidWikiBodyRefs, parseWikiBodyRefs } from './wiki-body-refs.js';
|
||||
|
||||
// Shared semantic-layer fixture: one source with two string columns, one measure and one segment.
const sources = [
  {
    name: 'mart_account_segments',
    grain: ['account_id'],
    columns: ['account_id', 'segment'].map((name) => ({ name, type: 'string' })),
    joins: [],
    measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
    segments: [{ name: 'enterprise', expr: "segment = 'enterprise'" }],
    table: 'analytics.mart_account_segments',
  },
];
|
||||
|
||||
// Behavioural tests for wiki body reference parsing and validation.
describe('wiki body refs', () => {
  type ValidationInput = Parameters<typeof findInvalidWikiBodyRefs>[0];

  // Runs the validator with the defaults most cases share; each case overrides what it needs.
  const validate = (
    overrides: Pick<ValidationInput, 'pageKey' | 'body'> & Partial<ValidationInput>,
  ): Promise<string[]> =>
    findInvalidWikiBodyRefs({
      visibleConnectionIds: ['warehouse'],
      loadSources: async () => sources,
      tableExists: async () => true,
      ...overrides,
    });

  // Reports only the account-segments table as existing.
  const onlyAccountSegmentsTable = async (_connectionId: string, tableRef: string): Promise<boolean> =>
    tableRef === 'analytics.mart_account_segments';

  it('parses only explicit inline-code body references outside fenced blocks', () => {
    const lines = [
      'Valid `mart_account_segments.total_contract_arr` and `source:mart_account_segments`.',
      'Also `warehouse/mart_account_segments.segment` and `table:analytics.mart_account_segments`.',
      'Ignore prose mart_account_segments.total_contract_arr_cents.',
      'Ignore `single_token`.',
      'Ignore wildcard pattern `mart_nrr_quarterly.*_arr_cents`.',
      'Ignore condition `users.is_internal = false`.',
      '```sql',
      'select `mart_account_segments.total_contract_arr_cents`',
      '```',
    ];

    const refs = parseWikiBodyRefs(lines.join('\n'));

    expect(refs).toEqual([
      { kind: 'sl_entity', connectionId: null, sourceName: 'mart_account_segments', entityName: 'total_contract_arr' },
      { kind: 'sl_source', connectionId: null, sourceName: 'mart_account_segments' },
      { kind: 'sl_entity', connectionId: 'warehouse', sourceName: 'mart_account_segments', entityName: 'segment' },
      { kind: 'table', connectionId: null, tableRef: 'analytics.mart_account_segments' },
    ]);
  });

  it('rejects stale inline-code semantic-layer references', async () => {
    const invalid = await validate({
      pageKey: 'account-segments',
      body: 'ARR is documented as `mart_account_segments.total_contract_arr_cents`.',
    });

    expect(invalid).toEqual([
      'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
    ]);
  });

  it('does not treat wildcard inline-code patterns as exact semantic-layer entity references', async () => {
    const invalid = await validate({
      pageKey: 'revenue-metrics-encoding',
      body: 'Cents columns include `mart_nrr_quarterly.*_arr_cents` and `mart_retention_movement_breakout.*_arr_cents`.',
      loadSources: async () => [
        { name: 'mart_nrr_quarterly', grain: [], columns: [], joins: [], measures: [], table: 'analytics.mart_nrr_quarterly' },
        {
          name: 'mart_retention_movement_breakout',
          grain: [],
          columns: [],
          joins: [],
          measures: [],
          table: 'analytics.mart_retention_movement_breakout',
        },
      ],
    });

    expect(invalid).toEqual([]);
  });

  it('does not treat inline-code SQL predicates as exact semantic-layer entity references', async () => {
    const invalid = await validate({
      pageKey: 'account-reporting-exclusions',
      body: 'Exclude internal users with `users.is_internal = false` and test users with `users.is_test = false`.',
      loadSources: async () => [
        {
          name: 'users',
          grain: [],
          columns: [
            { name: 'is_internal', type: 'boolean' },
            { name: 'is_test', type: 'boolean' },
          ],
          joins: [],
          measures: [],
          table: 'analytics.users',
        },
      ],
    });

    expect(invalid).toEqual([]);
  });

  it('validates source, dimension, segment, measure, and table references', async () => {
    const bodyLines = [
      '`mart_account_segments.total_contract_arr`',
      '`mart_account_segments.segment`',
      '`mart_account_segments.enterprise`',
      '`source:mart_account_segments`',
      '`table:analytics.mart_account_segments`',
    ];

    const invalid = await validate({
      pageKey: 'account-segments',
      body: bodyLines.join('\n'),
      tableExists: onlyAccountSegmentsTable,
    });

    expect(invalid).toEqual([]);
  });

  it('ignores two-part inline code when the source is not visible', async () => {
    const bodyLines = [
      'A version token like `node.v22` is not a semantic-layer reference.',
      'A raw table must use `table:analytics.mart_account_segments`.',
    ];

    const invalid = await validate({
      pageKey: 'engineering-notes',
      body: bodyLines.join('\n'),
      tableExists: onlyAccountSegmentsTable,
    });

    expect(invalid).toEqual([]);
  });

  it('still rejects explicit missing source and table references', async () => {
    const bodyLines = [
      '`source:missing_source`',
      '`warehouse/source:missing_source`',
      '`table:analytics.missing_table`',
    ];

    const invalid = await validate({
      pageKey: 'account-segments',
      body: bodyLines.join('\n'),
      tableExists: async () => false,
    });

    expect(invalid).toEqual([
      'account-segments: unknown semantic-layer source missing_source',
      'account-segments: unknown semantic-layer source warehouse/missing_source',
      'account-segments: unknown raw table analytics.missing_table',
    ]);
  });
});
|
||||
141
packages/context/src/ingest/wiki-body-refs.ts
Normal file
141
packages/context/src/ingest/wiki-body-refs.ts
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
import type { SemanticLayerSource } from '../sl/index.js';
|
||||
|
||||
/**
 * A reference extracted from an inline-code token in a wiki page body.
 *
 * `connectionId` is `null` when the token has no `<connection>/` prefix.
 */
export type WikiBodyRef =
  | {
      /** Two-part `source.entity` token. */
      kind: 'sl_entity';
      connectionId: string | null;
      sourceName: string;
      entityName: string;
    }
  | {
      /** `source:<name>` token. */
      kind: 'sl_source';
      connectionId: string | null;
      sourceName: string;
    }
  | {
      /** `table:<ref>` token. */
      kind: 'table';
      connectionId: string | null;
      tableRef: string;
    };
|
||||
|
||||
/** Input for `findInvalidWikiBodyRefs`. */
export interface WikiBodyRefValidationInput {
  /** Page key used as the prefix of every reported error message. */
  pageKey: string;
  /** Wiki page body to scan for inline-code references. */
  body: string;
  /** Connections searched when a reference has no explicit connection prefix. */
  visibleConnectionIds: string[];
  /** Loads the semantic-layer sources of one connection. */
  loadSources(connectionId: string): Promise<SemanticLayerSource[]>;
  /** Reports whether a raw table reference exists on one connection. */
  tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
|
||||
|
||||
// Matches one inline-code span on a single line: a backtick, one or more characters that are
// neither a backtick nor a newline (capture group 1), then a closing backtick. The `g` flag
// lets `String.prototype.matchAll` iterate over every span in a line.
const inlineCodePattern = /`([^`\n]+)`/g;
|
||||
|
||||
/**
 * Returns the lines of `body` that are not inside a triple-backtick fence.
 *
 * Any line whose first non-whitespace characters are three backticks toggles the
 * fenced state and is itself dropped.
 */
function visibleLinesOutsideFences(body: string): string[] {
  let insideFence = false;
  return body.split('\n').filter((line) => {
    if (/^\s*```/.test(line)) {
      insideFence = !insideFence;
      return false;
    }
    return !insideFence;
  });
}
|
||||
|
||||
/**
 * Splits an optional `<connectionId>/` prefix off a token at its first slash.
 *
 * A token with no slash, or with a slash at index 0, is returned whole with a
 * `null` connection id.
 */
function parseConnectionScoped(value: string): { connectionId: string | null; body: string } {
  const separatorAt = value.indexOf('/');
  return separatorAt > 0
    ? { connectionId: value.slice(0, separatorAt), body: value.slice(separatorAt + 1) }
    : { connectionId: null, body: value };
}
|
||||
|
||||
/** True when `value` is an ASCII letter or underscore followed only by ASCII letters, digits or underscores. */
function isIdentifierToken(value: string): boolean {
  const identifierPattern = /^[A-Za-z_][A-Za-z0-9_]*$/;
  return identifierPattern.exec(value) !== null;
}
|
||||
|
||||
/**
 * Extracts references from inline-code spans that lie outside fenced blocks.
 *
 * Recognised token forms, each optionally prefixed with `<connectionId>/`:
 * - `source:<name>` yields an `sl_source` ref
 * - `table:<ref>` yields a `table` ref
 * - `<source>.<entity>`, where both parts are identifier tokens, yields an `sl_entity` ref
 *
 * Every other token is ignored.
 *
 * @param body - Wiki page body.
 * @returns References in order of appearance.
 */
export function parseWikiBodyRefs(body: string): WikiBodyRef[] {
  const sourcePrefix = 'source:';
  const tablePrefix = 'table:';
  const collected: WikiBodyRef[] = [];

  // Flatten every inline-code span of every visible line into trimmed tokens.
  const tokens = visibleLinesOutsideFences(body).flatMap((line) =>
    Array.from(line.matchAll(inlineCodePattern), (hit) => (hit[1] ?? '').trim()),
  );

  for (const token of tokens) {
    if (token === '') {
      continue;
    }
    const { connectionId, body: target } = parseConnectionScoped(token);

    if (target.startsWith(sourcePrefix)) {
      const sourceName = target.slice(sourcePrefix.length).trim();
      if (sourceName) {
        collected.push({ kind: 'sl_source', connectionId, sourceName });
      }
      continue;
    }

    if (target.startsWith(tablePrefix)) {
      const tableRef = target.slice(tablePrefix.length).trim();
      if (tableRef) {
        collected.push({ kind: 'table', connectionId, tableRef });
      }
      continue;
    }

    // Only an exact two-part dotted identifier counts as an entity reference.
    const segments = target.split('.');
    if (segments.length !== 2) {
      continue;
    }
    const [sourceName = '', entityName = ''] = segments;
    if (isIdentifierToken(sourceName) && isIdentifierToken(entityName)) {
      collected.push({ kind: 'sl_entity', connectionId, sourceName, entityName });
    }
  }

  return collected;
}
|
||||
|
||||
/** Collects the names of a source's measures, columns and segments into one set; absent lists count as empty. */
function entityNames(source: SemanticLayerSource): Set<string> {
  const names = new Set<string>();
  for (const measure of source.measures ?? []) {
    names.add(measure.name);
  }
  for (const column of source.columns ?? []) {
    names.add(column.name);
  }
  for (const segment of source.segments ?? []) {
    names.add(segment.name);
  }
  return names;
}
|
||||
|
||||
/**
 * Validates the inline-code references of a wiki page body.
 *
 * A reference with an explicit connection prefix is checked against that connection
 * only; otherwise every id in `visibleConnectionIds` is searched.
 * - `table` refs are invalid when `tableExists` is false for all candidate connections.
 * - `sl_source` refs are invalid when no candidate connection has a source of that name.
 * - `sl_entity` refs are invalid only when the source is found but has no measure,
 *   column or segment of that name; an unknown source is silently ignored.
 *
 * Sources are loaded at most once per connection for the duration of the call.
 *
 * @param input - Page key, body and lookup callbacks.
 * @returns One message per invalid reference, in order of appearance.
 */
export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> {
  const problems: string[] = [];
  const sourcesByConnection = new Map<string, SemanticLayerSource[]>();

  // Memoised wrapper around input.loadSources.
  const sourcesFor = async (connectionId: string): Promise<SemanticLayerSource[]> => {
    let sources = sourcesByConnection.get(connectionId);
    if (!sources) {
      sources = await input.loadSources(connectionId);
      sourcesByConnection.set(connectionId, sources);
    }
    return sources;
  };

  // Returns the first source with the given name, searching connections in order.
  const locateSource = async (
    candidateConnections: string[],
    sourceName: string,
  ): Promise<SemanticLayerSource | null> => {
    for (const connectionId of candidateConnections) {
      const hit = (await sourcesFor(connectionId)).find((candidate) => candidate.name === sourceName);
      if (hit) {
        return hit;
      }
    }
    return null;
  };

  const scopePrefix = (connectionId: string | null): string => (connectionId ? `${connectionId}/` : '');

  for (const ref of parseWikiBodyRefs(input.body)) {
    const candidateConnections = ref.connectionId ? [ref.connectionId] : input.visibleConnectionIds;

    switch (ref.kind) {
      case 'table': {
        const existence = await Promise.all(
          candidateConnections.map((connectionId) => input.tableExists(connectionId, ref.tableRef)),
        );
        if (existence.every((exists) => !exists)) {
          problems.push(`${input.pageKey}: unknown raw table ${scopePrefix(ref.connectionId)}${ref.tableRef}`);
        }
        break;
      }
      case 'sl_source': {
        const source = await locateSource(candidateConnections, ref.sourceName);
        if (source === null) {
          problems.push(
            `${input.pageKey}: unknown semantic-layer source ${scopePrefix(ref.connectionId)}${ref.sourceName}`,
          );
        }
        break;
      }
      case 'sl_entity': {
        const source = await locateSource(candidateConnections, ref.sourceName);
        if (source !== null && !entityNames(source).has(ref.entityName)) {
          problems.push(`${input.pageKey}: unknown semantic-layer entity ${ref.sourceName}.${ref.entityName}`);
        }
        break;
      }
    }
  }

  return problems;
}
|
||||
|
|
@ -78,6 +78,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
managedSettings: {
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [],
|
||||
},
|
||||
strictMcpConfig: true,
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
|
|
@ -144,6 +149,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
|
||||
const options = query.mock.calls[0][0].options;
|
||||
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
|
||||
expect(options.managedSettings).toEqual({
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [{ serverName: 'ktx' }],
|
||||
});
|
||||
expect(options.strictMcpConfig).toBe(true);
|
||||
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
|
||||
behavior: 'allow',
|
||||
toolUseID: '1',
|
||||
|
|
@ -176,6 +186,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
managedSettings: {
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [],
|
||||
},
|
||||
strictMcpConfig: true,
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
|
|
@ -268,6 +283,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
|
||||
const options = query.mock.calls[0][0].options;
|
||||
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
|
||||
expect(options.managedSettings).toEqual({
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [{ serverName: 'ktx' }],
|
||||
});
|
||||
expect(options.strictMcpConfig).toBe(true);
|
||||
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
|
||||
behavior: 'allow',
|
||||
toolUseID: '1',
|
||||
|
|
@ -334,6 +354,10 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
answer: 'yes',
|
||||
});
|
||||
expect(objectQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ PATH: '/usr/bin' }));
|
||||
expect(objectQuery.mock.calls[0][0].options.managedSettings).toEqual({
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [],
|
||||
});
|
||||
expect(objectQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }), // pragma: allowlist secret
|
||||
);
|
||||
|
|
@ -374,6 +398,10 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
telemetryTags: { operationName: 'test' },
|
||||
});
|
||||
expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
|
||||
expect(agentQuery.mock.calls[0][0].options.managedSettings).toEqual({
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [{ serverName: 'ktx' }],
|
||||
});
|
||||
expect(agentQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }),
|
||||
);
|
||||
|
|
@ -442,6 +470,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
managedSettings: {
|
||||
allowManagedMcpServersOnly: true,
|
||||
allowedMcpServers: [],
|
||||
},
|
||||
strictMcpConfig: true,
|
||||
allowedTools: [],
|
||||
persistSession: false,
|
||||
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
|
||||
|
|
|
|||
|
|
@ -45,6 +45,8 @@ const BUILTIN_TOOLS = [
|
|||
'TodoWrite',
|
||||
];
|
||||
|
||||
// Name of the ktx MCP server; used both as the `mcpServers` key and as the SDK server's `name`.
const KTX_MCP_SERVER_NAME = 'ktx';
|
||||
|
||||
/** Type guard: narrows an SDK message to `SDKResultMessage` when its `type` is `'result'`. */
function isResult(message: SDKMessage): message is SDKResultMessage {
  const { type } = message;
  return type === 'result';
}
|
||||
|
|
@ -113,7 +115,14 @@ function assertInitIsolation(
|
|||
}
|
||||
|
||||
/**
 * Returns the MCP server names expected for a given tool set.
 *
 * @param tools - Runtime tool set, if any.
 * @returns A set containing only `KTX_MCP_SERVER_NAME` when `tools` has at least
 *   one key; an empty set otherwise.
 */
function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
  // Single return: the earlier hard-coded 'ktx' variant made this line unreachable.
  return tools && Object.keys(tools).length > 0 ? new Set([KTX_MCP_SERVER_NAME]) : new Set();
}
|
||||
|
||||
/**
 * Builds managed settings that allow only managed MCP servers and list the given names.
 *
 * @param serverNames - Server names to allow; an empty array allows none.
 */
function managedMcpSettings(serverNames: string[]): NonNullable<Options['managedSettings']> {
  const allowedMcpServers = serverNames.map((serverName) => ({ serverName }));
  return { allowManagedMcpServersOnly: true, allowedMcpServers };
}
|
||||
|
||||
function baseOptions(input: {
|
||||
|
|
@ -125,6 +134,7 @@ function baseOptions(input: {
|
|||
}): Options {
|
||||
const toolIds = mcpToolIds(input.tools ?? {});
|
||||
const allowedToolIds = new Set(toolIds);
|
||||
const expectedServerNames = [...expectedMcpServerNames(input.tools)];
|
||||
return {
|
||||
cwd: input.projectDir,
|
||||
model: input.model,
|
||||
|
|
@ -133,6 +143,8 @@ function baseOptions(input: {
|
|||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
managedSettings: managedMcpSettings(expectedServerNames),
|
||||
strictMcpConfig: true,
|
||||
allowedTools: toolIds,
|
||||
disallowedTools: BUILTIN_TOOLS,
|
||||
canUseTool: async (toolName, _toolInput, options) =>
|
||||
|
|
@ -147,7 +159,14 @@ function baseOptions(input: {
|
|||
persistSession: false,
|
||||
env: createKtxClaudeCodeEnv(input.env),
|
||||
...(input.tools && Object.keys(input.tools).length > 0
|
||||
? { mcpServers: { ktx: createSdkMcpServer({ name: 'ktx', tools: createClaudeSdkTools(input.tools) }) } }
|
||||
? {
|
||||
mcpServers: {
|
||||
[KTX_MCP_SERVER_NAME]: createSdkMcpServer({
|
||||
name: KTX_MCP_SERVER_NAME,
|
||||
tools: createClaudeSdkTools(input.tools),
|
||||
}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,6 +99,27 @@ describe('SlEditSourceTool — session gating', () => {
|
|||
);
|
||||
});
|
||||
|
||||
// An edit aimed at a connection outside the session's allow-list must fail and record no action.
it('rejects session-scoped edits outside allowed target connections', async () => {
  const { tool } = makeTool();
  const session = makeSession({ allowedConnectionNames: new Set(['warehouse']) });
  const context: ToolContext = { ...baseContext, session };

  // Targets 'finance', which is not in the allow-list above.
  const outOfScopeEdit = {
    connectionId: 'finance',
    sourceName: 'orders',
    yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
  } as any;

  const { structured, markdown } = await tool.call(outOfScopeEdit, context);

  expect(structured.success).toBe(false);
  expect(markdown).toContain('connectionId "finance" is outside this ingest session');
  expect(session.actions).toEqual([]);
});
|
||||
|
||||
it('indexes normally when no session is present', async () => {
|
||||
const { tool, slSearchService } = makeTool();
|
||||
const result = await tool.call(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,12 @@
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
import {
|
||||
addTouchedSlSource,
|
||||
type ToolContext,
|
||||
type ToolOutput,
|
||||
validateActionRawPaths,
|
||||
validateActionTargetConnection,
|
||||
} from '../../tools/index.js';
|
||||
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
|
||||
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
|
|
@ -79,6 +85,10 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
|
||||
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId);
|
||||
if (!targetConnectionValidation.ok) {
|
||||
return this.buildOutput(false, [targetConnectionValidation.error], sourceName);
|
||||
}
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return this.buildOutput(false, [rawPathValidation.error], sourceName);
|
||||
|
|
|
|||
|
|
@ -133,6 +133,34 @@ describe('SlWriteSourceTool — session gating', () => {
|
|||
);
|
||||
});
|
||||
|
||||
// A write aimed at a connection outside the session's allow-list must fail and record no action.
it('rejects session-scoped writes outside allowed target connections', async () => {
  const { tool } = makeTool();
  const session = makeSession({ allowedConnectionNames: new Set(['warehouse']) });
  const context: ToolContext = { ...baseContext, session };

  const financeOrders = {
    name: 'finance_orders',
    table: 'public.orders',
    grain: ['id'],
    columns: [{ name: 'id', type: 'string' }],
    measures: [],
    joins: [],
  } as any;

  // Targets 'finance', which is not in the allow-list above.
  const outOfScopeWrite = {
    connectionId: 'finance',
    sourceName: 'finance_orders',
    source: financeOrders,
  } as any;

  const { structured, markdown } = await tool.call(outOfScopeWrite, context);

  expect(structured.success).toBe(false);
  expect(markdown).toContain('connectionId "finance" is outside this ingest session');
  expect(session.actions).toEqual([]);
});
|
||||
|
||||
it('indexes normally when no session is present', async () => {
|
||||
const { tool, slSearchService } = makeTool();
|
||||
const result = await tool.call(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,12 @@
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
import {
|
||||
addTouchedSlSource,
|
||||
type ToolContext,
|
||||
type ToolOutput,
|
||||
validateActionRawPaths,
|
||||
validateActionTargetConnection,
|
||||
} from '../../tools/index.js';
|
||||
import { sourceOverlaySchema } from '../schemas.js';
|
||||
import type { SemanticLayerService } from '../semantic-layer.service.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
|
|
@ -106,6 +112,10 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
|
||||
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId);
|
||||
if (!targetConnectionValidation.ok) {
|
||||
return this.buildOutput(false, [targetConnectionValidation.error], sourceName);
|
||||
}
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return this.buildOutput(false, [rawPathValidation.error], sourceName);
|
||||
|
|
|
|||
23
packages/context/src/tools/action-target-connection.ts
Normal file
23
packages/context/src/tools/action-target-connection.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import type { ToolSession } from './tool-session.js';
|
||||
|
||||
/** Outcome of a target-connection check: success, or failure with an error message. */
type ActionTargetConnectionValidation =
  | { ok: true }
  | { ok: false; error: string };
|
||||
|
||||
/**
 * Checks whether an action may target `connectionId` in the given session.
 *
 * The check passes when there is no session, when the session has no
 * `allowedConnectionNames`, or when that set contains `connectionId`.
 *
 * @param session - Current tool session, if any.
 * @param connectionId - Connection the action wants to target.
 * @returns `{ ok: true }`, or `{ ok: false, error }` where the error lists the sorted
 *   allowed names (`(none)` when the set is empty).
 */
export function validateActionTargetConnection(
  session: ToolSession | undefined,
  connectionId: string,
): ActionTargetConnectionValidation {
  const allowedNames = session?.allowedConnectionNames;
  if (!allowedNames || allowedNames.has(connectionId)) {
    return { ok: true };
  }

  const sortedNames = Array.from(allowedNames).sort();
  const listing = sortedNames.length === 0 ? '(none)' : sortedNames.join(', ');
  return {
    ok: false,
    error: `connectionId "${connectionId}" is outside this ingest session's allowed target connections: ${listing}`,
  };
}
|
||||
|
|
@ -32,6 +32,7 @@ export type { SqlEdit } from './sql-edit-replacer.js';
|
|||
export { applySqlEdits } from './sql-edit-replacer.js';
|
||||
export type { IngestToolMetadata, MemoryAction, ToolSession } from './tool-session.js';
|
||||
export { validateActionRawPaths } from './action-raw-paths.js';
|
||||
export { validateActionTargetConnection } from './action-target-connection.js';
|
||||
export type { TouchedSlSource, TouchedSlSourceSet } from './touched-sl-sources.js';
|
||||
export {
|
||||
addTouchedSlSource,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue