Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,53 @@
import { tool } from 'ai';
import { z } from 'zod';
import type { ArtifactResolutionRecord, StageIndex } from '../stages/stage-index.types.js';
/** Dependencies injected into the artifact-resolution emit tool. */
interface EmitArtifactResolutionDeps {
  /** Shared stage index; accepted resolutions are stored on its `artifactResolutions` list. */
  stageIndex: StageIndex;
  /**
   * Raw paths the tool accepts. Only membership is checked, so a read-only
   * view is enough (a plain `Set<string>` is still accepted).
   */
  allowedPaths: ReadonlySet<string>;
}
/**
 * Identity check for artifact resolutions: two records match when raw path,
 * artifact kind, artifact key and action type are all equal. `reason` is not
 * part of the identity.
 */
function sameArtifactResolution(left: ArtifactResolutionRecord, right: ArtifactResolutionRecord): boolean {
  if (left.rawPath !== right.rawPath) {
    return false;
  }
  if (left.artifactKind !== right.artifactKind) {
    return false;
  }
  if (left.artifactKey !== right.artifactKey) {
    return false;
  }
  return left.actionType === right.actionType;
}
/**
 * Build the tool that records explicit artifact resolutions on the stage index.
 *
 * The tool rejects any `rawPath` that is not in `deps.allowedPaths` by
 * returning an `Error: ...` string; nothing is recorded in that case.
 * Otherwise the record is upserted into `deps.stageIndex.artifactResolutions`:
 * a record with the same identity (see `sameArtifactResolution`) is replaced,
 * any other record is appended.
 *
 * @param deps Shared stage index and the raw paths this stage may reference.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createEmitArtifactResolutionTool(deps: EmitArtifactResolutionDeps) {
  return tool({
    description:
      'Record one explicit artifact resolution for ingest provenance. Use when reconciliation merges or subsumes an artifact without creating a new wiki or SL write action.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      artifactKind: z.enum(['sl', 'wiki']),
      artifactKey: z.string().min(1),
      actionType: z.enum(['merged', 'subsumed']),
      reason: z.string().min(1),
    }),
    execute: async (input): Promise<string> => {
      if (!deps.allowedPaths.has(input.rawPath)) {
        return `Error: rawPath "${input.rawPath}" is not available to this ingest stage`;
      }
      const record: ArtifactResolutionRecord = {
        rawPath: input.rawPath,
        artifactKind: input.artifactKind,
        artifactKey: input.artifactKey,
        actionType: input.actionType,
        reason: input.reason,
      };
      // The list is optional on the stage index: create it once, then mutate
      // it in place for both the replace and the append case instead of
      // rebuilding the whole array on every append.
      const resolutions = (deps.stageIndex.artifactResolutions ??= []);
      const existingIndex = resolutions.findIndex((candidate) => sameArtifactResolution(candidate, record));
      if (existingIndex >= 0) {
        resolutions[existingIndex] = record;
      } else {
        resolutions.push(record);
      }
      return `recorded artifact resolution for ${record.artifactKind}:${record.artifactKey}`;
    },
  });
}

View file

@ -0,0 +1,38 @@
import { tool } from 'ai';
import { z } from 'zod';
import type { ConflictResolvedRecord, StageIndex } from '../stages/stage-index.types.js';
/** Dependencies injected into the conflict-resolution emit tool. */
interface EmitConflictResolutionDeps {
  /** Shared stage index; each recorded decision is appended to its `conflictsResolved` list. */
  stageIndex: StageIndex;
}
/**
 * Build the tool that appends one conflict-resolution decision to
 * `deps.stageIndex.conflictsResolved`.
 *
 * `unitKey` and `contestedKey` are copied onto the record only when the
 * caller supplied a non-empty value; every call appends a new record.
 *
 * @param deps Shared stage index that receives the decisions.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createEmitConflictResolutionTool(deps: EmitConflictResolutionDeps) {
  return tool({
    description:
      'Record one conflict resolution decision for the final IngestReport. Call after resolving or flagging a cross-WorkUnit conflict.',
    inputSchema: z.object({
      unitKey: z.string().min(1).optional(),
      kind: z.enum(['structural_duplicate', 'near_duplicate', 'definitional_contradiction', 're_ingest_change']),
      contestedKey: z.string().min(1).optional(),
      artifactKey: z.string().min(1),
      detail: z.string().min(1),
      flaggedForHuman: z.boolean().default(false),
    }),
    execute: async (input): Promise<string> => {
      const { unitKey, contestedKey, kind, artifactKey, detail, flaggedForHuman } = input;
      // Optional keys are spread in last so they are present only when set.
      const decision: ConflictResolvedRecord = {
        kind,
        artifactKey,
        detail,
        flaggedForHuman,
        ...(unitKey ? { unitKey } : {}),
        ...(contestedKey ? { contestedKey } : {}),
      };
      deps.stageIndex.conflictsResolved.push(decision);
      return `recorded conflict resolution for ${decision.artifactKey}`;
    },
  });
}

View file

@ -0,0 +1,51 @@
import { tool } from 'ai';
import { z } from 'zod';
import type { EvictionAppliedRecord, StageIndex } from '../stages/stage-index.types.js';
/** Dependencies injected into the eviction-decision emit tool. */
interface EmitEvictionDecisionDeps {
  /** Shared stage index; decisions are upserted into its `evictionsApplied` list. */
  stageIndex: StageIndex;
  /**
   * Raw paths that make up the current eviction set. The list is only read
   * (it is copied into a Set when the tool is created), so read-only arrays
   * are accepted as well as mutable ones.
   */
  deletedRawPaths: readonly string[];
}
/**
 * Identity check for eviction records: two records target the same artifact
 * when raw path, artifact kind and artifact key are equal. `action` and
 * `reason` are not compared.
 */
function sameEvictionArtifact(left: EvictionAppliedRecord, right: EvictionAppliedRecord): boolean {
  const identityFields = ['rawPath', 'artifactKind', 'artifactKey'] as const;
  return identityFields.every((field) => left[field] === right[field]);
}
/**
 * Build the tool that records eviction decisions on
 * `deps.stageIndex.evictionsApplied`.
 *
 * A `rawPath` outside `deps.deletedRawPaths` yields an `Error: ...` string and
 * records nothing. Otherwise the decision replaces an earlier one for the same
 * raw path / artifact kind / artifact key, or is appended when there is none.
 *
 * @param deps Shared stage index and the raw paths of the eviction set.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createEmitEvictionDecisionTool(deps: EmitEvictionDecisionDeps) {
  // Membership snapshot taken once, when the tool is created.
  const evictionSet = new Set(deps.deletedRawPaths);

  return tool({
    description:
      'Record one eviction decision for the final IngestReport. The rawPath must come from the current Eviction Set.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      artifactKind: z.enum(['sl', 'wiki']),
      artifactKey: z.string().min(1),
      action: z.enum(['removed', 'retained_deprecated']),
      reason: z.string().min(1),
    }),
    execute: async (input): Promise<string> => {
      const { rawPath, artifactKind, artifactKey, action, reason } = input;
      if (!evictionSet.has(rawPath)) {
        return `Error: rawPath "${rawPath}" is not in the current eviction set`;
      }

      const decision: EvictionAppliedRecord = { rawPath, artifactKind, artifactKey, action, reason };
      const recorded = deps.stageIndex.evictionsApplied;
      const slot = recorded.findIndex((previous) => sameEvictionArtifact(previous, decision));
      if (slot === -1) {
        recorded.push(decision);
      } else {
        // A later decision for the same artifact supersedes the earlier one.
        recorded[slot] = decision;
      }
      return `recorded eviction decision for ${decision.rawPath} -> ${decision.artifactKind}:${decision.artifactKey}`;
    },
  });
}

View file

@ -0,0 +1,228 @@
import type { Tool } from 'ai';
import { describe, expect, it } from 'vitest';
import type { StageIndex } from '../stages/stage-index.types.js';
import { createEmitArtifactResolutionTool } from './emit-artifact-resolution.tool.js';
import { createEmitConflictResolutionTool } from './emit-conflict-resolution.tool.js';
import { createEmitEvictionDecisionTool } from './emit-eviction-decision.tool.js';
import { createEmitUnmappedFallbackTool } from './emit-unmapped-fallback.tool.js';
/** Create a fresh, empty stage index fixture (`job-1` on connection `c1`). */
function makeStageIndex(): StageIndex {
  const emptyIndex: StageIndex = {
    jobId: 'job-1',
    connectionId: 'c1',
    workUnits: [],
    conflictsResolved: [],
    evictionsApplied: [],
    unmappedFallbacks: [],
  };
  return emptyIndex;
}
/**
 * Invoke a tool's `execute` with fixed call options and return its output as
 * a string.
 *
 * @throws Error when the tool has no `execute` function.
 */
async function executeTool<Input>(tool: Tool<Input, string>, input: NoInfer<Input>) {
  if (tool.execute) {
    const output = await tool.execute(input, { toolCallId: 'tool-call-1', messages: [] });
    return output as string;
  }
  throw new Error('tool is not executable');
}
describe('reconciliation emit tools', () => {
  // Raw paths shared by the cases below.
  const DELETED_VIEW = 'views/old_orders.view.lkml';
  const CONVERSION_METRIC = 'metrics/conversion.yml';
  const SALES_PIPELINE = 'explores/b2b/sales_pipeline.json';

  // Fixture factories return a fresh literal on every call so the object
  // handed to the tool is never the same object used in an expectation.
  const flaggedConversionFallback = () =>
    ({
      rawPath: CONVERSION_METRIC,
      reason: 'no_physical_table',
      fallback: 'flagged',
    } as const);

  const evictionToolFor = (stageIndex: StageIndex) =>
    createEmitEvictionDecisionTool({ stageIndex, deletedRawPaths: [DELETED_VIEW] });

  const fallbackToolFor = (stageIndex: StageIndex) =>
    createEmitUnmappedFallbackTool({ stageIndex, allowedPaths: new Set([CONVERSION_METRIC]) });

  it('records conflict resolutions on the shared stage index', async () => {
    const stageIndex = makeStageIndex();
    const decision = () =>
      ({
        unitKey: 'wu-orders',
        kind: 'near_duplicate',
        contestedKey: 'gross_revenue',
        artifactKey: 'sl:orders.gross_revenue',
        detail: 'orders and order_facts compute the same revenue metric; retained orders as canonical',
        flaggedForHuman: true,
      } as const);

    const output = await executeTool(createEmitConflictResolutionTool({ stageIndex }), decision());

    expect(stageIndex.conflictsResolved).toEqual([decision()]);
    expect(output).toBe('recorded conflict resolution for sl:orders.gross_revenue');
  });

  it('records eviction decisions only for deleted raw paths in the current eviction set', async () => {
    const stageIndex = makeStageIndex();
    const removal = () =>
      ({
        rawPath: DELETED_VIEW,
        artifactKind: 'sl',
        artifactKey: 'old_orders',
        action: 'removed',
        reason: 'source raw file was deleted and no retained artifacts are required',
      } as const);

    const output = await executeTool(evictionToolFor(stageIndex), removal());

    expect(output).toContain('recorded eviction decision for views/old_orders.view.lkml');
    expect(stageIndex.evictionsApplied).toEqual([removal()]);
  });

  it('updates an existing eviction decision for the same raw path and artifact', async () => {
    const stageIndex = makeStageIndex();
    const tool = evictionToolFor(stageIndex);
    const legacyPage = { rawPath: DELETED_VIEW, artifactKind: 'wiki', artifactKey: 'orders/legacy' } as const;

    await executeTool(tool, { ...legacyPage, action: 'retained_deprecated', reason: 'first pass' });
    await executeTool(tool, { ...legacyPage, action: 'removed', reason: 'second pass after checking references' });

    expect(stageIndex.evictionsApplied).toEqual([
      { ...legacyPage, action: 'removed', reason: 'second pass after checking references' },
    ]);
  });

  it('rejects eviction decisions for raw paths outside the current eviction set', async () => {
    const stageIndex = makeStageIndex();

    const output = await executeTool(evictionToolFor(stageIndex), {
      rawPath: 'views/not_deleted.view.lkml',
      artifactKind: 'sl',
      artifactKey: 'not_deleted',
      action: 'removed',
      reason: 'bad input',
    });

    expect(output).toContain('Error: rawPath "views/not_deleted.view.lkml" is not in the current eviction set');
    expect(stageIndex.evictionsApplied).toEqual([]);
  });

  it('records unmapped fallback decisions for allowed raw paths', async () => {
    const stageIndex = makeStageIndex();

    const output = await executeTool(fallbackToolFor(stageIndex), flaggedConversionFallback());

    expect(output).toContain('recorded unmapped fallback for metrics/conversion.yml');
    expect(stageIndex.unmappedFallbacks).toEqual([flaggedConversionFallback()]);
  });

  it('deduplicates identical unmapped fallback decisions', async () => {
    const stageIndex = makeStageIndex();
    const tool = fallbackToolFor(stageIndex);

    // Same decision twice; only one record must remain.
    await executeTool(tool, flaggedConversionFallback());
    await executeTool(tool, flaggedConversionFallback());

    expect(stageIndex.unmappedFallbacks).toEqual([flaggedConversionFallback()]);
  });

  it('rejects unmapped fallback decisions for raw paths outside the allowed set', async () => {
    const stageIndex = makeStageIndex();

    const output = await executeTool(fallbackToolFor(stageIndex), {
      ...flaggedConversionFallback(),
      rawPath: 'metrics/not-in-this-work-unit.yml',
    });

    expect(output).toContain(
      'Error: rawPath "metrics/not-in-this-work-unit.yml" is not available to this ingest stage',
    );
    expect(stageIndex.unmappedFallbacks).toEqual([]);
  });

  it('records explicit artifact resolutions for provenance rows', async () => {
    const stageIndex = makeStageIndex();
    const resolution = () =>
      ({
        rawPath: SALES_PIPELINE,
        artifactKind: 'sl',
        artifactKey: 'looker__b2b__sales_pipeline',
        actionType: 'subsumed',
        reason: 'File-adapter source b2b__sales_pipeline is canonical for this explore.',
      } as const);
    const tool = createEmitArtifactResolutionTool({
      stageIndex,
      allowedPaths: new Set([SALES_PIPELINE]),
    });

    const output = await executeTool(tool, resolution());

    expect(output).toBe('recorded artifact resolution for sl:looker__b2b__sales_pipeline');
    expect(stageIndex.artifactResolutions).toEqual([resolution()]);
  });
});

View file

@ -0,0 +1,52 @@
import { tool } from 'ai';
import { z } from 'zod';
import type { StageIndex, UnmappedFallbackRecord } from '../stages/stage-index.types.js';
/** Dependencies injected into the unmapped-fallback emit tool. */
interface EmitUnmappedFallbackDeps {
  /** Shared stage index; accepted decisions are appended to its `unmappedFallbacks` list. */
  stageIndex: StageIndex;
  /** Raw paths the tool accepts; only membership is checked. */
  allowedPaths: ReadonlySet<string>;
}
/** Structured reason codes accepted for an unmapped fallback decision. */
const UNMAPPED_FALLBACK_REASON_CODES = [
  'no_connection_mapping',
  'looker_template_unresolved',
  'derived_table_not_supported',
  'no_physical_table',
  'multiple_table_references',
  'unsupported_dialect',
  'parse_error',
  'missing_target_table',
] as const;

/** Schema that restricts `reason` to one of the structured codes above. */
const unmappedFallbackReasonSchema = z.enum(UNMAPPED_FALLBACK_REASON_CODES);
/**
 * Identity check for unmapped fallbacks: records match when raw path, reason
 * code and fallback are all equal. `detail` is not compared.
 */
function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallbackRecord): boolean {
  const differs =
    left.rawPath !== right.rawPath || left.reason !== right.reason || left.fallback !== right.fallback;
  return !differs;
}
/**
 * Build the tool that records unmapped-fallback decisions on
 * `deps.stageIndex.unmappedFallbacks`.
 *
 * A `rawPath` outside `deps.allowedPaths` yields an `Error: ...` string and
 * records nothing. A decision whose raw path, reason and fallback match an
 * already recorded one is not appended again; the success message is returned
 * either way.
 *
 * @param deps Shared stage index and the raw paths this stage may reference.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
  return tool({
    description:
      'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The reason MUST be one of the structured codes; put any human-readable context in detail.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      reason: unmappedFallbackReasonSchema,
      detail: z.string().optional(),
      fallback: z.enum(['sql_standalone', 'wiki_only', 'flagged']),
    }),
    execute: async (input): Promise<string> => {
      const { rawPath, reason, detail, fallback } = input;
      if (!deps.allowedPaths.has(rawPath)) {
        return `Error: rawPath "${rawPath}" is not available to this ingest stage`;
      }

      // `detail` is carried over only when the caller supplied it.
      const decision: UnmappedFallbackRecord =
        detail === undefined ? { rawPath, reason, fallback } : { rawPath, reason, detail, fallback };

      const recorded = deps.stageIndex.unmappedFallbacks;
      const alreadyRecorded = recorded.some((previous) => sameUnmappedFallback(previous, decision));
      if (!alreadyRecorded) {
        recorded.push(decision);
      }
      return `recorded unmapped fallback for ${decision.rawPath} (${decision.fallback})`;
    },
  });
}

View file

@ -0,0 +1,56 @@
import { describe, expect, it, vi } from 'vitest';
import { createEvictionListTool } from './eviction-list.tool.js';
describe('eviction_list tool', () => {
  it('returns artifacts produced for each deleted raw path', async () => {
    const deletedRawPaths = ['views/old.lkml', 'views/gone.lkml'];
    const provenance = {
      findLatestArtifactsForRawPaths: vi.fn().mockResolvedValue(
        new Map([
          [
            'views/old.lkml',
            [{ artifact_kind: 'sl', artifact_key: 'old_metric', action_type: 'source_created' } as any],
          ],
          ['views/gone.lkml', []],
        ]),
      ),
    };
    const tool = createEvictionListTool({
      provenance: provenance as any,
      connectionId: 'c1',
      sourceKey: 'lookml',
      deletedRawPaths,
    });

    const out = (await (tool.execute as (...args: unknown[]) => unknown)(
      {},
      { toolCallId: 't', messages: [] },
    )) as string;

    // The lookup must be scoped to this connection, source and eviction set.
    expect(provenance.findLatestArtifactsForRawPaths).toHaveBeenCalledWith('c1', 'lookml', deletedRawPaths);
    expect(out).toContain('views/old.lkml');
    expect(out).toContain('old_metric');
    expect(out).toContain('views/gone.lkml');
  });

  // The tool answers with an "(empty)" marker, not an empty string, so the
  // title says so; it must also skip the provenance lookup entirely.
  it('returns an (empty) marker when there are no deletions', async () => {
    const provenance = { findLatestArtifactsForRawPaths: vi.fn() };
    const tool = createEvictionListTool({
      provenance: provenance as any,
      connectionId: 'c1',
      sourceKey: 'lookml',
      deletedRawPaths: [],
    });

    const out = (await (tool.execute as (...args: unknown[]) => unknown)(
      {},
      { toolCallId: 't', messages: [] },
    )) as string;

    expect(out).toMatch(/empty/i);
    expect(provenance.findLatestArtifactsForRawPaths).not.toHaveBeenCalled();
  });

  it('tells curators to record decisions', () => {
    const tool = createEvictionListTool({
      provenance: {} as any,
      connectionId: 'c1',
      sourceKey: 'lookml',
      deletedRawPaths: [],
    });

    expect(tool.description).toContain('context_eviction_decision_write');
  });
});

View file

@ -0,0 +1,39 @@
import { tool } from 'ai';
import { z } from 'zod';
import type { IngestProvenancePort } from '../ports.js';
/** Dependencies injected into the `eviction_list` tool. */
export interface EvictionListDeps {
  /** Port queried for the latest artifacts recorded per raw path. */
  provenance: IngestProvenancePort;
  /** Connection id passed through to the provenance lookup. */
  connectionId: string;
  /** Source key passed through to the provenance lookup. */
  sourceKey: string;
  /** Raw paths to look up; when empty the tool answers without querying. */
  deletedRawPaths: string[];
}
/**
 * Build the `eviction_list` tool, which reports the artifacts recorded for
 * each deleted raw path.
 *
 * With no deleted paths the tool returns an "(empty)" marker and does not
 * query provenance. Otherwise it asks
 * `deps.provenance.findLatestArtifactsForRawPaths` once and renders one entry
 * per path. Every path in `deps.deletedRawPaths` is rendered, in input order,
 * even when the lookup result has no key for it (shown as `(none)`); keys
 * present only in the lookup result are appended afterwards so nothing the
 * port returned is dropped.
 *
 * @param deps Provenance port plus the connection, source and deleted paths.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createEvictionListTool(deps: EvictionListDeps) {
  return tool({
    description:
      'List every artifact that the most recent completed sync produced from a now-deleted raw file. Use this to decide whether to remove (no inbound refs) or retain with deprecation (has inbound refs). Inbound refs are NOT currently computed — treat every retained entry as a candidate and ask the user via the IngestReport. After deciding, record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.',
    inputSchema: z.object({}),
    execute: async () => {
      if (deps.deletedRawPaths.length === 0) {
        return '(empty) — no files were deleted since the last sync';
      }
      const artifactsByPath = await deps.provenance.findLatestArtifactsForRawPaths(
        deps.connectionId,
        deps.sourceKey,
        deps.deletedRawPaths,
      );
      // Drive the listing from the eviction set rather than from the lookup
      // result: a path the port omitted must still appear so a decision can
      // be recorded for it. The Set also collapses duplicate paths.
      const paths = new Set([...deps.deletedRawPaths, ...artifactsByPath.keys()]);
      return [...paths]
        .map((path) => {
          const rows = artifactsByPath.get(path) ?? [];
          if (rows.length === 0) {
            return `- raw_path: ${path}\n artifacts: (none)`;
          }
          const artifactLines = rows
            .map((r) => ` - kind: ${r.artifact_kind} key: ${r.artifact_key} (last action: ${r.action_type})`)
            .join('\n');
          return `- raw_path: ${path}\n artifacts:\n${artifactLines}`;
        })
        .join('\n');
    },
  });
}

View file

@ -0,0 +1,69 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { createReadRawFileTool } from './read-raw-file.tool.js';
describe('read_raw_file tool', () => {
  let stagedDir: string;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'readraw-'));
    await mkdir(join(stagedDir, 'views'), { recursive: true });
    await writeFile(join(stagedDir, 'views', 'a.yml'), 'line1\nline2\nline3\n', 'utf-8');
    await writeFile(join(stagedDir, 'peer.yml'), 'secret', 'utf-8');
  });

  afterEach(async () => rm(stagedDir, { recursive: true, force: true }));

  // Build a tool restricted to `allowed` and read `path` through it.
  const readPath = (allowed: string[], path: string) => {
    const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(allowed) });
    return (tool.execute as (...args: unknown[]) => unknown)({ path }, { toolCallId: 't1', messages: [] });
  };

  it('returns content for an allowed path', async () => {
    const result = await readPath(['views/a.yml'], 'views/a.yml');

    expect(result).toContain('line1');
    expect(result).toContain('line2');
  });

  it('refuses to return oversized files and directs callers to read spans', async () => {
    await writeFile(join(stagedDir, 'views', 'huge.yml'), `${'x'.repeat(160_000)}\n`, 'utf-8');

    const result = await readPath(['views/huge.yml'], 'views/huge.yml');

    expect(result).toMatch(/too large/i);
    expect(result).toMatch(/read_raw_span/i);
    expect(String(result).length).toBeLessThan(1000);
  });

  it('rejects a path not in the allow-list', async () => {
    const result = await readPath(['views/a.yml'], 'peer.yml');

    expect(result).toMatch(/not accessible/i);
    expect(result).not.toContain('secret');
  });

  it('rejects directory traversal attempts', async () => {
    // The traversal path is allow-listed on purpose: the rejection must come
    // from the `..` guard itself, not from the path merely being unknown.
    const result = await readPath(['views/a.yml', '../outside.yml'], '../outside.yml');

    expect(result).toMatch(/not accessible/i);
  });

  it('returns a clear error when the file is missing despite being allowed', async () => {
    const result = await readPath(['views/missing.yml'], 'views/missing.yml');

    expect(result).toMatch(/not found/i);
  });
});

View file

@ -0,0 +1,41 @@
import { readFile, stat } from 'node:fs/promises';
import { join, normalize, resolve } from 'node:path';
import { tool } from 'ai';
import { z } from 'zod';
/** Dependencies injected into the `read_raw_file` tool. */
interface ReadRawFileDeps {
  /** Root directory of the staged bundle; requested paths are resolved against it. */
  stagedDir: string;
  /**
   * Relative paths the tool may return. The set is only read (membership test
   * and listing in error messages), so a read-only view is enough; a plain
   * `Set<string>` is still accepted.
   */
  allowedPaths: ReadonlySet<string>;
}
/** Largest file size, in bytes, that `read_raw_file` returns in full. */
const MAX_READ_RAW_FILE_BYTES = 120_000;

/**
 * Build the `read_raw_file` tool, which returns the UTF-8 text of one staged
 * file.
 *
 * Every failure is reported as an `Error: ...` string rather than thrown:
 * - the path is outside the allow-list or escapes the staged root,
 * - the path exists but is not a regular file,
 * - the file is larger than `MAX_READ_RAW_FILE_BYTES`,
 * - the file is missing ("not found") or cannot be read for another reason.
 *
 * NOTE(review): `stat`/`readFile` follow symbolic links, so a link inside the
 * staged bundle could point outside it — confirm staged bundles cannot contain
 * symlinks, or resolve the real path before reading.
 *
 * @param deps Staged root directory and the allow-list of relative paths.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createReadRawFileTool(deps: ReadRawFileDeps) {
  const stagedRoot = resolve(deps.stagedDir);
  return tool({
    description:
      "Read the full text content of a raw source file inside this WorkUnit. `path` must be relative to the staged bundle root (no leading slash, no `..`) and must appear in the WorkUnit's rawFiles or dependencyPaths list.",
    inputSchema: z.object({
      path: z.string().describe('Path relative to the staged bundle root. Example: "views/customers/customer.lkml".'),
    }),
    execute: async ({ path }) => {
      const normalized = normalize(path).replace(/^[/\\]+/, '');
      // Only a leading `..` *segment* leaves the root; a name that merely
      // starts with two dots (for example "..config/x.yml") does not.
      const escapesRoot = normalized === '..' || normalized.startsWith('../') || normalized.startsWith('..\\');
      if (escapesRoot || !deps.allowedPaths.has(normalized)) {
        return `Error: path "${path}" is not accessible from this WorkUnit. Allowed paths: ${[...deps.allowedPaths].sort().join(', ')}`;
      }
      // Second line of defence: the resolved path must stay under the root.
      const absolute = resolve(join(stagedRoot, normalized));
      if (!absolute.startsWith(`${stagedRoot}/`) && absolute !== stagedRoot) {
        return `Error: path "${path}" is not accessible from this WorkUnit.`;
      }
      try {
        const fileStat = await stat(absolute);
        if (!fileStat.isFile()) {
          return `Error: path "${path}" is not a regular file and cannot be read.`;
        }
        if (fileStat.size > MAX_READ_RAW_FILE_BYTES) {
          return `Error: file "${path}" is too large to return in full (${fileStat.size} bytes). Use read_raw_span with targeted line ranges instead.`;
        }
        return await readFile(absolute, 'utf-8');
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        // Say "not found" only when the file is really missing; permission
        // and other I/O failures get their own wording.
        const missing =
          err instanceof Error && 'code' in err && (err.code === 'ENOENT' || err.code === 'ENOTDIR');
        return missing
          ? `Error: file "${path}" not found. (${reason})`
          : `Error: file "${path}" could not be read. (${reason})`;
      }
    },
  });
}

View file

@ -0,0 +1,53 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { createReadRawSpanTool } from './read-raw-span.tool.js';
describe('read_raw_span tool', () => {
  let stagedDir: string;

  // Five-line fixture written under a fresh temp directory for every test.
  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'readspan-'));
    await mkdir(join(stagedDir, 'v'), { recursive: true });
    await writeFile(join(stagedDir, 'v', 'a.yml'), 'line1\nline2\nline3\nline4\nline5\n', 'utf-8');
  });

  afterEach(async () => rm(stagedDir, { recursive: true, force: true }));

  // Read lines [startLine, endLine] of the fixture through a tool limited to `allowed`.
  const readSpan = (allowed: string[], startLine: number, endLine: number) => {
    const tool = createReadRawSpanTool({ stagedDir, allowedPaths: new Set(allowed) });
    const invoke = tool.execute as (...args: unknown[]) => unknown;
    return invoke({ path: 'v/a.yml', startLine, endLine }, { toolCallId: 't1', messages: [] });
  };

  it('returns the requested 1-based inclusive line range', async () => {
    expect(await readSpan(['v/a.yml'], 2, 4)).toBe('line2\nline3\nline4');
  });

  it('clamps endLine to the end of the file', async () => {
    expect(await readSpan(['v/a.yml'], 4, 99)).toBe('line4\nline5');
  });

  it('rejects start > end', async () => {
    expect(await readSpan(['v/a.yml'], 5, 2)).toMatch(/startLine must be/i);
  });

  it('rejects paths not in the allow-list', async () => {
    expect(await readSpan([], 1, 1)).toMatch(/not accessible/i);
  });
});

View file

@ -0,0 +1,46 @@
import { readFile } from 'node:fs/promises';
import { join, normalize, resolve } from 'node:path';
import { tool } from 'ai';
import { z } from 'zod';
/** Dependencies injected into the `read_raw_span` tool. */
interface ReadRawSpanDeps {
  /** Root directory of the staged bundle; requested paths are resolved against it. */
  stagedDir: string;
  /**
   * Relative paths the tool may read. The set is only read (membership test
   * and listing in error messages), so a read-only view is enough; a plain
   * `Set<string>` is still accepted.
   */
  allowedPaths: ReadonlySet<string>;
}
/**
 * Build the `read_raw_span` tool, which returns a 1-based, inclusive line
 * range of one staged file.
 *
 * `endLine` is clamped to the file length. Every failure is reported as an
 * `Error: ...` string rather than thrown:
 * - `startLine` is greater than `endLine`,
 * - the path is outside the allow-list or escapes the staged root,
 * - the file is missing ("not found") or cannot be read for another reason,
 * - `startLine` lies past the last line of the file.
 *
 * @param deps Staged root directory and the allow-list of relative paths.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createReadRawSpanTool(deps: ReadRawSpanDeps) {
  const stagedRoot = resolve(deps.stagedDir);
  return tool({
    description:
      'Read a 1-based inclusive line range from a raw source file. Use this to resolve a provenance pointer like `file.lkml#L15-28` without loading the whole file into context.',
    inputSchema: z.object({
      path: z.string().describe('Path relative to the staged bundle root.'),
      startLine: z.number().int().min(1).describe('First line to return (1-based, inclusive).'),
      endLine: z.number().int().min(1).describe('Last line to return (1-based, inclusive). Clamped to file length.'),
    }),
    execute: async ({ path, startLine, endLine }) => {
      if (startLine > endLine) {
        return `Error: startLine must be <= endLine (got startLine=${startLine}, endLine=${endLine})`;
      }
      const normalized = normalize(path).replace(/^[/\\]+/, '');
      // Only a leading `..` *segment* leaves the root; a name that merely
      // starts with two dots does not.
      const escapesRoot = normalized === '..' || normalized.startsWith('../') || normalized.startsWith('..\\');
      if (escapesRoot || !deps.allowedPaths.has(normalized)) {
        return `Error: path "${path}" is not accessible from this context. Allowed paths: ${[...deps.allowedPaths].sort().join(', ')}`;
      }
      // Second line of defence: the resolved path must stay under the root.
      const absolute = resolve(join(stagedRoot, normalized));
      if (!absolute.startsWith(`${stagedRoot}/`) && absolute !== stagedRoot) {
        return `Error: path "${path}" is not accessible from this context.`;
      }

      let body: string;
      try {
        body = await readFile(absolute, 'utf-8');
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        // Say "not found" only when the file is really missing.
        const missing =
          err instanceof Error && 'code' in err && (err.code === 'ENOENT' || err.code === 'ENOTDIR');
        return missing
          ? `Error: file "${path}" not found. (${reason})`
          : `Error: file "${path}" could not be read. (${reason})`;
      }

      const rawLines = body.split('\n');
      // Treat a trailing empty element caused by a file-ending newline as NOT a line.
      const lines = rawLines.length > 0 && rawLines[rawLines.length - 1] === '' ? rawLines.slice(0, -1) : rawLines;
      if (startLine > lines.length) {
        // An empty string here would be indistinguishable from blank lines.
        return `Error: startLine ${startLine} is past the end of file "${path}" (${lines.length} lines).`;
      }
      const from = Math.max(1, startLine);
      const to = Math.min(lines.length, endLine);
      return lines.slice(from - 1, to).join('\n');
    },
  });
}

View file

@ -0,0 +1,131 @@
import { describe, expect, it } from 'vitest';
import { createStageDiffTool } from './stage-diff.tool.js';
describe('stage_diff tool', () => {
  type StageIndexInput = Parameters<typeof createStageDiffTool>[0]['stageIndex'];

  /** Shape of one successful WorkUnit that performed a single SL "created" action. */
  interface UnitFixture {
    unitKey: string;
    actionKey: string;
    detail: string;
    sourceConnectionId: string;
    sourceName: string;
    targetConnectionId?: string;
  }

  const buildUnit = (fixture: UnitFixture) => ({
    unitKey: fixture.unitKey,
    rawFiles: [],
    status: 'success' as const,
    actions: [
      {
        target: 'sl' as const,
        type: 'created' as const,
        key: fixture.actionKey,
        detail: fixture.detail,
        // Only set when the fixture targets a specific connection.
        ...(fixture.targetConnectionId === undefined ? {} : { targetConnectionId: fixture.targetConnectionId }),
      },
    ],
    touchedSlSources: [{ connectionId: fixture.sourceConnectionId, sourceName: fixture.sourceName }],
  });

  const buildIndex = (connectionId: string, units: UnitFixture[]) => ({
    jobId: 'j',
    connectionId,
    workUnits: units.map(buildUnit),
    conflictsResolved: [],
    evictionsApplied: [],
    unmappedFallbacks: [],
  });

  const diff = async (stageIndex: StageIndexInput, unitKeyA: string, unitKeyB: string) => {
    const tool = createStageDiffTool({ stageIndex });
    const invoke = tool.execute as (...args: unknown[]) => unknown;
    return (await invoke({ unitKeyA, unitKeyB }, { toolCallId: 't', messages: [] })) as string;
  };

  // Two units that both created `churn_risk_score` on the run connection.
  const stageIndex = buildIndex('c1', [
    {
      unitKey: 'u1',
      actionKey: 'churn_risk_score',
      detail: 'customers',
      sourceConnectionId: 'c1',
      sourceName: 'customers',
    },
    {
      unitKey: 'u2',
      actionKey: 'churn_risk_score',
      detail: 'billing',
      sourceConnectionId: 'c1',
      sourceName: 'billing',
    },
  ]);

  it('finds overlapping artifact keys between two WUs', async () => {
    const out = await diff(stageIndex, 'u1', 'u2');

    expect(out).toContain('churn_risk_score');
    expect(out).toMatch(/overlap/i);
  });

  it('says no overlap when keys are disjoint', async () => {
    const disjoint = buildIndex('c1', [
      { unitKey: 'u1', actionKey: 'a', detail: '', sourceConnectionId: 'c1', sourceName: 'a' },
      { unitKey: 'u2', actionKey: 'b', detail: '', sourceConnectionId: 'c1', sourceName: 'b' },
    ]);

    expect(await diff(disjoint, 'u1', 'u2')).toMatch(/no overlap/i);
  });

  it('does not overlap same-named SL actions on different target connections', async () => {
    const sharedKey = 'looker__b2b__sales_pipeline';
    const splitTargets = buildIndex('looker-run', [
      {
        unitKey: 'u1',
        actionKey: sharedKey,
        detail: 'W1',
        targetConnectionId: 'W1',
        sourceConnectionId: 'W1',
        sourceName: sharedKey,
      },
      {
        unitKey: 'u2',
        actionKey: sharedKey,
        detail: 'W2',
        targetConnectionId: 'W2',
        sourceConnectionId: 'W2',
        sourceName: sharedKey,
      },
    ]);

    expect(await diff(splitTargets, 'u1', 'u2')).toMatch(/no overlap/i);
  });

  it('returns an error when a unitKey is unknown', async () => {
    expect(await diff(stageIndex, 'u1', 'nope')).toMatch(/unknown/i);
  });
});

View file

@ -0,0 +1,44 @@
import { tool } from 'ai';
import { z } from 'zod';
import { memoryActionIdentity } from '../action-identity.js';
import type { StageIndex } from '../stages/stage-index.types.js';
/** Dependencies injected into the `stage_diff` tool. */
export interface StageDiffDeps {
  /** Stage index whose `workUnits` are looked up by unit key and compared. */
  stageIndex: StageIndex;
}
/**
 * Index `actions` by the identity `identify` assigns to each of them. When
 * several actions share an identity the first one is kept, and identities
 * keep the order in which they were first seen.
 */
function firstActionByIdentity<TAction, TIdentity>(
  actions: readonly TAction[],
  identify: (action: TAction) => TIdentity,
): Map<TIdentity, TAction> {
  const index = new Map<TIdentity, TAction>();
  for (const action of actions) {
    const identity = identify(action);
    if (!index.has(identity)) {
      index.set(identity, action);
    }
  }
  return index;
}

/**
 * Build the `stage_diff` tool, which compares the writes of two WorkUnits.
 *
 * Actions are compared by `memoryActionIdentity(action, runConnectionId)`,
 * where `runConnectionId` is the stage index's `connectionId`. The tool
 * returns an `Error: ...` string for an unknown unit key, a "No overlap"
 * sentence when the units share no identity, and otherwise one entry per
 * shared identity showing the `detail` of the first matching action of each
 * unit.
 *
 * @param deps Stage index holding the WorkUnits to compare.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createStageDiffTool(deps: StageDiffDeps) {
  return tool({
    description:
      'Compare two WorkUnits by their writes. SL writes overlap only when target connection and artifact key both match; same-key SL actions on different target connections are non-overlapping.',
    inputSchema: z.object({
      unitKeyA: z.string(),
      unitKeyB: z.string(),
    }),
    execute: async ({ unitKeyA, unitKeyB }) => {
      const a = deps.stageIndex.workUnits.find((wu) => wu.unitKey === unitKeyA);
      const b = deps.stageIndex.workUnits.find((wu) => wu.unitKey === unitKeyB);
      if (!a) {
        return `Error: unknown unitKey "${unitKeyA}"`;
      }
      if (!b) {
        return `Error: unknown unitKey "${unitKeyB}"`;
      }
      const runConnectionId = deps.stageIndex.connectionId;
      // Compute each action's identity once; the maps double as the key sets
      // for the overlap test and as the lookup for the detail lines.
      const actionsA = firstActionByIdentity(a.actions, (ac) => memoryActionIdentity(ac, runConnectionId));
      const actionsB = firstActionByIdentity(b.actions, (ac) => memoryActionIdentity(ac, runConnectionId));
      const overlap = [...actionsA.keys()].filter((k) => actionsB.has(k));
      if (overlap.length === 0) {
        return `No overlap between ${unitKeyA} and ${unitKeyB}.`;
      }
      const overlapDetail = overlap
        .map(
          (k) =>
            `- ${k}\n ${unitKeyA}: ${actionsA.get(k)?.detail ?? ''}\n ${unitKeyB}: ${actionsB.get(k)?.detail ?? ''}`,
        )
        .join('\n');
      return `Overlap between ${unitKeyA} and ${unitKeyB}:\n${overlapDetail}`;
    },
  });
}

View file

@ -0,0 +1,58 @@
import { describe, expect, it } from 'vitest';
import { createStageListTool } from './stage-list.tool.js';
describe('stage_list tool', () => {
  type StageIndexInput = Parameters<typeof createStageListTool>[0]['stageIndex'];

  // Run the tool against `stageIndex` and return its textual summary.
  const list = async (stageIndex: StageIndexInput) => {
    const tool = createStageListTool({ stageIndex });
    const invoke = tool.execute as (...args: unknown[]) => unknown;
    return (await invoke({}, { toolCallId: 't', messages: [] })) as string;
  };

  it('returns a compact summary of the stage index', async () => {
    const out = await list({
      jobId: 'j1',
      connectionId: 'c1',
      workUnits: [
        {
          unitKey: 'u1',
          rawFiles: ['a.yml'],
          status: 'success',
          actions: [{ target: 'sl', type: 'created', key: 'src_a', detail: '' }],
          touchedSlSources: [{ connectionId: 'c1', sourceName: 'src_a' }],
        },
        {
          unitKey: 'u2',
          rawFiles: ['b.yml'],
          status: 'success',
          actions: [{ target: 'wiki', type: 'created', key: 'page_b', detail: '' }],
          touchedSlSources: [],
        },
      ],
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
    });

    // Both unit keys and both action keys must show up in the summary.
    for (const fragment of ['u1', 'src_a', 'u2', 'page_b']) {
      expect(out).toContain(fragment);
    }
  });

  it('says empty when no writes', async () => {
    const out = await list({
      jobId: 'j',
      connectionId: 'c1',
      workUnits: [],
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
    });

    expect(out).toMatch(/empty/i);
  });
});

View file

@ -0,0 +1,30 @@
import { tool } from 'ai';
import { z } from 'zod';
import type { StageIndex } from '../stages/stage-index.types.js';
/** Dependencies injected into the `stage_list` tool. */
export interface StageListDeps {
  /** Stage index whose `workUnits` are summarised. */
  stageIndex: StageIndex;
}
/**
 * Build the `stage_list` tool, which renders every WorkUnit in the stage index
 * as text: unit key and status, raw files, then one line per action.
 *
 * With no WorkUnits at all the tool returns an "(empty)" marker instead.
 *
 * @param deps Stage index to summarise.
 * @returns The tool definition produced by `tool(...)`.
 */
export function createStageListTool(deps: StageListDeps) {
  return tool({
    description:
      'List every write made by Stage 3 WorkUnits in this job. Each entry has the unitKey, raw files, and the action set (SL sources touched, wiki pages written).',
    inputSchema: z.object({}),
    execute: () => {
      const units = deps.stageIndex.workUnits;
      if (units.length === 0) {
        return Promise.resolve('(empty) — no WorkUnits wrote anything in this job');
      }

      // Collect output line by line; everything is joined with '\n' at the end.
      const lines: string[] = [];
      for (const wu of units) {
        lines.push(`- unitKey: ${wu.unitKey} (status=${wu.status})`);
        lines.push(` rawFiles: ${wu.rawFiles.join(', ') || '(none)'}`);
        lines.push(' actions:');
        if (wu.actions.length === 0) {
          lines.push(' (no actions)');
          continue;
        }
        for (const a of wu.actions) {
          lines.push(` - ${a.target}:${a.type} ${a.key}`);
        }
      }
      return Promise.resolve(lines.join('\n'));
    },
  });
}

View file

@ -0,0 +1,106 @@
import { appendFile, mkdir } from 'node:fs/promises';
import { dirname } from 'node:path';
import type { ToolExecuteFunction, ToolExecutionOptions, ToolSet } from 'ai';
/** One JSONL record describing a single tool invocation. */
export interface ToolCallLogEntry {
  /** ISO-8601 timestamp taken when the entry was created. */
  ts: string;
  /** Key passed to `wrapToolsWithLogger` for the wrapped tool set. */
  wuKey: string;
  /** Tool-call id taken from the execution options. */
  toolCallId?: string;
  /** Name the tool is registered under in the tool set. */
  toolName: string;
  /** Milliseconds between the start of the call and creation of the entry. */
  durationMs: number;
  /** Input the tool was called with. */
  input: unknown;
  /** Value the tool produced; set on success. */
  output?: unknown;
  /** Message and, for `Error` instances, the name of what the tool threw; set on failure. */
  error?: { message: string; name?: string };
}
/** Optional hooks for `wrapToolsWithLogger`. */
interface ToolCallLoggerOptions {
  /** Called with every log entry, for successful and failed tool calls alike. */
  onEntry?(entry: ToolCallLogEntry): void;
}
/**
 * Wrap every tool in `tools` so each invocation appends a JSONL record with
 * `{toolName, input, output | error, durationMs}` to `logFilePath`. Used by
 * the ingest runner to produce per-WU transcripts so a completed sync can be
 * inspected the way `parse_chat.py` inspects a chat.
 *
 * Tool shape is preserved (description, inputSchema, ...). Tools without an
 * `execute` function (provider-defined) pass through untouched.
 *
 * Logging never changes what the caller observes: the wrapped tool returns
 * exactly what the original returned and rethrows exactly what it threw.
 * `options.onEntry` runs outside the tool's own error handling and an
 * exception it throws is ignored, so a faulty observer cannot turn a
 * successful call into a failure or hide the tool's real error. File writes
 * are best-effort and fire-and-forget; a failing write will never block or
 * error the agent.
 *
 * @param tools Tool set to wrap; the input object is not modified.
 * @param logFilePath File that receives one JSON line per invocation.
 * @param wuKey Key stamped on every entry.
 * @param options Optional `onEntry` observer.
 * @returns A new tool set with the same keys as `tools`.
 */
export function wrapToolsWithLogger<T extends ToolSet>(
  tools: T,
  logFilePath: string,
  wuKey: string,
  options: ToolCallLoggerOptions = {},
): T {
  // Notify the observer, then queue the file append. Neither may throw.
  const record = (entry: ToolCallLogEntry): void => {
    try {
      options.onEntry?.(entry);
    } catch {
      // Deliberately ignored: observing a call is best-effort, like the file
      // write, and must not alter the outcome of the tool call.
    }
    appendEntry(logFilePath, entry);
  };

  const wrapped: Record<string, unknown> = {};
  for (const [name, original] of Object.entries(tools) as Array<[string, T[string]]>) {
    const originalExecute = original.execute;
    if (typeof originalExecute !== 'function') {
      wrapped[name] = original;
      continue;
    }
    const wrappedExecute: ToolExecuteFunction<unknown, unknown> = async (
      input: unknown,
      opts: ToolExecutionOptions,
    ) => {
      const start = Date.now();
      // Fields shared by success and failure entries, captured when the call settles.
      const settledFields = () => ({
        ts: new Date().toISOString(),
        wuKey,
        toolCallId: opts.toolCallId,
        toolName: name,
        durationMs: Date.now() - start,
        input,
      });

      let output: unknown;
      try {
        // Only the tool call itself is guarded, so the catch branch can only
        // ever describe a failure of the tool.
        output = await (originalExecute as ToolExecuteFunction<unknown, unknown>)(input, opts);
      } catch (err) {
        record({
          ...settledFields(),
          error: {
            message: err instanceof Error ? err.message : String(err),
            name: err instanceof Error ? err.name : undefined,
          },
        });
        throw err;
      }
      record({ ...settledFields(), output });
      return output;
    };
    wrapped[name] = { ...original, execute: wrappedExecute };
  }
  return wrapped as T;
}
/**
 * Append `entry` to `path` as one JSON line, creating the parent directory
 * first. Returns immediately; the write runs in the background and any failure
 * is ignored.
 */
function appendEntry(path: string, entry: ToolCallLogEntry): void {
  const write = async (): Promise<void> => {
    await mkdir(dirname(path), { recursive: true });
    const line = `${safeStringify(entry)}\n`;
    await appendFile(path, line, 'utf-8');
  };

  void write().catch(() => {
    // best-effort
  });
}
/**
 * Serialise `v` to JSON without ever throwing and always returning a string.
 *
 * Values plain `JSON.stringify` can handle are serialised exactly as before.
 * If it throws, a second, tolerant pass keeps as much as possible: BigInt
 * values are written as decimal strings and a reference back to an enclosing
 * object is written as `"[Circular]"`. Values that serialise to nothing (for
 * example `undefined`) become `null`. Only when the tolerant pass fails too is
 * the `{"error":"serialize-failed"}` placeholder returned.
 */
function safeStringify(v: unknown): string {
  try {
    // `JSON.stringify` yields `undefined` for values with no JSON form.
    return JSON.stringify(v) ?? 'null';
  } catch {
    // Fall through to the tolerant pass below.
  }

  try {
    // Stack of the objects enclosing the value currently being visited.
    const ancestors: unknown[] = [];
    const tolerant = JSON.stringify(v, function (this: unknown, _key: string, value: unknown): unknown {
      if (typeof value === 'bigint') {
        return value.toString();
      }
      if (typeof value !== 'object' || value === null) {
        return value;
      }
      // `this` is the object holding `value`: unwind the stack back to it so
      // that only true ancestors remain (shared siblings are not cycles).
      while (ancestors.length > 0 && ancestors[ancestors.length - 1] !== this) {
        ancestors.pop();
      }
      if (ancestors.includes(value)) {
        return '[Circular]';
      }
      ancestors.push(value);
      return value;
    });
    return tolerant ?? 'null';
  } catch {
    return JSON.stringify({ error: 'serialize-failed' });
  }
}