mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
|
|
@ -0,0 +1,53 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { ArtifactResolutionRecord, StageIndex } from '../stages/stage-index.types.js';
|
||||
|
||||
/** Dependencies injected into the artifact-resolution emit tool. */
interface EmitArtifactResolutionDeps {
  /** Stage index whose `artifactResolutions` list the tool reads and updates. */
  stageIndex: StageIndex;
  /**
   * Raw paths the tool accepts; any other `rawPath` is answered with an error string.
   * Only membership is checked, so a read-only view is sufficient.
   */
  allowedPaths: ReadonlySet<string>;
}
|
||||
|
||||
/**
 * Tells whether two resolution records share the same identity.
 *
 * Identity is the combination of raw path, artifact kind, artifact key and
 * action type; `reason` is deliberately not part of the comparison.
 */
function sameArtifactResolution(left: ArtifactResolutionRecord, right: ArtifactResolutionRecord): boolean {
  const identityFields = ['rawPath', 'artifactKind', 'artifactKey', 'actionType'] as const;
  return identityFields.every((field) => left[field] === right[field]);
}
|
||||
|
||||
/**
 * Builds the tool that records an explicit artifact resolution on the stage index.
 *
 * The tool answers with an error string (it does not throw) when `rawPath` is
 * not in `deps.allowedPaths`. Otherwise it upserts the record: an entry with the
 * same identity (see `sameArtifactResolution`) is replaced in place, and a new
 * entry is appended by assigning a fresh array to `artifactResolutions`.
 */
export function createEmitArtifactResolutionTool(deps: EmitArtifactResolutionDeps) {
  return tool({
    description:
      'Record one explicit artifact resolution for ingest provenance. Use when reconciliation merges or subsumes an artifact without creating a new wiki or SL write action.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      artifactKind: z.enum(['sl', 'wiki']),
      artifactKey: z.string().min(1),
      actionType: z.enum(['merged', 'subsumed']),
      reason: z.string().min(1),
    }),
    execute: async ({ rawPath, artifactKind, artifactKey, actionType, reason }): Promise<string> => {
      if (!deps.allowedPaths.has(rawPath)) {
        return `Error: rawPath "${rawPath}" is not available to this ingest stage`;
      }

      const record: ArtifactResolutionRecord = { rawPath, artifactKind, artifactKey, actionType, reason };

      // `artifactResolutions` may be absent; an empty stand-in can never yield a match.
      const recorded = deps.stageIndex.artifactResolutions ?? [];
      const position = recorded.findIndex((candidate) => sameArtifactResolution(candidate, record));
      if (position === -1) {
        deps.stageIndex.artifactResolutions = [...recorded, record];
      } else {
        // A match implies `recorded` is the live array, so this updates the index in place.
        recorded[position] = record;
      }
      return `recorded artifact resolution for ${artifactKind}:${artifactKey}`;
    },
  });
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { ConflictResolvedRecord, StageIndex } from '../stages/stage-index.types.js';
|
||||
|
||||
/** Dependencies injected into the conflict-resolution emit tool. */
interface EmitConflictResolutionDeps {
  /** Stage index whose `conflictsResolved` list receives each recorded decision. */
  stageIndex: StageIndex;
}
|
||||
|
||||
/**
 * Builds the tool that appends one conflict-resolution record to
 * `stageIndex.conflictsResolved`.
 *
 * `unitKey` and `contestedKey` are copied onto the record only when they are
 * truthy. Records are always appended; no de-duplication is performed here.
 */
export function createEmitConflictResolutionTool(deps: EmitConflictResolutionDeps) {
  return tool({
    description:
      'Record one conflict resolution decision for the final IngestReport. Call after resolving or flagging a cross-WorkUnit conflict.',
    inputSchema: z.object({
      unitKey: z.string().min(1).optional(),
      kind: z.enum(['structural_duplicate', 'near_duplicate', 'definitional_contradiction', 're_ingest_change']),
      contestedKey: z.string().min(1).optional(),
      artifactKey: z.string().min(1),
      detail: z.string().min(1),
      flaggedForHuman: z.boolean().default(false),
    }),
    execute: async ({ unitKey, kind, contestedKey, artifactKey, detail, flaggedForHuman }): Promise<string> => {
      const record: ConflictResolvedRecord = {
        kind,
        artifactKey,
        detail,
        flaggedForHuman,
        // Optional keys are left off the record entirely when not supplied.
        ...(unitKey ? { unitKey } : {}),
        ...(contestedKey ? { contestedKey } : {}),
      };
      deps.stageIndex.conflictsResolved.push(record);
      return `recorded conflict resolution for ${artifactKey}`;
    },
  });
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { EvictionAppliedRecord, StageIndex } from '../stages/stage-index.types.js';
|
||||
|
||||
/** Dependencies injected into the eviction-decision emit tool. */
interface EmitEvictionDecisionDeps {
  /** Stage index whose `evictionsApplied` list the tool reads and updates. */
  stageIndex: StageIndex;
  /**
   * Raw paths making up the current eviction set. The list is only read
   * (copied into a Set when the tool is created), so a read-only array is accepted.
   */
  deletedRawPaths: readonly string[];
}
|
||||
|
||||
/**
 * Tells whether two eviction records target the same artifact of the same raw path.
 *
 * `action` and `reason` are not compared, so a later decision for the same
 * artifact counts as a match for an earlier one.
 */
function sameEvictionArtifact(left: EvictionAppliedRecord, right: EvictionAppliedRecord): boolean {
  const identityFields = ['rawPath', 'artifactKind', 'artifactKey'] as const;
  return identityFields.every((field) => left[field] === right[field]);
}
|
||||
|
||||
/**
 * Builds the tool that records one eviction decision on `stageIndex.evictionsApplied`.
 *
 * The eviction set is snapshotted from `deps.deletedRawPaths` when the tool is
 * created. A `rawPath` outside that set is answered with an error string.
 * A decision for an artifact that already has one (see `sameEvictionArtifact`)
 * overwrites the earlier entry; otherwise the decision is appended.
 */
export function createEmitEvictionDecisionTool(deps: EmitEvictionDecisionDeps) {
  const evictionSet = new Set(deps.deletedRawPaths);
  return tool({
    description:
      'Record one eviction decision for the final IngestReport. The rawPath must come from the current Eviction Set.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      artifactKind: z.enum(['sl', 'wiki']),
      artifactKey: z.string().min(1),
      action: z.enum(['removed', 'retained_deprecated']),
      reason: z.string().min(1),
    }),
    execute: async ({ rawPath, artifactKind, artifactKey, action, reason }): Promise<string> => {
      if (!evictionSet.has(rawPath)) {
        return `Error: rawPath "${rawPath}" is not in the current eviction set`;
      }

      const record: EvictionAppliedRecord = { rawPath, artifactKind, artifactKey, action, reason };
      const decisions = deps.stageIndex.evictionsApplied;
      const position = decisions.findIndex((candidate) => sameEvictionArtifact(candidate, record));
      if (position < 0) {
        decisions.push(record);
      } else {
        // Latest decision wins for an artifact that was already decided.
        decisions[position] = record;
      }
      return `recorded eviction decision for ${rawPath} -> ${artifactKind}:${artifactKey}`;
    },
  });
}
|
||||
|
|
@ -0,0 +1,228 @@
|
|||
import type { Tool } from 'ai';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import type { StageIndex } from '../stages/stage-index.types.js';
|
||||
import { createEmitArtifactResolutionTool } from './emit-artifact-resolution.tool.js';
|
||||
import { createEmitConflictResolutionTool } from './emit-conflict-resolution.tool.js';
|
||||
import { createEmitEvictionDecisionTool } from './emit-eviction-decision.tool.js';
|
||||
import { createEmitUnmappedFallbackTool } from './emit-unmapped-fallback.tool.js';
|
||||
|
||||
/** Creates a fresh stage index with fixed ids and every record list empty. */
function makeStageIndex(): StageIndex {
  const emptyIndex: StageIndex = {
    jobId: 'job-1',
    connectionId: 'c1',
    workUnits: [],
    conflictsResolved: [],
    evictionsApplied: [],
    unmappedFallbacks: [],
  };
  return emptyIndex;
}
|
||||
|
||||
/**
 * Runs a tool's `execute` handler with a fixed tool-call context and returns
 * its result as a string.
 *
 * @throws Error when the tool has no `execute` handler.
 */
async function executeTool<Input>(tool: Tool<Input, string>, input: NoInfer<Input>) {
  if (tool.execute) {
    const result = await tool.execute(input, { toolCallId: 'tool-call-1', messages: [] });
    return result as string;
  }
  throw new Error('tool is not executable');
}
|
||||
|
||||
describe('reconciliation emit tools', () => {
  // Builds an eviction tool whose eviction set holds only the old orders view.
  const evictionToolFor = (stageIndex: StageIndex) =>
    createEmitEvictionDecisionTool({ stageIndex, deletedRawPaths: ['views/old_orders.view.lkml'] });

  // Builds an unmapped-fallback tool that only allows the conversion metric file.
  const fallbackToolFor = (stageIndex: StageIndex) =>
    createEmitUnmappedFallbackTool({ stageIndex, allowedPaths: new Set(['metrics/conversion.yml']) });

  it('records conflict resolutions on the shared stage index', async () => {
    const stageIndex = makeStageIndex();
    const resolution = {
      unitKey: 'wu-orders',
      kind: 'near_duplicate',
      contestedKey: 'gross_revenue',
      artifactKey: 'sl:orders.gross_revenue',
      detail: 'orders and order_facts compute the same revenue metric; retained orders as canonical',
      flaggedForHuman: true,
    } as const;

    const output = await executeTool(createEmitConflictResolutionTool({ stageIndex }), resolution);

    expect(stageIndex.conflictsResolved).toEqual([{ ...resolution }]);
    expect(output).toBe('recorded conflict resolution for sl:orders.gross_revenue');
  });

  it('records eviction decisions only for deleted raw paths in the current eviction set', async () => {
    const stageIndex = makeStageIndex();
    const decision = {
      rawPath: 'views/old_orders.view.lkml',
      artifactKind: 'sl',
      artifactKey: 'old_orders',
      action: 'removed',
      reason: 'source raw file was deleted and no retained artifacts are required',
    } as const;

    const output = await executeTool(evictionToolFor(stageIndex), decision);

    expect(output).toContain('recorded eviction decision for views/old_orders.view.lkml');
    expect(stageIndex.evictionsApplied).toEqual([{ ...decision }]);
  });

  it('updates an existing eviction decision for the same raw path and artifact', async () => {
    const stageIndex = makeStageIndex();
    const tool = evictionToolFor(stageIndex);
    const firstPass = {
      rawPath: 'views/old_orders.view.lkml',
      artifactKind: 'wiki',
      artifactKey: 'orders/legacy',
      action: 'retained_deprecated',
      reason: 'first pass',
    } as const;
    const secondPass = {
      rawPath: 'views/old_orders.view.lkml',
      artifactKind: 'wiki',
      artifactKey: 'orders/legacy',
      action: 'removed',
      reason: 'second pass after checking references',
    } as const;

    await executeTool(tool, firstPass);
    await executeTool(tool, secondPass);

    expect(stageIndex.evictionsApplied).toEqual([{ ...secondPass }]);
  });

  it('rejects eviction decisions for raw paths outside the current eviction set', async () => {
    const stageIndex = makeStageIndex();

    const output = await executeTool(evictionToolFor(stageIndex), {
      rawPath: 'views/not_deleted.view.lkml',
      artifactKind: 'sl',
      artifactKey: 'not_deleted',
      action: 'removed',
      reason: 'bad input',
    });

    expect(output).toContain('Error: rawPath "views/not_deleted.view.lkml" is not in the current eviction set');
    expect(stageIndex.evictionsApplied).toEqual([]);
  });

  it('records unmapped fallback decisions for allowed raw paths', async () => {
    const stageIndex = makeStageIndex();
    const fallback = {
      rawPath: 'metrics/conversion.yml',
      reason: 'no_physical_table',
      fallback: 'flagged',
    } as const;

    const output = await executeTool(fallbackToolFor(stageIndex), fallback);

    expect(output).toContain('recorded unmapped fallback for metrics/conversion.yml');
    expect(stageIndex.unmappedFallbacks).toEqual([{ ...fallback }]);
  });

  it('deduplicates identical unmapped fallback decisions', async () => {
    const stageIndex = makeStageIndex();
    const tool = fallbackToolFor(stageIndex);
    const fallback = {
      rawPath: 'metrics/conversion.yml',
      reason: 'no_physical_table',
      fallback: 'flagged',
    } as const;

    await executeTool(tool, fallback);
    await executeTool(tool, { ...fallback });

    expect(stageIndex.unmappedFallbacks).toEqual([{ ...fallback }]);
  });

  it('rejects unmapped fallback decisions for raw paths outside the allowed set', async () => {
    const stageIndex = makeStageIndex();

    const output = await executeTool(fallbackToolFor(stageIndex), {
      rawPath: 'metrics/not-in-this-work-unit.yml',
      reason: 'no_physical_table',
      fallback: 'flagged',
    });

    expect(output).toContain(
      'Error: rawPath "metrics/not-in-this-work-unit.yml" is not available to this ingest stage',
    );
    expect(stageIndex.unmappedFallbacks).toEqual([]);
  });

  it('records explicit artifact resolutions for provenance rows', async () => {
    const stageIndex = makeStageIndex();
    const tool = createEmitArtifactResolutionTool({
      stageIndex,
      allowedPaths: new Set(['explores/b2b/sales_pipeline.json']),
    });
    const resolution = {
      rawPath: 'explores/b2b/sales_pipeline.json',
      artifactKind: 'sl',
      artifactKey: 'looker__b2b__sales_pipeline',
      actionType: 'subsumed',
      reason: 'File-adapter source b2b__sales_pipeline is canonical for this explore.',
    } as const;

    const output = await executeTool(tool, resolution);

    expect(output).toBe('recorded artifact resolution for sl:looker__b2b__sales_pipeline');
    expect(stageIndex.artifactResolutions).toEqual([{ ...resolution }]);
  });
});
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { StageIndex, UnmappedFallbackRecord } from '../stages/stage-index.types.js';
|
||||
|
||||
/** Dependencies injected into the unmapped-fallback emit tool. */
interface EmitUnmappedFallbackDeps {
  /** Stage index whose `unmappedFallbacks` list the tool reads and appends to. */
  stageIndex: StageIndex;
  /** Raw paths the tool accepts; any other `rawPath` is answered with an error string. */
  allowedPaths: ReadonlySet<string>;
}
|
||||
|
||||
/** Structured reason codes accepted for an unmapped fallback decision. */
const UNMAPPED_FALLBACK_REASONS = [
  'no_connection_mapping',
  'looker_template_unresolved',
  'derived_table_not_supported',
  'no_physical_table',
  'multiple_table_references',
  'unsupported_dialect',
  'parse_error',
  'missing_target_table',
] as const;

/** Zod schema restricting `reason` to one of the structured codes above. */
const unmappedFallbackReasonSchema = z.enum(UNMAPPED_FALLBACK_REASONS);
|
||||
|
||||
/**
 * Tells whether two fallback records describe the same decision.
 *
 * Raw path, reason code and fallback are compared; `detail` is not.
 */
function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallbackRecord): boolean {
  const identityFields = ['rawPath', 'reason', 'fallback'] as const;
  return identityFields.every((field) => left[field] === right[field]);
}
|
||||
|
||||
/**
 * Builds the tool that records one unmapped-fallback decision on
 * `stageIndex.unmappedFallbacks`.
 *
 * A `rawPath` outside `deps.allowedPaths` is answered with an error string.
 * A decision matching an existing entry (see `sameUnmappedFallback`) is not
 * appended again; the existing entry, including its `detail`, is left as is.
 */
export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
  return tool({
    description:
      'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The reason MUST be one of the structured codes; put any human-readable context in detail.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      reason: unmappedFallbackReasonSchema,
      detail: z.string().optional(),
      fallback: z.enum(['sql_standalone', 'wiki_only', 'flagged']),
    }),
    execute: async ({ rawPath, reason, detail, fallback }): Promise<string> => {
      if (!deps.allowedPaths.has(rawPath)) {
        return `Error: rawPath "${rawPath}" is not available to this ingest stage`;
      }

      // `detail` is left off the record entirely when the caller did not supply it.
      const record: UnmappedFallbackRecord = {
        rawPath,
        reason,
        ...(detail !== undefined ? { detail } : {}),
        fallback,
      };

      const recorded = deps.stageIndex.unmappedFallbacks;
      const alreadyRecorded = recorded.some((candidate) => sameUnmappedFallback(candidate, record));
      if (!alreadyRecorded) {
        recorded.push(record);
      }
      return `recorded unmapped fallback for ${rawPath} (${fallback})`;
    },
  });
}
|
||||
56
packages/context/src/ingest/tools/eviction-list.tool.test.ts
Normal file
56
packages/context/src/ingest/tools/eviction-list.tool.test.ts
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createEvictionListTool } from './eviction-list.tool.js';
|
||||
|
||||
describe('eviction_list tool', () => {
  // Invokes the tool's execute handler with an empty input and a stub call context.
  const invoke = async (tool: ReturnType<typeof createEvictionListTool>): Promise<string> => {
    const execute = tool.execute as (...args: unknown[]) => unknown;
    return (await execute({}, { toolCallId: 't', messages: [] })) as string;
  };

  // Tool wired with a placeholder provenance port and no deleted paths.
  const toolWithoutDeletions = () =>
    createEvictionListTool({
      provenance: {} as any,
      connectionId: 'c1',
      sourceKey: 'lookml',
      deletedRawPaths: [],
    });

  it('returns artifacts produced for each deleted raw path', async () => {
    const latestArtifacts = new Map([
      ['views/old.lkml', [{ artifact_kind: 'sl', artifact_key: 'old_metric', action_type: 'source_created' } as any]],
      ['views/gone.lkml', []],
    ]);
    const provenance = {
      findLatestArtifactsForRawPaths: vi.fn().mockResolvedValue(latestArtifacts),
    };
    const tool = createEvictionListTool({
      provenance: provenance as any,
      connectionId: 'c1',
      sourceKey: 'lookml',
      deletedRawPaths: ['views/old.lkml', 'views/gone.lkml'],
    });

    const out = await invoke(tool);

    expect(out).toContain('views/old.lkml');
    expect(out).toContain('old_metric');
    expect(out).toContain('views/gone.lkml');
  });

  it('returns empty string when no deletions', async () => {
    const out = await invoke(toolWithoutDeletions());

    expect(out).toMatch(/empty/i);
  });

  it('tells curators to record decisions', () => {
    const { description } = toolWithoutDeletions();

    expect(description).toContain('context_eviction_decision_write');
  });
});
|
||||
39
packages/context/src/ingest/tools/eviction-list.tool.ts
Normal file
39
packages/context/src/ingest/tools/eviction-list.tool.ts
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { IngestProvenancePort } from '../ports.js';
|
||||
|
||||
/** Dependencies injected into the eviction-list tool. */
export interface EvictionListDeps {
  /** Port queried for the latest artifacts produced from each deleted raw path. */
  provenance: IngestProvenancePort;
  /** Connection id forwarded to the provenance lookup. */
  connectionId: string;
  /** Source key forwarded to the provenance lookup. */
  sourceKey: string;
  /** Raw paths to look up; when empty the tool answers without querying provenance. */
  deletedRawPaths: string[];
}
|
||||
|
||||
/**
 * Builds the tool that lists, per deleted raw path, the artifacts returned by
 * the provenance port.
 *
 * With no deleted paths the tool answers with an "(empty)" notice and does not
 * query provenance. Otherwise every entry returned by the port is rendered in
 * the port's order, followed by a "(none)" section for each deleted path the
 * port returned no entry for, so no deleted path is missing from the listing.
 */
export function createEvictionListTool(deps: EvictionListDeps) {
  return tool({
    description:
      'List every artifact that the most recent completed sync produced from a now-deleted raw file. Use this to decide whether to remove (no inbound refs) or retain with deprecation (has inbound refs). Inbound refs are NOT currently computed — treat every retained entry as a candidate and ask the user via the IngestReport. After deciding, record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.',
    inputSchema: z.object({}),
    execute: async () => {
      if (deps.deletedRawPaths.length === 0) {
        return '(empty) — no files were deleted since the last sync';
      }
      const map = await deps.provenance.findLatestArtifactsForRawPaths(
        deps.connectionId,
        deps.sourceKey,
        deps.deletedRawPaths,
      );
      const sections = [...map.entries()].map(([path, rows]) => {
        if (rows.length === 0) {
          return `- raw_path: ${path}\n artifacts: (none)`;
        }
        const artifactLines = rows
          .map((r) => ` - kind: ${r.artifact_kind} key: ${r.artifact_key} (last action: ${r.action_type})`)
          .join('\n');
        return `- raw_path: ${path}\n artifacts:\n${artifactLines}`;
      });
      // A deleted path without a map entry would otherwise vanish from the listing
      // (and an empty map would produce an empty answer); list it with no artifacts.
      for (const path of new Set(deps.deletedRawPaths)) {
        if (!map.has(path)) {
          sections.push(`- raw_path: ${path}\n artifacts: (none)`);
        }
      }
      return sections.join('\n');
    },
  });
}
|
||||
69
packages/context/src/ingest/tools/read-raw-file.tool.test.ts
Normal file
69
packages/context/src/ingest/tools/read-raw-file.tool.test.ts
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { createReadRawFileTool } from './read-raw-file.tool.js';
|
||||
|
||||
describe('read_raw_file tool', () => {
  let stagedDir: string;

  // Creates a tool limited to `allowed` and reads `path` through it.
  const readThroughTool = (allowed: string[], path: string) => {
    const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(allowed) });
    const execute = tool.execute as (...args: unknown[]) => unknown;
    return execute({ path }, { toolCallId: 't1', messages: [] });
  };

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'readraw-'));
    const viewsDir = join(stagedDir, 'views');
    await mkdir(viewsDir, { recursive: true });
    await writeFile(join(viewsDir, 'a.yml'), 'line1\nline2\nline3\n', 'utf-8');
    await writeFile(join(stagedDir, 'peer.yml'), 'secret', 'utf-8');
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('returns content for an allowed path', async () => {
    const result = await readThroughTool(['views/a.yml'], 'views/a.yml');

    expect(result).toContain('line1');
    expect(result).toContain('line2');
  });

  it('refuses to return oversized files and directs callers to read spans', async () => {
    await writeFile(join(stagedDir, 'views', 'huge.yml'), `${'x'.repeat(160_000)}\n`, 'utf-8');

    const result = await readThroughTool(['views/huge.yml'], 'views/huge.yml');

    expect(result).toMatch(/too large/i);
    expect(result).toMatch(/read_raw_span/i);
    expect(String(result).length).toBeLessThan(1000);
  });

  it('rejects a path not in the allow-list', async () => {
    const result = await readThroughTool(['views/a.yml'], 'peer.yml');

    expect(result).toMatch(/not accessible/i);
    expect(result).not.toContain('secret');
  });

  it('rejects directory traversal attempts', async () => {
    const result = await readThroughTool(['views/a.yml'], '../outside.yml');

    expect(result).toMatch(/not accessible/i);
  });

  it('returns a clear error when the file is missing despite being allowed', async () => {
    const result = await readThroughTool(['views/missing.yml'], 'views/missing.yml');

    expect(result).toMatch(/not found/i);
  });
});
|
||||
41
packages/context/src/ingest/tools/read-raw-file.tool.ts
Normal file
41
packages/context/src/ingest/tools/read-raw-file.tool.ts
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import { readFile, stat } from 'node:fs/promises';
|
||||
import { join, normalize, resolve } from 'node:path';
|
||||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
|
||||
/** Dependencies injected into the read-raw-file tool. */
interface ReadRawFileDeps {
  /** Directory that requested relative paths are resolved against. */
  stagedDir: string;
  /**
   * Relative paths the tool may read. The set is only queried and iterated,
   * never mutated, so a read-only view is sufficient.
   */
  allowedPaths: ReadonlySet<string>;
}
|
||||
|
||||
/** Largest file size, in bytes as reported by `stat`, that is returned in full. */
const MAX_READ_RAW_FILE_BYTES = 120_000;

/**
 * Builds the tool that returns the full text of an allow-listed raw file.
 *
 * Every failure is reported as an `Error: ...` string rather than thrown:
 * - the path is outside the allow-list or escapes the staged root;
 * - the path does not name a regular file;
 * - the file is larger than `MAX_READ_RAW_FILE_BYTES`;
 * - the file is missing ("not found") or unreadable for another reason
 *   ("could not be read").
 */
export function createReadRawFileTool(deps: ReadRawFileDeps) {
  const stagedRoot = resolve(deps.stagedDir);
  return tool({
    description:
      "Read the full text content of a raw source file inside this WorkUnit. `path` must be relative to the staged bundle root (no leading slash, no `..`) and must appear in the WorkUnit's rawFiles or dependencyPaths list.",
    inputSchema: z.object({
      path: z.string().describe('Path relative to the staged bundle root. Example: "views/customers/customer.lkml".'),
    }),
    execute: async ({ path }) => {
      // Collapse `.`/`..` segments and drop leading separators before the allow-list lookup.
      const normalized = normalize(path).replace(/^[/\\]+/, '');
      if (normalized.startsWith('..') || !deps.allowedPaths.has(normalized)) {
        return `Error: path "${path}" is not accessible from this WorkUnit. Allowed paths: ${[...deps.allowedPaths].sort().join(', ')}`;
      }
      // Second line of defence: the resolved location must stay under the staged root.
      // NOTE(review): the hard-coded "/" assumes POSIX path separators — confirm whether
      // Windows hosts need to be supported.
      const absolute = resolve(join(stagedRoot, normalized));
      if (!absolute.startsWith(`${stagedRoot}/`) && absolute !== stagedRoot) {
        return `Error: path "${path}" is not accessible from this WorkUnit.`;
      }
      try {
        const fileStat = await stat(absolute);
        if (!fileStat.isFile()) {
          // Directories and other non-regular entries cannot be returned as text.
          return `Error: path "${path}" is not a regular file.`;
        }
        if (fileStat.size > MAX_READ_RAW_FILE_BYTES) {
          return `Error: file "${path}" is too large to return in full (${fileStat.size} bytes). Use read_raw_span with targeted line ranges instead.`;
        }
        return await readFile(absolute, 'utf-8');
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        const code = err instanceof Error && 'code' in err ? err.code : undefined;
        // Only a missing entry is reported as "not found"; permission and other
        // I/O failures get their own wording so the caller is not misled.
        if (code === 'ENOENT') {
          return `Error: file "${path}" not found. (${reason})`;
        }
        return `Error: file "${path}" could not be read. (${reason})`;
      }
    },
  });
}
|
||||
53
packages/context/src/ingest/tools/read-raw-span.tool.test.ts
Normal file
53
packages/context/src/ingest/tools/read-raw-span.tool.test.ts
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { createReadRawSpanTool } from './read-raw-span.tool.js';
|
||||
|
||||
describe('read_raw_span tool', () => {
  let stagedDir: string;

  // Creates a tool limited to `allowed` and reads the given line range of v/a.yml.
  const readSpan = (allowed: string[], startLine: number, endLine: number) => {
    const tool = createReadRawSpanTool({ stagedDir, allowedPaths: new Set(allowed) });
    const execute = tool.execute as (...args: unknown[]) => unknown;
    return execute({ path: 'v/a.yml', startLine, endLine }, { toolCallId: 't1', messages: [] });
  };

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'readspan-'));
    const viewDir = join(stagedDir, 'v');
    await mkdir(viewDir, { recursive: true });
    await writeFile(join(viewDir, 'a.yml'), 'line1\nline2\nline3\nline4\nline5\n', 'utf-8');
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('returns the requested 1-based inclusive line range', async () => {
    const result = await readSpan(['v/a.yml'], 2, 4);

    expect(result).toBe('line2\nline3\nline4');
  });

  it('clamps endLine to the end of the file', async () => {
    const result = await readSpan(['v/a.yml'], 4, 99);

    expect(result).toBe('line4\nline5');
  });

  it('rejects start > end', async () => {
    const result = await readSpan(['v/a.yml'], 5, 2);

    expect(result).toMatch(/startLine must be/i);
  });

  it('rejects paths not in the allow-list', async () => {
    const result = await readSpan([], 1, 1);

    expect(result).toMatch(/not accessible/i);
  });
});
|
||||
46
packages/context/src/ingest/tools/read-raw-span.tool.ts
Normal file
46
packages/context/src/ingest/tools/read-raw-span.tool.ts
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { join, normalize, resolve } from 'node:path';
|
||||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
|
||||
/** Dependencies injected into the read-raw-span tool. */
interface ReadRawSpanDeps {
  /** Directory that requested relative paths are resolved against. */
  stagedDir: string;
  /**
   * Relative paths the tool may read. The set is only queried and iterated,
   * never mutated, so a read-only view is sufficient.
   */
  allowedPaths: ReadonlySet<string>;
}
|
||||
|
||||
/**
 * Builds the tool that returns a 1-based, inclusive line range of an
 * allow-listed raw file.
 *
 * Every failure is reported as an `Error: ...` string rather than thrown:
 * - `startLine` is greater than `endLine`;
 * - the path is outside the allow-list or escapes the staged root;
 * - `startLine` lies past the last line of the file;
 * - the file is missing ("not found") or unreadable for another reason
 *   ("could not be read").
 *
 * `endLine` is clamped to the file length. A single trailing newline does not
 * count as an extra, empty line.
 */
export function createReadRawSpanTool(deps: ReadRawSpanDeps) {
  const stagedRoot = resolve(deps.stagedDir);
  return tool({
    description:
      'Read a 1-based inclusive line range from a raw source file. Use this to resolve a provenance pointer like `file.lkml#L15-28` without loading the whole file into context.',
    inputSchema: z.object({
      path: z.string().describe('Path relative to the staged bundle root.'),
      startLine: z.number().int().min(1).describe('First line to return (1-based, inclusive).'),
      endLine: z.number().int().min(1).describe('Last line to return (1-based, inclusive). Clamped to file length.'),
    }),
    execute: async ({ path, startLine, endLine }) => {
      if (startLine > endLine) {
        return `Error: startLine must be <= endLine (got startLine=${startLine}, endLine=${endLine})`;
      }
      // Collapse `.`/`..` segments and drop leading separators before the allow-list lookup.
      const normalized = normalize(path).replace(/^[/\\]+/, '');
      if (normalized.startsWith('..') || !deps.allowedPaths.has(normalized)) {
        return `Error: path "${path}" is not accessible from this context. Allowed paths: ${[...deps.allowedPaths].sort().join(', ')}`;
      }
      // Second line of defence: the resolved location must stay under the staged root.
      // NOTE(review): the hard-coded "/" assumes POSIX path separators — confirm whether
      // Windows hosts need to be supported.
      const absolute = resolve(join(stagedRoot, normalized));
      if (!absolute.startsWith(`${stagedRoot}/`) && absolute !== stagedRoot) {
        return `Error: path "${path}" is not accessible from this context.`;
      }
      try {
        const body = await readFile(absolute, 'utf-8');
        const rawLines = body.split('\n');
        // Treat a trailing empty element caused by a file-ending newline as NOT a line.
        const lines = rawLines.length > 0 && rawLines[rawLines.length - 1] === '' ? rawLines.slice(0, -1) : rawLines;
        if (startLine > lines.length) {
          // Without this, an out-of-range request would look exactly like a blank line.
          return `Error: startLine ${startLine} is past the end of file "${path}" (${lines.length} lines).`;
        }
        const from = Math.max(1, startLine);
        const to = Math.min(lines.length, endLine);
        return lines.slice(from - 1, to).join('\n');
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        const code = err instanceof Error && 'code' in err ? err.code : undefined;
        // Only a missing entry is reported as "not found"; permission and other
        // I/O failures get their own wording so the caller is not misled.
        if (code === 'ENOENT') {
          return `Error: file "${path}" not found. (${reason})`;
        }
        return `Error: file "${path}" could not be read. (${reason})`;
      }
    },
  });
}
|
||||
131
packages/context/src/ingest/tools/stage-diff.tool.test.ts
Normal file
131
packages/context/src/ingest/tools/stage-diff.tool.test.ts
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { createStageDiffTool } from './stage-diff.tool.js';
|
||||
|
||||
describe('stage_diff tool', () => {
  // Derive fixture types from the factory's own signature so no extra imports are needed.
  type DiffStageIndex = Parameters<typeof createStageDiffTool>[0]['stageIndex'];
  type DiffWorkUnit = DiffStageIndex['workUnits'][number];

  /** Builds a successful work unit with no raw files that carries the given writes. */
  const successfulUnit = (
    unitKey: string,
    actions: DiffWorkUnit['actions'],
    touchedSlSources: DiffWorkUnit['touchedSlSources'],
  ): DiffWorkUnit => ({
    unitKey,
    rawFiles: [],
    status: 'success',
    actions,
    touchedSlSources,
  });

  /** Wraps work units in a stage index for job `j` with empty reconciliation lists. */
  const stageIndexFor = (connectionId: string, workUnits: DiffWorkUnit[]): DiffStageIndex => ({
    jobId: 'j',
    connectionId,
    workUnits,
    conflictsResolved: [],
    evictionsApplied: [],
    unmappedFallbacks: [],
  });

  /** Creates the tool over `index`, diffs the two unit keys and returns the text output. */
  const runDiff = async (index: DiffStageIndex, unitKeyA: string, unitKeyB: string): Promise<string> => {
    const diffTool = createStageDiffTool({ stageIndex: index });
    const result = await (diffTool.execute as (...args: unknown[]) => unknown)(
      { unitKeyA, unitKeyB },
      { toolCallId: 't', messages: [] },
    );
    return result as string;
  };

  // Two units that both create `churn_risk_score` on the run connection.
  const stageIndex = stageIndexFor('c1', [
    successfulUnit(
      'u1',
      [{ target: 'sl', type: 'created', key: 'churn_risk_score', detail: 'customers' }],
      [{ connectionId: 'c1', sourceName: 'customers' }],
    ),
    successfulUnit(
      'u2',
      [{ target: 'sl', type: 'created', key: 'churn_risk_score', detail: 'billing' }],
      [{ connectionId: 'c1', sourceName: 'billing' }],
    ),
  ]);

  it('finds overlapping artifact keys between two WUs', async () => {
    const report = await runDiff(stageIndex, 'u1', 'u2');
    expect(report).toContain('churn_risk_score');
    expect(report).toMatch(/overlap/i);
  });

  it('says no overlap when keys are disjoint', async () => {
    const disjoint = stageIndexFor('c1', [
      successfulUnit(
        'u1',
        [{ target: 'sl', type: 'created', key: 'a', detail: '' }],
        [{ connectionId: 'c1', sourceName: 'a' }],
      ),
      successfulUnit(
        'u2',
        [{ target: 'sl', type: 'created', key: 'b', detail: '' }],
        [{ connectionId: 'c1', sourceName: 'b' }],
      ),
    ]);
    const report = await runDiff(disjoint, 'u1', 'u2');
    expect(report).toMatch(/no overlap/i);
  });

  it('does not overlap same-named SL actions on different target connections', async () => {
    const sharedKey = 'looker__b2b__sales_pipeline';
    const crossConnection = stageIndexFor('looker-run', [
      successfulUnit(
        'u1',
        [{ target: 'sl', type: 'created', key: sharedKey, detail: 'W1', targetConnectionId: 'W1' }],
        [{ connectionId: 'W1', sourceName: sharedKey }],
      ),
      successfulUnit(
        'u2',
        [{ target: 'sl', type: 'created', key: sharedKey, detail: 'W2', targetConnectionId: 'W2' }],
        [{ connectionId: 'W2', sourceName: sharedKey }],
      ),
    ]);

    const report = await runDiff(crossConnection, 'u1', 'u2');

    expect(report).toMatch(/no overlap/i);
  });

  it('returns an error when a unitKey is unknown', async () => {
    const report = await runDiff(stageIndex, 'u1', 'nope');
    expect(report).toMatch(/unknown/i);
  });
});
|
||||
44
packages/context/src/ingest/tools/stage-diff.tool.ts
Normal file
44
packages/context/src/ingest/tools/stage-diff.tool.ts
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import { memoryActionIdentity } from '../action-identity.js';
|
||||
import type { StageIndex } from '../stages/stage-index.types.js';
|
||||
|
||||
/** Dependencies injected into the stage_diff tool factory. */
export interface StageDiffDeps {
|
||||
/** Stage index whose `workUnits` are looked up by `unitKey` and compared. */
stageIndex: StageIndex;
|
||||
}
|
||||
|
||||
/**
 * Build the tool that compares the writes of two WorkUnits in the stage index.
 *
 * Two actions are treated as the same write when `memoryActionIdentity`
 * returns the same identity for them (the run's `connectionId` is passed as
 * the second argument). The tool returns plain text:
 * - `Error: unknown unitKey "…"` when either key is not in the stage index;
 * - `No overlap between A and B.` when no identity is shared;
 * - otherwise one entry per shared identity, with the `detail` of the first
 *   matching action from each unit.
 *
 * @param deps - Provides the stage index to read work units from.
 * @returns The configured tool.
 */
export function createStageDiffTool(deps: StageDiffDeps) {
  type WorkUnitAction = StageIndex['workUnits'][number]['actions'][number];
  type ActionIdentity = ReturnType<typeof memoryActionIdentity>;

  return tool({
    description:
      'Compare two WorkUnits by their writes. SL writes overlap only when target connection and artifact key both match; same-key SL actions on different target connections are non-overlapping.',
    inputSchema: z.object({
      unitKeyA: z.string(),
      unitKeyB: z.string(),
    }),
    execute: async ({ unitKeyA, unitKeyB }): Promise<string> => {
      const a = deps.stageIndex.workUnits.find((wu) => wu.unitKey === unitKeyA);
      const b = deps.stageIndex.workUnits.find((wu) => wu.unitKey === unitKeyB);
      if (!a) {
        return `Error: unknown unitKey "${unitKeyA}"`;
      }
      if (!b) {
        return `Error: unknown unitKey "${unitKeyB}"`;
      }

      const runConnectionId = deps.stageIndex.connectionId;

      // Compute each action's identity exactly once and remember the FIRST
      // action per identity, so the detail lookup below is a map hit rather
      // than a rescan of the action list for every overlapping key.
      const indexByIdentity = (actions: readonly WorkUnitAction[]): Map<ActionIdentity, WorkUnitAction> => {
        const index = new Map<ActionIdentity, WorkUnitAction>();
        for (const action of actions) {
          const identity = memoryActionIdentity(action, runConnectionId);
          if (!index.has(identity)) {
            index.set(identity, action);
          }
        }
        return index;
      };

      const actionsA = indexByIdentity(a.actions);
      const actionsB = indexByIdentity(b.actions);

      // Map keys iterate in insertion order, so overlaps are reported in the
      // order unit A first wrote them.
      const overlap = [...actionsA.keys()].filter((identity) => actionsB.has(identity));
      if (overlap.length === 0) {
        return `No overlap between ${unitKeyA} and ${unitKeyB}.`;
      }

      const overlapDetail = overlap
        .map(
          (k) =>
            `- ${k}\n ${unitKeyA}: ${actionsA.get(k)?.detail ?? ''}\n ${unitKeyB}: ${actionsB.get(k)?.detail ?? ''}`,
        )
        .join('\n');
      return `Overlap between ${unitKeyA} and ${unitKeyB}:\n${overlapDetail}`;
    },
  });
}
|
||||
58
packages/context/src/ingest/tools/stage-list.tool.test.ts
Normal file
58
packages/context/src/ingest/tools/stage-list.tool.test.ts
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { createStageListTool } from './stage-list.tool.js';
|
||||
|
||||
describe('stage_list tool', () => {
  // Derive the fixture type from the factory's own signature so no extra imports are needed.
  type ListStageIndex = Parameters<typeof createStageListTool>[0]['stageIndex'];

  /** Creates the tool over `stageIndex`, runs it with empty input and returns the text output. */
  const listWrites = async (stageIndex: ListStageIndex): Promise<string> => {
    const listTool = createStageListTool({ stageIndex });
    const result = await (listTool.execute as (...args: unknown[]) => unknown)(
      {},
      { toolCallId: 't', messages: [] },
    );
    return result as string;
  };

  it('returns a compact summary of the stage index', async () => {
    const summary = await listWrites({
      jobId: 'j1',
      connectionId: 'c1',
      workUnits: [
        {
          unitKey: 'u1',
          rawFiles: ['a.yml'],
          status: 'success',
          actions: [{ target: 'sl', type: 'created', key: 'src_a', detail: '' }],
          touchedSlSources: [{ connectionId: 'c1', sourceName: 'src_a' }],
        },
        {
          unitKey: 'u2',
          rawFiles: ['b.yml'],
          status: 'success',
          actions: [{ target: 'wiki', type: 'created', key: 'page_b', detail: '' }],
          touchedSlSources: [],
        },
      ],
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
    });

    // Every unit key and every written artifact key must appear in the listing.
    for (const fragment of ['u1', 'src_a', 'u2', 'page_b']) {
      expect(summary).toContain(fragment);
    }
  });

  it('says empty when no writes', async () => {
    const summary = await listWrites({
      jobId: 'j',
      connectionId: 'c1',
      workUnits: [],
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
    });
    expect(summary).toMatch(/empty/i);
  });
});
|
||||
30
packages/context/src/ingest/tools/stage-list.tool.ts
Normal file
30
packages/context/src/ingest/tools/stage-list.tool.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { StageIndex } from '../stages/stage-index.types.js';
|
||||
|
||||
/** Dependencies injected into the stage_list tool factory. */
export interface StageListDeps {
|
||||
/** Stage index whose `workUnits` are rendered into the listing. */
stageIndex: StageIndex;
|
||||
}
|
||||
|
||||
/**
 * Build the tool that renders every WorkUnit in the stage index as text.
 *
 * The tool takes no input. With no work units it returns a fixed "(empty)"
 * message; otherwise it returns one section per unit showing its key, status,
 * raw files, and one line per action (`target:type key`).
 *
 * @param deps - Provides the stage index to list.
 * @returns The configured tool.
 */
export function createStageListTool(deps: StageListDeps) {
  return tool({
    description:
      'List every write made by Stage 3 WorkUnits in this job. Each entry has the unitKey, raw files, and the action set (SL sources touched, wiki pages written).',
    inputSchema: z.object({}),
    execute: () => {
      const units = deps.stageIndex.workUnits;
      if (units.length === 0) {
        return Promise.resolve('(empty) — no WorkUnits wrote anything in this job');
      }

      const sections: string[] = [];
      for (const unit of units) {
        // Placeholder line unless the unit actually recorded actions.
        let actionLines = ' (no actions)';
        if (unit.actions.length !== 0) {
          actionLines = unit.actions.map((action) => ` - ${action.target}:${action.type} ${action.key}`).join('\n');
        }
        // An empty join result is falsy, so units without raw files show "(none)".
        const fileList = unit.rawFiles.join(', ') || '(none)';
        sections.push(
          `- unitKey: ${unit.unitKey} (status=${unit.status})\n rawFiles: ${fileList}\n actions:\n${actionLines}`,
        );
      }
      return Promise.resolve(sections.join('\n'));
    },
  });
}
|
||||
106
packages/context/src/ingest/tools/tool-call-logger.ts
Normal file
106
packages/context/src/ingest/tools/tool-call-logger.ts
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
import { appendFile, mkdir } from 'node:fs/promises';
|
||||
import { dirname } from 'node:path';
|
||||
import type { ToolExecuteFunction, ToolExecutionOptions, ToolSet } from 'ai';
|
||||
|
||||
/** One record describing a single tool invocation, as written to the log file and passed to `onEntry`. */
export interface ToolCallLogEntry {
|
||||
/** ISO-8601 timestamp taken when the entry is built, i.e. after the tool call settles. */
ts: string;
|
||||
/** The `wuKey` passed to `wrapToolsWithLogger`. */
wuKey: string;
|
||||
/** Copied from the execution options' `toolCallId`. */
toolCallId?: string;
|
||||
/** Key of the tool within the wrapped tool set. */
toolName: string;
|
||||
/** Wall-clock milliseconds between invoking the tool and building this entry. */
durationMs: number;
|
||||
/** The input the tool was called with, unmodified. */
input: unknown;
|
||||
/** Resolved tool result; set only when the call succeeded. */
output?: unknown;
|
||||
/** Summary of the thrown value; set only when the call failed. */
error?: { message: string; name?: string };
|
||||
}
|
||||
|
||||
/** Optional hooks for `wrapToolsWithLogger`. */
interface ToolCallLoggerOptions {
|
||||
/** Invoked with every entry (success or failure) in addition to the file append. */
onEntry?(entry: ToolCallLogEntry): void;
|
||||
}
|
||||
|
||||
/**
 * Wrap every tool in `tools` so each invocation appends a JSONL record with
 * `{toolName, input, output | error, durationMs}` to `logFilePath`. Used by
 * the ingest runner to produce per-WU transcripts so a completed sync can be
 * inspected the way `parse_chat.py` inspects a chat.
 *
 * Tool shape is preserved (description, inputSchema, ...). Tools without an
 * `execute` function (provider-defined) pass through untouched.
 *
 * Logging is strictly an observer of the tool call:
 * - the tool's resolved value is returned unchanged and its thrown value is
 *   re-thrown unchanged;
 * - file appends are fire-and-forget and are never awaited here;
 * - an exception thrown by `options.onEntry` is swallowed, so a faulty
 *   observer can neither turn a successful call into a failure nor mask the
 *   tool's real error, and the file record is still appended.
 *
 * @param tools - Tool set to wrap; the input object is not mutated.
 * @param logFilePath - JSONL file each record is appended to.
 * @param wuKey - Identifier stamped on every record.
 * @param options - Optional `onEntry` observer called with each record.
 * @returns A new tool set with the same keys as `tools`.
 */
export function wrapToolsWithLogger<T extends ToolSet>(
  tools: T,
  logFilePath: string,
  wuKey: string,
  options: ToolCallLoggerOptions = {},
): T {
  // Emits one record to the observer and the log file; never throws.
  const emit = (entry: ToolCallLogEntry): void => {
    try {
      options.onEntry?.(entry);
    } catch {
      // best-effort: observer failures must not affect the tool's outcome
    }
    appendEntry(logFilePath, entry);
  };

  const wrapped: Record<string, unknown> = {};
  for (const [name, original] of Object.entries(tools) as Array<[string, T[string]]>) {
    const originalExecute = original.execute;
    if (typeof originalExecute !== 'function') {
      wrapped[name] = original;
      continue;
    }

    const wrappedExecute: ToolExecuteFunction<unknown, unknown> = async (
      input: unknown,
      opts: ToolExecutionOptions,
    ) => {
      const start = Date.now();
      // Fields common to success and failure records, captured once the call has settled.
      const settledFields = () => ({
        ts: new Date().toISOString(),
        wuKey,
        toolCallId: opts.toolCallId,
        toolName: name,
        durationMs: Date.now() - start,
        input,
      });

      let output: unknown;
      try {
        // Only the tool call itself is guarded, so logging problems can never
        // be misreported as a tool failure.
        output = await (originalExecute as ToolExecuteFunction<unknown, unknown>)(input, opts);
      } catch (err) {
        emit({
          ...settledFields(),
          error: {
            message: err instanceof Error ? err.message : String(err),
            name: err instanceof Error ? err.name : undefined,
          },
        });
        throw err;
      }

      emit({ ...settledFields(), output });
      return output;
    };

    wrapped[name] = { ...original, execute: wrappedExecute };
  }
  return wrapped as T;
}
|
||||
|
||||
/**
 * Start appending `entry` to the file at `path` as one JSON line and return
 * immediately. The parent directory is created first if needed. Any failure
 * while creating the directory, serializing, or writing is deliberately
 * ignored, and the caller is never notified.
 */
function appendEntry(path: string, entry: ToolCallLogEntry): void {
  const writeLine = async (): Promise<void> => {
    try {
      const parentDir = dirname(path);
      await mkdir(parentDir, { recursive: true });
      const line = `${safeStringify(entry)}\n`;
      await appendFile(path, line, 'utf-8');
    } catch {
      // best-effort
    }
  };
  // Intentionally not awaited: logging must not delay the caller.
  void writeLine();
}
|
||||
|
||||
/**
 * Serialize `v` to JSON without throwing, always returning a string.
 *
 * Returns the fixed record `{"error":"serialize-failed"}` when serialization
 * throws (for example on circular references or BigInt values) and also when
 * `JSON.stringify` produces no text at all, which it does for `undefined`,
 * functions and symbols.
 *
 * @param v - Any value.
 * @returns The JSON text for `v`, or the fixed failure record.
 */
function safeStringify(v: unknown): string {
  try {
    // The lib typing says `string`, but the runtime result can be `undefined`.
    const json: string | undefined = JSON.stringify(v);
    if (typeof json === 'string') {
      return json;
    }
  } catch {
    // fall through to the shared failure record
  }
  return JSON.stringify({ error: 'serialize-failed' });
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue