Initial open-source release

2026-06-28 08:49:38 +02:00 · 2026-05-10 23:12:26 +02:00 · 2026-05-10 23:12:26 +02:00 · 1a42152e6f
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions
--- a/packages/context/src/ingest/tools/emit-artifact-resolution.tool.ts
+++ b/packages/context/src/ingest/tools/emit-artifact-resolution.tool.ts
@ -0,0 +1,53 @@
+import { tool } from 'ai';
+import { z } from 'zod';
+import type { ArtifactResolutionRecord, StageIndex } from '../stages/stage-index.types.js';
+
+/** Dependencies injected into the artifact-resolution emit tool. */
+interface EmitArtifactResolutionDeps {
+  /** Stage index whose `artifactResolutions` list the tool reads and updates. */
+  stageIndex: StageIndex;
+  /**
+   * Raw paths the tool accepts. Only membership is tested, so a read-only set is
+   * sufficient (a plain `Set<string>` is still accepted by callers).
+   */
+  allowedPaths: ReadonlySet<string>;
+}
+
+/**
+ * Report whether two resolution records share the same raw path, artifact kind,
+ * artifact key and action type. The `reason` field is not part of the comparison.
+ */
+function sameArtifactResolution(left: ArtifactResolutionRecord, right: ArtifactResolutionRecord): boolean {
+  if (left.rawPath !== right.rawPath) {
+    return false;
+  }
+  if (left.artifactKind !== right.artifactKind) {
+    return false;
+  }
+  if (left.artifactKey !== right.artifactKey) {
+    return false;
+  }
+  return left.actionType === right.actionType;
+}
+
+/**
+ * Build the tool that records one artifact resolution on the stage index.
+ *
+ * The tool refuses raw paths outside `deps.allowedPaths` by returning an `Error: ...`
+ * string. Otherwise it overwrites the stored record that matches on
+ * `sameArtifactResolution`, or appends a new one, and returns a confirmation string.
+ */
+export function createEmitArtifactResolutionTool(deps: EmitArtifactResolutionDeps) {
+  return tool({
+    description:
+      'Record one explicit artifact resolution for ingest provenance. Use when reconciliation merges or subsumes an artifact without creating a new wiki or SL write action.',
+    inputSchema: z.object({
+      rawPath: z.string().min(1),
+      artifactKind: z.enum(['sl', 'wiki']),
+      artifactKey: z.string().min(1),
+      actionType: z.enum(['merged', 'subsumed']),
+      reason: z.string().min(1),
+    }),
+    execute: async (input): Promise<string> => {
+      const { rawPath, artifactKind, artifactKey, actionType, reason } = input;
+      if (!deps.allowedPaths.has(rawPath)) {
+        return `Error: rawPath "${rawPath}" is not available to this ingest stage`;
+      }
+
+      const record: ArtifactResolutionRecord = { rawPath, artifactKind, artifactKey, actionType, reason };
+      // The list is optional on the stage index; a missing list behaves like "no match".
+      const known = deps.stageIndex.artifactResolutions;
+      const position = known ? known.findIndex((candidate) => sameArtifactResolution(candidate, record)) : -1;
+      if (known && position >= 0) {
+        // Same resolution emitted again: keep one entry, carrying the latest reason.
+        known[position] = record;
+      } else {
+        deps.stageIndex.artifactResolutions = [...(known ?? []), record];
+      }
+      return `recorded artifact resolution for ${artifactKind}:${artifactKey}`;
+    },
+  });
+}
--- a/packages/context/src/ingest/tools/emit-conflict-resolution.tool.ts
+++ b/packages/context/src/ingest/tools/emit-conflict-resolution.tool.ts
@ -0,0 +1,38 @@
+import { tool } from 'ai';
+import { z } from 'zod';
+import type { ConflictResolvedRecord, StageIndex } from '../stages/stage-index.types.js';
+
+/** Dependencies injected into the conflict-resolution emit tool. */
+interface EmitConflictResolutionDeps {
+  /** Stage index whose `conflictsResolved` list receives each recorded decision. */
+  stageIndex: StageIndex;
+}
+
+/**
+ * Build the tool that appends one conflict-resolution record to
+ * `deps.stageIndex.conflictsResolved` and returns a confirmation string.
+ *
+ * `unitKey` and `contestedKey` are copied onto the record only when they are
+ * truthy. Every call appends; no de-duplication is performed.
+ */
+export function createEmitConflictResolutionTool(deps: EmitConflictResolutionDeps) {
+  return tool({
+    description:
+      'Record one conflict resolution decision for the final IngestReport. Call after resolving or flagging a cross-WorkUnit conflict.',
+    inputSchema: z.object({
+      unitKey: z.string().min(1).optional(),
+      kind: z.enum(['structural_duplicate', 'near_duplicate', 'definitional_contradiction', 're_ingest_change']),
+      contestedKey: z.string().min(1).optional(),
+      artifactKey: z.string().min(1),
+      detail: z.string().min(1),
+      flaggedForHuman: z.boolean().default(false),
+    }),
+    execute: async (input): Promise<string> => {
+      const { kind, artifactKey, detail, flaggedForHuman, unitKey, contestedKey } = input;
+      // Optional keys are spread last so they are present only when supplied.
+      const record: ConflictResolvedRecord = {
+        kind,
+        artifactKey,
+        detail,
+        flaggedForHuman,
+        ...(unitKey ? { unitKey } : {}),
+        ...(contestedKey ? { contestedKey } : {}),
+      };
+      deps.stageIndex.conflictsResolved.push(record);
+      return `recorded conflict resolution for ${artifactKey}`;
+    },
+  });
+}
--- a/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts
+++ b/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts
@ -0,0 +1,51 @@
+import { tool } from 'ai';
+import { z } from 'zod';
+import type { EvictionAppliedRecord, StageIndex } from '../stages/stage-index.types.js';
+
+/** Dependencies injected into the eviction-decision emit tool. */
+interface EmitEvictionDecisionDeps {
+  /** Stage index whose `evictionsApplied` list the tool reads and updates. */
+  stageIndex: StageIndex;
+  /** Raw paths that form the eviction set; the tool rejects any rawPath not listed here. */
+  deletedRawPaths: string[];
+}
+
+/**
+ * Report whether two eviction records target the same artifact, i.e. agree on raw
+ * path, artifact kind and artifact key. `action` and `reason` are not compared.
+ */
+function sameEvictionArtifact(left: EvictionAppliedRecord, right: EvictionAppliedRecord): boolean {
+  if (left.rawPath !== right.rawPath) {
+    return false;
+  }
+  if (left.artifactKind !== right.artifactKind) {
+    return false;
+  }
+  return left.artifactKey === right.artifactKey;
+}
+
+/**
+ * Build the tool that records one eviction decision on the stage index.
+ *
+ * The eviction set is snapshotted from `deps.deletedRawPaths` when the tool is
+ * created. A rawPath outside that set yields an `Error: ...` string. Otherwise the
+ * decision replaces the stored record for the same raw path and artifact, or is
+ * appended when there is none.
+ */
+export function createEmitEvictionDecisionTool(deps: EmitEvictionDecisionDeps) {
+  const evictionSet = new Set(deps.deletedRawPaths);
+  return tool({
+    description:
+      'Record one eviction decision for the final IngestReport. The rawPath must come from the current Eviction Set.',
+    inputSchema: z.object({
+      rawPath: z.string().min(1),
+      artifactKind: z.enum(['sl', 'wiki']),
+      artifactKey: z.string().min(1),
+      action: z.enum(['removed', 'retained_deprecated']),
+      reason: z.string().min(1),
+    }),
+    execute: async (input): Promise<string> => {
+      const { rawPath, artifactKind, artifactKey, action, reason } = input;
+      if (!evictionSet.has(rawPath)) {
+        return `Error: rawPath "${rawPath}" is not in the current eviction set`;
+      }
+
+      const record: EvictionAppliedRecord = { rawPath, artifactKind, artifactKey, action, reason };
+      const applied = deps.stageIndex.evictionsApplied;
+      const slot = applied.findIndex((candidate) => sameEvictionArtifact(candidate, record));
+      if (slot < 0) {
+        applied.push(record);
+      } else {
+        // A later decision for the same artifact supersedes the earlier one.
+        applied[slot] = record;
+      }
+      return `recorded eviction decision for ${rawPath} -> ${artifactKind}:${artifactKey}`;
+    },
+  });
+}
--- a/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts
+++ b/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts
@ -0,0 +1,228 @@
+import type { Tool } from 'ai';
+import { describe, expect, it } from 'vitest';
+import type { StageIndex } from '../stages/stage-index.types.js';
+import { createEmitArtifactResolutionTool } from './emit-artifact-resolution.tool.js';
+import { createEmitConflictResolutionTool } from './emit-conflict-resolution.tool.js';
+import { createEmitEvictionDecisionTool } from './emit-eviction-decision.tool.js';
+import { createEmitUnmappedFallbackTool } from './emit-unmapped-fallback.tool.js';
+
+/** Create a fresh stage index with fixed ids and empty record lists for a single test. */
+function makeStageIndex(): StageIndex {
+  const emptyIndex: StageIndex = {
+    jobId: 'job-1',
+    connectionId: 'c1',
+    workUnits: [],
+    conflictsResolved: [],
+    evictionsApplied: [],
+    unmappedFallbacks: [],
+  };
+  return emptyIndex;
+}
+
+/**
+ * Invoke a tool's `execute` with fixed call options and return its string result.
+ * Throws when the tool has no `execute` function.
+ */
+async function executeTool<Input>(tool: Tool<Input, string>, input: NoInfer<Input>) {
+  if (tool.execute) {
+    const result = await tool.execute(input, { toolCallId: 'tool-call-1', messages: [] });
+    return result as string;
+  }
+  throw new Error('tool is not executable');
+}
+
+// Exercises the conflict, eviction, unmapped-fallback and artifact-resolution emit
+// tools. Each test starts from a fresh StageIndex and calls the tool's execute directly.
+describe('reconciliation emit tools', () => {
+  it('records conflict resolutions on the shared stage index', async () => {
+    const stageIndex = makeStageIndex();
+    const tool = createEmitConflictResolutionTool({ stageIndex });
+
+    const output = await executeTool(tool, {
+      unitKey: 'wu-orders',
+      kind: 'near_duplicate',
+      contestedKey: 'gross_revenue',
+      artifactKey: 'sl:orders.gross_revenue',
+      detail: 'orders and order_facts compute the same revenue metric; retained orders as canonical',
+      flaggedForHuman: true,
+    });
+
+    expect(stageIndex.conflictsResolved).toEqual([
+      {
+        unitKey: 'wu-orders',
+        kind: 'near_duplicate',
+        contestedKey: 'gross_revenue',
+        artifactKey: 'sl:orders.gross_revenue',
+        detail: 'orders and order_facts compute the same revenue metric; retained orders as canonical',
+        flaggedForHuman: true,
+      },
+    ]);
+    expect(output).toBe('recorded conflict resolution for sl:orders.gross_revenue');
+  });
+
+  it('records eviction decisions only for deleted raw paths in the current eviction set', async () => {
+    const stageIndex = makeStageIndex();
+    const tool = createEmitEvictionDecisionTool({
+      stageIndex,
+      deletedRawPaths: ['views/old_orders.view.lkml'],
+    });
+
+    const output = await executeTool(tool, {
+      rawPath: 'views/old_orders.view.lkml',
+      artifactKind: 'sl',
+      artifactKey: 'old_orders',
+      action: 'removed',
+      reason: 'source raw file was deleted and no retained artifacts are required',
+    });
+
+    expect(output).toContain('recorded eviction decision for views/old_orders.view.lkml');
+    expect(stageIndex.evictionsApplied).toEqual([
+      {
+        rawPath: 'views/old_orders.view.lkml',
+        artifactKind: 'sl',
+        artifactKey: 'old_orders',
+        action: 'removed',
+        reason: 'source raw file was deleted and no retained artifacts are required',
+      },
+    ]);
+  });
+
+  // Two decisions for the same (rawPath, artifactKind, artifactKey): only the second must remain.
+  it('updates an existing eviction decision for the same raw path and artifact', async () => {
+    const stageIndex = makeStageIndex();
+    const tool = createEmitEvictionDecisionTool({
+      stageIndex,
+      deletedRawPaths: ['views/old_orders.view.lkml'],
+    });
+
+    await executeTool(tool, {
+      rawPath: 'views/old_orders.view.lkml',
+      artifactKind: 'wiki',
+      artifactKey: 'orders/legacy',
+      action: 'retained_deprecated',
+      reason: 'first pass',
+    });
+    await executeTool(tool, {
+      rawPath: 'views/old_orders.view.lkml',
+      artifactKind: 'wiki',
+      artifactKey: 'orders/legacy',
+      action: 'removed',
+      reason: 'second pass after checking references',
+    });
+
+    expect(stageIndex.evictionsApplied).toEqual([
+      {
+        rawPath: 'views/old_orders.view.lkml',
+        artifactKind: 'wiki',
+        artifactKey: 'orders/legacy',
+        action: 'removed',
+        reason: 'second pass after checking references',
+      },
+    ]);
+  });
+
+  // A rejected call must return the error string and leave the stage index untouched.
+  it('rejects eviction decisions for raw paths outside the current eviction set', async () => {
+    const stageIndex = makeStageIndex();
+    const tool = createEmitEvictionDecisionTool({
+      stageIndex,
+      deletedRawPaths: ['views/old_orders.view.lkml'],
+    });
+
+    const output = await executeTool(tool, {
+      rawPath: 'views/not_deleted.view.lkml',
+      artifactKind: 'sl',
+      artifactKey: 'not_deleted',
+      action: 'removed',
+      reason: 'bad input',
+    });
+
+    expect(output).toContain('Error: rawPath "views/not_deleted.view.lkml" is not in the current eviction set');
+    expect(stageIndex.evictionsApplied).toEqual([]);
+  });
+
+  it('records unmapped fallback decisions for allowed raw paths', async () => {
+    const stageIndex = makeStageIndex();
+    const tool = createEmitUnmappedFallbackTool({
+      stageIndex,
+      allowedPaths: new Set(['metrics/conversion.yml']),
+    });
+
+    const output = await executeTool(tool, {
+      rawPath: 'metrics/conversion.yml',
+      reason: 'no_physical_table',
+      fallback: 'flagged',
+    });
+
+    expect(output).toContain('recorded unmapped fallback for metrics/conversion.yml');
+    expect(stageIndex.unmappedFallbacks).toEqual([
+      {
+        rawPath: 'metrics/conversion.yml',
+        reason: 'no_physical_table',
+        fallback: 'flagged',
+      },
+    ]);
+  });
+
+  // The same input is emitted twice; exactly one record is expected afterwards.
+  it('deduplicates identical unmapped fallback decisions', async () => {
+    const stageIndex = makeStageIndex();
+    const tool = createEmitUnmappedFallbackTool({
+      stageIndex,
+      allowedPaths: new Set(['metrics/conversion.yml']),
+    });
+
+    await executeTool(tool, {
+      rawPath: 'metrics/conversion.yml',
+      reason: 'no_physical_table',
+      fallback: 'flagged',
+    });
+    await executeTool(tool, {
+      rawPath: 'metrics/conversion.yml',
+      reason: 'no_physical_table',
+      fallback: 'flagged',
+    });
+
+    expect(stageIndex.unmappedFallbacks).toEqual([
+      {
+        rawPath: 'metrics/conversion.yml',
+        reason: 'no_physical_table',
+        fallback: 'flagged',
+      },
+    ]);
+  });
+
+  it('rejects unmapped fallback decisions for raw paths outside the allowed set', async () => {
+    const stageIndex = makeStageIndex();
+    const tool = createEmitUnmappedFallbackTool({
+      stageIndex,
+      allowedPaths: new Set(['metrics/conversion.yml']),
+    });
+
+    const output = await executeTool(tool, {
+      rawPath: 'metrics/not-in-this-work-unit.yml',
+      reason: 'no_physical_table',
+      fallback: 'flagged',
+    });
+
+    expect(output).toContain(
+      'Error: rawPath "metrics/not-in-this-work-unit.yml" is not available to this ingest stage',
+    );
+    expect(stageIndex.unmappedFallbacks).toEqual([]);
+  });
+
+  // makeStageIndex() omits artifactResolutions, so this also covers creating the list on first use.
+  it('records explicit artifact resolutions for provenance rows', async () => {
+    const stageIndex = makeStageIndex();
+    const tool = createEmitArtifactResolutionTool({
+      stageIndex,
+      allowedPaths: new Set(['explores/b2b/sales_pipeline.json']),
+    });
+
+    const output = await executeTool(tool, {
+      rawPath: 'explores/b2b/sales_pipeline.json',
+      artifactKind: 'sl',
+      artifactKey: 'looker__b2b__sales_pipeline',
+      actionType: 'subsumed',
+      reason: 'File-adapter source b2b__sales_pipeline is canonical for this explore.',
+    });
+
+    expect(output).toBe('recorded artifact resolution for sl:looker__b2b__sales_pipeline');
+    expect(stageIndex.artifactResolutions).toEqual([
+      {
+        rawPath: 'explores/b2b/sales_pipeline.json',
+        artifactKind: 'sl',
+        artifactKey: 'looker__b2b__sales_pipeline',
+        actionType: 'subsumed',
+        reason: 'File-adapter source b2b__sales_pipeline is canonical for this explore.',
+      },
+    ]);
+  });
+});
--- a/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts
+++ b/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts
@ -0,0 +1,52 @@
+import { tool } from 'ai';
+import { z } from 'zod';
+import type { StageIndex, UnmappedFallbackRecord } from '../stages/stage-index.types.js';
+
+/** Dependencies injected into the unmapped-fallback emit tool. */
+interface EmitUnmappedFallbackDeps {
+  /** Stage index whose `unmappedFallbacks` list receives new records. */
+  stageIndex: StageIndex;
+  /** Raw paths the tool accepts; any other rawPath is rejected with an error string. */
+  allowedPaths: ReadonlySet<string>;
+}
+
+/** Closed set of structured reason codes accepted for the tool's `reason` input. */
+const unmappedFallbackReasonSchema = z.enum([
+  'no_connection_mapping',
+  'looker_template_unresolved',
+  'derived_table_not_supported',
+  'no_physical_table',
+  'multiple_table_references',
+  'unsupported_dialect',
+  'parse_error',
+  'missing_target_table',
+]);
+
+/**
+ * Report whether two fallback records agree on raw path, reason code and fallback
+ * kind. The optional `detail` text is not compared.
+ */
+function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallbackRecord): boolean {
+  if (left.rawPath !== right.rawPath) {
+    return false;
+  }
+  if (left.reason !== right.reason) {
+    return false;
+  }
+  return left.fallback === right.fallback;
+}
+
+/**
+ * Build the tool that records one unmapped-fallback decision on the stage index.
+ *
+ * A rawPath outside `deps.allowedPaths` yields an `Error: ...` string. Otherwise the
+ * record is appended unless an equivalent one (per `sameUnmappedFallback`) is
+ * already stored; in that case the first stored record is kept unchanged.
+ */
+export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
+  return tool({
+    description:
+      'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The reason MUST be one of the structured codes; put any human-readable context in detail.',
+    inputSchema: z.object({
+      rawPath: z.string().min(1),
+      reason: unmappedFallbackReasonSchema,
+      detail: z.string().optional(),
+      fallback: z.enum(['sql_standalone', 'wiki_only', 'flagged']),
+    }),
+    execute: async (input): Promise<string> => {
+      const { rawPath, reason, detail, fallback } = input;
+      if (!deps.allowedPaths.has(rawPath)) {
+        return `Error: rawPath "${rawPath}" is not available to this ingest stage`;
+      }
+
+      // Leave the `detail` key off entirely when the caller did not supply one.
+      const record: UnmappedFallbackRecord =
+        detail !== undefined ? { rawPath, reason, detail, fallback } : { rawPath, reason, fallback };
+      const alreadyRecorded = deps.stageIndex.unmappedFallbacks.some((candidate) =>
+        sameUnmappedFallback(candidate, record),
+      );
+      if (!alreadyRecorded) {
+        deps.stageIndex.unmappedFallbacks.push(record);
+      }
+      return `recorded unmapped fallback for ${rawPath} (${fallback})`;
+    },
+  });
+}
--- a/packages/context/src/ingest/tools/eviction-list.tool.test.ts
+++ b/packages/context/src/ingest/tools/eviction-list.tool.test.ts
@ -0,0 +1,56 @@
+import { describe, expect, it, vi } from 'vitest';
+import { createEvictionListTool } from './eviction-list.tool.js';
+
+// Tests for createEvictionListTool. The provenance port is stubbed, and the tool's
+// execute function is called directly with minimal call options.
+describe('eviction_list tool', () => {
+  it('returns artifacts produced for each deleted raw path', async () => {
+    // Stub returns a Map keyed by raw path: one path with an artifact row, one with none.
+    const provenance = {
+      findLatestArtifactsForRawPaths: vi.fn().mockResolvedValue(
+        new Map([
+          [
+            'views/old.lkml',
+            [{ artifact_kind: 'sl', artifact_key: 'old_metric', action_type: 'source_created' } as any],
+          ],
+          ['views/gone.lkml', []],
+        ]),
+      ),
+    };
+    const tool = createEvictionListTool({
+      provenance: provenance as any,
+      connectionId: 'c1',
+      sourceKey: 'lookml',
+      deletedRawPaths: ['views/old.lkml', 'views/gone.lkml'],
+    });
+    const out = (await (tool.execute as (...args: unknown[]) => unknown)(
+      {},
+      { toolCallId: 't', messages: [] },
+    )) as string;
+    expect(out).toContain('views/old.lkml');
+    expect(out).toContain('old_metric');
+    expect(out).toContain('views/gone.lkml');
+  });
+
+  // Despite the title, the assertion only requires the output to contain "empty"
+  // (case-insensitive). `provenance` is an empty object, so calling into it would throw.
+  it('returns empty string when no deletions', async () => {
+    const tool = createEvictionListTool({
+      provenance: {} as any,
+      connectionId: 'c1',
+      sourceKey: 'lookml',
+      deletedRawPaths: [],
+    });
+    const out = (await (tool.execute as (...args: unknown[]) => unknown)(
+      {},
+      { toolCallId: 't', messages: [] },
+    )) as string;
+    expect(out).toMatch(/empty/i);
+  });
+
+  // Checks that the tool description names the companion decision-recording tool.
+  it('tells curators to record decisions', () => {
+    const tool = createEvictionListTool({
+      provenance: {} as any,
+      connectionId: 'c1',
+      sourceKey: 'lookml',
+      deletedRawPaths: [],
+    });
+
+    expect(tool.description).toContain('context_eviction_decision_write');
+  });
+});
--- a/packages/context/src/ingest/tools/eviction-list.tool.ts
+++ b/packages/context/src/ingest/tools/eviction-list.tool.ts
@ -0,0 +1,39 @@
+import { tool } from 'ai';
+import { z } from 'zod';
+import type { IngestProvenancePort } from '../ports.js';
+
+/** Dependencies injected into the eviction-list tool. */
+export interface EvictionListDeps {
+  /** Port queried for the artifacts recorded against each deleted raw path. */
+  provenance: IngestProvenancePort;
+  /** Passed as the first argument of the provenance lookup. */
+  connectionId: string;
+  /** Passed as the second argument of the provenance lookup. */
+  sourceKey: string;
+  /** Raw paths to look up; when empty the tool answers without querying provenance. */
+  deletedRawPaths: string[];
+}
+
+/**
+ * Build the tool that lists, per deleted raw path, the artifacts returned by the
+ * provenance port.
+ *
+ * With no deleted paths the tool returns a fixed "(empty)" message and does not
+ * query provenance. Otherwise it renders one section per entry of the returned
+ * map, in the map's iteration order, as an indented text list.
+ */
+export function createEvictionListTool(deps: EvictionListDeps) {
+  return tool({
+    description:
+      'List every artifact that the most recent completed sync produced from a now-deleted raw file. Use this to decide whether to remove (no inbound refs) or retain with deprecation (has inbound refs). Inbound refs are NOT currently computed — treat every retained entry as a candidate and ask the user via the IngestReport. After deciding, record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.',
+    inputSchema: z.object({}),
+    execute: async () => {
+      const deleted = deps.deletedRawPaths;
+      if (deleted.length === 0) {
+        return '(empty) — no files were deleted since the last sync';
+      }
+      const artifactsByPath = await deps.provenance.findLatestArtifactsForRawPaths(
+        deps.connectionId,
+        deps.sourceKey,
+        deleted,
+      );
+      const sections: string[] = [];
+      for (const [rawPath, rows] of artifactsByPath.entries()) {
+        if (rows.length === 0) {
+          sections.push(`- raw_path: ${rawPath}\n  artifacts: (none)`);
+          continue;
+        }
+        const bullets = rows.map(
+          (row) => `  - kind: ${row.artifact_kind} key: ${row.artifact_key} (last action: ${row.action_type})`,
+        );
+        sections.push(`- raw_path: ${rawPath}\n  artifacts:\n${bullets.join('\n')}`);
+      }
+      return sections.join('\n');
+    },
+  });
+}
--- a/packages/context/src/ingest/tools/read-raw-file.tool.test.ts
+++ b/packages/context/src/ingest/tools/read-raw-file.tool.test.ts
@ -0,0 +1,69 @@
+import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { createReadRawFileTool } from './read-raw-file.tool.js';
+
+// Tests for createReadRawFileTool against a throwaway staged directory on disk.
+describe('read_raw_file tool', () => {
+  let stagedDir: string;
+
+  // Each test gets a new temp dir containing views/a.yml (three lines) and peer.yml,
+  // which no test in this suite puts on the allow-list.
+  beforeEach(async () => {
+    stagedDir = await mkdtemp(join(tmpdir(), 'readraw-'));
+    await mkdir(join(stagedDir, 'views'), { recursive: true });
+    await writeFile(join(stagedDir, 'views', 'a.yml'), 'line1\nline2\nline3\n', 'utf-8');
+    await writeFile(join(stagedDir, 'peer.yml'), 'secret', 'utf-8');
+  });
+
+  afterEach(async () => rm(stagedDir, { recursive: true, force: true }));
+
+  it('returns content for an allowed path', async () => {
+    const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/a.yml']) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: 'views/a.yml' },
+      { toolCallId: 't1', messages: [] },
+    );
+    expect(result).toContain('line1');
+    expect(result).toContain('line2');
+  });
+
+  // Writes a 160,000-character line; the tool must answer with a short error, not the content.
+  it('refuses to return oversized files and directs callers to read spans', async () => {
+    await writeFile(join(stagedDir, 'views', 'huge.yml'), `${'x'.repeat(160_000)}\n`, 'utf-8');
+    const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/huge.yml']) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: 'views/huge.yml' },
+      { toolCallId: 't1', messages: [] },
+    );
+
+    expect(result).toMatch(/too large/i);
+    expect(result).toMatch(/read_raw_span/i);
+    expect(String(result).length).toBeLessThan(1000);
+  });
+
+  // peer.yml exists on disk but is not allow-listed; its content must not leak into the reply.
+  it('rejects a path not in the allow-list', async () => {
+    const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/a.yml']) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: 'peer.yml' },
+      { toolCallId: 't1', messages: [] },
+    );
+    expect(result).toMatch(/not accessible/i);
+    expect(result).not.toContain('secret');
+  });
+
+  it('rejects directory traversal attempts', async () => {
+    const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/a.yml']) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: '../outside.yml' },
+      { toolCallId: 't1', messages: [] },
+    );
+    expect(result).toMatch(/not accessible/i);
+  });
+
+  // The path is allow-listed but was never written to the staged directory.
+  it('returns a clear error when the file is missing despite being allowed', async () => {
+    const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/missing.yml']) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: 'views/missing.yml' },
+      { toolCallId: 't1', messages: [] },
+    );
+    expect(result).toMatch(/not found/i);
+  });
+});
--- a/packages/context/src/ingest/tools/read-raw-file.tool.ts
+++ b/packages/context/src/ingest/tools/read-raw-file.tool.ts
@ -0,0 +1,41 @@
+import { readFile, stat } from 'node:fs/promises';
+import { join, normalize, resolve } from 'node:path';
+import { tool } from 'ai';
+import { z } from 'zod';
+
+/** Dependencies injected into the read_raw_file tool. */
+interface ReadRawFileDeps {
+  /** Directory that requested paths are resolved against. */
+  stagedDir: string;
+  /** Normalized relative paths the tool is permitted to read. */
+  allowedPaths: Set<string>;
+}
+
+/** Largest on-disk size (per `stat`) that read_raw_file returns in full; larger files get an error. */
+const MAX_READ_RAW_FILE_BYTES = 120_000;
+
+/**
+ * Build the `read_raw_file` tool, which returns the full UTF-8 text of one staged file.
+ *
+ * A request is served only when the normalized path is in `deps.allowedPaths` and
+ * resolves inside `deps.stagedDir`. Every failure is reported as an `Error: ...`
+ * string instead of being thrown:
+ * - path not allow-listed or outside the staged root: "not accessible";
+ * - path is not a regular file (e.g. a directory): "is not a regular file";
+ * - file larger than `MAX_READ_RAW_FILE_BYTES`: "too large", pointing to read_raw_span;
+ * - file missing (ENOENT): "not found";
+ * - any other I/O failure: "could not be read".
+ *
+ * NOTE(review): `resolve()` is purely lexical and does not follow symlinks, so a
+ * symlink inside the staged directory is not detected here; confirm staging never
+ * creates one.
+ */
+export function createReadRawFileTool(deps: ReadRawFileDeps) {
+  const stagedRoot = resolve(deps.stagedDir);
+  return tool({
+    description:
+      "Read the full text content of a raw source file inside this WorkUnit. `path` must be relative to the staged bundle root (no leading slash, no `..`) and must appear in the WorkUnit's rawFiles or dependencyPaths list.",
+    inputSchema: z.object({
+      path: z.string().describe('Path relative to the staged bundle root. Example: "views/customers/customer.lkml".'),
+    }),
+    execute: async ({ path }) => {
+      const normalized = normalize(path).replace(/^[/\\]+/, '');
+      if (normalized.startsWith('..') || !deps.allowedPaths.has(normalized)) {
+        return `Error: path "${path}" is not accessible from this WorkUnit. Allowed paths: ${[...deps.allowedPaths].sort().join(', ')}`;
+      }
+      const absolute = resolve(join(stagedRoot, normalized));
+      // Defense in depth behind the allow-list: the resolved path must sit under the
+      // staged root. Either separator is accepted after the root so the check also
+      // holds on platforms where resolve() produces backslashes.
+      const boundary = absolute.charAt(stagedRoot.length);
+      const insideRoot =
+        absolute === stagedRoot || (absolute.startsWith(stagedRoot) && (boundary === '/' || boundary === '\\'));
+      if (!insideRoot) {
+        return `Error: path "${path}" is not accessible from this WorkUnit.`;
+      }
+      try {
+        const fileStat = await stat(absolute);
+        if (!fileStat.isFile()) {
+          return `Error: path "${path}" is not a regular file.`;
+        }
+        if (fileStat.size > MAX_READ_RAW_FILE_BYTES) {
+          return `Error: file "${path}" is too large to return in full (${fileStat.size} bytes). Use read_raw_span with targeted line ranges instead.`;
+        }
+        return await readFile(absolute, 'utf-8');
+      } catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+        // Only a missing file is reported as "not found"; other failures (permissions,
+        // I/O errors) get their own wording so the caller is not misled.
+        const isMissing = err instanceof Error && 'code' in err && err.code === 'ENOENT';
+        return isMissing
+          ? `Error: file "${path}" not found. (${reason})`
+          : `Error: file "${path}" could not be read. (${reason})`;
+      }
+    },
+  });
+}
--- a/packages/context/src/ingest/tools/read-raw-span.tool.test.ts
+++ b/packages/context/src/ingest/tools/read-raw-span.tool.test.ts
@ -0,0 +1,53 @@
+import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { createReadRawSpanTool } from './read-raw-span.tool.js';
+
+// Tests for createReadRawSpanTool against a throwaway staged directory on disk.
+describe('read_raw_span tool', () => {
+  let stagedDir: string;
+
+  // Each test gets a new temp dir containing v/a.yml: five lines, ending with a newline.
+  beforeEach(async () => {
+    stagedDir = await mkdtemp(join(tmpdir(), 'readspan-'));
+    await mkdir(join(stagedDir, 'v'), { recursive: true });
+    await writeFile(join(stagedDir, 'v', 'a.yml'), 'line1\nline2\nline3\nline4\nline5\n', 'utf-8');
+  });
+
+  afterEach(async () => rm(stagedDir, { recursive: true, force: true }));
+
+  it('returns the requested 1-based inclusive line range', async () => {
+    const tool = createReadRawSpanTool({ stagedDir, allowedPaths: new Set(['v/a.yml']) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: 'v/a.yml', startLine: 2, endLine: 4 },
+      { toolCallId: 't1', messages: [] },
+    );
+    expect(result).toBe('line2\nline3\nline4');
+  });
+
+  // endLine 99 exceeds the five-line file; the result stops at line5 with no trailing newline.
+  it('clamps endLine to the end of the file', async () => {
+    const tool = createReadRawSpanTool({ stagedDir, allowedPaths: new Set(['v/a.yml']) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: 'v/a.yml', startLine: 4, endLine: 99 },
+      { toolCallId: 't1', messages: [] },
+    );
+    expect(result).toBe('line4\nline5');
+  });
+
+  it('rejects start > end', async () => {
+    const tool = createReadRawSpanTool({ stagedDir, allowedPaths: new Set(['v/a.yml']) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: 'v/a.yml', startLine: 5, endLine: 2 },
+      { toolCallId: 't1', messages: [] },
+    );
+    expect(result).toMatch(/startLine must be/i);
+  });
+
+  // The file exists on disk, but the allow-list is empty.
+  it('rejects paths not in the allow-list', async () => {
+    const tool = createReadRawSpanTool({ stagedDir, allowedPaths: new Set([]) });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { path: 'v/a.yml', startLine: 1, endLine: 1 },
+      { toolCallId: 't1', messages: [] },
+    );
+    expect(result).toMatch(/not accessible/i);
+  });
+});
--- a/packages/context/src/ingest/tools/read-raw-span.tool.ts
+++ b/packages/context/src/ingest/tools/read-raw-span.tool.ts
@ -0,0 +1,46 @@
+import { readFile } from 'node:fs/promises';
+import { join, normalize, resolve } from 'node:path';
+import { tool } from 'ai';
+import { z } from 'zod';
+
+/** Dependencies injected into the read_raw_span tool. */
+interface ReadRawSpanDeps {
+  /** Directory that requested paths are resolved against. */
+  stagedDir: string;
+  /** Normalized relative paths the tool is permitted to read. */
+  allowedPaths: Set<string>;
+}
+
+/**
+ * Build the `read_raw_span` tool, which returns a 1-based inclusive line range of
+ * one staged file, joined with "\n".
+ *
+ * A request is served only when the normalized path is in `deps.allowedPaths` and
+ * resolves inside `deps.stagedDir`. Every failure is reported as an `Error: ...`
+ * string instead of being thrown:
+ * - `startLine > endLine`;
+ * - path not allow-listed or outside the staged root: "not accessible";
+ * - `startLine` beyond the last line of the file;
+ * - file missing (ENOENT): "not found";
+ * - any other I/O failure: "could not be read".
+ *
+ * `endLine` is clamped to the file length. Lines are split on LF or CRLF, and a
+ * single trailing newline does not count as an extra empty line.
+ */
+export function createReadRawSpanTool(deps: ReadRawSpanDeps) {
+  const stagedRoot = resolve(deps.stagedDir);
+  return tool({
+    description:
+      'Read a 1-based inclusive line range from a raw source file. Use this to resolve a provenance pointer like `file.lkml#L15-28` without loading the whole file into context.',
+    inputSchema: z.object({
+      path: z.string().describe('Path relative to the staged bundle root.'),
+      startLine: z.number().int().min(1).describe('First line to return (1-based, inclusive).'),
+      endLine: z.number().int().min(1).describe('Last line to return (1-based, inclusive). Clamped to file length.'),
+    }),
+    execute: async ({ path, startLine, endLine }) => {
+      if (startLine > endLine) {
+        return `Error: startLine must be <= endLine (got startLine=${startLine}, endLine=${endLine})`;
+      }
+      const normalized = normalize(path).replace(/^[/\\]+/, '');
+      if (normalized.startsWith('..') || !deps.allowedPaths.has(normalized)) {
+        return `Error: path "${path}" is not accessible from this context. Allowed paths: ${[...deps.allowedPaths].sort().join(', ')}`;
+      }
+      const absolute = resolve(join(stagedRoot, normalized));
+      // Defense in depth behind the allow-list: the resolved path must sit under the
+      // staged root. Either separator is accepted after the root so the check also
+      // holds on platforms where resolve() produces backslashes.
+      const boundary = absolute.charAt(stagedRoot.length);
+      const insideRoot =
+        absolute === stagedRoot || (absolute.startsWith(stagedRoot) && (boundary === '/' || boundary === '\\'));
+      if (!insideRoot) {
+        return `Error: path "${path}" is not accessible from this context.`;
+      }
+      try {
+        const body = await readFile(absolute, 'utf-8');
+        // Split on LF or CRLF so CRLF files do not leave a stray "\r" on every line.
+        const rawLines = body.split(/\r?\n/);
+        // Treat a trailing empty element caused by a file-ending newline as NOT a line.
+        const lines = rawLines.length > 0 && rawLines[rawLines.length - 1] === '' ? rawLines.slice(0, -1) : rawLines;
+        if (startLine > lines.length) {
+          // An empty string here would be indistinguishable from a genuinely blank line.
+          return `Error: startLine ${startLine} is past the end of file "${path}" (${lines.length} lines).`;
+        }
+        const from = Math.max(1, startLine);
+        const to = Math.min(lines.length, endLine);
+        return lines.slice(from - 1, to).join('\n');
+      } catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+        // Only a missing file is reported as "not found"; other failures (directory,
+        // permissions, I/O errors) get their own wording so the caller is not misled.
+        const isMissing = err instanceof Error && 'code' in err && err.code === 'ENOENT';
+        return isMissing
+          ? `Error: file "${path}" not found. (${reason})`
+          : `Error: file "${path}" could not be read. (${reason})`;
+      }
+    },
+  });
+}
--- a/packages/context/src/ingest/tools/stage-diff.tool.test.ts
+++ b/packages/context/src/ingest/tools/stage-diff.tool.test.ts
@ -0,0 +1,131 @@
+import { describe, expect, it } from 'vitest';
+import { createStageDiffTool } from './stage-diff.tool.js';
+
+describe('stage_diff tool', () => {
+  // Shape of the stage index accepted by the factory under test.
+  type StageIndexFixture = Parameters<typeof createStageDiffTool>[0]['stageIndex'];
+
+  /** Assemble a stage index for job `j` with the given work units and no conflicts, evictions or fallbacks. */
+  const makeIndex = (connectionId: string, workUnits: StageIndexFixture['workUnits']): StageIndexFixture => ({
+    jobId: 'j',
+    connectionId,
+    workUnits,
+    conflictsResolved: [],
+    evictionsApplied: [],
+    unmappedFallbacks: [],
+  });
+
+  /** Build the tool over `stageIndex`, diff the two units and return the tool's text output. */
+  const diffUnits = async (stageIndex: StageIndexFixture, unitKeyA: string, unitKeyB: string): Promise<string> => {
+    const tool = createStageDiffTool({ stageIndex });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)(
+      { unitKeyA, unitKeyB },
+      { toolCallId: 't', messages: [] },
+    );
+    return result as string;
+  };
+
+  // Two units on the run connection that both create the same SL key.
+  const sharedKeyIndex = makeIndex('c1', [
+    {
+      unitKey: 'u1',
+      rawFiles: [],
+      status: 'success',
+      actions: [{ target: 'sl', type: 'created', key: 'churn_risk_score', detail: 'customers' }],
+      touchedSlSources: [{ connectionId: 'c1', sourceName: 'customers' }],
+    },
+    {
+      unitKey: 'u2',
+      rawFiles: [],
+      status: 'success',
+      actions: [{ target: 'sl', type: 'created', key: 'churn_risk_score', detail: 'billing' }],
+      touchedSlSources: [{ connectionId: 'c1', sourceName: 'billing' }],
+    },
+  ]);
+
+  it('finds overlapping artifact keys between two WUs', async () => {
+    const report = await diffUnits(sharedKeyIndex, 'u1', 'u2');
+    expect(report).toContain('churn_risk_score');
+    expect(report).toMatch(/overlap/i);
+  });
+
+  it('says no overlap when keys are disjoint', async () => {
+    const disjointIndex = makeIndex('c1', [
+      {
+        unitKey: 'u1',
+        rawFiles: [],
+        status: 'success',
+        actions: [{ target: 'sl', type: 'created', key: 'a', detail: '' }],
+        touchedSlSources: [{ connectionId: 'c1', sourceName: 'a' }],
+      },
+      {
+        unitKey: 'u2',
+        rawFiles: [],
+        status: 'success',
+        actions: [{ target: 'sl', type: 'created', key: 'b', detail: '' }],
+        touchedSlSources: [{ connectionId: 'c1', sourceName: 'b' }],
+      },
+    ]);
+
+    const report = await diffUnits(disjointIndex, 'u1', 'u2');
+    expect(report).toMatch(/no overlap/i);
+  });
+
+  it('does not overlap same-named SL actions on different target connections', async () => {
+    // Same artifact key in both units, but each action names a different target connection.
+    const splitTargetIndex = makeIndex('looker-run', [
+      {
+        unitKey: 'u1',
+        rawFiles: [],
+        status: 'success',
+        actions: [
+          {
+            target: 'sl',
+            type: 'created',
+            key: 'looker__b2b__sales_pipeline',
+            detail: 'W1',
+            targetConnectionId: 'W1',
+          },
+        ],
+        touchedSlSources: [{ connectionId: 'W1', sourceName: 'looker__b2b__sales_pipeline' }],
+      },
+      {
+        unitKey: 'u2',
+        rawFiles: [],
+        status: 'success',
+        actions: [
+          {
+            target: 'sl',
+            type: 'created',
+            key: 'looker__b2b__sales_pipeline',
+            detail: 'W2',
+            targetConnectionId: 'W2',
+          },
+        ],
+        touchedSlSources: [{ connectionId: 'W2', sourceName: 'looker__b2b__sales_pipeline' }],
+      },
+    ]);
+
+    const report = await diffUnits(splitTargetIndex, 'u1', 'u2');
+
+    expect(report).toMatch(/no overlap/i);
+  });
+
+  it('returns an error when a unitKey is unknown', async () => {
+    const report = await diffUnits(sharedKeyIndex, 'u1', 'nope');
+    expect(report).toMatch(/unknown/i);
+  });
+});
--- a/packages/context/src/ingest/tools/stage-diff.tool.ts
+++ b/packages/context/src/ingest/tools/stage-diff.tool.ts
@ -0,0 +1,44 @@
+import { tool } from 'ai';
+import { z } from 'zod';
+import { memoryActionIdentity } from '../action-identity.js';
+import type { StageIndex } from '../stages/stage-index.types.js';
+
+/** Dependencies injected into `createStageDiffTool`. */
+export interface StageDiffDeps {
+  /** Stage index whose `workUnits` are looked up by `unitKey` and whose `connectionId` is passed to the action-identity helper. */
+  stageIndex: StageIndex;
+}
+
+/**
+ * Build the tool that compares the recorded actions of two WorkUnits.
+ *
+ * Two actions are considered the same write when `memoryActionIdentity(action, stageIndex.connectionId)`
+ * yields the same value for both. The tool resolves to a plain string in every case:
+ * - `Error: unknown unitKey "…"` when either key is not present in `stageIndex.workUnits`
+ *   (`unitKeyA` is checked first),
+ * - `No overlap between A and B.` when the two identity sets are disjoint,
+ * - otherwise an `Overlap between A and B:` header followed by one entry per shared identity,
+ *   showing the `detail` of the first matching action on each side.
+ *
+ * @param deps - provides the stage index to read; it is consulted on every call, not snapshotted.
+ * @returns the tool definition produced by `tool(...)`.
+ */
+export function createStageDiffTool(deps: StageDiffDeps) {
+  return tool({
+    description:
+      'Compare two WorkUnits by their writes. SL writes overlap only when target connection and artifact key both match; same-key SL actions on different target connections are non-overlapping.',
+    inputSchema: z.object({
+      unitKeyA: z.string(),
+      unitKeyB: z.string(),
+    }),
+    execute: ({ unitKeyA, unitKeyB }) => {
+      const a = deps.stageIndex.workUnits.find((wu) => wu.unitKey === unitKeyA);
+      const b = deps.stageIndex.workUnits.find((wu) => wu.unitKey === unitKeyB);
+      if (!a) {
+        return Promise.resolve(`Error: unknown unitKey "${unitKeyA}"`);
+      }
+      if (!b) {
+        return Promise.resolve(`Error: unknown unitKey "${unitKeyB}"`);
+      }
+      const runConnectionId = deps.stageIndex.connectionId;
+      // Compute each action's identity exactly once per side; the maps double as the
+      // identity sets and as the lookup used to render details below.
+      const actionsA = indexFirstByIdentity(a.actions, (ac) => memoryActionIdentity(ac, runConnectionId));
+      const actionsB = indexFirstByIdentity(b.actions, (ac) => memoryActionIdentity(ac, runConnectionId));
+      // Map iteration follows insertion order, so shared identities are listed in the
+      // order they first appear in unit A.
+      const overlap = [...actionsA.keys()].filter((k) => actionsB.has(k));
+      if (overlap.length === 0) {
+        return Promise.resolve(`No overlap between ${unitKeyA} and ${unitKeyB}.`);
+      }
+      const overlapDetail = overlap
+        .map((k) => `- ${k}\n  ${unitKeyA}: ${actionsA.get(k)?.detail ?? ''}\n  ${unitKeyB}: ${actionsB.get(k)?.detail ?? ''}`)
+        .join('\n');
+      return Promise.resolve(`Overlap between ${unitKeyA} and ${unitKeyB}:\n${overlapDetail}`);
+    },
+  });
+}
+
+/** Index `actions` by identity, keeping the first action seen for each identity. */
+function indexFirstByIdentity<TAction, TKey>(
+  actions: readonly TAction[],
+  identityOf: (action: TAction) => TKey,
+): Map<TKey, TAction> {
+  const byIdentity = new Map<TKey, TAction>();
+  for (const action of actions) {
+    const identity = identityOf(action);
+    if (!byIdentity.has(identity)) {
+      byIdentity.set(identity, action);
+    }
+  }
+  return byIdentity;
+}
--- a/packages/context/src/ingest/tools/stage-list.tool.test.ts
+++ b/packages/context/src/ingest/tools/stage-list.tool.test.ts
@ -0,0 +1,58 @@
+import { describe, expect, it } from 'vitest';
+import { createStageListTool } from './stage-list.tool.js';
+
+describe('stage_list tool', () => {
+  // Shape of the stage index accepted by the factory under test.
+  type StageIndexFixture = Parameters<typeof createStageListTool>[0]['stageIndex'];
+
+  /** Build the tool over `stageIndex`, run it with an empty input and return its text output. */
+  const listStage = async (stageIndex: StageIndexFixture): Promise<string> => {
+    const tool = createStageListTool({ stageIndex });
+    const result = await (tool.execute as (...args: unknown[]) => unknown)({}, { toolCallId: 't', messages: [] });
+    return result as string;
+  };
+
+  it('returns a compact summary of the stage index', async () => {
+    const summary = await listStage({
+      jobId: 'j1',
+      connectionId: 'c1',
+      workUnits: [
+        {
+          unitKey: 'u1',
+          rawFiles: ['a.yml'],
+          status: 'success',
+          actions: [{ target: 'sl', type: 'created', key: 'src_a', detail: '' }],
+          touchedSlSources: [{ connectionId: 'c1', sourceName: 'src_a' }],
+        },
+        {
+          unitKey: 'u2',
+          rawFiles: ['b.yml'],
+          status: 'success',
+          actions: [{ target: 'wiki', type: 'created', key: 'page_b', detail: '' }],
+          touchedSlSources: [],
+        },
+      ],
+      conflictsResolved: [],
+      evictionsApplied: [],
+      unmappedFallbacks: [],
+    });
+
+    // Every unit key and every action key must show up in the listing.
+    for (const fragment of ['u1', 'src_a', 'u2', 'page_b']) {
+      expect(summary).toContain(fragment);
+    }
+  });
+
+  it('says empty when no writes', async () => {
+    const summary = await listStage({
+      jobId: 'j',
+      connectionId: 'c1',
+      workUnits: [],
+      conflictsResolved: [],
+      evictionsApplied: [],
+      unmappedFallbacks: [],
+    });
+
+    expect(summary).toMatch(/empty/i);
+  });
+});
--- a/packages/context/src/ingest/tools/stage-list.tool.ts
+++ b/packages/context/src/ingest/tools/stage-list.tool.ts
@ -0,0 +1,30 @@
+import { tool } from 'ai';
+import { z } from 'zod';
+import type { StageIndex } from '../stages/stage-index.types.js';
+
+/** Dependencies injected into `createStageListTool`. */
+export interface StageListDeps {
+  /** Stage index whose `workUnits` (unit key, status, raw files, actions) are rendered by the tool. */
+  stageIndex: StageIndex;
+}
+
+/**
+ * Build the tool that renders the stage index as a plain-text listing.
+ *
+ * The tool takes no input. It resolves to an `(empty)` notice when the index has no
+ * WorkUnits; otherwise it emits one section per WorkUnit with its unit key, status,
+ * comma-separated raw files (`(none)` when there are none) and one `target:type key`
+ * line per action (`(no actions)` when the unit recorded none).
+ *
+ * @param deps - provides the stage index; it is read on every call.
+ * @returns the tool definition produced by `tool(...)`.
+ */
+export function createStageListTool(deps: StageListDeps) {
+  return tool({
+    description:
+      'List every write made by Stage 3 WorkUnits in this job. Each entry has the unitKey, raw files, and the action set (SL sources touched, wiki pages written).',
+    inputSchema: z.object({}),
+    execute: () => {
+      const { workUnits } = deps.stageIndex;
+      if (workUnits.length === 0) {
+        return Promise.resolve('(empty) — no WorkUnits wrote anything in this job');
+      }
+
+      const sections: string[] = [];
+      for (const unit of workUnits) {
+        const fileList = unit.rawFiles.join(', ') || '(none)';
+        const actionLines =
+          unit.actions.length > 0
+            ? unit.actions.map((action) => `  - ${action.target}:${action.type} ${action.key}`).join('\n')
+            : '  (no actions)';
+        sections.push(
+          [`- unitKey: ${unit.unitKey} (status=${unit.status})`, `  rawFiles: ${fileList}`, '  actions:', actionLines].join(
+            '\n',
+          ),
+        );
+      }
+      return Promise.resolve(sections.join('\n'));
+    },
+  });
+}
--- a/packages/context/src/ingest/tools/tool-call-logger.ts
+++ b/packages/context/src/ingest/tools/tool-call-logger.ts
@ -0,0 +1,106 @@
+import { appendFile, mkdir } from 'node:fs/promises';
+import { dirname } from 'node:path';
+import type { ToolExecuteFunction, ToolExecutionOptions, ToolSet } from 'ai';
+
+/** One record per tool invocation, serialized as a single JSONL line by the logger. */
+export interface ToolCallLogEntry {
+  /** ISO-8601 timestamp taken when the entry is built, i.e. after the tool call settled. */
+  ts: string;
+  /** The `wuKey` passed to `wrapToolsWithLogger`, copied onto every entry. */
+  wuKey: string;
+  /** `toolCallId` from the execution options handed to the tool, when present. */
+  toolCallId?: string;
+  /** Key under which the tool is registered in the wrapped tool set. */
+  toolName: string;
+  /** Wall-clock milliseconds between entering the wrapper and building the entry. */
+  durationMs: number;
+  /** The input the tool was called with, logged as received. */
+  input: unknown;
+  /** Value the tool's `execute` resolved with; set only on success. */
+  output?: unknown;
+  /** Set only when `execute` threw: the message, plus the error `name` when the thrown value is an `Error`. */
+  error?: { message: string; name?: string };
+}
+
+/** Optional hooks for `wrapToolsWithLogger`. */
+interface ToolCallLoggerOptions {
+  /** Synchronous observer called with each entry just before it is queued for the log file. */
+  onEntry?(entry: ToolCallLogEntry): void;
+}
+
+/**
+ * Wrap every tool in `tools` so each invocation appends a JSONL record with
+ * `{toolName, input, output | error, durationMs}` to `logFilePath`. Used by
+ * the ingest runner to produce per-WU transcripts so a completed sync can be
+ * inspected the way `parse_chat.py` inspects a chat.
+ *
+ * Tool shape is preserved (description, inputSchema, ...). Tools without an
+ * `execute` function (provider-defined) pass through untouched.
+ *
+ * Logging never changes a tool's outcome: exactly one entry is emitted per
+ * invocation, the file write is fire-and-forget, and an exception thrown by
+ * `options.onEntry` is contained. The wrapped `execute` resolves with the
+ * original output or rethrows the original error unchanged.
+ *
+ * NOTE(review): writes are not awaited, so line order in the file is not
+ * enforced here; it relies on invocations that share a `logFilePath` not
+ * overlapping. TODO confirm against how the agent loop schedules tool calls.
+ *
+ * @param tools - tool set to wrap; it is not mutated.
+ * @param logFilePath - JSONL file that receives one line per invocation.
+ * @param wuKey - stamped onto every entry as `wuKey`.
+ * @param options - optional `onEntry` observer, called once per invocation.
+ * @returns a new tool set with the same keys as `tools`.
+ */
+export function wrapToolsWithLogger<T extends ToolSet>(
+  tools: T,
+  logFilePath: string,
+  wuKey: string,
+  options: ToolCallLoggerOptions = {},
+): T {
+  // Hand one finished entry to the observer, then queue it for the log file.
+  const record = (entry: ToolCallLogEntry): void => {
+    try {
+      options.onEntry?.(entry);
+    } catch {
+      // best-effort: a failing observer must neither fail the tool call nor skip the file entry
+    }
+    appendEntry(logFilePath, entry);
+  };
+
+  const wrapped: Record<string, unknown> = {};
+  for (const [name, original] of Object.entries(tools) as Array<[string, T[string]]>) {
+    const originalExecute = original.execute;
+    if (typeof originalExecute !== 'function') {
+      wrapped[name] = original;
+      continue;
+    }
+    const wrappedExecute: ToolExecuteFunction<unknown, unknown> = async (
+      input: unknown,
+      opts: ToolExecutionOptions,
+    ) => {
+      const start = Date.now();
+      // Fields shared by success and error entries, evaluated when the call has settled.
+      const settled = () => ({
+        ts: new Date().toISOString(),
+        wuKey,
+        toolCallId: opts.toolCallId,
+        toolName: name,
+        durationMs: Date.now() - start,
+        input,
+      });
+
+      let output: unknown;
+      try {
+        // Only the tool call itself is guarded, so a logging failure can never be
+        // mistaken for (and logged as) a tool failure.
+        output = await (originalExecute as ToolExecuteFunction<unknown, unknown>)(input, opts);
+      } catch (err) {
+        record({
+          ...settled(),
+          error: {
+            message: err instanceof Error ? err.message : String(err),
+            name: err instanceof Error ? err.name : undefined,
+          },
+        });
+        throw err;
+      }
+      record({ ...settled(), output });
+      return output;
+    };
+    wrapped[name] = { ...original, execute: wrappedExecute };
+  }
+  return wrapped as T;
+}
+
+/**
+ * Queue one JSONL line for `path` without waiting for it.
+ *
+ * The parent directory is created (recursively) before the append. Nothing is
+ * returned and nothing is thrown: any failure while creating the directory,
+ * serializing or writing is discarded.
+ */
+function appendEntry(path: string, entry: ToolCallLogEntry): void {
+  const persist = async (): Promise<void> => {
+    await mkdir(dirname(path), { recursive: true });
+    const line = `${safeStringify(entry)}\n`;
+    await appendFile(path, line, 'utf-8');
+  };
+  // best-effort
+  void persist().catch(() => undefined);
+}
+
+/**
+ * Serialize `v` to JSON without ever throwing and without ever returning a non-string.
+ *
+ * @param v - any value.
+ * @returns the JSON text of `v`; `'null'` when `v` has no JSON representation at the
+ *   top level (`undefined`, a function or a symbol); `{"error":"serialize-failed"}`
+ *   when `JSON.stringify` throws (for example on a circular structure or a BigInt).
+ */
+function safeStringify(v: unknown): string {
+  try {
+    // Despite its declared return type, JSON.stringify yields undefined for values
+    // with no JSON form; map that to 'null' so the caller still writes a valid JSONL line.
+    const json: string | undefined = JSON.stringify(v);
+    return json ?? 'null';
+  } catch {
+    return JSON.stringify({ error: 'serialize-failed' });
+  }
+}