test: verify claude-code backend runtime

This commit is contained in:
Andrey Avtomonov 2026-05-15 16:26:59 +02:00
parent ade9b4a5db
commit 14081020e8
9 changed files with 135 additions and 205 deletions

View file

@ -1,7 +1,7 @@
import { EventEmitter } from 'node:events';
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent';
import type { AgentRunnerPort, RunLoopParams } from '@ktx/context';
import {
KtxYamlMetabaseSourceStateReader,
LocalMetabaseDiscoveryCache,
@ -255,8 +255,8 @@ export function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string
};
}
export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: RunLoopParams) => {
export class CliLookerSlWritingAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
@ -294,18 +294,10 @@ export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
export class CliMetabaseAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async () => ({ stopReason: 'natural' as const }));
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
/**
 * Agent-runner test double that satisfies `AgentRunnerPort` without any LLM wiring.
 *
 * `runLoop` is a vitest mock that takes no arguments and always resolves to
 * `{ stopReason: 'natural' }`; because it is a `vi.fn`, tests can inspect its calls.
 */
export class CliMetabaseAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async () => ({ stopReason: 'natural' as const }));
}
export class CliMetabaseSourceAdapter implements SourceAdapter {

View file

@ -4,7 +4,6 @@ import { AiSdkKtxLlmRuntime, type AgentTelemetryPort } from '../llm/ai-sdk-runti
import type { KtxLlmDebugRequestRecorder } from '../llm/debug-request-recorder.js';
import type { AgentRunnerPort, RunLoopParams, RunLoopResult } from '../llm/runtime-port.js';
export type {
AgentRunnerPort,
RunLoopParams,
RunLoopResult,
RunLoopStepInfo,

View file

@ -2,7 +2,7 @@ import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import YAML from 'yaml';
import { AgentRunnerService } from '../../../agent/index.js';
import type { AgentRunnerPort, RunLoopParams } from '../../../llm/index.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../../project/index.js';
import {
type SqlAnalysisBatchItem,
@ -47,8 +47,8 @@ class AcceptanceHistoricSqlReader implements HistoricSqlReader {
}
}
class HistoricSqlAcceptanceAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class HistoricSqlAcceptanceAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.telemetryTags?.operationName !== 'ingest-bundle-wu') {
return { stopReason: 'natural' as const };
}
@ -59,78 +59,65 @@ class HistoricSqlAcceptanceAgentRunner extends AgentRunnerService {
}
if (params.telemetryTags.unitKey === 'historic-sql-table-public-orders') {
const result = await emitEvidence.execute(
{
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status', 'segment'],
commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
staleSince: null,
},
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status', 'segment'],
commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
staleSince: null,
},
{ toolCallId: 'historic-sql-orders-usage' },
);
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected orders evidence result: ${String(result)}`);
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected orders evidence result: ${result.markdown}`);
}
}
if (params.telemetryTags.unitKey === 'historic-sql-table-public-customers') {
const result = await emitEvidence.execute(
{
kind: 'table_usage',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers provide segment context for paid order lifecycle analysis.',
frequencyTier: 'mid',
commonFilters: [],
commonGroupBys: ['segment'],
commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
staleSince: null,
},
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers provide segment context for paid order lifecycle analysis.',
frequencyTier: 'mid',
commonFilters: [],
commonGroupBys: ['segment'],
commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
staleSince: null,
},
{ toolCallId: 'historic-sql-customers-usage' },
);
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected customers evidence result: ${String(result)}`);
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected customers evidence result: ${result.markdown}`);
}
}
if (params.telemetryTags.unitKey === 'historic-sql-patterns-part-0001') {
const result = await emitEvidence.execute(
{
kind: 'pattern',
rawPath: 'patterns-input/part-0001.json',
pattern: {
slug: 'paid-order-lifecycle',
title: 'Paid Order Lifecycle',
narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
definitionSql:
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:orders-lifecycle'],
},
const result = await emitEvidence.execute({
kind: 'pattern',
rawPath: 'patterns-input/part-0001.json',
pattern: {
slug: 'paid-order-lifecycle',
title: 'Paid Order Lifecycle',
narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
definitionSql:
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:orders-lifecycle'],
},
{ toolCallId: 'historic-sql-pattern' },
);
if (!String(result).includes('Recorded historic-SQL pattern evidence')) {
throw new Error(`Unexpected pattern evidence result: ${String(result)}`);
});
if (!result.markdown.includes('Recorded historic-SQL pattern evidence')) {
throw new Error(`Unexpected pattern evidence result: ${result.markdown}`);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
function acceptanceSqlAnalysis(): SqlAnalysisPort {

View file

@ -3,7 +3,7 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import Database from 'better-sqlite3';
import YAML from 'yaml';
import { AgentRunnerService } from '../agent/index.js';
import type { AgentRunnerPort, RunLoopParams } from '../llm/index.js';
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
import { makeLocalGitRepo } from '../test/make-local-git-repo.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
@ -13,16 +13,12 @@ import { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from './l
import { getLocalIngestStatus, runLocalIngest } from './local-ingest.js';
import type { ChunkResult, DiffSet, SourceAdapter } from './types.js';
class TestAgentRunner extends AgentRunnerService {
override runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
/**
 * Minimal `AgentRunnerPort` stub for tests.
 *
 * `runLoop` is a vitest mock configured with `mockResolvedValue`, so every call
 * resolves to `{ stopReason: 'natural' }` regardless of the parameters passed.
 */
class TestAgentRunner implements AgentRunnerPort {
runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
}
class LookerSlWritingAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class LookerSlWritingAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
@ -31,130 +27,100 @@ class LookerSlWritingAgentRunner extends AgentRunnerService {
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
}
await ledger.execute(
{
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
unverifiedIdentifiers: [],
},
{ toolCallId: 'looker-verification-ledger', messages: [] },
);
await ledger.execute({
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
unverifiedIdentifiers: [],
});
const slWrite = params.toolSet.sl_write_source;
if (!slWrite?.execute) {
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
}
const result = await slWrite.execute(
{
connectionId: 'prod-warehouse',
sourceName: 'looker__ecommerce__orders',
source: {
name: 'looker__ecommerce__orders',
table: 'public.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'number' },
{ name: 'revenue', type: 'number' },
],
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
},
const result = await slWrite.execute({
connectionId: 'prod-warehouse',
sourceName: 'looker__ecommerce__orders',
source: {
name: 'looker__ecommerce__orders',
table: 'public.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'number' },
{ name: 'revenue', type: 'number' },
],
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
},
{ toolCallId: 'looker-sl-write' },
);
if (!result.structured.success) {
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class WikiWritingAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class WikiWritingAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
const ledger = params.toolSet.record_verification_ledger;
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
}
await ledger.execute(
{
summary: 'Test fixture writes wiki-only context with no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 'wiki-verification-ledger', messages: [] },
);
await ledger.execute({
summary: 'Test fixture writes wiki-only context with no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
});
const wikiWrite = params.toolSet.wiki_write;
if (!wikiWrite?.execute) {
throw new Error('wiki_write tool was not available to the WorkUnit');
}
const result = await wikiWrite.execute(
{
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
},
{ toolCallId: 'wiki-write' },
);
if (!result.structured.success) {
const result = await wikiWrite.execute({
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class WikiWritingWithRawPathAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class WikiWritingWithRawPathAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
const ledger = params.toolSet.record_verification_ledger;
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
}
await ledger.execute(
{
summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 'wiki-raw-path-verification-ledger', messages: [] },
);
await ledger.execute({
summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
});
const wikiWrite = params.toolSet.wiki_write;
if (!wikiWrite?.execute) {
throw new Error('wiki_write tool was not available to the WorkUnit');
}
const result = await wikiWrite.execute(
{
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
rawPaths: ['orders/orders.json'],
},
{ toolCallId: 'wiki-write' },
);
if (!result.structured.success) {
const result = await wikiWrite.execute({
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
rawPaths: ['orders/orders.json'],
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class HistoricSqlEvidenceAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class HistoricSqlEvidenceAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
params.telemetryTags?.unitKey === 'historic-sql-table-public-orders'
@ -163,31 +129,24 @@ class HistoricSqlEvidenceAgentRunner extends AgentRunnerService {
if (!emitEvidence?.execute) {
throw new Error('emit_historic_sql_evidence tool was not available to the historic-SQL WorkUnit');
}
const result = await emitEvidence.execute(
{
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
{ toolCallId: 'historic-sql-evidence' },
);
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected historic-SQL evidence result: ${String(result)}`);
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected historic-SQL evidence result: ${result.markdown}`);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class HistoricSqlEvidenceTestAdapter implements SourceAdapter {

View file

@ -1,7 +1,7 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { AgentRunnerService } from '../agent/index.js';
import type { AgentRunnerPort } from '../llm/index.js';
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
@ -17,6 +17,10 @@ type RuntimeWithConnectionDeps = {
};
};
/**
 * Creates a fresh `AgentRunnerPort` stub for a test case.
 *
 * @returns An object whose `runLoop` is a new vitest mock that resolves to
 *   `{ stopReason: 'natural' }` on every call.
 */
function testAgentRunner(): AgentRunnerPort {
  const runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
  return { runLoop };
}
describe('createLocalBundleIngestRuntime', () => {
let tempDir: string;
let project: KtxLocalProject;
@ -89,7 +93,7 @@ describe('createLocalBundleIngestRuntime', () => {
});
it('builds runner deps with local SQLite stores and context tools enabled', async () => {
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
const agentRunner = testAgentRunner();
const runtime = createLocalBundleIngestRuntime({
project,
@ -120,7 +124,7 @@ describe('createLocalBundleIngestRuntime', () => {
project_id: 'acme',
dataset_id: 'warehouse',
};
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
const agentRunner = testAgentRunner();
const runtime = createLocalBundleIngestRuntime({
project,
@ -140,7 +144,7 @@ describe('createLocalBundleIngestRuntime', () => {
});
it('passes project connection config to local ingest query executors', async () => {
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
const agentRunner = testAgentRunner();
const queryExecutor = {
execute: vi.fn(async () => ({
headers: ['answer'],

View file

@ -1,24 +1,20 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { AgentRunnerService } from '../agent/index.js';
import type { AgentRunnerPort, RunLoopParams } from '../llm/index.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
import { LocalMetabaseDiscoveryCache } from './adapters/metabase/local-source-state-store.js';
import { getLocalIngestStatus, runLocalMetabaseIngest } from './local-ingest.js';
import type { ChunkResult, FetchContext, SourceAdapter } from './types.js';
class TestAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: Parameters<AgentRunnerService['runLoop']>[0]) => {
class TestAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.userPrompt.includes('metabase-db-2')) {
return { stopReason: 'error' as const, error: new Error('database 2 failed') };
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class FakeMetabaseSourceAdapter implements SourceAdapter {

View file

@ -2,7 +2,7 @@ import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promise
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { AgentRunnerService } from '../agent/index.js';
import type { AgentRunnerPort } from '../llm/index.js';
import { FakeSourceAdapter, type MemoryFlowReplayInput } from '../ingest/index.js';
import { initKtxProject } from '../project/index.js';
import {
@ -14,12 +14,8 @@ import {
import { writeLocalSlSource } from '../sl/index.js';
import { createLocalProjectMcpContextPorts } from './local-project-ports.js';
class TestAgentRunner extends AgentRunnerService {
override runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
/**
 * Minimal `AgentRunnerPort` stub for tests.
 *
 * `runLoop` is a vitest mock configured with `mockResolvedValue`, so every call
 * resolves to `{ stopReason: 'natural' }` regardless of the parameters passed.
 */
class TestAgentRunner implements AgentRunnerPort {
runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
}
describe('createLocalProjectMcpContextPorts', () => {

View file

@ -18,7 +18,6 @@ import { MemoryAgentService } from './memory-agent.service.js';
interface BuiltMocks {
appSettings: any;
llmProvider: any;
prompt: any;
eventTracker: any;
telemetry: any;
@ -66,7 +65,6 @@ const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
llm: { memoryIngestionModel: 'test-model' },
},
},
llmProvider: { getModel: vi.fn().mockReturnValue({}) },
prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') },
eventTracker: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) },
telemetry: {

View file

@ -1,4 +1,4 @@
import { generateKtxText, type KtxLlmRuntimePort } from '../llm/index.js';
import type { KtxLlmRuntimePort } from '../llm/index.js';
import type {
KtxColumnSampleInput,
KtxColumnSampleResult,
@ -778,8 +778,7 @@ export class KtxDescriptionGenerator {
private async generateAiDescription(prompt: KtxDescriptionPrompt, _operationName: string): Promise<string | null> {
try {
const text = await generateKtxText({
runtime: this.llmRuntime,
const text = await this.llmRuntime.generateText({
role: 'candidateExtraction',
system: prompt.system,
prompt: prompt.user,