ktx/packages/cli/test/context/llm/ai-sdk-runtime.test.ts

569 lines
19 KiB
TypeScript
Raw Permalink Normal View History

2026-05-10 23:12:26 +02:00
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
vi.mock('ai', () => ({
generateText: vi.fn(),
stepCountIs: (n: number) => n,
tool: (def: unknown) => def,
}));
import { generateText } from 'ai';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { AiSdkKtxLlmRuntime } from '../../../src/context/llm/ai-sdk-runtime.js';
2026-05-10 23:12:26 +02:00
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
describe('AiSdkKtxLlmRuntime.runAgentLoop', () => {
let runtime: AiSdkKtxLlmRuntime;
2026-05-10 23:12:26 +02:00
const llmProvider = {
getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }),
getModelByName: vi.fn(),
cacheMarker: vi.fn(),
repairToolCallHandler: vi.fn(),
thinkingProviderOptions: vi.fn(),
telemetryConfig: vi.fn(),
promptCachingConfig: vi.fn(() => ({
enabled: false,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
})),
activeBackend: vi.fn(() => 'anthropic'),
};
beforeEach(() => {
vi.clearAllMocks();
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
runtime = new AiSdkKtxLlmRuntime({ llmProvider: llmProvider as any });
2026-05-10 23:12:26 +02:00
});
afterEach(() => vi.clearAllMocks());
it('passes systemPrompt, userPrompt, tools, and step budget through to generateText', async () => {
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
const repairHandler = vi.fn();
llmProvider.repairToolCallHandler.mockReturnValueOnce(repairHandler);
2026-05-10 23:12:26 +02:00
const tools = { noop: { description: 'noop', inputSchema: {}, execute: vi.fn() } };
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
await runtime.runAgentLoop({
2026-05-10 23:12:26 +02:00
modelRole: 'candidateExtraction',
systemPrompt: 'SYS',
userPrompt: 'USR',
toolSet: tools as any,
stepBudget: 17,
telemetryTags: { source: 'test' },
});
const call = (generateText as any).mock.calls[0][0];
feat: merge ingest and scan * docs: add CLI component reuse guidance * docs: add unified ingest ux design * Refine unified ingest UX design after adversarial review iteration 1 * Refine unified ingest UX design after adversarial review iteration 2 * Refine unified ingest UX design after adversarial review iteration 3 * feat(cli): route public connection ingest command * feat(cli): hide standalone scan from public help * feat(cli): plan public ingest depth and query history * feat(cli): execute public database ingest facets * feat(ingest): read connection query history config * fix(cli): use public ingest wording * fix(config): stop generating ingest adapter allow lists * docs: document public ingest command * test: align ingest surface expectations * docs: add unified ingest public CLI surface plan * feat(cli): preflight deep public ingest readiness * feat(setup): store query history in connection context * feat(setup): store database context depth * feat(setup): verify context readiness by database depth * fix(setup): keep context build foreground only * fix(config): reject reserved ingest connection ids * test: close unified ingest v1 expectations * docs: add unified ingest v1 closure plan * fix(ingest): bypass adapter allow-list for public source ingest * fix(ingest): honor query history window intent * fix(ingest): hide scan internals from public database ingest * feat(ingest): use foreground view for interactive public ingest * fix(setup): use schema context and query history wording * test(cli): verify unified ingest public output * docs: add unified ingest v1 public output closure plan * fix(setup): forward query history flags * fix(setup): prompt for postgres query history * fix(status): report query history readiness * fix(ingest): remove legacy public guidance * fix(ingest): polish foreground retry copy * docs(examples): use unified query history wording * chore(ingest): finish public query history cleanup * docs: add unified ingest v1 query history status cleanup plan * test(docs): cover unified ingest public docs * docs: align ingest CLI reference with unified UX * docs: update context build guides for unified ingest * docs: update setup and primary source ingest wording * docs: stop advertising adapter-backed example ingest * docs: close unified ingest public docs gaps * docs: add unified ingest v1 docs site closure plan * fix: render unified ingest foreground warnings * fix: explain query history schema order * fix: add public ingest retry guidance * fix: align setup next steps with unified ingest * fix: remove scan wording from demo progress * test: verify unified ingest ux closure * docs: add unified ingest v1 foreground and retry closure plan * fix(cli): preserve query-history pull config in public ingest * fix(cli): omit hidden commands from docs command tree * test(cli): close unified ingest final public surface checks * docs: add unified ingest v1 final public surface closure plan * fix(cli): use public source labels in ingest reports * fix(cli): suppress low-level public ingest output * test(cli): verify unified ingest public plain output * docs: add unified ingest v1 public plain output closure plan * fix(cli): add public ingest copy sanitizers * fix(cli): sanitize public ingest progress copy * fix(cli): rename setup schema scope prompt * docs(plan): add progress copy closure; test: align setup back-nav fixture Adds the iter9 plan and updates the setup back-navigation test fixture to pass disableQueryHistory plus listSchemas/listTables stubs that the unified ingest setup step now requires. * docs(plan): add final ux labels plan with narrowed label scans * fix(cli): aggregate unsupported query-history warnings * fix(cli): align setup database labels * test(cli): fix setup database test type-check * fix(cli): remove primary-source wording from setup output * test(cli): verify unified ingest setup closure * docs(plan): add unified ingest v1 verification copy closure plan * fix(cli): remove top-level scan command * fix(cli): remove legacy ingest and wiki commands * Merge scan into ingest flow * feat(cli): split ingest progress into per-phase rows, rename work units to tasks Each database target in the unified ingest dashboard now renders one row per real subprocess (Schema, then Query history when enabled) instead of a single combined bar. Each phase has its own monotonic 0-100% bar so the progress never snaps back to zero when historic-sql starts after scan completes. Completed phases keep their final bar, summary, and elapsed time visible as an inline audit trail; queued and skipped phases are shown explicitly. Also rename user-facing "work units" / "Failed work units" to "tasks" / "Failed tasks" in ingest output and parseIngestSummary. The parser still accepts the legacy "Work units:" wording in captured output for backward compat. Internal memory-flow event names and type fields are left alone. * Fix test harness failures * Fix CI smoke checks --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-14 01:43:06 +02:00
expect(call.system).toEqual({ role: 'system', content: 'SYS' });
expect(call.messages).toEqual([{ role: 'user', content: 'USR' }]);
2026-05-10 23:12:26 +02:00
expect(call.prompt).toBeUndefined();
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
expect(call.tools.noop).toEqual(
expect.objectContaining({
description: 'noop',
inputSchema: {},
execute: expect.any(Function),
toModelOutput: expect.any(Function),
}),
);
2026-05-10 23:12:26 +02:00
expect(call.stopWhen).toBe(17);
expect(call.temperature).toBe(0);
expect(call.experimental_repairToolCall).toBe(repairHandler);
2026-05-10 23:12:26 +02:00
expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction');
expect(llmProvider.repairToolCallHandler).toHaveBeenCalledWith({ source: 'ktx-agent-runner' });
2026-05-10 23:12:26 +02:00
});
it('returns stopReason=natural when the loop completes without error', async () => {
(generateText as any).mockResolvedValue({ text: 'done', toolCalls: [], steps: [] });
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
const result = await runtime.runAgentLoop({
2026-05-10 23:12:26 +02:00
modelRole: 'candidateExtraction',
systemPrompt: 'system',
userPrompt: 'user',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.stopReason).toBe('natural');
expect(result.error).toBeUndefined();
expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction');
expect(generateText).toHaveBeenCalledWith(
expect.objectContaining({
feat: merge ingest and scan * docs: add CLI component reuse guidance * docs: add unified ingest ux design * Refine unified ingest UX design after adversarial review iteration 1 * Refine unified ingest UX design after adversarial review iteration 2 * Refine unified ingest UX design after adversarial review iteration 3 * feat(cli): route public connection ingest command * feat(cli): hide standalone scan from public help * feat(cli): plan public ingest depth and query history * feat(cli): execute public database ingest facets * feat(ingest): read connection query history config * fix(cli): use public ingest wording * fix(config): stop generating ingest adapter allow lists * docs: document public ingest command * test: align ingest surface expectations * docs: add unified ingest public CLI surface plan * feat(cli): preflight deep public ingest readiness * feat(setup): store query history in connection context * feat(setup): store database context depth * feat(setup): verify context readiness by database depth * fix(setup): keep context build foreground only * fix(config): reject reserved ingest connection ids * test: close unified ingest v1 expectations * docs: add unified ingest v1 closure plan * fix(ingest): bypass adapter allow-list for public source ingest * fix(ingest): honor query history window intent * fix(ingest): hide scan internals from public database ingest * feat(ingest): use foreground view for interactive public ingest * fix(setup): use schema context and query history wording * test(cli): verify unified ingest public output * docs: add unified ingest v1 public output closure plan * fix(setup): forward query history flags * fix(setup): prompt for postgres query history * fix(status): report query history readiness * fix(ingest): remove legacy public guidance * fix(ingest): polish foreground retry copy * docs(examples): use unified query history wording * chore(ingest): finish public query history cleanup * docs: add unified ingest v1 query history status cleanup plan * test(docs): cover unified ingest public docs * docs: align ingest CLI reference with unified UX * docs: update context build guides for unified ingest * docs: update setup and primary source ingest wording * docs: stop advertising adapter-backed example ingest * docs: close unified ingest public docs gaps * docs: add unified ingest v1 docs site closure plan * fix: render unified ingest foreground warnings * fix: explain query history schema order * fix: add public ingest retry guidance * fix: align setup next steps with unified ingest * fix: remove scan wording from demo progress * test: verify unified ingest ux closure * docs: add unified ingest v1 foreground and retry closure plan * fix(cli): preserve query-history pull config in public ingest * fix(cli): omit hidden commands from docs command tree * test(cli): close unified ingest final public surface checks * docs: add unified ingest v1 final public surface closure plan * fix(cli): use public source labels in ingest reports * fix(cli): suppress low-level public ingest output * test(cli): verify unified ingest public plain output * docs: add unified ingest v1 public plain output closure plan * fix(cli): add public ingest copy sanitizers * fix(cli): sanitize public ingest progress copy * fix(cli): rename setup schema scope prompt * docs(plan): add progress copy closure; test: align setup back-nav fixture Adds the iter9 plan and updates the setup back-navigation test fixture to pass disableQueryHistory plus listSchemas/listTables stubs that the unified ingest setup step now requires. * docs(plan): add final ux labels plan with narrowed label scans * fix(cli): aggregate unsupported query-history warnings * fix(cli): align setup database labels * test(cli): fix setup database test type-check * fix(cli): remove primary-source wording from setup output * test(cli): verify unified ingest setup closure * docs(plan): add unified ingest v1 verification copy closure plan * fix(cli): remove top-level scan command * fix(cli): remove legacy ingest and wiki commands * Merge scan into ingest flow * feat(cli): split ingest progress into per-phase rows, rename work units to tasks Each database target in the unified ingest dashboard now renders one row per real subprocess (Schema, then Query history when enabled) instead of a single combined bar. Each phase has its own monotonic 0-100% bar so the progress never snaps back to zero when historic-sql starts after scan completes. Completed phases keep their final bar, summary, and elapsed time visible as an inline audit trail; queued and skipped phases are shown explicitly. Also rename user-facing "work units" / "Failed work units" to "tasks" / "Failed tasks" in ingest output and parseIngestSummary. The parser still accepts the legacy "Work units:" wording in captured output for backward compat. Internal memory-flow event names and type fields are left alone. * Fix test harness failures * Fix CI smoke checks --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-14 01:43:06 +02:00
system: { role: 'system', content: 'system' },
messages: [{ role: 'user', content: 'user' }],
2026-05-10 23:12:26 +02:00
}),
);
});
it('returns stopReason=error with the error on generateText failure', async () => {
const err = new Error('LLM unavailable');
(generateText as any).mockRejectedValue(err);
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
const result = await runtime.runAgentLoop({
2026-05-10 23:12:26 +02:00
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.stopReason).toBe('error');
expect(result.error).toBe(err);
});
feat(cli): add ingest LLM rate-limit governor with paced retries (#261) * feat(cli): add ingest rate limit governor * feat(cli): wire ingest rate-limit config * feat(cli): report provider rate-limit signals * feat(cli): show ingest rate-limit waits * fix(cli): complete rate-limit event coverage * fix(cli): abort ingest provider calls cleanly * fix(cli): propagate ingest cancellation * fix(cli): reject pre-aborted ingest rate-limit waits * fix(cli): honor Claude rate-limit reset waits * fix(cli): retry thrown Codex rate-limit failures * fix(cli): type Claude rate-limit result details * fix(cli): emit ingest rate-limit countdowns from rejected signals * fix(cli): report ai sdk rate-limit header utilization * fix(cli): gate LLM rate-limit retries on the governor budget The AI SDK and Codex runtimes retried 429 / opaque rate-limit failures up to 6-7 times with no backoff when constructed without a RateLimitGovernor (scan, memory, setup) or with pacing disabled, ignoring Retry-After and worsening the limit. The outer retry loop only cooperates with the governor's pause, so without active pacing there is no backoff to apply. Route the retry bound through a single source: RateLimitGovernor .maxRetryAttempts(), which returns retry.maxAttempts when enabled and 1 (no outer retry) when absent or disabled. All three runtimes (ai-sdk, codex, claude-code) now use it, so ingest.rateLimit.retry.maxAttempts genuinely controls attempts and the hard-coded 6 (plus Codex's off-by-one extra attempt) is gone. Backend-native retry (e.g. the AI SDK's maxRetries) still handles transient 429s. Also correct the ktx.yaml docs for maxWaitMs (caps each wait, not the whole run) and maxAttempts, and sync uv.lock ktx-sl/ktx-daemon to 0.9.0.
2026-06-05 12:10:27 +02:00
it('reports AI SDK retry-after rate limits and retries through the governor', async () => {
const waitForReady = vi.fn().mockResolvedValue(undefined);
const report = vi.fn();
const rateLimitError = Object.assign(new Error('too many requests'), {
name: 'TooManyRequestsError',
retryAfter: 2,
statusCode: 429,
});
(generateText as any).mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
text: 'done',
toolCalls: [],
steps: [],
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
});
const runtime = new AiSdkKtxLlmRuntime({
llmProvider: llmProvider as any,
rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 6 } as never,
});
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.stopReason).toBe('natural');
expect(report).toHaveBeenCalledWith({
provider: 'anthropic-api',
status: 'rejected',
retryAfterMs: 2_000,
rateLimitType: 'http_429',
});
expect(waitForReady).toHaveBeenCalledTimes(2);
expect(generateText).toHaveBeenCalledTimes(2);
});
it('does not retry AI SDK rate limits without a governor', async () => {
const rateLimitError = Object.assign(new Error('too many requests'), {
name: 'TooManyRequestsError',
statusCode: 429,
});
(generateText as any).mockRejectedValue(rateLimitError);
// The beforeEach runtime is constructed without a rateLimitGovernor.
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.stopReason).toBe('error');
expect(generateText).toHaveBeenCalledTimes(1);
});
it('honors a governor retry budget of one attempt without retrying', async () => {
const waitForReady = vi.fn().mockResolvedValue(undefined);
const report = vi.fn();
const rateLimitError = Object.assign(new Error('too many requests'), {
name: 'TooManyRequestsError',
statusCode: 429,
});
(generateText as any).mockRejectedValue(rateLimitError);
const runtime = new AiSdkKtxLlmRuntime({
llmProvider: llmProvider as any,
rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 1 } as never,
});
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.stopReason).toBe('error');
expect(generateText).toHaveBeenCalledTimes(1);
expect(report).not.toHaveBeenCalled();
});
it('reports Anthropic API response-header utilization to the governor', async () => {
const waitForReady = vi.fn().mockResolvedValue(undefined);
const report = vi.fn();
(generateText as any).mockResolvedValue({
text: 'done',
toolCalls: [],
steps: [],
response: {
headers: {
'anthropic-ratelimit-requests-limit': '100',
'anthropic-ratelimit-requests-remaining': '8',
'anthropic-ratelimit-input-tokens-limit': '10000',
'anthropic-ratelimit-input-tokens-remaining': '9000',
},
},
});
const runtime = new AiSdkKtxLlmRuntime({
llmProvider: llmProvider as any,
rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 6 } as never,
});
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.stopReason).toBe('natural');
expect(report).toHaveBeenCalledWith({
provider: 'anthropic-api',
status: 'allowed',
rateLimitType: 'rpm',
utilization: 0.92,
});
});
it('reports generic x-ratelimit response-header utilization for Vertex providers', async () => {
const waitForReady = vi.fn().mockResolvedValue(undefined);
const report = vi.fn();
const vertexProvider = {
...llmProvider,
getModel: vi.fn().mockReturnValue({ modelId: 'gemini-3-pro', provider: 'google-vertex' }),
};
(generateText as any).mockResolvedValue({
text: 'done',
toolCalls: [],
steps: [],
response: {
headers: {
'x-ratelimit-limit-requests': '200',
'x-ratelimit-remaining-requests': '30',
'x-ratelimit-limit-tokens': '100000',
'x-ratelimit-remaining-tokens': '4000',
},
},
});
const runtime = new AiSdkKtxLlmRuntime({
llmProvider: vertexProvider as any,
rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 6 } as never,
});
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.stopReason).toBe('natural');
expect(report).toHaveBeenCalledWith({
provider: 'vertex',
status: 'allowed',
rateLimitType: 'tpm',
utilization: 0.96,
});
});
it('passes abort signals into governor waits and AI SDK generateText calls', async () => {
const controller = new AbortController();
const waitForReady = vi.fn().mockResolvedValue(undefined);
(generateText as any).mockResolvedValue({ text: 'done', toolCalls: [], steps: [] });
const runtime = new AiSdkKtxLlmRuntime({
llmProvider: llmProvider as any,
rateLimitGovernor: { waitForReady, report: vi.fn(), maxRetryAttempts: () => 6 } as never,
});
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
abortSignal: controller.signal,
});
expect(result.stopReason).toBe('natural');
expect(waitForReady).toHaveBeenCalledWith(controller.signal);
expect((generateText as any).mock.calls[0][0].abortSignal).toBe(controller.signal);
});
feat(cli): profile ingest runs and split model vs tool time (#249) * feat(cli): profile ingest runs to find where wall-clock time goes Add opt-in profiling for `ktx ingest`. Each timed phase, work unit, and agent loop now records durationMs / step count / token usage in the trace, and a post-run aggregator rolls them up into a "where did the time go" report printed to stderr. Enable per run with KTX_PROFILE_INGEST (1/true -> human table, json -> raw structured profile) or persistently via `ingest.profile` in ktx.yaml. The json form emits raw milliseconds, token counts, and a summary.headline one-line diagnosis so coding agents can parse it directly; json wins when both env and config request profiling. - runtime-port: RunLoopMetrics (totalMs, usage, stepCount, stepBoundariesMs) plus onMetrics callbacks on text/object generation - ai-sdk + claude-code runtimes: capture per-loop timing and token usage - work-unit-executor and stages 3/4: thread metrics into trace events - ingest-bundle.runner: time worktree / triage / clustering / index / reconcile / squash phases and emit the profile in a finally block (best-effort; never affects the run outcome) - ingest-profile: new trace+transcript aggregator with table/json formatters - config: ingest.profile flag; docs: profiling section in ktx-ingest.mdx * fix(cli): flush tool-call logs before reading ingest profile Tool transcripts are appended fire-and-forget so the agent hot path never blocks on logging. The ingest profiler read them before the writes settled, so per-work-unit toolMs (and the model-vs-tool split derived from it) could be incomplete. Track in-flight appends and expose flushToolCallLogs() — bounded by a timeout so it can never hang — and flush before the profiler reads the transcript.
2026-06-01 15:49:17 +02:00
it('returns metrics with stepCount, per-step boundaries, and aggregate token usage', async () => {
(generateText as any).mockImplementation(async (opts: any) => {
await opts.onStepFinish({});
await opts.onStepFinish({});
return {
text: 'ok',
toolCalls: [],
steps: [],
totalUsage: { inputTokens: 100, outputTokens: 20, totalTokens: 120 },
};
});
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.metrics).toBeDefined();
expect(result.metrics?.stepCount).toBe(2);
expect(result.metrics?.stepBoundariesMs).toHaveLength(2);
expect(result.metrics?.totalMs).toBeGreaterThanOrEqual(0);
expect(result.metrics?.usage).toEqual({ inputTokens: 100, outputTokens: 20, totalTokens: 120 });
});
it('falls back to result.usage when totalUsage is absent', async () => {
(generateText as any).mockResolvedValue({
text: 'ok',
toolCalls: [],
steps: [],
usage: { inputTokens: 7, outputTokens: 3, totalTokens: 10 },
});
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.metrics?.usage).toEqual({ inputTokens: 7, outputTokens: 3, totalTokens: 10 });
expect(result.metrics?.stepCount).toBe(0);
});
it('returns partial metrics even when the loop errors', async () => {
(generateText as any).mockRejectedValue(new Error('boom'));
const result = await runtime.runAgentLoop({
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.stopReason).toBe('error');
expect(result.metrics).toBeDefined();
expect(result.metrics?.stepCount).toBe(0);
expect(result.metrics?.usage).toEqual({});
});
it('counts model round-trips into metrics.stepCount', async () => {
2026-05-10 23:12:26 +02:00
(generateText as any).mockImplementation(async (opts: any) => {
for (let i = 0; i < 3; i++) {
opts.onStepFinish({});
2026-05-10 23:12:26 +02:00
}
return { text: 'ok', toolCalls: [], steps: [] };
});
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
const result = await runtime.runAgentLoop({
2026-05-10 23:12:26 +02:00
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: {},
});
expect(result.metrics?.stepCount).toBe(3);
expect(result.metrics?.stepBoundariesMs).toHaveLength(3);
2026-05-10 23:12:26 +02:00
});
it('forwards telemetryTags.source through experimental_telemetry metadata', async () => {
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
const telemetryConfigEnabled = {
isEnabled: () => true,
devtoolsEnabled: false,
appSettingsService: {
settings: { telemetry: { recordInputs: false, recordOutputs: false } },
},
systemConfigService: {
config: { instance: { name: 'test-instance' } },
},
} as any;
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
const runtimeWithTelemetry = new AiSdkKtxLlmRuntime({
2026-05-10 23:12:26 +02:00
llmProvider: llmProvider as any,
telemetry: {
createTelemetry: (tags) => ({
isEnabled: telemetryConfigEnabled.isEnabled(),
metadata: {
source: tags.source ?? 'RESEARCH',
jobId: tags.jobId,
unitKey: tags.unitKey,
},
}),
},
});
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
await runtimeWithTelemetry.runAgentLoop({
2026-05-10 23:12:26 +02:00
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: { source: 'metabase', jobId: 'job-123', unitKey: 'u/1' },
});
const call = (generateText as any).mock.calls[0][0];
expect(call.experimental_telemetry.metadata.source).toBe('metabase');
});
it('defaults to source=RESEARCH when telemetryTags omits source', async () => {
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
const telemetryConfigEnabled = {
isEnabled: () => true,
devtoolsEnabled: false,
appSettingsService: {
settings: { telemetry: { recordInputs: false, recordOutputs: false } },
},
systemConfigService: {
config: { instance: { name: 'test-instance' } },
},
} as any;
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
const runtimeWithTelemetry = new AiSdkKtxLlmRuntime({
2026-05-10 23:12:26 +02:00
llmProvider: llmProvider as any,
telemetry: {
createTelemetry: (tags) => ({
isEnabled: telemetryConfigEnabled.isEnabled(),
metadata: {
source: tags.source ?? 'RESEARCH',
jobId: tags.jobId,
unitKey: tags.unitKey,
},
}),
},
});
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
await runtimeWithTelemetry.runAgentLoop({
2026-05-10 23:12:26 +02:00
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: { operationName: 'memory-agent-ingest' },
});
const call = (generateText as any).mock.calls[0][0];
expect(call.experimental_telemetry.metadata.source).toBe('RESEARCH');
});
it('forwards jobId and unitKey through experimental_telemetry metadata', async () => {
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
const telemetryConfigEnabled = {
isEnabled: () => true,
devtoolsEnabled: false,
appSettingsService: {
settings: { telemetry: { recordInputs: false, recordOutputs: false } },
},
systemConfigService: {
config: { instance: { name: 'test-instance' } },
},
} as any;
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
const runtimeWithTelemetry = new AiSdkKtxLlmRuntime({
2026-05-10 23:12:26 +02:00
llmProvider: llmProvider as any,
telemetry: {
createTelemetry: (tags) => ({
isEnabled: telemetryConfigEnabled.isEnabled(),
metadata: {
source: tags.source ?? 'RESEARCH',
jobId: tags.jobId,
unitKey: tags.unitKey,
},
}),
},
});
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
await runtimeWithTelemetry.runAgentLoop({
2026-05-10 23:12:26 +02:00
modelRole: 'candidateExtraction',
systemPrompt: '',
userPrompt: '',
toolSet: {},
stepBudget: 10,
telemetryTags: { source: 'metabase', jobId: 'job-777', unitKey: 'sources/users' },
});
const call = (generateText as any).mock.calls[0][0];
expect(call.experimental_telemetry.metadata.jobId).toBe('job-777');
expect(call.experimental_telemetry.metadata.unitKey).toBe('sources/users');
});
it('records a sanitized LLM debug request when a recorder is injected', async () => {
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
const record = vi.fn();
const provider = {
...llmProvider,
cacheMarker: vi.fn((ttl: '5m' | '1h') => ({
anthropic: { cacheControl: { type: 'ephemeral' as const, ttl } },
})),
promptCachingConfig: vi.fn(() => ({
enabled: true,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
})),
};
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
const runtimeWithDebug = new AiSdkKtxLlmRuntime({
2026-05-10 23:12:26 +02:00
llmProvider: provider as any,
debugRequestRecorder: { record },
});
fix: surface silent failures and drop unused dead-code paths (#193) Address overengineering audit findings across cli/context/connector packages: - F1 Snowflake `query`: drop bare catch that flattened all errors to empty result - F2 memory-agent: treat LLM `stopReason === 'error'` as crash (skip squash-merge) - F3 WikiSearchTool: description honest about token-only fallback vs sqlite-fts5 hybrid - F5 Scan enrichment provider resolution: return discriminated status and surface distinct `llm_unavailable` / `embedding_unavailable` warnings per failure mode - F6 Relationship validation budget: drop dead `tableCount === undefined → 'all'` branch; update tests to pass `tableCount` like production - F8 `ktx sql`: use canonical `resolveOutputMode` (now honors KTX_OUTPUT/CI/TTY) - F9 MCP stdio server: default `protocolIo.stderr` to `process.stderr` so memory_ingest startup failures are visible - F13/F14 Scan/setup JSON readers: distinguish ENOENT from corruption instead of silently treating both as missing - F15 `createKtxCliScanConnector`: throw config-shape error when driver matches but type guard rejects, instead of "no native connector" - F16 ContextEvidenceSearchTool: surface `embedding_unhealthy:<reason>` instead of silently dropping the semantic lane - F17 PromptService: default partials to `[]` (removes stale `clinical_policy` reference from a prior product) - F20 `contextBuildCommands`: drop unused `runId` parameter Dead-code removal: - F4 Delete `AgentRunnerService` (duplicated `RuntimeAgentRunner`, only test-used); migrate tests to exercise `AiSdkKtxLlmRuntime.runAgentLoop` directly - F7 Delete `KtxScanOrchestrator` and its test (no production callers; the inline pipeline in `runLocalScan` is the single source of truth) - F18 Delete `generateKtxText`/`generateKtxObject` pass-through helpers; inline the single `runtime.generateObject` call at its caller Plus a clarifying comment on the SQLite `resolveStringReference` `file:` carve-out (load-bearing for SQLite URI form, not a bug).
2026-05-21 02:38:18 +02:00
await runtimeWithDebug.runAgentLoop({
2026-05-10 23:12:26 +02:00
modelRole: 'candidateExtraction',
systemPrompt: 'SECRET SYSTEM PROMPT',
userPrompt: 'SECRET USER PROMPT',
toolSet: {
emit_candidate: {
description: 'SECRET TOOL DESCRIPTION',
inputSchema: {},
execute: vi.fn(),
} as any,
},
stepBudget: 10,
telemetryTags: { operationName: 'ingest-bundle-wu', source: 'metabase', jobId: 'job-1', unitKey: 'cards/1' },
});
expect(record).toHaveBeenCalledTimes(1);
expect(record).toHaveBeenCalledWith(
expect.objectContaining({
operationName: 'ingest-bundle-wu',
source: 'metabase',
jobId: 'job-1',
unitKey: 'cards/1',
modelRole: 'candidateExtraction',
modelId: 'claude-sonnet-4-6',
messageCount: 2,
toolNames: ['emit_candidate'],
}),
);
const providerOptions = record.mock.calls[0][0].providerOptions;
expect(providerOptions).toEqual(
expect.arrayContaining([
expect.objectContaining({ target: 'message', index: 0, role: 'system' }),
expect.objectContaining({ target: 'message-part', index: 1, role: 'user', partIndex: 0 }),
expect.objectContaining({ target: 'tool', name: 'emit_candidate' }),
]),
);
expect(providerOptions).toHaveLength(3);
const serialized = JSON.stringify(record.mock.calls[0][0]);
expect(serialized).not.toContain('SECRET SYSTEM PROMPT');
expect(serialized).not.toContain('SECRET USER PROMPT');
expect(serialized).not.toContain('SECRET TOOL DESCRIPTION');
});
});