From 53a6f8d1112adbb282205525ddc10b2690fc250d Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 00:42:59 +0200 Subject: [PATCH 1/6] fix(cli): treat artifact-producing ingests with failures as partial (#238) * fix(cli): derive ingest outcomes from saved artifacts * fix(cli): treat artifact-producing ingests with failures as partial * fix(cli): route memory-flow run status through shared ingest outcome * fix(cli): treat partial ingest as saved context in setup status * test(cli): align memory-flow replay expectations with partial ingests --- .../cli/src/context/ingest/local-ingest.ts | 12 +- .../src/context/ingest/memory-flow/events.ts | 3 +- packages/cli/src/context/ingest/reports.ts | 14 ++ packages/cli/src/ingest.ts | 18 +-- packages/cli/src/setup.ts | 4 +- .../ingest/local-metabase-ingest.test.ts | 19 +++ .../context/ingest/memory-flow/events.test.ts | 4 +- .../cli/test/context/ingest/reports.test.ts | 71 +++++++++ packages/cli/test/ingest.test.ts | 139 +++++++++++++++++- packages/cli/test/setup.test.ts | 53 +++++++ 10 files changed, 312 insertions(+), 25 deletions(-) create mode 100644 packages/cli/test/context/ingest/reports.test.ts diff --git a/packages/cli/src/context/ingest/local-ingest.ts b/packages/cli/src/context/ingest/local-ingest.ts index 2351d420..ec8a72f4 100644 --- a/packages/cli/src/context/ingest/local-ingest.ts +++ b/packages/cli/src/context/ingest/local-ingest.ts @@ -13,6 +13,7 @@ import { localPullConfigForAdapter, type DefaultLocalIngestAdaptersOptions } fro import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js'; import type { MemoryFlowEventSink } from './memory-flow/types.js'; import { buildSyncId } from './raw-sources-paths.js'; +import { ingestReportOutcome } from './reports.js'; import type { IngestReportBody, IngestReportSnapshot } from './reports.js'; import { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js'; import type { IngestBundleResult, IngestJobContext, IngestJobPhase, IngestTrigger, SourceAdapter } from './types.js'; @@ -79,7 +80,7 @@ export interface LocalMetabaseFanoutProgress { metabaseDatabaseId: number; targetConnectionId: string; jobId: string; - status: 'done' | 'failed'; + status: 'done' | 'partial' | 'failed'; }): void; } @@ -232,11 +233,11 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise child.report.body.failedWorkUnits.length === 0).length; - if (succeeded === children.length) { + const outcomes = children.map((child) => ingestReportOutcome(child.report)); + if (outcomes.every((outcome) => outcome === 'done')) { return 'all_succeeded'; } - if (succeeded === 0) { + if (outcomes.every((outcome) => outcome === 'error')) { return 'all_failed'; } return 'partial_failure'; @@ -401,12 +402,13 @@ export async function runLocalMetabaseIngest( error, }); } + const childOutcome = ingestReportOutcome(child.report); options.progress?.onMetabaseChildCompleted?.({ metabaseConnectionId, metabaseDatabaseId: childPlan.metabaseDatabaseId, targetConnectionId, jobId: child.report.jobId, - status: child.report.body.failedWorkUnits.length > 0 ? 'failed' : 'done', + status: childOutcome === 'error' ? 'failed' : childOutcome, }); children.push({ jobId: child.report.jobId, diff --git a/packages/cli/src/context/ingest/memory-flow/events.ts b/packages/cli/src/context/ingest/memory-flow/events.ts index 020ce5ae..92cebe0f 100644 --- a/packages/cli/src/context/ingest/memory-flow/events.ts +++ b/packages/cli/src/context/ingest/memory-flow/events.ts @@ -1,5 +1,6 @@ import type { MemoryAction } from '../../../context/memory/types.js'; import type { LocalIngestRunRecord } from '../local-stage-ingest.js'; +import { ingestReportOutcome } from '../reports.js'; import type { IngestReportSnapshot } from '../reports.js'; import type { MemoryFlowActionDetail, @@ -72,7 +73,7 @@ function fullModeMetadata(input: { } function reportStatus(report: IngestReportSnapshot): MemoryFlowReplayInput['status'] { - return report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; + return ingestReportOutcome(report) === 'error' ? 'error' : 'done'; } function reportCreatedEvent(report: IngestReportSnapshot): MemoryFlowEvent { diff --git a/packages/cli/src/context/ingest/reports.ts b/packages/cli/src/context/ingest/reports.ts index ea02a31a..09f92170 100644 --- a/packages/cli/src/context/ingest/reports.ts +++ b/packages/cli/src/context/ingest/reports.ts @@ -146,6 +146,20 @@ export function savedMemoryCountsForReport(report: IngestReportSnapshot): Ingest }; } +/** @internal */ +export type IngestReportOutcome = 'done' | 'partial' | 'error'; + +export function ingestReportOutcome(report: IngestReportSnapshot): IngestReportOutcome { + if (report.body.status === 'failed') { + return 'error'; + } + if (report.body.failedWorkUnits.length === 0) { + return 'done'; + } + const { wikiCount, slCount } = savedMemoryCountsForReport(report); + return wikiCount + slCount > 0 ? 'partial' : 'error'; +} + export function buildStageIndexFromReportBody(jobId: string, connectionId: string, body: IngestReportBody): StageIndex { return { jobId, diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index fb8c9a29..ad5ba270 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -2,7 +2,7 @@ import { buildMemoryFlowViewModel } from './context/ingest/memory-flow/view-mode import { createMemoryFlowLiveBuffer, sanitizeMemoryFlowError } from './context/ingest/memory-flow/live-buffer.js'; import { formatMemoryFlowFinalSummary } from './context/ingest/memory-flow/summary.js'; import { getLatestLocalIngestStatus, getLocalIngestStatus, type LocalMetabaseFanoutResult, type LocalMetabaseFanoutProgress, type RunLocalIngestOptions, runLocalIngest, runLocalMetabaseIngest } from './context/ingest/local-ingest.js'; -import { type IngestReportSnapshot, savedMemoryCountsForReport } from './context/ingest/reports.js'; +import { type IngestReportSnapshot, ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js'; import { ingestReportToMemoryFlowReplay } from './context/ingest/memory-flow/events.js'; import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/memory-flow/types.js'; import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js'; @@ -93,10 +93,6 @@ export interface KtxIngestDeps { runtimeIo?: KtxIngestIo; } -function reportStatus(report: IngestReportSnapshot): 'done' | 'error' { - return report.body.status === 'failed' || report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; -} - const REPORT_SOURCE_LABELS = new Map([ ['live-database', 'Database schema'], ['historic-sql', 'Query history'], @@ -193,7 +189,7 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void if (report.body.tracePath) { io.stdout.write(`Trace: ${report.body.tracePath}\n`); } - io.stdout.write(`Status: ${reportStatus(report)}\n`); + io.stdout.write(`Status: ${ingestReportOutcome(report)}\n`); io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`); io.stdout.write(`Connection: ${report.connectionId}\n`); io.stdout.write(`Sync: ${report.body.syncId}\n`); @@ -231,7 +227,7 @@ function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng } io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); for (const child of result.children) { - const status = reportStatus(child.report); + const status = ingestReportOutcome(child.report); io.stdout.write( `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`, ); @@ -595,7 +591,7 @@ function initialRunMemoryFlowInput( } function finalRunMemoryFlowInput(snapshot: MemoryFlowReplayInput, report: IngestReportSnapshot): MemoryFlowReplayInput { - const status = reportStatus(report); + const status = ingestReportOutcome(report) === 'error' ? 'error' : 'done'; return { ...snapshot, runId: report.runId, @@ -777,7 +773,7 @@ export async function runKtxIngest( } finally { plainProgress?.flush(); } - return result.status === 'all_succeeded' ? 0 : 1; + return result.status === 'all_failed' ? 1 : 0; } const jobId = deps.jobIdFactory?.(); @@ -846,7 +842,7 @@ export async function runKtxIngest( liveTui?.close(); liveTui = null; io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot)); - return reportStatus(result.report) === 'done' ? 0 : 1; + return ingestReportOutcome(result.report) === 'error' ? 1 : 0; } plainProgress?.flush(); await writeReportRecord(result.report, runOutputMode, io, { @@ -854,7 +850,7 @@ export async function runKtxIngest( renderStoredMemoryFlow: deps.renderStoredMemoryFlow, env, }); - return reportStatus(result.report) === 'done' ? 0 : 1; + return ingestReportOutcome(result.report) === 'error' ? 1 : 0; } finally { plainProgress?.flush(); liveTui?.close(); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 74056542..ebc04c87 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -1,7 +1,7 @@ import { existsSync } from 'node:fs'; import { basename, join, resolve } from 'node:path'; import { getLatestLocalIngestStatus } from './context/ingest/local-ingest.js'; -import { savedMemoryCountsForReport } from './context/ingest/reports.js'; +import { ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js'; import { ktxLocalStateDbPath } from './context/project/local-state-db.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; import { readKtxSetupState } from './context/project/setup-config.js'; @@ -306,7 +306,7 @@ function sourceConnections(config: Awaited>['c type LocalIngestStatusReport = NonNullable>>; function reportHasSavedContext(report: LocalIngestStatusReport): boolean { - if (report.body.failedWorkUnits.length > 0) { + if (ingestReportOutcome(report) === 'error') { return false; } const counts = savedMemoryCountsForReport(report); diff --git a/packages/cli/test/context/ingest/local-metabase-ingest.test.ts b/packages/cli/test/context/ingest/local-metabase-ingest.test.ts index 06822aa2..8fb89bd0 100644 --- a/packages/cli/test/context/ingest/local-metabase-ingest.test.ts +++ b/packages/cli/test/context/ingest/local-metabase-ingest.test.ts @@ -6,6 +6,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js'; import { LocalMetabaseDiscoveryCache } from '../../../src/context/ingest/adapters/metabase/local-source-state-store.js'; import { getLocalIngestStatus, runLocalMetabaseIngest } from '../../../src/context/ingest/local-ingest.js'; +import { ingestReportOutcome } from '../../../src/context/ingest/reports.js'; import type { ChunkResult, FetchContext, SourceAdapter } from '../../../src/context/ingest/types.js'; class TestAgentRunner implements AgentRunnerPort { @@ -202,6 +203,24 @@ describe('runLocalMetabaseIngest', () => { expect(result.children[1]?.report.body.failedWorkUnits).toEqual(['metabase-db-2']); }); + it('keeps a child that saved memory out of all_failed when another child fails', async () => { + await seedMetabaseState(); + const agentRunner = new TestAgentRunner(); + const ids = ['metabase-child-1', 'metabase-child-2']; + + const result = await runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner, + jobIdFactory: () => ids.shift() ?? 'metabase-child-extra', + }); + + expect(result.status).toBe('partial_failure'); + expect(ingestReportOutcome(result.children[0].report)).toBe('done'); + expect(ingestReportOutcome(result.children[1].report)).toBe('error'); + }); + it('captures fetch-time child failures and continues later mappings', async () => { await seedMetabaseState(); project.config.connections.warehouse_c = { driver: 'postgres', url: 'postgres://localhost/c' }; diff --git a/packages/cli/test/context/ingest/memory-flow/events.test.ts b/packages/cli/test/context/ingest/memory-flow/events.test.ts index e29405a4..cb0e72c8 100644 --- a/packages/cli/test/context/ingest/memory-flow/events.test.ts +++ b/packages/cli/test/context/ingest/memory-flow/events.test.ts @@ -166,7 +166,7 @@ describe('memory-flow event mapping', () => { runId: 'run-1', connectionId: 'warehouse', adapter: 'lookml', - status: 'error', + status: 'done', sourceDir: null, syncId: 'sync-2', reportId: 'report-1', @@ -308,7 +308,7 @@ describe('memory-flow event mapping', () => { sourceReportPath: 'report-1', fallbackReason: null, }); - expect(replay.status).toBe('error'); + expect(replay.status).toBe('done'); expect(replay.reportId).toBe('report-1'); expect(replay.reportPath).toBe('report-1'); expect(replay.events[0]).toMatchObject({ type: 'source_acquired', emittedAt: '2026-05-01T10:00:00.000Z' }); diff --git a/packages/cli/test/context/ingest/reports.test.ts b/packages/cli/test/context/ingest/reports.test.ts new file mode 100644 index 00000000..5fc24f6d --- /dev/null +++ b/packages/cli/test/context/ingest/reports.test.ts @@ -0,0 +1,71 @@ +import { describe, expect, it } from 'vitest'; +import { ingestReportOutcome } from '../../../src/context/ingest/reports.js'; +import type { IngestReportSnapshot } from '../../../src/context/ingest/reports.js'; + +function report(body: Partial): IngestReportSnapshot { + return { + id: 'r', + runId: 'run', + jobId: 'job', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-29T00:00:00.000Z', + body: { + syncId: 'sync', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + ...body, + }, + }; +} + +const savingWorkUnit = { + unitKey: 'ok', + rawFiles: ['cards/1.json'], + status: 'success' as const, + actions: [{ target: 'sl' as const, type: 'updated' as const, key: 'warehouse.orders', detail: 'measure' }], + touchedSlSources: [], +}; + +const failedWorkUnit = { + unitKey: 'bad', + rawFiles: ['cards/2.json'], + status: 'failed' as const, + reason: 'tool write failed', + actions: [], + touchedSlSources: [], +}; + +describe('ingestReportOutcome', () => { + it('returns done when there are no failed work units', () => { + expect(ingestReportOutcome(report({ workUnits: [savingWorkUnit] }))).toBe('done'); + }); + + it('returns partial when failed work units coexist with saved memory', () => { + expect( + ingestReportOutcome(report({ workUnits: [savingWorkUnit, failedWorkUnit], failedWorkUnits: ['bad'] })), + ).toBe('partial'); + }); + + it('returns error when failed work units produced no saved memory', () => { + expect(ingestReportOutcome(report({ workUnits: [failedWorkUnit], failedWorkUnits: ['bad'] }))).toBe('error'); + }); + + it('returns error for a stage-level failure even if artifacts were recorded', () => { + expect(ingestReportOutcome(report({ status: 'failed', workUnits: [savingWorkUnit], failedWorkUnits: [] }))).toBe( + 'error', + ); + }); +}); diff --git a/packages/cli/test/ingest.test.ts b/packages/cli/test/ingest.test.ts index eef751ba..f5cd1ac5 100644 --- a/packages/cli/test/ingest.test.ts +++ b/packages/cli/test/ingest.test.ts @@ -403,7 +403,7 @@ describe('runKtxIngest', () => { expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); - it('returns a non-zero code when Metabase fanout has failed children', async () => { + it('returns a non-zero code when a Metabase fanout child fully fails', async () => { const projectDir = join(tempDir, 'project'); await writeMetabaseConfig(projectDir); const io = makeIo(); @@ -441,7 +441,7 @@ describe('runKtxIngest', () => { { runLocalMetabaseIngest: async () => ({ metabaseConnectionId: 'prod-metabase', - status: 'partial_failure', + status: 'all_failed', totals: { workUnits: 1, failedWorkUnits: 1 }, children: [ { @@ -467,9 +467,83 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(1); - expect(io.stdout()).toContain('Metabase fanout: partial_failure'); - expect(io.stdout()).toContain('Failed tasks: 1'); + expect(io.stdout()).toContain('Metabase fanout: all_failed'); expect(io.stdout()).toContain('status=error'); + }); + + it('exits 0 and reports status=partial when a Metabase child saved memory despite a failure', async () => { + const projectDir = join(tempDir, 'project'); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const report = localFakeBundleReport('metabase-child-1', { + id: 'report-metabase-child-1', + runId: 'run-a', + jobId: 'metabase-child-1', + connectionId: 'warehouse_a', + sourceKey: 'metabase', + body: { + failedWorkUnits: ['metabase-db-2'], + workUnits: [ + { + unitKey: 'metabase-db-1', + rawFiles: ['cards/1.json'], + status: 'success', + actions: [{ target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'measure' }], + touchedSlSources: [], + }, + { + unitKey: 'metabase-db-2', + rawFiles: ['cards/2.json'], + status: 'failed', + reason: 'bad SQL', + actions: [], + touchedSlSources: [], + }, + ], + }, + }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async () => ({ + metabaseConnectionId: 'prod-metabase', + status: 'partial_failure', + totals: { workUnits: 2, failedWorkUnits: 1 }, + children: [ + { + jobId: 'metabase-child-1', + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + result: { + jobId: 'metabase-child-1', + runId: 'run-a', + syncId: 'sync-a', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 2, + failedWorkUnits: ['metabase-db-2'], + artifactsWritten: 1, + commitSha: 'abc', + }, + report, + }, + ], + }), + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Metabase fanout: partial_failure'); + expect(io.stdout()).toContain('status=partial'); expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); @@ -1140,6 +1214,63 @@ describe('runKtxIngest', () => { expect(io.stdout()).toContain('Status: error\n'); }); + it('exits 0 and reports Status: partial when a single-source ingest saved memory despite a failure', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const partialReport = localFakeBundleReport('local-job-partial', { + connectionId: 'warehouse', + sourceKey: 'fake', + body: { + failedWorkUnits: ['orders-bad'], + workUnits: [ + { + unitKey: 'orders-ok', + rawFiles: ['orders/orders.json'], + status: 'success', + actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }], + touchedSlSources: [], + }, + { + unitKey: 'orders-bad', + rawFiles: ['orders/bad.json'], + status: 'failed', + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }, + ], + }, + }); + const runLocal = vi.fn(async (_input: RunLocalIngestOptions) => ({ + result: { + jobId: 'local-job-partial', + runId: partialReport.runId, + syncId: partialReport.body.syncId, + diffSummary: partialReport.body.diffSummary, + workUnitCount: partialReport.body.workUnits.length, + failedWorkUnits: partialReport.body.failedWorkUnits, + artifactsWritten: 1, + commitSha: partialReport.body.commitSha, + }, + report: partialReport, + })); + + const io = makeIo(); + await expect( + runKtxIngest( + { command: 'run', projectDir, connectionId: 'warehouse', adapter: 'fake', sourceDir, outputMode: 'plain' }, + io.io, + { runLocalIngest: runLocal, jobIdFactory: () => 'local-job-partial' }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Status: partial\n'); + }); + it('prints trace path and error status for stored failed ingest reports', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/test/setup.test.ts b/packages/cli/test/setup.test.ts index 0bc00919..da51e9af 100644 --- a/packages/cli/test/setup.test.ts +++ b/packages/cli/test/setup.test.ts @@ -398,6 +398,59 @@ describe('setup status', () => { expect(rendered).toContain('KTX context built: yes'); }); + it('reports context ready after a partial ingest report saved memory', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'setup:', + ' database_connection_ids:', + ' - warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + 'ingest:', + ' embeddings:', + ' backend: none', + ' dimensions: 8', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'databases'] }); + await persistLocalBundleReport( + tempDir, + localFakeBundleReport('warehouse-job-partial', { + connectionId: 'warehouse', + sourceKey: 'fake', + body: { + failedWorkUnits: ['orders-bad'], + workUnits: [ + { + unitKey: 'orders-ok', + rawFiles: ['orders/orders.json'], + status: 'success', + actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }], + touchedSlSources: [], + }, + { + unitKey: 'orders-bad', + rawFiles: ['orders/bad.json'], + status: 'failed', + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }, + ], + }, + }), + ); + + const status = await readKtxSetupStatus(tempDir); + + expect(status.context).toMatchObject({ ready: true, status: 'completed' }); + }); + it('formats plain and JSON setup status payloads', async () => { const status = await readKtxSetupStatus(tempDir); const rendered = formatKtxSetupStatus(status); From 08d08d8ea00639f9a8198566805cd955eadcad0b Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 12:07:15 +0200 Subject: [PATCH 2/6] ci: refresh README star history chart twice daily Point the README chart at a committed assets/star-history.svg instead of the star-history API URL so GitHub serves it directly and bypasses the Camo proxy cache. A scheduled workflow regenerates the SVG at 06:00/18:00 UTC, busting star-history's server-side cache, and commits it when it changes. --- .github/workflows/star-history.yml | 61 ++++++++++++++++++++++++++++++ README.md | 2 +- assets/star-history.svg | 1 + 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/star-history.yml create mode 100644 assets/star-history.svg diff --git a/.github/workflows/star-history.yml b/.github/workflows/star-history.yml new file mode 100644 index 00000000..ec484b05 --- /dev/null +++ b/.github/workflows/star-history.yml @@ -0,0 +1,61 @@ +name: Refresh star history chart + +on: + schedule: + # Twice daily at 06:00 and 18:00 UTC. + - cron: "0 6,18 * * *" + workflow_dispatch: + +permissions: + contents: write + +env: + DO_NOT_TRACK: "1" + KTX_TELEMETRY_DISABLED: "1" + NEXT_TELEMETRY_DISABLED: "1" + +concurrency: + group: star-history-refresh + cancel-in-progress: true + +jobs: + refresh: + name: Regenerate assets/star-history.svg + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Fetch fresh star-history SVG + run: | + set -euo pipefail + # cachebust forces star-history to regenerate instead of serving its + # own server-side cache; --location follows the slug-normalizing 301. + url="https://api.star-history.com/svg?repos=Kaelio/ktx&type=Date&cachebust=${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" + curl --fail --location --silent --show-error \ + --retry 3 --retry-delay 5 --max-time 60 \ + -o assets/star-history.svg.new "$url" + # Guard against error pages / truncated responses before overwriting. + if ! grep -q "" assets/star-history.svg.new; then + echo "Downloaded file is not a valid SVG; aborting." >&2 + exit 1 + fi + if [ "$(wc -c < assets/star-history.svg.new)" -lt 1000 ]; then + echo "Downloaded SVG is suspiciously small; aborting." >&2 + exit 1 + fi + mv assets/star-history.svg.new assets/star-history.svg + + - name: Commit if changed + run: | + set -euo pipefail + if git diff --quiet -- assets/star-history.svg; then + echo "Star-history chart unchanged; nothing to commit." + exit 0 + fi + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add assets/star-history.svg + # [skip ci] keeps this housekeeping commit from triggering KTX CI. + git commit -m "chore: refresh star history chart [skip ci]" + git push diff --git a/README.md b/README.md index 23b2fa0a..686ece22 100644 --- a/README.md +++ b/README.md @@ -248,6 +248,6 @@ event catalog and opt-out options.

- ktx Star History Chart + ktx Star History Chart

diff --git a/assets/star-history.svg b/assets/star-history.svg new file mode 100644 index 00000000..3f6c4a04 --- /dev/null +++ b/assets/star-history.svg @@ -0,0 +1 @@ +star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars From ba06f7078af69fdba2184186ca5cc53c65427ea2 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 16:01:47 +0200 Subject: [PATCH 3/6] ci: push star-history refresh to protected main with RELEASE_PAT (#239) The scheduled star-history workflow checked out with the default GITHUB_TOKEN, so its git push to main was rejected by the branch protection hook (GH006). Check out with RELEASE_PAT instead, matching release.yml, whose semantic-release step already pushes to the protected main branch with the same token. --- .github/workflows/star-history.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/star-history.yml b/.github/workflows/star-history.yml index ec484b05..e67a0517 100644 --- a/.github/workflows/star-history.yml +++ b/.github/workflows/star-history.yml @@ -25,6 +25,10 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + # RELEASE_PAT can push to the protected main branch; the default + # GITHUB_TOKEN is rejected by the branch-protection hook (GH006). + token: ${{ secrets.RELEASE_PAT }} - name: Fetch fresh star-history SVG run: | From 54d6e877335a7218dc2ec795ff85529403ac6bde Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 30 May 2026 14:02:55 +0000 Subject: [PATCH 4/6] chore: refresh star history chart [skip ci] --- assets/star-history.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/star-history.svg b/assets/star-history.svg index 3f6c4a04..246ba5a0 100644 --- a/assets/star-history.svg +++ b/assets/star-history.svg @@ -1 +1 @@ -star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars +star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars \ No newline at end of file From cbbcf8e8bdd1560b3d0c73e47abd36eb5d8c6f23 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 17:44:27 +0200 Subject: [PATCH 5/6] ci: normalize star-history.svg trailing newline (#241) The star-history refresh workflow committed the API's SVG verbatim, but the response has no trailing newline. Because the refresh commit uses [skip ci], the file never ran end-of-file-fixer at commit time, so pre-commit's `--all-files` run failed end-of-file-fixer on every open PR (e.g. #240), even PRs that never touched the file. Normalize the downloaded SVG to exactly one trailing newline in the workflow (idempotent, so the "unchanged" guard still works), and fix the currently committed file so open PRs go green now. --- .github/workflows/star-history.yml | 9 ++++++++- assets/star-history.svg | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/star-history.yml b/.github/workflows/star-history.yml index e67a0517..b7d90c43 100644 --- a/.github/workflows/star-history.yml +++ b/.github/workflows/star-history.yml @@ -48,7 +48,14 @@ jobs: echo "Downloaded SVG is suspiciously small; aborting." >&2 exit 1 fi - mv assets/star-history.svg.new assets/star-history.svg + # The star-history API returns the SVG without a trailing newline, + # which end-of-file-fixer rewrites whenever pre-commit runs + # --all-files on a PR. Because the refresh commit below uses [skip ci], + # the hook never runs against it here, so an un-normalized file + # silently breaks the pre-commit check on every open PR. Normalize to + # exactly one trailing newline before committing. + printf '%s\n' "$(cat assets/star-history.svg.new)" > assets/star-history.svg + rm -f assets/star-history.svg.new - name: Commit if changed run: | diff --git a/assets/star-history.svg b/assets/star-history.svg index 246ba5a0..3f6c4a04 100644 --- a/assets/star-history.svg +++ b/assets/star-history.svg @@ -1 +1 @@ -star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars \ No newline at end of file +star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars From 25f639fba2f71aa880e4184ce4b50b56e2374d0e Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 17:54:24 +0200 Subject: [PATCH 6/6] feat: trim MCP query response payloads (#240) --- .../mcp/__snapshots__/mcp-tools-list.json | 1620 ----------------- packages/cli/src/context/mcp/context-tools.ts | 106 +- packages/cli/src/context/mcp/types.ts | 5 +- packages/cli/src/context/search/discover.ts | 7 +- packages/cli/src/skills/analytics/SKILL.md | 7 +- .../mcp/__snapshots__/mcp-tools-list.json | 41 +- packages/cli/test/context/mcp/server.test.ts | 152 +- 7 files changed, 235 insertions(+), 1703 deletions(-) delete mode 100644 packages/cli/src/context/mcp/__snapshots__/mcp-tools-list.json diff --git a/packages/cli/src/context/mcp/__snapshots__/mcp-tools-list.json b/packages/cli/src/context/mcp/__snapshots__/mcp-tools-list.json deleted file mode 100644 index 10cb0b77..00000000 --- a/packages/cli/src/context/mcp/__snapshots__/mcp-tools-list.json +++ /dev/null @@ -1,1620 +0,0 @@ -[ - { - "name": "connection_list", - "title": "Connection List", - "description": "List configured read-only data connections available to this KTX project. Use this before connection-scoped tools when the project may have multiple warehouses.", - "inputSchema": { - "type": "object", - "properties": {}, - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "connections": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "connectionType": { - "type": "string" - } - }, - "required": [ - "id", - "name", - "connectionType" - ], - "additionalProperties": false - } - } - }, - "required": [ - "connections" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Connection List", - "readOnlyHint": true, - "idempotentHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "wiki_search", - "title": "Wiki Search", - "description": "Search KTX wiki pages for reusable business context. Example: wiki_search({ query: \"revenue recognition\", limit: 5 }).", - "inputSchema": { - "type": "object", - "properties": { - "query": { - "type": "string", - "minLength": 1, - "description": "Natural-language wiki search query, e.g. \"revenue recognition policy\"." - }, - "limit": { - "default": 10, - "description": "Maximum wiki pages to return. Defaults to 10.", - "type": "integer", - "minimum": 1, - "maximum": 50 - } - }, - "required": [ - "query" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "results": { - "type": "array", - "items": { - "type": "object", - "properties": { - "key": { - "type": "string" - }, - "path": { - "type": "string" - }, - "scope": { - "type": "string", - "enum": [ - "GLOBAL", - "USER" - ] - }, - "summary": { - "type": "string" - }, - "score": { - "type": "number" - }, - "matchReasons": { - "type": "array", - "items": { - "type": "string" - } - }, - "lanes": { - "type": "array", - "items": { - "type": "object", - "properties": { - "lane": { - "type": "string" - }, - "status": { - "type": "string" - }, - "requestedCandidatePoolLimit": { - "type": "number" - }, - "effectiveCandidatePoolLimit": { - "type": "number" - }, - "returnedCandidateCount": { - "type": "number" - }, - "weight": { - "type": "number" - }, - "reason": { - "type": "string" - } - }, - "required": [ - "lane", - "status", - "requestedCandidatePoolLimit", - "effectiveCandidatePoolLimit", - "returnedCandidateCount", - "weight" - ], - "additionalProperties": false - } - } - }, - "required": [ - "key", - "path", - "scope", - "summary", - "score" - ], - "additionalProperties": false - } - }, - "totalFound": { - "type": "number" - } - }, - "required": [ - "results", - "totalFound" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Wiki Search", - "readOnlyHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "wiki_read", - "title": "Wiki Read", - "description": "Read a KTX wiki page by key returned from wiki_search. Example: wiki_read({ key: \"global/revenue\" }).", - "inputSchema": { - "type": "object", - "properties": { - "key": { - "type": "string", - "minLength": 1, - "description": "Wiki page key returned by wiki_search, e.g. \"global/revenue\"." - } - }, - "required": [ - "key" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "key": { - "type": "string" - }, - "summary": { - "type": "string" - }, - "content": { - "type": "string" - }, - "scope": { - "type": "string", - "enum": [ - "GLOBAL", - "USER" - ] - }, - "tags": { - "type": "array", - "items": { - "type": "string" - } - }, - "refs": { - "type": "array", - "items": { - "type": "string" - } - }, - "slRefs": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "key", - "summary", - "content", - "scope" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Wiki Read", - "readOnlyHint": true, - "idempotentHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "sl_read_source", - "title": "Semantic Layer Read Source", - "description": "Read a semantic-layer YAML source by connection id and source name. Example: sl_read_source({ connectionId: \"warehouse\", sourceName: \"orders\" }).", - "inputSchema": { - "type": "object", - "properties": { - "connectionId": { - "type": "string", - "minLength": 1, - "description": "Connection id that owns the semantic-layer source." - }, - "sourceName": { - "type": "string", - "minLength": 1, - "description": "Semantic-layer source name without \".yaml\", e.g. \"orders\"." - } - }, - "required": [ - "connectionId", - "sourceName" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "sourceName": { - "type": "string" - }, - "yaml": { - "type": "string" - } - }, - "required": [ - "sourceName", - "yaml" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Semantic Layer Read Source", - "readOnlyHint": true, - "idempotentHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "sl_query", - "title": "Semantic Layer Query", - "description": "Execute a semantic-layer query and return rows, headers, generated SQL, and plan details. Example: sl_query({ connectionId: \"warehouse\", measures: [\"orders.order_count\"], dimensions: [{ field: \"orders.created_at\", granularity: \"month\" }] }).", - "inputSchema": { - "type": "object", - "properties": { - "connectionId": { - "description": "Connection id to query. Omit only when the project has exactly one configured connection.", - "type": "string", - "minLength": 1 - }, - "measures": { - "minItems": 1, - "type": "array", - "items": { - "anyOf": [ - { - "type": "string", - "description": "Semantic-layer measure key, e.g. \"orders.order_count\"." - }, - { - "type": "object", - "properties": { - "expr": { - "type": "string", - "minLength": 1, - "description": "Ad hoc aggregate expression, e.g. \"sum(orders.amount)\"." - }, - "name": { - "type": "string", - "minLength": 1, - "description": "Alias for the ad hoc measure, e.g. \"gross_revenue\"." - } - }, - "required": [ - "expr", - "name" - ] - } - ] - }, - "description": "Measures to select. Use semantic-layer keys when available." - }, - "dimensions": { - "default": [], - "description": "Dimensions to group by. Use {field, granularity?} entries.", - "type": "array", - "items": { - "type": "object", - "properties": { - "field": { - "type": "string", - "minLength": 1, - "description": "Dimension to group by, e.g. \"orders.created_at\" or \"orders.status\"." - }, - "granularity": { - "description": "Time grain for time dimensions: day, week, month, quarter, or year.", - "type": "string", - "minLength": 1 - } - }, - "required": [ - "field" - ] - } - }, - "filters": { - "default": [], - "description": "Semantic-layer filter expressions to apply.", - "type": "array", - "items": { - "type": "string", - "description": "Semantic-layer filter expression, e.g. \"orders.status = paid\"." - } - }, - "segments": { - "default": [], - "description": "Semantic-layer segment keys to apply.", - "type": "array", - "items": { - "type": "string", - "description": "Semantic-layer segment key to apply." - } - }, - "order_by": { - "default": [], - "description": "Sort clauses. Use {field, direction?} entries.", - "type": "array", - "items": { - "type": "object", - "properties": { - "field": { - "type": "string", - "minLength": 1, - "description": "Field/measure/dimension id to order by, e.g. \"orders.created_at\", a dimension key like \"mart_nrr_quarterly.quarter_label\", or a measure alias." - }, - "direction": { - "default": "asc", - "description": "Sort direction: \"asc\" or \"desc\". Defaults to \"asc\".", - "type": "string", - "enum": [ - "asc", - "desc" - ] - } - }, - "required": [ - "field" - ] - } - }, - "limit": { - "default": 1000, - "description": "Maximum rows to return. Defaults to 1000.", - "type": "integer", - "minimum": 0, - "maximum": 9007199254740991 - }, - "include_empty": { - "default": true, - "description": "Whether to include empty dimension groups. Defaults to true.", - "type": "boolean" - } - }, - "required": [ - "measures" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "connectionId": { - "type": "string" - }, - "dialect": { - "type": "string" - }, - "sql": { - "type": "string" - }, - "headers": { - "type": "array", - "items": { - "type": "string" - } - }, - "rows": { - "type": "array", - "items": { - "type": "array", - "items": {} - } - }, - "totalRows": { - "type": "number" - }, - "plan": { - "type": "object", - "propertyNames": { - "type": "string" - }, - "additionalProperties": {} - } - }, - "required": [ - "sql", - "headers", - "rows", - "totalRows" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Semantic Layer Query", - "readOnlyHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "entity_details", - "title": "Entity Details", - "description": "Read table and column metadata from the latest live-database scan snapshot. Example: entity_details({ connectionId: \"warehouse\", entities: [{ table: { catalog: null, db: \"public\", name: \"orders\" }, columns: [\"id\"] }] }).", - "inputSchema": { - "type": "object", - "properties": { - "connectionId": { - "type": "string", - "minLength": 1, - "description": "Connection id whose latest scan snapshot should be read." - }, - "entities": { - "minItems": 1, - "maxItems": 20, - "type": "array", - "items": { - "type": "object", - "properties": { - "table": { - "anyOf": [ - { - "type": "string", - "minLength": 1 - }, - { - "type": "object", - "properties": { - "catalog": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "Catalog/project/database. Use null when not applicable." - }, - "db": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "Schema/database/dataset. Use null when not applicable." - }, - "name": { - "type": "string", - "minLength": 1, - "description": "Table name." - } - }, - "required": [ - "catalog", - "db", - "name" - ] - } - ], - "description": "Table display string or canonical object ref." - }, - "columns": { - "description": "Optional column filter.", - "type": "array", - "items": { - "type": "string", - "minLength": 1, - "description": "Column name to inspect." - } - } - }, - "required": [ - "table" - ] - }, - "description": "Tables or columns to inspect. Maximum 20 entities." - } - }, - "required": [ - "connectionId", - "entities" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "results": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "object", - "properties": { - "ok": { - "type": "boolean", - "const": true - }, - "connectionId": { - "type": "string" - }, - "tableRef": { - "type": "object", - "properties": { - "catalog": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "db": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "name": { - "type": "string" - } - }, - "required": [ - "catalog", - "db", - "name" - ], - "additionalProperties": false - }, - "display": { - "type": "string" - }, - "kind": { - "type": "string", - "enum": [ - "table", - "view", - "external", - "event_stream" - ] - }, - "comment": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "estimatedRows": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "null" - } - ] - }, - "columns": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "nativeType": { - "type": "string" - }, - "normalizedType": { - "type": "string" - }, - "dimensionType": { - "type": "string", - "enum": [ - "time", - "string", - "number", - "boolean" - ] - }, - "nullable": { - "type": "boolean" - }, - "primaryKey": { - "type": "boolean" - }, - "comment": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - } - }, - "required": [ - "name", - "nativeType", - "normalizedType", - "dimensionType", - "nullable", - "primaryKey", - "comment" - ], - "additionalProperties": false - } - }, - "foreignKeys": { - "type": "array", - "items": { - "type": "object", - "properties": { - "fromColumn": { - "type": "string" - }, - "toCatalog": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "toDb": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "toTable": { - "type": "string" - }, - "toColumn": { - "type": "string" - }, - "constraintName": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - } - }, - "required": [ - "fromColumn", - "toCatalog", - "toDb", - "toTable", - "toColumn", - "constraintName" - ], - "additionalProperties": false - } - }, - "snapshot": { - "type": "object", - "properties": { - "syncId": { - "type": "string" - }, - "extractedAt": { - "type": "string" - }, - "scanRunId": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - } - }, - "required": [ - "syncId", - "extractedAt", - "scanRunId" - ], - "additionalProperties": false - } - }, - "required": [ - "ok", - "connectionId", - "tableRef", - "display", - "kind", - "comment", - "estimatedRows", - "columns", - "foreignKeys", - "snapshot" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "ok": { - "type": "boolean", - "const": false - }, - "connectionId": { - "type": "string" - }, - "table": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "object", - "properties": { - "catalog": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "db": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "name": { - "type": "string" - } - }, - "required": [ - "catalog", - "db", - "name" - ], - "additionalProperties": false - } - ] - }, - "snapshot": { - "type": "object", - "properties": { - "syncId": { - "type": "string" - }, - "extractedAt": { - "type": "string" - }, - "scanRunId": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - } - }, - "required": [ - "syncId", - "extractedAt", - "scanRunId" - ], - "additionalProperties": false - }, - "error": { - "type": "object", - "properties": { - "code": { - "type": "string", - "enum": [ - "scan_missing", - "table_not_found", - "ambiguous_table", - "column_not_found" - ] - }, - "message": { - "type": "string" - }, - "candidates": { - "anyOf": [ - { - "type": "array", - "items": { - "type": "object", - "properties": { - "tableRef": { - "type": "object", - "properties": { - "catalog": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "db": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "name": { - "type": "string" - } - }, - "required": [ - "catalog", - "db", - "name" - ], - "additionalProperties": false - }, - "display": { - "type": "string" - } - }, - "required": [ - "tableRef", - "display" - ], - "additionalProperties": false - } - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - } - }, - "required": [ - "code", - "message" - ], - "additionalProperties": false - } - }, - "required": [ - "ok", - "connectionId", - "table", - "error" - ], - "additionalProperties": false - } - ] - } - } - }, - "required": [ - "results" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Entity Details", - "readOnlyHint": true, - "idempotentHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "dictionary_search", - "title": "Dictionary Search", - "description": "Search profile-sampled warehouse values to locate likely source columns for business values. Example: dictionary_search({ values: [\"Acme Corp\"], connectionId: \"warehouse\" }).", - "inputSchema": { - "type": "object", - "properties": { - "values": { - "minItems": 1, - "maxItems": 20, - "type": "array", - "items": { - "type": "string", - "minLength": 1, - "description": "Business value to locate, e.g. \"Acme Corp\" or \"enterprise\"." - }, - "description": "Values to search for in sampled warehouse dictionaries." - }, - "connectionId": { - "description": "Optional connection id. Pass it when user intent pins a specific warehouse.", - "type": "string", - "minLength": 1 - } - }, - "required": [ - "values" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "searched": { - "type": "array", - "items": { - "type": "object", - "properties": { - "connectionId": { - "type": "string" - }, - "coverage": { - "type": "object", - "properties": { - "sampledRows": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "null" - } - ] - }, - "valuesPerColumn": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "null" - } - ] - }, - "profiledColumns": { - "type": "number" - }, - "syncId": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "profiledAt": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - } - }, - "required": [ - "sampledRows", - "valuesPerColumn", - "profiledColumns", - "syncId", - "profiledAt" - ], - "additionalProperties": false - }, - "status": { - "type": "string", - "enum": [ - "ready", - "no_profile_artifact", - "no_candidate_columns" - ] - } - }, - "required": [ - "connectionId", - "coverage", - "status" - ], - "additionalProperties": false - } - }, - "results": { - "type": "array", - "items": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "matches": { - "type": "array", - "items": { - "type": "object", - "properties": { - "connectionId": { - "type": "string" - }, - "sourceName": { - "type": "string" - }, - "columnName": { - "type": "string" - }, - "matchedValue": { - "type": "string" - }, - "cardinality": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "null" - } - ] - } - }, - "required": [ - "connectionId", - "sourceName", - "columnName", - "matchedValue", - "cardinality" - ], - "additionalProperties": false - } - }, - "misses": { - "type": "array", - "items": { - "type": "object", - "properties": { - "connectionId": { - "type": "string" - }, - "reason": { - "type": "string", - "enum": [ - "no_profile_artifact", - "no_candidate_columns", - "value_not_in_sample" - ] - } - }, - "required": [ - "connectionId", - "reason" - ], - "additionalProperties": false - } - } - }, - "required": [ - "value", - "matches", - "misses" - ], - "additionalProperties": false - } - } - }, - "required": [ - "searched", - "results" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Dictionary Search", - "readOnlyHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "discover_data", - "title": "Discover Data", - "description": "Search across KTX wiki pages, semantic-layer sources, measures, dimensions, raw tables, and columns. Example: discover_data({ query: \"monthly orders by customer\", connectionId: \"warehouse\", kinds: [\"sl_source\", \"table\"] }).", - "inputSchema": { - "type": "object", - "properties": { - "query": { - "type": "string", - "minLength": 1, - "description": "Natural-language discovery query, e.g. \"monthly orders by customer\"." - }, - "connectionId": { - "description": "Optional connection id. Pass it when user intent pins a specific warehouse.", - "type": "string", - "minLength": 1 - }, - "kinds": { - "description": "Optional kind filter.", - "type": "array", - "items": { - "type": "string", - "enum": [ - "wiki", - "sl_source", - "sl_measure", - "sl_dimension", - "table", - "column" - ], - "description": "Reference kind to include." - } - }, - "limit": { - "description": "Maximum refs to return. Defaults to 15.", - "default": 15, - "type": "integer", - "minimum": 1, - "maximum": 50 - } - }, - "required": [ - "query" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "refs": { - "type": "array", - "items": { - "type": "object", - "properties": { - "kind": { - "type": "string", - "enum": [ - "wiki", - "sl_source", - "sl_measure", - "sl_dimension", - "table", - "column" - ] - }, - "id": { - "type": "string" - }, - "score": { - "type": "number" - }, - "summary": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "snippet": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "matchedOn": { - "type": "string", - "enum": [ - "name", - "display", - "description", - "comment", - "expr", - "sample_value", - "body" - ] - }, - "connectionId": { - "type": "string" - }, - "tableRef": { - "type": "object", - "properties": { - "catalog": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "db": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "name": { - "type": "string" - } - }, - "required": [ - "catalog", - "db", - "name" - ], - "additionalProperties": false - }, - "columnName": { - "type": "string" - } - }, - "required": [ - "kind", - "id", - "score", - "summary", - "snippet", - "matchedOn" - ], - "additionalProperties": false - } - } - }, - "required": [ - "refs" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Discover Data", - "readOnlyHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "sql_execution", - "title": "SQL Execution", - "description": "Execute one parser-validated read-only SQL query against a configured KTX connection. Example: sql_execution({ connectionId: \"warehouse\", sql: \"select count(*) from public.orders\", maxRows: 100 }).", - "inputSchema": { - "type": "object", - "properties": { - "connectionId": { - "type": "string", - "minLength": 1, - "description": "Connection id to execute against. Required for raw SQL." - }, - "sql": { - "type": "string", - "minLength": 1, - "description": "Parser-validated read-only SQL, e.g. \"select count(*) from public.orders\"." - }, - "maxRows": { - "description": "Maximum rows to return. Defaults to 1000.", - "default": 1000, - "type": "integer", - "minimum": 1, - "maximum": 10000 - } - }, - "required": [ - "connectionId", - "sql" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "headers": { - "type": "array", - "items": { - "type": "string" - } - }, - "headerTypes": { - "type": "array", - "items": { - "type": "string" - } - }, - "rows": { - "type": "array", - "items": { - "type": "array", - "items": {} - } - }, - "rowCount": { - "type": "number" - } - }, - "required": [ - "headers", - "rows", - "rowCount" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "SQL Execution", - "readOnlyHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "memory_ingest", - "title": "Memory Ingest", - "description": "Ingest free-form markdown knowledge into durable KTX memory. Use this for business rules, metric definitions, schema gotchas, recurring findings, or explicit user requests to remember something. Example: memory_ingest({ connectionId: \"warehouse\", content: \"ARR is reported in cents in this warehouse.\" }).", - "inputSchema": { - "type": "object", - "properties": { - "content": { - "type": "string", - "minLength": 1, - "description": "Free-form markdown to ingest. Include the knowledge itself plus any context (source, the user question, why this came up) that the memory agent should consider when triaging into wiki/SL." - }, - "connectionId": { - "description": "Scope this memory to a specific connection. Required when the knowledge is warehouse-specific, including measure definitions, schema gotchas, or anything tied to a particular warehouse. Omit only for global wiki knowledge.", - "type": "string", - "minLength": 1 - } - }, - "required": [ - "content" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "runId": { - "type": "string" - } - }, - "required": [ - "runId" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Memory Ingest", - "destructiveHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - }, - { - "name": "memory_ingest_status", - "title": "Memory Ingest Status", - "description": "Read the current or final status for a memory ingest run. Example: memory_ingest_status({ runId: \"memory-run-1\" }).", - "inputSchema": { - "type": "object", - "properties": { - "runId": { - "type": "string", - "minLength": 1, - "description": "The memory ingest run id returned by memory_ingest." - } - }, - "required": [ - "runId" - ], - "$schema": "http://json-schema.org/draft-07/schema#" - }, - "outputSchema": { - "type": "object", - "properties": { - "runId": { - "type": "string" - }, - "status": { - "type": "string", - "enum": [ - "running", - "done", - "error" - ] - }, - "stage": { - "type": "string" - }, - "done": { - "type": "boolean" - }, - "captured": { - "type": "object", - "properties": { - "wiki": { - "type": "array", - "items": { - "type": "string" - } - }, - "sl": { - "type": "array", - "items": { - "type": "string" - } - }, - "xrefs": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "wiki", - "sl", - "xrefs" - ], - "additionalProperties": false - }, - "error": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "commitHash": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "skillsLoaded": { - "type": "array", - "items": { - "type": "string" - } - }, - "signalDetected": { - "type": "boolean" - } - }, - "required": [ - "runId", - "status", - "stage", - "done", - "captured", - "error", - "commitHash", - "skillsLoaded", - "signalDetected" - ], - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": false - }, - "annotations": { - "title": "Memory Ingest Status", - "readOnlyHint": true, - "openWorldHint": false - }, - "execution": { - "taskSupport": "forbidden" - } - } -] diff --git a/packages/cli/src/context/mcp/context-tools.ts b/packages/cli/src/context/mcp/context-tools.ts index aa593f7f..057e570b 100644 --- a/packages/cli/src/context/mcp/context-tools.ts +++ b/packages/cli/src/context/mcp/context-tools.ts @@ -12,6 +12,7 @@ import type { KtxMcpToolHandlerContext, KtxMcpToolResult, KtxMcpUserContext, + KtxSemanticLayerQueryResponse, NonArrayObject, } from './types.js'; @@ -60,7 +61,7 @@ const toolDescriptions = { sl_read_source: 'Read a semantic-layer YAML source by connection id and source name. Example: sl_read_source({ connectionId: "warehouse", sourceName: "orders" }).', sl_query: - 'Execute a semantic-layer query and return rows, headers, generated SQL, and plan details. Example: sl_query({ connectionId: "warehouse", measures: ["orders.order_count"], dimensions: [{ field: "orders.created_at", granularity: "month" }] }).', + 'Execute a semantic-layer query and return headers, rows, and total row count, plus correctness notes (e.g. compile-only or fan-out) when relevant. The generated SQL and full query plan are omitted by default; request them with include: ["sql"] and/or include: ["plan"]. Example: sl_query({ connectionId: "warehouse", measures: ["orders.order_count"], dimensions: [{ field: "orders.created_at", granularity: "month" }], include: ["sql"] }).', sql_execution: 'Execute one parser-validated read-only SQL query against a configured KTX connection. Example: sql_execution({ connectionId: "warehouse", sql: "select count(*) from public.orders", maxRows: 100 }).', memory_ingest: @@ -73,7 +74,7 @@ const connectionListSchema = z.object({}); const knowledgeSearchSchema = z.object({ query: z.string().min(1).describe('Natural-language wiki search query, e.g. "revenue recognition policy".'), - limit: z.number().int().min(1).max(50).default(10).describe('Maximum wiki pages to return. Defaults to 10.'), + limit: z.number().int().min(1).max(50).default(10).describe('Maximum wiki pages to return.'), }); const knowledgeReadSchema = z.object({ @@ -109,10 +110,7 @@ const slQueryOrderBySchema = z.object({ .describe( 'Field/measure/dimension id to order by, e.g. "orders.created_at", a dimension key like "mart_nrr_quarterly.quarter_label", or a measure alias.', ), - direction: z - .enum(['asc', 'desc']) - .default('asc') - .describe('Sort direction: "asc" or "desc". Defaults to "asc".'), + direction: z.enum(['asc', 'desc']).default('asc').describe('Sort direction for this field.'), }); const slQuerySchema = z.object({ @@ -136,8 +134,12 @@ const slQuerySchema = z.object({ .array(slQueryOrderBySchema) .default([]) .describe('Sort clauses. Use {field, direction?} entries.'), - limit: z.number().int().min(0).default(1000).describe('Maximum rows to return. Defaults to 1000.'), - include_empty: z.boolean().default(true).describe('Whether to include empty dimension groups. Defaults to true.'), + limit: z.number().int().min(0).default(1000).describe('Maximum rows to return.'), + include_empty: z.boolean().default(true).describe('Whether to include empty dimension groups.'), + include: z + .array(z.enum(['plan', 'sql'])) + .default([]) + .describe('Extra detail to attach to the response: "sql" for the generated SQL, "plan" for the full query plan.'), }); const entityDetailsTableRefSchema = z.object({ @@ -184,13 +186,13 @@ const discoverDataSchema = z.object({ .optional() .describe('Optional connection id. Pass it when user intent pins a specific warehouse.'), kinds: z.array(discoverDataKindSchema.describe('Reference kind to include.')).optional().describe('Optional kind filter.'), - limit: z.number().int().min(1).max(50).default(15).optional().describe('Maximum refs to return. Defaults to 15.'), + limit: z.number().int().min(1).max(50).default(10).optional().describe('Maximum refs to return.'), }); const sqlExecutionSchema = z.object({ connectionId: connectionIdSchema.describe('Connection id to execute against. Required for raw SQL.'), sql: z.string().min(1).describe('Parser-validated read-only SQL, e.g. "select count(*) from public.orders".'), - maxRows: z.number().int().min(1).max(10_000).default(1000).optional().describe('Maximum rows to return. Defaults to 1000.'), + maxRows: z.number().int().min(1).max(10_000).default(1000).optional().describe('Maximum rows to return.'), }); const memoryIngestSchema = z.object({ @@ -266,10 +268,14 @@ const slReadSourceOutputSchema = z.object({ const slQueryOutputSchema = z.object({ connectionId: z.string().optional(), dialect: z.string().optional(), - sql: z.string(), headers: z.array(z.string()), rows: z.array(z.array(z.unknown())), totalRows: z.number(), + // Correctness signals hoisted out of `plan` so they survive default projection (e.g. compile-only + // status, fan-out warnings). Present only when there is something to report. + notes: z.array(z.string()).optional(), + // Opt-in detail, attached only when requested via the `include` input. + sql: z.string().optional(), plan: unknownRecordSchema.optional(), }); @@ -411,12 +417,59 @@ const memoryIngestStatusOutputSchema = z.object({ /** @internal */ export function jsonToolResult(structuredContent: T): KtxMcpToolResult { + // Compact (non-indented) JSON: this `content` text is the copy the model reads. Pretty-printing + // arrays-of-arrays (every `rows` payload) puts one scalar per line, inflating tabular results by + // a large constant factor. `structuredContent` carries the same data for structured-output clients. return { - content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }], + content: [{ type: 'text', text: JSON.stringify(structuredContent) }], structuredContent, }; } +/** + * Pull the correctness-critical signals out of a query plan so they survive even when the caller + * did not opt into the full `plan`. Returns an empty list when there is nothing to flag. + */ +function slQueryNotes(plan: Record | undefined): string[] { + if (!plan) { + return []; + } + const notes: string[] = []; + const execution = plan.execution; + if ( + execution && + typeof execution === 'object' && + (execution as Record).mode === 'compile_only' + ) { + const reason = (execution as Record).reason; + notes.push(typeof reason === 'string' ? reason : 'Compiled SQL only; no rows were executed.'); + } + if (plan.has_fan_out === true) { + const description = typeof plan.fan_out_description === 'string' ? plan.fan_out_description.trim() : ''; + notes.push(description.length > 0 ? description : 'Fan-out detected: measure totals may be inflated by joins.'); + } + return notes; +} + +/** + * Default sl_query response is the minimum the agent needs to read the result: connection, headers, + * rows, totals, plus any correctness notes. The generated `sql` and the full `plan` are attached only + * when explicitly requested via `include`, since both are large and echo information the caller already has. + */ +function projectSlQueryResult(result: KtxSemanticLayerQueryResponse, include: ('plan' | 'sql')[]) { + const notes = slQueryNotes(result.plan); + return { + ...(result.connectionId !== undefined ? { connectionId: result.connectionId } : {}), + ...(result.dialect !== undefined ? { dialect: result.dialect } : {}), + headers: result.headers, + rows: result.rows, + totalRows: result.totalRows, + ...(notes.length > 0 ? { notes } : {}), + ...(include.includes('sql') ? { sql: result.sql } : {}), + ...(include.includes('plan') && result.plan ? { plan: result.plan } : {}), + }; +} + function jsonErrorToolResult(text: string): KtxMcpToolResult> { return { content: [{ type: 'text', text }], @@ -618,23 +671,22 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void slQuerySchema, async (input, context) => { const onProgress = mcpProgressCallback(context); - return jsonToolResult( - await semanticLayer.query( - { - connectionId: input.connectionId, - query: { - measures: input.measures, - dimensions: input.dimensions, - filters: input.filters, - segments: input.segments, - order_by: input.order_by, - limit: input.limit, - include_empty: input.include_empty, - }, + const result = await semanticLayer.query( + { + connectionId: input.connectionId, + query: { + measures: input.measures, + dimensions: input.dimensions, + filters: input.filters, + segments: input.segments, + order_by: input.order_by, + limit: input.limit, + include_empty: input.include_empty, }, - onProgress ? { onProgress } : undefined, - ), + }, + onProgress ? { onProgress } : undefined, ); + return jsonToolResult(projectSlQueryResult(result, input.include)); }, ); } diff --git a/packages/cli/src/context/mcp/types.ts b/packages/cli/src/context/mcp/types.ts index 29a8c069..e9fc0ff2 100644 --- a/packages/cli/src/context/mcp/types.ts +++ b/packages/cli/src/context/mcp/types.ts @@ -110,7 +110,10 @@ interface KtxSemanticLayerReadResponse { yaml: string; } -interface KtxSemanticLayerQueryResponse { +/** @internal */ +export interface KtxSemanticLayerQueryResponse { + connectionId?: string; + dialect?: string; sql: string; headers: string[]; rows: unknown[][]; diff --git a/packages/cli/src/context/search/discover.ts b/packages/cli/src/context/search/discover.ts index b3456459..9a572daf 100644 --- a/packages/cli/src/context/search/discover.ts +++ b/packages/cli/src/context/search/discover.ts @@ -167,7 +167,7 @@ async function wikiCandidates( query: input.query, userId: options.userId, embeddingService: options.embeddingService ?? null, - limit: Math.max(input.limit ?? 15, 25), + limit: Math.max(input.limit ?? 10, 25), }); const records: CandidateRecord[] = []; for (const result of searchResults) { @@ -421,7 +421,8 @@ function hydrate( } return { ...ref, - score: maxScore > 0 ? Number((candidate.score / maxScore).toFixed(6)) : 0, + // 3 decimals is plenty for a relative-rank hint; 6 just spent bytes on noise. + score: maxScore > 0 ? Number((candidate.score / maxScore).toFixed(3)) : 0, }; }) .filter((result): result is KtxDiscoverDataRef => result !== null); @@ -433,7 +434,7 @@ export function createKtxDiscoverDataService( ): { search(input: KtxDiscoverDataInput): Promise } { return { async search(input) { - const limit = Math.max(1, Math.min(input.limit ?? 15, 50)); + const limit = Math.max(1, Math.min(input.limit ?? 10, 50)); const query = input.query.trim(); if (!query) { return []; diff --git a/packages/cli/src/skills/analytics/SKILL.md b/packages/cli/src/skills/analytics/SKILL.md index e4aa86d2..e6857e56 100644 --- a/packages/cli/src/skills/analytics/SKILL.md +++ b/packages/cli/src/skills/analytics/SKILL.md @@ -28,7 +28,12 @@ You have access to KTX MCP tools for data discovery, semantic-layer analysis, ra - Read entity details before writing SQL against an unfamiliar table. Do not assume column names. - Treat `sql_execution` as read-only. Writes are rejected by the server. - Validate value mentions with `dictionary_search` instead of guessing case or spelling. Treat a `dictionary_search` miss as non-authoritative. The index is built from profile-sampled values, so a missing value may simply have been outside the sample. Follow up with `sql_execution` against the most plausible columns before concluding the value is absent. -- When `connection_list` shows multiple connections, pass an explicit `connectionId` to every tool that takes one and where user intent pins a specific warehouse. Required: `entity_details`, `sl_read_source`, and `sql_execution`. Required when user intent is warehouse-specific, including wording like "in our warehouse" or "this warehouse": `memory_ingest`; without `connectionId`, the memory agent cannot update the semantic layer and the knowledge lands as wiki-only. Pass `connectionId` when intent pins a warehouse, otherwise omit for unscoped discovery: `sl_query`, `discover_data`, and `dictionary_search`. Never pass `connectionId` to `connection_list`, `wiki_search`, `wiki_read`, or `memory_ingest_status`. If intent is ambiguous for a required-or-scoped tool, ask the user which warehouse before calling. +- `connectionId` scoping when `connection_list` shows multiple connections: + - Always pass it: `entity_details`, `sl_read_source`, `sql_execution`. + - Pass it when intent pins a warehouse, otherwise omit for unscoped discovery: `sl_query`, `discover_data`, `dictionary_search`. + - `memory_ingest`: pass it for warehouse-specific knowledge (e.g. "in our warehouse"); without it the memory lands as wiki-only and cannot update the semantic layer. + - Never pass it: `connection_list`, `wiki_search`, `wiki_read`, `memory_ingest_status`. + - If scoping is required but intent is ambiguous, ask which warehouse before calling. - Show compact result tables for small outputs. For broad results, summarize the top findings and mention the applied limit. - Ask a concise clarification only when the metric, date range, entity, or grain is genuinely ambiguous and cannot be inferred from context. diff --git a/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json b/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json index 10cb0b77..b38851f4 100644 --- a/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json +++ b/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json @@ -65,7 +65,7 @@ }, "limit": { "default": 10, - "description": "Maximum wiki pages to return. Defaults to 10.", + "description": "Maximum wiki pages to return.", "type": "integer", "minimum": 1, "maximum": 50 @@ -307,7 +307,7 @@ { "name": "sl_query", "title": "Semantic Layer Query", - "description": "Execute a semantic-layer query and return rows, headers, generated SQL, and plan details. Example: sl_query({ connectionId: \"warehouse\", measures: [\"orders.order_count\"], dimensions: [{ field: \"orders.created_at\", granularity: \"month\" }] }).", + "description": "Execute a semantic-layer query and return headers, rows, and total row count, plus correctness notes (e.g. compile-only or fan-out) when relevant. The generated SQL and full query plan are omitted by default; request them with include: [\"sql\"] and/or include: [\"plan\"]. Example: sl_query({ connectionId: \"warehouse\", measures: [\"orders.order_count\"], dimensions: [{ field: \"orders.created_at\", granularity: \"month\" }], include: [\"sql\"] }).", "inputSchema": { "type": "object", "properties": { @@ -403,7 +403,7 @@ }, "direction": { "default": "asc", - "description": "Sort direction: \"asc\" or \"desc\". Defaults to \"asc\".", + "description": "Sort direction for this field.", "type": "string", "enum": [ "asc", @@ -418,15 +418,27 @@ }, "limit": { "default": 1000, - "description": "Maximum rows to return. Defaults to 1000.", + "description": "Maximum rows to return.", "type": "integer", "minimum": 0, "maximum": 9007199254740991 }, "include_empty": { "default": true, - "description": "Whether to include empty dimension groups. Defaults to true.", + "description": "Whether to include empty dimension groups.", "type": "boolean" + }, + "include": { + "default": [], + "description": "Extra detail to attach to the response: \"sql\" for the generated SQL, \"plan\" for the full query plan.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "plan", + "sql" + ] + } } }, "required": [ @@ -443,9 +455,6 @@ "dialect": { "type": "string" }, - "sql": { - "type": "string" - }, "headers": { "type": "array", "items": { @@ -462,6 +471,15 @@ "totalRows": { "type": "number" }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + }, + "sql": { + "type": "string" + }, "plan": { "type": "object", "propertyNames": { @@ -471,7 +489,6 @@ } }, "required": [ - "sql", "headers", "rows", "totalRows" @@ -1241,8 +1258,8 @@ } }, "limit": { - "description": "Maximum refs to return. Defaults to 15.", - "default": 15, + "description": "Maximum refs to return.", + "default": 10, "type": "integer", "minimum": 1, "maximum": 50 @@ -1396,7 +1413,7 @@ "description": "Parser-validated read-only SQL, e.g. \"select count(*) from public.orders\"." }, "maxRows": { - "description": "Maximum rows to return. Defaults to 1000.", + "description": "Maximum rows to return.", "default": 1000, "type": "integer", "minimum": 1, diff --git a/packages/cli/test/context/mcp/server.test.ts b/packages/cli/test/context/mcp/server.test.ts index f6d3e37e..38bc4af9 100644 --- a/packages/cli/test/context/mcp/server.test.ts +++ b/packages/cli/test/context/mcp/server.test.ts @@ -307,16 +307,12 @@ describe('createKtxMcpServer', () => { content: [ { type: 'text', - text: JSON.stringify( - { - headers: ['status', 'count'], - headerTypes: ['text', 'bigint'], - rows: [['paid', 42]], - rowCount: 1, - }, - null, - 2, - ), + text: JSON.stringify({ + headers: ['status', 'count'], + headerTypes: ['text', 'bigint'], + rows: [['paid', 42]], + rowCount: 1, + }), }, ], structuredContent: { @@ -598,6 +594,92 @@ describe('createKtxMcpServer', () => { ); }); + it('sl_query default response omits plan and sql but keeps compile-only and fan-out notes', async () => { + const fake = makeFakeServer(); + const semanticLayer: KtxSemanticLayerMcpPort = { + readSource: vi.fn(), + query: vi.fn().mockResolvedValue({ + connectionId: 'warehouse', + dialect: 'postgres', + sql: 'select count(*) from public.orders', + headers: ['order_count'], + rows: [], + totalRows: 0, + plan: { + sources_used: ['orders'], + has_fan_out: true, + fan_out_description: 'orders fans out across line_items', + execution: { mode: 'compile_only', reason: 'No execution adapter configured.' }, + }, + }), + }; + + createKtxMcpServer({ + server: fake.server, + userContext: { userId: 'local-user' }, + contextTools: { semanticLayer }, + }); + + const result = await getTool(fake.tools, 'sl_query').handler({ + connectionId: 'warehouse', + measures: ['orders.order_count'], + }); + + expect(result).toMatchObject({ + structuredContent: { + connectionId: 'warehouse', + dialect: 'postgres', + headers: ['order_count'], + rows: [], + totalRows: 0, + notes: ['No execution adapter configured.', 'orders fans out across line_items'], + }, + }); + const structured = (result as { structuredContent: Record }).structuredContent; + expect(structured.sql).toBeUndefined(); + expect(structured.plan).toBeUndefined(); + }); + + it('sl_query attaches sql and plan only when include requests them', async () => { + const fake = makeFakeServer(); + const plan = { sources_used: ['orders'], execution: { mode: 'executed' } }; + const semanticLayer: KtxSemanticLayerMcpPort = { + readSource: vi.fn(), + query: vi.fn().mockResolvedValue({ + connectionId: 'warehouse', + dialect: 'postgres', + sql: 'select count(*) from public.orders', + headers: ['order_count'], + rows: [[3]], + totalRows: 1, + plan, + }), + }; + + createKtxMcpServer({ + server: fake.server, + userContext: { userId: 'local-user' }, + contextTools: { semanticLayer }, + }); + + const result = await getTool(fake.tools, 'sl_query').handler({ + connectionId: 'warehouse', + measures: ['orders.order_count'], + include: ['plan', 'sql'], + }); + + expect(result).toMatchObject({ + structuredContent: { + sql: 'select count(*) from public.orders', + plan, + rows: [[3]], + totalRows: 1, + }, + }); + const structured = (result as { structuredContent: Record }).structuredContent; + expect(structured.notes).toBeUndefined(); + }); + it('entity_details rejects sql-style schema table ref aliases', async () => { const fake = makeFakeServer(); const entityDetails = makeAllContextTools().entityDetails!; @@ -798,7 +880,7 @@ describe('createKtxMcpServer', () => { connectionId: '00000000-0000-4000-8000-000000000001', }), ).resolves.toEqual({ - content: [{ type: 'text', text: JSON.stringify({ runId: 'run-1' }, null, 2) }], + content: [{ type: 'text', text: JSON.stringify({ runId: 'run-1' }) }], structuredContent: { runId: 'run-1' }, }); expect(ingest.ingest).toHaveBeenCalledWith({ @@ -825,21 +907,17 @@ describe('createKtxMcpServer', () => { content: [ { type: 'text', - text: JSON.stringify( - { - runId: 'run-1', - status: 'done', - stage: 'done', - done: true, - captured: { wiki: ['revenue'], sl: [], xrefs: [] }, - error: null, - commitHash: 'abc123', - skillsLoaded: ['wiki_capture'], - signalDetected: true, - }, - null, - 2, - ), + text: JSON.stringify({ + runId: 'run-1', + status: 'done', + stage: 'done', + done: true, + captured: { wiki: ['revenue'], sl: [], xrefs: [] }, + error: null, + commitHash: 'abc123', + skillsLoaded: ['wiki_capture'], + signalDetected: true, + }), }, ], structuredContent: { @@ -1047,19 +1125,15 @@ describe('createKtxMcpServer', () => { content: [ { type: 'text', - text: JSON.stringify( - { - connections: [ - { - id: '00000000-0000-4000-8000-000000000001', - name: 'Warehouse', - connectionType: 'POSTGRES', - }, - ], - }, - null, - 2, - ), + text: JSON.stringify({ + connections: [ + { + id: '00000000-0000-4000-8000-000000000001', + name: 'Warehouse', + connectionType: 'POSTGRES', + }, + ], + }), }, ], structuredContent: {