Merge branch 'main' into review-pasted-findings-v3

This commit is contained in:
Andrey Avtomonov 2026-05-30 17:54:33 +02:00 committed by GitHub
commit 90a7e3467b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 621 additions and 1729 deletions

72
.github/workflows/star-history.yml vendored Normal file
View file

@ -0,0 +1,72 @@
name: Refresh star history chart
on:
schedule:
# Twice daily at 06:00 and 18:00 UTC.
- cron: "0 6,18 * * *"
workflow_dispatch:
permissions:
contents: write
env:
DO_NOT_TRACK: "1"
KTX_TELEMETRY_DISABLED: "1"
NEXT_TELEMETRY_DISABLED: "1"
concurrency:
group: star-history-refresh
cancel-in-progress: true
jobs:
refresh:
name: Regenerate assets/star-history.svg
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# RELEASE_PAT can push to the protected main branch; the default
# GITHUB_TOKEN is rejected by the branch-protection hook (GH006).
token: ${{ secrets.RELEASE_PAT }}
- name: Fetch fresh star-history SVG
run: |
set -euo pipefail
# cachebust forces star-history to regenerate instead of serving its
# own server-side cache; --location follows the slug-normalizing 301.
url="https://api.star-history.com/svg?repos=Kaelio/ktx&type=Date&cachebust=${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
curl --fail --location --silent --show-error \
--retry 3 --retry-delay 5 --max-time 60 \
-o assets/star-history.svg.new "$url"
# Guard against error pages / truncated responses before overwriting.
if ! grep -q "</svg>" assets/star-history.svg.new; then
echo "Downloaded file is not a valid SVG; aborting." >&2
exit 1
fi
if [ "$(wc -c < assets/star-history.svg.new)" -lt 1000 ]; then
echo "Downloaded SVG is suspiciously small; aborting." >&2
exit 1
fi
# The star-history API returns the SVG without a trailing newline,
# which end-of-file-fixer rewrites whenever pre-commit runs
# --all-files on a PR. Because the refresh commit below uses [skip ci],
# the hook never runs against it here, so an un-normalized file
# silently breaks the pre-commit check on every open PR. Normalize to
# exactly one trailing newline before committing.
printf '%s\n' "$(cat assets/star-history.svg.new)" > assets/star-history.svg
rm -f assets/star-history.svg.new
- name: Commit if changed
run: |
set -euo pipefail
if git diff --quiet -- assets/star-history.svg; then
echo "Star-history chart unchanged; nothing to commit."
exit 0
fi
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add assets/star-history.svg
# [skip ci] keeps this housekeeping commit from triggering KTX CI.
git commit -m "chore: refresh star history chart [skip ci]"
git push

View file

@ -248,6 +248,6 @@ event catalog and opt-out options.
<p align="center">
<a href="https://star-history.com/#Kaelio/ktx&Date">
<img src="https://api.star-history.com/svg?repos=Kaelio/ktx&type=Date" alt="ktx Star History Chart" width="700" />
<img src="assets/star-history.svg" alt="ktx Star History Chart" width="700" />
</a>
</p>

1
assets/star-history.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 62 KiB

View file

@ -13,6 +13,7 @@ import { localPullConfigForAdapter, type DefaultLocalIngestAdaptersOptions } fro
import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js';
import type { MemoryFlowEventSink } from './memory-flow/types.js';
import { buildSyncId } from './raw-sources-paths.js';
import { ingestReportOutcome } from './reports.js';
import type { IngestReportBody, IngestReportSnapshot } from './reports.js';
import { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js';
import type { IngestBundleResult, IngestJobContext, IngestJobPhase, IngestTrigger, SourceAdapter } from './types.js';
@ -79,7 +80,7 @@ export interface LocalMetabaseFanoutProgress {
metabaseDatabaseId: number;
targetConnectionId: string;
jobId: string;
status: 'done' | 'failed';
status: 'done' | 'partial' | 'failed';
}): void;
}
@ -232,11 +233,11 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise<Lo
}
function metabaseFanoutStatus(children: LocalMetabaseFanoutChild[]): LocalMetabaseFanoutResult['status'] {
const succeeded = children.filter((child) => child.report.body.failedWorkUnits.length === 0).length;
if (succeeded === children.length) {
const outcomes = children.map((child) => ingestReportOutcome(child.report));
if (outcomes.every((outcome) => outcome === 'done')) {
return 'all_succeeded';
}
if (succeeded === 0) {
if (outcomes.every((outcome) => outcome === 'error')) {
return 'all_failed';
}
return 'partial_failure';
@ -401,12 +402,13 @@ export async function runLocalMetabaseIngest(
error,
});
}
const childOutcome = ingestReportOutcome(child.report);
options.progress?.onMetabaseChildCompleted?.({
metabaseConnectionId,
metabaseDatabaseId: childPlan.metabaseDatabaseId,
targetConnectionId,
jobId: child.report.jobId,
status: child.report.body.failedWorkUnits.length > 0 ? 'failed' : 'done',
status: childOutcome === 'error' ? 'failed' : childOutcome,
});
children.push({
jobId: child.report.jobId,

View file

@ -1,5 +1,6 @@
import type { MemoryAction } from '../../../context/memory/types.js';
import type { LocalIngestRunRecord } from '../local-stage-ingest.js';
import { ingestReportOutcome } from '../reports.js';
import type { IngestReportSnapshot } from '../reports.js';
import type {
MemoryFlowActionDetail,
@ -72,7 +73,7 @@ function fullModeMetadata(input: {
}
function reportStatus(report: IngestReportSnapshot): MemoryFlowReplayInput['status'] {
return report.body.failedWorkUnits.length > 0 ? 'error' : 'done';
return ingestReportOutcome(report) === 'error' ? 'error' : 'done';
}
function reportCreatedEvent(report: IngestReportSnapshot): MemoryFlowEvent {

View file

@ -146,6 +146,20 @@ export function savedMemoryCountsForReport(report: IngestReportSnapshot): Ingest
};
}
/** @internal */
export type IngestReportOutcome = 'done' | 'partial' | 'error';
export function ingestReportOutcome(report: IngestReportSnapshot): IngestReportOutcome {
if (report.body.status === 'failed') {
return 'error';
}
if (report.body.failedWorkUnits.length === 0) {
return 'done';
}
const { wikiCount, slCount } = savedMemoryCountsForReport(report);
return wikiCount + slCount > 0 ? 'partial' : 'error';
}
export function buildStageIndexFromReportBody(jobId: string, connectionId: string, body: IngestReportBody): StageIndex {
return {
jobId,

File diff suppressed because it is too large Load diff

View file

@ -13,6 +13,7 @@ import type {
KtxMcpToolHandlerContext,
KtxMcpToolResult,
KtxMcpUserContext,
KtxSemanticLayerQueryResponse,
NonArrayObject,
} from './types.js';
@ -62,7 +63,7 @@ const toolDescriptions = {
sl_read_source:
'Read a semantic-layer YAML source by connection id and source name. Example: sl_read_source({ connectionId: "warehouse", sourceName: "orders" }).',
sl_query:
'Execute a semantic-layer query and return rows, headers, generated SQL, and plan details. Example: sl_query({ connectionId: "warehouse", measures: ["orders.order_count"], dimensions: [{ field: "orders.created_at", granularity: "month" }] }).',
'Execute a semantic-layer query and return headers, rows, and total row count, plus correctness notes (e.g. compile-only or fan-out) when relevant. The generated SQL and full query plan are omitted by default; request them with include: ["sql"] and/or include: ["plan"]. Example: sl_query({ connectionId: "warehouse", measures: ["orders.order_count"], dimensions: [{ field: "orders.created_at", granularity: "month" }], include: ["sql"] }).',
sql_execution:
'Execute one parser-validated read-only SQL query against a configured KTX connection. Example: sql_execution({ connectionId: "warehouse", sql: "select count(*) from public.orders", maxRows: 100 }).',
memory_ingest:
@ -75,7 +76,7 @@ const connectionListSchema = z.object({});
const knowledgeSearchSchema = z.object({
query: z.string().min(1).describe('Natural-language wiki search query, e.g. "revenue recognition policy".'),
limit: z.number().int().min(1).max(50).default(10).describe('Maximum wiki pages to return. Defaults to 10.'),
limit: z.number().int().min(1).max(50).default(10).describe('Maximum wiki pages to return.'),
});
const knowledgeReadSchema = z.object({
@ -111,10 +112,7 @@ const slQueryOrderBySchema = z.object({
.describe(
'Field/measure/dimension id to order by, e.g. "orders.created_at", a dimension key like "mart_nrr_quarterly.quarter_label", or a measure alias.',
),
direction: z
.enum(['asc', 'desc'])
.default('asc')
.describe('Sort direction: "asc" or "desc". Defaults to "asc".'),
direction: z.enum(['asc', 'desc']).default('asc').describe('Sort direction for this field.'),
});
const slQuerySchema = z.object({
@ -138,8 +136,12 @@ const slQuerySchema = z.object({
.array(slQueryOrderBySchema)
.default([])
.describe('Sort clauses. Use {field, direction?} entries.'),
limit: z.number().int().min(0).default(1000).describe('Maximum rows to return. Defaults to 1000.'),
include_empty: z.boolean().default(true).describe('Whether to include empty dimension groups. Defaults to true.'),
limit: z.number().int().min(0).default(1000).describe('Maximum rows to return.'),
include_empty: z.boolean().default(true).describe('Whether to include empty dimension groups.'),
include: z
.array(z.enum(['plan', 'sql']))
.default([])
.describe('Extra detail to attach to the response: "sql" for the generated SQL, "plan" for the full query plan.'),
});
const entityDetailsTableRefSchema = z.object({
@ -186,13 +188,13 @@ const discoverDataSchema = z.object({
.optional()
.describe('Optional connection id. Pass it when user intent pins a specific warehouse.'),
kinds: z.array(discoverDataKindSchema.describe('Reference kind to include.')).optional().describe('Optional kind filter.'),
limit: z.number().int().min(1).max(50).default(15).optional().describe('Maximum refs to return. Defaults to 15.'),
limit: z.number().int().min(1).max(50).default(10).optional().describe('Maximum refs to return.'),
});
const sqlExecutionSchema = z.object({
connectionId: connectionIdSchema.describe('Connection id to execute against. Required for raw SQL.'),
sql: z.string().min(1).describe('Parser-validated read-only SQL, e.g. "select count(*) from public.orders".'),
maxRows: z.number().int().min(1).max(10_000).default(1000).optional().describe('Maximum rows to return. Defaults to 1000.'),
maxRows: z.number().int().min(1).max(10_000).default(1000).optional().describe('Maximum rows to return.'),
});
const memoryIngestSchema = z.object({
@ -268,10 +270,14 @@ const slReadSourceOutputSchema = z.object({
const slQueryOutputSchema = z.object({
connectionId: z.string().optional(),
dialect: z.string().optional(),
sql: z.string(),
headers: z.array(z.string()),
rows: z.array(z.array(z.unknown())),
totalRows: z.number(),
// Correctness signals hoisted out of `plan` so they survive default projection (e.g. compile-only
// status, fan-out warnings). Present only when there is something to report.
notes: z.array(z.string()).optional(),
// Opt-in detail, attached only when requested via the `include` input.
sql: z.string().optional(),
plan: unknownRecordSchema.optional(),
});
@ -413,12 +419,59 @@ const memoryIngestStatusOutputSchema = z.object({
/** @internal */
export function jsonToolResult<T extends NonArrayObject>(structuredContent: T): KtxMcpToolResult<T> {
// Compact (non-indented) JSON: this `content` text is the copy the model reads. Pretty-printing
// arrays-of-arrays (every `rows` payload) puts one scalar per line, inflating tabular results by
// a large constant factor. `structuredContent` carries the same data for structured-output clients.
return {
content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }],
content: [{ type: 'text', text: JSON.stringify(structuredContent) }],
structuredContent,
};
}
/**
* Pull the correctness-critical signals out of a query plan so they survive even when the caller
* did not opt into the full `plan`. Returns an empty list when there is nothing to flag.
*/
function slQueryNotes(plan: Record<string, unknown> | undefined): string[] {
if (!plan) {
return [];
}
const notes: string[] = [];
const execution = plan.execution;
if (
execution &&
typeof execution === 'object' &&
(execution as Record<string, unknown>).mode === 'compile_only'
) {
const reason = (execution as Record<string, unknown>).reason;
notes.push(typeof reason === 'string' ? reason : 'Compiled SQL only; no rows were executed.');
}
if (plan.has_fan_out === true) {
const description = typeof plan.fan_out_description === 'string' ? plan.fan_out_description.trim() : '';
notes.push(description.length > 0 ? description : 'Fan-out detected: measure totals may be inflated by joins.');
}
return notes;
}
/**
* Default sl_query response is the minimum the agent needs to read the result: connection, headers,
* rows, totals, plus any correctness notes. The generated `sql` and the full `plan` are attached only
* when explicitly requested via `include`, since both are large and echo information the caller already has.
*/
function projectSlQueryResult(result: KtxSemanticLayerQueryResponse, include: ('plan' | 'sql')[]) {
const notes = slQueryNotes(result.plan);
return {
...(result.connectionId !== undefined ? { connectionId: result.connectionId } : {}),
...(result.dialect !== undefined ? { dialect: result.dialect } : {}),
headers: result.headers,
rows: result.rows,
totalRows: result.totalRows,
...(notes.length > 0 ? { notes } : {}),
...(include.includes('sql') ? { sql: result.sql } : {}),
...(include.includes('plan') && result.plan ? { plan: result.plan } : {}),
};
}
function jsonErrorToolResult(text: string): KtxMcpToolResult<Record<string, never>> {
return {
content: [{ type: 'text', text }],
@ -641,23 +694,22 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void
slQuerySchema,
async (input, context) => {
const onProgress = mcpProgressCallback(context);
return jsonToolResult(
await semanticLayer.query(
{
connectionId: input.connectionId,
query: {
measures: input.measures,
dimensions: input.dimensions,
filters: input.filters,
segments: input.segments,
order_by: input.order_by,
limit: input.limit,
include_empty: input.include_empty,
},
const result = await semanticLayer.query(
{
connectionId: input.connectionId,
query: {
measures: input.measures,
dimensions: input.dimensions,
filters: input.filters,
segments: input.segments,
order_by: input.order_by,
limit: input.limit,
include_empty: input.include_empty,
},
onProgress ? { onProgress } : undefined,
),
},
onProgress ? { onProgress } : undefined,
);
return jsonToolResult(projectSlQueryResult(result, input.include));
},
);
}

View file

@ -120,7 +120,10 @@ interface KtxSemanticLayerReadResponse {
yaml: string;
}
interface KtxSemanticLayerQueryResponse {
/** @internal */
export interface KtxSemanticLayerQueryResponse {
connectionId?: string;
dialect?: string;
sql: string;
headers: string[];
rows: unknown[][];

View file

@ -167,7 +167,7 @@ async function wikiCandidates(
query: input.query,
userId: options.userId,
embeddingService: options.embeddingService ?? null,
limit: Math.max(input.limit ?? 15, 25),
limit: Math.max(input.limit ?? 10, 25),
});
const records: CandidateRecord[] = [];
for (const result of searchResults) {
@ -421,7 +421,8 @@ function hydrate(
}
return {
...ref,
score: maxScore > 0 ? Number((candidate.score / maxScore).toFixed(6)) : 0,
// 3 decimals is plenty for a relative-rank hint; 6 just spent bytes on noise.
score: maxScore > 0 ? Number((candidate.score / maxScore).toFixed(3)) : 0,
};
})
.filter((result): result is KtxDiscoverDataRef => result !== null);
@ -433,7 +434,7 @@ export function createKtxDiscoverDataService(
): { search(input: KtxDiscoverDataInput): Promise<KtxDiscoverDataResponse> } {
return {
async search(input) {
const limit = Math.max(1, Math.min(input.limit ?? 15, 50));
const limit = Math.max(1, Math.min(input.limit ?? 10, 50));
const query = input.query.trim();
if (!query) {
return [];

View file

@ -2,7 +2,7 @@ import { buildMemoryFlowViewModel } from './context/ingest/memory-flow/view-mode
import { createMemoryFlowLiveBuffer, sanitizeMemoryFlowError } from './context/ingest/memory-flow/live-buffer.js';
import { formatMemoryFlowFinalSummary } from './context/ingest/memory-flow/summary.js';
import { getLatestLocalIngestStatus, getLocalIngestStatus, type LocalMetabaseFanoutResult, type LocalMetabaseFanoutProgress, type RunLocalIngestOptions, runLocalIngest, runLocalMetabaseIngest } from './context/ingest/local-ingest.js';
import { type IngestReportSnapshot, savedMemoryCountsForReport } from './context/ingest/reports.js';
import { type IngestReportSnapshot, ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js';
import { ingestReportToMemoryFlowReplay } from './context/ingest/memory-flow/events.js';
import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/memory-flow/types.js';
import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js';
@ -93,10 +93,6 @@ export interface KtxIngestDeps {
runtimeIo?: KtxIngestIo;
}
function reportStatus(report: IngestReportSnapshot): 'done' | 'error' {
return report.body.status === 'failed' || report.body.failedWorkUnits.length > 0 ? 'error' : 'done';
}
const REPORT_SOURCE_LABELS = new Map<string, string>([
['live-database', 'Database schema'],
['historic-sql', 'Query history'],
@ -193,7 +189,7 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void
if (report.body.tracePath) {
io.stdout.write(`Trace: ${report.body.tracePath}\n`);
}
io.stdout.write(`Status: ${reportStatus(report)}\n`);
io.stdout.write(`Status: ${ingestReportOutcome(report)}\n`);
io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`);
io.stdout.write(`Connection: ${report.connectionId}\n`);
io.stdout.write(`Sync: ${report.body.syncId}\n`);
@ -231,7 +227,7 @@ function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng
}
io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`);
for (const child of result.children) {
const status = reportStatus(child.report);
const status = ingestReportOutcome(child.report);
io.stdout.write(
`- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`,
);
@ -595,7 +591,7 @@ function initialRunMemoryFlowInput(
}
function finalRunMemoryFlowInput(snapshot: MemoryFlowReplayInput, report: IngestReportSnapshot): MemoryFlowReplayInput {
const status = reportStatus(report);
const status = ingestReportOutcome(report) === 'error' ? 'error' : 'done';
return {
...snapshot,
runId: report.runId,
@ -777,7 +773,7 @@ export async function runKtxIngest(
} finally {
plainProgress?.flush();
}
return result.status === 'all_succeeded' ? 0 : 1;
return result.status === 'all_failed' ? 1 : 0;
}
const jobId = deps.jobIdFactory?.();
@ -846,7 +842,7 @@ export async function runKtxIngest(
liveTui?.close();
liveTui = null;
io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot));
return reportStatus(result.report) === 'done' ? 0 : 1;
return ingestReportOutcome(result.report) === 'error' ? 1 : 0;
}
plainProgress?.flush();
await writeReportRecord(result.report, runOutputMode, io, {
@ -854,7 +850,7 @@ export async function runKtxIngest(
renderStoredMemoryFlow: deps.renderStoredMemoryFlow,
env,
});
return reportStatus(result.report) === 'done' ? 0 : 1;
return ingestReportOutcome(result.report) === 'error' ? 1 : 0;
} finally {
plainProgress?.flush();
liveTui?.close();

View file

@ -1,7 +1,7 @@
import { existsSync } from 'node:fs';
import { basename, join, resolve } from 'node:path';
import { getLatestLocalIngestStatus } from './context/ingest/local-ingest.js';
import { savedMemoryCountsForReport } from './context/ingest/reports.js';
import { ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js';
import { ktxLocalStateDbPath } from './context/project/local-state-db.js';
import { loadKtxProject, type KtxLocalProject } from './context/project/project.js';
import { readKtxSetupState } from './context/project/setup-config.js';
@ -306,7 +306,7 @@ function sourceConnections(config: Awaited<ReturnType<typeof loadKtxProject>>['c
type LocalIngestStatusReport = NonNullable<Awaited<ReturnType<typeof getLatestLocalIngestStatus>>>;
function reportHasSavedContext(report: LocalIngestStatusReport): boolean {
if (report.body.failedWorkUnits.length > 0) {
if (ingestReportOutcome(report) === 'error') {
return false;
}
const counts = savedMemoryCountsForReport(report);

View file

@ -28,7 +28,12 @@ You have access to KTX MCP tools for data discovery, semantic-layer analysis, ra
- Read entity details before writing SQL against an unfamiliar table. Do not assume column names.
- Treat `sql_execution` as read-only. Writes are rejected by the server.
- Validate value mentions with `dictionary_search` instead of guessing case or spelling. Treat a `dictionary_search` miss as non-authoritative. The index is built from profile-sampled values, so a missing value may simply have been outside the sample. Follow up with `sql_execution` against the most plausible columns before concluding the value is absent.
- When `connection_list` shows multiple connections, pass an explicit `connectionId` to every tool that takes one and where user intent pins a specific warehouse. Required: `entity_details`, `sl_read_source`, and `sql_execution`. Required when user intent is warehouse-specific, including wording like "in our warehouse" or "this warehouse": `memory_ingest`; without `connectionId`, the memory agent cannot update the semantic layer and the knowledge lands as wiki-only. Pass `connectionId` when intent pins a warehouse, otherwise omit for unscoped discovery: `sl_query`, `discover_data`, and `dictionary_search`. Never pass `connectionId` to `connection_list`, `wiki_search`, `wiki_read`, or `memory_ingest_status`. If intent is ambiguous for a required-or-scoped tool, ask the user which warehouse before calling.
- `connectionId` scoping when `connection_list` shows multiple connections:
- Always pass it: `entity_details`, `sl_read_source`, `sql_execution`.
- Pass it when intent pins a warehouse, otherwise omit for unscoped discovery: `sl_query`, `discover_data`, `dictionary_search`.
- `memory_ingest`: pass it for warehouse-specific knowledge (e.g. "in our warehouse"); without it the memory lands as wiki-only and cannot update the semantic layer.
- Never pass it: `connection_list`, `wiki_search`, `wiki_read`, `memory_ingest_status`.
- If scoping is required but intent is ambiguous, ask which warehouse before calling.
- Show compact result tables for small outputs. For broad results, summarize the top findings and mention the applied limit.
- Ask a concise clarification only when the metric, date range, entity, or grain is genuinely ambiguous and cannot be inferred from context.
</rules>

View file

@ -6,6 +6,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { LocalMetabaseDiscoveryCache } from '../../../src/context/ingest/adapters/metabase/local-source-state-store.js';
import { getLocalIngestStatus, runLocalMetabaseIngest } from '../../../src/context/ingest/local-ingest.js';
import { ingestReportOutcome } from '../../../src/context/ingest/reports.js';
import type { ChunkResult, FetchContext, SourceAdapter } from '../../../src/context/ingest/types.js';
class TestAgentRunner implements AgentRunnerPort {
@ -202,6 +203,24 @@ describe('runLocalMetabaseIngest', () => {
expect(result.children[1]?.report.body.failedWorkUnits).toEqual(['metabase-db-2']);
});
it('keeps a child that saved memory out of all_failed when another child fails', async () => {
await seedMetabaseState();
const agentRunner = new TestAgentRunner();
const ids = ['metabase-child-1', 'metabase-child-2'];
const result = await runLocalMetabaseIngest({
project,
adapters: [new FakeMetabaseSourceAdapter()],
metabaseConnectionId: 'prod-metabase',
agentRunner,
jobIdFactory: () => ids.shift() ?? 'metabase-child-extra',
});
expect(result.status).toBe('partial_failure');
expect(ingestReportOutcome(result.children[0].report)).toBe('done');
expect(ingestReportOutcome(result.children[1].report)).toBe('error');
});
it('captures fetch-time child failures and continues later mappings', async () => {
await seedMetabaseState();
project.config.connections.warehouse_c = { driver: 'postgres', url: 'postgres://localhost/c' };

View file

@ -166,7 +166,7 @@ describe('memory-flow event mapping', () => {
runId: 'run-1',
connectionId: 'warehouse',
adapter: 'lookml',
status: 'error',
status: 'done',
sourceDir: null,
syncId: 'sync-2',
reportId: 'report-1',
@ -308,7 +308,7 @@ describe('memory-flow event mapping', () => {
sourceReportPath: 'report-1',
fallbackReason: null,
});
expect(replay.status).toBe('error');
expect(replay.status).toBe('done');
expect(replay.reportId).toBe('report-1');
expect(replay.reportPath).toBe('report-1');
expect(replay.events[0]).toMatchObject({ type: 'source_acquired', emittedAt: '2026-05-01T10:00:00.000Z' });

View file

@ -0,0 +1,71 @@
import { describe, expect, it } from 'vitest';
import { ingestReportOutcome } from '../../../src/context/ingest/reports.js';
import type { IngestReportSnapshot } from '../../../src/context/ingest/reports.js';
function report(body: Partial<IngestReportSnapshot['body']>): IngestReportSnapshot {
return {
id: 'r',
runId: 'run',
jobId: 'job',
connectionId: 'warehouse',
sourceKey: 'metabase',
createdAt: '2026-05-29T00:00:00.000Z',
body: {
syncId: 'sync',
diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
commitSha: null,
workUnits: [],
failedWorkUnits: [],
reconciliationSkipped: false,
conflictsResolved: [],
evictionsApplied: [],
unmappedFallbacks: [],
evictionInputs: [],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
provenanceRows: [],
toolTranscripts: [],
...body,
},
};
}
const savingWorkUnit = {
unitKey: 'ok',
rawFiles: ['cards/1.json'],
status: 'success' as const,
actions: [{ target: 'sl' as const, type: 'updated' as const, key: 'warehouse.orders', detail: 'measure' }],
touchedSlSources: [],
};
const failedWorkUnit = {
unitKey: 'bad',
rawFiles: ['cards/2.json'],
status: 'failed' as const,
reason: 'tool write failed',
actions: [],
touchedSlSources: [],
};
describe('ingestReportOutcome', () => {
it('returns done when there are no failed work units', () => {
expect(ingestReportOutcome(report({ workUnits: [savingWorkUnit] }))).toBe('done');
});
it('returns partial when failed work units coexist with saved memory', () => {
expect(
ingestReportOutcome(report({ workUnits: [savingWorkUnit, failedWorkUnit], failedWorkUnits: ['bad'] })),
).toBe('partial');
});
it('returns error when failed work units produced no saved memory', () => {
expect(ingestReportOutcome(report({ workUnits: [failedWorkUnit], failedWorkUnits: ['bad'] }))).toBe('error');
});
it('returns error for a stage-level failure even if artifacts were recorded', () => {
expect(ingestReportOutcome(report({ status: 'failed', workUnits: [savingWorkUnit], failedWorkUnits: [] }))).toBe(
'error',
);
});
});

View file

@ -65,7 +65,7 @@
},
"limit": {
"default": 10,
"description": "Maximum wiki pages to return. Defaults to 10.",
"description": "Maximum wiki pages to return.",
"type": "integer",
"minimum": 1,
"maximum": 50
@ -307,7 +307,7 @@
{
"name": "sl_query",
"title": "Semantic Layer Query",
"description": "Execute a semantic-layer query and return rows, headers, generated SQL, and plan details. Example: sl_query({ connectionId: \"warehouse\", measures: [\"orders.order_count\"], dimensions: [{ field: \"orders.created_at\", granularity: \"month\" }] }).",
"description": "Execute a semantic-layer query and return headers, rows, and total row count, plus correctness notes (e.g. compile-only or fan-out) when relevant. The generated SQL and full query plan are omitted by default; request them with include: [\"sql\"] and/or include: [\"plan\"]. Example: sl_query({ connectionId: \"warehouse\", measures: [\"orders.order_count\"], dimensions: [{ field: \"orders.created_at\", granularity: \"month\" }], include: [\"sql\"] }).",
"inputSchema": {
"type": "object",
"properties": {
@ -403,7 +403,7 @@
},
"direction": {
"default": "asc",
"description": "Sort direction: \"asc\" or \"desc\". Defaults to \"asc\".",
"description": "Sort direction for this field.",
"type": "string",
"enum": [
"asc",
@ -418,15 +418,27 @@
},
"limit": {
"default": 1000,
"description": "Maximum rows to return. Defaults to 1000.",
"description": "Maximum rows to return.",
"type": "integer",
"minimum": 0,
"maximum": 9007199254740991
},
"include_empty": {
"default": true,
"description": "Whether to include empty dimension groups. Defaults to true.",
"description": "Whether to include empty dimension groups.",
"type": "boolean"
},
"include": {
"default": [],
"description": "Extra detail to attach to the response: \"sql\" for the generated SQL, \"plan\" for the full query plan.",
"type": "array",
"items": {
"type": "string",
"enum": [
"plan",
"sql"
]
}
}
},
"required": [
@ -443,9 +455,6 @@
"dialect": {
"type": "string"
},
"sql": {
"type": "string"
},
"headers": {
"type": "array",
"items": {
@ -462,6 +471,15 @@
"totalRows": {
"type": "number"
},
"notes": {
"type": "array",
"items": {
"type": "string"
}
},
"sql": {
"type": "string"
},
"plan": {
"type": "object",
"propertyNames": {
@ -471,7 +489,6 @@
}
},
"required": [
"sql",
"headers",
"rows",
"totalRows"
@ -1241,8 +1258,8 @@
}
},
"limit": {
"description": "Maximum refs to return. Defaults to 15.",
"default": 15,
"description": "Maximum refs to return.",
"default": 10,
"type": "integer",
"minimum": 1,
"maximum": 50
@ -1396,7 +1413,7 @@
"description": "Parser-validated read-only SQL, e.g. \"select count(*) from public.orders\"."
},
"maxRows": {
"description": "Maximum rows to return. Defaults to 1000.",
"description": "Maximum rows to return.",
"default": 1000,
"type": "integer",
"minimum": 1,

View file

@ -347,16 +347,12 @@ describe('createKtxMcpServer', () => {
content: [
{
type: 'text',
text: JSON.stringify(
{
headers: ['status', 'count'],
headerTypes: ['text', 'bigint'],
rows: [['paid', 42]],
rowCount: 1,
},
null,
2,
),
text: JSON.stringify({
headers: ['status', 'count'],
headerTypes: ['text', 'bigint'],
rows: [['paid', 42]],
rowCount: 1,
}),
},
],
structuredContent: {
@ -638,6 +634,92 @@ describe('createKtxMcpServer', () => {
);
});
it('sl_query default response omits plan and sql but keeps compile-only and fan-out notes', async () => {
const fake = makeFakeServer();
const semanticLayer: KtxSemanticLayerMcpPort = {
readSource: vi.fn(),
query: vi.fn<KtxSemanticLayerMcpPort['query']>().mockResolvedValue({
connectionId: 'warehouse',
dialect: 'postgres',
sql: 'select count(*) from public.orders',
headers: ['order_count'],
rows: [],
totalRows: 0,
plan: {
sources_used: ['orders'],
has_fan_out: true,
fan_out_description: 'orders fans out across line_items',
execution: { mode: 'compile_only', reason: 'No execution adapter configured.' },
},
}),
};
createKtxMcpServer({
server: fake.server,
userContext: { userId: 'local-user' },
contextTools: { semanticLayer },
});
const result = await getTool(fake.tools, 'sl_query').handler({
connectionId: 'warehouse',
measures: ['orders.order_count'],
});
expect(result).toMatchObject({
structuredContent: {
connectionId: 'warehouse',
dialect: 'postgres',
headers: ['order_count'],
rows: [],
totalRows: 0,
notes: ['No execution adapter configured.', 'orders fans out across line_items'],
},
});
const structured = (result as { structuredContent: Record<string, unknown> }).structuredContent;
expect(structured.sql).toBeUndefined();
expect(structured.plan).toBeUndefined();
});
it('sl_query attaches sql and plan only when include requests them', async () => {
const fake = makeFakeServer();
const plan = { sources_used: ['orders'], execution: { mode: 'executed' } };
const semanticLayer: KtxSemanticLayerMcpPort = {
readSource: vi.fn(),
query: vi.fn<KtxSemanticLayerMcpPort['query']>().mockResolvedValue({
connectionId: 'warehouse',
dialect: 'postgres',
sql: 'select count(*) from public.orders',
headers: ['order_count'],
rows: [[3]],
totalRows: 1,
plan,
}),
};
createKtxMcpServer({
server: fake.server,
userContext: { userId: 'local-user' },
contextTools: { semanticLayer },
});
const result = await getTool(fake.tools, 'sl_query').handler({
connectionId: 'warehouse',
measures: ['orders.order_count'],
include: ['plan', 'sql'],
});
expect(result).toMatchObject({
structuredContent: {
sql: 'select count(*) from public.orders',
plan,
rows: [[3]],
totalRows: 1,
},
});
const structured = (result as { structuredContent: Record<string, unknown> }).structuredContent;
expect(structured.notes).toBeUndefined();
});
it('entity_details rejects sql-style schema table ref aliases', async () => {
const fake = makeFakeServer();
const entityDetails = makeAllContextTools().entityDetails!;
@ -838,7 +920,7 @@ describe('createKtxMcpServer', () => {
connectionId: '00000000-0000-4000-8000-000000000001',
}),
).resolves.toEqual({
content: [{ type: 'text', text: JSON.stringify({ runId: 'run-1' }, null, 2) }],
content: [{ type: 'text', text: JSON.stringify({ runId: 'run-1' }) }],
structuredContent: { runId: 'run-1' },
});
expect(ingest.ingest).toHaveBeenCalledWith({
@ -865,21 +947,17 @@ describe('createKtxMcpServer', () => {
content: [
{
type: 'text',
text: JSON.stringify(
{
runId: 'run-1',
status: 'done',
stage: 'done',
done: true,
captured: { wiki: ['revenue'], sl: [], xrefs: [] },
error: null,
commitHash: 'abc123',
skillsLoaded: ['wiki_capture'],
signalDetected: true,
},
null,
2,
),
text: JSON.stringify({
runId: 'run-1',
status: 'done',
stage: 'done',
done: true,
captured: { wiki: ['revenue'], sl: [], xrefs: [] },
error: null,
commitHash: 'abc123',
skillsLoaded: ['wiki_capture'],
signalDetected: true,
}),
},
],
structuredContent: {
@ -1087,19 +1165,15 @@ describe('createKtxMcpServer', () => {
content: [
{
type: 'text',
text: JSON.stringify(
{
connections: [
{
id: '00000000-0000-4000-8000-000000000001',
name: 'Warehouse',
connectionType: 'POSTGRES',
},
],
},
null,
2,
),
text: JSON.stringify({
connections: [
{
id: '00000000-0000-4000-8000-000000000001',
name: 'Warehouse',
connectionType: 'POSTGRES',
},
],
}),
},
],
structuredContent: {

View file

@ -403,7 +403,7 @@ describe('runKtxIngest', () => {
expect(io.stderr()).toContain('Metabase ingest: prod-metabase');
});
it('returns a non-zero code when Metabase fanout has failed children', async () => {
it('returns a non-zero code when a Metabase fanout child fully fails', async () => {
const projectDir = join(tempDir, 'project');
await writeMetabaseConfig(projectDir);
const io = makeIo();
@ -441,7 +441,7 @@ describe('runKtxIngest', () => {
{
runLocalMetabaseIngest: async () => ({
metabaseConnectionId: 'prod-metabase',
status: 'partial_failure',
status: 'all_failed',
totals: { workUnits: 1, failedWorkUnits: 1 },
children: [
{
@ -467,9 +467,83 @@ describe('runKtxIngest', () => {
),
).resolves.toBe(1);
expect(io.stdout()).toContain('Metabase fanout: partial_failure');
expect(io.stdout()).toContain('Failed tasks: 1');
expect(io.stdout()).toContain('Metabase fanout: all_failed');
expect(io.stdout()).toContain('status=error');
});
it('exits 0 and reports status=partial when a Metabase child saved memory despite a failure', async () => {
const projectDir = join(tempDir, 'project');
await writeMetabaseConfig(projectDir);
const io = makeIo();
const report = localFakeBundleReport('metabase-child-1', {
id: 'report-metabase-child-1',
runId: 'run-a',
jobId: 'metabase-child-1',
connectionId: 'warehouse_a',
sourceKey: 'metabase',
body: {
failedWorkUnits: ['metabase-db-2'],
workUnits: [
{
unitKey: 'metabase-db-1',
rawFiles: ['cards/1.json'],
status: 'success',
actions: [{ target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'measure' }],
touchedSlSources: [],
},
{
unitKey: 'metabase-db-2',
rawFiles: ['cards/2.json'],
status: 'failed',
reason: 'bad SQL',
actions: [],
touchedSlSources: [],
},
],
},
});
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'prod-metabase',
adapter: 'metabase',
outputMode: 'plain',
},
io.io,
{
runLocalMetabaseIngest: async () => ({
metabaseConnectionId: 'prod-metabase',
status: 'partial_failure',
totals: { workUnits: 2, failedWorkUnits: 1 },
children: [
{
jobId: 'metabase-child-1',
metabaseConnectionId: 'prod-metabase',
metabaseDatabaseId: 1,
targetConnectionId: 'warehouse_a',
result: {
jobId: 'metabase-child-1',
runId: 'run-a',
syncId: 'sync-a',
diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
workUnitCount: 2,
failedWorkUnits: ['metabase-db-2'],
artifactsWritten: 1,
commitSha: 'abc',
},
report,
},
],
}),
},
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Metabase fanout: partial_failure');
expect(io.stdout()).toContain('status=partial');
expect(io.stderr()).toContain('Metabase ingest: prod-metabase');
});
@ -1140,6 +1214,63 @@ describe('runKtxIngest', () => {
expect(io.stdout()).toContain('Status: error\n');
});
it('exits 0 and reports Status: partial when a single-source ingest saved memory despite a failure', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const partialReport = localFakeBundleReport('local-job-partial', {
connectionId: 'warehouse',
sourceKey: 'fake',
body: {
failedWorkUnits: ['orders-bad'],
workUnits: [
{
unitKey: 'orders-ok',
rawFiles: ['orders/orders.json'],
status: 'success',
actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }],
touchedSlSources: [],
},
{
unitKey: 'orders-bad',
rawFiles: ['orders/bad.json'],
status: 'failed',
reason: 'writer tool failed',
actions: [],
touchedSlSources: [],
},
],
},
});
const runLocal = vi.fn(async (_input: RunLocalIngestOptions) => ({
result: {
jobId: 'local-job-partial',
runId: partialReport.runId,
syncId: partialReport.body.syncId,
diffSummary: partialReport.body.diffSummary,
workUnitCount: partialReport.body.workUnits.length,
failedWorkUnits: partialReport.body.failedWorkUnits,
artifactsWritten: 1,
commitSha: partialReport.body.commitSha,
},
report: partialReport,
}));
const io = makeIo();
await expect(
runKtxIngest(
{ command: 'run', projectDir, connectionId: 'warehouse', adapter: 'fake', sourceDir, outputMode: 'plain' },
io.io,
{ runLocalIngest: runLocal, jobIdFactory: () => 'local-job-partial' },
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Status: partial\n');
});
it('prints trace path and error status for stored failed ingest reports', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);

View file

@ -398,6 +398,59 @@ describe('setup status', () => {
expect(rendered).toContain('KTX context built: yes');
});
it('reports context ready after a partial ingest report saved memory', async () => {
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'setup:',
' database_connection_ids:',
' - warehouse',
'connections:',
' warehouse:',
' driver: postgres',
' url: env:DATABASE_URL',
'ingest:',
' embeddings:',
' backend: none',
' dimensions: 8',
'',
].join('\n'),
'utf-8',
);
await writeKtxSetupState(tempDir, { completed_steps: ['project', 'databases'] });
await persistLocalBundleReport(
tempDir,
localFakeBundleReport('warehouse-job-partial', {
connectionId: 'warehouse',
sourceKey: 'fake',
body: {
failedWorkUnits: ['orders-bad'],
workUnits: [
{
unitKey: 'orders-ok',
rawFiles: ['orders/orders.json'],
status: 'success',
actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }],
touchedSlSources: [],
},
{
unitKey: 'orders-bad',
rawFiles: ['orders/bad.json'],
status: 'failed',
reason: 'writer tool failed',
actions: [],
touchedSlSources: [],
},
],
},
}),
);
const status = await readKtxSetupStatus(tempDir);
expect(status.context).toMatchObject({ ready: true, status: 'completed' });
});
it('formats plain and JSON setup status payloads', async () => {
const status = await readKtxSetupStatus(tempDir);
const rendered = formatKtxSetupStatus(status);