diff --git a/.github/workflows/star-history.yml b/.github/workflows/star-history.yml
new file mode 100644
index 00000000..b7d90c43
--- /dev/null
+++ b/.github/workflows/star-history.yml
@@ -0,0 +1,72 @@
+name: Refresh star history chart
+
+on:
+  schedule:
+    # Twice daily at 06:00 and 18:00 UTC.
+    - cron: "0 6,18 * * *"
+  workflow_dispatch:
+
+permissions:
+  contents: write
+
+env:
+  DO_NOT_TRACK: "1"
+  KTX_TELEMETRY_DISABLED: "1"
+  NEXT_TELEMETRY_DISABLED: "1"
+
+concurrency:
+  group: star-history-refresh
+  cancel-in-progress: true
+
+jobs:
+  refresh:
+    name: Regenerate assets/star-history.svg
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          # RELEASE_PAT can push to the protected main branch; the default
+          # GITHUB_TOKEN is rejected by the branch-protection hook (GH006).
+          token: ${{ secrets.RELEASE_PAT }}
+
+      - name: Fetch fresh star-history SVG
+        run: |
+          set -euo pipefail
+          # cachebust forces star-history to regenerate instead of serving its
+          # own server-side cache; --location follows the slug-normalizing 301.
+          url="https://api.star-history.com/svg?repos=Kaelio/ktx&type=Date&cachebust=${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
+          curl --fail --location --silent --show-error \
+            --retry 3 --retry-delay 5 --max-time 60 \
+            -o assets/star-history.svg.new "$url"
+          # Guard against error pages / truncated responses: require an <svg root tag.
+          if ! grep -q "<svg" assets/star-history.svg.new; then
+            echo "Downloaded file is not a valid SVG; aborting." >&2
+            exit 1
+          fi
+          if [ "$(wc -c < assets/star-history.svg.new)" -lt 1000 ]; then
+            echo "Downloaded SVG is suspiciously small; aborting." >&2
+            exit 1
+          fi
+          # The star-history API returns the SVG without a trailing newline,
+          # which end-of-file-fixer rewrites whenever pre-commit runs
+          # --all-files on a PR. Because the refresh commit below uses [skip ci],
+          # the hook never runs against it here, so an un-normalized file
+          # silently breaks the pre-commit check on every open PR. Normalize to
+          # exactly one trailing newline before committing.
+          printf '%s\n' "$(cat assets/star-history.svg.new)" > assets/star-history.svg
+          rm -f assets/star-history.svg.new
+
+      - name: Commit if changed
+        run: |
+          set -euo pipefail
+          if git diff --quiet -- assets/star-history.svg; then
+            echo "Star-history chart unchanged; nothing to commit."
+            exit 0
+          fi
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add assets/star-history.svg
+          # [skip ci] keeps this housekeeping commit from triggering KTX CI.
+          git commit -m "chore: refresh star history chart [skip ci]"
+          git push
diff --git a/README.md b/README.md
index 23b2fa0a..686ece22 100644
--- a/README.md
+++ b/README.md
@@ -248,6 +248,6 @@ event catalog and opt-out options.
-
+
diff --git a/assets/star-history.svg b/assets/star-history.svg
new file mode 100644
index 00000000..3f6c4a04
--- /dev/null
+++ b/assets/star-history.svg
@@ -0,0 +1 @@
+
diff --git a/packages/cli/src/context/ingest/local-ingest.ts b/packages/cli/src/context/ingest/local-ingest.ts
index 2351d420..ec8a72f4 100644
--- a/packages/cli/src/context/ingest/local-ingest.ts
+++ b/packages/cli/src/context/ingest/local-ingest.ts
@@ -13,6 +13,7 @@ import { localPullConfigForAdapter, type DefaultLocalIngestAdaptersOptions } fro
import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js';
import type { MemoryFlowEventSink } from './memory-flow/types.js';
import { buildSyncId } from './raw-sources-paths.js';
+import { ingestReportOutcome } from './reports.js';
import type { IngestReportBody, IngestReportSnapshot } from './reports.js';
import { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js';
import type { IngestBundleResult, IngestJobContext, IngestJobPhase, IngestTrigger, SourceAdapter } from './types.js';
@@ -79,7 +80,7 @@ export interface LocalMetabaseFanoutProgress {
metabaseDatabaseId: number;
targetConnectionId: string;
jobId: string;
- status: 'done' | 'failed';
+ status: 'done' | 'partial' | 'failed';
}): void;
}
@@ -232,11 +233,11 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise child.report.body.failedWorkUnits.length === 0).length;
- if (succeeded === children.length) {
+ const outcomes = children.map((child) => ingestReportOutcome(child.report));
+ if (outcomes.every((outcome) => outcome === 'done')) {
return 'all_succeeded';
}
- if (succeeded === 0) {
+ if (outcomes.every((outcome) => outcome === 'error')) {
return 'all_failed';
}
return 'partial_failure';
@@ -401,12 +402,13 @@ export async function runLocalMetabaseIngest(
error,
});
}
+ const childOutcome = ingestReportOutcome(child.report);
options.progress?.onMetabaseChildCompleted?.({
metabaseConnectionId,
metabaseDatabaseId: childPlan.metabaseDatabaseId,
targetConnectionId,
jobId: child.report.jobId,
- status: child.report.body.failedWorkUnits.length > 0 ? 'failed' : 'done',
+ status: childOutcome === 'error' ? 'failed' : childOutcome,
});
children.push({
jobId: child.report.jobId,
diff --git a/packages/cli/src/context/ingest/memory-flow/events.ts b/packages/cli/src/context/ingest/memory-flow/events.ts
index 020ce5ae..92cebe0f 100644
--- a/packages/cli/src/context/ingest/memory-flow/events.ts
+++ b/packages/cli/src/context/ingest/memory-flow/events.ts
@@ -1,5 +1,6 @@
import type { MemoryAction } from '../../../context/memory/types.js';
import type { LocalIngestRunRecord } from '../local-stage-ingest.js';
+import { ingestReportOutcome } from '../reports.js';
import type { IngestReportSnapshot } from '../reports.js';
import type {
MemoryFlowActionDetail,
@@ -72,7 +73,7 @@ function fullModeMetadata(input: {
}
function reportStatus(report: IngestReportSnapshot): MemoryFlowReplayInput['status'] {
- return report.body.failedWorkUnits.length > 0 ? 'error' : 'done';
+ return ingestReportOutcome(report) === 'error' ? 'error' : 'done';
}
function reportCreatedEvent(report: IngestReportSnapshot): MemoryFlowEvent {
diff --git a/packages/cli/src/context/ingest/reports.ts b/packages/cli/src/context/ingest/reports.ts
index ea02a31a..09f92170 100644
--- a/packages/cli/src/context/ingest/reports.ts
+++ b/packages/cli/src/context/ingest/reports.ts
@@ -146,6 +146,20 @@ export function savedMemoryCountsForReport(report: IngestReportSnapshot): Ingest
};
}
+/** @internal */
+export type IngestReportOutcome = 'done' | 'partial' | 'error';
+
+/** Classifies a report as 'done', 'partial' (failed work units, yet a non-zero saved count) or 'error'. */
+export function ingestReportOutcome(report: IngestReportSnapshot): IngestReportOutcome {
+  const { status, failedWorkUnits } = report.body;
+  if (status === 'failed') return 'error';
+  if (failedWorkUnits.length === 0) return 'done';
+  // Some work units failed: the outcome now depends on the saved wiki + SL counts.
+  const saved = savedMemoryCountsForReport(report);
+  const savedTotal = saved.wikiCount + saved.slCount;
+  return savedTotal > 0 ? 'partial' : 'error';
+}
+
export function buildStageIndexFromReportBody(jobId: string, connectionId: string, body: IngestReportBody): StageIndex {
return {
jobId,
diff --git a/packages/cli/src/context/mcp/__snapshots__/mcp-tools-list.json b/packages/cli/src/context/mcp/__snapshots__/mcp-tools-list.json
deleted file mode 100644
index 10cb0b77..00000000
--- a/packages/cli/src/context/mcp/__snapshots__/mcp-tools-list.json
+++ /dev/null
@@ -1,1620 +0,0 @@
-[
- {
- "name": "connection_list",
- "title": "Connection List",
- "description": "List configured read-only data connections available to this KTX project. Use this before connection-scoped tools when the project may have multiple warehouses.",
- "inputSchema": {
- "type": "object",
- "properties": {},
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "connections": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "id": {
- "type": "string"
- },
- "name": {
- "type": "string"
- },
- "connectionType": {
- "type": "string"
- }
- },
- "required": [
- "id",
- "name",
- "connectionType"
- ],
- "additionalProperties": false
- }
- }
- },
- "required": [
- "connections"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Connection List",
- "readOnlyHint": true,
- "idempotentHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "wiki_search",
- "title": "Wiki Search",
- "description": "Search KTX wiki pages for reusable business context. Example: wiki_search({ query: \"revenue recognition\", limit: 5 }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "query": {
- "type": "string",
- "minLength": 1,
- "description": "Natural-language wiki search query, e.g. \"revenue recognition policy\"."
- },
- "limit": {
- "default": 10,
- "description": "Maximum wiki pages to return. Defaults to 10.",
- "type": "integer",
- "minimum": 1,
- "maximum": 50
- }
- },
- "required": [
- "query"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "results": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "key": {
- "type": "string"
- },
- "path": {
- "type": "string"
- },
- "scope": {
- "type": "string",
- "enum": [
- "GLOBAL",
- "USER"
- ]
- },
- "summary": {
- "type": "string"
- },
- "score": {
- "type": "number"
- },
- "matchReasons": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "lanes": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "lane": {
- "type": "string"
- },
- "status": {
- "type": "string"
- },
- "requestedCandidatePoolLimit": {
- "type": "number"
- },
- "effectiveCandidatePoolLimit": {
- "type": "number"
- },
- "returnedCandidateCount": {
- "type": "number"
- },
- "weight": {
- "type": "number"
- },
- "reason": {
- "type": "string"
- }
- },
- "required": [
- "lane",
- "status",
- "requestedCandidatePoolLimit",
- "effectiveCandidatePoolLimit",
- "returnedCandidateCount",
- "weight"
- ],
- "additionalProperties": false
- }
- }
- },
- "required": [
- "key",
- "path",
- "scope",
- "summary",
- "score"
- ],
- "additionalProperties": false
- }
- },
- "totalFound": {
- "type": "number"
- }
- },
- "required": [
- "results",
- "totalFound"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Wiki Search",
- "readOnlyHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "wiki_read",
- "title": "Wiki Read",
- "description": "Read a KTX wiki page by key returned from wiki_search. Example: wiki_read({ key: \"global/revenue\" }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "key": {
- "type": "string",
- "minLength": 1,
- "description": "Wiki page key returned by wiki_search, e.g. \"global/revenue\"."
- }
- },
- "required": [
- "key"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "key": {
- "type": "string"
- },
- "summary": {
- "type": "string"
- },
- "content": {
- "type": "string"
- },
- "scope": {
- "type": "string",
- "enum": [
- "GLOBAL",
- "USER"
- ]
- },
- "tags": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "refs": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "slRefs": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- },
- "required": [
- "key",
- "summary",
- "content",
- "scope"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Wiki Read",
- "readOnlyHint": true,
- "idempotentHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "sl_read_source",
- "title": "Semantic Layer Read Source",
- "description": "Read a semantic-layer YAML source by connection id and source name. Example: sl_read_source({ connectionId: \"warehouse\", sourceName: \"orders\" }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "connectionId": {
- "type": "string",
- "minLength": 1,
- "description": "Connection id that owns the semantic-layer source."
- },
- "sourceName": {
- "type": "string",
- "minLength": 1,
- "description": "Semantic-layer source name without \".yaml\", e.g. \"orders\"."
- }
- },
- "required": [
- "connectionId",
- "sourceName"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "sourceName": {
- "type": "string"
- },
- "yaml": {
- "type": "string"
- }
- },
- "required": [
- "sourceName",
- "yaml"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Semantic Layer Read Source",
- "readOnlyHint": true,
- "idempotentHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "sl_query",
- "title": "Semantic Layer Query",
- "description": "Execute a semantic-layer query and return rows, headers, generated SQL, and plan details. Example: sl_query({ connectionId: \"warehouse\", measures: [\"orders.order_count\"], dimensions: [{ field: \"orders.created_at\", granularity: \"month\" }] }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "connectionId": {
- "description": "Connection id to query. Omit only when the project has exactly one configured connection.",
- "type": "string",
- "minLength": 1
- },
- "measures": {
- "minItems": 1,
- "type": "array",
- "items": {
- "anyOf": [
- {
- "type": "string",
- "description": "Semantic-layer measure key, e.g. \"orders.order_count\"."
- },
- {
- "type": "object",
- "properties": {
- "expr": {
- "type": "string",
- "minLength": 1,
- "description": "Ad hoc aggregate expression, e.g. \"sum(orders.amount)\"."
- },
- "name": {
- "type": "string",
- "minLength": 1,
- "description": "Alias for the ad hoc measure, e.g. \"gross_revenue\"."
- }
- },
- "required": [
- "expr",
- "name"
- ]
- }
- ]
- },
- "description": "Measures to select. Use semantic-layer keys when available."
- },
- "dimensions": {
- "default": [],
- "description": "Dimensions to group by. Use {field, granularity?} entries.",
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "field": {
- "type": "string",
- "minLength": 1,
- "description": "Dimension to group by, e.g. \"orders.created_at\" or \"orders.status\"."
- },
- "granularity": {
- "description": "Time grain for time dimensions: day, week, month, quarter, or year.",
- "type": "string",
- "minLength": 1
- }
- },
- "required": [
- "field"
- ]
- }
- },
- "filters": {
- "default": [],
- "description": "Semantic-layer filter expressions to apply.",
- "type": "array",
- "items": {
- "type": "string",
- "description": "Semantic-layer filter expression, e.g. \"orders.status = paid\"."
- }
- },
- "segments": {
- "default": [],
- "description": "Semantic-layer segment keys to apply.",
- "type": "array",
- "items": {
- "type": "string",
- "description": "Semantic-layer segment key to apply."
- }
- },
- "order_by": {
- "default": [],
- "description": "Sort clauses. Use {field, direction?} entries.",
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "field": {
- "type": "string",
- "minLength": 1,
- "description": "Field/measure/dimension id to order by, e.g. \"orders.created_at\", a dimension key like \"mart_nrr_quarterly.quarter_label\", or a measure alias."
- },
- "direction": {
- "default": "asc",
- "description": "Sort direction: \"asc\" or \"desc\". Defaults to \"asc\".",
- "type": "string",
- "enum": [
- "asc",
- "desc"
- ]
- }
- },
- "required": [
- "field"
- ]
- }
- },
- "limit": {
- "default": 1000,
- "description": "Maximum rows to return. Defaults to 1000.",
- "type": "integer",
- "minimum": 0,
- "maximum": 9007199254740991
- },
- "include_empty": {
- "default": true,
- "description": "Whether to include empty dimension groups. Defaults to true.",
- "type": "boolean"
- }
- },
- "required": [
- "measures"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "connectionId": {
- "type": "string"
- },
- "dialect": {
- "type": "string"
- },
- "sql": {
- "type": "string"
- },
- "headers": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "rows": {
- "type": "array",
- "items": {
- "type": "array",
- "items": {}
- }
- },
- "totalRows": {
- "type": "number"
- },
- "plan": {
- "type": "object",
- "propertyNames": {
- "type": "string"
- },
- "additionalProperties": {}
- }
- },
- "required": [
- "sql",
- "headers",
- "rows",
- "totalRows"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Semantic Layer Query",
- "readOnlyHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "entity_details",
- "title": "Entity Details",
- "description": "Read table and column metadata from the latest live-database scan snapshot. Example: entity_details({ connectionId: \"warehouse\", entities: [{ table: { catalog: null, db: \"public\", name: \"orders\" }, columns: [\"id\"] }] }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "connectionId": {
- "type": "string",
- "minLength": 1,
- "description": "Connection id whose latest scan snapshot should be read."
- },
- "entities": {
- "minItems": 1,
- "maxItems": 20,
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "table": {
- "anyOf": [
- {
- "type": "string",
- "minLength": 1
- },
- {
- "type": "object",
- "properties": {
- "catalog": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Catalog/project/database. Use null when not applicable."
- },
- "db": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Schema/database/dataset. Use null when not applicable."
- },
- "name": {
- "type": "string",
- "minLength": 1,
- "description": "Table name."
- }
- },
- "required": [
- "catalog",
- "db",
- "name"
- ]
- }
- ],
- "description": "Table display string or canonical object ref."
- },
- "columns": {
- "description": "Optional column filter.",
- "type": "array",
- "items": {
- "type": "string",
- "minLength": 1,
- "description": "Column name to inspect."
- }
- }
- },
- "required": [
- "table"
- ]
- },
- "description": "Tables or columns to inspect. Maximum 20 entities."
- }
- },
- "required": [
- "connectionId",
- "entities"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "results": {
- "type": "array",
- "items": {
- "anyOf": [
- {
- "type": "object",
- "properties": {
- "ok": {
- "type": "boolean",
- "const": true
- },
- "connectionId": {
- "type": "string"
- },
- "tableRef": {
- "type": "object",
- "properties": {
- "catalog": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "db": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "name": {
- "type": "string"
- }
- },
- "required": [
- "catalog",
- "db",
- "name"
- ],
- "additionalProperties": false
- },
- "display": {
- "type": "string"
- },
- "kind": {
- "type": "string",
- "enum": [
- "table",
- "view",
- "external",
- "event_stream"
- ]
- },
- "comment": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "estimatedRows": {
- "anyOf": [
- {
- "type": "number"
- },
- {
- "type": "null"
- }
- ]
- },
- "columns": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "name": {
- "type": "string"
- },
- "nativeType": {
- "type": "string"
- },
- "normalizedType": {
- "type": "string"
- },
- "dimensionType": {
- "type": "string",
- "enum": [
- "time",
- "string",
- "number",
- "boolean"
- ]
- },
- "nullable": {
- "type": "boolean"
- },
- "primaryKey": {
- "type": "boolean"
- },
- "comment": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- }
- },
- "required": [
- "name",
- "nativeType",
- "normalizedType",
- "dimensionType",
- "nullable",
- "primaryKey",
- "comment"
- ],
- "additionalProperties": false
- }
- },
- "foreignKeys": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "fromColumn": {
- "type": "string"
- },
- "toCatalog": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "toDb": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "toTable": {
- "type": "string"
- },
- "toColumn": {
- "type": "string"
- },
- "constraintName": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- }
- },
- "required": [
- "fromColumn",
- "toCatalog",
- "toDb",
- "toTable",
- "toColumn",
- "constraintName"
- ],
- "additionalProperties": false
- }
- },
- "snapshot": {
- "type": "object",
- "properties": {
- "syncId": {
- "type": "string"
- },
- "extractedAt": {
- "type": "string"
- },
- "scanRunId": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- }
- },
- "required": [
- "syncId",
- "extractedAt",
- "scanRunId"
- ],
- "additionalProperties": false
- }
- },
- "required": [
- "ok",
- "connectionId",
- "tableRef",
- "display",
- "kind",
- "comment",
- "estimatedRows",
- "columns",
- "foreignKeys",
- "snapshot"
- ],
- "additionalProperties": false
- },
- {
- "type": "object",
- "properties": {
- "ok": {
- "type": "boolean",
- "const": false
- },
- "connectionId": {
- "type": "string"
- },
- "table": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "object",
- "properties": {
- "catalog": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "db": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "name": {
- "type": "string"
- }
- },
- "required": [
- "catalog",
- "db",
- "name"
- ],
- "additionalProperties": false
- }
- ]
- },
- "snapshot": {
- "type": "object",
- "properties": {
- "syncId": {
- "type": "string"
- },
- "extractedAt": {
- "type": "string"
- },
- "scanRunId": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- }
- },
- "required": [
- "syncId",
- "extractedAt",
- "scanRunId"
- ],
- "additionalProperties": false
- },
- "error": {
- "type": "object",
- "properties": {
- "code": {
- "type": "string",
- "enum": [
- "scan_missing",
- "table_not_found",
- "ambiguous_table",
- "column_not_found"
- ]
- },
- "message": {
- "type": "string"
- },
- "candidates": {
- "anyOf": [
- {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "tableRef": {
- "type": "object",
- "properties": {
- "catalog": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "db": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "name": {
- "type": "string"
- }
- },
- "required": [
- "catalog",
- "db",
- "name"
- ],
- "additionalProperties": false
- },
- "display": {
- "type": "string"
- }
- },
- "required": [
- "tableRef",
- "display"
- ],
- "additionalProperties": false
- }
- },
- {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- ]
- }
- },
- "required": [
- "code",
- "message"
- ],
- "additionalProperties": false
- }
- },
- "required": [
- "ok",
- "connectionId",
- "table",
- "error"
- ],
- "additionalProperties": false
- }
- ]
- }
- }
- },
- "required": [
- "results"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Entity Details",
- "readOnlyHint": true,
- "idempotentHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "dictionary_search",
- "title": "Dictionary Search",
- "description": "Search profile-sampled warehouse values to locate likely source columns for business values. Example: dictionary_search({ values: [\"Acme Corp\"], connectionId: \"warehouse\" }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "values": {
- "minItems": 1,
- "maxItems": 20,
- "type": "array",
- "items": {
- "type": "string",
- "minLength": 1,
- "description": "Business value to locate, e.g. \"Acme Corp\" or \"enterprise\"."
- },
- "description": "Values to search for in sampled warehouse dictionaries."
- },
- "connectionId": {
- "description": "Optional connection id. Pass it when user intent pins a specific warehouse.",
- "type": "string",
- "minLength": 1
- }
- },
- "required": [
- "values"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "searched": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "connectionId": {
- "type": "string"
- },
- "coverage": {
- "type": "object",
- "properties": {
- "sampledRows": {
- "anyOf": [
- {
- "type": "number"
- },
- {
- "type": "null"
- }
- ]
- },
- "valuesPerColumn": {
- "anyOf": [
- {
- "type": "number"
- },
- {
- "type": "null"
- }
- ]
- },
- "profiledColumns": {
- "type": "number"
- },
- "syncId": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "profiledAt": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- }
- },
- "required": [
- "sampledRows",
- "valuesPerColumn",
- "profiledColumns",
- "syncId",
- "profiledAt"
- ],
- "additionalProperties": false
- },
- "status": {
- "type": "string",
- "enum": [
- "ready",
- "no_profile_artifact",
- "no_candidate_columns"
- ]
- }
- },
- "required": [
- "connectionId",
- "coverage",
- "status"
- ],
- "additionalProperties": false
- }
- },
- "results": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "value": {
- "type": "string"
- },
- "matches": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "connectionId": {
- "type": "string"
- },
- "sourceName": {
- "type": "string"
- },
- "columnName": {
- "type": "string"
- },
- "matchedValue": {
- "type": "string"
- },
- "cardinality": {
- "anyOf": [
- {
- "type": "number"
- },
- {
- "type": "null"
- }
- ]
- }
- },
- "required": [
- "connectionId",
- "sourceName",
- "columnName",
- "matchedValue",
- "cardinality"
- ],
- "additionalProperties": false
- }
- },
- "misses": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "connectionId": {
- "type": "string"
- },
- "reason": {
- "type": "string",
- "enum": [
- "no_profile_artifact",
- "no_candidate_columns",
- "value_not_in_sample"
- ]
- }
- },
- "required": [
- "connectionId",
- "reason"
- ],
- "additionalProperties": false
- }
- }
- },
- "required": [
- "value",
- "matches",
- "misses"
- ],
- "additionalProperties": false
- }
- }
- },
- "required": [
- "searched",
- "results"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Dictionary Search",
- "readOnlyHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "discover_data",
- "title": "Discover Data",
- "description": "Search across KTX wiki pages, semantic-layer sources, measures, dimensions, raw tables, and columns. Example: discover_data({ query: \"monthly orders by customer\", connectionId: \"warehouse\", kinds: [\"sl_source\", \"table\"] }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "query": {
- "type": "string",
- "minLength": 1,
- "description": "Natural-language discovery query, e.g. \"monthly orders by customer\"."
- },
- "connectionId": {
- "description": "Optional connection id. Pass it when user intent pins a specific warehouse.",
- "type": "string",
- "minLength": 1
- },
- "kinds": {
- "description": "Optional kind filter.",
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "wiki",
- "sl_source",
- "sl_measure",
- "sl_dimension",
- "table",
- "column"
- ],
- "description": "Reference kind to include."
- }
- },
- "limit": {
- "description": "Maximum refs to return. Defaults to 15.",
- "default": 15,
- "type": "integer",
- "minimum": 1,
- "maximum": 50
- }
- },
- "required": [
- "query"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "refs": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "kind": {
- "type": "string",
- "enum": [
- "wiki",
- "sl_source",
- "sl_measure",
- "sl_dimension",
- "table",
- "column"
- ]
- },
- "id": {
- "type": "string"
- },
- "score": {
- "type": "number"
- },
- "summary": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "snippet": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "matchedOn": {
- "type": "string",
- "enum": [
- "name",
- "display",
- "description",
- "comment",
- "expr",
- "sample_value",
- "body"
- ]
- },
- "connectionId": {
- "type": "string"
- },
- "tableRef": {
- "type": "object",
- "properties": {
- "catalog": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "db": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "name": {
- "type": "string"
- }
- },
- "required": [
- "catalog",
- "db",
- "name"
- ],
- "additionalProperties": false
- },
- "columnName": {
- "type": "string"
- }
- },
- "required": [
- "kind",
- "id",
- "score",
- "summary",
- "snippet",
- "matchedOn"
- ],
- "additionalProperties": false
- }
- }
- },
- "required": [
- "refs"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Discover Data",
- "readOnlyHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "sql_execution",
- "title": "SQL Execution",
- "description": "Execute one parser-validated read-only SQL query against a configured KTX connection. Example: sql_execution({ connectionId: \"warehouse\", sql: \"select count(*) from public.orders\", maxRows: 100 }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "connectionId": {
- "type": "string",
- "minLength": 1,
- "description": "Connection id to execute against. Required for raw SQL."
- },
- "sql": {
- "type": "string",
- "minLength": 1,
- "description": "Parser-validated read-only SQL, e.g. \"select count(*) from public.orders\"."
- },
- "maxRows": {
- "description": "Maximum rows to return. Defaults to 1000.",
- "default": 1000,
- "type": "integer",
- "minimum": 1,
- "maximum": 10000
- }
- },
- "required": [
- "connectionId",
- "sql"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "headers": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "headerTypes": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "rows": {
- "type": "array",
- "items": {
- "type": "array",
- "items": {}
- }
- },
- "rowCount": {
- "type": "number"
- }
- },
- "required": [
- "headers",
- "rows",
- "rowCount"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "SQL Execution",
- "readOnlyHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "memory_ingest",
- "title": "Memory Ingest",
- "description": "Ingest free-form markdown knowledge into durable KTX memory. Use this for business rules, metric definitions, schema gotchas, recurring findings, or explicit user requests to remember something. Example: memory_ingest({ connectionId: \"warehouse\", content: \"ARR is reported in cents in this warehouse.\" }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "content": {
- "type": "string",
- "minLength": 1,
- "description": "Free-form markdown to ingest. Include the knowledge itself plus any context (source, the user question, why this came up) that the memory agent should consider when triaging into wiki/SL."
- },
- "connectionId": {
- "description": "Scope this memory to a specific connection. Required when the knowledge is warehouse-specific, including measure definitions, schema gotchas, or anything tied to a particular warehouse. Omit only for global wiki knowledge.",
- "type": "string",
- "minLength": 1
- }
- },
- "required": [
- "content"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "runId": {
- "type": "string"
- }
- },
- "required": [
- "runId"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Memory Ingest",
- "destructiveHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- },
- {
- "name": "memory_ingest_status",
- "title": "Memory Ingest Status",
- "description": "Read the current or final status for a memory ingest run. Example: memory_ingest_status({ runId: \"memory-run-1\" }).",
- "inputSchema": {
- "type": "object",
- "properties": {
- "runId": {
- "type": "string",
- "minLength": 1,
- "description": "The memory ingest run id returned by memory_ingest."
- }
- },
- "required": [
- "runId"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#"
- },
- "outputSchema": {
- "type": "object",
- "properties": {
- "runId": {
- "type": "string"
- },
- "status": {
- "type": "string",
- "enum": [
- "running",
- "done",
- "error"
- ]
- },
- "stage": {
- "type": "string"
- },
- "done": {
- "type": "boolean"
- },
- "captured": {
- "type": "object",
- "properties": {
- "wiki": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "sl": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "xrefs": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- },
- "required": [
- "wiki",
- "sl",
- "xrefs"
- ],
- "additionalProperties": false
- },
- "error": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "commitHash": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ]
- },
- "skillsLoaded": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "signalDetected": {
- "type": "boolean"
- }
- },
- "required": [
- "runId",
- "status",
- "stage",
- "done",
- "captured",
- "error",
- "commitHash",
- "skillsLoaded",
- "signalDetected"
- ],
- "$schema": "http://json-schema.org/draft-07/schema#",
- "additionalProperties": false
- },
- "annotations": {
- "title": "Memory Ingest Status",
- "readOnlyHint": true,
- "openWorldHint": false
- },
- "execution": {
- "taskSupport": "forbidden"
- }
- }
-]
diff --git a/packages/cli/src/context/mcp/context-tools.ts b/packages/cli/src/context/mcp/context-tools.ts
index c8c3842a..03cd2ad4 100644
--- a/packages/cli/src/context/mcp/context-tools.ts
+++ b/packages/cli/src/context/mcp/context-tools.ts
@@ -13,6 +13,7 @@ import type {
KtxMcpToolHandlerContext,
KtxMcpToolResult,
KtxMcpUserContext,
+ KtxSemanticLayerQueryResponse,
NonArrayObject,
} from './types.js';
@@ -62,7 +63,7 @@ const toolDescriptions = {
sl_read_source:
'Read a semantic-layer YAML source by connection id and source name. Example: sl_read_source({ connectionId: "warehouse", sourceName: "orders" }).',
sl_query:
- 'Execute a semantic-layer query and return rows, headers, generated SQL, and plan details. Example: sl_query({ connectionId: "warehouse", measures: ["orders.order_count"], dimensions: [{ field: "orders.created_at", granularity: "month" }] }).',
+ 'Execute a semantic-layer query and return headers, rows, and total row count, plus correctness notes (e.g. compile-only or fan-out) when relevant. The generated SQL and full query plan are omitted by default; request them with include: ["sql"] and/or include: ["plan"]. Example: sl_query({ connectionId: "warehouse", measures: ["orders.order_count"], dimensions: [{ field: "orders.created_at", granularity: "month" }], include: ["sql"] }).',
sql_execution:
'Execute one parser-validated read-only SQL query against a configured KTX connection. Example: sql_execution({ connectionId: "warehouse", sql: "select count(*) from public.orders", maxRows: 100 }).',
memory_ingest:
@@ -75,7 +76,7 @@ const connectionListSchema = z.object({});
const knowledgeSearchSchema = z.object({
query: z.string().min(1).describe('Natural-language wiki search query, e.g. "revenue recognition policy".'),
- limit: z.number().int().min(1).max(50).default(10).describe('Maximum wiki pages to return. Defaults to 10.'),
+ limit: z.number().int().min(1).max(50).default(10).describe('Maximum wiki pages to return.'),
});
const knowledgeReadSchema = z.object({
@@ -111,10 +112,7 @@ const slQueryOrderBySchema = z.object({
.describe(
'Field/measure/dimension id to order by, e.g. "orders.created_at", a dimension key like "mart_nrr_quarterly.quarter_label", or a measure alias.',
),
- direction: z
- .enum(['asc', 'desc'])
- .default('asc')
- .describe('Sort direction: "asc" or "desc". Defaults to "asc".'),
+ direction: z.enum(['asc', 'desc']).default('asc').describe('Sort direction for this field.'),
});
const slQuerySchema = z.object({
@@ -138,8 +136,12 @@ const slQuerySchema = z.object({
.array(slQueryOrderBySchema)
.default([])
.describe('Sort clauses. Use {field, direction?} entries.'),
- limit: z.number().int().min(0).default(1000).describe('Maximum rows to return. Defaults to 1000.'),
- include_empty: z.boolean().default(true).describe('Whether to include empty dimension groups. Defaults to true.'),
+ limit: z.number().int().min(0).default(1000).describe('Maximum rows to return.'),
+ include_empty: z.boolean().default(true).describe('Whether to include empty dimension groups.'),
+ include: z
+ .array(z.enum(['plan', 'sql']))
+ .default([])
+ .describe('Extra detail to attach to the response: "sql" for the generated SQL, "plan" for the full query plan.'),
});
const entityDetailsTableRefSchema = z.object({
@@ -186,13 +188,13 @@ const discoverDataSchema = z.object({
.optional()
.describe('Optional connection id. Pass it when user intent pins a specific warehouse.'),
kinds: z.array(discoverDataKindSchema.describe('Reference kind to include.')).optional().describe('Optional kind filter.'),
- limit: z.number().int().min(1).max(50).default(15).optional().describe('Maximum refs to return. Defaults to 15.'),
+ limit: z.number().int().min(1).max(50).default(10).optional().describe('Maximum refs to return.'),
});
const sqlExecutionSchema = z.object({
connectionId: connectionIdSchema.describe('Connection id to execute against. Required for raw SQL.'),
sql: z.string().min(1).describe('Parser-validated read-only SQL, e.g. "select count(*) from public.orders".'),
- maxRows: z.number().int().min(1).max(10_000).default(1000).optional().describe('Maximum rows to return. Defaults to 1000.'),
+ maxRows: z.number().int().min(1).max(10_000).default(1000).optional().describe('Maximum rows to return.'),
});
const memoryIngestSchema = z.object({
@@ -268,10 +270,14 @@ const slReadSourceOutputSchema = z.object({
const slQueryOutputSchema = z.object({
connectionId: z.string().optional(),
dialect: z.string().optional(),
- sql: z.string(),
headers: z.array(z.string()),
rows: z.array(z.array(z.unknown())),
totalRows: z.number(),
+ // Correctness signals hoisted out of `plan` so they survive default projection (e.g. compile-only
+ // status, fan-out warnings). Present only when there is something to report.
+ notes: z.array(z.string()).optional(),
+ // Opt-in detail, attached only when requested via the `include` input.
+ sql: z.string().optional(),
plan: unknownRecordSchema.optional(),
});
@@ -413,12 +419,59 @@ const memoryIngestStatusOutputSchema = z.object({
/** @internal */
export function jsonToolResult(structuredContent: T): KtxMcpToolResult {
+ // Compact (non-indented) JSON: this `content` text is the copy the model reads. Pretty-printing
+ // arrays-of-arrays (every `rows` payload) puts one scalar per line, inflating tabular results by
+ // a large constant factor. `structuredContent` carries the same data for structured-output clients.
return {
- content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }],
+ content: [{ type: 'text', text: JSON.stringify(structuredContent) }],
structuredContent,
};
}
+/**
+ * Pull the correctness-critical signals (compile-only execution, join fan-out) out of a query plan so
+ * they survive even when the caller did not opt into the full `plan`. Blank or non-string details fall
+ * back to a fixed message, so a note is never empty. Returns an empty list when there is nothing to flag.
+ */
+function slQueryNotes(plan: Record<string, unknown> | undefined): string[] {
+  if (!plan) {
+    return [];
+  }
+  const textOr = (value: unknown, fallback: string): string =>
+    typeof value === 'string' && value.trim().length > 0 ? value.trim() : fallback;
+  const notes: string[] = [];
+  const execution = plan.execution;
+  if (typeof execution === 'object' && execution !== null) {
+    const { mode, reason } = execution as Record<string, unknown>;
+    if (mode === 'compile_only') {
+      notes.push(textOr(reason, 'Compiled SQL only; no rows were executed.'));
+    }
+  }
+  if (plan.has_fan_out === true) {
+    notes.push(textOr(plan.fan_out_description, 'Fan-out detected: measure totals may be inflated by joins.'));
+  }
+  return notes;
+}
+
+/**
+ * Shapes the sl_query payload returned to the agent. Headers, rows and the total row count are always
+ * present; connection id and dialect are copied only when defined; correctness notes appear only when
+ * there is at least one. The generated `sql` and the full `plan` are opt-in through `include`.
+ */
+function projectSlQueryResult(result: KtxSemanticLayerQueryResponse, include: ('plan' | 'sql')[]) {
+  const { connectionId, dialect, headers, rows, totalRows, sql, plan } = result;
+  const notes = slQueryNotes(plan);
+  // Each fragment is either empty or carries exactly one key; spreading them in this order keeps the
+  // key order of the serialized response stable.
+  const scope = connectionId === undefined ? {} : { connectionId };
+  const engine = dialect === undefined ? {} : { dialect };
+  const flagged = notes.length > 0 ? { notes } : {};
+  const generatedSql = include.includes('sql') ? { sql } : {};
+  const fullPlan = include.includes('plan') && plan ? { plan } : {};
+  return { ...scope, ...engine, headers, rows, totalRows, ...flagged, ...generatedSql, ...fullPlan };
+}
+
function jsonErrorToolResult(text: string): KtxMcpToolResult> {
return {
content: [{ type: 'text', text }],
@@ -641,23 +694,22 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void
slQuerySchema,
async (input, context) => {
const onProgress = mcpProgressCallback(context);
- return jsonToolResult(
- await semanticLayer.query(
- {
- connectionId: input.connectionId,
- query: {
- measures: input.measures,
- dimensions: input.dimensions,
- filters: input.filters,
- segments: input.segments,
- order_by: input.order_by,
- limit: input.limit,
- include_empty: input.include_empty,
- },
+ const result = await semanticLayer.query(
+ {
+ connectionId: input.connectionId,
+ query: {
+ measures: input.measures,
+ dimensions: input.dimensions,
+ filters: input.filters,
+ segments: input.segments,
+ order_by: input.order_by,
+ limit: input.limit,
+ include_empty: input.include_empty,
},
- onProgress ? { onProgress } : undefined,
- ),
+ },
+ onProgress ? { onProgress } : undefined,
);
+ return jsonToolResult(projectSlQueryResult(result, input.include));
},
);
}
diff --git a/packages/cli/src/context/mcp/types.ts b/packages/cli/src/context/mcp/types.ts
index e3062ab5..3694e3d6 100644
--- a/packages/cli/src/context/mcp/types.ts
+++ b/packages/cli/src/context/mcp/types.ts
@@ -120,7 +120,10 @@ interface KtxSemanticLayerReadResponse {
yaml: string;
}
-interface KtxSemanticLayerQueryResponse {
+/** @internal */
+export interface KtxSemanticLayerQueryResponse {
+ connectionId?: string;
+ dialect?: string;
sql: string;
headers: string[];
rows: unknown[][];
diff --git a/packages/cli/src/context/search/discover.ts b/packages/cli/src/context/search/discover.ts
index b3456459..9a572daf 100644
--- a/packages/cli/src/context/search/discover.ts
+++ b/packages/cli/src/context/search/discover.ts
@@ -167,7 +167,7 @@ async function wikiCandidates(
query: input.query,
userId: options.userId,
embeddingService: options.embeddingService ?? null,
- limit: Math.max(input.limit ?? 15, 25),
+ limit: Math.max(input.limit ?? 10, 25),
});
const records: CandidateRecord[] = [];
for (const result of searchResults) {
@@ -421,7 +421,8 @@ function hydrate(
}
return {
...ref,
- score: maxScore > 0 ? Number((candidate.score / maxScore).toFixed(6)) : 0,
+ // 3 decimals is plenty for a relative-rank hint; 6 just spent bytes on noise.
+ score: maxScore > 0 ? Number((candidate.score / maxScore).toFixed(3)) : 0,
};
})
.filter((result): result is KtxDiscoverDataRef => result !== null);
@@ -433,7 +434,7 @@ export function createKtxDiscoverDataService(
): { search(input: KtxDiscoverDataInput): Promise } {
return {
async search(input) {
- const limit = Math.max(1, Math.min(input.limit ?? 15, 50));
+ const limit = Math.max(1, Math.min(input.limit ?? 10, 50));
const query = input.query.trim();
if (!query) {
return [];
diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts
index fb8c9a29..ad5ba270 100644
--- a/packages/cli/src/ingest.ts
+++ b/packages/cli/src/ingest.ts
@@ -2,7 +2,7 @@ import { buildMemoryFlowViewModel } from './context/ingest/memory-flow/view-mode
import { createMemoryFlowLiveBuffer, sanitizeMemoryFlowError } from './context/ingest/memory-flow/live-buffer.js';
import { formatMemoryFlowFinalSummary } from './context/ingest/memory-flow/summary.js';
import { getLatestLocalIngestStatus, getLocalIngestStatus, type LocalMetabaseFanoutResult, type LocalMetabaseFanoutProgress, type RunLocalIngestOptions, runLocalIngest, runLocalMetabaseIngest } from './context/ingest/local-ingest.js';
-import { type IngestReportSnapshot, savedMemoryCountsForReport } from './context/ingest/reports.js';
+import { type IngestReportSnapshot, ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js';
import { ingestReportToMemoryFlowReplay } from './context/ingest/memory-flow/events.js';
import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/memory-flow/types.js';
import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js';
@@ -93,10 +93,6 @@ export interface KtxIngestDeps {
runtimeIo?: KtxIngestIo;
}
-function reportStatus(report: IngestReportSnapshot): 'done' | 'error' {
- return report.body.status === 'failed' || report.body.failedWorkUnits.length > 0 ? 'error' : 'done';
-}
-
const REPORT_SOURCE_LABELS = new Map([
['live-database', 'Database schema'],
['historic-sql', 'Query history'],
@@ -193,7 +189,7 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void
if (report.body.tracePath) {
io.stdout.write(`Trace: ${report.body.tracePath}\n`);
}
- io.stdout.write(`Status: ${reportStatus(report)}\n`);
+ io.stdout.write(`Status: ${ingestReportOutcome(report)}\n`);
io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`);
io.stdout.write(`Connection: ${report.connectionId}\n`);
io.stdout.write(`Sync: ${report.body.syncId}\n`);
@@ -231,7 +227,7 @@ function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng
}
io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`);
for (const child of result.children) {
- const status = reportStatus(child.report);
+ const status = ingestReportOutcome(child.report);
io.stdout.write(
`- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`,
);
@@ -595,7 +591,7 @@ function initialRunMemoryFlowInput(
}
function finalRunMemoryFlowInput(snapshot: MemoryFlowReplayInput, report: IngestReportSnapshot): MemoryFlowReplayInput {
- const status = reportStatus(report);
+ const status = ingestReportOutcome(report) === 'error' ? 'error' : 'done';
return {
...snapshot,
runId: report.runId,
@@ -777,7 +773,7 @@ export async function runKtxIngest(
} finally {
plainProgress?.flush();
}
- return result.status === 'all_succeeded' ? 0 : 1;
+ return result.status === 'all_failed' ? 1 : 0;
}
const jobId = deps.jobIdFactory?.();
@@ -846,7 +842,7 @@ export async function runKtxIngest(
liveTui?.close();
liveTui = null;
io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot));
- return reportStatus(result.report) === 'done' ? 0 : 1;
+ return ingestReportOutcome(result.report) === 'error' ? 1 : 0;
}
plainProgress?.flush();
await writeReportRecord(result.report, runOutputMode, io, {
@@ -854,7 +850,7 @@ export async function runKtxIngest(
renderStoredMemoryFlow: deps.renderStoredMemoryFlow,
env,
});
- return reportStatus(result.report) === 'done' ? 0 : 1;
+ return ingestReportOutcome(result.report) === 'error' ? 1 : 0;
} finally {
plainProgress?.flush();
liveTui?.close();
diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts
index 74056542..ebc04c87 100644
--- a/packages/cli/src/setup.ts
+++ b/packages/cli/src/setup.ts
@@ -1,7 +1,7 @@
import { existsSync } from 'node:fs';
import { basename, join, resolve } from 'node:path';
import { getLatestLocalIngestStatus } from './context/ingest/local-ingest.js';
-import { savedMemoryCountsForReport } from './context/ingest/reports.js';
+import { ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js';
import { ktxLocalStateDbPath } from './context/project/local-state-db.js';
import { loadKtxProject, type KtxLocalProject } from './context/project/project.js';
import { readKtxSetupState } from './context/project/setup-config.js';
@@ -306,7 +306,7 @@ function sourceConnections(config: Awaited>['c
type LocalIngestStatusReport = NonNullable>>;
function reportHasSavedContext(report: LocalIngestStatusReport): boolean {
- if (report.body.failedWorkUnits.length > 0) {
+ if (ingestReportOutcome(report) === 'error') {
return false;
}
const counts = savedMemoryCountsForReport(report);
diff --git a/packages/cli/src/skills/analytics/SKILL.md b/packages/cli/src/skills/analytics/SKILL.md
index e4aa86d2..e6857e56 100644
--- a/packages/cli/src/skills/analytics/SKILL.md
+++ b/packages/cli/src/skills/analytics/SKILL.md
@@ -28,7 +28,12 @@ You have access to KTX MCP tools for data discovery, semantic-layer analysis, ra
- Read entity details before writing SQL against an unfamiliar table. Do not assume column names.
- Treat `sql_execution` as read-only. Writes are rejected by the server.
- Validate value mentions with `dictionary_search` instead of guessing case or spelling. Treat a `dictionary_search` miss as non-authoritative. The index is built from profile-sampled values, so a missing value may simply have been outside the sample. Follow up with `sql_execution` against the most plausible columns before concluding the value is absent.
-- When `connection_list` shows multiple connections, pass an explicit `connectionId` to every tool that takes one and where user intent pins a specific warehouse. Required: `entity_details`, `sl_read_source`, and `sql_execution`. Required when user intent is warehouse-specific, including wording like "in our warehouse" or "this warehouse": `memory_ingest`; without `connectionId`, the memory agent cannot update the semantic layer and the knowledge lands as wiki-only. Pass `connectionId` when intent pins a warehouse, otherwise omit for unscoped discovery: `sl_query`, `discover_data`, and `dictionary_search`. Never pass `connectionId` to `connection_list`, `wiki_search`, `wiki_read`, or `memory_ingest_status`. If intent is ambiguous for a required-or-scoped tool, ask the user which warehouse before calling.
+- `connectionId` scoping when `connection_list` shows multiple connections:
+ - Always pass it: `entity_details`, `sl_read_source`, `sql_execution`.
+ - Pass it when intent pins a warehouse, otherwise omit for unscoped discovery: `sl_query`, `discover_data`, `dictionary_search`.
+ - `memory_ingest`: pass it for warehouse-specific knowledge (e.g. "in our warehouse"); without it the memory lands as wiki-only and cannot update the semantic layer.
+ - Never pass it: `connection_list`, `wiki_search`, `wiki_read`, `memory_ingest_status`.
+ - If scoping is required but intent is ambiguous, ask which warehouse before calling.
- Show compact result tables for small outputs. For broad results, summarize the top findings and mention the applied limit.
- Ask a concise clarification only when the metric, date range, entity, or grain is genuinely ambiguous and cannot be inferred from context.
diff --git a/packages/cli/test/context/ingest/local-metabase-ingest.test.ts b/packages/cli/test/context/ingest/local-metabase-ingest.test.ts
index 06822aa2..8fb89bd0 100644
--- a/packages/cli/test/context/ingest/local-metabase-ingest.test.ts
+++ b/packages/cli/test/context/ingest/local-metabase-ingest.test.ts
@@ -6,6 +6,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { LocalMetabaseDiscoveryCache } from '../../../src/context/ingest/adapters/metabase/local-source-state-store.js';
import { getLocalIngestStatus, runLocalMetabaseIngest } from '../../../src/context/ingest/local-ingest.js';
+import { ingestReportOutcome } from '../../../src/context/ingest/reports.js';
import type { ChunkResult, FetchContext, SourceAdapter } from '../../../src/context/ingest/types.js';
class TestAgentRunner implements AgentRunnerPort {
@@ -202,6 +203,24 @@ describe('runLocalMetabaseIngest', () => {
expect(result.children[1]?.report.body.failedWorkUnits).toEqual(['metabase-db-2']);
});
+ it('keeps a child that saved memory out of all_failed when another child fails', async () => {
+ await seedMetabaseState();
+ const agentRunner = new TestAgentRunner();
+ const ids = ['metabase-child-1', 'metabase-child-2'];
+
+ const result = await runLocalMetabaseIngest({
+ project,
+ adapters: [new FakeMetabaseSourceAdapter()],
+ metabaseConnectionId: 'prod-metabase',
+ agentRunner,
+ jobIdFactory: () => ids.shift() ?? 'metabase-child-extra',
+ });
+
+ expect(result.status).toBe('partial_failure');
+ expect(ingestReportOutcome(result.children[0].report)).toBe('done');
+ expect(ingestReportOutcome(result.children[1].report)).toBe('error');
+ });
+
it('captures fetch-time child failures and continues later mappings', async () => {
await seedMetabaseState();
project.config.connections.warehouse_c = { driver: 'postgres', url: 'postgres://localhost/c' };
diff --git a/packages/cli/test/context/ingest/memory-flow/events.test.ts b/packages/cli/test/context/ingest/memory-flow/events.test.ts
index e29405a4..cb0e72c8 100644
--- a/packages/cli/test/context/ingest/memory-flow/events.test.ts
+++ b/packages/cli/test/context/ingest/memory-flow/events.test.ts
@@ -166,7 +166,7 @@ describe('memory-flow event mapping', () => {
runId: 'run-1',
connectionId: 'warehouse',
adapter: 'lookml',
- status: 'error',
+ status: 'done',
sourceDir: null,
syncId: 'sync-2',
reportId: 'report-1',
@@ -308,7 +308,7 @@ describe('memory-flow event mapping', () => {
sourceReportPath: 'report-1',
fallbackReason: null,
});
- expect(replay.status).toBe('error');
+ expect(replay.status).toBe('done');
expect(replay.reportId).toBe('report-1');
expect(replay.reportPath).toBe('report-1');
expect(replay.events[0]).toMatchObject({ type: 'source_acquired', emittedAt: '2026-05-01T10:00:00.000Z' });
diff --git a/packages/cli/test/context/ingest/reports.test.ts b/packages/cli/test/context/ingest/reports.test.ts
new file mode 100644
index 00000000..5fc24f6d
--- /dev/null
+++ b/packages/cli/test/context/ingest/reports.test.ts
@@ -0,0 +1,71 @@
+import { describe, expect, it } from 'vitest';
+import { ingestReportOutcome } from '../../../src/context/ingest/reports.js';
+import type { IngestReportSnapshot } from '../../../src/context/ingest/reports.js';
+
+/**
+ * Builds an IngestReportSnapshot fixture: fixed identifiers plus a baseline body with no work units
+ * and no failed work units. Fields in `body` are spread last, so they override the baseline fields.
+ */
+function report(body: Partial<IngestReportSnapshot['body']>): IngestReportSnapshot {
+  return {
+    id: 'r',
+    runId: 'run',
+    jobId: 'job',
+    connectionId: 'warehouse',
+    sourceKey: 'metabase',
+    createdAt: '2026-05-29T00:00:00.000Z',
+    body: {
+      syncId: 'sync',
+      diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
+      commitSha: null,
+      workUnits: [],
+      failedWorkUnits: [],
+      reconciliationSkipped: false,
+      conflictsResolved: [],
+      evictionsApplied: [],
+      unmappedFallbacks: [],
+      evictionInputs: [],
+      unresolvedCards: [],
+      supersededBy: null,
+      overrideOf: null,
+      provenanceRows: [],
+      toolTranscripts: [],
+      ...body,
+    },
+  };
+}
+
+// Work-unit fixture with status 'success' that records a single 'updated' action on the 'sl' target.
+const savingWorkUnit = {
+ unitKey: 'ok',
+ rawFiles: ['cards/1.json'],
+ status: 'success' as const,
+ actions: [{ target: 'sl' as const, type: 'updated' as const, key: 'warehouse.orders', detail: 'measure' }],
+ touchedSlSources: [],
+};
+
+// Work-unit fixture with status 'failed' (reason 'tool write failed') and no recorded actions.
+const failedWorkUnit = {
+ unitKey: 'bad',
+ rawFiles: ['cards/2.json'],
+ status: 'failed' as const,
+ reason: 'tool write failed',
+ actions: [],
+ touchedSlSources: [],
+};
+
+// Exercises the three outcomes asserted here ('done', 'partial', 'error'), including both error paths.
+describe('ingestReportOutcome', () => {
+  it('returns done when there are no failed work units', () => {
+    const snapshot = report({ workUnits: [savingWorkUnit] });
+    expect(ingestReportOutcome(snapshot)).toBe('done');
+  });
+
+  it('returns partial when failed work units coexist with saved memory', () => {
+    const snapshot = report({ workUnits: [savingWorkUnit, failedWorkUnit], failedWorkUnits: ['bad'] });
+    expect(ingestReportOutcome(snapshot)).toBe('partial');
+  });
+
+  it('returns error when failed work units produced no saved memory', () => {
+    const snapshot = report({ workUnits: [failedWorkUnit], failedWorkUnits: ['bad'] });
+    expect(ingestReportOutcome(snapshot)).toBe('error');
+  });
+
+  it('returns error for a stage-level failure even if artifacts were recorded', () => {
+    // `status: 'failed'` is set while the only work unit succeeded and nothing is listed as failed.
+    const snapshot = report({ status: 'failed', workUnits: [savingWorkUnit], failedWorkUnits: [] });
+    expect(ingestReportOutcome(snapshot)).toBe('error');
+  });
+});
diff --git a/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json b/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json
index 10cb0b77..b38851f4 100644
--- a/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json
+++ b/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json
@@ -65,7 +65,7 @@
},
"limit": {
"default": 10,
- "description": "Maximum wiki pages to return. Defaults to 10.",
+ "description": "Maximum wiki pages to return.",
"type": "integer",
"minimum": 1,
"maximum": 50
@@ -307,7 +307,7 @@
{
"name": "sl_query",
"title": "Semantic Layer Query",
- "description": "Execute a semantic-layer query and return rows, headers, generated SQL, and plan details. Example: sl_query({ connectionId: \"warehouse\", measures: [\"orders.order_count\"], dimensions: [{ field: \"orders.created_at\", granularity: \"month\" }] }).",
+ "description": "Execute a semantic-layer query and return headers, rows, and total row count, plus correctness notes (e.g. compile-only or fan-out) when relevant. The generated SQL and full query plan are omitted by default; request them with include: [\"sql\"] and/or include: [\"plan\"]. Example: sl_query({ connectionId: \"warehouse\", measures: [\"orders.order_count\"], dimensions: [{ field: \"orders.created_at\", granularity: \"month\" }], include: [\"sql\"] }).",
"inputSchema": {
"type": "object",
"properties": {
@@ -403,7 +403,7 @@
},
"direction": {
"default": "asc",
- "description": "Sort direction: \"asc\" or \"desc\". Defaults to \"asc\".",
+ "description": "Sort direction for this field.",
"type": "string",
"enum": [
"asc",
@@ -418,15 +418,27 @@
},
"limit": {
"default": 1000,
- "description": "Maximum rows to return. Defaults to 1000.",
+ "description": "Maximum rows to return.",
"type": "integer",
"minimum": 0,
"maximum": 9007199254740991
},
"include_empty": {
"default": true,
- "description": "Whether to include empty dimension groups. Defaults to true.",
+ "description": "Whether to include empty dimension groups.",
"type": "boolean"
+ },
+ "include": {
+ "default": [],
+ "description": "Extra detail to attach to the response: \"sql\" for the generated SQL, \"plan\" for the full query plan.",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "plan",
+ "sql"
+ ]
+ }
}
},
"required": [
@@ -443,9 +455,6 @@
"dialect": {
"type": "string"
},
- "sql": {
- "type": "string"
- },
"headers": {
"type": "array",
"items": {
@@ -462,6 +471,15 @@
"totalRows": {
"type": "number"
},
+ "notes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "sql": {
+ "type": "string"
+ },
"plan": {
"type": "object",
"propertyNames": {
@@ -471,7 +489,6 @@
}
},
"required": [
- "sql",
"headers",
"rows",
"totalRows"
@@ -1241,8 +1258,8 @@
}
},
"limit": {
- "description": "Maximum refs to return. Defaults to 15.",
- "default": 15,
+ "description": "Maximum refs to return.",
+ "default": 10,
"type": "integer",
"minimum": 1,
"maximum": 50
@@ -1396,7 +1413,7 @@
"description": "Parser-validated read-only SQL, e.g. \"select count(*) from public.orders\"."
},
"maxRows": {
- "description": "Maximum rows to return. Defaults to 1000.",
+ "description": "Maximum rows to return.",
"default": 1000,
"type": "integer",
"minimum": 1,
diff --git a/packages/cli/test/context/mcp/server.test.ts b/packages/cli/test/context/mcp/server.test.ts
index 3c4500b1..95985d68 100644
--- a/packages/cli/test/context/mcp/server.test.ts
+++ b/packages/cli/test/context/mcp/server.test.ts
@@ -347,16 +347,12 @@ describe('createKtxMcpServer', () => {
content: [
{
type: 'text',
- text: JSON.stringify(
- {
- headers: ['status', 'count'],
- headerTypes: ['text', 'bigint'],
- rows: [['paid', 42]],
- rowCount: 1,
- },
- null,
- 2,
- ),
+ text: JSON.stringify({
+ headers: ['status', 'count'],
+ headerTypes: ['text', 'bigint'],
+ rows: [['paid', 42]],
+ rowCount: 1,
+ }),
},
],
structuredContent: {
@@ -638,6 +634,92 @@ describe('createKtxMcpServer', () => {
);
});
+ it('sl_query default response omits plan and sql but keeps compile-only and fan-out notes', async () => {
+ const fake = makeFakeServer();
+ const semanticLayer: KtxSemanticLayerMcpPort = {
+ readSource: vi.fn(),
+ query: vi.fn().mockResolvedValue({
+ connectionId: 'warehouse',
+ dialect: 'postgres',
+ sql: 'select count(*) from public.orders',
+ headers: ['order_count'],
+ rows: [],
+ totalRows: 0,
+ plan: {
+ sources_used: ['orders'],
+ has_fan_out: true,
+ fan_out_description: 'orders fans out across line_items',
+ execution: { mode: 'compile_only', reason: 'No execution adapter configured.' },
+ },
+ }),
+ };
+
+ createKtxMcpServer({
+ server: fake.server,
+ userContext: { userId: 'local-user' },
+ contextTools: { semanticLayer },
+ });
+
+ const result = await getTool(fake.tools, 'sl_query').handler({
+ connectionId: 'warehouse',
+ measures: ['orders.order_count'],
+ });
+
+ expect(result).toMatchObject({
+ structuredContent: {
+ connectionId: 'warehouse',
+ dialect: 'postgres',
+ headers: ['order_count'],
+ rows: [],
+ totalRows: 0,
+ notes: ['No execution adapter configured.', 'orders fans out across line_items'],
+ },
+ });
+ const structured = (result as { structuredContent: Record }).structuredContent;
+ expect(structured.sql).toBeUndefined();
+ expect(structured.plan).toBeUndefined();
+ });
+
+ it('sl_query attaches sql and plan only when include requests them', async () => {
+ const fake = makeFakeServer();
+ const plan = { sources_used: ['orders'], execution: { mode: 'executed' } };
+ const semanticLayer: KtxSemanticLayerMcpPort = {
+ readSource: vi.fn(),
+ query: vi.fn().mockResolvedValue({
+ connectionId: 'warehouse',
+ dialect: 'postgres',
+ sql: 'select count(*) from public.orders',
+ headers: ['order_count'],
+ rows: [[3]],
+ totalRows: 1,
+ plan,
+ }),
+ };
+
+ createKtxMcpServer({
+ server: fake.server,
+ userContext: { userId: 'local-user' },
+ contextTools: { semanticLayer },
+ });
+
+ const result = await getTool(fake.tools, 'sl_query').handler({
+ connectionId: 'warehouse',
+ measures: ['orders.order_count'],
+ include: ['plan', 'sql'],
+ });
+
+ expect(result).toMatchObject({
+ structuredContent: {
+ sql: 'select count(*) from public.orders',
+ plan,
+ rows: [[3]],
+ totalRows: 1,
+ },
+ });
+ const structured = (result as { structuredContent: Record }).structuredContent;
+ expect(structured.notes).toBeUndefined();
+ });
+
it('entity_details rejects sql-style schema table ref aliases', async () => {
const fake = makeFakeServer();
const entityDetails = makeAllContextTools().entityDetails!;
@@ -838,7 +920,7 @@ describe('createKtxMcpServer', () => {
connectionId: '00000000-0000-4000-8000-000000000001',
}),
).resolves.toEqual({
- content: [{ type: 'text', text: JSON.stringify({ runId: 'run-1' }, null, 2) }],
+ content: [{ type: 'text', text: JSON.stringify({ runId: 'run-1' }) }],
structuredContent: { runId: 'run-1' },
});
expect(ingest.ingest).toHaveBeenCalledWith({
@@ -865,21 +947,17 @@ describe('createKtxMcpServer', () => {
content: [
{
type: 'text',
- text: JSON.stringify(
- {
- runId: 'run-1',
- status: 'done',
- stage: 'done',
- done: true,
- captured: { wiki: ['revenue'], sl: [], xrefs: [] },
- error: null,
- commitHash: 'abc123',
- skillsLoaded: ['wiki_capture'],
- signalDetected: true,
- },
- null,
- 2,
- ),
+ text: JSON.stringify({
+ runId: 'run-1',
+ status: 'done',
+ stage: 'done',
+ done: true,
+ captured: { wiki: ['revenue'], sl: [], xrefs: [] },
+ error: null,
+ commitHash: 'abc123',
+ skillsLoaded: ['wiki_capture'],
+ signalDetected: true,
+ }),
},
],
structuredContent: {
@@ -1087,19 +1165,15 @@ describe('createKtxMcpServer', () => {
content: [
{
type: 'text',
- text: JSON.stringify(
- {
- connections: [
- {
- id: '00000000-0000-4000-8000-000000000001',
- name: 'Warehouse',
- connectionType: 'POSTGRES',
- },
- ],
- },
- null,
- 2,
- ),
+ text: JSON.stringify({
+ connections: [
+ {
+ id: '00000000-0000-4000-8000-000000000001',
+ name: 'Warehouse',
+ connectionType: 'POSTGRES',
+ },
+ ],
+ }),
},
],
structuredContent: {
diff --git a/packages/cli/test/ingest.test.ts b/packages/cli/test/ingest.test.ts
index eef751ba..f5cd1ac5 100644
--- a/packages/cli/test/ingest.test.ts
+++ b/packages/cli/test/ingest.test.ts
@@ -403,7 +403,7 @@ describe('runKtxIngest', () => {
expect(io.stderr()).toContain('Metabase ingest: prod-metabase');
});
- it('returns a non-zero code when Metabase fanout has failed children', async () => {
+ it('returns a non-zero code when a Metabase fanout child fully fails', async () => {
const projectDir = join(tempDir, 'project');
await writeMetabaseConfig(projectDir);
const io = makeIo();
@@ -441,7 +441,7 @@ describe('runKtxIngest', () => {
{
runLocalMetabaseIngest: async () => ({
metabaseConnectionId: 'prod-metabase',
- status: 'partial_failure',
+ status: 'all_failed',
totals: { workUnits: 1, failedWorkUnits: 1 },
children: [
{
@@ -467,9 +467,83 @@ describe('runKtxIngest', () => {
),
).resolves.toBe(1);
- expect(io.stdout()).toContain('Metabase fanout: partial_failure');
- expect(io.stdout()).toContain('Failed tasks: 1');
+ expect(io.stdout()).toContain('Metabase fanout: all_failed');
expect(io.stdout()).toContain('status=error');
+ });
+
+ it('exits 0 and reports status=partial when a Metabase child saved memory despite a failure', async () => {
+ const projectDir = join(tempDir, 'project');
+ await writeMetabaseConfig(projectDir);
+ const io = makeIo();
+ const report = localFakeBundleReport('metabase-child-1', {
+ id: 'report-metabase-child-1',
+ runId: 'run-a',
+ jobId: 'metabase-child-1',
+ connectionId: 'warehouse_a',
+ sourceKey: 'metabase',
+ body: {
+ failedWorkUnits: ['metabase-db-2'],
+ workUnits: [
+ {
+ unitKey: 'metabase-db-1',
+ rawFiles: ['cards/1.json'],
+ status: 'success',
+ actions: [{ target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'measure' }],
+ touchedSlSources: [],
+ },
+ {
+ unitKey: 'metabase-db-2',
+ rawFiles: ['cards/2.json'],
+ status: 'failed',
+ reason: 'bad SQL',
+ actions: [],
+ touchedSlSources: [],
+ },
+ ],
+ },
+ });
+
+ await expect(
+ runKtxIngest(
+ {
+ command: 'run',
+ projectDir,
+ connectionId: 'prod-metabase',
+ adapter: 'metabase',
+ outputMode: 'plain',
+ },
+ io.io,
+ {
+ runLocalMetabaseIngest: async () => ({
+ metabaseConnectionId: 'prod-metabase',
+ status: 'partial_failure',
+ totals: { workUnits: 2, failedWorkUnits: 1 },
+ children: [
+ {
+ jobId: 'metabase-child-1',
+ metabaseConnectionId: 'prod-metabase',
+ metabaseDatabaseId: 1,
+ targetConnectionId: 'warehouse_a',
+ result: {
+ jobId: 'metabase-child-1',
+ runId: 'run-a',
+ syncId: 'sync-a',
+ diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
+ workUnitCount: 2,
+ failedWorkUnits: ['metabase-db-2'],
+ artifactsWritten: 1,
+ commitSha: 'abc',
+ },
+ report,
+ },
+ ],
+ }),
+ },
+ ),
+ ).resolves.toBe(0);
+
+ expect(io.stdout()).toContain('Metabase fanout: partial_failure');
+ expect(io.stdout()).toContain('status=partial');
expect(io.stderr()).toContain('Metabase ingest: prod-metabase');
});
@@ -1140,6 +1214,63 @@ describe('runKtxIngest', () => {
expect(io.stdout()).toContain('Status: error\n');
});
+ it('exits 0 and reports Status: partial when a single-source ingest saved memory despite a failure', async () => {
+ const projectDir = join(tempDir, 'project');
+ await writeWarehouseConfig(projectDir);
+ const sourceDir = join(tempDir, 'source');
+ await mkdir(join(sourceDir, 'orders'), { recursive: true });
+ await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
+
+ const partialReport = localFakeBundleReport('local-job-partial', {
+ connectionId: 'warehouse',
+ sourceKey: 'fake',
+ body: {
+ failedWorkUnits: ['orders-bad'],
+ workUnits: [
+ {
+ unitKey: 'orders-ok',
+ rawFiles: ['orders/orders.json'],
+ status: 'success',
+ actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }],
+ touchedSlSources: [],
+ },
+ {
+ unitKey: 'orders-bad',
+ rawFiles: ['orders/bad.json'],
+ status: 'failed',
+ reason: 'writer tool failed',
+ actions: [],
+ touchedSlSources: [],
+ },
+ ],
+ },
+ });
+ const runLocal = vi.fn(async (_input: RunLocalIngestOptions) => ({
+ result: {
+ jobId: 'local-job-partial',
+ runId: partialReport.runId,
+ syncId: partialReport.body.syncId,
+ diffSummary: partialReport.body.diffSummary,
+ workUnitCount: partialReport.body.workUnits.length,
+ failedWorkUnits: partialReport.body.failedWorkUnits,
+ artifactsWritten: 1,
+ commitSha: partialReport.body.commitSha,
+ },
+ report: partialReport,
+ }));
+
+ const io = makeIo();
+ await expect(
+ runKtxIngest(
+ { command: 'run', projectDir, connectionId: 'warehouse', adapter: 'fake', sourceDir, outputMode: 'plain' },
+ io.io,
+ { runLocalIngest: runLocal, jobIdFactory: () => 'local-job-partial' },
+ ),
+ ).resolves.toBe(0);
+
+ expect(io.stdout()).toContain('Status: partial\n');
+ });
+
it('prints trace path and error status for stored failed ingest reports', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
diff --git a/packages/cli/test/setup.test.ts b/packages/cli/test/setup.test.ts
index 0bc00919..da51e9af 100644
--- a/packages/cli/test/setup.test.ts
+++ b/packages/cli/test/setup.test.ts
@@ -398,6 +398,59 @@ describe('setup status', () => {
expect(rendered).toContain('KTX context built: yes');
});
+ it('reports context ready after a partial ingest report saved memory', async () => {
+ await writeFile(
+ join(tempDir, 'ktx.yaml'),
+ [
+ 'setup:',
+ ' database_connection_ids:',
+ ' - warehouse',
+ 'connections:',
+ ' warehouse:',
+ ' driver: postgres',
+ ' url: env:DATABASE_URL',
+ 'ingest:',
+ ' embeddings:',
+ ' backend: none',
+ ' dimensions: 8',
+ '',
+ ].join('\n'),
+ 'utf-8',
+ );
+ await writeKtxSetupState(tempDir, { completed_steps: ['project', 'databases'] });
+ await persistLocalBundleReport(
+ tempDir,
+ localFakeBundleReport('warehouse-job-partial', {
+ connectionId: 'warehouse',
+ sourceKey: 'fake',
+ body: {
+ failedWorkUnits: ['orders-bad'],
+ workUnits: [
+ {
+ unitKey: 'orders-ok',
+ rawFiles: ['orders/orders.json'],
+ status: 'success',
+ actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }],
+ touchedSlSources: [],
+ },
+ {
+ unitKey: 'orders-bad',
+ rawFiles: ['orders/bad.json'],
+ status: 'failed',
+ reason: 'writer tool failed',
+ actions: [],
+ touchedSlSources: [],
+ },
+ ],
+ },
+ }),
+ );
+
+ const status = await readKtxSetupStatus(tempDir);
+
+ expect(status.context).toMatchObject({ ready: true, status: 'completed' });
+ });
+
it('formats plain and JSON setup status payloads', async () => {
const status = await readKtxSetupStatus(tempDir);
const rendered = formatKtxSetupStatus(status);