2026-05-10 23:12:26 +02:00
import YAML from 'yaml' ;
import { z } from 'zod' ;
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
import { addTouchedSlSource } from '../../../context/tools/touched-sl-sources.js' ;
import type { ToolContext , ToolOutput } from '../../../context/tools/base-tool.js' ;
import { validateActionRawPaths } from '../../../context/tools/action-raw-paths.js' ;
import { validateActionTargetConnection } from '../../../context/tools/action-target-connection.js' ;
2026-05-10 23:12:26 +02:00
import { sourceOverlaySchema } from '../schemas.js' ;
import type { SemanticLayerService } from '../semantic-layer.service.js' ;
import type { SemanticLayerSource } from '../types.js' ;
import {
BaseSemanticLayerTool ,
type BaseSemanticLayerToolDeps ,
type SemanticLayerStructured ,
sourceDefinitionSchema ,
} from './base-semantic-layer.tool.js' ;
2026-05-11 00:31:15 -07:00
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js' ;
2026-05-10 23:12:26 +02:00
import { slToolConnectionIdSchema } from './connection-id-schema.js' ;
// Accepts either shape for a source payload: a full source definition
// (`sourceDefinitionSchema`) or an overlay (`sourceOverlaySchema`).
const sourceInputSchema = z . union ( [ sourceDefinitionSchema , sourceOverlaySchema ] ) ;
/**
 * Input schema for the `sl_write_source` tool.
 *
 * Fields:
 * - `connectionId`: data source connection ID (built on `slToolConnectionIdSchema`).
 * - `sourceName`: non-empty name of the source to create, edit, or delete.
 * - `source`: optional standalone source definition or overlay (`sourceInputSchema`).
 * - `delete`: optional flag; `true` requests deletion of the source.
 * - `rawPaths`: optional list of non-empty raw source file paths supporting the action.
 */
const slWriteSourceInputSchema = z . object ( {
connectionId : slToolConnectionIdSchema.describe ( 'Data source connection ID' ) ,
sourceName : z
. string ( )
fix: read semantic sources safely (#284)
* fix: read semantic sources safely
* test: retarget reindex per-scope error case to a broken manifest
Reading a broken standalone source was made non-fatal in de1f1a8d (it is
surfaced for repair instead of throwing), so the reindex per-scope error
test no longer captured an error. Point it at a corrupt manifest shard,
which is the remaining fatal read failure the per-scope catch must
isolate, and assert the captured error names the offending file.
* fix(sl): decouple semantic-layer file names from warehouse naming rules
The in-file `name:` field is now the sole source identity; the filename is
a derived label that never participates in identity. This removes the
"Unsafe semantic-layer source name" failure class entirely: any warehouse
identifier (Snowflake's uppercase SIGNED_UP, EVENT$LOG, dotted names) can
be read, overlaid, edited, and deleted.
- New `source-files.ts`: one total filename derivation (safe lowercase
names verbatim; otherwise slug + sha256-hash suffix, immune to
case-insensitive-filesystem collisions) and one by-name file resolver.
- Reads resolve by name everywhere; the path-from-name fast path and
`assertSafeSourceName` are gone.
- Writes resolve-then-write: rewrites land on the file that declares the
name (human renames survive); new sources get a derived filename; a
derived path occupied by a different source fails instead of clobbering.
- `readSourceFile` returns null for missing files instead of forcing every
caller to launder IO errors; `deleteSource` distinguishes manifest-backed
sources from not-found instead of silently succeeding.
- `sl_write_source` accepts verbatim warehouse identifiers (snake_case is
now a recommendation for new sources) and rejects sourceName/source.name
mismatches; `sl_edit_source` rejects name-changing edits.
- Ingest projection commits, gate-repair allowlists, and touched-source
derivation use resolved paths / in-file names instead of interpolating
`<connId>/<name>.yaml`.
- Collapsed the five parallel path derivations and duplicated path-token
helpers onto the shared module; dropped dead service methods.
* fix(sl): resolve sources by declared name end-to-end and gate warehouse SQL with the parser-backed validator
- Key broken/renamed semantic-layer files by their recoverable in-file
name (slSourceNameForFile) so mid-edit sources stay reachable under
their real identity in reads, listings, and search
- Derive finalization touched sources from composed-source diffs and
recover deleted files' declared names from the pre-change commit
instead of parsing hash-derived filenames
- Resolve revert/rollback paths against history (listFilesAtCommit) so
human-renamed files are restored where they lived at preHead
- Validate ingest sql_execution through the daemon's sqlglot
validateReadOnly in the connection's dialect, sharing one
driver-to-dialect map (sql-analysis/dialect.ts) across MCP and ingest
- Harden the local read-only SQL backstop: accept leading comments,
reject smuggled second statements, and strip trailing
semicolons/comments before row-limit wrapping
2026-06-10 14:06:13 +02:00
. min ( 1 )
. describe (
"Name of the source to create, edit, or delete. Must equal the source's `name:`. Use the verbatim " +
'warehouse identifier when overlaying a manifest source (e.g. SIGNED_UP); snake_case is recommended ' +
'for new standalone sources.' ,
) ,
2026-05-10 23:12:26 +02:00
source : sourceInputSchema
. optional ( )
fix(context): merge overlay columns onto manifest columns by name (#94)
* fix(context): merge overlay columns onto manifest columns by name
composeOverlay was appending overlay columns to the manifest column list,
producing duplicate entries when dbt/metabase overlays declared a column
just to attach descriptions. The duplicates carried no `type`, so the
pydantic SourceDefinition rejected them at semantic-query time and broke
`ktx sl query` for every overlay-backed measure. Now overlay columns
match base columns by name (case-insensitive): same-name entries merge
onto the manifest (overlay fields win, type/role fall back to the base,
descriptions merge per source key) and only new names append.
* refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract
Overlay sources now have two distinct collections: `columns:` for computed
columns (requiring `expr` + `type`) and `column_overrides:` for metadata
patches to inherited manifest columns. Composing or loading an overlay that
mixes the two — or references an unknown column — fails with a typed error.
Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` /
`toResolvedWire` as the strict shape sent to the Python engine, and add a
schema contract test that diffs Zod against the Pydantic JSON schema dumped
by `python -m semantic_layer dump-schema`. `SourceDefinition` is now
`extra="forbid"` on the Python side.
`loadAllSources` surfaces per-file load errors instead of swallowing them,
so validation/query paths can report manifest shard parse failures.
* fix(context): make scan description generation resilient and quiet
A transient sampleTable failure during ingest used to take out every
table in a connection: generateTableDescription returned a hardcoded
'Table not found' string into descriptions.ai, and KtxDescriptionGenerator
was constructed without a logger, so the failure left no trail anywhere.
- sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff,
honouring KtxScanContext.signal via a new KtxAbortedError.
- On retry exhaustion or missing capability, table generation falls back
to a metadata-only prompt built from column name / native type / comment
/ rawDescriptions. The column path follows the same rule -- call the
LLM when any of samples or rawDescriptions are available; skip only
when both are absent.
- Logger is now threaded from KtxScanContext into the generator. Failures
emit structured KtxScanWarning entries (new description_fallback_used
code, plus existing sampling_failed / enrichment_failed /
connector_capability_missing). ktx scan groups warnings by code so a
batch of identical failures collapses to one summary line plus sample.
- Returns null on failure instead of the 'Table not found' sentinel; the
manifest writer's existing guard already skips empty descriptions, so
schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS
already strips stale 'ai' on merge, so existing YAML clears on next run.
Also suppress AI SDK v6 'system in messages' warning: pull system messages
out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages
helper and pass them top-level to generateText (preserves cacheControl
providerOptions on the SystemModelMessage). Agent-runner's local
splitSystemPromptMessages dedupes onto the shared helper.
* test(docs): align examples-docs assertions with revamped docs
PR #103 (setup/guide doc revamp) reworded several CLI examples and
connection labels; the assertions in scripts/examples-docs.test.mjs
still referenced the pre-revamp wording and were failing in CI on main.
Update the regexes to match the post-revamp content:
- drop the `--json` flag from the sl-query example expectation
- move the `Driver:` / `Status: ok` probe to the connection reference,
which is where that output now lives (driver id is lowercase
`postgres`, not the display name `PostgreSQL`)
- drop the obsolete `Install \`uv\`...` troubleshooting line
- accept `<connectionId>` everywhere; the docs no longer use the
hyphenated `<connection-id>` form
- match the `warehouse` connection id used in the quickstart instead of
the `postgres-warehouse` id only used in the README and setup ref
* fix(sl): skip TS/Python schema contract test when uv is unavailable
The TypeScript checks CI job does not install uv or Python, so the
module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw
ENOENT and failed the suite. Wrap the schema dump in a try/catch and
guard the describe block with `describe.skipIf` so the test skips in
environments without uv. Local dev and any CI job that has uv on PATH
still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
. describe (
'Source definition (standalone with table/sql) or overlay (measures, column_overrides, computed columns, etc.)' ,
) ,
2026-05-10 23:12:26 +02:00
delete : z . boolean ( ) . optional ( ) . describe ( 'Set to true to delete this source entirely' ) ,
2026-05-12 16:56:58 -04:00
rawPaths : z
. array ( z . string ( ) . min ( 1 ) )
. optional ( )
. describe ( 'In ingest sessions, raw source file paths that directly support this SL action.' ) ,
2026-05-10 23:12:26 +02:00
} ) ;
/** Parsed input type inferred from `slWriteSourceInputSchema`. */
type SlWriteSourceInput = z . infer < typeof slWriteSourceInputSchema > ;
/**
 * Decides which connection ID, if any, should be recorded as the explicit target of an action.
 *
 * @param runConnectionId Connection ID the current run is bound to; may be absent or empty.
 * @param actionConnectionId Connection ID the action itself operates on.
 * @returns `actionConnectionId` when the run has a (non-empty) connection ID that differs
 *   from it; otherwise `null`.
 */
function actionTargetConnectionId(
  runConnectionId: string | null | undefined,
  actionConnectionId: string,
): string | null {
  // No run-level connection (null, undefined, or empty string): nothing to contrast against.
  if (!runConnectionId) {
    return null;
  }
  // The action targets the same connection as the run, so no explicit target is needed.
  if (runConnectionId === actionConnectionId) {
    return null;
  }
  return actionConnectionId;
}
export class SlWriteSourceTool extends BaseSemanticLayerTool < typeof slWriteSourceInputSchema > {
readonly name = 'sl_write_source' ;
/**
 * Creates the tool by forwarding the supplied dependencies, unchanged, to the base class.
 *
 * @param deps Dependencies passed straight through to `BaseSemanticLayerTool`.
 */
constructor(deps: BaseSemanticLayerToolDeps) {
  super(deps);
}
/**
 * Tool description text, returned as a single template string.
 *
 * The text is organized into tagged sections: purpose, when_to_use, editing_approach,
 * source_definition, and join_requirements. It explains when to use this tool versus
 * `sl_edit_source`, which fields a source definition carries, and how joins should be declared.
 * NOTE(review): presumably consumed as the prompt-facing description of the tool — confirm
 * against the base tool class.
 */
get description ( ) : string {
return ` <purpose>
Create a new semantic layer source or fully rewrite an existing one .
If the source already exists , this tool will overwrite it with the new definition .
< / purpose >
< when_to_use >
- First time creating a source definition
- When modeling a new SQL - backed source ( e . g . , churn risk view , ARR calculation )
- When the user asks to start over / fully rewrite a source
- Consolidating multiple sources into one ( write merged definition )
- For targeted edits to existing sources ( add / remove measures , update joins ) , prefer sl_edit_source instead
< / when_to_use >
< editing_approach >
- New source : provide \ ` source \` with full definition
- Full rewrite : provide \ ` source \` (overwrites existing)
- Targeted edits on an existing source : use sl_edit_source instead
- Delete : set \ ` delete: true \`
< / editing_approach >
< source_definition >
- name : Unique identifier for the source
- table : For physical table / view sources ( e . g . , "public.orders" ) . Mutually exclusive with sql .
- sql : For SQL - based sources ( the SQL query ) . Mutually exclusive with table .
- grain : What one row represents ( e . g . , [ "id" ] , [ "customer_id" , "product_id" ] )
fix(context): merge overlay columns onto manifest columns by name (#94)
* fix(context): merge overlay columns onto manifest columns by name
composeOverlay was appending overlay columns to the manifest column list,
producing duplicate entries when dbt/metabase overlays declared a column
just to attach descriptions. The duplicates carried no `type`, so the
pydantic SourceDefinition rejected them at semantic-query time and broke
`ktx sl query` for every overlay-backed measure. Now overlay columns
match base columns by name (case-insensitive): same-name entries merge
onto the manifest (overlay fields win, type/role fall back to the base,
descriptions merge per source key) and only new names append.
* refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract
Overlay sources now have two distinct collections: `columns:` for computed
columns (requiring `expr` + `type`) and `column_overrides:` for metadata
patches to inherited manifest columns. Composing or loading an overlay that
mixes the two — or references an unknown column — fails with a typed error.
Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` /
`toResolvedWire` as the strict shape sent to the Python engine, and add a
schema contract test that diffs Zod against the Pydantic JSON schema dumped
by `python -m semantic_layer dump-schema`. `SourceDefinition` is now
`extra="forbid"` on the Python side.
`loadAllSources` surfaces per-file load errors instead of swallowing them,
so validation/query paths can report manifest shard parse failures.
* fix(context): make scan description generation resilient and quiet
A transient sampleTable failure during ingest used to take out every
table in a connection: generateTableDescription returned a hardcoded
'Table not found' string into descriptions.ai, and KtxDescriptionGenerator
was constructed without a logger, so the failure left no trail anywhere.
- sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff,
honouring KtxScanContext.signal via a new KtxAbortedError.
- On retry exhaustion or missing capability, table generation falls back
to a metadata-only prompt built from column name / native type / comment
/ rawDescriptions. The column path follows the same rule -- call the
LLM when any of samples or rawDescriptions are available; skip only
when both are absent.
- Logger is now threaded from KtxScanContext into the generator. Failures
emit structured KtxScanWarning entries (new description_fallback_used
code, plus existing sampling_failed / enrichment_failed /
connector_capability_missing). ktx scan groups warnings by code so a
batch of identical failures collapses to one summary line plus sample.
- Returns null on failure instead of the 'Table not found' sentinel; the
manifest writer's existing guard already skips empty descriptions, so
schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS
already strips stale 'ai' on merge, so existing YAML clears on next run.
Also suppress AI SDK v6 'system in messages' warning: pull system messages
out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages
helper and pass them top-level to generateText (preserves cacheControl
providerOptions on the SystemModelMessage). Agent-runner's local
splitSystemPromptMessages dedupes onto the shared helper.
* test(docs): align examples-docs assertions with revamped docs
PR #103 (setup/guide doc revamp) reworded several CLI examples and
connection labels; the assertions in scripts/examples-docs.test.mjs
still referenced the pre-revamp wording and were failing in CI on main.
Update the regexes to match the post-revamp content:
- drop the `--json` flag from the sl-query example expectation
- move the `Driver:` / `Status: ok` probe to the connection reference,
which is where that output now lives (driver id is lowercase
`postgres`, not the display name `PostgreSQL`)
- drop the obsolete `Install \`uv\`...` troubleshooting line
- accept `<connectionId>` everywhere; the docs no longer use the
hyphenated `<connection-id>` form
- match the `warehouse` connection id used in the quickstart instead of
the `postgres-warehouse` id only used in the README and setup ref
* fix(sl): skip TS/Python schema contract test when uv is unavailable
The TypeScript checks CI job does not install uv or Python, so the
module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw
ENOENT and failed the suite. Wrap the schema dump in a try/catch and
guard the describe block with `describe.skipIf` so the test skips in
environments without uv. Local dev and any CI job that has uv on PATH
still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
- columns : All columns with type ( string / number / time / boolean ) and optional descriptions . On overlays , columns are computed - only and require expr + type .
- column_overrides : Overlay - only metadata patches for existing manifest columns ( descriptions , role , visibility , constraints , enum_values , tests ) . Do not include type or expr .
2026-05-10 23:12:26 +02:00
- joins : Relationships to other sources ( to , on , relationship : many_to_one / one_to_many / one_to_one )
- measures : Pre - defined aggregations ( name , expr like "sum(amount)" , optional filter , optional segments — bare names of segments defined on the same source , optional description )
- segments : Named , reusable boolean predicates scoped to this source ( name , expr — a SQL boolean over this source 's columns, optional description). A measure references one with \`segments: [name]\`; a query references one with the dotted form \`source.segment_name\`. Use when the same predicate appears on 3+ measures — e.g. extract \`is_paid = true and is_refunded = ' 0 ' \ ` as \` segments: [{name: paid_non_refunded, expr: "..."}] \` and have each measure use \` segments: [paid_non_refunded] \` instead of re-typing the predicate inside \` sum(case when ... then x end) \` . Segments are predicates only — they cannot be selected as dimensions or grouped by; if you need to group by the predicate, add a \` columns[] \` entry instead.
< / source_definition >
< join_requirements >
Sources with joins : [ ] are disconnected from the semantic layer join graph and cannot be composed with other sources in semantic queries .
Before writing , use discover_data to check existing sources and their grain columns .
For each grain / key column in your source ( e . g . , account_id , item_id ) , find the matching dimension source ( e . g . , ACCOUNTS , ITEMS ) and declare a many_to_one join .
Example : a source graining on [ account_id ] should declare :
joins :
- to : ACCOUNTS
on : source_name.account_id = ACCOUNTS . ACCOUNT_ID
relationship : many_to_one
The on condition format : local_column = TARGET_SOURCE . target_column ( right side must include target source name ) .
Do NOT join back to a table that the SQL already aggregates from if the grain column is not in the output ( the relationship is already baked into the SQL ) .
< / join_requirements > ` ;
}
/** Schema describing this tool's accepted input: the module-level `slWriteSourceInputSchema`. */
get inputSchema() {
  return slWriteSourceInputSchema;
}
async call ( input : SlWriteSourceInput , context : ToolContext ) : Promise < ToolOutput < SemanticLayerStructured > > {
const { connectionId , sourceName } = input ;
const { name : author , email : authorEmail } = await this . authorResolver . resolve ( context . userId ) ;
const semanticLayerService = context . session ? . semanticLayerService ? ? this . semanticLayerService ;
const skipIndex = context . session ? . isWorktreeScoped === true ;
2026-05-18 13:38:06 +02:00
const targetConnectionValidation = validateActionTargetConnection ( context . session , connectionId ) ;
if ( ! targetConnectionValidation . ok ) {
return this . buildOutput ( false , [ targetConnectionValidation . error ] , sourceName ) ;
}
2026-05-12 16:56:58 -04:00
const rawPathValidation = validateActionRawPaths ( context . session , input . rawPaths ) ;
if ( ! rawPathValidation . ok ) {
return this . buildOutput ( false , [ rawPathValidation . error ] , sourceName ) ;
}
2026-05-10 23:12:26 +02:00
// Handle delete
if ( input . delete ) {
try {
await semanticLayerService . deleteSource ( connectionId , sourceName , author , authorEmail ) ;
if ( ! skipIndex ) {
fix(context): merge overlay columns onto manifest columns by name (#94)
* fix(context): merge overlay columns onto manifest columns by name
composeOverlay was appending overlay columns to the manifest column list,
producing duplicate entries when dbt/metabase overlays declared a column
just to attach descriptions. The duplicates carried no `type`, so the
pydantic SourceDefinition rejected them at semantic-query time and broke
`ktx sl query` for every overlay-backed measure. Now overlay columns
match base columns by name (case-insensitive): same-name entries merge
onto the manifest (overlay fields win, type/role fall back to the base,
descriptions merge per source key) and only new names append.
* refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract
Overlay sources now have two distinct collections: `columns:` for computed
columns (requiring `expr` + `type`) and `column_overrides:` for metadata
patches to inherited manifest columns. Composing or loading an overlay that
mixes the two — or references an unknown column — fails with a typed error.
Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` /
`toResolvedWire` as the strict shape sent to the Python engine, and add a
schema contract test that diffs Zod against the Pydantic JSON schema dumped
by `python -m semantic_layer dump-schema`. `SourceDefinition` is now
`extra="forbid"` on the Python side.
`loadAllSources` surfaces per-file load errors instead of swallowing them,
so validation/query paths can report manifest shard parse failures.
* fix(context): make scan description generation resilient and quiet
A transient sampleTable failure during ingest used to take out every
table in a connection: generateTableDescription returned a hardcoded
'Table not found' string into descriptions.ai, and KtxDescriptionGenerator
was constructed without a logger, so the failure left no trail anywhere.
- sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff,
honouring KtxScanContext.signal via a new KtxAbortedError.
- On retry exhaustion or missing capability, table generation falls back
to a metadata-only prompt built from column name / native type / comment
/ rawDescriptions. The column path follows the same rule -- call the
LLM when any of samples or rawDescriptions are available; skip only
when both are absent.
- Logger is now threaded from KtxScanContext into the generator. Failures
emit structured KtxScanWarning entries (new description_fallback_used
code, plus existing sampling_failed / enrichment_failed /
connector_capability_missing). ktx scan groups warnings by code so a
batch of identical failures collapses to one summary line plus sample.
- Returns null on failure instead of the 'Table not found' sentinel; the
manifest writer's existing guard already skips empty descriptions, so
schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS
already strips stale 'ai' on merge, so existing YAML clears on next run.
Also suppress AI SDK v6 'system in messages' warning: pull system messages
out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages
helper and pass them top-level to generateText (preserves cacheControl
providerOptions on the SystemModelMessage). Agent-runner's local
splitSystemPromptMessages dedupes onto the shared helper.
* test(docs): align examples-docs assertions with revamped docs
PR #103 (setup/guide doc revamp) reworded several CLI examples and
connection labels; the assertions in scripts/examples-docs.test.mjs
still referenced the pre-revamp wording and were failing in CI on main.
Update the regexes to match the post-revamp content:
- drop the `--json` flag from the sl-query example expectation
- move the `Driver:` / `Status: ok` probe to the connection reference,
which is where that output now lives (driver id is lowercase
`postgres`, not the display name `PostgreSQL`)
- drop the obsolete `Install \`uv\`...` troubleshooting line
- accept `<connectionId>` everywhere; the docs no longer use the
hyphenated `<connection-id>` form
- match the `warehouse` connection id used in the quickstart instead of
the `postgres-warehouse` id only used in the README and setup ref
* fix(sl): skip TS/Python schema contract test when uv is unavailable
The TypeScript checks CI job does not install uv or Python, so the
module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw
ENOENT and failed the suite. Wrap the schema dump in a try/catch and
guard the describe block with `describe.skipIf` so the test skips in
environments without uv. Local dev and any CI job that has uv on PATH
still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
const { sources : allSources } = await semanticLayerService . loadAllSources ( connectionId ) ;
2026-05-10 23:12:26 +02:00
await this . slSearchService . indexSources ( connectionId , allSources ) . catch ( ( ) = > { } ) ;
}
if ( context . session ) {
addTouchedSlSource ( context . session . touchedSlSources , connectionId , sourceName ) ;
context . session . actions . push ( {
target : 'sl' ,
type : 'removed' ,
key : sourceName ,
detail : 'Deleted source' ,
targetConnectionId : actionTargetConnectionId ( context . session . connectionId , connectionId ) ,
2026-05-12 16:56:58 -04:00
. . . ( rawPathValidation . rawPaths ? { rawPaths : rawPathValidation.rawPaths } : { } ) ,
2026-05-10 23:12:26 +02:00
} ) ;
}
return this . buildOutput ( true , [ ] , sourceName , { yaml : undefined , commitHash : undefined } ) ;
} catch ( error ) {
return this . buildOutput ( false , [ error instanceof Error ? error.message : String ( error ) ] , sourceName ) ;
}
}
// Require source for create/rewrite
if ( ! input . source ) {
return this . buildOutput (
false ,
[ 'Provide `source` to create or rewrite. For targeted edits, use sl_edit_source.' ] ,
sourceName ,
) ;
}
fix: read semantic sources safely (#284)
* fix: read semantic sources safely
* test: retarget reindex per-scope error case to a broken manifest
Reading a broken standalone source was made non-fatal in de1f1a8d (it is
surfaced for repair instead of throwing), so the reindex per-scope error
test no longer captured an error. Point it at a corrupt manifest shard,
which is the remaining fatal read failure the per-scope catch must
isolate, and assert the captured error names the offending file.
* fix(sl): decouple semantic-layer file names from warehouse naming rules
The in-file `name:` field is now the sole source identity; the filename is
a derived label that never participates in identity. This removes the
"Unsafe semantic-layer source name" failure class entirely: any warehouse
identifier (Snowflake's uppercase SIGNED_UP, EVENT$LOG, dotted names) can
be read, overlaid, edited, and deleted.
- New `source-files.ts`: one total filename derivation (safe lowercase
names verbatim; otherwise slug + sha256-hash suffix, immune to
case-insensitive-filesystem collisions) and one by-name file resolver.
- Reads resolve by name everywhere; the path-from-name fast path and
`assertSafeSourceName` are gone.
- Writes resolve-then-write: rewrites land on the file that declares the
name (human renames survive); new sources get a derived filename; a
derived path occupied by a different source fails instead of clobbering.
- `readSourceFile` returns null for missing files instead of forcing every
caller to launder IO errors; `deleteSource` distinguishes manifest-backed
sources from not-found instead of silently succeeding.
- `sl_write_source` accepts verbatim warehouse identifiers (snake_case is
now a recommendation for new sources) and rejects sourceName/source.name
mismatches; `sl_edit_source` rejects name-changing edits.
- Ingest projection commits, gate-repair allowlists, and touched-source
derivation use resolved paths / in-file names instead of interpolating
`<connId>/<name>.yaml`.
- Collapsed the five parallel path derivations and duplicated path-token
helpers onto the shared module; dropped dead service methods.
* fix(sl): resolve sources by declared name end-to-end and gate warehouse SQL with the parser-backed validator
- Key broken/renamed semantic-layer files by their recoverable in-file
name (slSourceNameForFile) so mid-edit sources stay reachable under
their real identity in reads, listings, and search
- Derive finalization touched sources from composed-source diffs and
recover deleted files' declared names from the pre-change commit
instead of parsing hash-derived filenames
- Resolve revert/rollback paths against history (listFilesAtCommit) so
human-renamed files are restored where they lived at preHead
- Validate ingest sql_execution through the daemon's sqlglot
validateReadOnly in the connection's dialect, sharing one
driver-to-dialect map (sql-analysis/dialect.ts) across MCP and ingest
- Harden the local read-only SQL backstop: accept leading comments,
reject smuggled second statements, and strip trailing
semicolons/comments before row-limit wrapping
2026-06-10 14:06:13 +02:00
// The in-file `name:` is the source's identity; the file is written under
// source.name while the orphan/shadow checks key on sourceName — a mismatch
// would validate one source and save another.
if ( input . source . name !== sourceName ) {
return this . buildOutput (
false ,
[ ` source.name " ${ input . source . name } " does not match sourceName " ${ sourceName } " — they must be identical. ` ] ,
sourceName ,
) ;
}
2026-05-10 23:12:26 +02:00
return this . writeFullSource (
connectionId ,
input . source ,
sourceName ,
author ,
authorEmail ,
context ,
semanticLayerService ,
skipIndex ,
2026-05-12 16:56:58 -04:00
rawPathValidation . rawPaths ,
2026-05-10 23:12:26 +02:00
) ;
}
/**
 * Validates a complete source definition and, when every check passes, persists it
 * through the semantic layer service.
 *
 * Order of operations as implemented below:
 *  1. Normalize descriptions and serialize the result to YAML.
 *  2. Return a failed output if `rejectOrphanOverlay` or `rejectStandaloneShadow` yields a message.
 *  3. Run `validateWithProposedSource`; any validation error aborts before anything is written.
 *  4. Call `writeSource`, then (unless `skipIndex`) reload all sources and reindex them.
 *  5. When a session is present, mark the source as touched and record an action on it.
 *
 * @param connectionId - Connection the source is read from, validated against and written to.
 * @param source - Source definition to write; treated as an overlay when it has neither a truthy `table` nor a truthy `sql`.
 * @param sourceName - Name used for the existing-file lookup, the overlay/shadow checks, the commit message and the output.
 * @param author - Passed through to `writeSource`.
 * @param authorEmail - Passed through to `writeSource`.
 * @param context - Tool context; its optional `session` receives the touched-source entry and the action record.
 * @param semanticLayerService - Service used for reading, validating, writing and reloading sources.
 * @param skipIndex - When true, the reload-and-reindex step after the write is skipped.
 * @param rawPaths - Attached to the recorded session action only when truthy.
 * @returns A tool output: success with the YAML, commit hash and validation details, or failure with
 *   error messages. Errors thrown inside the write/index/session step are caught and returned as a failure.
 */
private async writeFullSource (
connectionId : string ,
source : z.infer < typeof sourceInputSchema > ,
sourceName : string ,
author : string ,
authorEmail : string ,
context : ToolContext ,
semanticLayerService : SemanticLayerService ,
skipIndex : boolean ,
2026-05-12 16:56:58 -04:00
rawPaths : string [ ] | undefined ,
2026-05-10 23:12:26 +02:00
) : Promise < ToolOutput < SemanticLayerStructured > > {
2026-05-11 00:31:15 -07:00
// `fillMissing` is switched on only when the session carries a truthy `ingest` value.
const normalizedSource = normalizeSemanticLayerDescriptions ( source , { fillMissing : ! ! context . session ? . ingest } ) ;
// Overlay = neither a truthy `table` nor a truthy `sql` on the normalized source.
const isOverlay =
! ( 'table' in normalizedSource && normalizedSource . table ) && ! ( 'sql' in normalizedSource && normalizedSource . sql ) ;
2026-05-10 23:12:26 +02:00
// The existing file content (or null) only selects the commit-message wording and,
// further down, the 'updated' vs 'created' action type.
const existing = await this . readSourceYamlFromService ( semanticLayerService , connectionId , sourceName ) ;
const commitMessage = existing
? ` ${ isOverlay ? 'Update overlay' : 'Rewrite source' } : ${ sourceName } `
: ` ${ isOverlay ? 'Create overlay' : 'Create source' } : ${ sourceName } ` ;
2026-05-12 16:56:58 -04:00
const yamlContent = YAML . stringify ( normalizedSource , { indent : 2 , lineWidth : 0 , version : '1.1' } ) ;
2026-05-10 23:12:26 +02:00
// Both structural checks receive the serialized YAML and return an error message or null.
const orphanError = await this . rejectOrphanOverlay ( semanticLayerService , connectionId , sourceName , yamlContent ) ;
if ( orphanError ) {
return this . buildOutput ( false , [ orphanError ] , sourceName , { yaml : yamlContent } ) ;
}
const shadowError = await this . rejectStandaloneShadow ( semanticLayerService , connectionId , sourceName , yamlContent ) ;
if ( shadowError ) {
return this . buildOutput ( false , [ shadowError ] , sourceName , { yaml : yamlContent } ) ;
}
2026-05-11 00:31:15 -07:00
// NOTE(review): this is a compile-time assertion only; no runtime check happens on this line.
const validatedSource = normalizedSource as SemanticLayerSource ;
2026-05-10 23:12:26 +02:00
const validationResult = await semanticLayerService . validateWithProposedSource ( connectionId , validatedSource ) ;
const validationErrors = validationResult . errors ;
const validationWarnings = [ . . . validationResult . warnings ] ;
// Warnings reported for this source specifically; empty when none are recorded under its name.
const actionRequiredWarnings = validationResult . perSourceWarnings ? . [ sourceName ] ? ? [ ] ;
if ( validationErrors . length > 0 ) {
return this . buildOutput ( false , [ 'Validation failed — source was NOT saved:' , . . . validationErrors ] , sourceName , {
yaml : yamlContent ,
validationErrors ,
validationWarnings ,
actionRequiredWarnings ,
} ) ;
}
try {
const result = await semanticLayerService . writeSource (
connectionId ,
validatedSource ,
author ,
authorEmail ,
commitMessage ,
) ;
if ( ! skipIndex ) {
fix(context): merge overlay columns onto manifest columns by name (#94)
* fix(context): merge overlay columns onto manifest columns by name
composeOverlay was appending overlay columns to the manifest column list,
producing duplicate entries when dbt/metabase overlays declared a column
just to attach descriptions. The duplicates carried no `type`, so the
pydantic SourceDefinition rejected them at semantic-query time and broke
`ktx sl query` for every overlay-backed measure. Now overlay columns
match base columns by name (case-insensitive): same-name entries merge
onto the manifest (overlay fields win, type/role fall back to the base,
descriptions merge per source key) and only new names append.
* refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract
Overlay sources now have two distinct collections: `columns:` for computed
columns (requiring `expr` + `type`) and `column_overrides:` for metadata
patches to inherited manifest columns. Composing or loading an overlay that
mixes the two — or references an unknown column — fails with a typed error.
Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` /
`toResolvedWire` as the strict shape sent to the Python engine, and add a
schema contract test that diffs Zod against the Pydantic JSON schema dumped
by `python -m semantic_layer dump-schema`. `SourceDefinition` is now
`extra="forbid"` on the Python side.
`loadAllSources` surfaces per-file load errors instead of swallowing them,
so validation/query paths can report manifest shard parse failures.
* fix(context): make scan description generation resilient and quiet
A transient sampleTable failure during ingest used to take out every
table in a connection: generateTableDescription returned a hardcoded
'Table not found' string into descriptions.ai, and KtxDescriptionGenerator
was constructed without a logger, so the failure left no trail anywhere.
- sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff,
honouring KtxScanContext.signal via a new KtxAbortedError.
- On retry exhaustion or missing capability, table generation falls back
to a metadata-only prompt built from column name / native type / comment
/ rawDescriptions. The column path follows the same rule -- call the
LLM when any of samples or rawDescriptions are available; skip only
when both are absent.
- Logger is now threaded from KtxScanContext into the generator. Failures
emit structured KtxScanWarning entries (new description_fallback_used
code, plus existing sampling_failed / enrichment_failed /
connector_capability_missing). ktx scan groups warnings by code so a
batch of identical failures collapses to one summary line plus sample.
- Returns null on failure instead of the 'Table not found' sentinel; the
manifest writer's existing guard already skips empty descriptions, so
schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS
already strips stale 'ai' on merge, so existing YAML clears on next run.
Also suppress AI SDK v6 'system in messages' warning: pull system messages
out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages
helper and pass them top-level to generateText (preserves cacheControl
providerOptions on the SystemModelMessage). Agent-runner's local
splitSystemPromptMessages dedupes onto the shared helper.
* test(docs): align examples-docs assertions with revamped docs
PR #103 (setup/guide doc revamp) reworded several CLI examples and
connection labels; the assertions in scripts/examples-docs.test.mjs
still referenced the pre-revamp wording and were failing in CI on main.
Update the regexes to match the post-revamp content:
- drop the `--json` flag from the sl-query example expectation
- move the `Driver:` / `Status: ok` probe to the connection reference,
which is where that output now lives (driver id is lowercase
`postgres`, not the display name `PostgreSQL`)
- drop the obsolete `Install \`uv\`...` troubleshooting line
- accept `<connectionId>` everywhere; the docs no longer use the
hyphenated `<connection-id>` form
- match the `warehouse` connection id used in the quickstart instead of
the `postgres-warehouse` id only used in the README and setup ref
* fix(sl): skip TS/Python schema contract test when uv is unavailable
The TypeScript checks CI job does not install uv or Python, so the
module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw
ENOENT and failed the suite. Wrap the schema dump in a try/catch and
guard the describe block with `describe.skipIf` so the test skips in
environments without uv. Local dev and any CI job that has uv on PATH
still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
// NOTE(review): a throw from loadAllSources is not covered by the .catch on the next statement;
// it would reach the outer catch and report failure even though writeSource already succeeded — confirm intended.
const { sources : allSources } = await semanticLayerService . loadAllSources ( connectionId ) ;
2026-05-10 23:12:26 +02:00
// Rejections from indexSources are swallowed here, so an indexing failure does not fail the write.
await this . slSearchService . indexSources ( connectionId , allSources ) . catch ( ( ) = > { } ) ;
}
if ( context . session ) {
addTouchedSlSource ( context . session . touchedSlSources , connectionId , sourceName ) ;
context . session . actions . push ( {
target : 'sl' ,
type : existing ? 'updated' : 'created' ,
key : sourceName ,
detail : existing ? ` Rewrote source ` : ` Created source ` ,
targetConnectionId : actionTargetConnectionId ( context . session . connectionId , connectionId ) ,
2026-05-12 16:56:58 -04:00
. . . ( rawPaths ? { rawPaths } : { } ) ,
2026-05-10 23:12:26 +02:00
} ) ;
}
return this . buildOutput ( true , [ ] , sourceName , {
yaml : yamlContent ,
commitHash : result.commitHash ? ? undefined ,
validationErrors ,
validationWarnings ,
actionRequiredWarnings ,
} ) ;
} catch ( error ) {
// Any failure from the write/index/session step is reported as a failed output rather than rethrown.
return this . buildOutput ( false , [ error instanceof Error ? error.message : String ( error ) ] , sourceName ) ;
}
}
/**
 * Reads the stored file content of a source by name through the given service.
 *
 * @param service - Semantic layer service whose `readSourceFile` is called.
 * @param connectionId - Connection the source belongs to.
 * @param sourceName - Name of the source to look up.
 * @returns The file's `content`, or `null` when `readSourceFile` yields no file or the file has no content.
 */
private async readSourceYamlFromService (
service : SemanticLayerService ,
connectionId : string ,
sourceName : string ,
) : Promise < string | null > {
fix: read semantic sources safely (#284)
* fix: read semantic sources safely
* test: retarget reindex per-scope error case to a broken manifest
Reading a broken standalone source was made non-fatal in de1f1a8d (it is
surfaced for repair instead of throwing), so the reindex per-scope error
test no longer captured an error. Point it at a corrupt manifest shard,
which is the remaining fatal read failure the per-scope catch must
isolate, and assert the captured error names the offending file.
* fix(sl): decouple semantic-layer file names from warehouse naming rules
The in-file `name:` field is now the sole source identity; the filename is
a derived label that never participates in identity. This removes the
"Unsafe semantic-layer source name" failure class entirely: any warehouse
identifier (Snowflake's uppercase SIGNED_UP, EVENT$LOG, dotted names) can
be read, overlaid, edited, and deleted.
- New `source-files.ts`: one total filename derivation (safe lowercase
names verbatim; otherwise slug + sha256-hash suffix, immune to
case-insensitive-filesystem collisions) and one by-name file resolver.
- Reads resolve by name everywhere; the path-from-name fast path and
`assertSafeSourceName` are gone.
- Writes resolve-then-write: rewrites land on the file that declares the
name (human renames survive); new sources get a derived filename; a
derived path occupied by a different source fails instead of clobbering.
- `readSourceFile` returns null for missing files instead of forcing every
caller to launder IO errors; `deleteSource` distinguishes manifest-backed
sources from not-found instead of silently succeeding.
- `sl_write_source` accepts verbatim warehouse identifiers (snake_case is
now a recommendation for new sources) and rejects sourceName/source.name
mismatches; `sl_edit_source` rejects name-changing edits.
- Ingest projection commits, gate-repair allowlists, and touched-source
derivation use resolved paths / in-file names instead of interpolating
`<connId>/<name>.yaml`.
- Collapsed the five parallel path derivations and duplicated path-token
helpers onto the shared module; dropped dead service methods.
* fix(sl): resolve sources by declared name end-to-end and gate warehouse SQL with the parser-backed validator
- Key broken/renamed semantic-layer files by their recoverable in-file
name (slSourceNameForFile) so mid-edit sources stay reachable under
their real identity in reads, listings, and search
- Derive finalization touched sources from composed-source diffs and
recover deleted files' declared names from the pre-change commit
instead of parsing hash-derived filenames
- Resolve revert/rollback paths against history (listFilesAtCommit) so
human-renamed files are restored where they lived at preHead
- Validate ingest sql_execution through the daemon's sqlglot
validateReadOnly in the connection's dialect, sharing one
driver-to-dialect map (sql-analysis/dialect.ts) across MCP and ingest
- Harden the local read-only SQL backstop: accept leading comments,
reject smuggled second statements, and strip trailing
semicolons/comments before row-limit wrapping
2026-06-10 14:06:13 +02:00
const file = await service . readSourceFile ( connectionId , sourceName ) ;
// Collapse both "no file" and "file without content" into a single null result.
return file ? . content ? ? null ;
2026-05-10 23:12:26 +02:00
}
/**
 * Guards against writing an overlay whose name matches no manifest entry.
 *
 * The YAML is parsed and treated as an overlay when it has neither a truthy
 * `table` nor a truthy `sql`. Content that fails to parse, or parses to a
 * non-object, is not judged here and yields `null`.
 *
 * @param semanticLayerService - Service queried for the connection's manifest source names.
 * @param connectionId - Connection whose manifest is consulted.
 * @param sourceName - Name the overlay is being written under.
 * @param content - Serialized YAML of the source about to be written.
 * @returns A multi-line error message (with up to three nearest manifest names when any
 *   qualify) if the overlay is orphaned, otherwise `null`.
 */
private async rejectOrphanOverlay(
  semanticLayerService: SemanticLayerService,
  connectionId: string,
  sourceName: string,
  content: string,
): Promise<string | null> {
  let doc: Record<string, unknown>;
  try {
    doc = YAML.parse(content) as Record<string, unknown>;
  } catch {
    // Unparseable YAML is not this check's concern.
    return null;
  }
  if (!doc || typeof doc !== 'object') {
    return null;
  }

  // A truthy `table` or `sql` makes this a standalone source, which this check does not cover.
  if (('table' in doc && doc.table) || ('sql' in doc && doc.sql)) {
    return null;
  }

  const knownNames = await semanticLayerService.listManifestSourceNames(connectionId);
  if (knownNames.includes(sourceName)) {
    return null;
  }

  const closest = this.nearestMatches(sourceName, knownNames, 3);
  const hint =
    closest.length > 0
      ? ` Nearest manifest matches: ${closest.join(', ')} . `
      : ` No manifest entries resemble " ${sourceName} ". `;

  const messageLines = [
    ` Error: cannot write " ${sourceName} " as an overlay — no manifest entry with that name exists. `,
    hint,
    ` To customize an existing base table, retarget the overlay at one of the nearest matches. `,
    ` For a LookML derived_table or any source backed by inline SQL, rewrite as a standalone `,
    ` curated source with a top-level "sql:" block plus explicit "grain:" and "columns:". `,
  ];
  return messageLines.join('\n');
}
/**
 * Guards against writing a standalone source under a name that the manifest already provides.
 *
 * The YAML is parsed and treated as standalone when it has a truthy `table` or a truthy `sql`.
 * Content that fails to parse, parses to a non-object, or is an overlay yields `null`.
 *
 * @param semanticLayerService - Service asked whether `sourceName` is manifest-backed.
 * @param connectionId - Connection whose manifest is consulted.
 * @param sourceName - Name the source is being written under.
 * @param content - Serialized YAML of the source about to be written.
 * @returns A multi-line error message explaining how to rewrite the YAML as an overlay when the
 *   name is manifest-backed, otherwise `null`.
 */
private async rejectStandaloneShadow (
semanticLayerService : SemanticLayerService ,
connectionId : string ,
sourceName : string ,
content : string ,
) : Promise < string | null > {
let parsed : Record < string , unknown > ;
try {
parsed = YAML . parse ( content ) as Record < string , unknown > ;
} catch {
// Unparseable YAML is not judged by this check.
return null ;
}
if ( ! parsed || typeof parsed !== 'object' ) {
return null ;
}
// Overlay = neither a truthy `table` nor a truthy `sql`; overlays cannot shadow the manifest.
const isOverlay = ! ( 'table' in parsed && parsed . table ) && ! ( 'sql' in parsed && parsed . sql ) ;
if ( isOverlay ) {
return null ;
}
const isManifestBacked = await semanticLayerService . isManifestBacked ( connectionId , sourceName ) ;
if ( ! isManifestBacked ) {
return null ;
}
return [
` Error: cannot write " ${ sourceName } " as a standalone source — a manifest entry with that name already exists. ` ,
` Writing standalone would drop the manifest's columns and joins, leaving only what you list here. ` ,
` To add measures/segments on top of the manifest, rewrite this YAML as an overlay: ` ,
fix(context): merge overlay columns onto manifest columns by name (#94)
* fix(context): merge overlay columns onto manifest columns by name
composeOverlay was appending overlay columns to the manifest column list,
producing duplicate entries when dbt/metabase overlays declared a column
just to attach descriptions. The duplicates carried no `type`, so the
pydantic SourceDefinition rejected them at semantic-query time and broke
`ktx sl query` for every overlay-backed measure. Now overlay columns
match base columns by name (case-insensitive): same-name entries merge
onto the manifest (overlay fields win, type/role fall back to the base,
descriptions merge per source key) and only new names append.
* refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract
Overlay sources now have two distinct collections: `columns:` for computed
columns (requiring `expr` + `type`) and `column_overrides:` for metadata
patches to inherited manifest columns. Composing or loading an overlay that
mixes the two — or references an unknown column — fails with a typed error.
Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` /
`toResolvedWire` as the strict shape sent to the Python engine, and add a
schema contract test that diffs Zod against the Pydantic JSON schema dumped
by `python -m semantic_layer dump-schema`. `SourceDefinition` is now
`extra="forbid"` on the Python side.
`loadAllSources` surfaces per-file load errors instead of swallowing them,
so validation/query paths can report manifest shard parse failures.
* fix(context): make scan description generation resilient and quiet
A transient sampleTable failure during ingest used to take out every
table in a connection: generateTableDescription returned a hardcoded
'Table not found' string into descriptions.ai, and KtxDescriptionGenerator
was constructed without a logger, so the failure left no trail anywhere.
- sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff,
honouring KtxScanContext.signal via a new KtxAbortedError.
- On retry exhaustion or missing capability, table generation falls back
to a metadata-only prompt built from column name / native type / comment
/ rawDescriptions. The column path follows the same rule -- call the
LLM when any of samples or rawDescriptions are available; skip only
when both are absent.
- Logger is now threaded from KtxScanContext into the generator. Failures
emit structured KtxScanWarning entries (new description_fallback_used
code, plus existing sampling_failed / enrichment_failed /
connector_capability_missing). ktx scan groups warnings by code so a
batch of identical failures collapses to one summary line plus sample.
- Returns null on failure instead of the 'Table not found' sentinel; the
manifest writer's existing guard already skips empty descriptions, so
schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS
already strips stale 'ai' on merge, so existing YAML clears on next run.
Also suppress AI SDK v6 'system in messages' warning: pull system messages
out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages
helper and pass them top-level to generateText (preserves cacheControl
providerOptions on the SystemModelMessage). Agent-runner's local
splitSystemPromptMessages dedupes onto the shared helper.
* test(docs): align examples-docs assertions with revamped docs
PR #103 (setup/guide doc revamp) reworded several CLI examples and
connection labels; the assertions in scripts/examples-docs.test.mjs
still referenced the pre-revamp wording and were failing in CI on main.
Update the regexes to match the post-revamp content:
- drop the `--json` flag from the sl-query example expectation
- move the `Driver:` / `Status: ok` probe to the connection reference,
which is where that output now lives (driver id is lowercase
`postgres`, not the display name `PostgreSQL`)
- drop the obsolete `Install \`uv\`...` troubleshooting line
- accept `<connectionId>` everywhere; the docs no longer use the
hyphenated `<connection-id>` form
- match the `warehouse` connection id used in the quickstart instead of
the `postgres-warehouse` id only used in the README and setup ref
* fix(sl): skip TS/Python schema contract test when uv is unavailable
The TypeScript checks CI job does not install uv or Python, so the
module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw
ENOENT and failed the suite. Wrap the schema dump in a try/catch and
guard the describe block with `describe.skipIf` so the test skips in
environments without uv. Local dev and any CI job that has uv on PATH
still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
` - Remove "sql:", "table:", "grain:", and base-table "columns:". ` ,
` - Keep "name:" plus "measures:", "segments:", "descriptions:", "joins:", "disable_joins:", ` ,
` "exclude_columns:", "column_overrides:", and/or computed-only "columns:" entries with expr + type. ` ,
2026-05-10 23:12:26 +02:00
` - The manifest's schema is inherited automatically. ` ,
` If you really need a different base table, use a different source name. ` ,
] . join ( '\n' ) ;
}
/**
 * Picks the candidates that most resemble `needle`, best first.
 *
 * Each candidate is scored case-insensitively with `jaroWinkler`, plus 0.2 when
 * either string is a prefix of the other and 0.1 when either contains the other.
 * Only candidates scoring above 0.4 are kept.
 *
 * @param needle - Name to find look-alikes for.
 * @param haystack - Candidate names to rank.
 * @param limit - Maximum number of names to return.
 * @returns Up to `limit` candidates in descending score order; empty when nothing qualifies.
 */
private nearestMatches(needle: string, haystack: string[], limit: number): string[] {
  if (haystack.length === 0) {
    return [];
  }

  const target = needle.toLowerCase();
  const ranked = haystack.map((candidate) => {
    const folded = candidate.toLowerCase();
    const sharesPrefix = folded.startsWith(target) || target.startsWith(folded);
    const oneContainsOther = folded.includes(target) || target.includes(folded);

    // Bonuses are added in the same order every time: prefix first, then substring.
    let score = jaroWinkler(target, folded);
    if (sharesPrefix) {
      score += 0.2;
    }
    if (oneContainsOther) {
      score += 0.1;
    }
    return { candidate, score };
  });

  ranked.sort((left, right) => right.score - left.score);

  const aboveThreshold = ranked.filter((entry) => entry.score > 0.4);
  const top = aboveThreshold.slice(0, limit);
  return top.map((entry) => entry.candidate);
}
}
/**
 * Computes the Jaro-Winkler similarity of two strings, compared by UTF-16 code unit.
 *
 * Identical strings score exactly 1 and strings with no matching characters score 0.
 * The common-prefix bonus covers at most four leading characters, is weighted by 0.1,
 * and is applied regardless of how high the base Jaro score is.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A similarity in the range 0 to 1, where higher means more alike.
 */
function jaroWinkler(a: string, b: string): number {
  if (a === b) {
    return 1;
  }

  const lenA = a.length;
  const lenB = b.length;

  // Two equal characters only count as a match when their positions differ by at most `reach`.
  const reach = Math.max(0, Math.floor(Math.max(lenA, lenB) / 2) - 1);
  const usedA = new Array<boolean>(lenA).fill(false);
  const usedB = new Array<boolean>(lenB).fill(false);

  let common = 0;
  for (let ai = 0; ai < lenA; ai += 1) {
    const lo = Math.max(0, ai - reach);
    const hi = Math.min(ai + reach + 1, lenB);
    for (let bi = lo; bi < hi; bi += 1) {
      // Take the first unused, equal character inside the window.
      if (!usedB[bi] && a[ai] === b[bi]) {
        usedA[ai] = true;
        usedB[bi] = true;
        common += 1;
        break;
      }
    }
  }

  if (common === 0) {
    return 0;
  }

  // Pair up the matched characters of both strings in order and count the pairs that differ;
  // half of that count is the number of transpositions.
  let outOfOrder = 0;
  let cursor = 0;
  for (let ai = 0; ai < lenA; ai += 1) {
    if (usedA[ai]) {
      while (!usedB[cursor]) {
        cursor += 1;
      }
      if (a[ai] !== b[cursor]) {
        outOfOrder += 1;
      }
      cursor += 1;
    }
  }

  const jaro = (common / lenA + common / lenB + (common - outOfOrder / 2) / common) / 3;

  // Length of the shared prefix, capped at four characters.
  const prefixCap = Math.min(4, lenA, lenB);
  let shared = 0;
  for (; shared < prefixCap; shared += 1) {
    if (a[shared] !== b[shared]) {
      break;
    }
  }

  return jaro + shared * 0.1 * (1 - jaro);
}