test: split cli tests from source tree (#216)

* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name } shape, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
This commit is contained in:
Andrey Avtomonov 2026-05-26 08:49:05 +02:00 committed by GitHub
parent 924868841d
commit 56985b7e09
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
548 changed files with 5048 additions and 2228 deletions

View file

@ -0,0 +1,228 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { createKtxDictionarySearchService } from '../../../src/context/sl/dictionary-search.js';
describe('createKtxDictionarySearchService', () => {
// Scratch directory and project handle, rebuilt before every test case.
let tempDir: string;
let project: KtxLocalProject;

beforeEach(async () => {
  const scratchPrefix = join(tmpdir(), 'ktx-dictionary-search-');
  tempDir = await mkdtemp(scratchPrefix);

  const projectDir = join(tempDir, 'project');
  project = await initKtxProject({ projectDir });

  // Register two connections so searches can span, or be scoped to, more than one.
  const { connections } = project.config;
  connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };
  connections.billing = { driver: 'postgres', url: 'env:BILLING_DATABASE_URL' };
});

afterEach(async () => {
  // Drop the whole scratch tree; `force` tolerates an already-missing directory.
  const removal = { recursive: true, force: true };
  await rm(tempDir, removal);
});
/**
 * Writes a relationship-profile JSON artifact into the project's file store.
 *
 * The artifact lands under
 * `raw-sources/<connectionId>/live-database/<syncId>/enrichment/` and carries the
 * supplied `columns` map verbatim; every other field is a fixed fixture value.
 *
 * @param input.connectionId Connection the profile belongs to.
 * @param input.syncId Sync identifier used as a path segment.
 * @param input.columns Column profiles keyed by an arbitrary column id.
 */
async function seedProfile(input: {
  connectionId: string;
  syncId: string;
  columns: Record<string, unknown>;
}): Promise<void> {
  const { connectionId, syncId, columns } = input;
  const artifactPath = `raw-sources/${connectionId}/live-database/${syncId}/enrichment/relationship-profile.json`;
  const profile = {
    connectionId,
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 4,
    tables: [],
    columns,
    warnings: [],
  };
  // Pretty-printed with a trailing newline.
  const serialized = `${JSON.stringify(profile, null, 2)}\n`;
  await project.fileStore.writeFile(artifactPath, serialized, 'ktx', 'ktx@example.com', 'Seed relationship profile');
}
it('returns matches and non-authoritative misses across configured connections', async () => {
  const publicTable = (name: string) => ({ catalog: null, db: 'public', name });

  // One sampled text column per connection.
  await seedProfile({
    connectionId: 'warehouse',
    syncId: 'sync-1',
    columns: {
      'orders.status': {
        table: publicTable('orders'),
        column: 'status',
        nativeType: 'text',
        normalizedType: 'string',
        distinctCount: 3,
        sampleValues: ['paid', 'refunded', 'pending'],
      },
    },
  });
  await seedProfile({
    connectionId: 'billing',
    syncId: 'sync-2',
    columns: {
      'customers.name': {
        table: publicTable('customers'),
        column: 'name',
        nativeType: 'text',
        normalizedType: 'string',
        distinctCount: 4,
        sampleValues: ['Acme Corp', 'Globex'],
      },
    },
  });

  // Builders for the repeated pieces of the expected payload.
  const readyEntry = (connectionId: string, syncId: string) => ({
    connectionId,
    coverage: {
      sampledRows: null,
      valuesPerColumn: null,
      profiledColumns: 1,
      syncId,
      profiledAt: null,
    },
    status: 'ready',
  });
  const notSampledIn = (connectionId: string) => ({ connectionId, reason: 'value_not_in_sample' });

  const service = createKtxDictionarySearchService(project);
  const outcome = await service.search({ values: ['PAID', 'missing'] });

  // 'PAID' is expected to match the lower-case sample 'paid'; billing is listed
  // before warehouse even though warehouse was seeded first.
  expect(outcome).toEqual({
    searched: [readyEntry('billing', 'sync-2'), readyEntry('warehouse', 'sync-1')],
    results: [
      {
        value: 'PAID',
        matches: [
          {
            connectionId: 'warehouse',
            sourceName: 'orders',
            columnName: 'status',
            matchedValue: 'paid',
            cardinality: 3,
          },
        ],
        misses: [notSampledIn('billing')],
      },
      {
        value: 'missing',
        matches: [],
        misses: [notSampledIn('billing'), notSampledIn('warehouse')],
      },
    ],
  });
});
it('distinguishes missing profile artifacts from profiles with no candidate columns', async () => {
  // Only billing gets a profile, and its single column is an integer id.
  await seedProfile({
    connectionId: 'billing',
    syncId: 'sync-empty',
    columns: {
      'events.id': {
        table: { catalog: null, db: 'public', name: 'events' },
        column: 'id',
        nativeType: 'integer',
        normalizedType: 'integer',
        distinctCount: 100,
        sampleValues: [1, 2, 3],
      },
    },
  });

  const emptyCoverage = (syncId: string | null) => ({
    sampledRows: null,
    valuesPerColumn: null,
    profiledColumns: 0,
    syncId,
    profiledAt: null,
  });

  const service = createKtxDictionarySearchService(project);
  const outcome = await service.search({ values: ['Acme'] });

  // billing has an artifact but nothing searchable; warehouse has no artifact at all.
  expect(outcome).toEqual({
    searched: [
      { connectionId: 'billing', coverage: emptyCoverage('sync-empty'), status: 'no_candidate_columns' },
      { connectionId: 'warehouse', coverage: emptyCoverage(null), status: 'no_profile_artifact' },
    ],
    results: [
      {
        value: 'Acme',
        matches: [],
        misses: [
          { connectionId: 'billing', reason: 'no_candidate_columns' },
          { connectionId: 'warehouse', reason: 'no_profile_artifact' },
        ],
      },
    ],
  });
});
it('scopes search to the requested connection', async () => {
  // Both connections sample the value 'paid' in a `status` column.
  const statusColumn = (tableName: string, distinctCount: number) => ({
    table: { catalog: null, db: 'public', name: tableName },
    column: 'status',
    nativeType: 'text',
    normalizedType: 'string',
    distinctCount,
    sampleValues: ['paid'],
  });

  await seedProfile({
    connectionId: 'warehouse',
    syncId: 'sync-1',
    columns: { 'orders.status': statusColumn('orders', 3) },
  });
  await seedProfile({
    connectionId: 'billing',
    syncId: 'sync-2',
    columns: { 'invoices.status': statusColumn('invoices', 2) },
  });

  const service = createKtxDictionarySearchService(project);
  const outcome = await service.search({ connectionId: 'billing', values: ['paid'] });

  // Only billing may appear, both in `searched` and in the matches.
  expect(outcome).toMatchObject({
    searched: [{ connectionId: 'billing', status: 'ready' }],
    results: [
      {
        value: 'paid',
        matches: [{ connectionId: 'billing', sourceName: 'invoices', columnName: 'status', matchedValue: 'paid' }],
        misses: [],
      },
    ],
  });
});
});

View file

@ -0,0 +1,337 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { KtxSemanticLayerComputePort } from '../../../src/context/daemon/semantic-layer-compute.js';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { compileLocalSlQuery } from '../../../src/context/sl/local-query.js';
describe('compileLocalSlQuery', () => {
// Per-test scratch directory, project handle, and stubbed compute port.
let tempDir: string;
let project: KtxLocalProject;
let compute: KtxSemanticLayerComputePort;

beforeEach(async () => {
  tempDir = await mkdtemp(join(tmpdir(), 'ktx-local-query-'));
  project = await initKtxProject({ projectDir: join(tempDir, 'project') });
  project.config.connections.warehouse = { driver: 'postgres' };

  // Table-backed source. YAML nesting is significant: each `type`/`expr` key must
  // sit inside its list item, otherwise the fixture does not describe the two
  // columns and one measure the assertions expect.
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/orders.yaml',
    `name: orders
table: public.orders
grain:
  - id
columns:
  - name: id
    type: number
  - name: status
    type: string
measures:
  - name: order_count
    expr: count(*)
joins: []
`,
    'ktx',
    'ktx@example.com',
    'Add orders source',
  );

  // Overlay source that declares no table of its own and inherits columns from `orders`.
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/orders_overlay.yaml',
    `name: orders_overlay
inherits_columns_from: orders
columns:
  - name: paid_at
    type: timestamp
joins: []
measures: []
grain: []
`,
    'ktx',
    'ktx@example.com',
    'Add overlay source',
  );

  // Stub compute port: `query` echoes the requested dialect/measures/dimensions
  // and always returns the same SQL text and column list.
  compute = {
    query: vi.fn(async (input) => ({
      sql: 'select status, count(*) as order_count from public.orders group by status',
      dialect: input.dialect,
      columns: [{ name: 'orders.status' }, { name: 'orders.order_count' }],
      plan: { measures: input.query.measures, dimensions: input.query.dimensions },
    })),
    validateSources: vi.fn(),
    generateSources: vi.fn(),
  };
});

afterEach(async () => {
  await rm(tempDir, { recursive: true, force: true });
});
it('compiles a local semantic-layer query with computable sources only', async () => {
  // A fresh literal per use, so the call argument and the expectation never share an object.
  const ordersByStatus = () => ({
    measures: ['orders.order_count'],
    dimensions: ['orders.status'],
    limit: 25,
  });

  const result = await compileLocalSlQuery(project, {
    connectionId: 'warehouse',
    query: ordersByStatus(),
    compute,
  });

  // Only the table-backed `orders` source is expected to reach the compute port.
  const expectedSources = [
    {
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [
        { name: 'id', type: 'number' },
        { name: 'status', type: 'string' },
      ],
      measures: [{ name: 'order_count', expr: 'count(*)' }],
      joins: [],
    },
  ];
  expect(compute.query).toHaveBeenCalledWith({
    sources: expectedSources,
    dialect: 'postgres',
    query: ordersByStatus(),
  });

  // Without an executor the result carries SQL and headers but no rows.
  const expectedPlan = {
    measures: ['orders.order_count'],
    dimensions: ['orders.status'],
    execution: {
      mode: 'compile_only',
      reason: 'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.',
    },
  };
  expect(result).toEqual({
    connectionId: 'warehouse',
    dialect: 'postgres',
    sql: 'select status, count(*) as order_count from public.orders group by status',
    headers: ['orders.status', 'orders.order_count'],
    rows: [],
    totalRows: 0,
    plan: expectedPlan,
  });
});
it('compiles a local semantic-layer query from manifest-backed scan sources', async () => {
  // Manifest shard: `payments` is an entry of `tables`, and its columns are list
  // items beneath it. The nesting must be explicit for the shard to describe a
  // `payments` table with two columns, as asserted below.
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    `tables:
  payments:
    table: public.payments
    columns:
      - name: payment_id
        type: number
        pk: true
      - name: amount
        type: number
`,
    'ktx',
    'ktx@example.com',
    'Add manifest shard',
  );

  await compileLocalSlQuery(project, {
    connectionId: 'warehouse',
    query: {
      measures: ['sum(payments.amount)'],
      dimensions: [],
    },
    compute,
  });

  // The expectation pairs the `pk: true` column with `grain: ['payment_id']`.
  // Optional column fields are listed as explicit `undefined`.
  expect(compute.query).toHaveBeenLastCalledWith({
    sources: expect.arrayContaining([
      {
        name: 'payments',
        table: 'public.payments',
        grain: ['payment_id'],
        columns: [
          {
            name: 'payment_id',
            type: 'number',
            role: undefined,
            descriptions: undefined,
            constraints: undefined,
            enum_values: undefined,
            tests: undefined,
          },
          {
            name: 'amount',
            type: 'number',
            role: undefined,
            descriptions: undefined,
            constraints: undefined,
            enum_values: undefined,
            tests: undefined,
          },
        ],
        joins: [],
        measures: [],
      },
    ]),
    dialect: 'postgres',
    query: {
      measures: ['sum(payments.amount)'],
      dimensions: [],
    },
  });
});
it('strips authoring-only fields (usage, inherits_columns_from) before sending sources to the daemon', async () => {
  // Manifest shard with a table-level `usage` block. Nesting is explicit so that
  // `usage` belongs to the `invoices` table rather than being a stray top-level key.
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    `tables:
  invoices:
    table: public.invoices
    columns:
      - name: invoice_id
        type: number
        pk: true
      - name: amount
        type: number
    usage:
      narrative: Activation policy windows table for invoice analytics.
      frequencyTier: mid
      commonFilters:
        - amount
      commonGroupBys: []
      commonJoins: []
      staleSince: null
`,
    'ktx',
    'ktx@example.com',
    'Add manifest shard with usage',
  );

  await compileLocalSlQuery(project, {
    connectionId: 'warehouse',
    query: { measures: ['sum(invoices.amount)'], dimensions: [] },
    compute,
  });

  // Inspect the payload of the most recent compute call and pick out `invoices`.
  const lastCall = (compute.query as ReturnType<typeof vi.fn>).mock.calls.at(-1)?.[0];
  const invoices = lastCall?.sources.find((s: Record<string, unknown>) => s.name === 'invoices');
  expect(invoices).toBeDefined();
  expect(invoices).not.toHaveProperty('usage');
  expect(invoices).not.toHaveProperty('inherits_columns_from');
  expect(invoices).not.toHaveProperty('source_type');
});
it('resolves the only configured connection when connectionId is omitted', async () => {
  // No connectionId is passed; `warehouse` (postgres) is the only configured connection.
  const request = {
    query: { measures: ['orders.order_count'], dimensions: [] },
    compute,
  };
  await compileLocalSlQuery(project, request);

  const postgresCall = expect.objectContaining({ dialect: 'postgres' });
  expect(compute.query).toHaveBeenCalledWith(postgresCall);
});
it('executes compiled SQL through a local query executor when requested', async () => {
  // Stub executor that always yields a single row.
  const execute = vi.fn(async () => ({
    headers: ['status', 'order_count'],
    rows: [['paid', 2]],
    totalRows: 1,
    command: 'SELECT',
    rowCount: 1,
  }));
  const queryExecutor = { execute };

  const result = await compileLocalSlQuery(project, {
    connectionId: 'warehouse',
    query: {
      measures: ['orders.order_count'],
      dimensions: ['orders.status'],
      limit: 25,
    },
    compute,
    execute: true,
    maxRows: 10,
    queryExecutor,
  });

  // The executor receives the SQL from the compute stub plus connection details.
  const expectedExecutorInput = {
    connectionId: 'warehouse',
    projectDir: project.projectDir,
    connection: { driver: 'postgres' },
    sql: 'select status, count(*) as order_count from public.orders group by status',
    maxRows: 10,
  };
  expect(queryExecutor.execute).toHaveBeenCalledWith(expectedExecutorInput);

  const { rows, totalRows, plan } = result;
  expect(rows).toEqual([['paid', 2]]);
  expect(totalRows).toBe(1);
  expect(plan.execution).toEqual({
    mode: 'executed',
    driver: 'postgres',
    maxRows: 10,
    rowCount: 1,
  });
});
it('emits progress while compiling and executing a local semantic-layer query', async () => {
  // Collected in arrival order.
  const seen: Array<{ progress: number; message: string }> = [];
  const execute = vi.fn(async () => ({
    headers: ['status', 'order_count'],
    rows: [['paid', 2]],
    totalRows: 1,
    command: 'SELECT',
    rowCount: 1,
  }));
  const queryExecutor = { execute };

  const result = await compileLocalSlQuery(project, {
    connectionId: 'warehouse',
    query: {
      measures: ['orders.order_count'],
      dimensions: ['orders.status'],
      limit: 25,
    },
    compute,
    execute: true,
    maxRows: 10,
    queryExecutor,
    onProgress: ({ progress, message }) => {
      seen.push({ progress, message });
    },
  });

  expect(result.totalRows).toBe(1);

  // Four events are expected, from 0 through 1.
  const expectedTimeline = [
    { progress: 0, message: 'Compiling query' },
    { progress: 0.3, message: 'Generating SQL' },
    { progress: 0.6, message: 'Executing' },
    { progress: 1, message: 'Fetched 1 rows' },
  ];
  expect(seen).toEqual(expectedTimeline);
});
it('requires a query executor for executed mode', async () => {
  // `execute: true` with no `queryExecutor` supplied.
  const attempt = compileLocalSlQuery(project, {
    connectionId: 'warehouse',
    query: { measures: ['orders.order_count'], dimensions: [] },
    compute,
    execute: true,
  });

  await expect(attempt).rejects.toThrow('Local semantic-layer execution requires a query executor.');
});
it('requires connectionId when multiple connections are configured', async () => {
  // A second connection makes the implicit choice ambiguous.
  project.config.connections.analytics = { driver: 'bigquery' };

  const attempt = compileLocalSlQuery(project, {
    query: { measures: ['orders.order_count'], dimensions: [] },
    compute,
  });

  await expect(attempt).rejects.toThrow(
    'connectionId is required when the local project has zero or multiple connections.',
  );
});
});

View file

@ -0,0 +1,421 @@
import { access, mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import {
listLocalSlSources,
readLocalSlSource,
searchLocalSlSources,
validateLocalSlSource,
writeLocalSlSource,
} from '../../../src/context/sl/local-sl.js';
/**
 * Table-backed `orders` source with two columns and one measure.
 *
 * Indentation is part of the fixture: list items sit two spaces under their
 * parent key, and each item's remaining keys are aligned four spaces in so they
 * stay inside that item.
 */
const ORDERS_YAML = [
  'name: orders',
  'table: public.orders',
  'grain:',
  '  - order_id',
  'columns:',
  '  - name: order_id',
  '    type: string',
  '  - name: revenue',
  '    type: number',
  'measures:',
  '  - name: total_revenue',
  '    expr: sum(revenue)',
  '',
].join('\n');
/**
 * Table-backed `tickets` source with a user description, two columns, and one
 * measure.
 *
 * Indentation is part of the fixture: nested keys are indented under their
 * parent, and keys following a `- name:` entry are aligned inside that item.
 */
const SUPPORT_YAML = [
  'name: tickets',
  'descriptions:',
  '  user: Support tickets grouped by priority.',
  'table: public.tickets',
  'grain:',
  '  - ticket_id',
  'columns:',
  '  - name: ticket_id',
  '    type: string',
  '  - name: priority',
  '    type: string',
  'measures:',
  '  - name: ticket_count',
  '    expr: count(*)',
  '',
].join('\n');
describe('local semantic-layer helpers', () => {
// Scratch directory and project handle, rebuilt before every test case.
let tempDir: string;
let project: KtxLocalProject;

beforeEach(async () => {
  const scratchPrefix = join(tmpdir(), 'ktx-local-sl-');
  tempDir = await mkdtemp(scratchPrefix);

  const projectDir = join(tempDir, 'project');
  project = await initKtxProject({ projectDir });
});

afterEach(async () => {
  // Drop the whole scratch tree; `force` tolerates an already-missing directory.
  const removal = { recursive: true, force: true };
  await rm(tempDir, removal);
});
it('writes, reads, lists, and validates semantic-layer sources', async () => {
  const ordersPath = 'semantic-layer/warehouse/orders.yaml';
  const target = { connectionId: 'warehouse', sourceName: 'orders' };

  // Write: the reported path follows semantic-layer/<connection>/<source>.yaml.
  const { path: writtenPath } = await writeLocalSlSource(project, { ...target, yaml: ORDERS_YAML });
  expect(writtenPath).toBe(ordersPath);

  // Read: the stored YAML comes back unchanged.
  const stored = await readLocalSlSource(project, target);
  expect(stored).toMatchObject({
    connectionId: 'warehouse',
    name: 'orders',
    path: ordersPath,
    yaml: ORDERS_YAML,
  });

  // List: a single summary row with column, join, and measure counts.
  const listing = await listLocalSlSources(project, { connectionId: 'warehouse' });
  expect(listing).toEqual([
    {
      columnCount: 2,
      connectionId: 'warehouse',
      joinCount: 0,
      measureCount: 1,
      name: 'orders',
      path: ordersPath,
    },
  ]);

  // Validate: the fixture passes with no errors.
  const verdict = await validateLocalSlSource(ORDERS_YAML);
  expect(verdict).toEqual({ valid: true, errors: [] });
});
it('validates table-backed sources against matching physical manifests when project context is provided', async () => {
  // Physical manifest: the table is an entry of `tables`, with its columns nested
  // beneath it. The nesting must be explicit for the manifest to describe
  // `int_active_contract_arr` and its two physical columns.
  await project.fileStore.writeFile(
    'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml',
    `tables:
  int_active_contract_arr:
    table: orbit_analytics.int_active_contract_arr
    columns:
      - { name: contract_id, type: string }
      - { name: contract_arr_cents, type: number }
`,
    'ktx',
    'ktx@example.com',
    'Add warehouse manifest',
  );

  // The source declares `arr_cents`, which the manifest above does not contain.
  const invalidDbtSource = [
    'name: int_active_contract_arr',
    'table: orbit_analytics.int_active_contract_arr',
    'grain: [contract_id]',
    'columns:',
    ' - { name: contract_id, type: string }',
    ' - { name: arr_cents, type: number }',
    'measures:',
    ' - { name: arr, expr: sum(arr_cents) }',
    '',
  ].join('\n');

  // Validation runs under a different connection id ('dbt-main') than the one
  // holding the manifest ('postgres-warehouse').
  const result = await validateLocalSlSource(invalidDbtSource, { project, connectionId: 'dbt-main' });
  expect(result.valid).toBe(false);
  expect(result.errors.join('\n')).toContain('arr_cents');
  expect(result.errors.join('\n')).toContain('absent from physical table');
});
it('lists and reads manifest-backed scan sources as queryable sources', async () => {
  // Manifest shard: `payments` is an entry of `tables`, with two columns nested
  // beneath it. The nesting must be explicit for the listing below to report a
  // `payments` source with `columnCount: 2`.
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    `tables:
  payments:
    table: public.payments
    columns:
      - name: payment_id
        type: number
        pk: true
      - name: amount
        type: number
`,
    'ktx',
    'ktx@example.com',
    'Add manifest shard',
  );

  // Manifest-backed sources are addressed as <shard path>#<table name>.
  await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
    {
      columnCount: 2,
      connectionId: 'warehouse',
      joinCount: 0,
      measureCount: 0,
      name: 'payments',
      path: 'semantic-layer/warehouse/_schema/public.yaml#payments',
    },
  ]);

  await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'payments' })).resolves.toEqual(
    expect.objectContaining({
      columnCount: 2,
      connectionId: 'warehouse',
      joinCount: 0,
      measureCount: 0,
      name: 'payments',
      path: 'semantic-layer/warehouse/_schema/public.yaml#payments',
      yaml: expect.stringContaining('table: public.payments'),
    }),
  );
});
it('expands manifest-backed scan sources when listing all connections', async () => {
  // Manifest shard: `payments` is an entry of `tables`, with two columns nested
  // beneath it. The nesting must be explicit for the listing below to report a
  // `payments` source with `columnCount: 2`.
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    `tables:
  payments:
    table: public.payments
    columns:
      - name: payment_id
        type: number
        pk: true
      - name: amount
        type: number
`,
    'ktx',
    'ktx@example.com',
    'Add manifest shard',
  );

  // No connection filter is passed; the shard is still expanded into one row.
  await expect(listLocalSlSources(project)).resolves.toEqual([
    {
      columnCount: 2,
      connectionId: 'warehouse',
      joinCount: 0,
      measureCount: 0,
      name: 'payments',
      path: 'semantic-layer/warehouse/_schema/public.yaml#payments',
    },
  ]);
});
it('searches local semantic-layer source text through SQLite FTS', async () => {
  // Two sources in the same connection; the expectation below lists only `orders`.
  const fixtures = [
    ['orders', ORDERS_YAML],
    ['tickets', SUPPORT_YAML],
  ] as const;
  for (const [sourceName, yaml] of fixtures) {
    await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName, yaml });
  }

  const results = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'total revenue' });

  const ordersHit = expect.objectContaining({
    connectionId: 'warehouse',
    name: 'orders',
    path: 'semantic-layer/warehouse/orders.yaml',
    score: expect.any(Number),
  });
  expect(results).toEqual([ordersHit]);
  expect(results[0]?.score).toBeGreaterThan(0);

  // After searching, the SQLite database file exists inside the project directory.
  const databasePath = join(project.projectDir, '.ktx/db.sqlite');
  const probe = await access(databasePath);
  expect(probe).toBeUndefined();
});
it('searches historic SQL usage and returns frequency tier plus FTS snippet', async () => {
  // Manifest shard whose `orders` table carries a `usage` block. Nesting is
  // explicit so that `usage`, its lists, and the join's `on` keys belong to the
  // right parent.
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    `tables:
  orders:
    table: public.orders
    usage:
      narrative: Analysts inspect paid order lifecycle by customer segment.
      frequencyTier: high
      commonFilters:
        - status
        - created_at
      commonGroupBys:
        - customer_segment
      commonJoins:
        - table: public.customers
          on:
            - customer_id
    columns:
      - name: order_id
        type: string
      - name: status
        type: string
`,
    'ktx',
    'ktx@example.com',
    'Add usage-backed manifest shard',
  );

  // The query words appear only in the usage narrative, not in table or column names.
  const results = await searchLocalSlSources(project, {
    connectionId: 'warehouse',
    query: 'paid lifecycle customer segment',
  });

  expect(results).toEqual([
    expect.objectContaining({
      connectionId: 'warehouse',
      name: 'orders',
      path: 'semantic-layer/warehouse/_schema/public.yaml#orders',
      frequencyTier: 'high',
      snippet: expect.stringContaining('<mark>'),
      matchReasons: expect.arrayContaining(['lexical']),
    }),
  ]);
  expect(results[0]?.snippet).toContain('lifecycle');
});
it('searches all connections with one global hybrid ranking pass', async () => {
  await writeLocalSlSource(project, {
    connectionId: 'warehouse',
    sourceName: 'orders',
    yaml: ORDERS_YAML,
  });

  // A second `orders` source under another connection. Indentation is part of
  // the fixture: keys following a `- name:` entry are aligned inside that item.
  await writeLocalSlSource(project, {
    connectionId: 'finance',
    sourceName: 'orders',
    yaml: [
      'name: orders',
      'descriptions:',
      '  user: Finance orders used for invoice reconciliation.',
      'table: finance.orders',
      'grain:',
      '  - order_id',
      'columns:',
      '  - name: order_id',
      '    type: string',
      '  - name: invoice_status',
      '    type: string',
      '',
    ].join('\n'),
  });

  // No connection filter: both sources are ranked together, finance first.
  const results = await searchLocalSlSources(project, { query: 'orders' });
  expect(results.map((result) => `${result.connectionId}/${result.name}`)).toEqual([
    'finance/orders',
    'warehouse/orders',
  ]);
  expect(results[0]).toMatchObject({
    score: expect.any(Number),
    matchReasons: expect.arrayContaining(['lexical']),
    lanes: expect.arrayContaining([expect.objectContaining({ lane: 'lexical', status: 'available' })]),
  });
});
it('returns dictionary evidence when collected sample values explain a match', async () => {
  await writeLocalSlSource(project, {
    connectionId: 'warehouse',
    sourceName: 'orders',
    yaml: ORDERS_YAML,
  });

  // Relationship profile whose `orders.status` column sampled 'paid' and 'refunded'.
  const statusProfile = {
    table: { catalog: null, db: 'public', name: 'orders' },
    column: 'status',
    nativeType: 'text',
    normalizedType: 'string',
    rowCount: 10,
    nullCount: 0,
    distinctCount: 2,
    uniquenessRatio: 0.2,
    nullRate: 0,
    sampleValues: ['paid', 'refunded'],
    minTextLength: 4,
    maxTextLength: 8,
  };
  const profile = {
    connectionId: 'warehouse',
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 2,
    tables: [],
    columns: { 'orders.status': statusProfile },
    warnings: [],
  };
  const serialized = `${JSON.stringify(profile, null, 2)}\n`;
  await project.fileStore.writeFile(
    'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
    serialized,
    'ktx',
    'ktx@example.com',
    'Seed dictionary profile',
  );

  // 'refunded' appears nowhere in ORDERS_YAML, only in the sampled values.
  const results = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'refunded' });
  const dictionaryHit = expect.objectContaining({
    connectionId: 'warehouse',
    name: 'orders',
    matchReasons: ['dictionary'],
    dictionaryMatches: [{ column: 'status', values: ['refunded'] }],
  });
  expect(results).toEqual([dictionaryHit]);
});
it('adds the token lane alongside lexical matches for normalized query terms', async () => {
  const ordersSource = { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML };
  await writeLocalSlSource(project, ordersSource);

  // The query carries trailing punctuation after the source name.
  const [topHit] = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'orders---' });

  expect(topHit).toMatchObject({
    connectionId: 'warehouse',
    name: 'orders',
    matchReasons: expect.arrayContaining(['token']),
  });
});
it('reports schema validation errors without writing invalid YAML', async () => {
  // The source has no `grain` key, which the validator is expected to report.
  const invalidYaml = ['name: broken', 'table: public.orders', 'columns: []', ''].join('\n');

  const verdict = await validateLocalSlSource(invalidYaml);
  expect(verdict).toMatchObject({
    valid: false,
    errors: expect.arrayContaining([expect.stringContaining('grain')]),
  });

  // Writing the same YAML is rejected.
  const writeAttempt = writeLocalSlSource(project, {
    connectionId: 'warehouse',
    sourceName: 'broken',
    yaml: invalidYaml,
  });
  await expect(writeAttempt).rejects.toThrow('Invalid semantic-layer source');
});
it('reports overlay columns that are not computed columns', async () => {
  // Overlay-style source (no `table`) whose only column has a description but no
  // `type`. Indentation is part of the fixture: `descriptions` must sit inside
  // the `- name: status` item for the error to point at `columns.0`.
  const invalidYaml = [
    'name: orders',
    'columns:',
    '  - name: status',
    '    descriptions:',
    '      user: Order status.',
    '',
  ].join('\n');

  await expect(
    validateLocalSlSource(invalidYaml, { project, connectionId: 'warehouse', sourceName: 'orders' }),
  ).resolves.toEqual({
    valid: false,
    errors: expect.arrayContaining([expect.stringContaining('columns.0.type')]),
  });
});
it('rejects unsafe source paths', async () => {
  // A source name containing a parent-directory segment.
  const traversal = { connectionId: 'warehouse', sourceName: '../orders' };
  const attempt = readLocalSlSource(project, traversal);

  await expect(attempt).rejects.toThrow('Unsafe semantic-layer source name');
});
});

View file

@ -0,0 +1,271 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { createServer } from 'node:net';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { assertSearchBackendConformanceCase } from '../search/backend-conformance.test-utils.js';
import { searchLocalSlSources, writeLocalSlSource, type LocalSlSourceSearchResult } from '../../../src/context/sl/local-sl.js';
import { searchLocalSlSourcesWithPglitePrototype } from '../../../src/context/sl/pglite-sl-search-prototype.js';
/**
 * Table-backed `orders` source with a user description, three columns, and one
 * measure.
 *
 * Indentation is part of the fixture: nested keys are indented under their
 * parent, and keys following a `- name:` entry are aligned inside that item.
 */
const ORDERS_YAML = [
  'name: orders',
  'descriptions:',
  '  user: Orders with paid revenue and refund status.',
  'table: public.orders',
  'grain:',
  '  - order_id',
  'columns:',
  '  - name: order_id',
  '    type: string',
  '  - name: status',
  '    type: string',
  '  - name: revenue',
  '    type: number',
  'measures:',
  '  - name: total_revenue',
  '    expr: sum(revenue)',
  '',
].join('\n');
/**
 * Table-backed `orders` source for the finance connection, with a user
 * description and two columns.
 *
 * Indentation is part of the fixture: nested keys are indented under their
 * parent, and keys following a `- name:` entry are aligned inside that item.
 */
const FINANCE_ORDERS_YAML = [
  'name: orders',
  'descriptions:',
  '  user: Finance orders used for invoice reconciliation.',
  'table: finance.orders',
  'grain:',
  '  - order_id',
  'columns:',
  '  - name: order_id',
  '    type: string',
  '  - name: invoice_status',
  '    type: string',
  '',
].join('\n');
/**
 * Table-backed `customers` source with a user description and two columns.
 *
 * Indentation is part of the fixture: nested keys are indented under their
 * parent, and keys following a `- name:` entry are aligned inside that item.
 */
const CUSTOMERS_YAML = [
  'name: customers',
  'descriptions:',
  '  user: Customer lifecycle accounts by region.',
  'table: public.customers',
  'grain:',
  '  - customer_id',
  'columns:',
  '  - name: customer_id',
  '    type: string',
  '  - name: region',
  '    type: string',
  '',
].join('\n');
/**
 * Deterministic embedding stub: maps text to one of three fixed 3-dimensional
 * vectors based on case-insensitive substring checks.
 */
class FakeEmbeddingPort {
  readonly maxBatchSize = 16;

  /**
   * Returns `[1, 0, 0]` for text mentioning "semantic revenue" or "orders with
   * paid revenue", `[0.72, 0.28, 0]` for text mentioning "finance orders", and
   * `[0, 1, 0]` otherwise. The first rule wins when several apply.
   */
  async computeEmbedding(text: string): Promise<number[]> {
    const haystack = text.toLowerCase();
    const mentions = (needle: string): boolean => haystack.includes(needle);

    if (mentions('semantic revenue') || mentions('orders with paid revenue')) {
      return [1, 0, 0];
    }
    return mentions('finance orders') ? [0.72, 0.28, 0] : [0, 1, 0];
  }

  /** Embeds every text with `computeEmbedding`, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const pending: Array<Promise<number[]>> = [];
    for (const text of texts) {
      pending.push(this.computeEmbedding(text));
    }
    return Promise.all(pending);
  }
}
/**
 * Asks the OS for a free TCP port on 127.0.0.1 by listening on port 0, reading
 * the assigned port, and closing the listener again.
 *
 * Note that the port is released before it is returned, so another process can
 * in principle claim it before the caller binds it.
 *
 * @returns The port number the temporary listener was bound to.
 * @throws If the listener cannot be started or closed, or reports a non-TCP address.
 */
async function allocatePort(): Promise<number> {
  const server = createServer();

  // A failed listen() surfaces as an 'error' event; route it into the promise so
  // the caller sees a rejection instead of an unhandled event.
  await new Promise<void>((resolve, reject) => {
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      server.off('error', reject);
      resolve();
    });
  });

  const address = server.address();
  if (typeof address !== 'object' || address === null) {
    // Do not leave the listener open on the failure path.
    server.close();
    throw new Error('Expected TCP server address while allocating a PGlite SL prototype port.');
  }

  const { port } = address;
  await new Promise<void>((resolve, reject) => {
    server.close((error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve();
    });
  });
  return port;
}
/**
 * Projects a search result onto the fields passed to the conformance assertion.
 *
 * The id is `<connectionId>/<name>`; a missing `matchReasons` becomes an empty
 * array, while `lanes` and `dictionaryMatches` are passed through untouched.
 */
function toConformanceResult(result: LocalSlSourceSearchResult) {
  const { connectionId, name, score, matchReasons, lanes, dictionaryMatches } = result;
  const id = `${connectionId}/${name}`;
  return {
    id,
    score,
    matchReasons: matchReasons ?? [],
    lanes,
    dictionaryMatches,
  };
}
/**
 * Seeds a project with three semantic-layer sources (two under `warehouse`, one
 * under `finance`) and a `warehouse` relationship profile holding sampled
 * values for `orders.status` and `customers.region`.
 *
 * @param project Project whose file store receives the fixtures.
 */
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
  // Sources are written one after another, in this order.
  const sources = [
    { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML },
    { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML },
    { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML },
  ];
  for (const source of sources) {
    await writeLocalSlSource(project, source);
  }

  // Builds one text-column profile; key order matches the serialized artifact.
  const textColumn = (
    tableName: string,
    column: string,
    distinctCount: number,
    uniquenessRatio: number,
    sampleValues: string[],
    maxTextLength: number,
  ) => ({
    table: { catalog: null, db: 'public', name: tableName },
    column,
    nativeType: 'text',
    normalizedType: 'string',
    rowCount: 10,
    nullCount: 0,
    distinctCount,
    uniquenessRatio,
    nullRate: 0,
    sampleValues,
    minTextLength: 4,
    maxTextLength,
  });

  const profile = {
    connectionId: 'warehouse',
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 2,
    tables: [],
    columns: {
      'orders.status': textColumn('orders', 'status', 2, 0.2, ['paid', 'refunded'], 8),
      'customers.region': textColumn('customers', 'region', 3, 0.3, ['emea', 'amer', 'apac'], 4),
    },
    warnings: [],
  };

  const serialized = `${JSON.stringify(profile, null, 2)}\n`;
  await project.fileStore.writeFile(
    'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
    serialized,
    'ktx',
    'ktx@example.com',
    'Seed PGlite dictionary profile',
  );
}
describe('PGlite semantic-layer search prototype', () => {
// Per-test scratch directory, project handle, PGlite data directory, and port.
let tempDir: string;
let project: KtxLocalProject;
let pgliteDataDir: string;
let port: number;

beforeEach(async () => {
  const scratchPrefix = join(tmpdir(), 'ktx-pglite-sl-prototype-');
  tempDir = await mkdtemp(scratchPrefix);

  const projectDir = join(tempDir, 'project');
  project = await initKtxProject({ projectDir });
  // The fake embedding port used by these tests returns 3-element vectors.
  project.config.ingest.embeddings.dimensions = 3;

  pgliteDataDir = join(tempDir, 'pglite-search');
  port = await allocatePort();
  await seedSemanticLayerProject(project);
});

afterEach(async () => {
  // Drop the whole scratch tree; `force` tolerates an already-missing directory.
  const removal = { recursive: true, force: true };
  await rm(tempDir, removal);
});
it('returns lexical semantic-layer matches through PGlite FTS', async () => {
  const pglite = { dataDir: pgliteDataDir, host: '127.0.0.1', port };
  const hits = await searchLocalSlSourcesWithPglitePrototype(project, {
    query: 'paid revenue',
    limit: 5,
    pglite,
  });

  // No embedding service is supplied, so the semantic lane is expected to be skipped.
  const expectedLanes = {
    lexical: { status: 'available' },
    semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
  };
  assertSearchBackendConformanceCase({
    backendName: 'pglite-owner-prototype',
    surface: 'semantic-layer',
    caseName: 'pglite lexical source ranking',
    results: hits.map((hit) => toConformanceResult(hit)),
    expectedTopIds: ['warehouse/orders'],
    expectedReasonsById: {
      'warehouse/orders': ['lexical'],
    },
    expectedLanes,
  });
});
it('returns dictionary evidence through PGlite pg_trgm and exact matching', async () => {
const results = await searchLocalSlSourcesWithPglitePrototype(project, {
connectionId: 'warehouse',
query: 'refund',
limit: 5,
pglite: { dataDir: pgliteDataDir, host: '127.0.0.1', port },
});
assertSearchBackendConformanceCase({
backendName: 'pglite-owner-prototype',
surface: 'semantic-layer',
caseName: 'pglite dictionary source evidence',
results: results.map(toConformanceResult),
expectedTopIds: ['warehouse/orders'],
expectedReasonsById: {
'warehouse/orders': ['dictionary'],
},
expectedLanes: {
dictionary: { status: 'available' },
semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
},
expectedDictionaryMatchesById: {
'warehouse/orders': [{ column: 'status', values: ['refunded'] }],
},
});
});
it('returns semantic matches through PGlite vector ordering when embeddings are configured', async () => {
const results = await searchLocalSlSourcesWithPglitePrototype(project, {
query: 'semantic revenue',
limit: 5,
embeddingService: new FakeEmbeddingPort(),
pglite: { dataDir: pgliteDataDir, host: '127.0.0.1', port },
});
assertSearchBackendConformanceCase({
backendName: 'pglite-owner-prototype',
surface: 'semantic-layer',
caseName: 'pglite semantic source ranking',
results: results.map(toConformanceResult),
expectedTopIds: ['warehouse/orders'],
expectedReasonsById: {
'warehouse/orders': ['semantic'],
},
expectedLanes: {
semantic: { status: 'available' },
},
});
});
it('routes through PGlite only when the private local search input opts in', async () => {
const results = await searchLocalSlSources(project, {
query: 'refnd',
limit: 5,
backend: 'pglite-owner-prototype',
pglite: { dataDir: pgliteDataDir, host: '127.0.0.1', port },
});
expect(results[0]).toMatchObject({
connectionId: 'warehouse',
name: 'orders',
matchReasons: expect.arrayContaining(['dictionary']),
dictionaryMatches: [{ column: 'status', values: ['refunded'] }],
});
});
});

View file

@ -0,0 +1,68 @@
import { execFileSync } from 'node:child_process';
import { Ajv2020 } from 'ajv/dist/2020.js';
import { describe, expect, it } from 'vitest';
import { resolvedSourceSchema } from '../../../src/context/sl/schemas.js';
import { toResolvedWire } from '../../../src/context/sl/semantic-layer.service.js';
import type { SemanticLayerSource } from '../../../src/context/sl/types.js';
/**
 * Runs `uv run python -m semantic_layer dump-schema` and parses its stdout
 * as JSON.
 *
 * The command is executed with the working directory four levels above this
 * module's URL, stdin and stderr discarded. Any failure — the command cannot
 * be spawned, exits non-zero, or prints something that is not valid JSON —
 * yields `null` instead of throwing, which the suite below uses as its skip
 * condition.
 *
 * @returns The parsed schema object, or `null` when it could not be obtained.
 */
function loadPythonSourceDefinitionSchema(): Record<string, unknown> | null {
  const dumpSchemaArgs = ['run', 'python', '-m', 'semantic_layer', 'dump-schema'];
  try {
    const workingDir = new URL('../../../..', import.meta.url);
    const schemaJson = execFileSync('uv', dumpSchemaArgs, {
      cwd: workingDir,
      encoding: 'utf8',
      // stdin ignored, stdout captured, stderr silenced.
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    const parsed = JSON.parse(schemaJson) as Record<string, unknown>;
    return parsed;
  } catch {
    // Best effort: an unavailable toolchain must not fail the test run.
    return null;
  }
}
// Loaded once at module evaluation; `null` means the Python schema could not
// be produced and the contract suite below is skipped.
const sourceDefinitionJsonSchema = loadPythonSourceDefinitionSchema();

// Resolved-source fixtures that must be accepted by both the TS schema and the
// Python SourceDefinition JSON Schema.
const fixtures: SemanticLayerSource[] = [
  // Table-backed source carrying dbt-provenance metadata on the source and on a column.
  {
    name: 'orders',
    table: 'public.orders',
    grain: ['id'],
    columns: [
      { name: 'id', type: 'number' },
      {
        name: 'status',
        type: 'string',
        descriptions: { dbt: 'Order lifecycle status.' },
        constraints: { dbt: { not_null: true } },
        enum_values: { dbt: ['placed', 'shipped'] },
        tests: { dbt: [{ name: 'accepted_values', package: 'dbt' }] },
      },
    ],
    joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
    measures: [{ name: 'order_count', expr: 'count(id)' }],
    segments: [{ name: 'paid', expr: "status = 'paid'" }],
    default_time_dimension: { dbt: 'created_at' },
    tags: { dbt: ['mart'] },
    freshness: { dbt: { loaded_at_field: 'updated_at' } },
  },
  // SQL-backed source with no joins or measures.
  {
    name: 'aav_orders',
    // The status literal is quoted so the fixture is valid SQL; unquoted it
    // would be parsed as a reference to a column named `paid`.
    sql: "select id, status from public.orders where status = 'paid'",
    grain: ['id'],
    columns: [{ name: 'id', type: 'number' }],
    joins: [],
    measures: [],
  },
];
// Cross-language contract check: skipped entirely when the Python schema is unavailable.
describe.skipIf(sourceDefinitionJsonSchema === null)('resolved source JSON Schema contract', () => {
  it('keeps TS resolved-source fixtures accepted by the Python SourceDefinition schema', () => {
    // Compiled inside the test so nothing is compiled when the suite is skipped.
    const validator = new Ajv2020({ allErrors: true, strict: false }).compile(
      sourceDefinitionJsonSchema as Record<string, unknown>,
    );

    fixtures.forEach((fixture) => {
      const wire = toResolvedWire(fixture);

      const tsParse = resolvedSourceSchema.safeParse(wire);
      expect(tsParse.success).toBe(true);

      // The validator populates `errors` as a side effect of the call, so it
      // must run before the failure message is built.
      const acceptedByPython = validator(wire);
      expect(acceptedByPython, JSON.stringify(validator.errors, null, 2)).toBe(true);
    });
  });
});

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,115 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { loadLatestSlDictionaryEntries } from '../../../src/context/sl/sl-dictionary-profile.js';
// Verifies that dictionary entries come from the most recent relationship
// profile and that a missing profile yields an empty list.
describe('loadLatestSlDictionaryEntries', () => {
  let tempDir: string;
  let project: KtxLocalProject;

  /**
   * Writes a `warehouse` relationship profile at `path` whose `columns` map is
   * the given object; every other field is identical across the profiles used here.
   */
  async function writeWarehouseProfile(
    path: string,
    columns: Record<string, unknown>,
    message: string,
  ): Promise<void> {
    const profile = {
      connectionId: 'warehouse',
      driver: 'postgres',
      sqlAvailable: true,
      queryCount: 4,
      tables: [],
      columns,
      warnings: [],
    };
    await project.fileStore.writeFile(
      path,
      `${JSON.stringify(profile, null, 2)}\n`,
      'ktx',
      'ktx@example.com',
      message,
    );
  }

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-sl-dictionary-profile-'));
    project = await initKtxProject({ projectDir: join(tempDir, 'project') });
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('loads latest relationship-profile sample values for dictionary candidate columns', async () => {
    // Older profile (sync-1): a low-cardinality status column plus a fully
    // unique customer_id column.
    await writeWarehouseProfile(
      'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
      {
        'orders.status': {
          table: { catalog: null, db: 'public', name: 'orders' },
          column: 'status',
          nativeType: 'text',
          normalizedType: 'string',
          rowCount: 20,
          nullCount: 0,
          distinctCount: 3,
          uniquenessRatio: 0.15,
          nullRate: 0,
          sampleValues: ['paid', 'refunded', 'pending'],
          minTextLength: 4,
          maxTextLength: 8,
        },
        'orders.customer_id': {
          table: { catalog: null, db: 'public', name: 'orders' },
          column: 'customer_id',
          nativeType: 'text',
          normalizedType: 'string',
          rowCount: 20,
          nullCount: 0,
          distinctCount: 20,
          uniquenessRatio: 1,
          nullRate: 0,
          sampleValues: ['cus_1'],
          minTextLength: 5,
          maxTextLength: 5,
        },
      },
      'Seed profile',
    );

    // Newer profile (sync-2): only the status column, with different samples.
    await writeWarehouseProfile(
      'raw-sources/warehouse/live-database/sync-2/enrichment/relationship-profile.json',
      {
        'orders.status': {
          table: { catalog: null, db: 'public', name: 'orders' },
          column: 'status',
          nativeType: 'text',
          normalizedType: 'string',
          rowCount: 20,
          nullCount: 0,
          distinctCount: 2,
          uniquenessRatio: 0.1,
          nullRate: 0,
          sampleValues: ['settled', 'voided'],
          minTextLength: 6,
          maxTextLength: 7,
        },
      },
      'Seed newer profile',
    );

    // Only the sync-2 values are expected back.
    const entries = loadLatestSlDictionaryEntries(project, ['warehouse']);
    await expect(entries).resolves.toEqual([
      { connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'settled', cardinality: 2 },
      { connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'voided', cardinality: 2 },
    ]);
  });

  it('returns an empty list when no relationship profile exists', async () => {
    const entries = loadLatestSlDictionaryEntries(project, ['warehouse']);
    await expect(entries).resolves.toEqual([]);
  });
});

View file

@ -0,0 +1,295 @@
import { describe, expect, it, vi } from 'vitest';
import { buildSemanticLayerSourceSearchText, SlSearchService } from '../../../src/context/sl/sl-search.service.js';
import type { SemanticLayerSource } from '../../../src/context/sl/types.js';
// Unit tests for SlSearchService: search-text construction, snippet
// pass-through from the repository, and indexing statistics.
describe('SlSearchService', () => {
  /** Embedding port stub with a fixed batch size and un-programmed mocks. */
  function makeEmbeddingStub() {
    return { maxBatchSize: 16, computeEmbedding: vi.fn(), computeEmbeddingsBulk: vi.fn() };
  }

  /** Repository stub exposing every method the service is given, as bare mocks. */
  function makeRepositoryStub() {
    return {
      upsertSources: vi.fn(),
      getExistingSearchTexts: vi.fn(),
      deleteStale: vi.fn(),
      deleteByConnection: vi.fn(),
      deleteByConnectionAndName: vi.fn(),
      search: vi.fn(),
    };
  }

  /** Service wired to fresh stubs; enough for tests that only build search text. */
  function makeService(): SlSearchService {
    return new SlSearchService(makeEmbeddingStub(), makeRepositoryStub());
  }

  /**
   * Repository stub for `indexSources` tests: reports `existing` as the rows
   * already indexed and resolves `deleteStale` with `staleDeleted`.
   */
  function makeIndexingRepository(
    existing: Map<string, { searchText: string; hasEmbedding: boolean }>,
    staleDeleted: number,
  ) {
    return {
      ...makeRepositoryStub(),
      upsertSources: vi.fn().mockResolvedValue(undefined),
      getExistingSearchTexts: vi.fn().mockResolvedValue(existing),
      deleteStale: vi.fn().mockResolvedValue(staleDeleted),
      deleteByConnection: vi.fn().mockResolvedValue(0),
    };
  }

  /** Smallest `orders` source shared by several tests; a fresh object per call. */
  function makeMinimalOrdersSource(): SemanticLayerSource {
    return {
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [{ name: 'id', type: 'number' }],
      joins: [],
      measures: [],
    };
  }

  it('builds search text from source, columns, measures, and joins', () => {
    const service = makeService();
    const source: SemanticLayerSource = {
      name: 'orders',
      descriptions: { user: 'Customer orders' },
      table: 'public.orders',
      grain: ['id'],
      columns: [
        { name: 'id', type: 'string' },
        { name: 'amount', type: 'number', descriptions: { user: 'Order amount' } },
      ],
      measures: [{ name: 'revenue', expr: 'sum(amount)', description: 'Gross revenue' }],
      joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
    };

    expect(service.buildSearchText(source)).toContain('orders');
    expect(service.buildSearchText(source)).toContain('Customer orders');
    expect(service.buildSearchText(source)).toContain('amount (number) Order amount');
    expect(service.buildSearchText(source)).toContain('measure: revenue sum(amount) Gross revenue');
    expect(service.buildSearchText(source)).toContain('join: customers (many_to_one)');
  });

  it('exports the same canonical search text builder used by SlSearchService', () => {
    const service = makeService();
    const source: SemanticLayerSource = {
      name: 'orders',
      descriptions: { user: 'Customer orders' },
      table: 'public.orders',
      grain: ['id'],
      columns: [
        {
          name: 'status',
          type: 'string',
          enum_values: { dbt: ['paid', 'refunded'] },
          constraints: { dbt: { not_null: true } },
        },
      ],
      joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
      measures: [{ name: 'total_revenue', expr: 'sum(revenue)', description: 'Gross revenue' }],
      tags: { dbt: ['finance'] },
    };

    expect(buildSemanticLayerSourceSearchText(source)).toBe(service.buildSearchText(source));
    expect(buildSemanticLayerSourceSearchText(source)).toContain('dbt values: paid, refunded');
    expect(buildSemanticLayerSourceSearchText(source)).toContain('measure: total_revenue sum(revenue) Gross revenue');
    expect(buildSemanticLayerSourceSearchText(source)).toContain('dbt tags: finance');
  });

  it('includes dbt enum, not_null, and unique tokens for columns', () => {
    const service = makeService();
    const source: SemanticLayerSource = {
      name: 'src_orders',
      table: 'public.orders',
      grain: [],
      columns: [
        {
          name: 'status',
          type: 'string',
          descriptions: {},
          enum_values: { dbt: ['a', 'b'] },
          constraints: { dbt: { not_null: true, unique: true } },
        },
      ],
      joins: [],
      measures: [],
    };

    const text = service.buildSearchText(source);
    expect(text).toContain('dbt values: a, b');
    expect(text).toContain('not_null');
    expect(text).toContain('unique');
  });

  it('includes dbt default time token for MetricFlow agg_time_dimension', () => {
    const service = makeService();
    const source: SemanticLayerSource = {
      ...makeMinimalOrdersSource(),
      default_time_dimension: { dbt: 'order_date' },
    };

    expect(service.buildSearchText(source)).toContain('dbt default time: order_date');
  });

  it('includes dbt table tags and freshness from manifest-backed source', () => {
    const service = makeService();
    const source: SemanticLayerSource = {
      name: 'customers',
      table: 'jaffle.customers',
      grain: ['id'],
      columns: [{ name: 'id', type: 'number' }],
      joins: [],
      measures: [],
      tags: { dbt: ['raw', 'core'] },
      freshness: {
        dbt: {
          loaded_at_field: 'updated_at',
          raw: { warn_after: { count: 12, period: 'hour' } },
        },
      },
    };

    const text = service.buildSearchText(source);
    expect(text).toContain('dbt tags: raw, core');
    expect(text).toContain('dbt freshness:');
    expect(text).toContain('loaded_at=updated_at');
    expect(text).toContain('warn_after');
  });

  it('includes historic SQL usage in semantic-layer search text', () => {
    const source: SemanticLayerSource = {
      name: 'orders',
      descriptions: { user: 'Customer orders' },
      table: 'public.orders',
      grain: ['order_id'],
      columns: [{ name: 'order_id', type: 'string' }],
      joins: [],
      measures: [],
      usage: {
        narrative: 'Analysts inspect paid and refunded order lifecycle trends by customer segment.',
        frequencyTier: 'high',
        commonFilters: ['status', 'created_at'],
        commonGroupBys: ['customer_segment'],
        commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
        staleSince: '2026-05-01T00:00:00.000Z',
      },
    };

    const text = buildSemanticLayerSourceSearchText(source);
    expect(text).toContain('usage: Analysts inspect paid and refunded order lifecycle trends by customer segment.');
    expect(text).toContain('frequency: high');
    expect(text).toContain('commonly filtered by: status, created_at');
    expect(text).toContain('commonly grouped by: customer_segment');
    expect(text).toContain('commonly joined to public.customers on customer_id');
    expect(text).toContain('stale since 2026-05-01T00:00:00.000Z');
  });

  it('preserves FTS snippets returned by the source index', async () => {
    const service = new SlSearchService(
      { ...makeEmbeddingStub(), computeEmbedding: vi.fn(async () => [1, 0]) },
      {
        ...makeRepositoryStub(),
        search: vi.fn(async () => [
          {
            sourceName: 'orders',
            rrfScore: 0.75,
            snippet: 'usage: paid <mark>order</mark> lifecycle',
          },
        ]),
      },
    );

    // The repository's `rrfScore` is expected back as `score`; the snippet is untouched.
    await expect(service.search('warehouse', 'order lifecycle', 10)).resolves.toEqual([
      {
        sourceName: 'orders',
        score: 0.75,
        snippet: 'usage: paid <mark>order</mark> lifecycle',
      },
    ]);
  });

  it('indexSources reports stats and supports lexical-only indexing', async () => {
    const repository = makeIndexingRepository(
      new Map([['old_source', { searchText: 'old source', hasEmbedding: true }]]),
      1,
    );
    // No embedding port: the service is constructed with `null`.
    const service = new SlSearchService(null, repository);

    await expect(service.indexSources('warehouse', [makeMinimalOrdersSource()])).resolves.toEqual({
      scanned: 1,
      updated: 1,
      deleted: 1,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });
    expect(repository.upsertSources).toHaveBeenCalledWith('warehouse', [
      expect.objectContaining({ sourceName: 'orders', embedding: null }),
    ]);
  });

  it('does not update unchanged lexical-only SL rows on repeated sync', async () => {
    // The stored text equals what the service builds for the minimal source,
    // so nothing should be re-upserted.
    const repository = makeIndexingRepository(
      new Map([['orders', { searchText: 'orders. table: public.orders. id (number)', hasEmbedding: false }]]),
      0,
    );
    const service = new SlSearchService(null, repository);

    await expect(service.indexSources('warehouse', [makeMinimalOrdersSource()])).resolves.toEqual({
      scanned: 1,
      updated: 0,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });
    expect(repository.upsertSources).toHaveBeenCalledWith('warehouse', []);
    expect(repository.deleteStale).toHaveBeenCalledWith('warehouse', ['orders']);
  });
});

View file

@ -0,0 +1,205 @@
import { access, mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { SqliteSlSourcesIndex } from '../../../src/context/sl/sqlite-sl-sources-index.js';
// Tests for the SQLite-backed source index, each run against a fresh database
// file inside a per-test temporary directory.
describe('SqliteSlSourcesIndex', () => {
  let tempDir: string;
  let dbPath: string;

  /** Row with search text only (no embedding). */
  const textOnly = (sourceName: string, searchText: string) => ({ sourceName, searchText, embedding: null });

  /** Dictionary entry for the given connection/source/column/value. */
  const dictionaryEntry = (
    connectionId: string,
    sourceName: string,
    columnName: string,
    value: string,
    cardinality: number,
  ) => ({ connectionId, sourceName, columnName, value, cardinality });

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-sqlite-sl-index-'));
    dbPath = join(tempDir, 'db.sqlite');
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('creates SQLite tables and searches indexed source text with FTS snippets', async () => {
    const sourcesIndex = new SqliteSlSourcesIndex({ dbPath });
    await sourcesIndex.upsertSources('warehouse', [
      textOnly('orders', 'orders table: public.orders measure: total_revenue sum(revenue) gross revenue'),
      textOnly('tickets', 'tickets table: public.tickets measure: ticket_count count(*) support queue'),
    ]);

    // The database file must exist on disk after the first write.
    await expect(access(dbPath)).resolves.toBeUndefined();

    const directResults = await sourcesIndex.search('warehouse', null, 'gross revenue', 10);
    expect(directResults).toEqual([
      expect.objectContaining({
        sourceName: 'orders',
        rrfScore: expect.any(Number),
        snippet: expect.stringContaining('<mark>'),
      }),
    ]);
    expect(directResults[0]?.snippet).toContain('revenue');

    const lexicalCandidates = await sourcesIndex.searchLexicalCandidates({ queryText: 'gross revenue', limit: 10 });
    expect(lexicalCandidates).toEqual([
      expect.objectContaining({
        id: 'warehouse/orders',
        connectionId: 'warehouse',
        sourceName: 'orders',
        snippet: expect.stringContaining('<mark>'),
      }),
    ]);
  });

  it('reports existing search text and embedding presence', async () => {
    const sourcesIndex = new SqliteSlSourcesIndex({ dbPath });
    await sourcesIndex.upsertSources('warehouse', [
      { sourceName: 'orders', searchText: 'orders gross revenue', embedding: [0.1, 0.2, 0.3] },
      textOnly('tickets', 'tickets support queue'),
    ]);

    const expectedTexts = new Map([
      ['orders', { searchText: 'orders gross revenue', hasEmbedding: true }],
      ['tickets', { searchText: 'tickets support queue', hasEmbedding: false }],
    ]);
    await expect(sourcesIndex.getExistingSearchTexts('warehouse')).resolves.toEqual(expectedTexts);
  });

  it('deletes stale, named, and connection-scoped rows from the FTS index', async () => {
    const sourcesIndex = new SqliteSlSourcesIndex({ dbPath });
    await sourcesIndex.upsertSources('warehouse', [
      textOnly('orders', 'orders revenue'),
      textOnly('tickets', 'tickets support'),
    ]);
    await sourcesIndex.upsertSources('finance', [textOnly('invoices', 'invoices revenue')]);

    // Keep only `orders` for warehouse; finance must be unaffected.
    await sourcesIndex.deleteStale('warehouse', ['orders']);
    expect(await sourcesIndex.search('warehouse', null, 'support', 10)).toEqual([]);
    expect(await sourcesIndex.search('warehouse', null, 'revenue', 10)).toEqual([
      expect.objectContaining({ sourceName: 'orders' }),
    ]);
    expect(await sourcesIndex.search('finance', null, 'revenue', 10)).toEqual([
      expect.objectContaining({ sourceName: 'invoices' }),
    ]);

    await sourcesIndex.deleteByConnectionAndName('warehouse', 'orders');
    expect(await sourcesIndex.search('warehouse', null, 'revenue', 10)).toEqual([]);

    await sourcesIndex.deleteByConnection('finance');
    expect(await sourcesIndex.search('finance', null, 'revenue', 10)).toEqual([]);
  });

  it('clear removes sources and dictionary rows for one connection only', async () => {
    const sourcesIndex = new SqliteSlSourcesIndex({ dbPath });
    await sourcesIndex.upsertSources('warehouse', [textOnly('orders', 'orders revenue paid')]);
    await sourcesIndex.upsertSources('finance', [textOnly('invoices', 'invoices revenue paid')]);
    await sourcesIndex.replaceDictionaryEntries('warehouse', [
      dictionaryEntry('warehouse', 'orders', 'status', 'paid', 1),
    ]);
    await sourcesIndex.replaceDictionaryEntries('finance', [
      dictionaryEntry('finance', 'invoices', 'status', 'paid', 1),
    ]);

    await expect(sourcesIndex.clear('warehouse')).resolves.toBe(1);

    expect(await sourcesIndex.search('warehouse', null, 'revenue', 10)).toEqual([]);
    expect(await sourcesIndex.search('finance', null, 'revenue', 10)).toEqual([
      expect.objectContaining({ sourceName: 'invoices' }),
    ]);

    const warehouseDictionary = sourcesIndex.searchDictionaryCandidates({
      connectionIds: ['warehouse'],
      queryText: 'paid',
      limit: 10,
    });
    await expect(warehouseDictionary).resolves.toEqual([]);

    const financeDictionary = sourcesIndex.searchDictionaryCandidates({
      connectionIds: ['finance'],
      queryText: 'paid',
      limit: 10,
    });
    await expect(financeDictionary).resolves.toEqual([
      expect.objectContaining({ connectionId: 'finance', sourceName: 'invoices' }),
    ]);
  });

  it('returns lane candidates with stable connection-scoped IDs', async () => {
    const sourcesIndex = new SqliteSlSourcesIndex({ dbPath });
    // Same source name under two connections, with different embeddings.
    await sourcesIndex.upsertSources('warehouse', [
      { sourceName: 'orders', searchText: 'orders gross revenue paid status', embedding: [1, 0] },
    ]);
    await sourcesIndex.upsertSources('finance', [
      { sourceName: 'orders', searchText: 'finance orders invoices', embedding: [0, 1] },
    ]);

    const lexical = sourcesIndex.searchLexicalCandidates({ queryText: 'gross revenue', limit: 25 });
    await expect(lexical).resolves.toEqual([
      expect.objectContaining({
        id: 'warehouse/orders',
        connectionId: 'warehouse',
        sourceName: 'orders',
        rank: 1,
        rawScore: expect.any(Number),
      }),
    ]);

    const semantic = sourcesIndex.searchSemanticCandidates({ queryEmbedding: [0, 1], limit: 25 });
    await expect(semantic).resolves.toEqual([
      expect.objectContaining({ id: 'finance/orders', connectionId: 'finance', sourceName: 'orders', rank: 1 }),
      expect.objectContaining({ id: 'warehouse/orders', connectionId: 'warehouse', sourceName: 'orders', rank: 2 }),
    ]);
  });

  it('aggregates dictionary matches to one source-level lane candidate', async () => {
    const sourcesIndex = new SqliteSlSourcesIndex({ dbPath });
    await sourcesIndex.replaceDictionaryEntries('warehouse', [
      dictionaryEntry('warehouse', 'orders', 'status', 'paid', 3),
      dictionaryEntry('warehouse', 'orders', 'status', 'refunded', 3),
      dictionaryEntry('warehouse', 'orders', 'channel', 'paid search', 4),
      dictionaryEntry('warehouse', 'tickets', 'priority', 'paid support', 5),
    ]);

    const candidates = sourcesIndex.searchDictionaryCandidates({ queryText: 'paid', limit: 25 });
    await expect(candidates).resolves.toEqual([
      expect.objectContaining({
        id: 'warehouse/orders',
        connectionId: 'warehouse',
        sourceName: 'orders',
        rank: 1,
        matches: [
          { column: 'channel', values: ['paid search'] },
          { column: 'status', values: ['paid'] },
        ],
      }),
      expect.objectContaining({
        id: 'warehouse/tickets',
        connectionId: 'warehouse',
        sourceName: 'tickets',
        rank: 2,
        matches: [{ column: 'priority', values: ['paid support'] }],
      }),
    ]);
  });

  it('returns an empty result for blank or punctuation-only queries', async () => {
    const sourcesIndex = new SqliteSlSourcesIndex({ dbPath });
    await sourcesIndex.upsertSources('warehouse', [textOnly('orders', 'orders revenue')]);

    const blankResults = await sourcesIndex.search('warehouse', null, ' ', 10);
    expect(blankResults).toEqual([]);
    const punctuationResults = await sourcesIndex.search('warehouse', null, '---', 10);
    expect(punctuationResults).toEqual([]);
  });
});

View file

@ -0,0 +1,18 @@
import { describe, expect, it } from 'vitest';
import { slToolConnectionIdSchema } from '../../../../src/context/sl/tools/connection-id-schema.js';
// Pins which connection id strings the tool schema accepts and rejects.
describe('slToolConnectionIdSchema', () => {
  it('accepts app UUIDs and local project connection ids', () => {
    const accepted = ['00000000-0000-4000-8000-000000000001', 'warehouse', 'warehouse_prod-1'];
    // Each accepted id must parse to itself, unchanged.
    for (const id of accepted) {
      expect(slToolConnectionIdSchema.parse(id)).toBe(id);
    }
  });

  it('rejects empty, path-like, and hidden connection ids', () => {
    const rejected = ['', '../warehouse', 'warehouse/prod', '.warehouse', 'warehouse prod'];
    rejected.forEach((candidate) => {
      expect(() => slToolConnectionIdSchema.parse(candidate)).toThrow();
    });
  });
});

View file

@ -0,0 +1,84 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolContext } from '../../../../src/context/tools/base-tool.js';
import type { ToolSession } from '../../../../src/context/tools/tool-session.js';
import { createTouchedSlSources } from '../../../../src/context/tools/touched-sl-sources.js';
import type { SemanticLayerSource } from '../../../../src/context/sl/types.js';
import { SlDiscoverTool } from '../../../../src/context/sl/tools/sl-discover.tool.js';
/**
 * Builds an `SlDiscoverTool` wired to stub services.
 *
 * The tool-level semantic-layer stub returns no connections and no sources,
 * and the search stub returns no hits, so any sources a test observes must
 * come from somewhere else (e.g. a session-scoped service).
 *
 * @returns The tool plus both stubs for assertions.
 */
function makeTool() {
  const listConnectionIdsWithNames = vi.fn(
    async () => [] as Array<{ id: string; name: string; connectionType: string }>,
  );
  const loadAllSources = vi.fn(async () => ({ sources: [] as SemanticLayerSource[], loadErrors: [] }));
  const semanticLayerService = { listConnectionIdsWithNames, loadAllSources };

  const slSearchService = { search: vi.fn(async () => []) };

  const limits = { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 };
  const tool = new SlDiscoverTool(
    {
      // Stubs only implement what the test needs, hence the casts.
      semanticLayerService: semanticLayerService as never,
      slSearchService: slSearchService as never,
      authorResolver: { resolve: vi.fn() },
    },
    limits,
  );

  return { tool, semanticLayerService, slSearchService };
}
/**
 * Creates a `ToolContext` with placeholder ids.
 *
 * @param overrides Fields that replace or extend the defaults.
 * @returns The defaults merged with `overrides` (overrides win).
 */
function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
  const defaults: ToolContext = { sourceId: 'src', messageId: 'msg', userId: 'user' };
  return { ...defaults, ...overrides };
}
/**
 * Creates a worktree-scoped `ToolSession` around the given semantic-layer stub.
 *
 * @param semanticLayerService Stub exposed as `session.semanticLayerService`.
 * @returns A session with empty touched-source tracking, no recorded actions,
 *   and empty placeholder objects for the wiki/config/git services.
 */
function makeSession(semanticLayerService: Record<string, unknown>): ToolSession {
  // Unused collaborators are empty placeholders.
  const unusedService = {};
  const session: ToolSession = {
    connectionId: 'dbt-main',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources(),
    actions: [],
    semanticLayerService: semanticLayerService as never,
    wikiService: unusedService as never,
    configService: unusedService as never,
    gitService: unusedService as never,
  };
  return session;
}
// When a session is attached to the context, discovery must read through the
// session's semantic-layer service rather than the tool's own.
describe('SlDiscoverTool - session-scoped reads', () => {
  it('discovers sources through context.session.semanticLayerService when a session is present', async () => {
    const { tool, semanticLayerService: toolLevelService } = makeTool();

    const ordersSource = {
      name: 'orders',
      table: 'public.orders',
      grain: ['order_id'],
      columns: [{ name: 'order_id', type: 'string' }],
      measures: [],
      joins: [],
    };
    const sessionSemanticLayerService = {
      listConnectionIdsWithNames: vi
        .fn()
        .mockResolvedValue([{ id: 'warehouse', name: 'warehouse', connectionType: 'postgres' }]),
      loadAllSources: vi.fn().mockResolvedValue({ sources: [ordersSource], loadErrors: [] }),
    };

    const context = makeContext({ session: makeSession(sessionSemanticLayerService) });
    const result = await tool.call({}, context);

    const { totalSources, sources } = result.structured;
    expect(totalSources).toBe(1);
    expect(sources[0]).toMatchObject({
      connectionId: 'warehouse',
      name: 'orders',
      columnCount: 1,
    });

    // The session-scoped service was used ...
    expect(sessionSemanticLayerService.listConnectionIdsWithNames).toHaveBeenCalled();
    expect(sessionSemanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse');
    // ... and the tool-level one was bypassed.
    expect(toolLevelService.listConnectionIdsWithNames).not.toHaveBeenCalled();
  });
});

View file

@ -0,0 +1,243 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../../../src/context/tools/tool-session.js';
import { createTouchedSlSources, hasTouchedSlSource } from '../../../../src/context/tools/touched-sl-sources.js';
import type { ToolContext } from '../../../../src/context/tools/base-tool.js';
import { SlEditSourceTool } from '../../../../src/context/sl/tools/sl-edit-source.tool.js';
/**
 * Builds an `SlEditSourceTool` wired to stub services.
 *
 * By default the semantic-layer stub serves a minimal `orders` YAML source,
 * validates without errors or warnings, resolves writes with commit hash
 * `c1`, and reports the source as not manifest-backed. The search stub's
 * `indexSources` resolves with `undefined`.
 *
 * @param overrides Optional `semanticLayerService` / `slSearchService` objects
 *   whose members replace the defaults. Deliberately loosely typed so callers
 *   can pass partial stubs.
 * @returns The tool plus both stubs for assertions.
 */
function makeTool(overrides: any = {}) {
  const ordersYaml =
    'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: string\nmeasures: []\njoins: []\n';

  const semanticLayerService = {
    readSourceFile: vi.fn().mockResolvedValue({ content: ordersYaml }),
    validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
    deleteSource: vi.fn().mockResolvedValue(undefined),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    // Caller-provided members win over the defaults above.
    ...overrides.semanticLayerService,
  };

  const slSearchService = {
    indexSources: vi.fn().mockResolvedValue(undefined),
    ...overrides.slSearchService,
  };

  const authorResolver = { resolve: vi.fn().mockResolvedValue({ name: 'T U', email: 't@u.com' }) };
  const tool = new SlEditSourceTool({
    semanticLayerService: semanticLayerService as never,
    slSearchService: slSearchService as never,
    authorResolver,
  });

  return { tool, semanticLayerService, slSearchService };
}
// Session-less context shared by the tests; tests that need a session spread
// this and add one.
const baseContext: ToolContext = {
  sourceId: 's',
  messageId: 'm',
  userId: 'u',
};
/**
 * Creates a worktree-scoped `ToolSession` whose semantic-layer service is a
 * stub serving a minimal `orders` YAML source, validating cleanly and
 * resolving writes with commit hash `c1`.
 *
 * Stub collaborators are cast with `as never` rather than `as any`, so the
 * helper introduces no explicit `any` while still accepting partial stubs.
 *
 * @param overrides Session fields that replace the defaults (applied last).
 * @returns The assembled session.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  return {
    connectionId: '11111111-1111-1111-1111-111111111111',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources(),
    actions: [],
    semanticLayerService: {
      readSourceFile: vi.fn().mockResolvedValue({
        content:
          'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: string\nmeasures: []\njoins: []\n',
      }),
      validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
      writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
    } as never,
    // Unused by these tests; empty placeholders.
    wikiService: {} as never,
    configService: {} as never,
    gitService: {} as never,
    ...overrides,
  };
}
describe('SlEditSourceTool — session gating', () => {
it('skips slSearchService.indexSources when session is worktree-scoped', async () => {
const { tool, slSearchService } = makeTool();
const session = makeSession();
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: session.connectionId,
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect(result.structured.success).toBe(true);
expect(slSearchService.indexSources).not.toHaveBeenCalled();
expect(hasTouchedSlSource(session.touchedSlSources, session.connectionId!, 'orders')).toBe(true);
expect(session.actions).toContainEqual(expect.objectContaining({ target: 'sl', key: 'orders' }));
});
it('records cross-connection SL edits with targetConnectionId', async () => {
const { tool } = makeTool();
const session = makeSession({ connectionId: '11111111-1111-4111-8111-111111111111' });
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: warehouseConnectionId,
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect(result.structured.success).toBe(true);
expect(hasTouchedSlSource(session.touchedSlSources, warehouseConnectionId, 'orders')).toBe(true);
expect(session.actions).toContainEqual(
expect.objectContaining({
target: 'sl',
type: 'updated',
key: 'orders',
targetConnectionId: warehouseConnectionId,
}),
);
});
it('rejects session-scoped edits outside allowed target connections', async () => {
const { tool } = makeTool();
const session = makeSession({
allowedConnectionNames: new Set(['warehouse']),
});
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: 'finance',
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect(result.structured.success).toBe(false);
expect(result.markdown).toContain('connectionId "finance" is outside this ingest session');
expect(session.actions).toEqual([]);
});
// baseContext is passed as-is (no session attached), so the search index is
// expected to be refreshed exactly once.
it('indexes normally when no session is present', async () => {
  const { tool, slSearchService } = makeTool();
  const input = {
    connectionId: '11111111-1111-1111-1111-111111111111',
    sourceName: 'orders',
    yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
  } as any;

  const outcome = await tool.call(input, baseContext);

  expect(outcome.structured.success).toBe(true);
  expect(slSearchService.indexSources).toHaveBeenCalledTimes(1);
});
// When a session is attached, the write must go through the session's own
// semantic layer service.
it('uses session.semanticLayerService when session is present', async () => {
  const { tool } = makeTool();
  const session = makeSession();
  const ctx: ToolContext = { ...baseContext, session };
  const input = {
    connectionId: session.connectionId,
    sourceName: 'orders',
    yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
  } as any;

  await tool.call(input, ctx);

  const sessionService = session.semanticLayerService as any;
  expect(sessionService.writeSource).toHaveBeenCalled();
});
// The session carries ingest metadata; the source handed to writeSource is
// expected to have a `ktx` description mentioning 'orders' and a single column
// whose `ktx` description contains 'Identifier'.
it('fills missing descriptions when an ingest session edits a source', async () => {
  const { tool } = makeTool();
  const ingestSession = makeSession({
    ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'dbt' },
  });
  const ctx: ToolContext = { ...baseContext, session: ingestSession };
  const input = {
    connectionId: ingestSession.connectionId,
    sourceName: 'orders',
    yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
  } as any;

  const outcome = await tool.call(input, ctx);

  expect(outcome.structured.success).toBe(true);
  const describedColumn = expect.objectContaining({
    descriptions: { ktx: expect.stringContaining('Identifier') },
  });
  const describedSource = expect.objectContaining({
    descriptions: { ktx: expect.stringContaining('orders') },
    columns: [describedColumn],
  });
  expect((ingestSession.semanticLayerService as any).writeSource).toHaveBeenCalledWith(
    expect.any(String),
    describedSource,
    expect.any(String),
    expect.any(String),
    expect.any(String),
  );
});
});
// Failure path where the source file cannot be read: the reported error
// depends on whether the semantic layer says the name is manifest-backed.
describe('SlEditSourceTool — manifest-backed source without overlay', () => {
  const CONNECTION_ID = '11111111-1111-1111-1111-111111111111';

  // Tool whose readSourceFile always rejects with ENOENT and whose
  // isManifestBacked resolves to the supplied flag.
  const makeUnreadableSourceTool = (manifestBacked: boolean) =>
    makeTool({
      semanticLayerService: {
        readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
        isManifestBacked: vi.fn().mockResolvedValue(manifestBacked),
      },
    });

  it('returns a directed hint pointing at sl_write_source + overlay shape', async () => {
    const { tool, semanticLayerService } = makeUnreadableSourceTool(true);
    const input = {
      connectionId: CONNECTION_ID,
      sourceName: 'CONSIGNMENTS',
      yaml_edits: [{ oldText: 'measures: []', newText: 'measures:\n - name: aav_count\n expr: count(*)' }],
    } as any;

    const outcome = await tool.call(input, baseContext);

    expect(outcome.structured.success).toBe(false);
    expect(semanticLayerService.isManifestBacked).toHaveBeenCalledWith(CONNECTION_ID, 'CONSIGNMENTS');
    expect(semanticLayerService.writeSource).not.toHaveBeenCalled();

    const errorText = (outcome.structured.errors ?? []).join('\n');
    for (const fragment of ['CONSIGNMENTS', 'manifest', 'sl_write_source', 'overlay']) {
      expect(errorText).toContain(fragment);
    }
    // Overlay shape: name plus overlay-only fields.
    for (const overlayField of ['measures', 'segments', 'column_overrides']) {
      expect(errorText).toContain(overlayField);
    }
  });

  it('still returns the plain "Source not found" error for truly-missing names', async () => {
    const { tool, semanticLayerService } = makeUnreadableSourceTool(false);
    const input = {
      connectionId: CONNECTION_ID,
      sourceName: 'does_not_exist',
      yaml_edits: [{ oldText: 'x', newText: 'y' }],
    } as any;

    const outcome = await tool.call(input, baseContext);

    expect(outcome.structured.success).toBe(false);
    expect(outcome.structured.errors).toEqual(['Source not found. Use sl_write_source to create it.']);
    expect(semanticLayerService.isManifestBacked).toHaveBeenCalledTimes(1);
    expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
  });
});

View file

@ -0,0 +1,76 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../../../src/context/tools/tool-session.js';
import { createTouchedSlSources } from '../../../../src/context/tools/touched-sl-sources.js';
import type { ToolContext } from '../../../../src/context/tools/base-tool.js';
import { SlReadSourceTool } from '../../../../src/context/sl/tools/sl-read-source.tool.js';
/**
 * Builds an SlReadSourceTool wired to a mocked default semantic layer service.
 *
 * @param overrides Optional `semanticLayerService` members that replace or
 *   extend the default mock.
 * @returns The tool plus the service mock so tests can assert on its calls.
 */
function makeTool(overrides: Partial<Record<string, any>> = {}) {
  // The default read resolves with content that names `foo_default`, which
  // lets tests tell it apart from a session-provided service.
  const defaultRead = vi.fn().mockResolvedValue({ content: 'name: foo_default\n', path: 'default' });
  const semanticLayerService = { readSourceFile: defaultRead, ...overrides.semanticLayerService };

  return {
    tool: new SlReadSourceTool({
      semanticLayerService: semanticLayerService as never,
      slSearchService: {} as never,
      authorResolver: { resolve: vi.fn() },
    }),
    semanticLayerService,
  };
}
/**
 * Creates a ToolContext with fixed placeholder ids.
 *
 * @param overrides Fields that take precedence over the placeholders.
 * @returns A fresh context object on every call.
 */
function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
  const defaults: ToolContext = { sourceId: 'src', messageId: 'msg', userId: 'user' };
  return { ...defaults, ...overrides };
}
/**
 * Creates a worktree-scoped ToolSession whose semantic layer service resolves
 * reads with content naming `foo_session`.
 *
 * @param overrides Fields that take precedence over the defaults.
 * @returns A fresh session with an empty touched-source set and action log.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  const sessionRead = vi.fn().mockResolvedValue({ content: 'name: foo_session\n', path: 'session' });
  const defaults: ToolSession = {
    connectionId: '11111111-1111-1111-1111-111111111111',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources(),
    actions: [],
    semanticLayerService: { readSourceFile: sessionRead } as any,
    wikiService: {} as any,
    configService: {} as any,
    gitService: {} as any,
  };
  return { ...defaults, ...overrides };
}
// Verifies which semantic layer service performs the read: the session's own
// service when a session is attached, otherwise the tool's default one.
describe('SlReadSourceTool - session-scoped reads', () => {
  const CONNECTION_ID = '11111111-1111-1111-1111-111111111111';

  it('reads through context.session.semanticLayerService when a session is present', async () => {
    const { tool, semanticLayerService: defaultService } = makeTool();
    const session = makeSession();
    const input = { connectionId: CONNECTION_ID, sourceName: 'foo' };

    const outcome = await tool.call(input, makeContext({ session }));

    const sessionService = session.semanticLayerService as any;
    expect(sessionService.readSourceFile).toHaveBeenCalledWith(CONNECTION_ID, 'foo');
    expect(defaultService.readSourceFile).not.toHaveBeenCalled();
    expect(outcome.structured.yaml).toContain('foo_session');
  });

  it('reads through the default service when no session is present', async () => {
    const { tool, semanticLayerService: defaultService } = makeTool();
    const input = { connectionId: CONNECTION_ID, sourceName: 'foo' };

    const outcome = await tool.call(input, makeContext());

    expect(defaultService.readSourceFile).toHaveBeenCalledWith(CONNECTION_ID, 'foo');
    expect(outcome.structured.yaml).toContain('foo_default');
  });
});

View file

@ -0,0 +1,68 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../../../src/context/tools/tool-session.js';
import { createTouchedSlSources, hasTouchedSlSource } from '../../../../src/context/tools/touched-sl-sources.js';
import type { ToolContext } from '../../../../src/context/tools/base-tool.js';
import { SlRollbackTool } from '../../../../src/context/sl/tools/sl-rollback.tool.js';
/**
 * Creates a session on `conn-1` that already has `orders` in its touched set
 * and one recorded `updated` action for it, with mocked config and git
 * services.
 *
 * @param overrides Fields that take precedence over the defaults.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  const configService = {
    writeFile: vi.fn().mockResolvedValue(undefined),
    deleteFile: vi.fn().mockResolvedValue(undefined),
  };
  // The git mock resolves with fixed content for any file/commit lookup.
  const gitService = { getFileAtCommit: vi.fn().mockResolvedValue('pre: content') };

  const defaults: ToolSession = {
    connectionId: 'conn-1',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'orders' }]),
    actions: [{ target: 'sl', type: 'updated', key: 'orders', detail: 'x' }],
    semanticLayerService: {} as any,
    wikiService: {} as any,
    configService: configService as any,
    gitService: gitService as any,
  };
  return { ...defaults, ...overrides };
}
// Rollback needs a connection-scoped session: the tests cover both rejection
// paths and the successful restore from the session's preHead commit.
describe('SlRollbackTool', () => {
  const connections = {
    getConnectionById: vi.fn(),
    listEnabledConnections: vi.fn(),
    executeQuery: vi.fn(),
  };

  // Fresh session-less context for every call.
  const bareContext = (): ToolContext => ({ sourceId: 's', messageId: 'm', userId: 'u' });
  const rollbackOrdersInput = () => ({ sourceName: 'orders' }) as any;

  it('errors when context.session is absent', async () => {
    const tool = new SlRollbackTool({} as never, connections as never, 1);

    const outcome = await tool.call(rollbackOrdersInput(), bareContext());

    expect(outcome.structured.success).toBe(false);
    expect(outcome.markdown).toMatch(/session/i);
  });

  it('errors when session has no connectionId (wiki-only turn)', async () => {
    const tool = new SlRollbackTool({} as never, connections as never, 1);
    const session = makeSession({ connectionId: null });
    const ctx: ToolContext = { ...bareContext(), session };

    const outcome = await tool.call(rollbackOrdersInput(), ctx);

    expect(outcome.structured.success).toBe(false);
    expect(outcome.markdown).toMatch(/connection-scoped session/i);
    // Session state untouched
    expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(true);
    expect((session.gitService as any).getFileAtCommit).not.toHaveBeenCalled();
  });

  it('restores the source content from preHead, clears touched set, prunes actions', async () => {
    const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) };
    const tool = new SlRollbackTool(slSourcesRepository as never, connections as never, 1);
    const session = makeSession();
    const ctx: ToolContext = { ...bareContext(), session };

    const outcome = await tool.call(rollbackOrdersInput(), ctx);

    expect(outcome.structured.success).toBe(true);
    const git = session.gitService as any;
    const config = session.configService as any;
    expect(git.getFileAtCommit).toHaveBeenCalledWith(expect.stringContaining('orders.yaml'), 'base');
    expect(config.writeFile).toHaveBeenCalled();
    expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(false);
    expect(session.actions).toEqual([]);
  });
});

View file

@ -0,0 +1,67 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../../../src/context/tools/tool-session.js';
import { createTouchedSlSources } from '../../../../src/context/tools/touched-sl-sources.js';
import type { ToolContext } from '../../../../src/context/tools/base-tool.js';
import type { SemanticLayerService } from '../../../../src/context/sl/semantic-layer.service.js';
import type { SemanticLayerSource } from '../../../../src/context/sl/types.js';
import { SlValidateTool, validateSemanticLayerEndpoint } from '../../../../src/context/sl/tools/sl-validate.tool.js';
// Both tests drive validateSemanticLayerEndpoint with a service stub whose
// validateSourcesForConnection resolves with empty errors and warnings.
describe('validateSemanticLayerEndpoint', () => {
  const makeCleanService = () => ({
    validateSourcesForConnection: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
  });

  it('uses the connection warehouse dialect, not hardcoded postgres', async () => {
    const service = makeCleanService();

    await validateSemanticLayerEndpoint('conn-1', service as unknown as SemanticLayerService);

    // The endpoint is expected to delegate with only the connection id.
    expect(service.validateSourcesForConnection).toHaveBeenCalledWith('conn-1');
  });

  it('short-circuits when there are no validatable sources', async () => {
    const service = makeCleanService();

    const report = await validateSemanticLayerEndpoint('conn-1', service as unknown as SemanticLayerService);

    expect(report).toEqual({ errors: [], warnings: [] });
  });
});
// The mocked validation reports problems for both `orders` and `customers`,
// but the session has only touched `orders`; only the `orders` entries are
// expected in the tool result.
describe('SlValidateTool — session-aware touched-set filtering', () => {
  it('when session present, only returns errors/warnings that mention touched sources', async () => {
    const makeSource = (name: string): SemanticLayerSource => ({
      name,
      table: `x.${name}`,
      grain: ['id'],
      columns: [],
      joins: [],
      measures: [],
    });
    const sources: SemanticLayerSource[] = [makeSource('orders'), makeSource('customers')];

    const validationReport = {
      errors: ['orders: missing join target', 'customers: invalid grain'],
      warnings: ['orders: disconnected-components warning'],
    };
    const serviceMock = {
      loadAllSources: vi.fn().mockResolvedValue({ sources, loadErrors: [] }),
      validateSourcesForConnection: vi.fn().mockResolvedValue(validationReport),
    };

    const tool = new SlValidateTool({
      semanticLayerService: serviceMock as never,
      slSearchService: {} as never,
      authorResolver: { resolve: vi.fn() },
    });

    // The same mock backs the tool's default service and the session's service.
    const session: ToolSession = {
      connectionId: 'conn-1',
      isWorktreeScoped: true,
      preHead: null,
      touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'orders' }]),
      actions: [],
      semanticLayerService: serviceMock as any,
      wikiService: {} as any,
      configService: {} as any,
      gitService: {} as any,
    };
    const ctx: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session };

    const outcome = await tool.call({ connectionId: 'conn-1' } as any, ctx);

    expect(outcome.structured.validationErrors).toEqual(['orders: missing join target']);
    expect(outcome.structured.validationWarnings).toEqual(['orders: disconnected-components warning']);
  });
});

View file

@ -0,0 +1,146 @@
import { describe, expect, it, vi } from 'vitest';
import { validateSingleSource } from '../../../../src/context/sl/tools/sl-warehouse-validation.js';
/**
 * Assembles the dependency bag passed to validateSingleSource.
 *
 * @param opts.sourceYaml YAML text the mocked readSourceFile resolves with.
 * @param opts.executeQuery Mock used as the connections' executeQuery.
 * @returns Mocked services plus `probeRowCount: 1`.
 */
function makeDeps(opts: { sourceYaml: string; executeQuery: ReturnType<typeof vi.fn> }) {
  const { sourceYaml, executeQuery } = opts;

  const semanticLayerService = {
    readSourceFile: vi.fn().mockResolvedValue({ content: sourceYaml, path: 'x' }),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    listManifestSourceNames: vi.fn().mockResolvedValue([]),
    loadSource: vi.fn().mockResolvedValue(null),
    loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
    validatePhysicalTableReferences: vi.fn().mockResolvedValue([]),
  };

  // The connection lookup always resolves with a BigQuery connection named conn-1.
  const connections = {
    executeQuery,
    getConnectionById: vi.fn().mockResolvedValue({ id: 'conn-1', name: 'conn-1', connectionType: 'bigquery' }),
    listEnabledConnections: vi.fn().mockResolvedValue([]),
  };

  const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) };

  return {
    semanticLayerService: semanticLayerService as never,
    connections: connections as never,
    configService: {} as never,
    gitService: {} as never,
    slSourcesRepository: slSourcesRepository as never,
    probeRowCount: 1,
  };
}
// Exercises validateSingleSource against YAML fixtures supplied through the
// mocked readSourceFile. Each test controls the warehouse side by choosing how
// the executeQuery mock resolves or rejects.
describe('validateSingleSource warehouse dry-run', () => {
// executeQuery rejects here; the reported errors must contain both the
// warehouse's own message and the "embedded sql dry-run failed" marker.
it('surfaces warehouse error when dry-run fails on unknown column', async () => {
const yaml = `name: fct_arr_delta
source_type: sql
sql: |
SELECT * FROM analytics.fct_arr_delta WHERE date_date < CURRENT_DATE()
grain: [date_date]
columns:
- name: date_date
type: time
measures:
- name: count_delta_events
expr: count(*)
joins: []
`;
const executeQuery = vi.fn().mockRejectedValue(new Error('Unrecognized name: date_date at [1:42]'));
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'fct_arr_delta');
expect(result.errors.join('\n')).toMatch(/Unrecognized name: date_date/);
expect(result.errors.join('\n')).toMatch(/embedded sql dry-run failed/);
});
// The query succeeds, but the mocked headers ('date', 'customer_id') do not
// include the declared `date_date` column.
it('flags declared columns missing from the dry-run result', async () => {
const yaml = `name: fct_arr_delta
source_type: sql
sql: |
SELECT date, customer_id FROM analytics.fct_arr_delta
columns:
- name: date_date
type: time
- name: customer_id
type: string
measures:
- name: count_delta
expr: count(*)
joins: []
grain: [customer_id]
`;
const executeQuery = vi.fn().mockResolvedValue({
headers: ['date', 'customer_id'],
rows: [],
totalRows: 0,
error: null,
});
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'fct_arr_delta');
expect(result.errors.join('\n')).toMatch(/declared columns absent from sql result — date_date/);
expect(result.errors.join('\n')).toMatch(/warehouse returned:/);
});
// Happy path: the mocked headers match the declared columns, so no errors
// are expected.
it('passes cleanly when dry-run succeeds and declared columns match', async () => {
const yaml = `name: lab_results
source_type: sql
sql: |
SELECT lab_order_id, admin_user_id FROM analytics.raw_lab_results
grain: [lab_order_id]
columns:
- name: lab_order_id
type: string
- name: admin_user_id
type: string
measures:
- name: count_lab_results
expr: count(lab_order_id)
joins: []
`;
const executeQuery = vi.fn().mockResolvedValue({
headers: ['lab_order_id', 'admin_user_id'],
rows: [],
totalRows: 0,
error: null,
});
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'lab_results');
expect(result.errors).toEqual([]);
});
// Inspects the SQL text handed to the warehouse: the second argument of the
// first executeQuery call must contain LIMIT 1 and must not contain LIMIT 0.
it('uses LIMIT 1 (not LIMIT 0) so runtime policies fire', async () => {
const yaml = `name: foo
source_type: sql
sql: |
SELECT a FROM analytics.bar
grain: [a]
columns:
- {name: a, type: string}
measures: []
joins: []
`;
const executeQuery = vi.fn().mockResolvedValue({ headers: ['a'], rows: [], totalRows: 0, error: null });
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
await validateSingleSource(deps, 'conn-1', 'foo');
const probeSql = executeQuery.mock.calls[0][1] as string;
expect(probeSql).toMatch(/LIMIT 1\b/);
expect(probeSql).not.toMatch(/LIMIT 0\b/);
});
// This fixture declares `table:` instead of `sql:`. The physical-table check
// is mocked to report a missing column; that message must be surfaced and no
// warehouse query may be issued.
it('adds physical manifest errors for table-backed sources', async () => {
const yaml = `name: int_active_contract_arr
table: orbit_analytics.int_active_contract_arr
grain: [contract_id]
columns:
- {name: contract_id, type: string}
- {name: arr_cents, type: number}
measures:
- {name: arr, expr: sum(arr_cents)}
joins: []
`;
const executeQuery = vi.fn();
// Cast to any so the mock methods on semanticLayerService (typed `never` by
// makeDeps) can be reconfigured for this test.
const deps = makeDeps({ sourceYaml: yaml, executeQuery }) as any;
deps.semanticLayerService.validatePhysicalTableReferences.mockResolvedValue([
'int_active_contract_arr.yaml: declared column(s) absent from physical table: arr_cents',
]);
const result = await validateSingleSource(deps, 'conn-1', 'int_active_contract_arr');
expect(result.errors).toContain(
'int_active_contract_arr.yaml: declared column(s) absent from physical table: arr_cents',
);
expect(executeQuery).not.toHaveBeenCalled();
});
});

View file

@ -0,0 +1,379 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../../../src/context/tools/tool-session.js';
import { createTouchedSlSources, hasTouchedSlSource } from '../../../../src/context/tools/touched-sl-sources.js';
import type { ToolContext } from '../../../../src/context/tools/base-tool.js';
import { SlWriteSourceTool } from '../../../../src/context/sl/tools/sl-write-source.tool.js';
/**
 * Builds an SlWriteSourceTool backed by mocked semantic layer and search
 * services.
 *
 * @param overrides Optional `semanticLayerService` / `slSearchService` members
 *   that replace or extend the default mocks.
 * @returns The tool together with both service mocks for assertions.
 */
function makeTool(overrides: Partial<Record<string, any>> = {}) {
  // Defaults: a manifest listing ACCOUNTS and ORDERS, nothing manifest-backed,
  // clean validation, a successful write, and a rejecting readSourceFile.
  const semanticLayerDefaults = {
    listManifestSourceNames: vi.fn().mockResolvedValue(['ACCOUNTS', 'ORDERS']),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    loadSource: vi.fn().mockResolvedValue(null),
    loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
    validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    deleteSource: vi.fn().mockResolvedValue(undefined),
    readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
  };
  const semanticLayerService = { ...semanticLayerDefaults, ...overrides.semanticLayerService };

  const searchDefaults = { indexSources: vi.fn().mockResolvedValue(undefined) };
  const slSearchService = { ...searchDefaults, ...overrides.slSearchService };

  const tool = new SlWriteSourceTool({
    semanticLayerService: semanticLayerService as never,
    slSearchService: slSearchService as never,
    authorResolver: { resolve: vi.fn().mockResolvedValue({ name: 'T U', email: 't@u.com' }) },
  });

  return { tool, semanticLayerService, slSearchService };
}
// Session-less context shared by the tests; individual tests spread it and
// attach a session when they need one.
const baseContext: ToolContext = {
  sourceId: 's',
  messageId: 'm',
  userId: 'u',
};
// The submitted source carries only a name and measures (no table or sql), and
// that name is not in the mocked manifest listing (ACCOUNTS, ORDERS).
describe('SlWriteSourceTool — orphan overlay guard', () => {
  it('rejects overlay YAMLs targeting a name absent from the manifest', async () => {
    const { tool } = makeTool();
    const overlaySource = {
      name: 'does_not_exist',
      measures: [{ name: 'count_rows', expr: 'count(*)' }],
    } as any;
    const input = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      sourceName: 'does_not_exist',
      source: overlaySource,
    } as any;

    const outcome = await tool.call(input, baseContext);

    expect(outcome.structured.success).toBe(false);
    expect(outcome.markdown).toMatch(/no manifest entry with that name exists/i);
    // The message is expected to name at least one known manifest source.
    expect(outcome.markdown).toMatch(/ACCOUNTS|ORDERS/);
  });
});
// Session-aware behaviour of SlWriteSourceTool: which service performs the
// write, whether search indexing runs, and what is recorded on the session.
describe('SlWriteSourceTool — session gating', () => {
  const DEFAULT_CONNECTION_ID = '11111111-1111-1111-1111-111111111111';

  // Mocked session-side semantic layer service; only the manifest listing
  // differs between tests.
  function makeSessionService(manifestSourceNames: string[] = []) {
    return {
      loadSource: vi.fn().mockResolvedValue(null),
      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
      writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
      deleteSource: vi.fn().mockResolvedValue(undefined),
      listManifestSourceNames: vi.fn().mockResolvedValue(manifestSourceNames),
      isManifestBacked: vi.fn().mockResolvedValue(false),
      readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
      findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
    } as any;
  }

  // Worktree-scoped session with an empty touched set and action log.
  function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
    const defaults: ToolSession = {
      connectionId: DEFAULT_CONNECTION_ID,
      isWorktreeScoped: true,
      preHead: 'base',
      touchedSlSources: createTouchedSlSources(),
      actions: [],
      semanticLayerService: makeSessionService(),
      wikiService: {} as any,
      configService: {} as any,
      gitService: {} as any,
    };
    return { ...defaults, ...overrides };
  }

  // Tool input for the SQL-backed `my_source` fixture used by several tests.
  function sqlSourceInput(connectionId: unknown) {
    return {
      connectionId,
      sourceName: 'my_source',
      source: {
        name: 'my_source',
        sql: 'select 1 as id',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string' }],
        measures: [],
        joins: [],
      } as any,
    } as any;
  }

  // Tool input for a table-backed source over `public.orders`.
  function tableSourceInput(connectionId: string, sourceName: string) {
    return {
      connectionId,
      sourceName,
      source: {
        name: sourceName,
        table: 'public.orders',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string' }],
        measures: [],
        joins: [],
      } as any,
    } as any;
  }

  it('skips slSearchService.indexSources when session is worktree-scoped', async () => {
    const { tool, slSearchService } = makeTool();
    const session = makeSession();
    const ctx: ToolContext = { ...baseContext, session };

    const outcome = await tool.call(sqlSourceInput(session.connectionId), ctx);

    expect(outcome.structured.success).toBe(true);
    expect(slSearchService.indexSources).not.toHaveBeenCalled();
    expect(hasTouchedSlSource(session.touchedSlSources, session.connectionId!, 'my_source')).toBe(true);
    expect(session.actions).toContainEqual(expect.objectContaining({ target: 'sl', key: 'my_source' }));
  });

  it('records cross-connection SL writes with targetConnectionId', async () => {
    const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
    const { tool } = makeTool();
    const session = makeSession({ connectionId: '11111111-1111-4111-8111-111111111111' });
    const ctx: ToolContext = { ...baseContext, session };

    const outcome = await tool.call(tableSourceInput(warehouseConnectionId, 'mapped_orders'), ctx);

    expect(outcome.structured.success).toBe(true);
    expect(hasTouchedSlSource(session.touchedSlSources, warehouseConnectionId, 'mapped_orders')).toBe(true);
    const expectedAction = expect.objectContaining({
      target: 'sl',
      key: 'mapped_orders',
      targetConnectionId: warehouseConnectionId,
    });
    expect(session.actions).toContainEqual(expectedAction);
  });

  it('rejects session-scoped writes outside allowed target connections', async () => {
    const { tool } = makeTool();
    const session = makeSession({ allowedConnectionNames: new Set(['warehouse']) });
    const ctx: ToolContext = { ...baseContext, session };

    const outcome = await tool.call(tableSourceInput('finance', 'finance_orders'), ctx);

    expect(outcome.structured.success).toBe(false);
    expect(outcome.markdown).toContain('connectionId "finance" is outside this ingest session');
    expect(session.actions).toEqual([]);
  });

  it('indexes normally when no session is present', async () => {
    const { tool, slSearchService } = makeTool();

    const outcome = await tool.call(sqlSourceInput(DEFAULT_CONNECTION_ID), baseContext);

    expect(outcome.structured.success).toBe(true);
    expect(slSearchService.indexSources).toHaveBeenCalledTimes(1);
  });

  it('uses session.semanticLayerService when session is present', async () => {
    const { tool } = makeTool();
    const session = makeSession();
    const ctx: ToolContext = { ...baseContext, session };

    await tool.call(sqlSourceInput(session.connectionId), ctx);

    expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
  });

  it('writes source and column description maps', async () => {
    const sourceDescription = 'Finance orders used for invoice reconciliation.';
    const columnDescription = 'Stable order identifier.';
    const { tool, semanticLayerService } = makeTool();
    const input = {
      connectionId: DEFAULT_CONNECTION_ID,
      sourceName: 'orders',
      source: {
        name: 'orders',
        descriptions: { user: sourceDescription },
        table: 'public.orders',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string', descriptions: { user: columnDescription } }],
        measures: [],
        joins: [],
      } as any,
    } as any;

    const outcome = await tool.call(input, baseContext);

    expect(outcome.structured.success).toBe(true);
    // User-supplied descriptions must reach writeSource unchanged.
    const writtenSource = expect.objectContaining({
      descriptions: { user: sourceDescription },
      columns: [expect.objectContaining({ descriptions: { user: columnDescription } })],
    });
    expect(semanticLayerService.writeSource).toHaveBeenCalledWith(
      expect.any(String),
      writtenSource,
      expect.any(String),
      expect.any(String),
      expect.any(String),
    );
  });

  it('fills missing descriptions for ingest-written overlays and columns', async () => {
    // The session's manifest listing includes the overlay target name.
    const session = makeSession({
      ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'metabase' },
      semanticLayerService: makeSessionService(['mart_account_segments']),
    });
    const { tool } = makeTool();
    const input = {
      connectionId: session.connectionId,
      sourceName: 'mart_account_segments',
      source: {
        name: 'mart_account_segments',
        columns: [{ name: 'is_large_contract', type: 'boolean', expr: 'contract_arr_cents >= 20000000' }],
        measures: [{ name: 'account_count', expr: 'count(account_id)' }],
      } as any,
    } as any;

    const outcome = await tool.call(input, { ...baseContext, session });

    expect(outcome.structured.success).toBe(true);
    const describedColumn = expect.objectContaining({
      descriptions: {
        ktx: expect.stringContaining('is large contract'),
      },
    });
    const describedSource = expect.objectContaining({
      descriptions: {
        ktx: expect.stringContaining('mart_account_segments'),
      },
      columns: [describedColumn],
    });
    expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith(
      expect.any(String),
      describedSource,
      expect.any(String),
      expect.any(String),
      expect.any(String),
    );
  });
});
// Validation warnings returned by the mocked validateWithProposedSource must
// show up in the markdown the tool returns.
describe('SlWriteSourceTool — disconnected-components warning in markdown', () => {
  const CONNECTION_ID = '11111111-1111-1111-1111-111111111111';

  // Tool whose proposed-source validation resolves with the given report.
  const makeToolWithValidation = (report: Record<string, unknown>) =>
    makeTool({
      semanticLayerService: {
        validateWithProposedSource: vi.fn().mockResolvedValue(report),
      },
    });

  // Minimal SQL-backed source input under the given name.
  const sqlSourceInput = (sourceName: string) =>
    ({
      connectionId: CONNECTION_ID,
      sourceName,
      source: {
        name: sourceName,
        sql: 'select 1 as id',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string' }],
        measures: [],
        joins: [],
      } as any,
    }) as any;

  it('surfaces validation warnings (including disconnected-components) in the markdown body', async () => {
    const { tool } = makeToolWithValidation({
      errors: [],
      warnings: ['orders: disconnected-components — no join path to ACCOUNTS'],
    });

    const outcome = await tool.call(sqlSourceInput('orders'), baseContext);

    expect(outcome.markdown).toMatch(/disconnected-components/i);
  });

  it('renders per-source warnings prominently when the just-written source becomes a singleton component', async () => {
    const { tool } = makeToolWithValidation({
      errors: [],
      warnings: ['Model has 2 disconnected components.'],
      perSourceWarnings: {
        foo: ["Source 'foo' is now a singleton component (no joins to any other source)."],
      },
    });

    const outcome = await tool.call(sqlSourceInput('foo'), baseContext);

    expect(outcome.markdown).toMatch(/Action required/i);
    expect(outcome.markdown).toContain("Source 'foo' is now a singleton component");
  });
});
// A full standalone definition (it declares its own table) is submitted under
// a name the mocked service reports as manifest-backed.
describe('SlWriteSourceTool — standalone shadow guard', () => {
  it('rejects standalone YAMLs that shadow a manifest entry', async () => {
    const manifestBackedService = { isManifestBacked: vi.fn().mockResolvedValue(true) };
    const { tool } = makeTool({ semanticLayerService: manifestBackedService });
    const standaloneSource = {
      name: 'ACCOUNTS',
      table: 'raw.accounts',
      grain: ['id'],
      columns: [{ name: 'id', type: 'string' }],
      measures: [],
      joins: [],
    } as any;
    const input = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      sourceName: 'ACCOUNTS',
      source: standaloneSource,
    } as any;

    const outcome = await tool.call(input, baseContext);

    expect(outcome.structured.success).toBe(false);
    expect(outcome.markdown).toMatch(/shadows an existing manifest entry|already exists/i);
  });
});