test: split cli tests from source tree (#216)

* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
This commit is contained in:
Andrey Avtomonov 2026-05-26 08:49:05 +02:00 committed by GitHub
parent 924868841d
commit 56985b7e09
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
548 changed files with 5048 additions and 2228 deletions

View file

@ -1,150 +0,0 @@
import { createRequire } from 'node:module';
import type { ReindexSummary } from './context/index-sync/types.js';
import { describe, expect, it, vi } from 'vitest';
import { renderReindexJson, renderReindexPlain, reindexHasErrors } from './admin-reindex.js';
import { runKtxCli } from './index.js';
const cliVersion = (createRequire(import.meta.url)('@kaelio/ktx/package.json') as { version: string })
.version;
function makeIo(options: { stdoutIsTTY?: boolean } = {}) {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
isTTY: options.stdoutIsTTY,
write: (chunk: string) => {
stdout += chunk;
},
},
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
},
},
stdout: () => stdout,
stderr: () => stderr,
};
}
function summary(overrides: Partial<ReindexSummary> = {}): ReindexSummary {
return {
scopes: [
{
kind: 'wiki',
label: 'global',
scope: 'global',
scopeId: null,
scanned: 42,
updated: 3,
deleted: 1,
embeddingsRecomputed: 3,
embeddingsFailed: 0,
durationMs: 412,
},
{
kind: 'sl',
label: 'warehouse',
connectionId: 'warehouse',
scanned: 18,
updated: 2,
deleted: 0,
embeddingsRecomputed: 2,
embeddingsFailed: 0,
durationMs: 287,
},
],
totals: { scanned: 60, updated: 5, deleted: 1, embeddingsRecomputed: 5, embeddingsFailed: 0 },
dbPath: '.ktx/db.sqlite',
force: false,
embeddingsAvailable: true,
durationMs: 1234,
...overrides,
};
}
describe('admin reindex renderers', () => {
it('renders plain scope lines to stderr and summary to stdout', () => {
const io = makeIo();
renderReindexPlain(summary(), io.io);
expect(io.stderr()).toContain('wiki/global\tscanned=42\tupdated=3\tdeleted=1\tembeddings=3\tduration_ms=412\n');
expect(io.stderr()).toContain('sl/warehouse\tscanned=18\tupdated=2\tdeleted=0\tembeddings=2\tduration_ms=287\n');
expect(io.stdout()).toBe('reindex\tscopes=2\tscanned=60\tupdated=5\tdeleted=1\tembeddings=5\tduration_ms=1234\n');
});
it('renders rebuilt labels in plain force mode', () => {
const io = makeIo();
renderReindexPlain(summary({ force: true }), io.io);
expect(io.stderr()).toContain('rebuilt=3');
expect(io.stdout()).toContain('rebuilt=5');
expect(io.stdout()).not.toContain('updated=5');
});
it('renders json envelope to stdout only', () => {
const io = makeIo();
renderReindexJson(summary(), io.io);
expect(JSON.parse(io.stdout())).toMatchObject({
kind: 'reindex',
data: { totals: { scanned: 60, updated: 5 } },
meta: { command: 'admin reindex' },
});
expect(io.stderr()).toBe('');
});
it('detects per-scope errors', () => {
expect(
reindexHasErrors(
summary({
scopes: [{ ...summary().scopes[0]!, error: 'provider failed' }],
}),
),
).toBe(true);
});
});
describe('admin reindex Commander routing', () => {
it('routes flags to the injectable reindex runner', async () => {
const { mkdir, mkdtemp, rm, writeFile } = await import('node:fs/promises');
const { tmpdir } = await import('node:os');
const { join } = await import('node:path');
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-admin-reindex-cli-'));
const projectDir = join(tempDir, 'project');
const io = makeIo();
const adminReindex = vi.fn(async () => 0);
try {
await mkdir(projectDir, { recursive: true });
await writeFile(join(projectDir, 'ktx.yaml'), '{}\n', 'utf-8');
await expect(
runKtxCli(
['--project-dir', projectDir, 'admin', 'reindex', '--force', '--json', '--output', 'plain'],
io.io,
{ adminReindex },
),
).resolves.toBe(0);
} finally {
await rm(tempDir, { recursive: true, force: true });
}
expect(adminReindex).toHaveBeenCalledWith(
{
projectDir,
force: true,
json: true,
output: 'plain',
cliVersion,
},
io.io,
);
});
});

View file

@ -1,246 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { runKtxCli } from './index.js';
function makeIo() {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
write: (chunk: string) => {
stdout += chunk;
},
},
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
},
},
stdout: () => stdout,
stderr: () => stderr,
};
}
describe('admin Commander tree', () => {
it('prints visible admin help with supported low-level command groups', async () => {
const testIo = makeIo();
await expect(runKtxCli(['admin', '--help'], testIo.io)).resolves.toBe(0);
expect(testIo.stdout()).toContain('Usage: ktx admin [options] [command]');
for (const command of ['init', 'runtime', 'reindex']) {
expect(testIo.stdout()).toContain(command);
}
for (const removed of [
'doctor',
'scan',
'ingest',
'mapping',
'knowledge',
'model',
'replay',
'report',
'status',
'artifacts',
'config',
'tools',
'daemon',
]) {
expect(testIo.stdout()).not.toContain(`${removed} `);
}
expect(testIo.stderr()).toBe('');
});
it('lists admin in root command rows', async () => {
const testIo = makeIo();
await expect(runKtxCli(['--help'], testIo.io)).resolves.toBe(0);
expect(testIo.stdout()).not.toContain('Advanced:');
expect(testIo.stdout()).toContain('admin');
expect(testIo.stdout()).toMatch(/Low-level project initialization,\s+runtime,\s+and index management/);
expect(testIo.stderr()).toBe('');
});
it('does not keep a dev alias', async () => {
const testIo = makeIo();
await expect(runKtxCli(['dev', '--help'], testIo.io)).resolves.toBe(1);
expect(testIo.stderr()).toContain("unknown command 'dev'");
});
it('keeps project scaffolding under admin init', async () => {
const { mkdtemp, readFile, rm } = await import('node:fs/promises');
const { tmpdir } = await import('node:os');
const { join } = await import('node:path');
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-admin-init-'));
const projectDir = join(tempDir, 'warehouse');
const testIo = makeIo();
try {
await expect(runKtxCli(['admin', 'init', projectDir], testIo.io)).resolves.toBe(0);
expect(testIo.stdout()).toContain(`Initialized KTX project at ${projectDir}`);
await expect(readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).resolves.not.toContain('project:');
expect(testIo.stderr()).toBe('');
} finally {
await rm(tempDir, { recursive: true, force: true });
}
});
it('uses global project-dir for admin init when the positional directory is omitted', async () => {
const { mkdtemp, rm } = await import('node:fs/promises');
const { tmpdir } = await import('node:os');
const { join } = await import('node:path');
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-admin-init-global-'));
const projectDir = join(tempDir, 'global-init');
const testIo = makeIo();
try {
await expect(
runKtxCli(['--project-dir', projectDir, 'admin', 'init'], testIo.io),
).resolves.toBe(0);
expect(testIo.stdout()).toContain(`Initialized KTX project at ${projectDir}`);
expect(testIo.stderr()).toBe('');
} finally {
await rm(tempDir, { recursive: true, force: true });
}
});
it('prints config schema without requiring a KTX project directory', async () => {
const { mkdtemp, rm } = await import('node:fs/promises');
const { tmpdir } = await import('node:os');
const { join } = await import('node:path');
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-admin-schema-'));
const missingProjectDir = join(tempDir, 'missing-project');
const originalProjectDir = process.env.KTX_PROJECT_DIR;
const testIo = makeIo();
try {
process.env.KTX_PROJECT_DIR = missingProjectDir;
await expect(runKtxCli(['admin', 'schema'], testIo.io)).resolves.toBe(0);
expect(JSON.parse(testIo.stdout())).toMatchObject({
title: 'ktx.yaml',
type: 'object',
});
expect(testIo.stderr()).toBe('');
} finally {
if (originalProjectDir === undefined) {
delete process.env.KTX_PROJECT_DIR;
} else {
process.env.KTX_PROJECT_DIR = originalProjectDir;
}
await rm(tempDir, { recursive: true, force: true });
}
});
it('rejects removed admin command groups', async () => {
for (const argv of [
['admin', 'doctor', 'setup'],
['admin', 'runtime', 'doctor'],
['admin', 'runtime', 'prune', '--dry-run'],
['admin', 'scan', 'warehouse'],
['admin', 'ingest', 'run'],
['admin', 'mapping', 'list'],
['admin', 'completion', 'zsh'],
['admin', '__complete', '--shell', 'zsh', '--position', '2', '--', 'ktx', ''],
['admin', 'knowledge', 'list'],
['admin', 'model', 'list'],
['admin', 'artifacts'],
]) {
const testIo = makeIo();
await expect(runKtxCli(argv, testIo.io)).resolves.toBe(1);
expect(testIo.stderr()).toMatch(/unknown command|error:/);
}
});
it.each([
{
argv: ['admin', 'runtime', '--help'],
expected: ['Usage: ktx admin runtime', 'install', 'start', 'stop', 'status'],
},
])('prints generated nested help for $argv', async ({ argv, expected }) => {
const io = makeIo();
const doctor = vi.fn(async () => 0);
await expect(runKtxCli(argv, io.io, { doctor })).resolves.toBe(0);
for (const text of expected) {
expect(io.stdout()).toContain(text);
}
if (argv.join(' ') === 'admin runtime --help') {
expect(io.stdout()).not.toContain('prune');
expect(io.stdout()).not.toContain('doctor');
}
expect(io.stderr()).toBe('');
expect(doctor).not.toHaveBeenCalled();
});
it('rejects old adapter-backed ingest flags through public option parsing and keeps run out of ingest help', async () => {
const helpIo = makeIo();
const runIo = makeIo();
const publicIngest = vi.fn(async () => 0);
await expect(runKtxCli(['ingest', '--help'], helpIo.io, { publicIngest })).resolves.toBe(0);
await expect(
runKtxCli(
['ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase', '--project-dir', '/tmp/project'],
runIo.io,
{ publicIngest },
),
).resolves.toBe(1);
expect(helpIo.stdout()).not.toMatch(/^ run\s/m);
expect(runIo.stderr()).toMatch(/unknown option '--connection-id'|error:/);
expect(publicIngest).not.toHaveBeenCalled();
});
it.each([
{ argv: ['scan'] },
{ argv: ['scan', '--help'] },
{ argv: ['scan', 'warehouse'] },
{ argv: ['scan', 'warehouse', '--project-dir', '/tmp/project', '--dry-run'] },
{ argv: ['scan', 'warehouse', '--project-dir', '/tmp/project', '--mode', 'relationships'] },
])('rejects removed top-level scan command $argv', async ({ argv }) => {
const io = makeIo();
const publicIngest = vi.fn(async () => 0);
await expect(runKtxCli(argv, io.io, { publicIngest })).resolves.toBe(1);
expect(publicIngest).not.toHaveBeenCalled();
expect(io.stderr()).toMatch(/unknown command|error:/);
});
it('rejects old adapter-backed top-level ingest flags without low-level ingest registration', async () => {
const io = makeIo();
const publicIngest = vi.fn(async () => 0);
await expect(
runKtxCli(
[
'ingest',
'run',
'--connection-id',
'warehouse',
'--adapter',
'metabase',
'--project-dir',
'/tmp/project',
'--json',
],
io.io,
{ publicIngest },
),
).resolves.toBe(1);
expect(publicIngest).not.toHaveBeenCalled();
expect(io.stderr()).toMatch(/unknown option '--connection-id'|error:/);
});
});

View file

@ -1,7 +1,30 @@
import { cancel, confirm, isCancel, log, spinner } from '@clack/prompts';
import type { KtxCliIo } from './cli-runtime.js';
const ESC = String.fromCharCode(0x1b);
export interface RailBufferedSource {
stdoutText(): string;
stderrText(): string;
}
export function errorMessage(error: unknown): string {
return error instanceof Error ? error.message : String(error);
}
export function writePrefixedLines(write: (chunk: string) => void, output: string): void {
for (const line of output.split(/\r?\n/)) {
if (line.length > 0) {
write(`${line}\n`);
}
}
}
export function flushPrefixedBufferedCommandOutput(io: KtxCliIo, buffered: RailBufferedSource): void {
writePrefixedLines((chunk) => io.stdout.write(chunk), buffered.stdoutText());
writePrefixedLines((chunk) => io.stderr.write(chunk), buffered.stderrText());
}
export interface KtxCliSpinner {
start(message: string): void;
message(message: string): void;

View file

@ -1,133 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { runCommanderKtxCli } from './cli-program.js';
import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
function makeIo(stdoutIsTTY = true): { io: KtxCliIo; stdout: () => string; stderr: () => string } {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
isTTY: stdoutIsTTY,
write: (chunk) => {
stdout += chunk;
},
},
stderr: {
write: (chunk) => {
stderr += chunk;
},
},
},
stdout: () => stdout,
stderr: () => stderr,
};
}
const info: KtxCliPackageInfo = { name: '@kaelio/ktx', version: '0.4.1' };
describe('runCommanderKtxCli telemetry', () => {
let tempDir: string;
const originalEnv = process.env;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-telemetry-'));
await writeFile(join(tempDir, 'ktx.yaml'), '{}\n', 'utf-8');
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('HOME', tempDir);
vi.stubEnv('CI', '');
vi.stubEnv('KTX_TELEMETRY_DISABLED', '');
vi.stubEnv('DO_NOT_TRACK', '');
});
afterEach(async () => {
vi.unstubAllEnvs();
process.env = originalEnv;
await rm(tempDir, { recursive: true, force: true });
});
it('emits debug command telemetry for registered actions', async () => {
const io = makeIo(true);
await expect(
runCommanderKtxCli(
['--project-dir', tempDir, 'status', '--help'],
io.io,
{},
info,
{ runInit: async () => 0 },
),
).resolves.toBe(0);
expect(io.stderr()).not.toContain('[telemetry]');
const statusIo = makeIo(true);
const deps: KtxCliDeps = { doctor: async () => 0 };
await expect(
runCommanderKtxCli(
['--project-dir', tempDir, 'status', '--json'],
statusIo.io,
deps,
info,
{ runInit: async () => 0 },
),
).resolves.toBe(0);
expect(statusIo.stderr()).toContain('[telemetry]');
expect(statusIo.stderr()).toContain('"event":"install_first_run"');
expect(statusIo.stderr()).toContain('"event":"command"');
expect(statusIo.stderr()).toContain('"commandPath":["ktx","status"]');
expect(statusIo.stderr()).toContain('"event":"project_stack_snapshot"');
expect(statusIo.stderr()).toContain('"connectionCount"');
expect(statusIo.stderr()).not.toContain(tempDir);
const noticeIndex = statusIo.stderr().indexOf('ktx collects anonymous usage data');
const firstTelemetryIndex = statusIo.stderr().indexOf('[telemetry]');
expect(noticeIndex).toBeGreaterThanOrEqual(0);
expect(firstTelemetryIndex).toBeGreaterThan(noticeIndex);
});
it('emits aborted telemetry when project validation aborts after preAction starts', async () => {
const missingProjectDir = join(tempDir, 'missing');
await mkdir(missingProjectDir, { recursive: true });
const io = makeIo(true);
await expect(
runCommanderKtxCli(
['--project-dir', missingProjectDir, 'connection'],
io.io,
{},
info,
{ runInit: async () => 0 },
),
).resolves.toBe(1);
expect(io.stderr()).toContain('[telemetry]');
expect(io.stderr()).toContain('"outcome":"aborted"');
expect(io.stderr()).toContain('"hasProject":false');
expect(io.stderr()).toContain('"projectGroupAttached":false');
expect(io.stderr()).not.toContain(missingProjectDir);
});
it('does not import or emit telemetry for help, version, bare non-TTY, or unknown top-level command', async () => {
const helpIo = makeIo(true);
await expect(runCommanderKtxCli(['--help'], helpIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0);
expect(helpIo.stderr()).not.toContain('[telemetry]');
const versionIo = makeIo(true);
await expect(runCommanderKtxCli(['--version'], versionIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0);
expect(versionIo.stderr()).not.toContain('[telemetry]');
const bareIo = makeIo(false);
await expect(runCommanderKtxCli([], bareIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0);
expect(bareIo.stderr()).not.toContain('[telemetry]');
const unknownIo = makeIo(true);
await expect(runCommanderKtxCli(['unknown'], unknownIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(1);
expect(unknownIo.stderr()).not.toContain('[telemetry]');
});
});

View file

@ -1,85 +0,0 @@
import { Command, type CommandUnknownOpts } from '@commander-js/extra-typings';
import { describe, expect, it } from 'vitest';
import { buildKtxProgram, collectCommandFlagsPresent } from './cli-program.js';
import type { KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
function stubIo(): KtxCliIo {
return {
stdout: { isTTY: false, columns: 80, write: () => {} },
stderr: { write: () => {} },
};
}
function stubPackageInfo(): KtxCliPackageInfo {
return {
name: '@kaelio/ktx',
version: '0.0.0-test',
};
}
describe('buildKtxProgram', () => {
it('returns a Command named "ktx" with all registered top-level subcommands', () => {
const program: Command = buildKtxProgram({
io: stubIo(),
deps: {},
packageInfo: stubPackageInfo(),
runInit: async () => 0,
});
expect(program.name()).toBe('ktx');
const topLevel = program.commands.map((command) => command.name()).sort();
for (const expected of ['setup', 'connection', 'ingest', 'sl', 'admin']) {
expect(topLevel).toContain(expected);
}
});
it('does not parse argv or invoke action handlers', () => {
let wrote = '';
const io: KtxCliIo = {
stdout: {
isTTY: false,
columns: 80,
write: (chunk) => {
wrote += chunk;
},
},
stderr: {
write: (chunk) => {
wrote += chunk;
},
},
};
buildKtxProgram({ io, deps: {}, packageInfo: stubPackageInfo(), runInit: async () => 0 });
expect(wrote).toBe('');
});
});
describe('collectCommandFlagsPresent', () => {
it('records only CLI-sourced flags and ignores positional content that looks like a flag', async () => {
let captured: Record<string, boolean> | undefined;
const program = new Command()
.name('ktx')
.option('--project-dir <dir>', 'project directory')
.option('--json', 'json output', false);
program
.command('sql')
.argument('<sql...>')
.requiredOption('-c, --connection <id>', 'connection id')
.option('--max-rows <n>', 'cap rows')
.action(function () {
captured = collectCommandFlagsPresent(this as unknown as CommandUnknownOpts);
});
await program.parseAsync(
['--project-dir', '/tmp/p', 'sql', '-c', 'warehouse', '--', '--customer_table', 'SELECT', '1'],
{ from: 'user' },
);
expect(captured).toEqual({ projectDir: true, connection: true });
expect(captured).not.toHaveProperty('customer_table');
expect(captured).not.toHaveProperty('json');
expect(captured).not.toHaveProperty('maxRows');
});
});

View file

@ -1,141 +0,0 @@
import { Command } from '@commander-js/extra-typings';
import { describe, expect, it } from 'vitest';
import { formatCommandTree, walkCommandTree } from './command-tree.js';
describe('walkCommandTree', () => {
it('captures name, description, aliases, and nested children', () => {
const root = new Command('root').description('the root');
const child = new Command('child').description('a child').alias('c').alias('ch');
const grandchild = new Command('grand').description('a grandchild');
child.addCommand(grandchild);
root.addCommand(child);
const tree = walkCommandTree(root);
expect(tree).toEqual({
name: 'root',
description: 'the root',
aliases: [],
arguments: [],
children: [
{
name: 'child',
description: 'a child',
aliases: ['c', 'ch'],
arguments: [],
children: [{ name: 'grand', description: 'a grandchild', aliases: [], arguments: [], children: [] }],
},
],
});
});
it('returns an empty children array when there are no subcommands', () => {
const leaf = new Command('leaf').description('alone');
expect(walkCommandTree(leaf)).toEqual({
name: 'leaf',
description: 'alone',
aliases: [],
arguments: [],
children: [],
});
});
it('uses an empty string when description is unset', () => {
const command = new Command('bare');
expect(walkCommandTree(command).description).toBe('');
});
it('captures required, optional, and variadic arguments', () => {
const command = new Command('scan')
.argument('<connectionId>', 'KTX connection id')
.argument('[schemas...]', 'Schemas');
expect(walkCommandTree(command).arguments).toEqual(['<connectionId>', '[schemas...]']);
});
it('walks registered commands without applying hidden-command policy', () => {
const root = new Command('ktx');
root.command('scan', { hidden: true }).description('Run a standalone connection scan');
const ingest = root.command('ingest').description('Build or inspect KTX context');
ingest.command('run', { hidden: true }).description('Run local ingest by adapter');
ingest.command('watch', { hidden: true }).description('Open a stored visual report');
ingest.command('status').description('Print status');
root.command('status').description('Check readiness');
const tree = walkCommandTree(root);
expect(tree.children.map((child) => child.name)).toEqual(['scan', 'ingest', 'status']);
expect(tree.children[0]).toMatchObject({
name: 'scan',
description: 'Run a standalone connection scan',
children: [],
});
expect(tree.children[1]).toMatchObject({
name: 'ingest',
children: [
{ name: 'run', description: 'Run local ingest by adapter', aliases: [], arguments: [], children: [] },
{ name: 'watch', description: 'Open a stored visual report', aliases: [], arguments: [], children: [] },
{ name: 'status', description: 'Print status', aliases: [], arguments: [], children: [] },
],
});
});
});
describe('formatCommandTree', () => {
it('renders a single node with no children', () => {
const node = { name: 'solo', description: 'just me', aliases: [], arguments: [], children: [] };
expect(formatCommandTree(node)).toMatch(/^solo\s+just me\n$/);
});
it('renders aliases in parentheses before the description', () => {
const node = { name: 'cmd', description: 'does things', aliases: ['c', 'co'], arguments: [], children: [] };
expect(formatCommandTree(node)).toMatch(/^cmd \(c, co\)\s+does things\n$/);
});
it('renders command arguments after the command name', () => {
const node = {
name: 'test',
description: 'Test a configured connection',
aliases: [],
arguments: ['<connectionId>'],
children: [],
};
expect(formatCommandTree(node)).toMatch(/^test <connectionId>\s+Test a configured connection\n$/);
});
it('omits the dash when description is empty', () => {
const node = { name: 'bare', description: '', aliases: [], arguments: [], children: [] };
expect(formatCommandTree(node)).toBe('bare\n');
});
it('renders tree connectors and preserves sibling registration order', () => {
const tree = {
name: 'root',
description: 'top',
aliases: [],
arguments: [],
children: [
{
name: 'beta',
description: 'b',
aliases: [],
arguments: [],
children: [{ name: 'leaf', description: 'l', aliases: [], arguments: [], children: [] }],
},
{
name: 'alpha',
description: 'a',
aliases: ['al'],
arguments: ['<id>'],
children: [{ name: 'inner', description: 'i', aliases: [], arguments: [], children: [] }],
},
],
};
const lines = formatCommandTree(tree).trimEnd().split('\n');
expect(lines[0]).toMatch(/^root\s+top$/);
expect(lines[1]).toMatch(/^ ├── beta\s+b$/);
expect(lines[2]).toMatch(/^ │ └── leaf\s+l$/);
expect(lines[3]).toMatch(/^ └── alpha <id> \(al\)\s+a$/);
expect(lines[4]).toMatch(/^ └── inner\s+i$/);
});
});

View file

@ -1,138 +0,0 @@
import { Command } from '@commander-js/extra-typings';
import { describe, expect, it, vi } from 'vitest';
import type { KtxCliCommandContext } from '../cli-program.js';
import { registerMcpCommands } from './mcp-commands.js';
function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliCommandContext {
let exitCode = 0;
return {
io: {
stdout: { write: vi.fn() },
stderr: { write: vi.fn() },
},
deps: {},
packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' },
setExitCode: (code) => {
exitCode = code;
},
runInit: vi.fn(),
writeDebug: vi.fn(),
...overrides,
get exitCode() {
return exitCode;
},
} as KtxCliCommandContext;
}
describe('registerMcpCommands', () => {
it('registers the public mcp lifecycle commands', () => {
const program = new Command().exitOverride();
registerMcpCommands(program, makeContext());
const mcp = program.commands.find((command) => command.name() === 'mcp');
expect(mcp?.commands.map((command) => command.name()).sort()).toEqual([
'logs',
'serve-internal',
'start',
'status',
'stdio',
'stop',
]);
expect(
(mcp?.commands.find((command) => command.name() === 'serve-internal') as { _hidden?: boolean } | undefined)
?._hidden,
).toBe(true);
});
it('rejects non-loopback start without token before spawning', async () => {
const program = new Command().exitOverride();
const startDaemon = vi.fn();
const context = makeContext({ deps: { mcp: { startDaemon } } });
registerMcpCommands(program, context);
await expect(program.parseAsync(['mcp', 'start', '--host', '0.0.0.0'], { from: 'user' })).rejects.toThrow(
'Binding KTX MCP to 0.0.0.0 requires --token or KTX_MCP_TOKEN',
);
expect(startDaemon).not.toHaveBeenCalled();
});
it('prints "already running" when startDaemon reports already-running', async () => {
const program = new Command().exitOverride().option('--project-dir <path>');
const startDaemon = vi.fn().mockResolvedValue({
status: 'already-running',
url: 'http://127.0.0.1:7878/mcp',
state: {
schemaVersion: 1,
pid: 4242,
host: '127.0.0.1',
port: 7878,
tokenAuth: false,
projectDir: '/tmp/ktx-already',
startedAt: '2026-05-14T00:00:00.000Z',
logPath: '/tmp/ktx-already/.ktx/logs/mcp.log',
},
});
const context = makeContext({ deps: { mcp: { startDaemon } } });
registerMcpCommands(program, context);
await program.parseAsync(['--project-dir', '/tmp/ktx-already', 'mcp', 'start'], { from: 'user' });
expect(startDaemon).toHaveBeenCalledTimes(1);
expect(context.io.stdout.write).toHaveBeenCalledWith(
[
'KTX MCP daemon already running: http://127.0.0.1:7878/mcp',
'',
'KTX is ready for configured agents.',
'Open your agent for this KTX project and ask a data question, for example:',
' "Use KTX to show me the available tables and metrics."',
'',
].join('\n'),
);
});
it('prints a friendly next step after starting the daemon', async () => {
const program = new Command().exitOverride().option('--project-dir <path>');
const startDaemon = vi.fn().mockResolvedValue({
status: 'started',
url: 'http://127.0.0.1:7878/mcp',
state: {
schemaVersion: 1,
pid: 4242,
host: '127.0.0.1',
port: 7878,
tokenAuth: false,
projectDir: '/tmp/ktx-started',
startedAt: '2026-05-14T00:00:00.000Z',
logPath: '/tmp/ktx-started/.ktx/logs/mcp.log',
},
});
const context = makeContext({ deps: { mcp: { startDaemon } } });
registerMcpCommands(program, context);
await program.parseAsync(['--project-dir', '/tmp/ktx-started', 'mcp', 'start'], { from: 'user' });
expect(context.io.stdout.write).toHaveBeenCalledWith(
expect.stringContaining('KTX MCP daemon started: http://127.0.0.1:7878/mcp\n\nKTX is ready for configured agents.'),
);
expect(context.io.stdout.write).toHaveBeenCalledWith(
expect.stringContaining('"Use KTX to show me the available tables and metrics."'),
);
});
it('runs the stdio server with the resolved project directory', async () => {
const program = new Command().exitOverride().option('--project-dir <path>');
const runStdioServer = vi.fn().mockResolvedValue(undefined);
const context = makeContext({ deps: { mcp: { runStdioServer } } });
registerMcpCommands(program, context);
await expect(program.parseAsync(['--project-dir', '/tmp/ktx6', 'mcp', 'stdio'], { from: 'user' })).resolves.toBe(
program,
);
expect(runStdioServer).toHaveBeenCalledWith({
projectDir: '/tmp/ktx6',
cliVersion: '0.0.0-test',
io: context.io,
});
});
});

View file

@ -1,99 +0,0 @@
import { Command } from '@commander-js/extra-typings';
import { describe, expect, it, vi } from 'vitest';
import type { KtxCliCommandContext } from '../cli-program.js';
import { registerSqlCommands } from './sql-commands.js';
function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliCommandContext {
let exitCode = 0;
return {
io: {
stdout: { write: vi.fn() },
stderr: { write: vi.fn() },
},
deps: {},
packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' },
setExitCode: (code) => {
exitCode = code;
},
runInit: vi.fn(),
writeDebug: vi.fn(),
...overrides,
get exitCode() {
return exitCode;
},
} as KtxCliCommandContext;
}
describe('registerSqlCommands', () => {
it('routes positional SQL through the sql runner', async () => {
const program = new Command().exitOverride().option('--project-dir <path>');
const sql = vi.fn(async () => 0);
const context = makeContext({ deps: { sql } });
registerSqlCommands(program, context);
await expect(
program.parseAsync(
['--project-dir', '/tmp/ktx-sql', 'sql', '--connection', 'warehouse', 'select', '1'],
{ from: 'user' },
),
).resolves.toBe(program);
expect(sql).toHaveBeenCalledWith(
{
command: 'execute',
projectDir: '/tmp/ktx-sql',
connectionId: 'warehouse',
sql: 'select 1',
maxRows: 1000,
output: undefined,
json: false,
cliVersion: '0.0.0-test',
},
context.io,
);
});
it('supports the short connection flag', async () => {
const program = new Command().exitOverride().option('--project-dir <path>');
const sql = vi.fn(async () => 0);
const context = makeContext({ deps: { sql } });
registerSqlCommands(program, context);
await expect(
program.parseAsync(['--project-dir', '/tmp/ktx-sql', 'sql', '-c', 'warehouse', 'select 1'], {
from: 'user',
}),
).resolves.toBe(program);
expect(sql).toHaveBeenCalledWith(expect.objectContaining({ connectionId: 'warehouse', sql: 'select 1' }), context.io);
});
it('rejects missing SQL before invoking the runner', async () => {
const program = new Command().exitOverride().option('--project-dir <path>');
const sql = vi.fn(async () => 0);
registerSqlCommands(program, makeContext({ deps: { sql } }));
await expect(
program.parseAsync(['--project-dir', '/tmp/ktx-sql', 'sql', '--connection', 'warehouse'], {
from: 'user',
}),
).rejects.toThrow('missing required argument');
expect(sql).not.toHaveBeenCalled();
});
it('rejects maxRows above the CLI cap', async () => {
const program = new Command().exitOverride().option('--project-dir <path>');
const sql = vi.fn(async () => 0);
registerSqlCommands(program, makeContext({ deps: { sql } }));
await expect(
program.parseAsync(
['--project-dir', '/tmp/ktx-sql', 'sql', '--connection', 'warehouse', '--max-rows', '10001', 'select 1'],
{ from: 'user' },
),
).rejects.toThrow('must be an integer between 1 and 10000');
expect(sql).not.toHaveBeenCalled();
});
});

View file

@ -1,531 +0,0 @@
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { LookerClient } from './context/ingest/adapters/looker/client.js';
import type { MetabaseRuntimeClient } from './context/ingest/adapters/metabase/client-port.js';
import type { NotionClient } from './context/ingest/adapters/notion/notion-client.js';
import { initKtxProject } from './context/project/project.js';
import { parseKtxProjectConfig, serializeKtxProjectConfig } from './context/project/config.js';
import type { KtxConnectionDriver, KtxScanConnector } from './context/scan/types.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { runKtxConnection } from './connection.js';
function stripAnsi(s: string): string {
return s.replace(/\[[0-9;]*m/g, '');
}
function makeIo() {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
isTTY: true,
write: (chunk: string) => {
stdout += chunk;
},
},
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
},
},
stdout: () => stdout,
stderr: () => stderr,
};
}
function nativeConnector(
driver: KtxConnectionDriver,
testResult: { success: true } | { success: false; error: string } = { success: true },
) {
const testConnection = vi.fn(async () => testResult);
const cleanup = vi.fn(async () => undefined);
const connector: KtxScanConnector = {
id: `${driver}:warehouse`,
driver,
capabilities: {
structuralIntrospection: true,
tableSampling: false,
columnSampling: false,
columnStats: false,
readOnlySql: false,
nestedAnalysis: false,
eventStreamDiscovery: false,
formalForeignKeys: false,
estimatedRowCounts: false,
},
introspect: vi.fn(async () => {
throw new Error('introspect should not be called from connection test');
}),
testConnection,
cleanup,
};
return { connector, testConnection, cleanup };
}
describe('runKtxConnection', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-connection-'));
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
async function writeConnections(
projectDir: string,
connections: ReturnType<typeof parseKtxProjectConfig>['connections'],
): Promise<void> {
const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'));
await writeFile(join(projectDir, 'ktx.yaml'), serializeKtxProjectConfig({ ...config, connections }), 'utf-8');
}
it('lists configured connections without resolving secrets', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },
docs: { driver: 'notion', auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible' },
});
const io = makeIo();
await expect(runKtxConnection({ command: 'list', projectDir }, io.io)).resolves.toBe(0);
expect(io.stdout()).toContain('warehouse');
expect(io.stdout()).toContain('postgres');
expect(io.stdout()).toContain('docs');
expect(io.stdout()).toContain('notion');
expect(io.stderr()).toBe('');
});
it('prints an empty-state message that points at setup instead of removed connection add', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
const io = makeIo();
await expect(runKtxConnection({ command: 'list', projectDir }, io.io)).resolves.toBe(0);
expect(io.stdout()).toContain('No connections configured. Run `ktx setup` to add one.');
expect(io.stdout()).not.toContain('ktx connection add');
});
it('tests a native connection by calling connector.testConnection (not introspect)', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
warehouse: { driver: 'sqlite' },
});
const { connector, testConnection, cleanup } = nativeConnector('sqlite');
const createScanConnector = vi.fn(async () => connector);
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, {
createScanConnector,
}),
).resolves.toBe(0);
expect(createScanConnector).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'warehouse');
expect(testConnection).toHaveBeenCalledTimes(1);
expect(connector.introspect).not.toHaveBeenCalled();
expect(cleanup).toHaveBeenCalledTimes(1);
expect(io.stdout()).toContain('Connection test passed: warehouse');
expect(io.stdout()).toContain('Driver: sqlite');
expect(io.stdout()).toContain('Status: ok');
});
it('emits debug telemetry for connection tests without project paths', async () => {
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },
});
const { connector } = nativeConnector('postgres');
const io = makeIo();
const code = await runKtxConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, {
createScanConnector: vi.fn(async () => connector),
});
expect(code).toBe(0);
expect(io.stderr()).toContain('"event":"connection_test"');
expect(io.stderr()).toContain('"driver":"postgres"');
expect(io.stderr()).not.toContain(projectDir);
});
it('reports the connector error and still cleans up when native testConnection fails', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
warehouse: { driver: 'sqlite' },
});
const { connector, cleanup } = nativeConnector('sqlite', { success: false, error: 'database file is unreadable' });
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, {
createScanConnector: vi.fn(async () => connector),
}),
).resolves.toBe(1);
expect(cleanup).toHaveBeenCalledTimes(1);
expect(io.stderr()).toContain('database file is unreadable');
});
it('tests a configured Metabase connection through the Metabase runtime client', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
prod_metabase: {
driver: 'metabase',
api_url: 'http://metabase.example.test',
api_key: 'mb_test', // pragma: allowlist secret
},
});
const testConnection = vi.fn(async () => ({ success: true as const }));
const getDatabases = vi.fn(async () => [
{ id: 1, name: 'Analytics', engine: 'postgres', details: {}, is_sample: false },
{ id: 2, name: 'Sample Database', engine: 'h2', details: {}, is_sample: true },
]);
const cleanup = vi.fn(async () => undefined);
const createMetabaseClient = vi.fn(
async (): Promise<Pick<MetabaseRuntimeClient, 'testConnection' | 'getDatabases' | 'cleanup'>> => ({
testConnection,
getDatabases,
cleanup,
}),
);
const createScanConnector = vi.fn(async () => {
throw new Error('native scanner should not be used for Metabase');
});
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'prod_metabase' }, io.io, {
createScanConnector,
createMetabaseClient,
}),
).resolves.toBe(0);
expect(createScanConnector).not.toHaveBeenCalled();
expect(createMetabaseClient).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'prod_metabase');
expect(testConnection).toHaveBeenCalledTimes(1);
expect(getDatabases).toHaveBeenCalledTimes(1);
expect(cleanup).toHaveBeenCalledTimes(1);
expect(io.stdout()).toContain('Connection test passed: prod_metabase');
expect(io.stdout()).toContain('Driver: metabase');
expect(io.stdout()).toContain('Databases: 1');
expect(io.stderr()).toBe('');
});
it('tests a Looker connection through the Looker client', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
bi_looker: {
driver: 'looker',
base_url: 'https://looker.example.test',
client_id: 'cid',
client_secret: 'csecret', // pragma: allowlist secret
},
});
const testConnection = vi.fn(async () => ({
success: true as const,
metadata: { displayName: 'Alice Analyst', userId: '42' },
}));
const createLookerClient = vi.fn(async (): Promise<Pick<LookerClient, 'testConnection'>> => ({ testConnection }));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'bi_looker' }, io.io, { createLookerClient }),
).resolves.toBe(0);
expect(createLookerClient).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'bi_looker');
expect(testConnection).toHaveBeenCalledTimes(1);
expect(io.stdout()).toContain('Connection test passed: bi_looker');
expect(io.stdout()).toContain('Driver: looker');
expect(io.stdout()).toContain('User: Alice Analyst');
});
it('falls back to userId when Looker metadata has no display name', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
bi_looker: {
driver: 'looker',
base_url: 'https://looker.example.test',
client_id: 'cid',
client_secret: 'csecret', // pragma: allowlist secret
},
});
const createLookerClient = vi.fn(async (): Promise<Pick<LookerClient, 'testConnection'>> => ({
testConnection: vi.fn(async () => ({
success: true as const,
metadata: { displayName: null, userId: '42' },
})),
}));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'bi_looker' }, io.io, { createLookerClient }),
).resolves.toBe(0);
expect(io.stdout()).toContain('User: 42');
});
it('reports the Looker error when testConnection fails', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
bi_looker: {
driver: 'looker',
base_url: 'https://looker.example.test',
client_id: 'cid',
client_secret: 'csecret', // pragma: allowlist secret
},
});
const createLookerClient = vi.fn(async (): Promise<Pick<LookerClient, 'testConnection'>> => ({
testConnection: vi.fn(async () => ({ success: false as const, error: 'invalid client_id' })),
}));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'bi_looker' }, io.io, { createLookerClient }),
).resolves.toBe(1);
expect(io.stderr()).toContain('Looker connection test failed: invalid client_id');
});
it('tests a Notion connection by retrieving the bot user', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
docs: {
driver: 'notion',
auth_token: 'secret_token', // pragma: allowlist secret
crawl_mode: 'all_accessible',
},
});
const retrieveBotUser = vi.fn(async () => ({ id: 'bot-1', name: 'Analytics Bot' }));
const createNotionClient = vi.fn(async (): Promise<Pick<NotionClient, 'retrieveBotUser'>> => ({ retrieveBotUser }));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'docs' }, io.io, { createNotionClient }),
).resolves.toBe(0);
expect(createNotionClient).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'docs');
expect(retrieveBotUser).toHaveBeenCalledTimes(1);
expect(io.stdout()).toContain('Connection test passed: docs');
expect(io.stdout()).toContain('Driver: notion');
expect(io.stdout()).toContain('Bot: Analytics Bot');
});
it('falls back to bot id when Notion bot has no name', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
docs: {
driver: 'notion',
auth_token: 'secret_token', // pragma: allowlist secret
crawl_mode: 'all_accessible',
},
});
const createNotionClient = vi.fn(async (): Promise<Pick<NotionClient, 'retrieveBotUser'>> => ({
retrieveBotUser: vi.fn(async () => ({ id: 'bot-1', name: null })),
}));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'docs' }, io.io, { createNotionClient }),
).resolves.toBe(0);
expect(io.stdout()).toContain('Bot: bot-1');
});
it('tests a dbt connection via testRepoConnection (success)', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
process.env.DBT_TOKEN = 'gh_token_abc'; // pragma: allowlist secret
await writeConnections(projectDir, {
'dbt-main': {
driver: 'dbt',
repo_url: 'https://github.com/example/dbt-project',
auth_token_ref: 'env:DBT_TOKEN',
},
});
const testRepoConnection = vi.fn(async () => ({ ok: true as const }));
const io = makeIo();
try {
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'dbt-main' }, io.io, { testRepoConnection }),
).resolves.toBe(0);
expect(testRepoConnection).toHaveBeenCalledWith({
repoUrl: 'https://github.com/example/dbt-project',
authToken: 'gh_token_abc',
});
expect(io.stdout()).toContain('Connection test passed: dbt-main');
expect(io.stdout()).toContain('Driver: dbt');
expect(io.stdout()).toContain('Repo: https://github.com/example/dbt-project');
} finally {
delete process.env.DBT_TOKEN;
}
});
it('reports the git error when testRepoConnection fails for dbt', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
'dbt-main': {
driver: 'dbt',
repo_url: 'https://github.com/example/dbt-project',
},
});
const testRepoConnection = vi.fn(async () => ({ ok: false as const, error: 'fatal: auth failed' }));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'dbt-main' }, io.io, { testRepoConnection }),
).resolves.toBe(1);
expect(testRepoConnection).toHaveBeenCalledWith({
repoUrl: 'https://github.com/example/dbt-project',
authToken: null,
});
expect(io.stderr()).toContain('dbt repository check failed: fatal: auth failed');
});
it('tests a LookML connection via testRepoConnection with camelCase repoUrl', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
lookml_main: {
driver: 'lookml',
repoUrl: 'https://github.com/example/lookml',
},
});
const testRepoConnection = vi.fn(async () => ({ ok: true as const }));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'lookml_main' }, io.io, { testRepoConnection }),
).resolves.toBe(0);
expect(testRepoConnection).toHaveBeenCalledWith({
repoUrl: 'https://github.com/example/lookml',
authToken: null,
});
expect(io.stdout()).toContain('Driver: lookml');
expect(io.stdout()).toContain('Repo: https://github.com/example/lookml');
});
it('tests a MetricFlow connection via the nested metricflow block', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
mf_main: {
driver: 'metricflow',
metricflow: { repoUrl: 'https://github.com/example/metricflow' },
},
});
const testRepoConnection = vi.fn(async () => ({ ok: true as const }));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'mf_main' }, io.io, { testRepoConnection }),
).resolves.toBe(0);
expect(testRepoConnection).toHaveBeenCalledWith({
repoUrl: 'https://github.com/example/metricflow',
authToken: null,
});
expect(io.stdout()).toContain('Driver: metricflow');
});
it('--all: prints a single coherent list with one row per connection', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
warehouse: { driver: 'sqlite' },
docs: { driver: 'notion', auth_token: 'secret_token', crawl_mode: 'all_accessible' }, // pragma: allowlist secret
});
const { connector } = nativeConnector('sqlite');
const createScanConnector = vi.fn(async () => connector);
const createNotionClient = vi.fn(async (): Promise<Pick<NotionClient, 'retrieveBotUser'>> => ({
retrieveBotUser: vi.fn(async () => ({ id: 'bot-1', name: 'Docs Bot' })),
}));
const io = makeIo();
await expect(
runKtxConnection({ command: 'test-all', projectDir }, io.io, { createScanConnector, createNotionClient }),
).resolves.toBe(0);
const out = stripAnsi(io.stdout());
expect(out).toContain('connection test --all');
expect(out).toMatch(/docs\s+notion\s+✓ ok\s+Bot: Docs Bot/);
expect(out).toMatch(/warehouse\s+sqlite\s+✓ ok\s+Status: ok/);
expect(out).toContain('2 tested');
expect(out).toContain('2 passed');
expect(out).not.toContain('failed');
expect(io.stderr()).toBe('');
});
it('--all: marks failing connections, keeps passing ones, and returns non-zero', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
warehouse: { driver: 'sqlite' },
broken: { driver: 'sqlite' },
});
const okConnector = nativeConnector('sqlite').connector;
const failConnector = nativeConnector('sqlite', { success: false, error: 'database file is unreadable' }).connector;
const createScanConnector = vi.fn(async (_p, connectionId: string) =>
connectionId === 'broken' ? failConnector : okConnector,
);
const io = makeIo();
await expect(
runKtxConnection({ command: 'test-all', projectDir }, io.io, { createScanConnector }),
).resolves.toBe(1);
const out = stripAnsi(io.stdout());
expect(out).toMatch(/broken\s+sqlite\s+✗ failed\s+database file is unreadable/);
expect(out).toMatch(/warehouse\s+sqlite\s+✓ ok\s+Status: ok/);
expect(out).toContain('1 passed');
expect(out).toContain('1 failed');
expect(io.stderr()).toBe('');
});
it('--all: shows an empty-state message when no connections are configured', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
const io = makeIo();
await expect(runKtxConnection({ command: 'test-all', projectDir }, io.io)).resolves.toBe(0);
const out = stripAnsi(io.stdout());
expect(out).toContain('connection test --all');
expect(out).toContain('No connections configured. Run `ktx setup` to add one.');
});
it('rejects unknown drivers with a helpful error', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeFile(
join(projectDir, 'ktx.yaml'),
'connections:\n mystery:\n driver: duckdb\n',
'utf-8',
);
const io = makeIo();
await expect(
runKtxConnection({ command: 'test', projectDir, connectionId: 'mystery' }, io.io),
).resolves.toBe(1);
expect(io.stderr()).toContain('connections.mystery.driver');
expect(io.stderr()).toContain('postgres');
});
});

View file

@ -6,6 +6,7 @@ import { type NotionBotInfo, NotionClient } from './context/ingest/adapters/noti
import { createLocalLookerCredentialResolver } from './context/ingest/adapters/looker/local-looker.adapter.js';
import { metabaseRuntimeConfigFromLocalConnection } from './context/ingest/adapters/metabase/local-metabase.adapter.js';
import { testRepoConnection } from './context/ingest/repo-fetch.js';
import { getDriverRegistration } from './context/connections/drivers.js';
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from './context/connections/notion-config.js';
import { resolveKtxConfigReference } from './context/core/config-reference.js';
import { type KtxLocalProject, loadKtxProject } from './context/project/project.js';
@ -272,15 +273,7 @@ async function testConnectionByDriver(
return { driver, detailKey: 'Repo', detailValue: result.repoUrl };
}
if (
driver === 'sqlite' ||
driver === 'postgres' ||
driver === 'mysql' ||
driver === 'clickhouse' ||
driver === 'sqlserver' ||
driver === 'bigquery' ||
driver === 'snowflake'
) {
if (getDriverRegistration(driver)) {
const result = await testNativeConnection(
project,
connectionId,

View file

@ -1,483 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { bigQueryConnectionConfigFromConfig, isKtxBigQueryConnectionConfig, type KtxBigQueryClient, KtxBigQueryScanConnector, type KtxBigQueryClientFactory, type KtxBigQueryDataset, type KtxBigQueryQueryJob, type KtxBigQueryTableRef } from '../../connectors/bigquery/connector.js';
import { createBigQueryLiveDatabaseIntrospection } from '../../connectors/bigquery/live-database-introspection.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function fakeClientFactory(options: { primaryKeyError?: Error } = {}): KtxBigQueryClientFactory {
const queryResults = vi.fn(async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ id: 1, status: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'id', type: 'INT64' }, { name: 'status', type: 'STRING' }] } },
]);
const createQueryJob = vi.fn(async (input: { query: string }): ReturnType<KtxBigQueryClient['createQueryJob']> => {
if (input.query.includes('INFORMATION_SCHEMA.TABLE_CONSTRAINTS')) {
if (options.primaryKeyError) {
throw options.primaryKeyError;
}
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ table_name: 'orders', column_name: 'id' }],
undefined,
{ schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
],
},
];
}
if (input.query.includes('APPROX_COUNT_DISTINCT')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ cardinality: 2 }],
undefined,
{ schema: { fields: [{ name: 'cardinality', type: 'INT64' }] } },
],
},
];
}
if (input.query.includes('SELECT DISTINCT CAST')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ val: 'open' }, { val: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'val', type: 'STRING' }] } },
],
},
];
}
if (input.query.includes('SELECT `status`')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ status: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'status', type: 'STRING' }] } },
],
},
];
}
return [{ getQueryResults: queryResults }];
});
const getTable = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{
metadata: {
type: 'TABLE',
numRows: '12',
description: 'Orders table',
schema: {
fields: [
{ name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' },
{ name: 'status', type: 'STRING', mode: 'NULLABLE' },
{ name: 'payload', type: 'RECORD', mode: 'NULLABLE' },
],
},
},
},
]);
const tableRef: KtxBigQueryTableRef = { id: 'orders', get: getTable };
return {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }, { id: 'staging' }]]),
dataset: vi.fn(
(datasetId: string): KtxBigQueryDataset => ({
get: vi.fn(async () => [{ id: datasetId }]),
getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [[tableRef]]),
}),
),
createQueryJob,
})),
};
}
const connection = {
driver: 'bigquery',
dataset_id: 'analytics',
credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }),
location: 'US',
} as const;
describe('KtxBigQueryScanConnector', () => {
it('resolves configuration safely', () => {
expect(isKtxBigQueryConnectionConfig(connection)).toBe(true);
expect(isKtxBigQueryConnectionConfig({ driver: 'mysql' })).toBe(false);
expect(bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection })).toMatchObject({
projectId: 'project-1',
datasetIds: ['analytics'],
location: 'US',
});
});
it('introspects datasets, table metadata, primary keys, and normalized types', async () => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory: fakeClientFactory(),
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'bigquery',
extractedAt: '2026-04-29T17:00:00.000Z',
scope: { catalogs: ['project-1'], datasets: ['analytics'] },
metadata: {
project_id: 'project-1',
datasets: ['analytics'],
table_count: 1,
total_columns: 3,
},
});
expect(snapshot.tables[0]).toMatchObject({
catalog: 'project-1',
db: 'analytics',
name: 'orders',
kind: 'table',
comment: 'Orders table',
estimatedRows: 12,
foreignKeys: [],
});
expect(snapshot.tables[0]?.columns).toEqual([
{
name: 'id',
nativeType: 'INT64',
normalizedType: 'BIGINT',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
{
name: 'status',
nativeType: 'STRING',
normalizedType: 'VARCHAR',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
{
name: 'payload',
nativeType: 'RECORD',
normalizedType: 'JSON',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
]);
});
it.each([
Object.assign(new Error('Access Denied'), { code: 403 }),
Object.assign(new Error('Not found'), { errors: [{ reason: 'notFound' }] }),
])('soft-fails denied BigQuery primary-key discovery with a scan warning', async (primaryKeyError) => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory: fakeClientFactory({ primaryKeyError }),
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery' },
{ runId: 'scan-run-bigquery-denied-pk' },
);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'analytics', kind: 'primary_key' },
},
]);
expect(snapshot.tables[0]?.foreignKeys).toEqual([]);
expect(snapshot.tables[0]?.columns.every((column) => column.primaryKey === false)).toBe(true);
});
it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory: fakeClientFactory(),
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
columns: ['id', 'status'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({
headers: ['id', 'status'],
headerTypes: ['INT64', 'STRING'],
rows: [[1, 'paid']],
totalRows: 1,
});
await expect(
connector.sampleColumn(
{
connectionId: 'warehouse',
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
column: 'status',
limit: 5,
},
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(
connector.getColumnDistinctValues(
{ catalog: 'project-1', db: 'analytics', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(connector.getTableRowCount('orders')).resolves.toBe(12);
await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']);
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
it('limits introspection to tables in tableScope', async () => {
const ordersGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{
metadata: {
type: 'TABLE',
numRows: '12',
schema: { fields: [{ name: 'id', type: 'INT64', mode: 'REQUIRED' }] },
},
},
]);
const skippedGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{ metadata: { type: 'TABLE', numRows: '1', schema: { fields: [] } } },
]);
const clientFactory: KtxBigQueryClientFactory = {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }]]),
dataset: vi.fn(
(): KtxBigQueryDataset => ({
get: vi.fn(async () => [{ id: 'analytics' }]),
getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [
[
{ id: 'orders', get: ordersGet },
{ id: 'customers', get: skippedGet },
],
]),
}),
),
createQueryJob: vi.fn(async (): ReturnType<KtxBigQueryClient['createQueryJob']> => [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[],
undefined,
{ schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
],
},
]),
})),
};
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory,
});
const scope = tableRefSet([{ catalog: 'project-1', db: 'analytics', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
expect(ordersGet).toHaveBeenCalledTimes(1);
expect(skippedGet).not.toHaveBeenCalled();
});
it('constructs for discovery without dataset scope and lists tables through one region information schema query', async () => {
const createQueryJob = vi.fn(
async (
input: { query: string; params?: Record<string, unknown>; location?: string },
): ReturnType<KtxBigQueryClient['createQueryJob']> => [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[
{ table_schema: 'analytics', table_name: 'orders', table_type: 'BASE TABLE' },
{ table_schema: 'analytics', table_name: 'order_clone', table_type: 'CLONE' },
{ table_schema: 'mart', table_name: 'orders_mv', table_type: 'MATERIALIZED VIEW' },
],
undefined,
{
schema: {
fields: [
{ name: 'table_schema', type: 'STRING' },
{ name: 'table_name', type: 'STRING' },
{ name: 'table_type', type: 'STRING' },
],
},
},
],
},
],
);
const clientFactory: KtxBigQueryClientFactory = {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async () => [[{ id: 'analytics' }, { id: 'mart' }]] as [{ id: string }[]]),
dataset: vi.fn((datasetId: string) => ({
get: vi.fn(async () => [{ id: datasetId }]),
getTables: vi.fn(async () => [[]] as [never[]]),
})),
createQueryJob,
})),
};
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'bigquery',
credentials_json: JSON.stringify({ project_id: 'project-1' }),
location: 'US',
},
clientFactory,
});
await expect(connector.listTables(['analytics', 'mart'])).resolves.toEqual([
{ schema: 'analytics', name: 'orders', kind: 'table' },
{ schema: 'analytics', name: 'order_clone', kind: 'table' },
{ schema: 'mart', name: 'orders_mv', kind: 'view' },
]);
expect(createQueryJob).toHaveBeenCalledTimes(1);
expect(createQueryJob).toHaveBeenCalledWith(
expect.objectContaining({
location: 'US',
params: { dataset_ids: ['analytics', 'mart'] },
}),
);
expect(createQueryJob.mock.calls[0]?.[0].query).toContain('`project-1`.`region-us`.INFORMATION_SCHEMA.TABLES');
expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'CLONE'");
expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'SNAPSHOT'");
});
it('keeps scan paths requiring dataset scope', async () => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'bigquery',
credentials_json: JSON.stringify({ project_id: 'project-1' }),
location: 'US',
},
clientFactory: fakeClientFactory(),
});
await expect(
connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery' },
{ runId: 'scan-run-1' },
),
).rejects.toThrow('Native BigQuery scan requires connections.warehouse.dataset_ids or dataset_id');
});
it('applies maximumBytesBilled to read-only queries when configured', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory,
maxBytesBilled: 123456789,
});
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
expect(client.createQueryJob).toHaveBeenLastCalledWith(
expect.objectContaining({
maximumBytesBilled: '123456789',
}),
);
});
it('applies canonical BigQuery YAML scan limits to query jobs', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection: { ...connection, max_bytes_billed: '987654321', job_timeout_ms: 30_000 },
clientFactory,
});
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
expect(client.createQueryJob).toHaveBeenLastCalledWith(
expect.objectContaining({
maximumBytesBilled: '987654321',
jobTimeoutMs: 30_000,
}),
);
});
it('adapts native snapshots to live-database introspection snapshots', async () => {
const introspection = createBigQueryLiveDatabaseIntrospection({
connections: { warehouse: connection },
clientFactory: fakeClientFactory(),
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
connectionId: 'warehouse',
metadata: { project_id: 'project-1' },
tables: expect.arrayContaining([
expect.objectContaining({
catalog: 'project-1',
db: 'analytics',
name: 'orders',
columns: expect.arrayContaining([
{
name: 'id',
nativeType: 'INT64',
normalizedType: 'BIGINT',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
]),
}),
]),
});
});
});

View file

@ -1,5 +1,6 @@
import { BigQuery, type TableField } from '@google-cloud/bigquery';
import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../context/connections/bigquery-identifiers.js';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -26,7 +27,6 @@ import {
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { KtxBigQueryDialect } from './dialect.js';
export interface KtxBigQueryConnectionConfig {
driver?: string;
@ -235,6 +235,23 @@ function normalizeValue(value: unknown): unknown {
return value;
}
/** @internal */
export function prepareBigQueryReadOnlyQuery(
sql: string,
params?: Record<string, unknown>,
): { sql: string; params?: Record<string, unknown> } {
if (!params) {
return { sql, params: undefined };
}
let processedSql = sql;
const processedParams: Record<string, unknown> = {};
for (const [key, value] of Object.entries(params)) {
processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
processedParams[key] = value;
}
return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
}
export function isKtxBigQueryConnectionConfig(
connection: KtxBigQueryConnectionConfig | undefined,
): connection is KtxBigQueryConnectionConfig {
@ -286,7 +303,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
private readonly now: () => Date;
private readonly maxBytesBilled?: number | string;
private readonly queryTimeoutMs?: number;
private readonly dialect = new KtxBigQueryDialect();
private readonly dialect = getDialectForDriver('bigquery');
private client: KtxBigQueryClient | null = null;
constructor(options: KtxBigQueryScanConnectorOptions) {
@ -364,7 +381,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxBigQueryReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareBigQueryReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -411,7 +428,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
return this.dialect.quoteIdentifier(identifier);
}
async listDatasets(): Promise<string[]> {
async listSchemas(): Promise<string[]> {
const [datasets] = await this.getClient().getDatasets();
return datasets.map((dataset) => dataset.id).filter((id): id is string => Boolean(id));
}
@ -437,6 +454,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
params,
);
return rows.map((row) => ({
catalog: this.resolved.projectId,
schema: row.table_schema,
name: row.table_name,
kind:

View file

@ -1,52 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxBigQueryDialect } from './dialect.js';
describe('KtxBigQueryDialect', () => {
const dialect = new KtxBigQueryDialect();
it('quotes identifiers and formats project.dataset.table names', () => {
expect(dialect.quoteIdentifier('order`items')).toBe('`order\\`items`');
expect(dialect.formatTableName({ catalog: 'project-1', db: 'analytics', name: 'orders' })).toBe(
'`project-1`.`analytics`.`orders`',
);
expect(dialect.formatTableName({ db: 'analytics', name: 'orders' })).toBe('`analytics`.`orders`');
expect(dialect.formatTableName({ name: 'orders' })).toBe('`orders`');
});
it('maps native BigQuery types to normalized types and scan dimensions', () => {
expect(dialect.mapDataType('INT64')).toBe('BIGINT');
expect(dialect.mapDataType('STRUCT')).toBe('JSON');
expect(dialect.mapDataType('GEOGRAPHY')).toBe('GEOGRAPHY');
expect(dialect.mapToDimensionType('TIMESTAMP')).toBe('time');
expect(dialect.mapToDimensionType('NUMERIC')).toBe('number');
expect(dialect.mapToDimensionType('BOOL')).toBe('boolean');
expect(dialect.mapToDimensionType('JSON')).toBe('string');
});
it('generates sampling, cardinality, and distinct-value SQL', () => {
expect(dialect.generateSampleQuery('`p`.`d`.`orders`', 5, ['id', 'status'])).toBe(
'SELECT `id`, `status` FROM `p`.`d`.`orders` ORDER BY RAND() LIMIT 5',
);
expect(dialect.generateColumnSampleQuery('`p`.`d`.`orders`', 'status', 10)).toBe(
"SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10",
);
expect(dialect.generateCardinalitySampleQuery('`p`.`d`.`orders`', '`status`', 100)).toContain(
'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
);
expect(dialect.generateDistinctValuesQuery('`p`.`d`.`orders`', '`status`', 20)).toContain(
'SELECT DISTINCT CAST(`status` AS STRING) AS val',
);
});
it('rewrites colon parameters to BigQuery named parameters', () => {
expect(dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', { id: 1, id_2: 2 })).toEqual({
sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2',
params: { id: 1, id_2: 2 },
});
expect(dialect.prepareQuery('SELECT * FROM orders')).toEqual({ sql: 'SELECT * FROM orders', params: undefined });
});
it('keeps unsupported statistics explicit', () => {
expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxBigQueryDialect {
readonly type = 'bigquery';
/** @internal */
export class KtxBigQueryDialect implements KtxDialect {
readonly type = 'bigquery' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
TIMESTAMP: 'time',
@ -27,13 +36,19 @@ export class KtxBigQueryDialect {
}
formatTableName(table: BigQueryTableNameRef): string {
if (table.catalog && table.db) {
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
if (table.db) {
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
formatDisplayRef(table: BigQueryTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -93,19 +108,6 @@ export class KtxBigQueryDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
}
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
if (!params) {
return { sql, params: undefined };
}
let processedSql = sql;
const processedParams: Record<string, unknown> = {};
for (const [key, value] of Object.entries(params)) {
processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
processedParams[key] = value;
}
return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -121,7 +123,11 @@ export class KtxBigQueryDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -132,6 +138,18 @@ export class KtxBigQueryDialect {
return `APPROX_COUNT_DISTINCT(${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS STRING))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS STRING)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -172,36 +190,4 @@ export class KtxBigQueryDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const bigQueryGranularity = granularity.toUpperCase();
if (timezone) {
return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
}
return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
const [rawAmount, rawUnit] = interval.split(' ');
let diffUnit = rawUnit!.toUpperCase();
let amount = Number(rawAmount);
let addUnit = diffUnit;
if (diffUnit === 'WEEK') {
diffUnit = 'DAY';
amount = amount * 7;
addUnit = 'DAY';
}
const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`;
}
parseIntervalToSql(interval: string): string {
const [amount, unit] = interval.split(' ');
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
}
}

View file

@ -1,405 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { clickHouseClientConfigFromConfig, isKtxClickHouseConnectionConfig, KtxClickHouseScanConnector, type KtxClickHouseClientFactory } from '../../connectors/clickhouse/connector.js';
import { createClickHouseLiveDatabaseIntrospection } from '../../connectors/clickhouse/live-database-introspection.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function result<T>(payload: T) {
return {
async json(): Promise<T> {
return payload;
},
};
}
function fakeClientFactory(): KtxClickHouseClientFactory {
const query = vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
if (input.query.includes('FROM system.tables')) {
return result([
{ name: 'events', engine: 'MergeTree', comment: 'Event stream' },
{ name: 'event_summary', engine: 'View', comment: '' },
]);
}
if (input.query.includes('FROM system.columns')) {
return result([
{ table: 'events', name: 'id', type: 'UInt64', comment: 'PK', is_in_primary_key: 1 },
{ table: 'events', name: 'event_name', type: 'LowCardinality(String)', comment: '', is_in_primary_key: 0 },
{ table: 'event_summary', name: 'event_name', type: 'String', comment: '', is_in_primary_key: 0 },
]);
}
if (input.query.includes('FROM system.parts') && input.query.includes('GROUP BY')) {
return result([{ table: 'events', row_count: '2' }]);
}
if (input.query.includes('SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 1')) {
return result({
meta: [
{ name: 'id', type: 'UInt64' },
{ name: 'event_name', type: 'String' },
],
data: [[10, 'signup']],
rows: 1,
});
}
if (input.query.includes('SELECT `event_name` FROM `analytics`.`events`')) {
return result({
meta: [{ name: 'event_name', type: 'String' }],
data: [['signup'], ['purchase']],
rows: 2,
});
}
if (input.query.includes('COUNT(DISTINCT val)')) {
return result({
meta: [{ name: 'cardinality', type: 'UInt64' }],
data: [[2]],
rows: 1,
});
}
if (input.query.includes('SELECT DISTINCT toString(`event_name`) AS val')) {
return result({
meta: [{ name: 'val', type: 'String' }],
data: [['purchase'], ['signup']],
rows: 2,
});
}
if (input.query.includes('sum(rows) AS count')) {
return result({
meta: [{ name: 'count', type: 'UInt64' }],
data: [[2]],
rows: 1,
});
}
if (input.query.includes('FROM system.databases')) {
return result([{ name: 'analytics' }, { name: 'warehouse' }]);
}
if (input.query.trim() === 'SELECT 1') {
return result({ meta: [{ name: '1', type: 'UInt8' }], data: [[1]], rows: 1 });
}
if (input.query.includes('select * from (select id, event_name from analytics.events) as ktx_query_result limit 1')) {
return result({
meta: [
{ name: 'id', type: 'UInt64' },
{ name: 'event_name', type: 'String' },
],
data: [[10, 'signup']],
rows: 1,
});
}
throw new Error(`Unexpected SQL: ${input.query}`);
});
const close = vi.fn(async () => undefined);
return {
createClient: vi.fn(() => ({ query, close })),
};
}
function multiDatabaseClickHouseClientFactory(): KtxClickHouseClientFactory {
const query = vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
if (input.query.includes('FROM system.tables')) {
expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] });
return result([
{ database: 'analytics', name: 'events', engine: 'MergeTree', comment: 'Event stream' },
{ database: 'mart', name: 'order_events', engine: 'MergeTree', comment: '' },
]);
}
if (input.query.includes('FROM system.columns')) {
expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] });
return result([
{
database: 'analytics',
table: 'events',
name: 'id',
type: 'UInt64',
comment: '',
is_in_primary_key: 1,
},
{
database: 'mart',
table: 'order_events',
name: 'id',
type: 'UInt64',
comment: '',
is_in_primary_key: 1,
},
]);
}
if (input.query.includes('FROM system.parts') && input.query.includes('GROUP BY')) {
expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] });
return result([
{ database: 'analytics', table: 'events', row_count: '2' },
{ database: 'mart', table: 'order_events', row_count: '5' },
]);
}
throw new Error(`Unexpected SQL: ${input.query}`);
});
return {
createClient: vi.fn(() => ({ query, close: vi.fn(async () => undefined) })),
};
}
describe('KtxClickHouseScanConnector', () => {
it('resolves ClickHouse connection configuration safely', () => {
expect(isKtxClickHouseConnectionConfig({ driver: 'clickhouse', host: 'localhost', database: 'analytics' })).toBe(
true,
);
expect(isKtxClickHouseConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false);
expect(
clickHouseClientConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
port: 9440,
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
ssl: true,
},
}),
).toMatchObject({
host: 'ch.example.test',
port: 9440,
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
ssl: true,
});
});
it('introspects schema, primary keys, comments, row counts, and views', async () => {
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory: fakeClientFactory(),
now: () => new Date('2026-04-29T14:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'clickhouse' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'clickhouse',
extractedAt: '2026-04-29T14:00:00.000Z',
scope: { schemas: ['analytics'] },
metadata: {
database: 'analytics',
host: 'ch.example.test',
table_count: 2,
total_columns: 3,
},
});
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([
['events', 'table', 2, 'Event stream'],
['event_summary', 'view', null, null],
]);
expect(snapshot.tables.find((table) => table.name === 'events')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'UInt64',
normalizedType: 'UInt64',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
});
expect(snapshot.tables.find((table) => table.name === 'events')?.foreignKeys).toEqual([]);
});
it('introspects every configured ClickHouse database scope while preserving the default database', async () => {
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
databases: ['analytics', 'mart'],
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory: multiDatabaseClickHouseClientFactory(),
now: () => new Date('2026-05-21T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'clickhouse' },
{ runId: 'scan-run-1' },
);
expect(snapshot.scope).toEqual({ schemas: ['analytics', 'mart'] });
expect(snapshot.metadata).toMatchObject({ database: 'analytics', databases: ['analytics', 'mart'] });
expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual([
'analytics.events',
'mart.order_events',
]);
});
it('limits introspection to tables in tableScope', async () => {
const queries: Array<{ query: string; query_params?: Record<string, unknown> }> = [];
const clientFactory: KtxClickHouseClientFactory = {
createClient: vi.fn(() => ({
query: vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
queries.push({ query: input.query, query_params: input.query_params });
if (input.query.includes('FROM system.tables')) {
return result([{ database: 'analytics', name: 'events', engine: 'MergeTree', comment: '' }]);
}
if (input.query.includes('FROM system.columns')) {
return result([
{
database: 'analytics',
table: 'events',
name: 'id',
type: 'UInt64',
comment: '',
is_in_primary_key: 1,
},
]);
}
if (input.query.includes('FROM system.parts')) {
return result([{ database: 'analytics', table: 'events', row_count: '2' }]);
}
throw new Error(`Unexpected SQL: ${input.query}`);
}),
close: vi.fn(async () => undefined),
})),
};
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory,
});
const scope = tableRefSet([{ catalog: null, db: 'analytics', name: 'events' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'clickhouse', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['events']);
const tablesQuery = queries.find((query) => query.query.includes('FROM system.tables'));
expect(tablesQuery?.query).toContain('AND name IN {table_names:Array(String)}');
expect(tablesQuery?.query_params).toEqual({ databases: ['analytics'], table_names: ['events'] });
});
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory,
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: null, db: 'analytics', name: 'events' },
columns: ['id', 'event_name'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id', 'event_name'], rows: [[10, 'signup']], totalRows: 1 });
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'events' }, column: 'event_name', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['signup', 'purchase'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: null, db: 'analytics', name: 'events' },
'event_name',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['purchase', 'signup'], cardinality: 2 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, event_name from analytics.events', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'event_name'], rows: [[10, 'signup']], totalRows: 1, rowCount: 1 });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from events' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(connector.getTableRowCount('events')).resolves.toBe(2);
await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']);
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'events' }, column: 'event_name' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
it('adapts native ClickHouse snapshots to live-database introspection for local ingest', async () => {
const introspection = createClickHouseLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
},
clientFactory: fakeClientFactory(),
now: () => new Date('2026-04-29T14:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T14:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'events')).toMatchObject({
name: 'events',
catalog: null,
db: 'analytics',
columns: [
{
name: 'id',
nativeType: 'UInt64',
normalizedType: 'UInt64',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
},
{
name: 'event_name',
nativeType: 'LowCardinality(String)',
normalizedType: 'LowCardinality(String)',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
});
});

View file

@ -1,4 +1,5 @@
import { createClient } from '@clickhouse/client';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -6,7 +7,6 @@ import { readFileSync } from 'node:fs';
import { Agent as HttpsAgent } from 'node:https';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { KtxClickHouseDialect } from './dialect.js';
export interface KtxClickHouseConnectionConfig {
driver?: string;
@ -198,6 +198,49 @@ function clickHouseTableKey(database: string, table: string): string {
return `${database}.${table}`;
}
function inferClickHouseQueryParamType(value: unknown): string {
if (value === null || value === undefined) {
return 'String';
}
if (typeof value === 'boolean') {
return 'Bool';
}
if (typeof value === 'number') {
return Number.isInteger(value) ? 'Int64' : 'Float64';
}
if (value instanceof Date) {
return 'DateTime';
}
return 'String';
}
/** @internal */
export function prepareClickHouseReadOnlyQuery(
sql: string,
params?: Record<string, unknown>,
): { sql: string; params?: Record<string, unknown> } {
if (!params) {
return { sql, params: undefined };
}
let parameterizedQuery = sql;
const queryParams: Record<string, unknown> = {};
const sortedKeys = Object.keys(params).sort((a, b) => b.length - a.length);
for (const key of sortedKeys) {
const placeholder = `:${key}`;
if (parameterizedQuery.includes(placeholder)) {
parameterizedQuery = parameterizedQuery.replace(
new RegExp(`:${key}\\b`, 'g'),
`{${key}:${inferClickHouseQueryParamType(params[key])}}`,
);
queryParams[key] = params[key];
}
}
return { sql: parameterizedQuery, params: Object.keys(queryParams).length > 0 ? queryParams : undefined };
}
export function isKtxClickHouseConnectionConfig(
connection: KtxClickHouseConnectionConfig | undefined,
): connection is KtxClickHouseConnectionConfig {
@ -256,7 +299,7 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
private readonly clientFactory: KtxClickHouseClientFactory;
private readonly endpointResolver?: KtxClickHouseEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KtxClickHouseDialect();
private readonly dialect = getDialectForDriver('clickhouse');
private client: KtxClickHouseClient | null = null;
private resolvedEndpoint: KtxClickHouseResolvedEndpoint | null = null;
@ -408,7 +451,7 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxClickHouseReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareClickHouseReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -488,6 +531,7 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
{ schemas: filterSchemas },
);
return rows.map((row) => ({
catalog: null,
schema: row.database,
name: row.name,
kind: row.engine === 'View' || row.engine === 'MaterializedView' ? ('view' as const) : ('table' as const),

View file

@ -1,49 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxClickHouseDialect } from './dialect.js';
describe('KtxClickHouseDialect', () => {
const dialect = new KtxClickHouseDialect();
it('quotes identifiers and formats database-qualified table names', () => {
expect(dialect.quoteIdentifier('events')).toBe('`events`');
expect(dialect.quoteIdentifier('odd`name')).toBe('`odd``name`');
expect(dialect.formatTableName({ catalog: null, db: 'analytics', name: 'events' })).toBe(
'`analytics`.`events`',
);
expect(dialect.formatTableName({ catalog: null, db: null, name: 'events' })).toBe('`events`');
});
it('maps nullable and low-cardinality ClickHouse types to KTX dimension types', () => {
expect(dialect.mapToDimensionType('Nullable(DateTime64(3))')).toBe('time');
expect(dialect.mapToDimensionType('LowCardinality(Nullable(String))')).toBe('string');
expect(dialect.mapToDimensionType('UInt64')).toBe('number');
expect(dialect.mapToDimensionType('Decimal(18, 4)')).toBe('number');
expect(dialect.mapToDimensionType('Bool')).toBe('boolean');
expect(dialect.mapToDimensionType('IPv4')).toBe('string');
expect(dialect.mapToDimensionType('')).toBe('string');
});
it('builds sampling, distinct-value, pagination, and time SQL', () => {
expect(dialect.generateSampleQuery('`analytics`.`events`', 25, ['id', 'event_name'])).toBe(
'SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 25',
);
expect(dialect.generateColumnSampleQuery('`analytics`.`events`', 'event_name', 10)).toBe(
"SELECT `event_name` FROM `analytics`.`events` WHERE `event_name` IS NOT NULL AND trim(toString(`event_name`)) != '' LIMIT 10",
);
expect(dialect.generateDistinctValuesQuery('`analytics`.`events`', '`event_name`', 5)).toContain(
'SELECT DISTINCT toString(`event_name`) AS val',
);
expect(dialect.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20');
expect(dialect.getTimeTruncExpression('created_at', 'week')).toBe('toStartOfWeek(created_at, 1)');
});
it('prepares named parameters using ClickHouse typed placeholders', () => {
expect(dialect.prepareQuery('select * from events where id = :id and event_name = :name', {
id: 10,
name: 'signup',
})).toEqual({
sql: 'select * from events where id = {id:Int64} and event_name = {name:String}',
params: { id: 10, name: 'signup' },
});
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type ClickHouseTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxClickHouseDialect {
readonly type = 'clickhouse';
/** @internal */
export class KtxClickHouseDialect implements KtxDialect {
readonly type = 'clickhouse' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
date: 'time',
@ -45,9 +54,19 @@ export class KtxClickHouseDialect {
}
formatTableName(table: ClickHouseTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
formatDisplayRef(table: ClickHouseTableNameRef): string {
return formatDialectDisplayRef(table, 'ansi');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'ansi');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('ansi');
}
mapDataType(nativeType: string): string {
@ -97,29 +116,6 @@ export class KtxClickHouseDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND trim(toString(${quotedColumn})) != '' LIMIT ${limit}`;
}
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
if (!params) {
return { sql, params: undefined };
}
let parameterizedQuery = sql;
const queryParams: Record<string, unknown> = {};
const sortedKeys = Object.keys(params).sort((a, b) => b.length - a.length);
for (const key of sortedKeys) {
const placeholder = `:${key}`;
if (parameterizedQuery.includes(placeholder)) {
parameterizedQuery = parameterizedQuery.replace(
new RegExp(`:${key}\\b`, 'g'),
`{${key}:${this.inferClickHouseType(params[key])}}`,
);
queryParams[key] = params[key];
}
}
return { sql: parameterizedQuery, params: queryParams };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -132,7 +128,11 @@ export class KtxClickHouseDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -143,6 +143,18 @@ export class KtxClickHouseDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `length(toString(${columnSql}))`;
}
castToText(columnSql: string): string {
return `toString(${columnSql})`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
SELECT COUNT(DISTINCT val) AS cardinality
@ -181,99 +193,9 @@ export class KtxClickHouseDialect {
)
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const tz = timezone ? `, '${timezone}'` : '';
switch (granularity) {
case 'day':
return `toStartOfDay(${column}${tz})`;
case 'week':
return `toStartOfWeek(${column}, 1${tz})`;
case 'month':
return `toStartOfMonth(${column}${tz})`;
case 'quarter':
return `toStartOfQuarter(${column}${tz})`;
case 'year':
return `toStartOfYear(${column}${tz})`;
}
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `toTimezone(${column}, '${timezone}')` : column;
const [rawAmount, rawUnit] = interval.split(' ');
const amount = Number(rawAmount);
const unit = rawUnit!.toLowerCase();
const originExpr = origin ? `toDateTime('${origin}')` : "toDateTime('1970-01-01')";
const calendarUnit = this.toClickHouseDateDiffUnit(unit);
if (calendarUnit) {
return `dateAdd(${calendarUnit}, intDiv(dateDiff(${calendarUnit}, ${originExpr}, ${col}), ${amount}) * ${amount}, ${originExpr})`;
}
const seconds = this.intervalToSeconds(amount, unit);
return `addSeconds(${originExpr}, intDiv(toUInt64(dateDiff('second', ${originExpr}, ${col})), ${seconds}) * ${seconds})`;
}
parseIntervalToSql(interval: string): string {
const [amount, unit] = interval.split(' ');
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
}
private unwrapClickHouseType(value: string, wrapper: string): string {
const prefix = `${wrapper}(`;
return value.startsWith(prefix) && value.endsWith(')') ? value.slice(prefix.length, -1) : value;
}
private inferClickHouseType(value: unknown): string {
if (value === null || value === undefined) {
return 'String';
}
if (typeof value === 'boolean') {
return 'Bool';
}
if (typeof value === 'number') {
return Number.isInteger(value) ? 'Int64' : 'Float64';
}
if (value instanceof Date) {
return 'DateTime';
}
return 'String';
}
private toClickHouseDateDiffUnit(unit: string): string | null {
if (unit === 'month' || unit === 'months') {
return "'month'";
}
if (unit === 'quarter' || unit === 'quarters') {
return "'quarter'";
}
if (unit === 'year' || unit === 'years') {
return "'year'";
}
return null;
}
private intervalToSeconds(amount: number, unit: string): number {
switch (unit) {
case 'second':
case 'seconds':
return amount;
case 'minute':
case 'minutes':
return amount * 60;
case 'hour':
case 'hours':
return amount * 3600;
case 'day':
case 'days':
return amount * 86400;
case 'week':
case 'weeks':
return amount * 604800;
default:
return amount * 86400;
}
}
}

View file

@ -1,556 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import type { FieldPacket, RowDataPacket } from 'mysql2/promise';
import { createMysqlLiveDatabaseIntrospection } from '../../connectors/mysql/live-database-introspection.js';
import { isKtxMysqlConnectionConfig, KtxMysqlScanConnector, mysqlConnectionPoolConfigFromConfig, type KtxMysqlConnectionConfig, type KtxMysqlPoolFactory } from '../../connectors/mysql/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function mysqlResult(rows: Record<string, unknown>[], fields: Array<{ name: string; type?: number }>): [RowDataPacket[], FieldPacket[]] {
return [rows as RowDataPacket[], fields as FieldPacket[]];
}
function fakePoolFactory(): KtxMysqlPoolFactory {
const query = vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => {
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return mysqlResult(
[
{ TABLE_NAME: 'customers', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'Customer table', TABLE_ROWS: 2 },
{ TABLE_NAME: 'orders', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'InnoDB free: 1 kB; Order table', TABLE_ROWS: 2 },
{ TABLE_NAME: 'order_summary', TABLE_TYPE: 'VIEW', TABLE_COMMENT: '', TABLE_ROWS: null },
],
[{ name: 'TABLE_NAME' }, { name: 'TABLE_TYPE' }, { name: 'TABLE_COMMENT' }, { name: 'TABLE_ROWS' }],
);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return mysqlResult(
[
{ TABLE_NAME: 'customers', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: 'PK' },
{ TABLE_NAME: 'customers', COLUMN_NAME: 'name', DATA_TYPE: 'varchar', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
{ TABLE_NAME: 'orders', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
{ TABLE_NAME: 'orders', COLUMN_NAME: 'customer_id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
{ TABLE_NAME: 'orders', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' },
{ TABLE_NAME: 'order_summary', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' },
],
[{ name: 'TABLE_NAME' }, { name: 'COLUMN_NAME' }, { name: 'DATA_TYPE' }, { name: 'IS_NULLABLE' }],
);
}
if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes("CONSTRAINT_NAME = 'PRIMARY'")) {
return mysqlResult([{ TABLE_NAME: 'customers', COLUMN_NAME: 'id' }, { TABLE_NAME: 'orders', COLUMN_NAME: 'id' }], []);
}
if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes('REFERENCED_TABLE_NAME IS NOT NULL')) {
return mysqlResult(
[
{
TABLE_NAME: 'orders',
COLUMN_NAME: 'customer_id',
REFERENCED_TABLE_NAME: 'customers',
REFERENCED_COLUMN_NAME: 'id',
CONSTRAINT_NAME: 'orders_customer_id_fk',
},
],
[],
);
}
if (sql.includes('SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 1')) {
return mysqlResult([{ id: 10, status: 'paid' }], [{ name: 'id', type: 3 }, { name: 'status', type: 253 }]);
}
if (sql.includes('select * from (select id, status from analytics.orders) as ktx_query_result limit 1')) {
return mysqlResult([{ id: 10, status: 'paid' }], [{ name: 'id', type: 3 }, { name: 'status', type: 253 }]);
}
if (sql.includes('SELECT `status` FROM `analytics`.`orders`')) {
return mysqlResult([{ status: 'paid' }, { status: 'open' }], [{ name: 'status', type: 253 }]);
}
if (sql.includes('COUNT(DISTINCT val)')) {
return mysqlResult([{ cardinality: 2 }], [{ name: 'cardinality', type: 8 }]);
}
if (sql.includes('SELECT DISTINCT CAST(`status` AS CHAR) AS val')) {
return mysqlResult([{ val: 'open' }, { val: 'paid' }], [{ name: 'val', type: 253 }]);
}
if (sql.includes('COUNT(*) AS count')) {
return mysqlResult([{ count: 2 }], [{ name: 'count', type: 8 }]);
}
if (sql.includes('INFORMATION_SCHEMA.SCHEMATA')) {
return mysqlResult([{ SCHEMA_NAME: 'analytics' }, { SCHEMA_NAME: 'warehouse' }], [{ name: 'SCHEMA_NAME' }]);
}
if (sql.trim() === 'SELECT 1') {
return mysqlResult([{ '1': 1 }], [{ name: '1', type: 8 }]);
}
throw new Error(`Unexpected SQL: ${sql} params=${JSON.stringify(params)}`);
});
const release = vi.fn();
const end = vi.fn(async () => undefined);
return {
createPool: vi.fn(() => ({
getConnection: vi.fn(async () => ({ query, release })),
end,
})),
};
}
function multiSchemaMysqlPoolFactory(
options: { primaryKeyError?: Error; foreignKeyError?: Error } = {},
): KtxMysqlPoolFactory {
const query = vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => {
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
expect(params).toEqual(['analytics', 'mart']);
return mysqlResult(
[
{
TABLE_SCHEMA: 'analytics',
TABLE_NAME: 'customers',
TABLE_TYPE: 'BASE TABLE',
TABLE_COMMENT: '',
TABLE_ROWS: 2,
},
{
TABLE_SCHEMA: 'mart',
TABLE_NAME: 'orders',
TABLE_TYPE: 'BASE TABLE',
TABLE_COMMENT: '',
TABLE_ROWS: 3,
},
],
[
{ name: 'TABLE_SCHEMA' },
{ name: 'TABLE_NAME' },
{ name: 'TABLE_TYPE' },
{ name: 'TABLE_COMMENT' },
{ name: 'TABLE_ROWS' },
],
);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
expect(params).toEqual(['analytics', 'mart']);
return mysqlResult(
[
{
TABLE_SCHEMA: 'analytics',
TABLE_NAME: 'customers',
COLUMN_NAME: 'id',
DATA_TYPE: 'int',
IS_NULLABLE: 'NO',
COLUMN_COMMENT: '',
},
{
TABLE_SCHEMA: 'mart',
TABLE_NAME: 'orders',
COLUMN_NAME: 'id',
DATA_TYPE: 'int',
IS_NULLABLE: 'NO',
COLUMN_COMMENT: '',
},
],
[],
);
}
if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes("CONSTRAINT_NAME = 'PRIMARY'")) {
if (options.primaryKeyError) {
throw options.primaryKeyError;
}
expect(params).toEqual(['analytics', 'mart']);
return mysqlResult(
[
{ TABLE_SCHEMA: 'analytics', TABLE_NAME: 'customers', COLUMN_NAME: 'id' },
{ TABLE_SCHEMA: 'mart', TABLE_NAME: 'orders', COLUMN_NAME: 'id' },
],
[],
);
}
if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes('REFERENCED_TABLE_NAME IS NOT NULL')) {
if (options.foreignKeyError) {
throw options.foreignKeyError;
}
expect(params).toEqual(['analytics', 'mart']);
return mysqlResult([], []);
}
throw new Error(`Unexpected SQL: ${sql} params=${JSON.stringify(params)}`);
});
return {
createPool: vi.fn(() => ({
getConnection: vi.fn(async () => ({ query, release: vi.fn() })),
end: vi.fn(async () => undefined),
})),
};
}
describe('KtxMysqlScanConnector', () => {
it('resolves MySQL connection configuration safely', () => {
expect(isKtxMysqlConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(true);
expect(isKtxMysqlConnectionConfig({ driver: 'postgres', host: 'localhost', database: 'analytics' })).toBe(false);
expect(
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
port: 3307,
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
ssl: true,
},
}),
).toMatchObject({
host: 'db.example.test',
port: 3307,
database: 'analytics',
user: 'reader',
password: 'secret', // pragma: allowlist secret
ssl: { rejectUnauthorized: false },
});
});
it('defaults and validates MySQL maxConnections', () => {
const baseConnection: KtxMysqlConnectionConfig = {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
};
expect(
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: baseConnection,
}),
).toMatchObject({ connectionLimit: 10 });
expect(
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: 25 },
}),
).toMatchObject({ connectionLimit: 25 });
expect(
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: '12' as never },
}),
).toMatchObject({ connectionLimit: 12 });
for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) {
expect(() =>
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections },
}),
).toThrow('connections.warehouse.maxConnections must be a positive integer');
}
});
it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T12:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'mysql',
extractedAt: '2026-04-29T12:00:00.000Z',
scope: { schemas: ['analytics'] },
metadata: {
database: 'analytics',
host: 'db.example.test',
table_count: 3,
total_columns: 6,
},
});
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([
['customers', 'table', 2, 'Customer table'],
['orders', 'table', 2, 'Order table'],
['order_summary', 'view', null, null],
]);
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: 'analytics',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fk',
},
]);
});
it('introspects every configured MySQL schema scope', async () => {
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
schemas: ['analytics', 'mart'],
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory: multiSchemaMysqlPoolFactory(),
now: () => new Date('2026-05-21T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql' },
{ runId: 'scan-run-1' },
);
expect(snapshot.scope).toEqual({ schemas: ['analytics', 'mart'] });
expect(snapshot.metadata).toMatchObject({ database: 'analytics', schemas: ['analytics', 'mart'] });
expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual([
'analytics.customers',
'mart.orders',
]);
});
it('soft-fails denied MySQL constraint discovery with one warning per schema and kind', async () => {
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
schemas: ['analytics', 'mart'],
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory: multiSchemaMysqlPoolFactory({
primaryKeyError: Object.assign(new Error('select command denied'), {
code: 'ER_TABLEACCESS_DENIED_ERROR',
errno: 1142,
}),
foreignKeyError: Object.assign(new Error('database access denied'), {
code: 'ER_DBACCESS_DENIED_ERROR',
errno: 1044,
}),
}),
now: () => new Date('2026-04-29T12:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql' },
{ runId: 'scan-run-mysql-denied-constraints' },
);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'analytics', kind: 'primary_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in mart (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'mart', kind: 'primary_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped foreign-key discovery in analytics (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'analytics', kind: 'foreign_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped foreign-key discovery in mart (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'mart', kind: 'foreign_key' },
},
]);
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true);
});
it('limits introspection to tables in tableScope', async () => {
const queries: Array<{ sql: string; params?: unknown }> = [];
const poolFactory: KtxMysqlPoolFactory = {
createPool: vi.fn(() => ({
getConnection: vi.fn(async () => ({
query: vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => {
queries.push({ sql, params });
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return mysqlResult(
[
{
TABLE_SCHEMA: 'analytics',
TABLE_NAME: 'orders',
TABLE_TYPE: 'BASE TABLE',
TABLE_COMMENT: '',
TABLE_ROWS: 2,
},
],
[],
);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return mysqlResult(
[
{
TABLE_SCHEMA: 'analytics',
TABLE_NAME: 'orders',
COLUMN_NAME: 'id',
DATA_TYPE: 'int',
IS_NULLABLE: 'NO',
COLUMN_COMMENT: '',
},
],
[],
);
}
return mysqlResult([], []);
}),
release: vi.fn(),
})),
end: vi.fn(async () => undefined),
})),
};
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory,
});
const scope = tableRefSet([{ catalog: null, db: 'analytics', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
const tablesQuery = queries.find((query) => query.sql.includes('INFORMATION_SCHEMA.TABLES'));
expect(tablesQuery?.sql).toMatch(/TABLE_NAME IN \(\?\)/);
expect(tablesQuery?.params).toEqual(['analytics', 'orders']);
});
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const poolFactory = fakePoolFactory();
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory,
});
await expect(
connector.sampleTable(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, columns: ['id', 'status'], limit: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1 });
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: null, db: 'analytics', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from analytics.orders', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']);
await expect(connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
)).resolves.toBeNull();
await connector.cleanup();
});
it('adapts native MySQL snapshots to live-database introspection for local ingest', async () => {
const introspection = createMysqlLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T12:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T12:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: null,
db: 'analytics',
columns: [
{
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
},
{
name: 'name',
nativeType: 'varchar',
normalizedType: 'varchar',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
});
});

View file

@ -2,6 +2,7 @@ import mysql, { type FieldPacket, type Pool, type RowDataPacket } from 'mysql2/p
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import {
constraintDiscoveryWarning,
@ -30,7 +31,6 @@ import {
type KtxTableSampleInput,
type KtxTableSampleResult,
} from '../../context/scan/types.js';
import { KtxMysqlDialect } from './dialect.js';
export interface KtxMysqlConnectionConfig {
driver?: string;
@ -303,6 +303,25 @@ function queryParams(params: Record<string, unknown> | unknown[] | undefined): u
return Array.isArray(params) ? params : Object.values(params);
}
/** @internal */
export function prepareMysqlReadOnlyQuery(
sql: string,
params?: Record<string, unknown>,
): { sql: string; params?: unknown[] } {
if (!params) {
return { sql, params: undefined };
}
const values: unknown[] = [];
const parameterizedQuery = sql.replace(/:([A-Za-z_][A-Za-z0-9_]*)\b/g, (placeholder, key: string) => {
if (!(key in params)) {
return placeholder;
}
values.push(params[key]);
return '?';
});
return { sql: parameterizedQuery, params: values };
}
export function isKtxMysqlConnectionConfig(
connection: KtxMysqlConnectionConfig | undefined,
): connection is KtxMysqlConnectionConfig {
@ -376,7 +395,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
private readonly poolFactory: KtxMysqlPoolFactory;
private readonly endpointResolver?: KtxMysqlEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KtxMysqlDialect();
private readonly dialect = getDialectForDriver('mysql');
private pool: KtxMysqlPool | null = null;
private resolvedEndpoint: KtxMysqlResolvedEndpoint | null = null;
@ -550,7 +569,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = Array.isArray(input.params)
? { sql: limitedSql, params: input.params }
: this.dialect.prepareQuery(limitedSql, input.params);
: prepareMysqlReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -625,6 +644,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
filterSchemas,
);
return rows.map((row) => ({
catalog: null,
schema: row.TABLE_SCHEMA,
name: row.TABLE_NAME,
kind: row.TABLE_TYPE === 'VIEW' ? ('view' as const) : ('table' as const),

View file

@ -1,49 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxMysqlDialect } from './dialect.js';
describe('KtxMysqlDialect', () => {
const dialect = new KtxMysqlDialect();
it('quotes identifiers and formats database-qualified table names', () => {
expect(dialect.quoteIdentifier('orders')).toBe('`orders`');
expect(dialect.quoteIdentifier('odd`name')).toBe('`odd``name`');
expect(dialect.formatTableName({ catalog: null, db: 'analytics', name: 'orders' })).toBe(
'`analytics`.`orders`',
);
expect(dialect.formatTableName({ catalog: null, db: null, name: 'orders' })).toBe('`orders`');
});
it('maps native MySQL types to KTX dimension types', () => {
expect(dialect.mapToDimensionType('tinyint(1)')).toBe('boolean');
expect(dialect.mapToDimensionType('int')).toBe('number');
expect(dialect.mapToDimensionType('decimal(10,2)')).toBe('number');
expect(dialect.mapToDimensionType('timestamp')).toBe('time');
expect(dialect.mapToDimensionType('varchar(255)')).toBe('string');
expect(dialect.mapToDimensionType('json')).toBe('string');
expect(dialect.mapToDimensionType('')).toBe('string');
});
it('builds sampling, distinct-value, pagination, and time SQL', () => {
expect(dialect.generateSampleQuery('`analytics`.`orders`', 25, ['id', 'status'])).toBe(
'SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 25',
);
expect(dialect.generateColumnSampleQuery('`analytics`.`orders`', 'status', 10)).toBe(
"SELECT `status` FROM `analytics`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS CHAR)) != '' LIMIT 10",
);
expect(dialect.generateDistinctValuesQuery('`analytics`.`orders`', '`status`', 5)).toContain(
'SELECT DISTINCT CAST(`status` AS CHAR) AS val',
);
expect(dialect.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20');
expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe("DATE_FORMAT(created_at, '%Y-%m-01')");
});
it('prepares named parameters in deterministic SQL placeholder order', () => {
expect(dialect.prepareQuery('select * from orders where id = :id and status = :status', {
status: 'paid',
id: 10,
})).toEqual({
sql: 'select * from orders where id = ? and status = ?',
params: [10, 'paid'],
});
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type MysqlTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxMysqlDialect {
readonly type = 'mysql';
/** @internal */
export class KtxMysqlDialect implements KtxDialect {
readonly type = 'mysql' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
datetime: 'time',
@ -41,9 +50,19 @@ export class KtxMysqlDialect {
}
formatTableName(table: MysqlTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
formatDisplayRef(table: MysqlTableNameRef): string {
return formatDialectDisplayRef(table, 'ansi');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'ansi');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('ansi');
}
mapDataType(nativeType: string): string {
@ -91,21 +110,6 @@ export class KtxMysqlDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS CHAR)) != '' LIMIT ${limit}`;
}
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
if (!params) {
return { sql, params: undefined };
}
const values: unknown[] = [];
const parameterizedQuery = sql.replace(/:([A-Za-z_][A-Za-z0-9_]*)\b/g, (placeholder, key: string) => {
if (!(key in params)) {
return placeholder;
}
values.push(params[key]);
return '?';
});
return { sql: parameterizedQuery, params: values };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -118,7 +122,11 @@ export class KtxMysqlDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -129,6 +137,18 @@ export class KtxMysqlDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `CHAR_LENGTH(CAST(${columnSql} AS CHAR))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS CHAR)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT GROUP_CONCAT(CAST(value AS CHAR) SEPARATOR CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
SELECT COUNT(DISTINCT val) AS cardinality
@ -167,36 +187,4 @@ export class KtxMysqlDialect {
) AS sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const col = timezone ? `CONVERT_TZ(${column}, '+00:00', '${timezone}')` : column;
switch (granularity) {
case 'day':
return `DATE(${col})`;
case 'week':
return `DATE(${col} - INTERVAL WEEKDAY(${col}) DAY)`;
case 'month':
return `DATE_FORMAT(${col}, '%Y-%m-01')`;
case 'quarter':
return `MAKEDATE(YEAR(${col}), 1) + INTERVAL (QUARTER(${col}) - 1) QUARTER`;
case 'year':
return `DATE_FORMAT(${col}, '%Y-01-01')`;
}
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `CONVERT_TZ(${column}, '+00:00', '${timezone}')` : column;
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `'${origin}'` : `'1970-01-01'`;
return `DATE_ADD(${originExpr}, INTERVAL FLOOR(TIMESTAMPDIFF(${unit!.toUpperCase()}, ${originExpr}, ${col}) / ${amount}) * ${amount} ${unit!.toUpperCase()})`;
}
parseIntervalToSql(interval: string): string {
const [amount, unit] = interval.split(' ');
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
}
}

View file

@ -1,559 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createPostgresLiveDatabaseIntrospection } from '../../connectors/postgres/live-database-introspection.js';
import { isKtxPostgresConnectionConfig, KtxPostgresScanConnector, postgresPoolConfigFromConfig, type KtxPostgresConnectionConfig, type KtxPostgresPoolFactory } from '../../connectors/postgres/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
interface FakeQueryResult {
rows: Record<string, unknown>[];
fields?: Array<{ name: string; dataTypeID: number }>;
}
type FakeQueryResponse = FakeQueryResult | Error;
function fakePoolFactory(results: Map<string, FakeQueryResponse>): KtxPostgresPoolFactory {
const query = vi.fn(async (sql: string, params?: unknown[]) => {
const normalized = sql.replace(/\s+/g, ' ').trim();
for (const [key, value] of results.entries()) {
if (normalized.includes(key)) {
if (value instanceof Error) {
throw value;
}
return value;
}
}
throw new Error(`Unexpected SQL: ${normalized} params=${JSON.stringify(params ?? [])}`);
});
return {
createPool() {
return {
async connect() {
return {
query,
release: vi.fn(),
};
},
end: vi.fn(async () => undefined),
};
},
};
}
function metadataResults(): Map<string, FakeQueryResponse> {
return new Map<string, FakeQueryResponse>([
[
'FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n',
{
rows: [
{ table_name: 'customers', table_kind: 'r', row_count: '2', table_comment: 'Customers' },
{ table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null },
{ table_name: 'recent_orders', table_kind: 'v', row_count: '0', table_comment: 'Recent orders' },
],
},
],
[
'FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_class c',
{
rows: [
{ table_name: 'customers', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
{ table_name: 'customers', column_name: 'name', data_type: 'text', is_nullable: false, column_comment: 'Name' },
{ table_name: 'orders', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
{ table_name: 'orders', column_name: 'customer_id', data_type: 'integer', is_nullable: false, column_comment: null },
{ table_name: 'orders', column_name: 'status', data_type: 'text', is_nullable: true, column_comment: null },
{ table_name: 'recent_orders', column_name: 'id', data_type: 'integer', is_nullable: true, column_comment: null },
],
},
],
[
"tc.constraint_type = 'FOREIGN KEY'",
{
rows: [
{
table_name: 'orders',
column_name: 'customer_id',
foreign_table_schema: 'public',
foreign_table_name: 'customers',
foreign_column_name: 'id',
constraint_name: 'orders_customer_id_fkey',
},
],
},
],
[
"tc.constraint_type = 'PRIMARY KEY'",
{
rows: [
{ table_name: 'customers', column_name: 'id' },
{ table_name: 'orders', column_name: 'id' },
],
},
],
['SELECT "id" FROM "public"."orders" LIMIT 1', { rows: [{ id: 10 }], fields: [{ name: 'id', dataTypeID: 23 }] }],
[
'SELECT "status" FROM "public"."orders" WHERE "status" IS NOT NULL',
{ rows: [{ status: 'paid' }, { status: 'open' }], fields: [{ name: 'status', dataTypeID: 25 }] },
],
['COUNT(DISTINCT val) AS cardinality', { rows: [{ cardinality: '2' }] }],
['SELECT DISTINCT "status"::text AS val', { rows: [{ val: 'open' }, { val: 'paid' }] }],
['SELECT COUNT(*) AS count FROM "public"."orders"', { rows: [{ count: '3' }] }],
['FROM pg_stats s', { rows: [{ column_name: 'status', estimated_cardinality: '2' }] }],
['SELECT 1', { rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] }],
['SELECT schema_name FROM information_schema.schemata', { rows: [{ schema_name: 'public' }] }],
]);
}
describe('KtxPostgresScanConnector', () => {
it('resolves configuration safely', () => {
expect(isKtxPostgresConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(true);
expect(isKtxPostgresConnectionConfig({ driver: 'postgresql', host: 'db', database: 'analytics' })).toBe(false);
expect(isKtxPostgresConnectionConfig({ driver: 'mysql', host: 'db' })).toBe(false);
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schemas: ['analytics', 'public'],
ssl: true,
rejectUnauthorized: false,
},
}),
).toMatchObject({
host: 'db.example.test',
port: 5432,
database: 'analytics',
user: 'reader',
password: 'test-password', // pragma: allowlist secret
options: '-c search_path=analytics,public',
ssl: { rejectUnauthorized: false },
});
const libpqPreferConfig = postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
url: 'env:DEMO_DATABASE_URL',
},
env: {
DEMO_DATABASE_URL: 'postgresql://reader@demo.example.test:5432/demo?sslmode=prefer',
},
});
expect(libpqPreferConfig).toMatchObject({
host: 'demo.example.test',
port: 5432,
database: 'demo',
user: 'reader',
});
expect(libpqPreferConfig).not.toHaveProperty('connectionString');
expect(libpqPreferConfig).not.toHaveProperty('ssl');
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { driver: 'postgres', host: 'db.example.test', database: 'analytics', username: 'reader' },
}),
).toMatchObject({
host: 'db.example.test',
database: 'analytics',
user: 'reader',
});
});
it('defaults and validates Postgres maxConnections', () => {
const baseConnection: KtxPostgresConnectionConfig = {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
};
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: baseConnection,
}),
).toMatchObject({ max: 10 });
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: 50 },
}),
).toMatchObject({ max: 50 });
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: '12' as never },
}),
).toMatchObject({ max: 12 });
for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) {
expect(() =>
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections },
}),
).toThrow('connections.warehouse.maxConnections must be a positive integer');
}
});
it('introspects schemas, tables, views, primary keys, comments, row counts, and foreign keys', async () => {
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory: fakePoolFactory(metadataResults()),
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'postgres' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'postgres',
extractedAt: '2026-04-29T10:00:00.000Z',
scope: { schemas: ['public'] },
metadata: {
database: 'analytics',
schemas: ['public'],
host: 'db.example.test',
table_count: 3,
total_columns: 6,
},
});
expect(snapshot.tables.map((table) => [table.db, table.name, table.kind, table.estimatedRows])).toEqual([
['public', 'customers', 'table', 2],
['public', 'orders', 'table', 3],
['public', 'recent_orders', 'view', null],
]);
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: 'public',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fkey',
},
]);
});
it('soft-fails denied Postgres constraint discovery with scan warnings', async () => {
const results = metadataResults();
results.set(
"tc.constraint_type = 'PRIMARY KEY'",
Object.assign(new Error('permission denied for information_schema'), { code: '42501' }),
);
results.set(
"tc.constraint_type = 'FOREIGN KEY'",
Object.assign(new Error('relation information_schema.key_column_usage does not exist'), { code: '42P01' }),
);
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory: fakePoolFactory(results),
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'postgres' },
{ runId: 'scan-run-denied-constraints' },
);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'public', kind: 'primary_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped foreign-key discovery in public (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'public', kind: 'foreign_key' },
},
]);
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true);
});
it('propagates non-denial Postgres constraint discovery errors', async () => {
const results = metadataResults();
const resetError = Object.assign(new Error('connection reset'), { code: 'ECONNRESET' });
results.set("tc.constraint_type = 'PRIMARY KEY'", resetError);
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory: fakePoolFactory(results),
});
await expect(
connector.introspect({ connectionId: 'warehouse', driver: 'postgres' }, { runId: 'scan-run-network-error' }),
).rejects.toBe(resetError);
});
it('runs samples, distinct values, statistics, read-only SQL, and schema listing', async () => {
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory: fakePoolFactory(metadataResults()),
});
await expect(
connector.sampleTable(
{ connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, columns: ['id'], limit: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id'], headerTypes: ['integer'], rows: [[10]], totalRows: 1 });
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: null, db: 'public', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(connector.getColumnStatistics({ catalog: null, db: 'public', name: 'orders' })).resolves.toEqual({
cardinalityByColumn: new Map([['status', 2]]),
});
await expect(connector.getTableRowCount({ db: 'public', name: 'orders' })).resolves.toBe(3);
await expect(connector.listSchemas()).resolves.toEqual(['public']);
await expect(connector.testConnection()).resolves.toEqual({ success: true });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
});
it('limits introspection to tables in tableScope', async () => {
const queries: Array<{ sql: string; params?: unknown[] }> = [];
const poolFactory: KtxPostgresPoolFactory = {
createPool() {
return {
async connect() {
return {
query: vi.fn(async (sql: string, params?: unknown[]) => {
queries.push({ sql, params });
if (sql.includes('FROM pg_catalog.pg_class c')) {
return { rows: [{ table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null }] };
}
if (sql.includes('FROM pg_catalog.pg_attribute a')) {
return {
rows: [
{
table_name: 'orders',
column_name: 'id',
data_type: 'integer',
is_nullable: false,
column_comment: null,
},
],
};
}
return { rows: [] };
}),
release: vi.fn(),
};
},
end: vi.fn(async () => undefined),
};
},
};
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory,
});
const scope = tableRefSet([{ catalog: null, db: 'public', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'postgres', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
const tablesQuery = queries.find((query) => query.sql.includes('FROM pg_catalog.pg_class c'));
expect(tablesQuery?.sql).toMatch(/c\.relname = ANY\(\$2\)/);
expect(tablesQuery?.params).toEqual(['public', ['orders']]);
});
it('adapts native PostgreSQL snapshots to live-database introspection for local ingest', async () => {
const introspection = createPostgresLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
},
poolFactory: fakePoolFactory(metadataResults()),
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T10:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: null,
db: 'public',
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
{
name: 'name',
nativeType: 'text',
normalizedType: 'text',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: 'Name',
},
],
foreignKeys: [],
});
});
it('does not end the pool before introspection completes', async () => {
let endCalled = false;
const endAwarePoolFactory: KtxPostgresPoolFactory = {
createPool() {
const inner = fakePoolFactory(metadataResults()).createPool({
max: 1,
idleTimeoutMillis: 1,
connectionTimeoutMillis: 1,
});
return {
async connect() {
if (endCalled) {
throw new Error('Cannot use a pool after calling end on the pool');
}
return inner.connect();
},
async end() {
endCalled = true;
return inner.end();
},
};
},
};
const introspection = createPostgresLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
},
poolFactory: endAwarePoolFactory,
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot.tables.length).toBeGreaterThan(0);
expect(endCalled).toBe(true);
});
it('attaches an error listener to the pg pool', async () => {
const on = vi.fn();
const poolFactory: KtxPostgresPoolFactory = {
createPool() {
return {
on,
async connect() {
return {
query: vi.fn(async () => ({ rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] })),
release: vi.fn(),
};
},
end: vi.fn(async () => undefined),
};
},
};
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
},
poolFactory,
});
await expect(connector.testConnection()).resolves.toEqual({ success: true });
expect(on).toHaveBeenCalledWith('error', expect.any(Function));
});
});

View file

@ -1,6 +1,7 @@
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -26,7 +27,6 @@ import {
type KtxTableSampleResult,
} from '../../context/scan/types.js';
import { Pool } from 'pg';
import { KtxPostgresDialect } from './dialect.js';
const PG_OID_TYPE_MAP: Record<number, string> = {
16: 'boolean',
@ -219,6 +219,29 @@ function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string,
return grouped;
}
/** @internal */
export function preparePostgresReadOnlyQuery(
sql: string,
params?: Record<string, unknown>,
): { sql: string; params?: unknown[] } {
if (!params) {
return { sql, params: undefined };
}
const paramNames = Object.keys(params);
const values: unknown[] = new Array(paramNames.length);
const paramIndexMap = new Map<string, number>();
paramNames.forEach((name, index) => {
paramIndexMap.set(name, index + 1);
values[index] = params[name];
});
const sortedKeys = [...paramNames].sort((a, b) => b.length - a.length);
let parameterizedQuery = sql;
for (const name of sortedKeys) {
parameterizedQuery = parameterizedQuery.replace(new RegExp(`:${name}\\b`, 'g'), `$${paramIndexMap.get(name)}`);
}
return { sql: parameterizedQuery, params: values };
}
function primaryKeyMap(rows: PostgresPrimaryKeyRow[]): Map<string, Set<string>> {
const grouped = new Map<string, Set<string>>();
for (const row of rows) {
@ -400,7 +423,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
private readonly poolFactory: KtxPostgresPoolFactory;
private readonly endpointResolver?: KtxPostgresEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KtxPostgresDialect();
private readonly dialect = getDialectForDriver('postgres');
private pool: KtxPostgresPool | null = null;
private lastIdlePoolError: Error | null = null;
private resolvedEndpoint: KtxPostgresResolvedEndpoint | null = null;
@ -489,7 +512,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = Array.isArray(input.params)
? { sql: limitedSql, params: input.params }
: this.dialect.prepareQuery(limitedSql, input.params);
: preparePostgresReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -584,6 +607,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
[filterSchemas],
);
return rows.map((row) => ({
catalog: null,
schema: row.schema_name,
name: row.table_name,
kind: row.table_kind === 'v' ? ('view' as const) : ('table' as const),

View file

@ -1,52 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxPostgresDialect } from './dialect.js';
describe('KtxPostgresDialect', () => {
const dialect = new KtxPostgresDialect();
it('quotes identifiers and formats schema-qualified tables', () => {
expect(dialect.quoteIdentifier('order"items')).toBe('"order""items"');
expect(dialect.formatTableName({ catalog: null, db: 'public', name: 'orders' })).toBe('"public"."orders"');
expect(dialect.formatTableName({ catalog: null, db: null, name: 'orders' })).toBe('"orders"');
});
it('maps native PostgreSQL types to KTX dimension types', () => {
expect(dialect.mapToDimensionType('timestamp with time zone')).toBe('time');
expect(dialect.mapToDimensionType('numeric(12,2)')).toBe('number');
expect(dialect.mapToDimensionType('uuid')).toBe('string');
expect(dialect.mapToDimensionType('boolean')).toBe('boolean');
expect(dialect.mapToDimensionType('jsonb')).toBe('string');
});
it('generates sample, distinct-value, statistics, and time SQL', () => {
expect(dialect.generateSampleQuery('"public"."orders"', 5, ['id', 'status'])).toBe(
'SELECT "id", "status" FROM "public"."orders" LIMIT 5',
);
expect(dialect.generateColumnSampleQuery('"public"."orders"', 'status', 10)).toContain(
'TRIM(CAST("status" AS TEXT)) != \'\'',
);
expect(dialect.generateDistinctValuesQuery('"public"."orders"', '"status"', 20)).toContain(
'SELECT DISTINCT "status"::text AS val',
);
expect(dialect.generateColumnStatisticsQuery('public', 'orders')).toContain('FROM pg_stats s');
expect(dialect.getTimeTruncExpression('"created_at"', 'month')).toBe('DATE_TRUNC(\'month\', "created_at")');
});
it('prepares named parameters with PostgreSQL positional parameters', () => {
expect(
dialect.prepareQuery('select * from orders where id = :id and status = :status', { id: 1, status: 'paid' }),
).toEqual({
sql: 'select * from orders where id = $1 and status = $2',
params: [1, 'paid'],
});
expect(
dialect.prepareQuery('select :Client_Name_10, :Client_Name_1', {
Client_Name_1: 'short',
Client_Name_10: 'long',
}),
).toEqual({
sql: 'select $2, $1',
params: ['short', 'long'],
});
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type PostgresTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxPostgresDialect {
readonly type = 'postgresql';
/** @internal */
export class KtxPostgresDialect implements KtxDialect {
readonly type = 'postgres' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
timestamp: 'time',
@ -45,9 +54,19 @@ export class KtxPostgresDialect {
}
formatTableName(table: PostgresTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
formatDisplayRef(table: PostgresTableNameRef): string {
return formatDialectDisplayRef(table, 'ansi');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'ansi');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('ansi');
}
mapDataType(nativeType: string): string {
@ -92,25 +111,6 @@ export class KtxPostgresDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS TEXT)) != '' LIMIT ${limit}`;
}
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
if (!params) {
return { sql, params: undefined };
}
const paramNames = Object.keys(params);
const values: unknown[] = new Array(paramNames.length);
const paramIndexMap = new Map<string, number>();
paramNames.forEach((name, index) => {
paramIndexMap.set(name, index + 1);
values[index] = params[name];
});
const sortedKeys = [...paramNames].sort((a, b) => b.length - a.length);
let parameterizedQuery = sql;
for (const name of sortedKeys) {
parameterizedQuery = parameterizedQuery.replace(new RegExp(`:${name}\\b`, 'g'), `$${paramIndexMap.get(name)}`);
}
return { sql: parameterizedQuery, params: values };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -126,7 +126,11 @@ export class KtxPostgresDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -137,6 +141,18 @@ export class KtxPostgresDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS TEXT))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS TEXT)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT STRING_AGG(CAST(value AS TEXT), CHR(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -191,23 +207,4 @@ export class KtxPostgresDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
return `DATE_TRUNC('${granularity}', ${col})`;
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
const originExpr = origin ? `TIMESTAMP '${origin.replace(/'/g, "''")}'` : "TIMESTAMP '1970-01-01'";
return `${originExpr} + FLOOR(EXTRACT(EPOCH FROM (${col} - ${originExpr})) / EXTRACT(EPOCH FROM INTERVAL '${interval.replace(/'/g, "''")}')) * INTERVAL '${interval.replace(/'/g, "''")}'`;
}
parseIntervalToSql(interval: string): string {
return `INTERVAL '${interval.replace(/'/g, "''")}'`;
}
}

View file

@ -1,49 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { KtxPostgresHistoricSqlQueryClient } from './historic-sql-query-client.js';
import type { KtxPostgresPoolConfig, KtxPostgresPoolFactory } from './connector.js';
describe('KtxPostgresHistoricSqlQueryClient', () => {
it('executes parameterized read-only SQL through the native Postgres connector pool', async () => {
const queryCalls: Array<{ sql: string; params?: unknown[] }> = [];
const release = vi.fn();
const end = vi.fn(async () => {});
const poolFactory: KtxPostgresPoolFactory = {
createPool(_config: KtxPostgresPoolConfig) {
return {
async connect() {
return {
async query(sql: string, params?: unknown[]) {
queryCalls.push({ sql, params });
return {
fields: [{ name: 'answer', dataTypeID: 23 }],
rows: [{ answer: 42 }],
};
},
release,
};
},
end,
};
},
};
const client = new KtxPostgresHistoricSqlQueryClient({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
url: 'postgresql://readonly:secret@pg.example.test/warehouse', // pragma: allowlist secret
},
poolFactory,
});
await expect(client.executeQuery('SELECT $1::int AS answer', [42])).resolves.toEqual({
headers: ['answer'],
rows: [[42]],
totalRows: 1,
});
expect(queryCalls).toEqual([{ sql: 'SELECT $1::int AS answer', params: [42] }]);
await client.cleanup();
expect(release).toHaveBeenCalledTimes(1);
expect(end).toHaveBeenCalledTimes(1);
});
});

View file

@ -1,620 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
const createPool = vi.hoisted(() => vi.fn());
vi.mock('snowflake-sdk', () => ({
default: { createPool },
createPool,
}));
import { createSnowflakeLiveDatabaseIntrospection } from '../../connectors/snowflake/live-database-introspection.js';
import { isKtxSnowflakeConnectionConfig, KtxSnowflakeScanConnector, snowflakeConnectionConfigFromConfig, type KtxSnowflakeConnectionConfig, type KtxSnowflakeDriver, type KtxSnowflakeDriverFactory } from '../../connectors/snowflake/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function fakeDriverFactory(): KtxSnowflakeDriverFactory {
const driver: KtxSnowflakeDriver = {
test: vi.fn(async () => ({ success: true })),
query: vi.fn(async (sql: string) => {
if (sql.includes('TABLE_CONSTRAINTS')) {
return { headers: ['TABLE_NAME', 'COLUMN_NAME'], rows: [['ORDERS', 'ID']], totalRows: 1, rowCount: 1 };
}
if (sql.includes('SELECT "ID", "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"')) {
return {
headers: ['ID', 'STATUS'],
headerTypes: ['NUMBER', 'VARCHAR'],
rows: [[1, 'paid']],
totalRows: 1,
rowCount: 1,
};
}
if (sql.includes('select * from (select ID, STATUS from ORDERS) as ktx_query_result limit 1')) {
return { headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 };
}
if (sql.includes('SELECT "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"')) {
return { headers: ['STATUS'], rows: [['paid'], ['open']], totalRows: 2, rowCount: 2 };
}
if (sql.includes('COUNT(DISTINCT val)')) {
return { headers: ['CARDINALITY'], rows: [[2]], totalRows: 1, rowCount: 1 };
}
if (sql.includes('SELECT DISTINCT "STATUS"::VARCHAR AS val')) {
return { headers: ['VAL'], rows: [['open'], ['paid']], totalRows: 2, rowCount: 2 };
}
throw new Error(`Unexpected SQL: ${sql}`);
}),
getSchemaMetadata: vi.fn(async () => [
{
name: 'ORDERS',
catalog: 'ANALYTICS',
db: 'PUBLIC',
rowCount: 12,
comment: 'Orders',
columns: [
{ name: 'ID', type: 'NUMBER(38,0)', nullable: false, comment: 'Primary key' },
{ name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null },
],
},
{
name: 'ORDER_SUMMARY',
catalog: 'ANALYTICS',
db: 'PUBLIC',
rowCount: 3,
comment: null,
columns: [{ name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null }],
},
]),
listSchemas: vi.fn(async () => ['PUBLIC', 'MART']),
listTables: vi.fn(async () => [
{ schema: 'PUBLIC', name: 'ORDERS', kind: 'table' as const },
{ schema: 'PUBLIC', name: 'ORDER_SUMMARY', kind: 'view' as const },
]),
cleanup: vi.fn(async () => undefined),
};
return { createDriver: vi.fn(() => driver) };
}
function fakeSnowflakeStatement(headers: string[] = ['ONE']) {
return {
getColumns: () => headers.map((header) => ({ getName: () => header, getType: () => 'TEXT' })),
};
}
function installSnowflakePoolMock() {
const executedSql: string[] = [];
const connection = {
execute: vi.fn(
(input: {
sqlText: string;
complete: (
error: Error | null,
statement: ReturnType<typeof fakeSnowflakeStatement>,
rows: Array<Record<string, unknown>>,
) => void;
}) => {
executedSql.push(input.sqlText);
input.complete(null, fakeSnowflakeStatement(), [{ ONE: 1 }]);
},
),
};
const pool = {
use: vi.fn(async (fn: (conn: typeof connection) => Promise<unknown>) => fn(connection)),
drain: vi.fn(async () => undefined),
clear: vi.fn(async () => undefined),
};
createPool.mockReturnValue(pool);
return { connection, pool, executedSql };
}
describe('KtxSnowflakeScanConnector', () => {
it('resolves Snowflake connection configuration safely', () => {
expect(
isKtxSnowflakeConnectionConfig({
driver: 'snowflake',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
username: 'reader',
}),
).toBe(true);
expect(isKtxSnowflakeConnectionConfig({ driver: 'bigquery' })).toBe(false);
expect(
snowflakeConnectionConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
}),
).toMatchObject({
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schemas: ['PUBLIC'],
username: 'reader',
authMethod: 'password',
});
});
it('defaults and validates Snowflake maxConnections', () => {
const baseConnection: KtxSnowflakeConnectionConfig = {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
};
expect(
snowflakeConnectionConfigFromConfig({
connectionId: 'warehouse',
connection: baseConnection,
}),
).toMatchObject({ maxConnections: 4 });
expect(
snowflakeConnectionConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: 8 },
}),
).toMatchObject({ maxConnections: 8 });
expect(
snowflakeConnectionConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: '12' as never },
}),
).toMatchObject({ maxConnections: 12 });
for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) {
expect(() =>
snowflakeConnectionConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections },
}),
).toThrow('connections.warehouse.maxConnections must be a positive integer');
}
});
it('rejects stale Snowflake pool config key', () => {
const baseConnection: KtxSnowflakeConnectionConfig = {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
};
expect(() =>
snowflakeConnectionConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxSessions: 8 },
}),
).toThrow(/renamed to maxConnections/);
});
it('uses one lazy Snowflake pool and drains it during cleanup', async () => {
const { pool, executedSql } = installSnowflakePoolMock();
const close = vi.fn(async () => undefined);
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
role: 'ANALYST',
maxConnections: 3,
},
sdkOptionsProvider: {
resolve: vi.fn(async () => ({ sdkOptions: { application: 'ktx-test' }, close })),
},
});
expect(createPool).not.toHaveBeenCalled();
await connector.executeReadOnly({ connectionId: 'warehouse', sql: 'select 1', maxRows: 1 }, { runId: 'run-1' });
await connector.executeReadOnly({ connectionId: 'warehouse', sql: 'select 1', maxRows: 1 }, { runId: 'run-1' });
expect(createPool).toHaveBeenCalledTimes(1);
expect(createPool).toHaveBeenCalledWith(
expect.objectContaining({
account: 'acct',
username: 'reader',
warehouse: 'WH',
database: 'ANALYTICS',
schema: 'PUBLIC',
role: 'ANALYST',
password: 'fixture-pass', // pragma: allowlist secret
clientSessionKeepAlive: true,
clientSessionKeepAliveHeartbeatFrequency: 900,
application: 'ktx-test',
}),
expect.objectContaining({
min: 0,
max: 3,
evictionRunIntervalMillis: 30_000,
acquireTimeoutMillis: 60_000,
}),
);
expect(pool.use).toHaveBeenCalledTimes(2);
expect(executedSql.some((sql) => /^USE\s+/i.test(sql.trim()))).toBe(false);
await connector.cleanup();
expect(pool.drain).toHaveBeenCalledBefore(pool.clear);
expect(pool.clear).toHaveBeenCalledTimes(1);
expect(close).toHaveBeenCalledTimes(1);
});
it('introspects schema, primary keys, comments, row counts, and dimensions', async () => {
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory: fakeDriverFactory(),
now: () => new Date('2026-04-29T18:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'snowflake' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'snowflake',
extractedAt: '2026-04-29T18:00:00.000Z',
scope: { catalogs: ['ANALYTICS'], schemas: ['PUBLIC'] },
metadata: {
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schemas: ['PUBLIC'],
table_count: 2,
total_columns: 3,
},
});
expect(snapshot.tables.find((table) => table.name === 'ORDERS')?.columns).toEqual([
{
name: 'ID',
nativeType: 'NUMBER(38,0)',
normalizedType: 'NUMBER(38,0)',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Primary key',
},
{
name: 'STATUS',
nativeType: 'VARCHAR',
normalizedType: 'VARCHAR',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
]);
});
it('continues introspection when primary-key discovery is not authorized', async () => {
const driverFactory = fakeDriverFactory();
const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).getMockImplementation() as
| (() => KtxSnowflakeDriver)
| undefined;
if (!driver) throw new Error('driver mock missing');
const built = driver();
(built.query as ReturnType<typeof vi.fn>).mockImplementation(async (sql: string) => {
if (sql.includes('TABLE_CONSTRAINTS')) {
throw new Error(
"SQL compilation error: Object 'ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE' does not exist or not authorized.",
);
}
throw new Error(`Unexpected SQL: ${sql}`);
});
(driverFactory.createDriver as ReturnType<typeof vi.fn>).mockReturnValue(built);
const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined);
try {
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'snowflake' },
{ runId: 'scan-run-pk-skip' },
);
expect(snapshot.tables.map((table) => table.name).sort()).toEqual(['ORDERS', 'ORDER_SUMMARY']);
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in PUBLIC (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'PUBLIC', kind: 'primary_key' },
},
]);
expect(warn).not.toHaveBeenCalled();
} finally {
warn.mockRestore();
}
});
it('propagates non-denial Snowflake primary-key discovery errors', async () => {
const driverFactory = fakeDriverFactory();
const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).getMockImplementation() as
| (() => KtxSnowflakeDriver)
| undefined;
if (!driver) throw new Error('driver mock missing');
const built = driver();
const networkError = new Error('network unavailable');
(built.query as ReturnType<typeof vi.fn>).mockImplementation(async (sql: string) => {
if (sql.includes('TABLE_CONSTRAINTS')) {
throw networkError;
}
throw new Error(`Unexpected SQL: ${sql}`);
});
(driverFactory.createDriver as ReturnType<typeof vi.fn>).mockReturnValue(built);
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
await expect(
connector.introspect({ connectionId: 'warehouse', driver: 'snowflake' }, { runId: 'scan-run-snowflake-network' }),
).rejects.toBe(networkError);
});
it('limits introspection to tables in tableScope', async () => {
const queries: Array<{ sql: string; params?: unknown }> = [];
const getSchemaMetadata = vi.fn(async (_schemaName?: string, scopedNames?: readonly string[] | null) =>
scopedNames?.includes('ORDERS')
? [
{
name: 'ORDERS',
catalog: 'ANALYTICS',
db: 'MARTS',
rowCount: 10,
comment: null,
columns: [{ name: 'ID', type: 'NUMBER', nullable: false, comment: null }],
},
]
: [],
);
const driverFactory: KtxSnowflakeDriverFactory = {
createDriver: vi.fn(() => ({
test: vi.fn(async () => ({ success: true })),
query: vi.fn(async (sql: string, params?: unknown) => {
queries.push({ sql, params });
return { headers: [], rows: [], totalRows: 0, rowCount: 0 };
}),
getSchemaMetadata,
listSchemas: vi.fn(async () => []),
listTables: vi.fn(async () => []),
cleanup: vi.fn(async () => undefined),
})),
};
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'MARTS',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
const scope = tableRefSet([{ catalog: 'ANALYTICS', db: 'MARTS', name: 'ORDERS' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'snowflake', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['ORDERS']);
expect(getSchemaMetadata).toHaveBeenCalledWith('MARTS', ['ORDERS']);
const primaryKeysQuery = queries.find((query) => query.sql.includes('TABLE_CONSTRAINTS'));
expect(primaryKeysQuery?.sql).toMatch(/AND tc\.TABLE_NAME IN \(\?\)/);
expect(primaryKeysQuery?.params).toEqual(['MARTS', 'ANALYTICS', 'ORDERS']);
});
it('supports read-only query, sampling, distinct values, row counts, schema listing, and cleanup', async () => {
const driverFactory = fakeDriverFactory();
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' },
limit: 1,
columns: ['ID', 'STATUS'],
},
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select ID, STATUS from ORDERS', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], rowCount: 1 });
await expect(
connector.sampleColumn(
{
connectionId: 'warehouse',
table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' },
column: 'STATUS',
limit: 2,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }, 'STATUS', {
maxCardinality: 10,
limit: 5,
}),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(connector.getTableRowCount('ORDERS')).resolves.toBe(12);
await expect(connector.listSchemas()).resolves.toEqual(['PUBLIC', 'MART']);
await connector.cleanup();
const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).mock.results[0]?.value as KtxSnowflakeDriver;
expect(driver.cleanup).toHaveBeenCalledTimes(1);
});
it('lists tables across schemas with one information schema query', async () => {
const queries: Array<{ sql: string; params?: unknown }> = [];
const driverFactory: KtxSnowflakeDriverFactory = {
createDriver: vi.fn(() => ({
test: vi.fn(async () => ({ success: true })),
query: vi.fn(async (sql: string, params?: unknown) => {
queries.push({ sql, params });
return {
headers: ['TABLE_SCHEMA', 'TABLE_NAME', 'TABLE_TYPE'],
rows: [
['MART', 'ORDERS', 'BASE TABLE'],
['PUBLIC', 'ORDER_SUMMARY', 'VIEW'],
],
totalRows: 2,
rowCount: 2,
};
}),
getSchemaMetadata: vi.fn(async () => []),
listSchemas: vi.fn(async () => []),
listTables: vi.fn(async () => []),
cleanup: vi.fn(async () => undefined),
})),
};
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
await expect(connector.listTables(['MART', 'PUBLIC'])).resolves.toEqual([
{ schema: 'MART', name: 'ORDERS', kind: 'table' },
{ schema: 'PUBLIC', name: 'ORDER_SUMMARY', kind: 'view' },
]);
expect(queries).toHaveLength(1);
expect(queries[0]?.sql).toContain('FROM "ANALYTICS".INFORMATION_SCHEMA.TABLES');
expect(queries[0]?.sql).toContain('AND TABLE_SCHEMA IN (?, ?)');
expect(queries[0]?.params).toEqual(['ANALYTICS', 'MART', 'PUBLIC']);
});
it('rejects unsafe Snowflake identifiers before driver creation', () => {
expect(
() =>
new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH;DROP',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory: fakeDriverFactory(),
}),
).toThrow('Invalid Snowflake warehouse identifier "WH;DROP"');
});
it('converts a native snapshot into a live-database introspection snapshot', async () => {
const introspection = createSnowflakeLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
},
driverFactory: fakeDriverFactory(),
now: () => new Date('2026-04-29T18:00:00.000Z'),
});
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
connectionId: 'warehouse',
metadata: { database: 'ANALYTICS', schemas: ['PUBLIC'] },
tables: expect.arrayContaining([
expect.objectContaining({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }),
]),
});
});
});

View file

@ -2,6 +2,7 @@ import { createPrivateKey } from 'node:crypto';
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -27,7 +28,6 @@ import {
} from '../../context/scan/types.js';
import snowflake from 'snowflake-sdk';
import type { Bind, Binds, Connection, ConnectionOptions } from 'snowflake-sdk';
import { KtxSnowflakeDialect } from './dialect.js';
import { assertSafeSnowflakeIdentifier, quoteSnowflakeIdentifier } from './identifiers.js';
import { configureSnowflakeSdkLogger } from './sdk-logger.js';
@ -229,6 +229,14 @@ function toSnowflakeBinds(params: unknown[] | undefined): Binds | undefined {
return params?.map((value) => toSnowflakeBind(value));
}
/** @internal */
export function prepareSnowflakeReadOnlyQuery(
sql: string,
params?: Record<string, unknown>,
): { sql: string; params?: unknown[] } {
return { sql, params: params ? Object.values(params) : undefined };
}
export function isKtxSnowflakeConnectionConfig(
connection: KtxSnowflakeConnectionConfig | undefined,
): connection is KtxSnowflakeConnectionConfig {
@ -430,6 +438,7 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver {
[this.resolved.database, ...(schemas ?? [])],
);
return result.rows.map((row) => ({
catalog: this.resolved.database,
schema: String(row[0]),
name: String(row[1]),
kind: String(row[2]) === 'VIEW' ? ('view' as const) : ('table' as const),
@ -550,7 +559,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
private readonly resolved: KtxSnowflakeResolvedConnectionConfig;
private readonly driverFactory: KtxSnowflakeDriverFactory;
private readonly dialect = new KtxSnowflakeDialect();
private readonly dialect = getDialectForDriver('snowflake');
private readonly now: () => Date;
private driverInstance: KtxSnowflakeDriver | null = null;
@ -635,7 +644,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxSnowflakeReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareSnowflakeReadOnlyQuery(limitedSql, input.params);
return this.getDriver().query(prepared.sql, prepared.params);
}
@ -696,6 +705,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
[this.resolved.database, ...(schemas ?? [])],
);
return result.rows.map((row) => ({
catalog: this.resolved.database,
schema: String(row[0]),
name: String(row[1]),
kind: String(row[2]) === 'VIEW' ? ('view' as const) : ('table' as const),

View file

@ -1,50 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxSnowflakeDialect } from './dialect.js';
describe('KtxSnowflakeDialect', () => {
const dialect = new KtxSnowflakeDialect();
it('quotes identifiers and formats database.schema.table names', () => {
expect(dialect.quoteIdentifier('order"items')).toBe('"order""items"');
expect(dialect.formatTableName({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' })).toBe(
'"ANALYTICS"."PUBLIC"."ORDERS"',
);
expect(dialect.formatTableName({ db: 'PUBLIC', name: 'ORDERS' })).toBe('"PUBLIC"."ORDERS"');
expect(dialect.formatTableName({ name: 'ORDERS' })).toBe('"ORDERS"');
});
it('maps native Snowflake types to scan dimensions', () => {
expect(dialect.mapDataType('NUMBER(38,0)')).toBe('NUMBER(38,0)');
expect(dialect.mapToDimensionType('TIMESTAMP_NTZ')).toBe('time');
expect(dialect.mapToDimensionType('NUMBER(38,0)')).toBe('number');
expect(dialect.mapToDimensionType('BOOLEAN')).toBe('boolean');
expect(dialect.mapToDimensionType('VARIANT')).toBe('string');
});
it('generates sampling and dictionary SQL', () => {
expect(dialect.generateSampleQuery('"PUBLIC"."ORDERS"', 5, ['ID', 'STATUS'])).toBe(
'SELECT "ID", "STATUS" FROM "PUBLIC"."ORDERS" SAMPLE ROW (5 ROWS)',
);
expect(dialect.generateColumnSampleQuery('"PUBLIC"."ORDERS"', 'STATUS', 10)).toBe(
'SELECT "STATUS" FROM "PUBLIC"."ORDERS" WHERE "STATUS" IS NOT NULL AND TRIM(CAST("STATUS" AS STRING)) != \'\' LIMIT 10',
);
expect(dialect.generateCardinalitySampleQuery('"PUBLIC"."ORDERS"', '"STATUS"', 100)).toContain(
'SELECT COUNT(DISTINCT val) AS cardinality',
);
expect(dialect.generateDistinctValuesQuery('"PUBLIC"."ORDERS"', '"STATUS"', 20)).toContain(
'SELECT DISTINCT "STATUS"::VARCHAR AS val',
);
});
it('passes Snowflake positional parameters as bind arrays', () => {
expect(dialect.prepareQuery('SELECT * FROM ORDERS WHERE ID = ? AND STATUS = ?', { id: 1, status: 'paid' })).toEqual({
sql: 'SELECT * FROM ORDERS WHERE ID = ? AND STATUS = ?',
params: [1, 'paid'],
});
expect(dialect.prepareQuery('SELECT * FROM ORDERS')).toEqual({ sql: 'SELECT * FROM ORDERS', params: undefined });
});
it('keeps unsupported statistics explicit', () => {
expect(dialect.generateColumnStatisticsQuery('PUBLIC', 'ORDERS')).toBeNull();
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SnowflakeTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSnowflakeDialect {
readonly type = 'snowflake';
/** @internal */
export class KtxSnowflakeDialect implements KtxDialect {
readonly type = 'snowflake' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
TIMESTAMP_NTZ: 'time',
@ -45,13 +54,19 @@ export class KtxSnowflakeDialect {
}
formatTableName(table: SnowflakeTableNameRef): string {
if (table.catalog && table.db) {
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
if (table.db) {
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
formatDisplayRef(table: SnowflakeTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -96,10 +111,6 @@ export class KtxSnowflakeDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' LIMIT ${limit}`;
}
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
return { sql, params: params ? Object.values(params) : undefined };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -115,7 +126,11 @@ export class KtxSnowflakeDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -126,6 +141,18 @@ export class KtxSnowflakeDialect {
return `APPROX_COUNT_DISTINCT(${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS TEXT))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS VARCHAR)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT LISTAGG(CAST(value AS VARCHAR), '\\x1f') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -164,24 +191,4 @@ export class KtxSnowflakeDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const target = timezone ? `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column;
return `DATE_TRUNC('${granularity}', ${target})`;
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const target = timezone ? `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column;
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `'${origin}'::TIMESTAMP` : `'1970-01-01'::TIMESTAMP`;
return `DATEADD(${unit}, FLOOR(DATEDIFF(${unit}, ${originExpr}, ${target}) / ${amount}) * ${amount}, ${originExpr})`;
}
parseIntervalToSql(interval: string): string {
return `INTERVAL '${interval}'`;
}
}

View file

@ -1,18 +0,0 @@
import { describe, expect, it } from 'vitest';
import { assertSafeSnowflakeIdentifier, quoteSnowflakeIdentifier } from './identifiers.js';
describe('Snowflake identifier guards', () => {
it('quotes simple Snowflake identifiers', () => {
expect(quoteSnowflakeIdentifier('ANALYTICS_DB', 'database')).toBe('"ANALYTICS_DB"');
expect(quoteSnowflakeIdentifier('ROLE_1$', 'role')).toBe('"ROLE_1$"');
});
it('rejects configured identifiers with field and value in the error', () => {
expect(() => assertSafeSnowflakeIdentifier('bad.db', 'database')).toThrow(
'Invalid Snowflake database identifier "bad.db"; use a simple unquoted identifier matching /^[A-Za-z_][A-Za-z0-9_$]*$/',
);
expect(() => assertSafeSnowflakeIdentifier('WH"DROP', 'warehouse')).toThrow(
'Invalid Snowflake warehouse identifier "WH\\"DROP"; use a simple unquoted identifier matching /^[A-Za-z_][A-Za-z0-9_$]*$/',
);
});
});

View file

@ -1,57 +0,0 @@
import { mkdtempSync, rmSync, statSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
const { configure } = vi.hoisted(() => ({ configure: vi.fn() }));
vi.mock('snowflake-sdk', () => ({
default: { configure },
}));
import {
configureSnowflakeSdkLogger,
resetSnowflakeSdkLoggerConfigurationForTests,
} from './sdk-logger.js';
describe('configureSnowflakeSdkLogger', () => {
let projectDir: string;
beforeEach(() => {
configure.mockReset();
resetSnowflakeSdkLoggerConfigurationForTests();
projectDir = mkdtempSync(join(tmpdir(), 'ktx-snowflake-logger-'));
});
afterEach(() => {
rmSync(projectDir, { recursive: true, force: true });
});
it('routes logs to <projectDir>/.ktx/logs/snowflake.log with console output disabled', () => {
const expected = resolve(projectDir, '.ktx', 'logs', 'snowflake.log');
const returned = configureSnowflakeSdkLogger(projectDir);
expect(returned).toBe(expected);
expect(configure).toHaveBeenCalledTimes(1);
expect(configure).toHaveBeenCalledWith({
logFilePath: expected,
additionalLogToConsole: false,
});
expect(statSync(resolve(projectDir, '.ktx', 'logs')).isDirectory()).toBe(true);
});
it('is idempotent for the same projectDir', () => {
configureSnowflakeSdkLogger(projectDir);
configureSnowflakeSdkLogger(projectDir);
expect(configure).toHaveBeenCalledTimes(1);
});
it('reconfigures when projectDir changes', () => {
const other = mkdtempSync(join(tmpdir(), 'ktx-snowflake-logger-other-'));
try {
configureSnowflakeSdkLogger(projectDir);
configureSnowflakeSdkLogger(other);
expect(configure).toHaveBeenCalledTimes(2);
} finally {
rmSync(other, { recursive: true, force: true });
}
});
});

View file

@ -1,270 +0,0 @@
import Database from 'better-sqlite3';
import { writeFileSync } from 'node:fs';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { createSqliteLiveDatabaseIntrospection } from '../../connectors/sqlite/live-database-introspection.js';
import { isKtxSqliteConnectionConfig, KtxSqliteScanConnector, sqliteDatabasePathFromConfig } from '../../connectors/sqlite/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
describe('KtxSqliteScanConnector', () => {
let tempDir: string;
let dbPath: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-connector-sqlite-'));
dbPath = join(tempDir, 'warehouse.db');
const db = new Database(dbPath);
db.exec(`
PRAGMA foreign_keys = ON;
CREATE TABLE customers (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
tier TEXT
);
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
customer_id INTEGER NOT NULL,
status TEXT,
total NUMERIC,
created_at TEXT,
FOREIGN KEY(customer_id) REFERENCES customers(id)
);
CREATE VIEW recent_orders AS SELECT id, customer_id, status FROM orders;
INSERT INTO customers (id, name, tier) VALUES (1, 'Ada', 'enterprise'), (2, 'Grace', 'growth');
INSERT INTO orders (id, customer_id, status, total, created_at)
VALUES (10, 1, 'paid', 42.5, '2026-04-28'), (11, 2, 'open', 9.5, '2026-04-29');
`);
db.close();
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('resolves SQLite path configuration safely', () => {
const originalDatabaseUrl = process.env.KTX_SQLITE_TEST_URL;
const pointerPath = join(tempDir, 'sqlite-path.txt');
process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;
writeFileSync(pointerPath, dbPath, 'utf-8');
try {
expect(isKtxSqliteConnectionConfig({ driver: 'sqlite', path: 'warehouse.db' })).toBe(true);
expect(isKtxSqliteConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(false);
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: 'warehouse.db' },
}),
).toBe(dbPath);
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' },
}),
).toBe(dbPath);
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', url: `file://${dbPath}` },
}),
).toBe(dbPath);
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: `file:${pointerPath}` },
}),
).toBe(dbPath);
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: 'warehouse.db' },
}),
).toBe(dbPath);
expect(() =>
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', file_path: 'warehouse.db' },
}),
).toThrow('Native SQLite connector requires connections.warehouse.path or url');
} finally {
if (originalDatabaseUrl === undefined) {
delete process.env.KTX_SQLITE_TEST_URL;
} else {
process.env.KTX_SQLITE_TEST_URL = originalDatabaseUrl;
}
}
});
it('introspects schema, primary keys, row counts, views, and foreign keys', async () => {
const connector = new KtxSqliteScanConnector({
connectionId: 'warehouse',
connection: { driver: 'sqlite', path: dbPath },
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlite' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'sqlite',
extractedAt: '2026-04-29T10:00:00.000Z',
metadata: {
file_path: dbPath,
table_count: 3,
total_columns: 11,
},
});
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows])).toEqual([
['customers', 'table', 2],
['orders', 'table', 2],
['recent_orders', 'view', null],
]);
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'INTEGER',
normalizedType: 'INTEGER',
dimensionType: 'number',
nullable: false,
primaryKey: true,
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: null,
toTable: 'customers',
toColumn: 'id',
constraintName: null,
},
]);
});
it('runs samples, distinct values, statistics, and read-only SQL', async () => {
const connector = new KtxSqliteScanConnector({
connectionId: 'warehouse',
connection: { driver: 'sqlite', path: dbPath },
});
await expect(
connector.sampleTable(
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, columns: ['id'], limit: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id'], rows: [[10]], totalRows: 1 });
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: null, db: null, name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from orders order by id', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
});
it('limits introspection to tables in tableScope', async () => {
const connector = new KtxSqliteScanConnector({
connectionId: 'warehouse',
connection: { driver: 'sqlite', path: dbPath },
});
const scope = tableRefSet([{ catalog: null, db: null, name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlite', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
});
it('adapts native SQLite snapshots to live-database introspection for local ingest', async () => {
const introspection = createSqliteLiveDatabaseIntrospection({
projectDir: tempDir,
connections: {
warehouse: { driver: 'sqlite', path: 'warehouse.db' },
},
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T10:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: null,
db: null,
columns: [
{
name: 'id',
nativeType: 'INTEGER',
normalizedType: 'INTEGER',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
{
name: 'name',
nativeType: 'TEXT',
normalizedType: 'TEXT',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
{
name: 'tier',
nativeType: 'TEXT',
normalizedType: 'TEXT',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
expect(snapshot.tables.find((table) => table.name === 'orders')).toMatchObject({
name: 'orders',
catalog: null,
db: null,
foreignKeys: [{ fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }],
});
});
});

View file

@ -3,11 +3,11 @@ import { existsSync, readFileSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { isAbsolute, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { normalizeQueryRows } from '../../context/connections/query-executor.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { KtxSqliteDialect } from './dialect.js';
export interface KtxSqliteConnectionConfig {
driver?: string;
@ -157,7 +157,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
private readonly connectionId: string;
private readonly dbPath: string;
private readonly now: () => Date;
private readonly dialect = new KtxSqliteDialect();
private readonly dialect = getDialectForDriver('sqlite');
private db: Database.Database | null = null;
constructor(options: KtxSqliteScanConnectorOptions) {
@ -209,6 +209,31 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
};
}
async listSchemas(): Promise<string[]> {
return [];
}
async listTables(_schemas?: string[]): Promise<KtxTableListEntry[]> {
const rows = this.database()
.prepare(
`
SELECT name, type
FROM sqlite_master
WHERE type IN ('table', 'view')
AND name NOT LIKE 'sqlite_%'
ORDER BY name
`,
)
.all() as SqliteMasterRow[];
return rows.map((row) => ({
catalog: null,
schema: '',
name: row.name,
kind: row.type === 'view' ? ('view' as const) : ('table' as const),
}));
}
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
this.assertConnection(input.connectionId);
const result = this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));

View file

@ -1,33 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxSqliteDialect } from './dialect.js';
describe('KtxSqliteDialect', () => {
const dialect = new KtxSqliteDialect();
it('quotes identifiers and formats single-file SQLite table names', () => {
expect(dialect.quoteIdentifier('orders')).toBe('"orders"');
expect(dialect.quoteIdentifier('weird"name')).toBe('"weird""name"');
expect(dialect.formatTableName({ catalog: 'ignored', db: 'ignored', name: 'orders' })).toBe('"orders"');
});
it('maps native SQLite types to KTX dimension types', () => {
expect(dialect.mapToDimensionType('INTEGER')).toBe('number');
expect(dialect.mapToDimensionType('numeric(10,2)')).toBe('number');
expect(dialect.mapToDimensionType('timestamp')).toBe('time');
expect(dialect.mapToDimensionType('VARCHAR(255)')).toBe('string');
expect(dialect.mapToDimensionType('bool')).toBe('boolean');
expect(dialect.mapToDimensionType('')).toBe('string');
});
it('builds sampling and distinct-value SQL without host-specific state', () => {
expect(dialect.generateSampleQuery('"orders"', 25, ['id', 'status'])).toBe(
'SELECT "id", "status" FROM "orders" LIMIT 25',
);
expect(dialect.generateColumnSampleQuery('"orders"', 'status', 10)).toBe(
'SELECT "status" FROM "orders" WHERE "status" IS NOT NULL AND TRIM(CAST("status" AS TEXT)) != \'\' LIMIT 10',
);
expect(dialect.generateDistinctValuesQuery('"orders"', '"status"', 5)).toContain(
'SELECT DISTINCT CAST("status" AS TEXT) AS val',
);
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SqliteTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSqliteDialect {
readonly type = 'sqlite';
/** @internal */
export class KtxSqliteDialect implements KtxDialect {
readonly type = 'sqlite' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
DATETIME: 'time',
@ -29,7 +38,19 @@ export class KtxSqliteDialect {
}
formatTableName(table: SqliteTableNameRef): string {
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'sqlite');
}
formatDisplayRef(table: SqliteTableNameRef): string {
return formatDialectDisplayRef(table, 'sqlite');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'sqlite');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('sqlite');
}
mapDataType(nativeType: string): string {
@ -76,10 +97,6 @@ export class KtxSqliteDialect {
return `SELECT ${quoted} FROM ${tableName} WHERE ${quoted} IS NOT NULL AND TRIM(CAST(${quoted} AS TEXT)) != '' LIMIT ${limit}`;
}
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown } {
return params ? { sql, params } : { sql };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -92,7 +109,11 @@ export class KtxSqliteDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -103,6 +124,18 @@ export class KtxSqliteDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS TEXT))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS TEXT)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -143,35 +176,4 @@ export class KtxSqliteDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
_timezone?: string,
): string {
switch (granularity) {
case 'day':
return `DATE(${column})`;
case 'week':
return `DATE(${column}, 'weekday 0', '-6 days')`;
case 'month':
return `DATE(${column}, 'start of month')`;
case 'quarter':
return `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`;
case 'year':
return `DATE(${column}, 'start of year')`;
}
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string {
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `julianday('${origin}')` : `julianday('1970-01-01')`;
const unitDays = unit === 'day' ? 1 : unit === 'week' ? 7 : 30;
const intervalDays = Number(amount) * unitDays;
return `DATE(julianday('1970-01-01') + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`;
}
parseIntervalToSql(interval: string): string {
return `'${interval}'`;
}
}

View file

@ -1,476 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createSqlServerLiveDatabaseIntrospection } from '../../connectors/sqlserver/live-database-introspection.js';
import { isKtxSqlServerConnectionConfig, KtxSqlServerScanConnector, sqlServerConnectionPoolConfigFromConfig, type KtxSqlServerConnectionConfig, type KtxSqlServerPoolFactory, type KtxSqlServerQueryResult } from '../../connectors/sqlserver/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function recordset<T extends Record<string, unknown>>(
rows: T[],
columnNames: string[],
): T[] & { columns: Record<string, { type: { declaration: string } }> } {
const withColumns = rows as T[] & { columns: Record<string, { type: { declaration: string } }> };
withColumns.columns = Object.fromEntries(columnNames.map((name) => [name, { type: { declaration: 'nvarchar' } }]));
return withColumns;
}
function result<T extends Record<string, unknown>>(rows: T[], columnNames: string[]): KtxSqlServerQueryResult {
return { recordset: recordset(rows, columnNames) };
}
function fakePoolFactory(options: { primaryKeyError?: Error; foreignKeyError?: Error } = {}): KtxSqlServerPoolFactory {
const query = vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return result(
[
{ table_name: 'customers', table_type: 'BASE TABLE' },
{ table_name: 'orders', table_type: 'BASE TABLE' },
{ table_name: 'order_summary', table_type: 'VIEW' },
],
['table_name', 'table_type'],
);
}
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = 0')) {
return result([{ table_name: 'customers', table_comment: 'Customer table' }], [
'table_name',
'table_comment',
]);
}
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = c.column_id')) {
return result([{ table_name: 'customers', column_name: 'id', column_comment: 'PK' }], [
'table_name',
'column_name',
'column_comment',
]);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return result(
[
{ table_name: 'customers', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'customers', column_name: 'name', data_type: 'nvarchar', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'customer_id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
{ table_name: 'order_summary', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
],
['table_name', 'column_name', 'data_type', 'is_nullable'],
);
}
if (sql.includes("CONSTRAINT_TYPE = 'PRIMARY KEY'")) {
if (options.primaryKeyError) {
throw options.primaryKeyError;
}
return result(
[
{ table_name: 'customers', column_name: 'id' },
{ table_name: 'orders', column_name: 'id' },
],
['table_name', 'column_name'],
);
}
if (sql.includes('REFERENTIAL_CONSTRAINTS')) {
if (options.foreignKeyError) {
throw options.foreignKeyError;
}
return result(
[
{
table_name: 'orders',
column_name: 'customer_id',
referenced_table_schema: 'dbo',
referenced_table_name: 'customers',
referenced_column_name: 'id',
constraint_name: 'orders_customer_id_fk',
},
],
[
'table_name',
'column_name',
'referenced_table_schema',
'referenced_table_name',
'referenced_column_name',
'constraint_name',
],
);
}
if (sql.includes('sys.partitions') && sql.includes('GROUP BY t.name')) {
return result(
[
{ table_name: 'customers', row_count: 2 },
{ table_name: 'orders', row_count: 2 },
],
['table_name', 'row_count'],
);
}
if (sql.includes('SELECT TOP 1 [id], [status] FROM [dbo].[orders]')) {
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
}
if (sql.includes('SELECT TOP 1 * FROM (select id, status from dbo.orders) AS ktx_query_result')) {
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
}
if (sql.includes('SELECT TOP 5 [status] FROM [dbo].[orders]')) {
return result([{ status: 'paid' }, { status: 'open' }], ['status']);
}
if (sql.includes('COUNT(DISTINCT val)')) {
return result([{ cardinality: 2 }], ['cardinality']);
}
if (sql.includes('SELECT TOP 10 val')) {
return result([{ val: 'open' }, { val: 'paid' }], ['val']);
}
if (sql.includes('SUM(p.rows) AS row_count') && sql.includes('t.name = @tableName')) {
return result([{ row_count: 2 }], ['row_count']);
}
if (sql.includes('SELECT s.name AS schema_name')) {
return result([{ schema_name: 'dbo' }, { schema_name: 'sales' }], ['schema_name']);
}
if (sql.trim() === 'SELECT 1') {
return result([{ ok: 1 }], ['ok']);
}
throw new Error(`Unexpected SQL: ${sql}`);
});
const request: { input(name: string, value: unknown): typeof request; query: typeof query } = {
input: vi.fn((_key: string, _value: unknown) => request),
query,
};
const close = vi.fn(async () => undefined);
return {
createPool: vi.fn(async () => ({
request: () => request,
close,
})),
};
}
describe('KtxSqlServerScanConnector', () => {
it('resolves SQL Server connection configuration safely', () => {
expect(
isKtxSqlServerConnectionConfig({
driver: 'sqlserver',
host: 'localhost',
database: 'analytics',
}),
).toBe(true);
expect(isKtxSqlServerConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false);
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
port: 14330,
database: 'analytics',
username: 'reader',
trustServerCertificate: false,
},
}),
).toMatchObject({
server: 'db.example.test',
port: 14330,
database: 'analytics',
user: 'reader',
options: { encrypt: true, trustServerCertificate: false },
});
});
it('defaults and validates SQL Server maxConnections', () => {
const baseConnection: KtxSqlServerConnectionConfig = {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
};
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: baseConnection,
}),
).toMatchObject({ pool: { max: 10 } });
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: 15 },
}),
).toMatchObject({ pool: { max: 15 } });
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: '12' as never },
}),
).toMatchObject({ pool: { max: 12 } });
for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) {
expect(() =>
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections },
}),
).toThrow('connections.warehouse.maxConnections must be a positive integer');
}
});
it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
const connector = new KtxSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T16:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlserver' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'sqlserver',
extractedAt: '2026-04-29T16:00:00.000Z',
scope: { catalogs: ['analytics'], schemas: ['dbo'] },
metadata: {
database: 'analytics',
host: 'db.example.test',
schemas: ['dbo'],
table_count: 3,
total_columns: 6,
},
});
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([
['customers', 'table', 2, 'Customer table'],
['orders', 'table', 2, null],
['order_summary', 'view', null, null],
]);
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: 'analytics',
toDb: 'dbo',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fk',
},
]);
});
it('soft-fails denied SQL Server constraint discovery with scan warnings', async () => {
const connector = new KtxSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory: fakePoolFactory({
primaryKeyError: Object.assign(new Error('SELECT permission denied'), { number: 229 }),
foreignKeyError: Object.assign(new Error('EXECUTE permission denied'), { number: 230 }),
}),
now: () => new Date('2026-04-29T16:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlserver' },
{ runId: 'scan-run-sqlserver-denied-constraints' },
);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in dbo (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'dbo', kind: 'primary_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped foreign-key discovery in dbo (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'dbo', kind: 'foreign_key' },
},
]);
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true);
});
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const poolFactory = fakePoolFactory();
const connector = new KtxSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory,
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: 'analytics', db: 'dbo', name: 'orders' },
columns: ['id', 'status'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({
headers: ['id', 'status'],
headerTypes: ['nvarchar', 'nvarchar'],
rows: [[10, 'paid']],
totalRows: 1,
});
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: 'analytics', db: 'dbo', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from dbo.orders', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
await expect(connector.listSchemas()).resolves.toEqual(['dbo', 'sales']);
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
it('limits introspection to tables in tableScope', async () => {
const queries: string[] = [];
const inputs: Array<{ name: string; value: unknown }> = [];
const request = {
input: vi.fn((name: string, value: unknown) => {
inputs.push({ name, value });
return request;
}),
query: vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
queries.push(sql);
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return result([{ table_name: 'orders', table_type: 'BASE TABLE' }], ['table_name', 'table_type']);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return result(
[{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' }],
['table_name', 'column_name', 'data_type', 'is_nullable'],
);
}
return result([], []);
}),
};
const poolFactory: KtxSqlServerPoolFactory = {
createPool: vi.fn(async () => ({
request: () => request,
close: vi.fn(async () => undefined),
})),
};
const connector = new KtxSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory,
});
const scope = tableRefSet([{ catalog: 'analytics', db: 'dbo', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlserver', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
expect(queries.find((query) => query.includes('INFORMATION_SCHEMA.TABLES'))).toMatch(/TABLE_NAME IN \(@table_0\)/);
expect(inputs).toEqual(expect.arrayContaining([{ name: 'table_0', value: 'orders' }]));
});
it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => {
const introspection = createSqlServerLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T16:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T16:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: 'analytics',
db: 'dbo',
columns: [
{
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
},
{
name: 'name',
nativeType: 'nvarchar',
normalizedType: 'nvarchar',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
});
});

View file

@ -1,4 +1,5 @@
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import {
@ -26,7 +27,6 @@ import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import sql from 'mssql';
import { KtxSqlServerDialect } from './dialect.js';
export interface KtxSqlServerConnectionConfig {
driver?: string;
@ -158,6 +158,21 @@ function tableScopeSql(
return { clause: `AND ${columnExpression} IN (${placeholders.join(', ')})`, params };
}
/** @internal */
export function prepareSqlServerReadOnlyQuery(
sql: string,
params?: Record<string, unknown>,
): { sql: string; params?: Record<string, unknown> } {
if (!params) {
return { sql, params: undefined };
}
let parameterizedQuery = sql;
for (const key of Object.keys(params)) {
parameterizedQuery = parameterizedQuery.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
}
return { sql: parameterizedQuery, params };
}
class DefaultSqlServerPoolFactory implements KtxSqlServerPoolFactory {
async createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool> {
const pool = await new sql.ConnectionPool(config as sql.config).connect();
@ -349,7 +364,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
private readonly poolFactory: KtxSqlServerPoolFactory;
private readonly endpointResolver?: KtxSqlServerEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KtxSqlServerDialect();
private readonly dialect = getDialectForDriver('sqlserver');
private pool: KtxSqlServerPool | null = null;
private resolvedEndpoint: KtxSqlServerResolvedEndpoint | null = null;
@ -427,7 +442,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxSqlServerReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForSqlServerExecution(input.sql, input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareSqlServerReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -517,6 +532,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
params,
);
return rows.map((row) => ({
catalog: this.poolConfig.database,
schema: row.schema_name,
name: row.table_name,
kind: row.table_type === 'VIEW' ? ('view' as const) : ('table' as const),

View file

@ -1,49 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxSqlServerDialect } from './dialect.js';
describe('KtxSqlServerDialect', () => {
const dialect = new KtxSqlServerDialect();
it('quotes identifiers and formats schema-qualified table names', () => {
expect(dialect.quoteIdentifier('events')).toBe('[events]');
expect(dialect.quoteIdentifier('odd]name')).toBe('[odd]]name]');
expect(dialect.formatTableName({ catalog: 'warehouse', db: 'dbo', name: 'events' })).toBe('[dbo].[events]');
expect(dialect.formatTableName({ catalog: null, db: null, name: 'events' })).toBe('[events]');
});
it('maps SQL Server types to KTX dimension types', () => {
expect(dialect.mapToDimensionType('datetime2')).toBe('time');
expect(dialect.mapToDimensionType('decimal(18, 2)')).toBe('number');
expect(dialect.mapToDimensionType('bigint')).toBe('number');
expect(dialect.mapToDimensionType('bit')).toBe('boolean');
expect(dialect.mapToDimensionType('uniqueidentifier')).toBe('string');
expect(dialect.mapToDimensionType('')).toBe('string');
});
it('builds sampling, distinct-value, pagination, and time SQL', () => {
expect(dialect.generateSampleQuery('[dbo].[events]', 25, ['id', 'event_name'])).toBe(
'SELECT TOP 25 [id], [event_name] FROM [dbo].[events]',
);
expect(dialect.generateColumnSampleQuery('[dbo].[events]', 'event_name', 10)).toBe(
"SELECT TOP 10 [event_name] FROM [dbo].[events] WHERE [event_name] IS NOT NULL AND LTRIM(RTRIM(CAST([event_name] AS NVARCHAR(MAX)))) != ''",
);
expect(dialect.generateDistinctValuesQuery('[dbo].[events]', '[event_name]', 5)).toContain('SELECT TOP 5 val');
expect(dialect.getTopClause(10)).toBe('TOP 10');
expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY');
expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe(
'DATEFROMPARTS(YEAR(created_at), MONTH(created_at), 1)',
);
});
it('prepares named parameters using SQL Server @ parameters', () => {
expect(
dialect.prepareQuery('select * from events where id = :id and name = :name', {
id: 10,
name: 'signup',
}),
).toEqual({
sql: 'select * from events where id = @id and name = @name',
params: { id: 10, name: 'signup' },
});
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
parseDialectDisplayRef,
safeSqlLimit,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SqlServerTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSqlServerDialect {
readonly type = 'sqlserver';
/** @internal */
export class KtxSqlServerDialect implements KtxDialect {
readonly type = 'sqlserver' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
datetime: 'time',
@ -39,9 +48,19 @@ export class KtxSqlServerDialect {
}
formatTableName(table: SqlServerTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
formatDisplayRef(table: SqlServerTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -86,17 +105,6 @@ export class KtxSqlServerDialect {
return `SELECT TOP ${limit} ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND LTRIM(RTRIM(CAST(${quotedColumn} AS NVARCHAR(MAX)))) != ''`;
}
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
if (!params) {
return { sql, params: undefined };
}
let parameterizedQuery = sql;
for (const key of Object.keys(params)) {
parameterizedQuery = parameterizedQuery.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
}
return { sql: parameterizedQuery, params };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -111,12 +119,12 @@ export class KtxSqlServerDialect {
return `TABLESAMPLE (${samplePct * 100} PERCENT)`;
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : '';
getLimitOffsetClause(_limit: number, _offset?: number): string {
return '';
}
getTopClause(limit: number): string {
return `TOP ${limit}`;
return `TOP (${safeSqlLimit(limit)})`;
}
getNullCountExpression(column: string): string {
@ -127,6 +135,18 @@ export class KtxSqlServerDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `LEN(CAST(${columnSql} AS NVARCHAR(MAX)))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS NVARCHAR(MAX))`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -167,35 +187,4 @@ export class KtxSqlServerDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const col = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column;
switch (granularity) {
case 'day':
return `CAST(${col} AS DATE)`;
case 'week':
return `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${col}), 0)`;
case 'month':
return `DATEFROMPARTS(YEAR(${col}), MONTH(${col}), 1)`;
case 'quarter':
return `DATEFROMPARTS(YEAR(${col}), (DATEPART(QUARTER, ${col}) - 1) * 3 + 1, 1)`;
case 'year':
return `DATEFROMPARTS(YEAR(${col}), 1, 1)`;
}
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column;
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `'${origin}'` : `'1970-01-01'`;
return `DATEADD(${unit}, (DATEDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount}, ${originExpr})`;
}
parseIntervalToSql(interval: string): string {
return `'${interval}'`;
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,19 +0,0 @@
import { describe, expect, it } from 'vitest';
import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from './bigquery-identifiers.js';
describe('BigQuery identifier normalization', () => {
it('normalizes project ids and regions for information schema paths', () => {
expect(normalizeBigQueryProjectId('project-1')).toBe('project-1');
expect(normalizeBigQueryRegion('US')).toBe('us');
expect(normalizeBigQueryRegion('region-eu')).toBe('eu');
});
it('rejects malformed project ids and regions with caller-specific context', () => {
expect(() => normalizeBigQueryProjectId('project`1', 'table discovery')).toThrow(
'Invalid BigQuery project id for table discovery: project`1',
);
expect(() => normalizeBigQueryRegion('US;DROP', 'table discovery')).toThrow(
'Invalid BigQuery region for table discovery: US;DROP',
);
});
});

View file

@ -0,0 +1,87 @@
import type { KtxTableRef } from '../scan/types.js';
export type KtxDialectIdentifierShape = 'ansi' | 'sqlite' | 'three-part';
export type KtxDialectTableRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export function safeSqlLimit(limit: number): number {
return Math.max(1, Math.floor(limit));
}
function safeSqlOffset(offset: number | undefined): number | null {
if (offset === undefined) {
return null;
}
const normalized = Math.floor(offset);
return normalized > 0 ? normalized : null;
}
function cleanIdentifierPart(part: string): string {
return part.trim().replace(/^["'`\[]|["'`\]]$/g, '');
}
function splitDisplay(display: string): string[] {
return display.trim().split('.').map(cleanIdentifierPart).filter(Boolean);
}
function tableParts(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string[] {
if (shape === 'sqlite') {
return [table.name];
}
return [table.catalog ?? null, table.db ?? null, table.name].filter((part): part is string => Boolean(part));
}
function acceptedDisplayPartCounts(shape: KtxDialectIdentifierShape): readonly number[] {
if (shape === 'sqlite') {
return [1];
}
if (shape === 'three-part') {
return [3];
}
return [2, 3];
}
export function formatDialectTableName(
table: KtxDialectTableRef,
quoteIdentifier: (identifier: string) => string,
shape: KtxDialectIdentifierShape,
): string {
return tableParts(table, shape).map(quoteIdentifier).join('.');
}
export function formatDialectDisplayRef(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string {
return tableParts(table, shape).join('.');
}
export function parseDialectDisplayRef(display: string, shape: KtxDialectIdentifierShape): KtxTableRef | null {
const parts = splitDisplay(display);
if (!acceptedDisplayPartCounts(shape).includes(parts.length)) {
return null;
}
if (parts.length === 1) {
return { catalog: null, db: null, name: parts[0]! };
}
if (parts.length === 2) {
return { catalog: null, db: parts[0]!, name: parts[1]! };
}
if (parts.length === 3) {
return { catalog: parts[0]!, db: parts[1]!, name: parts[2]! };
}
return null;
}
export function columnDisplayPartCount(shape: KtxDialectIdentifierShape): 1 | 2 | 3 {
if (shape === 'sqlite') {
return 1;
}
if (shape === 'three-part') {
return 3;
}
return 2;
}
export function limitOffsetClause(limit: number, offset?: number): string {
const safeLimit = safeSqlLimit(limit);
const safeOffset = safeSqlOffset(offset);
return safeOffset === null ? `LIMIT ${safeLimit}` : `LIMIT ${safeLimit} OFFSET ${safeOffset}`;
}

View file

@ -1,34 +0,0 @@
import { describe, expect, it } from 'vitest';
import { getDialectForDriver } from './dialects.js';
describe('getDialectForDriver', () => {
it.each([
['postgres', '"public"."orders"'],
['mysql', '`public`.`orders`'],
['clickhouse', '`public`.`orders`'],
['sqlite', '"orders"'],
['snowflake', '"analytics"."public"."orders"'],
['bigquery', '`analytics`.`public`.`orders`'],
['sqlserver', '[analytics].[public].[orders]'],
] as const)('formats table names for %s', (driver, expected) => {
const dialect = getDialectForDriver(driver);
expect(
dialect.formatTableName({
catalog: driver === 'snowflake' || driver === 'bigquery' || driver === 'sqlserver' ? 'analytics' : null,
db: driver === 'sqlite' ? null : 'public',
name: 'orders',
}),
).toBe(expected);
});
it('throws with a supported-driver list for unknown drivers', () => {
expect(() => getDialectForDriver('oracle')).toThrow(
'Unsupported warehouse driver "oracle". Supported drivers: bigquery, clickhouse, mysql, postgres, sqlite, snowflake, sqlserver',
);
});
it('rejects legacy driver aliases', () => {
expect(() => getDialectForDriver('postgresql')).toThrow('Unsupported warehouse driver "postgresql"');
expect(() => getDialectForDriver('sqlite3')).toThrow('Unsupported warehouse driver "sqlite3"');
});
});

View file

@ -1,22 +1,40 @@
import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
type SupportedDriver =
| 'postgres'
| 'mysql'
| 'sqlserver'
| 'snowflake'
| 'bigquery'
| 'clickhouse'
| 'sqlite';
import { KtxBigQueryDialect } from '../../connectors/bigquery/dialect.js';
import { KtxClickHouseDialect } from '../../connectors/clickhouse/dialect.js';
import { KtxMysqlDialect } from '../../connectors/mysql/dialect.js';
import { KtxPostgresDialect } from '../../connectors/postgres/dialect.js';
import { KtxSqliteDialect } from '../../connectors/sqlite/dialect.js';
import { KtxSnowflakeDialect } from '../../connectors/snowflake/dialect.js';
import { KtxSqlServerDialect } from '../../connectors/sqlserver/dialect.js';
import type { KtxConnectionDriver, KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
import type { KtxDialectTableRef } from './dialect-helpers.js';
export interface KtxDialect {
readonly type: SupportedDriver;
readonly type: KtxConnectionDriver;
quoteIdentifier(identifier: string): string;
formatTableName(table: KtxTableRef): string;
formatTableName(table: KtxDialectTableRef): string;
formatDisplayRef(table: KtxDialectTableRef): string;
parseDisplayRef(display: string): KtxTableRef | null;
columnDisplayTablePartCount(): 1 | 2 | 3;
getLimitOffsetClause(limit: number, offset?: number): string;
getTopClause(limit: number): string;
getRandomSampleFilter(samplePct: number): string;
getTableSampleClause(samplePct: number): string;
generateSampleQuery(tableName: string, limit: number, columns?: string[]): string;
generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string;
getSampleValueAggregation(innerSql: string): string;
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string;
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string;
generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string;
generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null;
getNullCountExpression(column: string): string;
getDistinctCountExpression(column: string): string;
textLengthExpression(columnSql: string): string;
castToText(columnSql: string): string;
mapToDimensionType(nativeType: string): KtxSchemaDimensionType;
mapDataType(nativeType: string): string;
}
const supportedDrivers: SupportedDriver[] = [
const supportedDrivers: KtxConnectionDriver[] = [
'bigquery',
'clickhouse',
'mysql',
@ -26,71 +44,21 @@ const supportedDrivers: SupportedDriver[] = [
'sqlserver',
];
function doubleQuoted(identifier: string): string {
return `"${identifier.replace(/"/g, '""')}"`;
}
function backtickQuoted(identifier: string): string {
return `\`${identifier.replace(/`/g, '``')}\``;
}
function bigQueryQuoted(identifier: string): string {
return `\`${identifier.replace(/`/g, '\\`')}\``;
}
function bracketQuoted(identifier: string): string {
return `[${identifier.replace(/\]/g, ']]')}]`;
}
function inferDimensionType(nativeType: string): KtxSchemaDimensionType {
const normalized = nativeType.toLowerCase().trim();
if (normalized.includes('date') || normalized.includes('time')) {
return 'time';
}
if (
normalized.includes('int') ||
normalized.includes('num') ||
normalized.includes('dec') ||
normalized.includes('float') ||
normalized.includes('double') ||
normalized.includes('real')
) {
return 'number';
}
if (normalized.includes('bool') || normalized === 'bit') {
return 'boolean';
}
return 'string';
}
function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string {
const parts = sqlite ? [table.name] : [table.catalog, table.db, table.name].filter((part): part is string => !!part);
return parts.map(quote).join('.');
}
function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect {
return {
type,
quoteIdentifier: quote,
formatTableName: (table) => formatWithParts(table, quote, sqlite),
mapToDimensionType: inferDimensionType,
};
}
const dialects: Record<SupportedDriver, KtxDialect> = {
postgres: createDialect('postgres', doubleQuoted),
mysql: createDialect('mysql', backtickQuoted),
clickhouse: createDialect('clickhouse', backtickQuoted),
sqlite: createDialect('sqlite', doubleQuoted, true),
snowflake: createDialect('snowflake', doubleQuoted),
bigquery: createDialect('bigquery', bigQueryQuoted),
sqlserver: createDialect('sqlserver', bracketQuoted),
const dialectFactories: Record<KtxConnectionDriver, () => KtxDialect> = {
bigquery: () => new KtxBigQueryDialect(),
clickhouse: () => new KtxClickHouseDialect(),
mysql: () => new KtxMysqlDialect(),
postgres: () => new KtxPostgresDialect(),
sqlite: () => new KtxSqliteDialect(),
snowflake: () => new KtxSnowflakeDialect(),
sqlserver: () => new KtxSqlServerDialect(),
};
export function getDialectForDriver(driver: string): KtxDialect {
const normalized = driver.toLowerCase().trim();
if (normalized in dialects) {
return dialects[normalized as SupportedDriver];
const factory = dialectFactories[normalized as KtxConnectionDriver];
if (factory) {
return factory();
}
throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`);
}

View file

@ -0,0 +1,199 @@
import type { KtxConnectionDriver, KtxScanConnector } from '../scan/types.js';
/** @internal */
export type KtxScopeConfigKey = 'dataset_ids' | 'databases' | 'schemas' | 'schema_names';
/** @internal */
export interface KtxDriverConnectorModule {
isConnectionConfig(connection: unknown): boolean;
createScanConnector(args: {
connectionId: string;
connection: unknown;
projectDir: string;
}): KtxScanConnector;
}
export interface KtxDriverRegistration {
readonly driver: KtxConnectionDriver;
readonly scopeConfigKey: KtxScopeConfigKey | null;
readonly hasHistoricSqlReader: boolean;
readonly hasLocalQueryExecutor: boolean;
load(): Promise<KtxDriverConnectorModule>;
}
function invalidConnectionConfig(driver: KtxConnectionDriver): Error {
return new Error(`Connection config does not match warehouse driver "${driver}".`);
}
/** @internal */
export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistration> = {
bigquery: {
driver: 'bigquery',
scopeConfigKey: 'dataset_ids',
hasHistoricSqlReader: true,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/bigquery/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxBigQueryConnectionConfig>[0];
return m.isKtxBigQueryConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxBigQueryConnectionConfig>[0];
if (!m.isKtxBigQueryConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('bigquery');
}
return new m.KtxBigQueryScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
clickhouse: {
driver: 'clickhouse',
scopeConfigKey: 'databases',
hasHistoricSqlReader: false,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/clickhouse/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxClickHouseConnectionConfig>[0];
return m.isKtxClickHouseConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxClickHouseConnectionConfig>[0];
if (!m.isKtxClickHouseConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('clickhouse');
}
return new m.KtxClickHouseScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
mysql: {
driver: 'mysql',
scopeConfigKey: 'schemas',
hasHistoricSqlReader: false,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/mysql/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxMysqlConnectionConfig>[0];
return m.isKtxMysqlConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxMysqlConnectionConfig>[0];
if (!m.isKtxMysqlConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('mysql');
}
return new m.KtxMysqlScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
postgres: {
driver: 'postgres',
scopeConfigKey: 'schemas',
hasHistoricSqlReader: true,
hasLocalQueryExecutor: true,
load: async () => {
const m = await import('../../connectors/postgres/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxPostgresConnectionConfig>[0];
return m.isKtxPostgresConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxPostgresConnectionConfig>[0];
if (!m.isKtxPostgresConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('postgres');
}
return new m.KtxPostgresScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
sqlite: {
driver: 'sqlite',
scopeConfigKey: null,
hasHistoricSqlReader: false,
hasLocalQueryExecutor: true,
load: async () => {
const m = await import('../../connectors/sqlite/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxSqliteConnectionConfig>[0];
return m.isKtxSqliteConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection, projectDir }) => {
const typedConnection = connection as Parameters<typeof m.isKtxSqliteConnectionConfig>[0];
if (!m.isKtxSqliteConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('sqlite');
}
return new m.KtxSqliteScanConnector({ connectionId, connection: typedConnection, projectDir });
},
};
},
},
snowflake: {
driver: 'snowflake',
scopeConfigKey: 'schema_names',
hasHistoricSqlReader: true,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/snowflake/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxSnowflakeConnectionConfig>[0];
return m.isKtxSnowflakeConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection, projectDir }) => {
const typedConnection = connection as Parameters<typeof m.isKtxSnowflakeConnectionConfig>[0];
if (!m.isKtxSnowflakeConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('snowflake');
}
return new m.KtxSnowflakeScanConnector({ connectionId, connection: typedConnection, projectDir });
},
};
},
},
sqlserver: {
driver: 'sqlserver',
scopeConfigKey: 'schemas',
hasHistoricSqlReader: false,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/sqlserver/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxSqlServerConnectionConfig>[0];
return m.isKtxSqlServerConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxSqlServerConnectionConfig>[0];
if (!m.isKtxSqlServerConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('sqlserver');
}
return new m.KtxSqlServerScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
};
const supportedDrivers = Object.keys(driverRegistrations).sort() as KtxConnectionDriver[];
function isRegisteredDriver(driver: string): driver is KtxConnectionDriver {
return Object.prototype.hasOwnProperty.call(driverRegistrations, driver);
}
export function getDriverRegistration(driver: string): KtxDriverRegistration | undefined {
const normalized = driver.toLowerCase().trim();
return isRegisteredDriver(normalized) ? driverRegistrations[normalized] : undefined;
}
export function listSupportedDrivers(): KtxConnectionDriver[] {
return [...supportedDrivers];
}

View file

@ -1,59 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createDefaultLocalQueryExecutor } from './local-query-executor.js';
describe('createDefaultLocalQueryExecutor', () => {
it('dispatches postgres and sqlite drivers to their executors', async () => {
const postgres = {
execute: vi.fn(async () => ({
headers: ['pg'],
rows: [[1]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
})),
};
const sqlite = {
execute: vi.fn(async () => ({
headers: ['sqlite'],
rows: [[2]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
})),
};
const executor = createDefaultLocalQueryExecutor({ postgres, sqlite });
await expect(
executor.execute({
connectionId: 'pg',
connection: { driver: 'postgres' },
sql: 'select 1',
}),
).resolves.toMatchObject({ headers: ['pg'] });
await expect(
executor.execute({
connectionId: 'local',
connection: { driver: 'sqlite' },
sql: 'select 1',
}),
).resolves.toMatchObject({ headers: ['sqlite'] });
expect(postgres.execute).toHaveBeenCalledTimes(1);
expect(sqlite.execute).toHaveBeenCalledTimes(1);
});
it('rejects unsupported local execution drivers', async () => {
const executor = createDefaultLocalQueryExecutor({
postgres: { execute: vi.fn() },
sqlite: { execute: vi.fn() },
});
await expect(
executor.execute({
connectionId: 'warehouse',
connection: { driver: 'snowflake' },
sql: 'select 1',
}),
).rejects.toThrow('No local query executor is configured for driver "snowflake".');
});
});

View file

@ -1,3 +1,4 @@
import { driverRegistrations, getDriverRegistration } from './drivers.js';
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
import type {
KtxSqlQueryExecutionInput,
@ -5,6 +6,7 @@ import type {
KtxSqlQueryExecutorPort,
} from './query-executor.js';
import { createSqliteQueryExecutor } from './sqlite-query-executor.js';
import type { KtxConnectionDriver } from '../scan/types.js';
export interface DefaultLocalQueryExecutorOptions {
postgres?: KtxSqlQueryExecutorPort;
@ -15,20 +17,43 @@ function driverFor(input: KtxSqlQueryExecutionInput): string {
return String(input.connection?.driver ?? '').toLowerCase();
}
function localExecutorMap(
options: DefaultLocalQueryExecutorOptions,
): Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> {
const wiredExecutors: Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> = {
postgres: options.postgres ?? createPostgresQueryExecutor(),
sqlite: options.sqlite ?? createSqliteQueryExecutor(),
};
const executors: Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> = {};
for (const registration of Object.values(driverRegistrations)) {
if (!registration.hasLocalQueryExecutor) continue;
const executor = wiredExecutors[registration.driver];
if (executor) {
executors[registration.driver] = executor;
}
}
return executors;
}
export function createDefaultLocalQueryExecutor(options: DefaultLocalQueryExecutorOptions = {}): KtxSqlQueryExecutorPort {
const postgres = options.postgres ?? createPostgresQueryExecutor();
const sqlite = options.sqlite ?? createSqliteQueryExecutor();
const executors = localExecutorMap(options);
return {
async execute(input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> {
const driver = driverFor(input);
if (driver === 'postgres') {
return postgres.execute(input);
const registration = getDriverRegistration(driver);
if (!registration?.hasLocalQueryExecutor) {
throw new Error(`No local query executor is configured for driver "${input.connection?.driver ?? 'unknown'}".`);
}
if (driver === 'sqlite') {
return sqlite.execute(input);
const executor = executors[registration.driver];
if (!executor) {
throw new Error(
`Local query executor flag is enabled for driver "${registration.driver}", but no executor factory is wired.`,
);
}
throw new Error(`No local query executor is configured for driver "${input.connection?.driver ?? 'unknown'}".`);
return executor.execute(input);
},
};
}

View file

@ -1,76 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
localConnectionInfoFromConfig,
localConnectionToWarehouseDescriptor,
localConnectionTypeForConfig,
} from './local-warehouse-descriptor.js';
describe('localConnectionToWarehouseDescriptor', () => {
it('maps local Postgres URLs to canonical warehouse descriptors', () => {
expect(
localConnectionToWarehouseDescriptor('warehouse', {
driver: 'postgres',
url: 'postgresql://readonly@db.example.test/analytics',
}),
).toMatchObject({
id: 'warehouse',
connection_type: 'POSTGRESQL',
host: 'db.example.test',
database: 'analytics',
});
});
it('maps BigQuery project and dataset from explicit fields', () => {
expect(
localConnectionToWarehouseDescriptor('bq', {
driver: 'bigquery',
project_id: 'acme',
dataset_id: 'warehouse',
}),
).toMatchObject({
id: 'bq',
connection_type: 'BIGQUERY',
project_id: 'acme',
dataset_id: 'warehouse',
});
});
it('returns null for non-warehouse adapters', () => {
expect(
localConnectionToWarehouseDescriptor('looker', {
driver: 'looker',
base_url: 'https://looker.example.com',
client_id: 'client',
}),
).toBeNull();
});
});
describe('local connection info helpers', () => {
it('returns canonical warehouse connection types for local catalogs', () => {
expect(localConnectionTypeForConfig('warehouse', { driver: 'postgres' })).toBe('POSTGRESQL');
expect(localConnectionTypeForConfig('bq', { driver: 'bigquery', project_id: 'acme' })).toBe('BIGQUERY');
expect(localConnectionTypeForConfig('snowflake', { driver: 'snowflake' })).toBe('SNOWFLAKE');
});
it('keeps removed driver aliases as display-only labels', () => {
expect(localConnectionTypeForConfig('warehouse', { driver: 'postgresql' } as never)).toBe('postgresql');
expect(localConnectionTypeForConfig('warehouse', { driver: 'mssql' } as never)).toBe('mssql');
});
it('keeps non-warehouse adapter labels for display-only local connection surfaces', () => {
expect(localConnectionTypeForConfig('prod-metabase', { driver: 'metabase', api_url: 'https://metabase.example.com' })).toBe(
'metabase',
);
expect(localConnectionTypeForConfig('missing-driver', {} as never)).toBe('unknown');
});
it('builds nullable local connection info records', () => {
expect(localConnectionInfoFromConfig('warehouse', { driver: 'postgres' })).toEqual({
id: 'warehouse',
name: 'warehouse',
connectionType: 'POSTGRESQL',
});
expect(localConnectionInfoFromConfig('missing', undefined)).toBeNull();
});
});

View file

@ -1,157 +0,0 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import {
notionConnectionToPullConfig,
parseNotionConnectionConfig,
redactNotionConnectionConfig,
resolveNotionAuthToken,
} from './notion-config.js';
describe('standalone Notion connection config', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-notion-config-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('parses selected-root Notion config with safe defaults', () => {
const parsed = parseNotionConnectionConfig({
driver: 'notion',
auth_token_ref: 'env:NOTION_TOKEN',
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
});
expect(parsed).toEqual({
driver: 'notion',
auth_token: null,
auth_token_ref: 'env:NOTION_TOKEN',
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
root_database_ids: [],
root_data_source_ids: [],
max_pages_per_run: 1000,
max_knowledge_creates_per_run: 25,
max_knowledge_updates_per_run: 20,
});
expect(parsed).not.toHaveProperty('last_successful_cursor');
});
it('parses inline Notion auth tokens without requiring auth_token_ref', () => {
const parsed = parseNotionConnectionConfig({
driver: 'notion',
auth_token: ' ntn_inline_token ',
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
});
expect(parsed).toMatchObject({
driver: 'notion',
auth_token: 'ntn_inline_token',
auth_token_ref: null,
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
});
});
it('redacts token references from display output', () => {
expect(
redactNotionConnectionConfig(
parseNotionConnectionConfig({
driver: 'notion',
auth_token_ref: 'file:/Users/example/.config/notion-token',
crawl_mode: 'all_accessible',
max_pages_per_run: 80,
}),
),
).toEqual({
driver: 'notion',
hasAuthToken: true,
crawlMode: 'all_accessible',
rootPageIds: [],
rootDatabaseIds: [],
rootDataSourceIds: [],
maxPagesPerRun: 80,
maxKnowledgeCreatesPerRun: 25,
maxKnowledgeUpdatesPerRun: 20,
warning: 'Anything accessible to this Notion integration can become organization knowledge.',
});
});
it('requires at least one selected root in selected_roots mode', () => {
expect(() =>
parseNotionConnectionConfig({
driver: 'notion',
auth_token_ref: 'env:NOTION_TOKEN',
crawl_mode: 'selected_roots',
}),
).toThrow('selected_roots requires at least one root page, database, or data source id');
});
it('resolves env and file token references without exposing the reference in errors', async () => {
const tokenPath = join(tempDir, 'notion-token.txt');
await writeFile(tokenPath, 'ntn_file_token\n', 'utf-8');
await expect(
resolveNotionAuthToken('env:NOTION_TOKEN', {
env: { NOTION_TOKEN: 'ntn_env_token' },
}),
).resolves.toBe('ntn_env_token');
await expect(resolveNotionAuthToken(`file:${tokenPath}`)).resolves.toBe('ntn_file_token');
await expect(resolveNotionAuthToken('env:MISSING_NOTION_TOKEN', { env: {} })).rejects.toThrow(
'Notion token environment variable MISSING_NOTION_TOKEN is not set',
);
});
it('converts standalone config into adapter pull config', async () => {
const pullConfig = await notionConnectionToPullConfig(
parseNotionConnectionConfig({
driver: 'notion',
auth_token_ref: 'env:NOTION_TOKEN',
crawl_mode: 'all_accessible',
max_pages_per_run: 12,
max_knowledge_creates_per_run: 2,
max_knowledge_updates_per_run: 7,
last_successful_cursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}',
}),
{ env: { NOTION_TOKEN: 'ntn_env_token' } },
);
expect(pullConfig).toEqual({
authToken: 'ntn_env_token',
crawlMode: 'all_accessible',
rootPageIds: [],
rootDatabaseIds: [],
rootDataSourceIds: [],
maxPagesPerRun: 12,
maxKnowledgeCreatesPerRun: 2,
maxKnowledgeUpdatesPerRun: 7,
lastSuccessfulCursor: null,
});
});
it('uses inline Notion auth_token when building adapter pull config', async () => {
const pullConfig = await notionConnectionToPullConfig(
parseNotionConnectionConfig({
driver: 'notion',
auth_token: 'ntn_inline_token',
auth_token_ref: 'env:STALE_NOTION_TOKEN',
crawl_mode: 'all_accessible',
}),
{
env: {},
readTextFile: async () => {
throw new Error('readTextFile should not be called for inline auth_token');
},
},
);
expect(pullConfig.authToken).toBe('ntn_inline_token');
});
});

View file

@ -1,103 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
function makeClient() {
const calls: unknown[] = [];
const client = {
connect: vi.fn(async () => undefined),
query: vi.fn(async (input: unknown) => {
calls.push(input);
if (input === 'BEGIN READ ONLY') {
return { rows: [], fields: [], rowCount: null, command: 'BEGIN' };
}
if (input === 'COMMIT') {
return { rows: [], fields: [], rowCount: null, command: 'COMMIT' };
}
return {
rows: [
['paid', 2],
['open', 1],
],
fields: [{ name: 'status' }, { name: 'order_count' }],
rowCount: 2,
command: 'SELECT',
};
}),
end: vi.fn(async () => undefined),
};
return { client, calls };
}
describe('createPostgresQueryExecutor', () => {
it('runs a read-only transaction in array row mode and closes the client', async () => {
const { client, calls } = makeClient();
const executor = createPostgresQueryExecutor({
clientFactory: vi.fn(() => client),
});
const result = await executor.execute({
connectionId: 'warehouse',
connection: { driver: 'postgres', url: 'postgres://example/db' },
sql: 'select status, count(*) as order_count from public.orders group by status',
maxRows: 50,
});
expect(client.connect).toHaveBeenCalledTimes(1);
expect(calls[0]).toBe('BEGIN READ ONLY');
expect(calls[1]).toEqual({
text: 'select * from (select status, count(*) as order_count from public.orders group by status) as ktx_query_result limit 50',
rowMode: 'array',
});
expect(calls[2]).toBe('COMMIT');
expect(client.end).toHaveBeenCalledTimes(1);
expect(result).toEqual({
headers: ['status', 'order_count'],
rows: [
['paid', 2],
['open', 1],
],
totalRows: 2,
command: 'SELECT',
rowCount: 2,
});
});
it('rolls back and closes the client when query execution fails', async () => {
const client = {
connect: vi.fn(async () => undefined),
query: vi.fn(async (input: unknown) => {
if (input === 'BEGIN READ ONLY' || input === 'ROLLBACK') {
return { rows: [], fields: [], rowCount: null, command: String(input) };
}
throw new Error('syntax error');
}),
end: vi.fn(async () => undefined),
};
const executor = createPostgresQueryExecutor({
clientFactory: vi.fn(() => client),
});
await expect(
executor.execute({
connectionId: 'warehouse',
connection: { driver: 'postgres', url: 'postgres://example/db' },
sql: 'select * from broken',
maxRows: 10,
}),
).rejects.toThrow('syntax error');
expect(client.query).toHaveBeenCalledWith('ROLLBACK');
expect(client.end).toHaveBeenCalledTimes(1);
});
it('requires a Postgres url', async () => {
const executor = createPostgresQueryExecutor({ clientFactory: vi.fn() });
await expect(
executor.execute({
connectionId: 'warehouse',
connection: { driver: 'postgres' },
sql: 'select 1',
}),
).rejects.toThrow('Local Postgres execution requires connections.warehouse.url');
});
});

View file

@ -1,30 +0,0 @@
import { describe, expect, it } from 'vitest';
import { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js';
describe('assertReadOnlySql', () => {
it('allows select and with queries', () => {
expect(assertReadOnlySql('select * from orders')).toBe('select * from orders');
expect(assertReadOnlySql('with paid as (select * from orders) select * from paid')).toContain('with paid');
});
it('rejects mutating statements before opening a database connection', () => {
expect(() => assertReadOnlySql('delete from orders')).toThrow(
'Only read-only SELECT/WITH queries can be executed locally',
);
expect(() => assertReadOnlySql('create table x(id int)')).toThrow(
'Only read-only SELECT/WITH queries can be executed locally',
);
});
});
describe('limitSqlForExecution', () => {
it('wraps compiled SQL and strips trailing semicolons', () => {
expect(limitSqlForExecution('select * from public.orders; ', 25)).toBe(
'select * from (select * from public.orders) as ktx_query_result limit 25',
);
});
it('returns the trimmed SQL when no maxRows value is provided', () => {
expect(limitSqlForExecution('select * from orders; ', undefined)).toBe('select * from orders');
});
});

View file

@ -1,139 +0,0 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import Database from 'better-sqlite3';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { createSqliteQueryExecutor, sqliteDatabasePathFromConnection } from './sqlite-query-executor.js';
describe('createSqliteQueryExecutor', () => {
let tempDir: string;
let dbPath: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-sqlite-query-'));
dbPath = join(tempDir, 'warehouse.db');
const db = new Database(dbPath);
db.exec(`
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
status TEXT NOT NULL,
amount INTEGER NOT NULL
);
INSERT INTO orders (status, amount) VALUES
('paid', 20),
('paid', 30),
('open', 10);
`);
db.close();
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('executes read-only SELECT SQL against a relative SQLite path', async () => {
const executor = createSqliteQueryExecutor();
const result = await executor.execute({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: 'warehouse.db' },
sql: 'select status, count(*) as order_count from orders group by status order by status',
maxRows: 10,
});
expect(result).toEqual({
headers: ['status', 'order_count'],
rows: [
['open', 1],
['paid', 2],
],
totalRows: 2,
command: 'SELECT',
rowCount: 2,
});
});
it('supports file urls for SQLite database paths', async () => {
expect(
sqliteDatabasePathFromConnection({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', url: `file://${dbPath}` },
sql: 'select 1',
}),
).toBe(dbPath);
});
it('resolves file references for SQLite path fields', async () => {
const pointerPath = join(tempDir, 'sqlite-path.txt');
writeFileSync(pointerPath, dbPath, 'utf-8');
expect(
sqliteDatabasePathFromConnection({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: `file:${pointerPath}` },
sql: 'select 1',
}),
).toBe(dbPath);
});
it('resolves env references for SQLite database urls', async () => {
const originalDatabaseUrl = process.env.KTX_SQLITE_TEST_URL;
process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;
try {
expect(
sqliteDatabasePathFromConnection({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' },
sql: 'select 1',
}),
).toBe(dbPath);
} finally {
if (originalDatabaseUrl === undefined) {
delete process.env.KTX_SQLITE_TEST_URL;
} else {
process.env.KTX_SQLITE_TEST_URL = originalDatabaseUrl;
}
}
});
it('rejects mutating SQL before opening the database', async () => {
const executor = createSqliteQueryExecutor();
await expect(
executor.execute({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: 'warehouse.db' },
sql: 'delete from orders',
}),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
});
it('requires a SQLite driver and a database path', async () => {
const executor = createSqliteQueryExecutor();
await expect(
executor.execute({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'postgres', path: 'warehouse.db' },
sql: 'select 1',
}),
).rejects.toThrow('Local SQLite execution cannot run driver "postgres"');
await expect(
executor.execute({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite' },
sql: 'select 1',
}),
).rejects.toThrow('Local SQLite execution requires connections.warehouse.path or connections.warehouse.url');
});
});

View file

@ -1,34 +0,0 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { resolveKtxConfigReference, resolveKtxHomePath } from './config-reference.js';
describe('KTX config references', () => {
it('resolves env references without returning empty values', () => {
expect(resolveKtxConfigReference('env:AI_GATEWAY_API_KEY', { AI_GATEWAY_API_KEY: ' gateway-key ' })).toBe(
'gateway-key',
);
expect(resolveKtxConfigReference('env:AI_GATEWAY_API_KEY', { AI_GATEWAY_API_KEY: ' ' })).toBeUndefined();
expect(resolveKtxConfigReference('env:AI_GATEWAY_API_KEY', {})).toBeUndefined();
});
it('resolves file references and trims file content', async () => {
const dir = join(tmpdir(), `ktx-config-reference-${process.pid}`);
await mkdir(dir, { recursive: true });
const keyPath = join(dir, 'gateway-key.txt');
await writeFile(keyPath, 'file-gateway-key\n', 'utf8');
expect(resolveKtxConfigReference(`file:${keyPath}`, {})).toBe('file-gateway-key');
});
it('returns literal values unchanged after trimming blank-only values', () => {
expect(resolveKtxConfigReference('provider/model', {})).toBe('provider/model');
expect(resolveKtxConfigReference(' ', {})).toBeUndefined();
expect(resolveKtxConfigReference(undefined, {})).toBeUndefined();
});
it('resolves home-prefixed paths', () => {
expect(resolveKtxHomePath('~/ktx/key.txt')).toContain('/ktx/key.txt');
});
});

View file

@ -1,75 +0,0 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import type { KtxCoreConfig } from './config.js';
import { createSimpleGit } from './git-env.js';
import { GitService } from './git.service.js';
describe('GitService.assertWorktreeClean', () => {
let workdir: string;
let git: SimpleGit;
let gitService: GitService;
beforeEach(async () => {
workdir = await mkdtemp(join(tmpdir(), 'gitsvc-clean-'));
git = createSimpleGit(workdir);
await git.init();
await git.addConfig('user.email', 't@test');
await git.addConfig('user.name', 'Test');
await writeFile(join(workdir, 'init'), 'init');
await git.add('.');
await git.commit('init');
const coreConfig: KtxCoreConfig = {
storage: { configDir: workdir, homeDir: workdir },
git: { userName: 'Test', userEmail: 't@test' },
};
gitService = new GitService(coreConfig);
(gitService as any).git = git;
(gitService as any).configDir = workdir;
});
afterEach(async () => rm(workdir, { recursive: true, force: true }));
it('does not throw on a clean worktree', async () => {
await expect(gitService.assertWorktreeClean()).resolves.toBeUndefined();
});
it('throws when MERGE_HEAD exists', async () => {
await writeFile(join(workdir, '.git', 'MERGE_HEAD'), 'deadbeef\n');
await expect(gitService.assertWorktreeClean()).rejects.toThrow(/MERGE_HEAD/);
});
it('throws when CHERRY_PICK_HEAD exists', async () => {
await writeFile(join(workdir, '.git', 'CHERRY_PICK_HEAD'), 'deadbeef\n');
await expect(gitService.assertWorktreeClean()).rejects.toThrow(/CHERRY_PICK_HEAD/);
});
it('throws when REVERT_HEAD exists', async () => {
await writeFile(join(workdir, '.git', 'REVERT_HEAD'), 'deadbeef\n');
await expect(gitService.assertWorktreeClean()).rejects.toThrow(/REVERT_HEAD/);
});
it('throws when sequencer/todo exists (interrupted multi-commit revert/cherry-pick)', async () => {
await mkdir(join(workdir, '.git', 'sequencer'), { recursive: true });
await writeFile(join(workdir, '.git', 'sequencer', 'todo'), 'pick deadbeef foo\n');
await expect(gitService.assertWorktreeClean()).rejects.toThrow(/sequencer/);
});
it('throws when the index has unmerged paths', async () => {
await git.checkoutLocalBranch('a');
await writeFile(join(workdir, 'shared'), 'A version');
await git.add('.');
await git.commit('a');
await git.checkout('master').catch(() => git.checkout('main'));
await git.checkoutLocalBranch('b');
await writeFile(join(workdir, 'shared'), 'B version');
await git.add('.');
await git.commit('b');
await git.raw(['merge', 'a']).catch(() => undefined);
await expect(gitService.assertWorktreeClean()).rejects.toThrow();
});
});

View file

@ -1,78 +0,0 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { mkdir, mkdtemp, readdir, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import type { KtxCoreConfig } from './config.js';
import { createSimpleGit } from './git-env.js';
import { GitService } from './git.service.js';
describe('GitService.deleteDirectories', () => {
let workdir: string;
let git: SimpleGit;
let gitService: GitService;
beforeEach(async () => {
workdir = await mkdtemp(join(tmpdir(), 'gitsvc-dd-'));
git = createSimpleGit(workdir);
await git.init();
await git.addConfig('user.email', 't@test');
await git.addConfig('user.name', 'Test');
await writeFile(join(workdir, 'keep'), 'k');
await git.add('.');
await git.commit('init');
const coreConfig: KtxCoreConfig = {
storage: { configDir: workdir, homeDir: workdir },
git: { userName: 'Test', userEmail: 't@test' },
};
gitService = new GitService(coreConfig);
(gitService as any).git = git;
(gitService as any).configDir = workdir;
});
afterEach(async () => rm(workdir, { recursive: true, force: true }));
it('removes multiple directories in a single commit', async () => {
for (const name of ['a', 'b', 'c']) {
await mkdir(join(workdir, name), { recursive: true });
await writeFile(join(workdir, name, 'f.txt'), name);
}
await git.add('.');
await git.commit('seed 3 dirs');
const beforeCommits = (await git.log()).total;
const result = await gitService.deleteDirectories(['a', 'b'], 'gc: drop a+b', 'System User', 'system@example.com');
expect(result.commitHash).toBeTruthy();
const entries = await readdir(workdir);
expect(entries).not.toContain('a');
expect(entries).not.toContain('b');
expect(entries).toContain('c');
const afterCommits = (await git.log()).total;
expect(afterCommits).toBe(beforeCommits + 1);
});
it('no-ops and returns a null hash when the input list is empty', async () => {
const result = await gitService.deleteDirectories([], 'empty', 'X', 'x@example.com');
expect(result.commitHash).toBe('');
expect(result.created).toBe(false);
});
it('ignores paths that have already been deleted — commits only the remaining ones', async () => {
await mkdir(join(workdir, 'stale'), { recursive: true });
await writeFile(join(workdir, 'stale', 'x'), 'x');
await git.add('.');
await git.commit('seed stale');
const result = await gitService.deleteDirectories(
['stale', 'missing'],
'gc: drop stale + missing',
'System User',
'system@example.com',
);
expect(result.commitHash).toBeTruthy();
const entries = await readdir(workdir);
expect(entries).not.toContain('stale');
});
});

View file

@ -1,45 +0,0 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { GitService } from './git.service.js';
async function makeGit() {
const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-'));
const configDir = join(homeDir, 'config');
const git = new GitService({
storage: { configDir, homeDir },
git: {
userName: 'System User',
userEmail: 'system@example.com',
bootstrapMessage: 'init',
bootstrapAuthor: 'system',
bootstrapAuthorEmail: 'system@example.com',
},
});
await git.onModuleInit();
return { homeDir, configDir, git };
}
describe('GitService patch helpers', () => {
it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
const { homeDir, configDir, git } = await makeGit();
await mkdir(join(configDir, 'wiki/global'), { recursive: true });
await writeFile(join(configDir, 'wiki/global/page.md'), 'old\n');
await git.commitFiles(['wiki/global/page.md'], 'add page', 'System User', 'system@example.com');
const base = await git.revParseHead();
await writeFile(join(configDir, 'wiki/global/page.md'), 'new\n');
await git.commitFiles(['wiki/global/page.md'], 'edit page', 'System User', 'system@example.com');
const patchPath = join(homeDir, 'proposal.patch');
await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);
const targetDir = join(homeDir, 'target');
await git.addWorktree(targetDir, 'target', base);
const targetGit = git.forWorktree(targetDir);
await targetGit.applyPatchFile3WayIndex(patchPath);
await targetGit.commitStaged('apply proposal', 'System User', 'system@example.com');
await expect(readFile(join(targetDir, 'wiki/global/page.md'), 'utf-8')).resolves.toBe('new\n');
});
});

View file

@ -1,56 +0,0 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import type { KtxCoreConfig } from './config.js';
import { createSimpleGit } from './git-env.js';
import { GitService } from './git.service.js';
describe('GitService.resetHardTo', () => {
let workdir: string;
let git: SimpleGit;
let gitService: GitService;
beforeEach(async () => {
workdir = await mkdtemp(join(tmpdir(), 'gitsvc-reset-'));
git = createSimpleGit(workdir);
await git.init();
await git.addConfig('user.email', 't@test');
await git.addConfig('user.name', 'Test');
await writeFile(join(workdir, 'init'), 'init');
await git.add('.');
await git.commit('init');
const coreConfig: KtxCoreConfig = {
storage: { configDir: workdir, homeDir: workdir },
git: { userName: 'Test', userEmail: 't@test' },
};
gitService = new GitService(coreConfig);
(gitService as any).git = git;
(gitService as any).configDir = workdir;
});
afterEach(async () => rm(workdir, { recursive: true, force: true }));
it('rewinds HEAD to the target SHA, removing later commits and their files', async () => {
const baseSha = (await git.revparse(['HEAD'])).trim();
await writeFile(join(workdir, 'a'), 'a1');
await git.add('.');
await git.commit('a');
await writeFile(join(workdir, 'b'), 'b1');
await git.add('.');
await git.commit('b');
await gitService.resetHardTo(baseSha);
expect((await git.revparse(['HEAD'])).trim()).toBe(baseSha);
expect(await readFile(join(workdir, 'a'), 'utf-8').catch(() => null)).toBeNull();
expect(await readFile(join(workdir, 'b'), 'utf-8').catch(() => null)).toBeNull();
});
it('is a no-op when target SHA equals current HEAD', async () => {
const sha = (await git.revparse(['HEAD'])).trim();
await gitService.resetHardTo(sha);
expect((await git.revparse(['HEAD'])).trim()).toBe(sha);
});
});

View file

@ -1,450 +0,0 @@
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxCoreConfig } from './config.js';
import { GitService } from './git.service.js';
// These tests drive a real git repo inside a temp directory — simple-git shells out to the
// system `git` binary. They are fast enough to run as unit tests and catch real issues that
// would be invisible with mocked git.
describe('GitService', () => {
let service: GitService;
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'git-service-spec-'));
const coreConfig: KtxCoreConfig = {
storage: { configDir: tempDir, homeDir: tempDir },
git: {
userName: 'Test User',
userEmail: 'test@example.com',
bootstrapMessage: 'Initialize test config repo',
bootstrapAuthor: 'test-system',
bootstrapAuthorEmail: 'system@example.com',
},
};
service = new GitService(coreConfig);
await service.onModuleInit();
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
const writeAndCommit = async (filePath: string, content: string, message = 'msg') => {
await writeFile(join(tempDir, filePath), content, 'utf-8');
return service.commitFile(filePath, message, 'Test', 'test@example.com');
};
describe('cold-start bootstrap commit', () => {
it('writes an empty commit on init so HEAD always resolves', async () => {
// beforeEach already ran onModuleInit() against an empty temp dir.
const head = await service.revParseHead();
expect(head).toMatch(/^[0-9a-f]{40}$/);
});
it('does not double-commit when re-initialized', async () => {
const before = await service.revParseHead();
await service.onModuleInit();
const after = await service.revParseHead();
expect(after).toBe(before);
});
it('keeps git auto-maintenance attached for deterministic cleanup', async () => {
const config = await readFile(join(tempDir, '.git', 'config'), 'utf-8');
expect(config).toMatch(/\[gc]\n\s+autoDetach = false/);
expect(config).toMatch(/\[maintenance]\n\s+autoDetach = false/);
});
it('initializes when release automation sets GIT_ASKPASS', async () => {
const releaseEnvDir = await mkdtemp(join(tmpdir(), 'git-service-release-env-'));
const previousAskPass = process.env.GIT_ASKPASS;
process.env.GIT_ASKPASS = 'echo';
try {
const releaseEnvService = new GitService({
storage: { configDir: releaseEnvDir, homeDir: releaseEnvDir },
git: {
userName: 'Test User',
userEmail: 'test@example.com',
bootstrapMessage: 'Initialize test config repo',
bootstrapAuthor: 'test-system',
bootstrapAuthorEmail: 'system@example.com',
},
});
await expect(releaseEnvService.onModuleInit()).resolves.toBeUndefined();
} finally {
if (previousAskPass === undefined) {
delete process.env.GIT_ASKPASS;
} else {
process.env.GIT_ASKPASS = previousAskPass;
}
await rm(releaseEnvDir, { recursive: true, force: true });
}
});
});
describe('commitFile `created` flag', () => {
it('is true for a real commit', async () => {
const info = await writeAndCommit('a.md', '# Hello');
expect(info.created).toBe(true);
});
it('is false on a no-op write (content unchanged)', async () => {
await writeAndCommit('a.md', '# Hello');
const second = await writeAndCommit('a.md', '# Hello', 'unused');
expect(second.created).toBe(false);
});
});
describe('addNote / getNote', () => {
it('attaches a note and reads it back', async () => {
const info = await writeAndCommit('a.md', '# Hello');
await service.addNote(info.commitHash, 'Rich message from LLM');
expect(await service.getNote(info.commitHash)).toBe('Rich message from LLM');
});
it('returns undefined when no note exists', async () => {
const info = await writeAndCommit('a.md', '# Hello');
expect(await service.getNote(info.commitHash)).toBeUndefined();
});
it('overwrites an existing note (idempotent retries)', async () => {
const info = await writeAndCommit('a.md', '# Hello');
await service.addNote(info.commitHash, 'First');
await service.addNote(info.commitHash, 'Second');
expect(await service.getNote(info.commitHash)).toBe('Second');
});
it('skips empty/whitespace messages silently', async () => {
const info = await writeAndCommit('a.md', '# Hello');
await service.addNote(info.commitHash, ' ');
expect(await service.getNote(info.commitHash)).toBeUndefined();
});
});
describe('getFileHistory', () => {
it('surfaces enhancedMessage when a note is present', async () => {
const info = await writeAndCommit('a.md', '# Hello');
await service.addNote(info.commitHash, 'Note body');
const history = await service.getFileHistory('a.md');
expect(history[0]?.enhancedMessage).toBe('Note body');
});
it('leaves enhancedMessage undefined when no note is attached', async () => {
await writeAndCommit('a.md', '# Hello');
const history = await service.getFileHistory('a.md');
expect(history[0]?.enhancedMessage).toBeUndefined();
});
});
describe('getCommitDiff', () => {
it('returns the patch scoped to the requested path', async () => {
const info = await writeAndCommit('a.md', '# Hello');
const diff = await service.getCommitDiff(info.commitHash, 'a.md');
expect(diff).toContain('diff --git');
expect(diff).toContain('Hello');
});
it('handles the repository initial commit without throwing', async () => {
const info = await writeAndCommit('first.md', 'first');
await expect(service.getCommitDiff(info.commitHash, 'first.md')).resolves.toBeDefined();
});
});
describe('squashTo', () => {
const writeAsSystem = async (filePath: string, content: string, message = 'msg') => {
await writeFile(join(tempDir, filePath), content, 'utf-8');
return service.commitFile(filePath, message, 'System User', 'system@example.com');
};
it('collapses 3 commits after preHead into a single commit', async () => {
const pre = await writeAsSystem('a.md', 'v1');
const preHead = pre.commitHash;
await writeAsSystem('b.md', 'b', 'add b');
await writeAsSystem('c.md', 'c', 'add c');
await writeAsSystem('a.md', 'v2', 'update a');
const result = await service.squashTo(preHead, {
message: 'Ingest: bundle 3 writes',
author: 'System User',
authorEmail: 'system@example.com',
});
expect(result.squashed).toBe(true);
expect(result.squashedCount).toBe(3);
expect(result.commitHash).toBeTruthy();
expect(result.commitHash).not.toBe(preHead);
const commitHash = result.commitHash;
if (!commitHash) {
throw new Error('Expected squash commit hash');
}
// The squashed commit should preserve the final tree state.
const fileAtSquash = await service.getFileAtCommit('a.md', commitHash);
expect(fileAtSquash).toBe('v2');
const bAtSquash = await service.getFileAtCommit('b.md', commitHash);
expect(bAtSquash).toBe('b');
});
it('is a no-op when preHead equals HEAD', async () => {
const pre = await writeAsSystem('a.md', 'v1');
const result = await service.squashTo(pre.commitHash, {
message: 'nothing to squash',
author: 'System User',
authorEmail: 'system@example.com',
});
expect(result.squashed).toBe(false);
expect(result.commitHash).toBe(pre.commitHash);
});
it('skips squash when a foreign-author commit sits between preHead and HEAD', async () => {
const pre = await writeAsSystem('a.md', 'v1');
const preHead = pre.commitHash;
await writeAsSystem('b.md', 'from us', 'ours');
// Foreign commit
await writeAndCommit('c.md', 'from someone else', 'foreign');
await writeAsSystem('d.md', 'ours again', 'ours 2');
const result = await service.squashTo(preHead, {
message: 'should be skipped',
author: 'System User',
authorEmail: 'system@example.com',
});
expect(result.squashed).toBe(false);
expect(result.reason).toContain('foreign');
expect(result.squashedCount).toBe(3);
});
it('returns cleanly when preHead is empty (no starting commit)', async () => {
const result = await service.squashTo('', {
message: 'would have squashed',
author: 'System User',
authorEmail: 'system@example.com',
});
expect(result.squashed).toBe(false);
expect(result.commitHash).toBeNull();
});
});
describe('worktree lifecycle', () => {
// macOS canonicalizes tmp paths (/var/folders → /private/var/folders) when git
// returns them from `worktree list`. Resolve through realpath() before comparing.
const canonicalSiblingPath = async (suffix: string): Promise<string> => {
const parent = await realpath(join(tempDir, '..'));
return join(parent, `wt-${Date.now()}-${suffix}`);
};
it('addWorktree creates a branch + directory at the given startSha', async () => {
const { commitHash } = await writeAndCommit('seed.md', 'seed');
const wtDir = await canonicalSiblingPath('add');
await service.addWorktree(wtDir, 'session/alpha', commitHash);
const list = await service.listWorktrees();
expect(list.find((e) => e.path === wtDir && e.branch === 'refs/heads/session/alpha')).toBeTruthy();
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
it('removeWorktree detaches the worktree entry', async () => {
const { commitHash } = await writeAndCommit('seed.md', 'seed');
const wtDir = await canonicalSiblingPath('rm');
await service.addWorktree(wtDir, 'session/beta', commitHash);
await service.removeWorktree(wtDir);
const list = await service.listWorktrees();
expect(list.find((e) => e.path === wtDir)).toBeFalsy();
});
it('deleteBranch removes a branch ref', async () => {
const { commitHash } = await writeAndCommit('seed.md', 'seed');
const wtDir = await canonicalSiblingPath('br');
await service.addWorktree(wtDir, 'session/gamma', commitHash);
await service.removeWorktree(wtDir);
await service.deleteBranch('session/gamma', true);
const branches = await (service as unknown as { git: import('simple-git').SimpleGit }).git.branchLocal();
expect(branches.all).not.toContain('session/gamma');
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
});
describe('forWorktree', () => {
it('returns a GitService whose operations run inside the given worktree', async () => {
const { commitHash } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-fw`);
await service.addWorktree(wtDir, 'session/delta', commitHash);
const scoped = service.forWorktree(wtDir);
expect(await scoped.revParseHead()).toBe(commitHash);
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
it('serializes concurrent commits from scoped services targeting the same worktree', async () => {
const { commitHash } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-fw-concurrent`);
await service.addWorktree(wtDir, 'session/concurrent', commitHash);
const first = service.forWorktree(wtDir);
const second = service.forWorktree(wtDir);
await writeFile(join(wtDir, 'a.md'), 'a\n', 'utf-8');
await writeFile(join(wtDir, 'b.md'), 'b\n', 'utf-8');
const [a, b] = await Promise.all([
first.commitFile('a.md', 'add a', 'System User', 'system@example.com'),
second.commitFile('b.md', 'add b', 'System User', 'system@example.com'),
]);
expect(a.commitHash).toMatch(/^[0-9a-f]{40}$/);
expect(b.commitHash).toMatch(/^[0-9a-f]{40}$/);
await expect(first.getFileAtCommit('a.md', a.commitHash)).resolves.toBe('a\n');
await expect(second.getFileAtCommit('b.md', b.commitHash)).resolves.toBe('b\n');
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
});
describe('squashMergeIntoMain', () => {
it('merges a session branch as one commit on main, returning the new SHA + touched paths', async () => {
const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-sm`);
await service.addWorktree(wtDir, 'session/happy', baseSha);
const scoped = service.forWorktree(wtDir);
await writeFile(join(wtDir, 'a.yaml'), 'one: 1\n', 'utf-8');
await scoped.commitFile('a.yaml', 'wip a', 'System User', 'system@example.com');
await writeFile(join(wtDir, 'b.yaml'), 'two: 2\n', 'utf-8');
await scoped.commitFile('b.yaml', 'wip b', 'System User', 'system@example.com');
const result = await service.squashMergeIntoMain(
'session/happy',
'System User',
'system@example.com',
'Memory capture: 2 files [chat=abcd1234]',
);
expect(result.ok).toBe(true);
if (!result.ok) {
throw new Error('unreachable');
}
expect(result.squashSha).toMatch(/^[0-9a-f]{40}$/);
expect(result.touchedPaths.sort()).toEqual(['a.yaml', 'b.yaml']);
const mainHead = await service.revParseHead();
expect(mainHead).toBe(result.squashSha);
expect(mainHead).not.toBe(baseSha);
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
it('returns ok with empty touchedPaths when the session branch has no diff vs main', async () => {
const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-sm-empty`);
await service.addWorktree(wtDir, 'session/empty', baseSha);
const result = await service.squashMergeIntoMain(
'session/empty',
'System User',
'system@example.com',
'should be a no-op',
);
expect(result.ok).toBe(true);
if (!result.ok) {
throw new Error('unreachable');
}
expect(result.touchedPaths).toEqual([]);
expect(result.squashSha).toBe(baseSha);
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
it('returns conflict=true and leaves main clean when session+main touched same file differently', async () => {
await writeAndCommit('shared.yaml', 'base\n');
const base = await service.revParseHead();
if (!base) {
throw new Error('no base head');
}
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-conf`);
await service.addWorktree(wtDir, 'session/conf', base);
const scoped = service.forWorktree(wtDir);
await writeFile(join(wtDir, 'shared.yaml'), 'session-edit\n', 'utf-8');
await scoped.commitFile('shared.yaml', 'session edit', 'System User', 'system@example.com');
// Main edits the same file a different way, after the session branched.
await writeAndCommit('shared.yaml', 'main-edit\n');
const result = await service.squashMergeIntoMain(
'session/conf',
'System User',
'system@example.com',
'Memory capture: 1 file [chat=dead1234]',
);
expect(result.ok).toBe(false);
if (result.ok) {
throw new Error('unreachable');
}
expect(result.conflict).toBe(true);
expect(result.conflictPaths).toContain('shared.yaml');
const status = await (service as unknown as { git: import('simple-git').SimpleGit }).git.status();
expect(status.isClean()).toBe(true);
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
it('reports untracked files that would be overwritten by the squash merge', async () => {
const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-untracked`);
await service.addWorktree(wtDir, 'session/untracked', baseSha);
const scoped = service.forWorktree(wtDir);
await writeFile(join(wtDir, 'knowledge.md'), 'session version\n', 'utf-8');
await scoped.commitFile('knowledge.md', 'session write', 'System User', 'system@example.com');
await writeFile(join(tempDir, 'knowledge.md'), 'untracked local version\n', 'utf-8');
const result = await service.squashMergeIntoMain(
'session/untracked',
'System User',
'system@example.com',
'Memory capture: 1 file [chat=untracked]',
);
expect(result.ok).toBe(false);
if (result.ok) {
throw new Error('unreachable');
}
expect(result.conflict).toBe(true);
expect(result.conflictPaths).toEqual(['knowledge.md']);
const status = await (service as unknown as { git: import('simple-git').SimpleGit }).git.status();
expect(status.not_added).toContain('knowledge.md');
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
});
});

View file

@ -1,124 +0,0 @@
import { mkdtemp, realpath, rm, stat } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { KtxCoreConfig } from './config.js';
import { GitService } from './git.service.js';
import { SessionWorktreeService, type WorktreeConfigPort } from './session-worktree.service.js';
interface TestWorktreeConfig extends WorktreeConfigPort<TestWorktreeConfig> {
workdir?: string;
}
// SessionWorktreeService glues a real GitService to a scoped config adapter.
describe('SessionWorktreeService', () => {
let sessionService: SessionWorktreeService<TestWorktreeConfig>;
let gitService: GitService;
let homeDir: string;
beforeEach(async () => {
homeDir = await mkdtemp(join(tmpdir(), 'sws-spec-'));
homeDir = await realpath(homeDir);
const coreConfig: KtxCoreConfig = {
storage: { configDir: homeDir, homeDir },
git: {
userName: 'System User',
userEmail: 'system@example.com',
bootstrapMessage: 'Initialize test config repo',
bootstrapAuthor: 'test-system',
bootstrapAuthorEmail: 'system@example.com',
},
};
gitService = new GitService(coreConfig);
await gitService.onModuleInit();
const configService: TestWorktreeConfig = {
forWorktree: vi.fn(
(workdir: string): TestWorktreeConfig => ({ workdir, forWorktree: configService.forWorktree }),
),
};
sessionService = new SessionWorktreeService({
coreConfig,
gitService,
configService,
});
});
afterEach(async () => {
await rm(homeDir, { recursive: true, force: true });
});
describe('create', () => {
it('creates a worktree + branch and returns scoped services', async () => {
const baseSha = await gitService.revParseHead();
if (!baseSha) {
throw new Error('no base sha');
}
const session = await sessionService.create('chat-abc', baseSha);
expect(session.workdir).toBe(join(homeDir, '.worktrees', 'session-chat-abc'));
expect(session.branch).toBe('session/chat-abc');
expect(session.baseSha).toBe(baseSha);
const stats = await stat(session.workdir);
expect(stats.isDirectory()).toBe(true);
// Scoped git instance reports the worktree's HEAD (= baseSha at creation time).
expect(await session.git.revParseHead()).toBe(baseSha);
const list = await gitService.listWorktrees();
expect(list.find((e) => e.path === session.workdir)).toBeTruthy();
});
it('appends a timestamp suffix when the primary dir already exists', async () => {
const baseSha = await gitService.revParseHead();
if (!baseSha) {
throw new Error('no base sha');
}
const first = await sessionService.create('chat-dup', baseSha);
const second = await sessionService.create('chat-dup', baseSha);
expect(first.workdir).not.toBe(second.workdir);
expect(second.branch).toMatch(/^session\/chat-dup-\d+$/);
});
});
describe('cleanup', () => {
it('success removes the worktree dir and deletes the branch', async () => {
const baseSha = await gitService.revParseHead();
if (!baseSha) {
throw new Error('no base sha');
}
const session = await sessionService.create('chat-cleanup-ok', baseSha);
await sessionService.cleanup(session, 'success');
const list = await gitService.listWorktrees();
expect(list.find((e) => e.path === session.workdir)).toBeFalsy();
await expect(stat(session.workdir)).rejects.toThrow();
});
it('conflict keeps the worktree and writes a sentinel file', async () => {
const baseSha = await gitService.revParseHead();
if (!baseSha) {
throw new Error('no base sha');
}
const session = await sessionService.create('chat-cleanup-conflict', baseSha);
await sessionService.cleanup(session, 'conflict', { conflictPaths: ['shared.yaml'] });
// Dir still exists.
await expect(stat(session.workdir)).resolves.toBeTruthy();
const { readFile } = await import('node:fs/promises');
const raw = await readFile(join(session.workdir, '.ktx-outcome'), 'utf-8');
const parsed = JSON.parse(raw);
expect(parsed.outcome).toBe('conflict');
expect(parsed.chatId).toBe('chat-cleanup-conflict');
expect(parsed.conflictPaths).toEqual(['shared.yaml']);
expect(typeof parsed.at).toBe('string');
});
});
});

View file

@ -1,343 +0,0 @@
import { once } from 'node:events';
import { createServer } from 'node:http';
import { describe, expect, it, vi } from 'vitest';
import { createHttpSemanticLayerComputePort, createPythonSemanticLayerComputePort } from './semantic-layer-compute.js';
const source = {
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'number' }],
joins: [],
measures: [{ name: 'order_count', expr: 'count(*)' }],
};
const sourceGenerationInput = {
tables: [
{
name: 'orders',
db: 'public',
comment: 'Orders table',
columns: [
{ name: 'id', type: 'integer', primaryKey: true, nullable: false, comment: 'Order ID' },
{ name: 'customer_id', type: 'integer' },
{ name: 'amount', type: 'decimal', comment: 'Order amount' },
],
},
{
name: 'customers',
db: 'public',
columns: [
{ name: 'id', type: 'integer', primaryKey: true },
{ name: 'email', type: 'varchar' },
],
},
],
links: [
{
fromTable: 'orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
relationshipType: 'MANY_TO_ONE',
},
],
dialect: 'postgres',
};
const sourceGenerationDaemonPayload = {
tables: [
{
name: 'orders',
db: 'public',
comment: 'Orders table',
columns: [
{ name: 'id', type: 'integer', primary_key: true, nullable: false, comment: 'Order ID' },
{ name: 'customer_id', type: 'integer' },
{ name: 'amount', type: 'decimal', comment: 'Order amount' },
],
},
{
name: 'customers',
db: 'public',
columns: [
{ name: 'id', type: 'integer', primary_key: true },
{ name: 'email', type: 'varchar' },
],
},
],
links: [
{
from_table: 'orders',
from_column: 'customer_id',
to_table: 'customers',
to_column: 'id',
relationship_type: 'MANY_TO_ONE',
},
],
dialect: 'postgres',
};
const sourceGenerationDaemonResponse = {
source_count: 2,
sources: [
{
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'number' }],
joins: [
{
to: 'customers',
on: 'customer_id = customers.id',
relationship: 'many_to_one',
},
],
measures: [{ name: 'record_count', expr: 'count(id)' }],
},
],
};
describe('createPythonSemanticLayerComputePort', () => {
it('calls the semantic-query stdio command', async () => {
const runJson = vi.fn(async () => ({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
}));
const port = createPythonSemanticLayerComputePort({
runJson,
projectId: 'hashed-project-id',
});
await expect(
port.query({
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
}),
).resolves.toEqual({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
});
expect(runJson).toHaveBeenCalledWith('semantic-query', {
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
projectId: 'hashed-project-id',
});
});
it('calls the semantic-validate stdio command', async () => {
const runJson = vi.fn(async () => ({
valid: true,
errors: [],
warnings: [],
per_source_warnings: {},
}));
const port = createPythonSemanticLayerComputePort({ runJson });
await expect(
port.validateSources({
sources: [source],
dialect: 'postgres',
recentlyTouched: ['orders'],
}),
).resolves.toEqual({
valid: true,
errors: [],
warnings: [],
perSourceWarnings: {},
});
expect(runJson).toHaveBeenCalledWith('semantic-validate', {
sources: [source],
dialect: 'postgres',
recently_touched: ['orders'],
});
});
it('calls the semantic-generate-sources stdio command', async () => {
const runJson = vi.fn(async () => sourceGenerationDaemonResponse);
const port = createPythonSemanticLayerComputePort({ runJson });
await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
sourceCount: 2,
sources: sourceGenerationDaemonResponse.sources,
});
expect(runJson).toHaveBeenCalledWith('semantic-generate-sources', sourceGenerationDaemonPayload);
});
});
describe('createHttpSemanticLayerComputePort', () => {
it('calls semantic query and validate HTTP endpoints through an injected runner', async () => {
const requestJson = vi.fn(async (path: string) => {
if (path === '/semantic-layer/query') {
return {
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
};
}
return {
valid: true,
errors: [],
warnings: [],
per_source_warnings: {},
};
});
const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson });
await expect(
port.query({
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
}),
).resolves.toEqual({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
});
await expect(
port.validateSources({
sources: [source],
dialect: 'postgres',
recentlyTouched: ['orders'],
}),
).resolves.toEqual({
valid: true,
errors: [],
warnings: [],
perSourceWarnings: {},
});
expect(requestJson).toHaveBeenNthCalledWith(1, '/semantic-layer/query', {
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
});
expect(requestJson).toHaveBeenNthCalledWith(2, '/semantic-layer/validate', {
sources: [source],
dialect: 'postgres',
recently_touched: ['orders'],
});
});
it('calls the semantic source-generation HTTP endpoint through an injected runner', async () => {
const requestJson = vi.fn(async () => sourceGenerationDaemonResponse);
const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson });
await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
sourceCount: 2,
sources: sourceGenerationDaemonResponse.sources,
});
expect(requestJson).toHaveBeenCalledWith('/semantic-layer/generate-sources', sourceGenerationDaemonPayload);
});
it('posts JSON to a running HTTP daemon endpoint', async () => {
const requests: Array<{ url: string | undefined; body: unknown }> = [];
const server = createServer((request, response) => {
const chunks: Buffer[] = [];
request.on('data', (chunk: Buffer) => chunks.push(chunk));
request.on('end', () => {
requests.push({
url: request.url,
body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
});
response.writeHead(200, { 'content-type': 'application/json' });
response.end(
JSON.stringify({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
}),
);
});
});
server.listen(0, '127.0.0.1');
await once(server, 'listening');
try {
const address = server.address();
if (!address || typeof address === 'string') {
throw new Error('expected TCP server address');
}
const port = createHttpSemanticLayerComputePort({ baseUrl: `http://127.0.0.1:${address.port}` });
await expect(
port.query({
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
}),
).resolves.toMatchObject({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
});
expect(requests).toEqual([
{
url: '/semantic-layer/query',
body: {
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
},
},
]);
} finally {
server.close();
}
});
it('posts source-generation JSON to a running HTTP daemon endpoint', async () => {
const requests: Array<{ url: string | undefined; body: unknown }> = [];
const server = createServer((request, response) => {
const chunks: Buffer[] = [];
request.on('data', (chunk: Buffer) => chunks.push(chunk));
request.on('end', () => {
requests.push({
url: request.url,
body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
});
response.writeHead(200, { 'content-type': 'application/json' });
response.end(JSON.stringify(sourceGenerationDaemonResponse));
});
});
server.listen(0, '127.0.0.1');
await once(server, 'listening');
try {
const address = server.address();
if (!address || typeof address === 'string') {
throw new Error('expected TCP server address');
}
const port = createHttpSemanticLayerComputePort({ baseUrl: `http://127.0.0.1:${address.port}` });
await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
sourceCount: 2,
sources: sourceGenerationDaemonResponse.sources,
});
expect(requests).toEqual([
{
url: '/semantic-layer/generate-sources',
body: sourceGenerationDaemonPayload,
},
]);
} finally {
server.close();
}
});
});

View file

@ -1,196 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../context/project/project.js';
import { SqliteKnowledgeIndex } from '../wiki/sqlite-knowledge-index.js';
import { reindexLocalIndexes } from './reindex.js';
class FakeEmbeddingPort implements KtxEmbeddingPort {
readonly maxBatchSize = 8;
async computeEmbedding(text: string): Promise<number[]> {
return [text.length, 1];
}
async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
return texts.map((text) => [text.length, 1]);
}
}
async function createProject(tempDir: string): Promise<KtxLocalProject> {
await initKtxProject({ projectDir: tempDir, force: true });
return loadKtxProject({ projectDir: tempDir });
}
describe('reindexLocalIndexes', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-reindex-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('returns an empty summary when no wiki or semantic-layer directories exist', async () => {
const project = await createProject(tempDir);
await rm(join(project.projectDir, 'wiki'), { recursive: true, force: true });
await rm(join(project.projectDir, 'semantic-layer'), { recursive: true, force: true });
await expect(reindexLocalIndexes(project, { force: false, embeddingService: null })).resolves.toMatchObject({
scopes: [],
totals: { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 },
force: false,
embeddingsAvailable: false,
});
});
it('discovers empty directories as zero-row scopes', async () => {
const project = await createProject(tempDir);
await mkdir(join(project.projectDir, 'wiki/user/local'), { recursive: true });
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(summary.scopes.map((scope) => scope.label)).toEqual(['global', 'user/local', 'warehouse']);
expect(summary.totals.scanned).toBe(0);
});
it('indexes mixed wiki and SL sources and reports totals', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
'utf-8',
);
const summary = await reindexLocalIndexes(project, {
force: false,
embeddingService: new FakeEmbeddingPort(),
});
expect(summary.scopes).toHaveLength(2);
expect(summary.totals).toMatchObject({ scanned: 2, updated: 2, deleted: 0, embeddingsRecomputed: 2 });
expect(summary.embeddingsAvailable).toBe(true);
});
it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
'utf-8',
);
const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(first.totals).toMatchObject({
scanned: 2,
updated: 2,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(second.totals).toMatchObject({
scanned: 2,
updated: 0,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
expect(second.scopes.map((scope) => [scope.label, scope.updated])).toEqual([
['global', 0],
['warehouse', 0],
]);
});
it('force clears stale rows before rebuilding each discovered scope', async () => {
const project = await createProject(tempDir);
const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
wikiIndex.sync([
{
path: 'wiki/global/stale.md',
key: 'stale',
scope: 'GLOBAL',
scopeId: null,
summary: 'Stale',
content: 'Stale content',
tags: [],
embedding: [1, 0],
},
]);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
const summary = await reindexLocalIndexes(project, {
force: true,
embeddingService: new FakeEmbeddingPort(),
});
expect(summary.force).toBe(true);
expect(summary.totals).toMatchObject({ scanned: 1, updated: 1, deleted: 0 });
expect(wikiIndex.search('Stale', 10)).toEqual([]);
});
it('captures a per-scope error and continues other scopes', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/broken.yaml'), 'not: [valid', 'utf-8');
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML');
});
it('marks a scope errored when configured embeddings fail', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
const embeddingService: KtxEmbeddingPort = {
maxBatchSize: 8,
async computeEmbedding() {
throw new Error('embedding provider unavailable');
},
async computeEmbeddingsBulk() {
throw new Error('embedding provider unavailable');
},
};
const summary = await reindexLocalIndexes(project, { force: false, embeddingService });
expect(summary.scopes[0]).toMatchObject({
label: 'global',
embeddingsFailed: 1,
error: '1 embedding recomputation failed',
});
});
});

View file

@ -1,42 +0,0 @@
import { describe, expect, it } from 'vitest';
import { actionTargetConnectionId, memoryActionIdentity } from './action-identity.js';
describe('memory action target identity', () => {
it('keys SL actions by target connection and wiki actions by run connection', () => {
expect(
memoryActionIdentity(
{ target: 'sl', type: 'created', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
'looker-run',
),
).toBe('sl:warehouse-b:orders');
expect(memoryActionIdentity({ target: 'sl', type: 'created', key: 'orders', detail: '' }, 'warehouse-a')).toBe(
'sl:warehouse-a:orders',
);
expect(
memoryActionIdentity(
{
target: 'wiki',
type: 'created',
key: 'wiki/global/orders.md',
detail: '',
targetConnectionId: 'ignored',
},
'looker-run',
),
).toBe('wiki:looker-run:wiki/global/orders.md');
});
it('resolves action target connection only for SL actions', () => {
expect(
actionTargetConnectionId(
{ target: 'sl', type: 'updated', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
'looker-run',
),
).toBe('warehouse-b');
expect(actionTargetConnectionId({ target: 'wiki', type: 'updated', key: 'orders', detail: '' }, 'looker-run')).toBe(
'looker-run',
);
});
});

View file

@ -1,214 +0,0 @@
import { describe, expect, it } from 'vitest';
import { parseDbtSchemaFile, parseDbtSchemaFiles } from './parse-schema.js';
describe('dbt descriptions schema parser', () => {
it('resolves shared dbt vars and defaults before parsing schema YAML', () => {
const result = parseDbtSchemaFile(
`
version: 2
sources:
- name: raw
database: "{{ var('database') }}"
schema: "{{ var('schema', 'fallback_schema') }}"
tables:
- name: orders
identifier: fct_orders
description: "Orders from {{ var('database') }}"
columns:
- name: customer_id
description: "Customer id"
tests:
- relationships:
to: ref('customers')
field: id
models:
- name: "{{ var('model_name', 'orders_model') }}"
schema: "{{ var('model_schema') }}"
columns:
- name: id
description: "Order id"
`,
{ path: 'models/schema.yml', variables: new Map([['database', 'analytics'], ['model_schema', 'mart']]) },
);
expect(result.tables).toEqual([
{
name: 'fct_orders',
description: 'Orders from analytics',
database: 'analytics',
schema: 'fallback_schema',
columns: [
{
name: 'customer_id',
description: 'Customer id',
dataType: null,
dataTests: [{ name: 'relationships', package: 'dbt', kwargs: { to: "ref('customers')", field: 'id' } }],
},
],
resourceType: 'source',
},
{
name: 'orders_model',
description: null,
database: null,
schema: 'mart',
columns: [{ name: 'id', description: 'Order id', dataType: null }],
resourceType: 'model',
},
]);
expect(result.relationships).toEqual([
{
fromTable: 'fct_orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
fromSchema: 'fallback_schema',
},
]);
});
it('deduplicates tables by database schema and name while merging columns', () => {
const result = parseDbtSchemaFiles([
{
path: 'models/a.yml',
content: `
version: 2
models:
- name: orders
description: Orders
columns:
- name: id
description: Primary key
`,
},
{
path: 'models/b.yml',
content: `
version: 2
models:
- name: orders
columns:
- name: status
description: Status
- name: id
data_type: integer
`,
},
]);
expect(result.tables).toEqual([
{
name: 'orders',
description: 'Orders',
database: null,
schema: null,
resourceType: 'model',
columns: [
{ name: 'id', description: 'Primary key', dataType: 'integer' },
{ name: 'status', description: 'Status', dataType: null },
],
},
]);
});
it('returns an empty result for malformed YAML and preserves unresolved Jinja text', () => {
expect(parseDbtSchemaFile('{{{{ invalid yaml', { path: 'broken.yml' })).toEqual({
projectName: null,
dbtVersion: null,
tables: [],
relationships: [],
});
const unresolved = parseDbtSchemaFile(
`
version: 2
models:
- name: "{{ var('missing_model') }}"
`,
{ variables: new Map() },
);
expect(unresolved.tables[0]?.name).toBe("{{ var('missing_model') }}");
});
it('extracts data tests, constraints, enum values, tags, and freshness', () => {
const result = parseDbtSchemaFile(`
version: 2
sources:
- name: raw
schema: jaffle
tags: ["raw"]
tables:
- name: customers
tags: ["core"]
loaded_at_field: updated_at
freshness:
warn_after: { count: 12, period: hour }
columns:
- name: id
tests:
- not_null
- unique
- name: status
data_tests:
- accepted_values:
values: ['active', 'inactive']
models:
- name: orders
tags: ["finance"]
loaded_at_field: run_at
columns:
- name: status
data_tests:
- dbt_utils.expression_is_true:
expression: "status is not null"
- accepted_values: ['placed', 'shipped']
`);
const customers = result.tables.find((table) => table.name === 'customers');
expect(customers?.tagsDbt).toEqual(['raw', 'core']);
expect(customers?.freshnessDbt?.loadedAtField).toBe('updated_at');
expect(customers?.freshnessDbt?.raw).toBeDefined();
const id = customers?.columns.find((column) => column.name === 'id');
expect(id?.constraints?.dbt).toEqual({ not_null: true, unique: true });
const status = customers?.columns.find((column) => column.name === 'status');
expect(status?.enumValuesDbt).toEqual(['active', 'inactive']);
const orders = result.tables.find((table) => table.name === 'orders');
expect(orders?.tagsDbt).toEqual(['finance']);
expect(orders?.freshnessDbt?.loadedAtField).toBe('run_at');
const ordersStatus = orders?.columns.find((column) => column.name === 'status');
expect(ordersStatus?.enumValuesDbt).toEqual(['placed', 'shipped']);
expect(ordersStatus?.dataTests).toEqual(
expect.arrayContaining([
expect.objectContaining({ package: 'dbt_utils', name: 'expression_is_true' }),
expect.objectContaining({ package: 'dbt', name: 'accepted_values' }),
]),
);
});
it('parses relationships from model column data tests', () => {
const result = parseDbtSchemaFile(`
version: 2
models:
- name: orders
schema: public
columns:
- name: customer_id
data_tests:
- relationships:
arguments:
to: "ref('customers')"
field: id
`);
expect(result.relationships).toEqual([
{
fromTable: 'orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
fromSchema: 'public',
},
]);
});
});

View file

@ -1,36 +0,0 @@
import { describe, expect, it } from 'vitest';
import { chunkDbtProject } from './chunk.js';
describe('chunkDbtProject', () => {
const diffSet = (modified: string[]) => ({ added: [], modified, deleted: [], unchanged: [] });
it('caps peerFileIndex when the project has very many yaml files', () => {
const modelPaths = Array.from({ length: 201 }, (_, i) => `models/m${i}.yml`);
const allPaths = ['dbt_project.yml', ...modelPaths].sort();
const { workUnits } = chunkDbtProject({ allPaths });
const [first] = workUnits;
expect(first).toBeDefined();
expect(first?.peerFileIndex).toHaveLength(200);
expect(first?.notes).toMatch(/capped at 200/);
});
it('keeps large-project model work units when dbt_project.yml changes', () => {
const modelPaths = Array.from({ length: 30 }, (_, i) => `models/m${i}.yml`);
const allPaths = ['dbt_project.yml', ...modelPaths].sort();
const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: diffSet(['dbt_project.yml']) });
expect(workUnits).toHaveLength(30);
expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']);
expect(workUnits[0]?.dependencyPaths).toContain('dbt_project.yml');
});
it('keeps large-project model work units when non-model yaml peers change', () => {
const modelPaths = Array.from({ length: 30 }, (_, i) => `models/m${i}.yml`);
const allPaths = ['dbt_project.yml', 'seeds/seed_properties.yml', ...modelPaths].sort();
const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: diffSet(['seeds/seed_properties.yml']) });
expect(workUnits).toHaveLength(30);
expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']);
expect(workUnits[0]?.dependencyPaths).toContain('seeds/seed_properties.yml');
});
});

View file

@ -1,57 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { SourceAdapter } from '../../types.js';
import { DbtSourceAdapter } from './dbt.adapter.js';
describe('DbtSourceAdapter', () => {
let stagedDir: string;
let adapter: SourceAdapter;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'dbt-adapter-'));
adapter = new DbtSourceAdapter();
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('declares the expected source key and skill list', () => {
expect(adapter.source).toBe('dbt');
expect(adapter.skillNames).toEqual(['dbt_ingest']);
});
it('detects a staged dbt project root (dbt_project.yml)', async () => {
await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\nversion: '1.0.0'\n", 'utf-8');
expect(await adapter.detect(stagedDir)).toBe(true);
});
it('chunk: dbt_project.yml + models/a.yml yields one WU (≤25 files)', async () => {
await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\n", 'utf-8');
await mkdir(join(stagedDir, 'models'), { recursive: true });
await writeFile(
join(stagedDir, 'models/a.yml'),
'version: 2\nmodels:\n - name: orders\n description: Orders\n',
'utf-8',
);
const result = await adapter.chunk(stagedDir);
expect(result.workUnits).toHaveLength(1);
expect(result.workUnits[0].unitKey).toBe('dbt-all');
expect(result.parseArtifacts).toMatchObject({
projectName: 'jaffle',
tables: [{ name: 'orders', description: 'Orders' }],
});
});
it('implements fetch() for git-backed dbt source setup', () => {
expect(adapter.fetch).toBeTypeOf('function');
});
it('reports mapped warehouse targets for bundle SL discovery', async () => {
adapter = new DbtSourceAdapter({ targetConnectionIds: ['postgres-warehouse', 'postgres-warehouse'] });
await expect(adapter.listTargetConnectionIds?.(stagedDir)).resolves.toEqual(['postgres-warehouse']);
});
});

View file

@ -1,38 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { fetchDbtRepo } from './fetch.js';
describe('fetchDbtRepo', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-dbt-fetch-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('copies dbt yaml files from a fetched repo subpath into staged dir', async () => {
const cacheDir = join(tempDir, 'cache');
const stagedDir = join(tempDir, 'staged');
await mkdir(join(cacheDir, 'analytics', 'models'), { recursive: true });
await writeFile(join(cacheDir, 'analytics', 'dbt_project.yml'), 'name: analytics\n', 'utf-8');
await writeFile(join(cacheDir, 'analytics', 'models', 'orders.yml'), 'models: []\n', 'utf-8');
const cloneOrPull = vi.fn(async () => ({ commitHash: 'abc123' }));
await expect(
fetchDbtRepo({
config: { repoUrl: 'https://github.com/acme/dbt.git', path: 'analytics' },
cacheDir,
stagedDir,
deps: { cloneOrPull },
}),
).resolves.toEqual({ commitHash: 'abc123', filesCopied: 2 });
await expect(readFile(join(stagedDir, 'dbt_project.yml'), 'utf-8')).resolves.toContain('analytics');
await expect(readFile(join(stagedDir, 'models', 'orders.yml'), 'utf-8')).resolves.toContain('models');
});
});

View file

@ -1,8 +0,0 @@
import { describe, expect, it } from 'vitest';
import { normalizeDbtPath } from './parse.js';
describe('normalizeDbtPath', () => {
it('normalizes Windows separators to POSIX separators', () => {
expect(normalizeDbtPath('models\\marts\\orders.yml')).toBe('models/marts/orders.yml');
});
});

View file

@ -1,158 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { BigQueryHistoricSqlQueryHistoryReader } from './bigquery-query-history-reader.js';
import { HistoricSqlGrantsMissingError } from './errors.js';
interface FakeQueryResult {
headers: string[];
rows: unknown[][];
totalRows: number;
error?: string;
}
function queryClient(results: FakeQueryResult[]) {
const executeQuery = vi.fn(async (_query: string) => {
const next = results.shift();
if (!next) {
throw new Error('unexpected query');
}
return next;
});
return { executeQuery };
}
function firstQuery(client: ReturnType<typeof queryClient>): string {
const call = client.executeQuery.mock.calls[0];
if (!call) {
throw new Error('expected query client to be called');
}
return call[0];
}
describe('BigQueryHistoricSqlQueryHistoryReader', () => {
it('probes region-qualified INFORMATION_SCHEMA.JOBS_BY_PROJECT', async () => {
const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
await expect(reader.probe(client)).resolves.toEqual({ warnings: [], info: [] });
expect(client.executeQuery).toHaveBeenCalledWith(
'SELECT 1 FROM `project-1.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT` LIMIT 1',
);
});
it('turns probe result errors into HistoricSqlGrantsMissingError', async () => {
const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Access Denied: jobs.listAll' }]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'us-central1' });
await expect(reader.probe(client)).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'bigquery',
remediation:
'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.',
});
});
it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => {
const client = {
executeQuery: vi.fn(async () => {
throw new Error('permission denied');
}),
};
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
it('fetches aggregated BigQuery query templates', async () => {
const client = queryClient([
{
headers: [
'template_id',
'canonical_sql',
'executions',
'distinct_users',
'first_seen',
'last_seen',
'p50_ms',
'p95_ms',
'error_rate',
'rows_produced',
'top_users',
],
rows: [
[
'hash-1',
'select status from orders',
42,
3,
'2026-05-01T00:00:00.000Z',
'2026-05-11T00:00:00.000Z',
12,
40,
0.05,
null,
JSON.stringify([{ user: 'analyst@example.test', executions: 1 }]),
],
],
totalRows: 1,
},
]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'demo', region: 'us' });
const rows = [];
for await (const row of reader.fetchAggregated(
client,
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
{ dialect: 'bigquery', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
)) {
rows.push(row);
}
const sql = firstQuery(client);
expect(sql).toContain('COUNT(*) AS executions');
expect(sql).toContain('COUNT(DISTINCT user_email) AS distinct_users');
expect(sql).toContain('GROUP BY query_hash');
expect(sql).toContain('HAVING COUNT(*) >= 5');
expect(rows).toMatchObject([
{
templateId: 'hash-1',
stats: {
executions: 42,
errorRate: 0.05,
},
topUsers: [{ user: 'analyst@example.test', executions: 1 }],
},
]);
});
it('throws a clear error when the query client cannot execute SQL', async () => {
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
await expect(async () => {
for await (const _row of reader.fetchAggregated(
{},
{ start: new Date(), end: new Date() },
{
dialect: 'bigquery',
minExecutions: 5,
windowDays: 90,
enabledTables: [],
filters: { dropTrivialProbes: true },
redactionPatterns: [],
staleArchiveAfterDays: 90,
},
)) {
throw new Error('unreachable');
}
}).rejects.toThrow('Historic SQL BigQuery reader requires a query client with executeQuery(query)');
});
it('rejects unsafe project and region identifiers before building SQL', () => {
expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project`1', region: 'US' })).toThrow(
'Invalid BigQuery project id for historic-SQL ingest: project`1',
);
expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US;DROP' })).toThrow(
'Invalid BigQuery region for historic-SQL ingest: US;DROP',
);
});
});

View file

@ -1,59 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
bucketDistinctUsers,
bucketErrorRate,
bucketExecutions,
bucketFrequency,
bucketP95Runtime,
bucketRecency,
} from './buckets.js';
describe('historic-sql bucket helpers', () => {
it('uses stable execution buckets', () => {
expect([0, 9, 10, 99, 100, 999, 1000, 4999, 5000, 49999, 50000].map(bucketExecutions)).toEqual([
'<10',
'<10',
'10-100',
'10-100',
'100-1k',
'100-1k',
'1k-5k',
'1k-5k',
'5k-50k',
'5k-50k',
'>50k',
]);
});
it('uses stable distinct-user, error-rate, runtime, and recency buckets', () => {
expect([0, 1, 2, 5, 6, 10, 11].map(bucketDistinctUsers)).toEqual([
'0',
'1',
'2-5',
'2-5',
'5-10',
'5-10',
'>10',
]);
expect([0, 0.01, 0.05, 0.2].map(bucketErrorRate)).toEqual(['none', 'low', 'low', 'high']);
expect([null, 99, 100, 999, 1000, 9999, 10000].map(bucketP95Runtime)).toEqual([
'unknown',
'<100ms',
'100ms-1s',
'100ms-1s',
'1s-10s',
'1s-10s',
'>10s',
]);
expect(bucketRecency('2026-05-11T00:00:00.000Z', new Date('2026-05-11T12:00:00.000Z'))).toBe('current');
expect(bucketRecency('2026-04-20T00:00:00.000Z', new Date('2026-05-11T12:00:00.000Z'))).toBe('recent');
expect(bucketRecency('2026-01-01T00:00:00.000Z', new Date('2026-05-11T12:00:00.000Z'))).toBe('stale');
});
it('maps frequency counts to high, mid, and low labels', () => {
expect(bucketFrequency(80, 100)).toBe('high');
expect(bucketFrequency(20, 100)).toBe('mid');
expect(bucketFrequency(1, 100)).toBe('low');
expect(bucketFrequency(0, 0)).toBe('low');
});
});

View file

@ -1,182 +0,0 @@
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { chunkHistoricSqlUnifiedStagedDir, describeHistoricSqlUnifiedScope } from './chunk-unified.js';
async function tempDir(): Promise<string> {
return mkdtemp(join(tmpdir(), 'historic-sql-unified-chunk-'));
}
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
const target = join(root, relPath);
await mkdir(join(target, '..'), { recursive: true });
await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
async function writeUnifiedStagedDir(root: string): Promise<void> {
await writeJson(root, 'manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 1,
touchedTableCount: 1,
parseFailures: 0,
warnings: [],
probeWarnings: [],
});
await writeJson(root, 'tables/public.orders.json', {
table: 'public.orders',
stats: {
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
errorRateBucket: 'none',
p95RuntimeBucket: '<100ms',
recencyBucket: 'current',
},
columnsByClause: { select: [['status', 'high']] },
observedJoins: [],
topTemplates: [{ id: 'orders', canonicalSql: 'select * from public.orders', topUsers: [{ user: 'analyst' }] }],
});
await writeJson(root, 'patterns-input.json', {
templates: [
{
id: 'orders',
canonicalSql: 'select * from public.orders join public.customers on true',
tablesTouched: ['public.orders', 'public.customers'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
],
});
await writeJson(root, 'patterns-input/part-0001.json', {
templates: [
{
id: 'orders',
canonicalSql: 'select * from public.orders join public.customers on true',
tablesTouched: ['public.orders', 'public.customers'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
],
});
}
describe('chunkHistoricSqlUnifiedStagedDir', () => {
it('emits one table WorkUnit plus one patterns WorkUnit', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
const result = await chunkHistoricSqlUnifiedStagedDir(stagedDir);
expect(result.workUnits).toEqual([
expect.objectContaining({
unitKey: 'historic-sql-table-public-orders',
displayLabel: 'Historic SQL usage: public.orders',
rawFiles: ['tables/public.orders.json'],
dependencyPaths: ['manifest.json'],
notes: expect.stringContaining('historic_sql_table_digest'),
}),
expect.objectContaining({
unitKey: 'historic-sql-patterns-part-0001',
displayLabel: 'Historic SQL cross-table patterns: part-0001',
rawFiles: ['patterns-input/part-0001.json'],
dependencyPaths: ['manifest.json'],
notes: expect.stringContaining('patterns-input/part-0001.json'),
}),
]);
expect(result.workUnits[0]?.notes).toContain('emit_historic_sql_evidence');
expect(result.workUnits[1]?.notes).toContain('emit_historic_sql_evidence');
expect(result.reconcileNotes).toEqual(['Historic-SQL touched tables=1 parseFailures=0']);
});
it('respects diff sets for unchanged table and patterns files', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
await expect(
chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: [],
modified: ['tables/public.orders.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input.json', 'patterns-input/part-0001.json'],
}),
).resolves.toMatchObject({
workUnits: [expect.objectContaining({ unitKey: 'historic-sql-table-public-orders' })],
});
await expect(
chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: [],
modified: ['patterns-input/part-0001.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input.json', 'tables/public.orders.json'],
}),
).resolves.toMatchObject({
workUnits: [expect.objectContaining({ unitKey: 'historic-sql-patterns-part-0001' })],
});
await expect(
chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: [],
modified: ['patterns-input.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input/part-0001.json', 'tables/public.orders.json'],
}),
).resolves.toMatchObject({
workUnits: [],
});
});
it('describes unified staged scope', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
const scope = await describeHistoricSqlUnifiedScope(stagedDir);
expect(scope.isPathInScope('manifest.json')).toBe(true);
expect(scope.isPathInScope('patterns-input.json')).toBe(true);
expect(scope.isPathInScope('patterns-input/part-0001.json')).toBe(true);
expect(scope.isPathInScope('patterns-input/part-1.json')).toBe(false);
expect(scope.isPathInScope('tables/public.orders.json')).toBe(true);
expect(scope.isPathInScope('templates/old/page.md')).toBe(false);
});
it('emits one patterns WorkUnit per changed shard', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
await writeJson(stagedDir, 'patterns-input/part-0002.json', {
templates: [
{
id: 'line-items',
canonicalSql: 'select * from public.orders join public.line_items on true',
tablesTouched: ['public.orders', 'public.line_items'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
],
});
const result = await chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: ['patterns-input/part-0002.json'],
modified: ['patterns-input/part-0001.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input.json', 'tables/public.orders.json'],
});
expect(result.workUnits.map((unit) => unit.unitKey)).toEqual([
'historic-sql-patterns-part-0001',
'historic-sql-patterns-part-0002',
]);
expect(result.workUnits.map((unit) => unit.rawFiles)).toEqual([
['patterns-input/part-0001.json'],
['patterns-input/part-0002.json'],
]);
});
});

View file

@ -1,5 +1,9 @@
import { getDriverRegistration } from '../../../connections/drivers.js';
import type { KtxConnectionDriver } from '../../../scan/types.js';
import type { HistoricSqlDialect } from './types.js';
const historicSqlDialects: readonly HistoricSqlDialect[] = ['postgres', 'bigquery', 'snowflake'];
function recordOrNull(value: unknown): Record<string, unknown> | null {
return value && typeof value === 'object' && !Array.isArray(value) ? (value as Record<string, unknown>) : null;
}
@ -10,6 +14,14 @@ function queryHistoryRecord(connection: unknown): Record<string, unknown> | null
return context ? recordOrNull(context.queryHistory) : null;
}
function historicSqlDialectForDriver(driver: KtxConnectionDriver): HistoricSqlDialect {
const dialect = historicSqlDialects.find((candidate) => candidate === driver);
if (!dialect) {
throw new Error(`Driver "${driver}" is marked as historic-SQL capable but has no HistoricSqlDialect mapping.`);
}
return dialect;
}
export function isQueryHistoryEnabled(connection: unknown): boolean {
return queryHistoryRecord(connection)?.enabled === true;
}
@ -25,8 +37,6 @@ export function queryHistoryDialectForConnection(connection: unknown): HistoricS
}
const conn = recordOrNull(connection);
const driver = String(conn?.driver ?? '').toLowerCase();
if (driver === 'postgres') return 'postgres';
if (driver === 'bigquery') return 'bigquery';
if (driver === 'snowflake') return 'snowflake';
return null;
const registration = getDriverRegistration(driver);
return registration?.hasHistoricSqlReader ? historicSqlDialectForDriver(registration.driver) : null;
}

View file

@ -1,57 +0,0 @@
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { detectHistoricSqlStagedDir } from './detect.js';
import { HISTORIC_SQL_SOURCE_KEY, stagedManifestSchema } from './types.js';
async function tempDir(): Promise<string> {
return mkdtemp(join(tmpdir(), 'historic-sql-detect-'));
}
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
const target = join(root, relPath);
await mkdir(join(target, '..'), { recursive: true });
await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
function manifest() {
return stagedManifestSchema.parse({
source: HISTORIC_SQL_SOURCE_KEY,
connectionId: 'conn_1',
dialect: 'postgres',
fetchedAt: '2026-05-04T12:00:00.000Z',
windowStart: '2026-02-03T12:00:00.000Z',
windowEnd: '2026-05-04T12:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
});
}
describe('historic-sql staged dir detection', () => {
it('detects manifest source', async () => {
const stagedDir = await tempDir();
await writeJson(stagedDir, 'manifest.json', manifest());
await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(true);
});
it('detects unified table and patterns structure without manifest', async () => {
const stagedDir = await tempDir();
await writeFile(join(stagedDir, 'not-a-match.txt'), 'x', 'utf-8');
await writeJson(stagedDir, 'patterns-input.json', { templates: [] });
await writeJson(stagedDir, 'tables/public.orders.json', { table: 'public.orders' });
await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(true);
});
it('does not detect unrelated directories', async () => {
const stagedDir = await tempDir();
await writeJson(stagedDir, 'manifest.json', { source: 'notion' });
await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(false);
});
});

View file

@ -1,89 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { asSchema } from 'ai';
import { createEmitHistoricSqlEvidenceTool } from './evidence-tool.js';
describe('emit_historic_sql_evidence tool', () => {
it('exposes an AI SDK v6 tool input schema with top-level object type', async () => {
const tool = createEmitHistoricSqlEvidenceTool();
expect(await asSchema(tool.inputSchema).jsonSchema).toMatchObject({
type: 'object',
});
});
it('writes table usage evidence to the ignored run evidence directory', async () => {
const writeFile = vi.fn(async () => ({ success: true, commitHash: null }));
const tool = createEmitHistoricSqlEvidenceTool();
const result = await tool.execute!(
{
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried by paid status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
},
{
toolCallId: 'call-1',
messages: [],
abortSignal: new AbortController().signal,
experimental_context: {
connectionId: 'warehouse',
session: {
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'historic-sql' },
configService: { writeFile },
},
},
} as never,
);
expect(result).toBe('Recorded historic-SQL table_usage evidence for public.orders.');
expect(writeFile).toHaveBeenCalledWith(
'.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json',
expect.stringContaining('"kind": "table_usage"'),
'System User',
'system@example.com',
'Record historic-SQL evidence: historic-sql-table-public-orders',
{ skipLock: true },
);
});
it('rejects non-historic ingest sessions', async () => {
const tool = createEmitHistoricSqlEvidenceTool();
await expect(
tool.execute!(
{
kind: 'pattern',
rawPath: 'patterns-input.json',
pattern: {
slug: 'orders',
title: 'Orders',
narrative: 'Orders pattern.',
definitionSql: 'select * from public.orders',
tablesInvolved: ['public.orders'],
slRefs: ['orders'],
constituentTemplateIds: ['pg:1'],
},
},
{
toolCallId: 'call-1',
messages: [],
abortSignal: new AbortController().signal,
experimental_context: {
connectionId: 'warehouse',
session: {
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
configService: { writeFile: vi.fn() },
},
},
} as never,
),
).resolves.toContain('Error: emit_historic_sql_evidence is only available during historic-sql ingest');
});
});

View file

@ -1,57 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
historicSqlEvidenceEnvelopeSchema,
historicSqlEvidencePath,
historicSqlPatternEvidenceSchema,
historicSqlTableUsageEvidenceSchema,
} from './evidence.js';
describe('historic-sql evidence contracts', () => {
it('validates table usage evidence emitted by table digest WorkUnits', () => {
const parsed = historicSqlTableUsageEvidenceSchema.parse({
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried for paid/refunded lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
},
});
expect(parsed.table).toBe('public.orders');
expect(parsed.usage.frequencyTier).toBe('high');
});
it('validates pattern evidence emitted by the patterns WorkUnit', () => {
const parsed = historicSqlPatternEvidenceSchema.parse(
historicSqlEvidenceEnvelopeSchema.parse({
kind: 'pattern',
connectionId: 'warehouse',
rawPath: 'patterns-input.json',
pattern: {
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Analysts compare order status changes by customer segment.',
definitionSql: 'select status, count(*) from public.orders group by status',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:1', 'pg:2'],
},
}),
);
expect(parsed.kind).toBe('pattern');
expect(parsed.pattern.slug).toBe('order-lifecycle-analysis');
});
it('builds a stable ignored evidence path from run and WorkUnit identity', () => {
expect(historicSqlEvidencePath('run-1', 'historic-sql-table-public-orders')).toBe(
'.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json',
);
});
});

View file

@ -1,110 +0,0 @@
import { mkdtemp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import type { SourceAdapter } from '../../types.js';
import { HistoricSqlSourceAdapter } from './historic-sql.adapter.js';
import type { HistoricSqlReader } from './types.js';
async function tempDir(): Promise<string> {
return mkdtemp(join(tmpdir(), 'historic-sql-adapter-'));
}
const sqlAnalysis: SqlAnalysisPort = {
async analyzeForFingerprint() {
throw new Error('analyzeForFingerprint must not be used');
},
async analyzeBatch() {
return new Map();
},
async validateReadOnly() {
return { ok: true };
},
};
const reader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {},
};
describe('HistoricSqlSourceAdapter', () => {
it('declares canonical adapter metadata', () => {
const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });
expect(adapter.source).toBe('historic-sql');
expect(adapter.skillNames).toEqual(['historic_sql_table_digest', 'historic_sql_patterns']);
expect(adapter.reconcileSkillNames).toEqual([]);
expect((adapter as SourceAdapter).evidenceIndexing).toBeUndefined();
expect(adapter.triageSupported).toBe(false);
});
it('fetches a unified aggregate snapshot and emits unified WorkUnits', async () => {
const stagedDir = await tempDir();
const aggregateReader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {
yield {
templateId: 'pg:1',
canonicalSql:
'select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status',
dialect: 'postgres',
stats: {
executions: 25,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 10,
p95RuntimeMs: 20,
errorRate: 0,
rowsProduced: 10,
},
topUsers: [{ user: 'analyst', executions: 25 }],
};
},
};
const batchSqlAnalysis: SqlAnalysisPort = {
async analyzeForFingerprint() {
throw new Error('analyzeForFingerprint must not be used');
},
async analyzeBatch() {
return new Map([
[
'pg:1',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: { select: ['status'], join: ['customer_id', 'id'], groupBy: ['status'] },
},
],
]);
},
async validateReadOnly() {
return { ok: true };
},
};
const adapter = new HistoricSqlSourceAdapter({
sqlAnalysis: batchSqlAnalysis,
reader: aggregateReader,
queryClient: {},
now: () => new Date('2026-05-11T00:00:00.000Z'),
});
await adapter.fetch({ dialect: 'postgres', minExecutions: 5 }, stagedDir, {
connectionId: 'warehouse',
sourceKey: 'historic-sql',
});
await expect(adapter.detect(stagedDir)).resolves.toBe(true);
await expect(adapter.chunk(stagedDir)).resolves.toMatchObject({
workUnits: [
{ unitKey: 'historic-sql-table-public-customers' },
{ unitKey: 'historic-sql-table-public-orders' },
{ unitKey: 'historic-sql-patterns-part-0001' },
],
});
});
});

View file

@ -1,286 +0,0 @@
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import YAML from 'yaml';
import type { AgentRunnerPort, RunLoopParams } from '../../../../context/llm/runtime-port.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../../../context/project/project.js';
import type { SqlAnalysisBatchItem, SqlAnalysisBatchResult, SqlAnalysisDialect, SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import { searchLocalSlSources } from '../../../sl/local-sl.js';
import { searchLocalKnowledgePages } from '../../../wiki/local-knowledge.js';
import { runLocalIngest } from '../../local-ingest.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { HistoricSqlSourceAdapter } from './historic-sql.adapter.js';
import type { AggregatedTemplate, HistoricSqlReader, HistoricSqlUnifiedPullConfig } from './types.js';
class AcceptanceHistoricSqlReader implements HistoricSqlReader {
async probe() {
return { warnings: [], info: [] };
}
async *fetchAggregated(
_client: unknown,
_window: { start: Date; end: Date },
_config: HistoricSqlUnifiedPullConfig,
): AsyncIterable<AggregatedTemplate> {
yield {
templateId: 'pg:orders-lifecycle',
canonicalSql:
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id where o.status = $1 group by o.status, c.segment',
dialect: 'postgres',
stats: {
executions: 42,
distinctUsers: 4,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 18,
p95RuntimeMs: 84,
errorRate: 0,
rowsProduced: 420,
},
topUsers: [{ user: 'analyst@example.test', executions: 42 }],
};
}
}
class HistoricSqlAcceptanceAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.telemetryTags?.operationName !== 'ingest-bundle-wu') {
return { stopReason: 'natural' as const };
}
const emitEvidence = params.toolSet.emit_historic_sql_evidence;
if (!emitEvidence?.execute) {
throw new Error('emit_historic_sql_evidence tool was not available to the historic-SQL WorkUnit');
}
if (params.telemetryTags.unitKey === 'historic-sql-table-public-orders') {
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status', 'segment'],
commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
staleSince: null,
},
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected orders evidence result: ${result.markdown}`);
}
}
if (params.telemetryTags.unitKey === 'historic-sql-table-public-customers') {
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers provide segment context for paid order lifecycle analysis.',
frequencyTier: 'mid',
commonFilters: [],
commonGroupBys: ['segment'],
commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
staleSince: null,
},
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected customers evidence result: ${result.markdown}`);
}
}
if (params.telemetryTags.unitKey === 'historic-sql-patterns-part-0001') {
const result = await emitEvidence.execute({
kind: 'pattern',
rawPath: 'patterns-input/part-0001.json',
pattern: {
slug: 'paid-order-lifecycle',
title: 'Paid Order Lifecycle',
narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
definitionSql:
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:orders-lifecycle'],
},
});
if (!result.markdown.includes('Recorded historic-SQL pattern evidence')) {
throw new Error(`Unexpected pattern evidence result: ${result.markdown}`);
}
}
return { stopReason: 'natural' as const };
});
}
function acceptanceSqlAnalysis(): SqlAnalysisPort {
return {
analyzeForFingerprint: async () => {
throw new Error('analyzeForFingerprint should not be used by unified historic-SQL ingest');
},
analyzeBatch: vi.fn(
async (
items: SqlAnalysisBatchItem[],
_dialect: SqlAnalysisDialect,
): Promise<Map<string, SqlAnalysisBatchResult>> => {
return new Map(
items.map((item) => [
item.id,
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: {
select: ['status', 'segment'],
where: ['status'],
join: ['customer_id', 'id'],
groupBy: ['status', 'segment'],
},
},
]),
);
},
),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
}
async function writeHistoricSqlProject(project: KtxLocalProject): Promise<KtxLocalProject> {
await writeFile(
join(project.projectDir, 'ktx.yaml'),
[
'connections:',
' warehouse:',
' driver: postgres',
' context:',
' queryHistory:',
' enabled: true',
' minExecutions: 2',
'ingest:',
' adapters:',
' - historic-sql',
' embeddings:',
' backend: none',
'storage:',
' state: sqlite',
' search: sqlite-fts5',
' git:',
' auto_commit: false',
' author: KTX Test <system@ktx.local>',
'',
].join('\n'),
'utf-8',
);
const loaded = await loadKtxProject({ projectDir: project.projectDir });
await loaded.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
columns: [
{ name: 'id', type: 'string' },
{ name: 'status', type: 'string' },
{ name: 'customer_id', type: 'string' },
],
},
customers: {
table: 'public.customers',
columns: [
{ name: 'id', type: 'string' },
{ name: 'segment', type: 'string' },
],
},
},
}),
'KTX Test',
'system@ktx.local',
'Seed schema shard',
);
return loaded;
}
describe('historic-SQL local ingest retrieval acceptance', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-historic-sql-acceptance-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('projects table and pattern evidence into semantic-layer and wiki retrieval surfaces', async () => {
const initialized = await initKtxProject({ projectDir: join(tempDir, 'project') });
const project = await writeHistoricSqlProject(initialized);
const sqlAnalysis = acceptanceSqlAnalysis();
const agentRunner = new HistoricSqlAcceptanceAgentRunner();
const adapter = new HistoricSqlSourceAdapter({
reader: new AcceptanceHistoricSqlReader(),
queryClient: {},
sqlAnalysis,
now: () => new Date('2026-05-11T00:00:00.000Z'),
});
const result = await runLocalIngest({
project,
adapters: [adapter],
adapter: 'historic-sql',
connectionId: 'warehouse',
jobId: 'historic-sql-retrieval-acceptance',
agentRunner,
});
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledTimes(1);
expect(result.result.failedWorkUnits).toEqual([]);
expect(result.result.workUnitCount).toBe(3);
expect(agentRunner.runLoop).toHaveBeenCalledTimes(3);
const finalization = result.report.body.finalization;
expect(finalization).toBeDefined();
if (!finalization) {
throw new Error('Expected historic-SQL finalization result');
}
expect(finalization).toMatchObject({
sourceKey: 'historic-sql',
status: 'success',
result: {
tableUsageMerged: 2,
patternPagesWritten: 1,
},
});
expect(finalization.declaredTouchedSources).toEqual(
expect.arrayContaining([
{ connectionId: 'warehouse', sourceName: 'customers' },
{ connectionId: 'warehouse', sourceName: 'orders' },
]),
);
await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves
.toContain('Analysts repeatedly inspect paid order lifecycle by customer segment.');
await expect(readFile(join(project.projectDir, 'wiki/global/historic-sql-paid-order-lifecycle.md'), 'utf-8'))
.resolves.toContain('Paid Order Lifecycle');
const reloaded = await loadKtxProject({ projectDir: project.projectDir });
await expect(
searchLocalSlSources(reloaded, { connectionId: 'warehouse', query: 'paid order lifecycle', limit: 5 }),
).resolves.toEqual(expect.arrayContaining([
expect.objectContaining({
name: 'orders',
frequencyTier: 'high',
snippet: expect.stringContaining('<mark>'),
matchReasons: expect.arrayContaining(['lexical']),
}),
]));
await expect(
searchLocalKnowledgePages(reloaded, { query: 'paid order lifecycle', userId: 'local', limit: 5 }),
).resolves.toEqual([
expect.objectContaining({
key: 'historic-sql-paid-order-lifecycle',
summary: 'Paid Order Lifecycle',
matchReasons: expect.arrayContaining(['lexical']),
}),
]);
});
});

View file

@ -1,89 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES,
isHistoricSqlPatternInputShardPath,
serializedStagedPatternsInputByteLength,
splitHistoricSqlPatternInputs,
} from './pattern-inputs.js';
import type { StagedPatternsInput } from './types.js';
type PatternTemplate = StagedPatternsInput['templates'][number];
function template(id: string, tablesTouched: string[], canonicalSql = 'select 1'): PatternTemplate {
return {
id,
canonicalSql,
tablesTouched,
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
};
}
describe('historic-SQL pattern input sharding', () => {
it('keeps the audit input complete while sharding only cross-table pattern candidates', () => {
const largeSql = `select * from public.orders join public.customers on true where marker = '${'x'.repeat(260)}'`;
const input: StagedPatternsInput = {
templates: [
template('single-table-orders', ['public.orders']),
template('orders-customers-2', ['public.orders', 'public.customers'], largeSql),
template('orders-customers-1', ['public.customers', 'public.orders'], largeSql),
template('orders-customers-payments', ['public.orders', 'public.customers', 'public.payments'], largeSql),
],
};
const result = splitHistoricSqlPatternInputs(input, { maxBytes: 760 });
expect(result.auditInput.templates.map((entry) => entry.id)).toEqual([
'orders-customers-1',
'orders-customers-2',
'orders-customers-payments',
'single-table-orders',
]);
expect(result.shards.length).toBeGreaterThan(1);
expect(result.shards.map((shard) => shard.path)).toEqual([
'patterns-input/part-0001.json',
'patterns-input/part-0002.json',
'patterns-input/part-0003.json',
]);
expect(result.shards.flatMap((shard) => shard.input.templates.map((entry) => entry.id))).toEqual([
'orders-customers-payments',
'orders-customers-1',
'orders-customers-2',
]);
expect(result.shards.every((shard) => shard.byteLength <= 760)).toBe(true);
expect(result.shards.flatMap((shard) => shard.input.templates).some((entry) => entry.id === 'single-table-orders')).toBe(false);
expect(result.warnings).toEqual([]);
});
it('omits a single oversized template from shards and reports a manifest warning', () => {
const input: StagedPatternsInput = {
templates: [
template(
'oversized-cross-table',
['public.orders', 'public.customers'],
`select * from public.orders join public.customers on true where payload = '${'x'.repeat(500)}'`,
),
],
};
const result = splitHistoricSqlPatternInputs(input, { maxBytes: 240 });
expect(result.auditInput.templates.map((entry) => entry.id)).toEqual(['oversized-cross-table']);
expect(result.shards).toEqual([]);
expect(result.warnings).toEqual(['patterns_input_template_too_large:oversized-cross-table']);
});
it('recognizes only generated pattern shard paths', () => {
expect(isHistoricSqlPatternInputShardPath('patterns-input/part-0001.json')).toBe(true);
expect(isHistoricSqlPatternInputShardPath('patterns-input/part-0012.json')).toBe(true);
expect(isHistoricSqlPatternInputShardPath('patterns-input.json')).toBe(false);
expect(isHistoricSqlPatternInputShardPath('patterns-input/part-1.json')).toBe(false);
expect(isHistoricSqlPatternInputShardPath('patterns-input/readme.md')).toBe(false);
});
it('uses a production byte budget below read_raw_file maximum size', () => {
expect(HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES).toBeLessThan(120_000);
expect(serializedStagedPatternsInputByteLength({ templates: [] })).toBeGreaterThan(0);
});
});

View file

@ -1,242 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import {
HistoricSqlExtensionMissingError,
HistoricSqlGrantsMissingError,
HistoricSqlVersionUnsupportedError,
} from './errors.js';
import { PostgresPgssReader } from './postgres-pgss-reader.js';
interface FakeQueryResult {
headers: string[];
rows: unknown[][];
totalRows?: number;
error?: string;
}
function queryClient(results: Array<FakeQueryResult | Error>) {
const executeQuery = vi.fn(async (_query: string, _params?: unknown[]) => {
const next = results.shift();
if (!next) {
throw new Error('unexpected query');
}
if (next instanceof Error) {
throw next;
}
return next;
});
return { executeQuery };
}
function executedSql(client: ReturnType<typeof queryClient>, index: number): string {
const call = client.executeQuery.mock.calls[index];
if (!call) {
throw new Error(`expected query client call ${index}`);
}
return call[0];
}
describe('PostgresPgssReader aggregate path', () => {
it('probes version, extension presence, grants, and tracking state', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4 on x86_64-apple-darwin']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['top']] },
{ headers: ['max'], rows: [['5000']] },
]);
const reader = new PostgresPgssReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4 on x86_64-apple-darwin',
warnings: [],
info: [],
});
expect(executedSql(client, 0)).toContain("current_setting('server_version_num')::int");
expect(executedSql(client, 1)).toBe('SELECT 1 FROM pg_stat_statements LIMIT 1');
expect(executedSql(client, 2)).toBe(
"SELECT pg_has_role(current_user, 'pg_read_all_stats', 'USAGE') AS has_role",
);
expect(executedSql(client, 3)).toBe("SELECT current_setting('pg_stat_statements.track') AS track");
expect(executedSql(client, 4)).toBe("SELECT current_setting('pg_stat_statements.max') AS max");
});
it('rejects PostgreSQL versions older than 14 without probing the extension', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[130012, 'PostgreSQL 13.12']],
},
]);
const reader = new PostgresPgssReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlVersionUnsupportedError',
dialect: 'postgres',
detectedVersion: 'PostgreSQL 13.12',
minimumVersion: 'PostgreSQL 14',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlVersionUnsupportedError);
expect(client.executeQuery).toHaveBeenCalledTimes(1);
});
it('maps a missing pg_stat_statements relation to HistoricSqlExtensionMissingError', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
new Error('relation "pg_stat_statements" does not exist'),
]);
const reader = new PostgresPgssReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlExtensionMissingError',
dialect: 'postgres',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError);
});
it('maps pg_stat_statements preload failures to HistoricSqlExtensionMissingError with preload remediation', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
new Error('pg_stat_statements must be loaded via shared_preload_libraries'),
]);
const reader = new PostgresPgssReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlExtensionMissingError',
dialect: 'postgres',
message: 'pg_stat_statements is installed but not loaded via shared_preload_libraries.',
remediation: expect.stringContaining("shared_preload_libraries includes 'pg_stat_statements'"),
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError);
});
it('maps missing pg_read_all_stats membership to HistoricSqlGrantsMissingError', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[false]] },
]);
const reader = new PostgresPgssReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'postgres',
remediation: 'GRANT pg_read_all_stats TO <connection role>;',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
it('returns a warning instead of failing when pg_stat_statements.track is none', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['none']] },
{ headers: ['max'], rows: [['5000']] },
]);
const reader = new PostgresPgssReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4',
warnings: [
"pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config",
],
info: [],
});
});
it('returns an info note when pg_stat_statements.max is below the recommended floor', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['top']] },
{ headers: ['max'], rows: [['1000']] },
]);
const reader = new PostgresPgssReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4',
warnings: [],
info: [
'pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn',
],
});
});
it('aggregates pg_stat_statements rows by queryid and query', async () => {
const executeQuery = vi.fn(async (sql: string, params?: unknown[]) => {
if (sql.includes('pg_stat_statements_info')) {
return { headers: ['stats_reset', 'dealloc'], rows: [['2026-05-01T00:00:00.000Z', 1]] };
}
expect(sql).toContain('GROUP BY queryid, query');
expect(sql).toContain('HAVING SUM(calls) >= $1');
expect(params).toEqual([5]);
return {
headers: ['template_id', 'canonical_sql', 'executions', 'distinct_users', 'mean_ms', 'rows_produced', 'top_users'],
rows: [
[
'123',
'select status from public.orders',
'42',
'3',
'11.5',
'100',
JSON.stringify([{ user: 'analyst', executions: 40 }]),
],
],
};
});
const reader = new PostgresPgssReader();
const rows = [];
for await (const row of reader.fetchAggregated(
{ executeQuery },
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
{ dialect: 'postgres', minExecutions: 5, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
)) {
rows.push(row);
}
expect(rows).toEqual([
{
templateId: '123',
canonicalSql: 'select status from public.orders',
dialect: 'postgres',
stats: {
executions: 42,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 11.5,
p95RuntimeMs: 11.5,
errorRate: 0,
rowsProduced: 100,
},
topUsers: [{ user: 'analyst', executions: 40 }],
},
]);
});
});

View file

@ -1,457 +0,0 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import YAML from 'yaml';
import { describe, expect, it } from 'vitest';
import { projectHistoricSqlEvidence } from './projection.js';
async function tempWorkdir(): Promise<string> {
return mkdtemp(join(tmpdir(), 'historic-sql-projection-'));
}
async function writeText(root: string, relPath: string, content: string): Promise<void> {
const target = join(root, relPath);
await mkdir(join(target, '..'), { recursive: true });
await writeFile(target, content, 'utf-8');
}
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
await writeText(root, relPath, `${JSON.stringify(value, null, 2)}\n`);
}
describe('projectHistoricSqlEvidence', () => {
it('merges table usage into matching _schema shards and preserves external usage keys', async () => {
const workdir = await tempWorkdir();
await writeText(
workdir,
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
usage: {
narrative: 'Old generated usage.',
frequencyTier: 'low',
commonFilters: ['old_status'],
commonJoins: [],
ownerNote: 'keep me',
},
columns: [{ name: 'id', type: 'string' }],
},
},
}),
);
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 1,
touchedTableCount: 1,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.orders.json', { table: 'public.orders' });
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/orders.json', {
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried for lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
},
});
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
expect(result.actions).toEqual(
expect.arrayContaining([
expect.objectContaining({
target: 'sl',
key: 'orders',
rawPaths: ['tables/public.orders.json'],
}),
]),
);
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
expect(shard.tables.orders.usage).toEqual({
ownerNote: 'keep me',
narrative: 'Orders are repeatedly queried for lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
});
});
it('writes pattern pages, reuses similar slugs, and marks missing old pattern pages stale', async () => {
const workdir = await tempWorkdir();
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 2,
touchedTableCount: 2,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.orders.json', { table: 'public.orders' });
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
await writeText(
workdir,
'wiki/global/historic-sql-old-order-lifecycle.md',
[
'---',
YAML.stringify({
summary: 'Old order lifecycle page',
tags: ['historic-sql', 'pattern'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders', 'public.customers'],
fingerprints: ['pg:1'],
}).trimEnd(),
'---',
'',
'Old body',
'',
].join('\n'),
);
await writeText(
workdir,
'wiki/global/historic-sql-retired-pattern.md',
[
'---',
YAML.stringify({
summary: 'Retired pattern',
tags: ['historic-sql', 'pattern'],
refs: [],
sl_refs: [],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.tickets'],
fingerprints: ['pg:9'],
}).trimEnd(),
'---',
'',
'Retired body',
'',
].join('\n'),
);
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/pattern.json', {
kind: 'pattern',
connectionId: 'warehouse',
rawPath: 'patterns-input.json',
pattern: {
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Analysts compare order status with customer segment.',
definitionSql: 'select * from public.orders join public.customers on customers.id = orders.customer_id',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:1', 'pg:2'],
},
});
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.patternPagesWritten).toBe(1);
expect(result.changedWikiPageKeys).toContain('historic-sql-old-order-lifecycle');
expect(result.actions).toEqual(
expect.arrayContaining([
expect.objectContaining({
target: 'wiki',
key: 'historic-sql-old-order-lifecycle',
rawPaths: ['patterns-input.json'],
}),
]),
);
await expect(readFile(join(workdir, 'wiki/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain(
'Order Lifecycle Analysis',
);
await expect(readFile(join(workdir, 'wiki/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain(
'stale_since: "2026-05-11T00:00:00.000Z"',
);
});
it('rewrites a reappearing archived pattern at the flat slug', async () => {
const workdir = await tempWorkdir();
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 2,
touchedTableCount: 2,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 30,
});
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.orders.json', { table: 'public.orders' });
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
await writeText(
workdir,
'wiki/global/historic-sql-order-lifecycle-analysis.md',
[
'---',
YAML.stringify({
summary: 'Archived order lifecycle page',
tags: ['historic-sql', 'pattern', 'archived'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders', 'public.customers'],
fingerprints: ['pg:1'],
stale_since: '2026-01-01T00:00:00.000Z',
}).trimEnd(),
'---',
'',
'Archived body',
'',
].join('\n'),
);
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/pattern.json', {
kind: 'pattern',
connectionId: 'warehouse',
rawPath: 'patterns-input.json',
pattern: {
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Analysts compare order status with customer segment again.',
definitionSql: 'select * from public.orders join public.customers on customers.id = orders.customer_id',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:1', 'pg:2'],
},
});
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.patternPagesWritten).toBe(1);
const page = await readFile(join(workdir, 'wiki/global/historic-sql-order-lifecycle-analysis.md'), 'utf-8');
expect(page).toContain('Analysts compare order status with customer segment again.');
expect(page).not.toContain('Archived body');
expect(page).not.toContain('archived');
});
it('leaves already archived pattern pages stable when they are still absent', async () => {
const workdir = await tempWorkdir();
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 30,
});
await writeText(
workdir,
'wiki/global/historic-sql-retired-pattern.md',
[
'---',
YAML.stringify({
summary: 'Retired pattern',
tags: ['historic-sql', 'pattern', 'archived'],
refs: [],
sl_refs: [],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.tickets'],
fingerprints: ['pg:9'],
stale_since: '2026-01-01T00:00:00.000Z',
}).trimEnd(),
'---',
'',
'Archived retired body',
'',
].join('\n'),
);
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.archivedPatternPages).toBe(0);
expect(result.stalePatternPagesMarked).toBe(0);
await expect(readFile(join(workdir, 'wiki/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain(
'Archived retired body',
);
});
it('marks missing table usage stale without deleting old query pages', async () => {
const workdir = await tempWorkdir();
await writeText(
workdir,
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
usage: {
narrative: 'Orders were active before.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
ownerNote: 'keep analyst annotation',
},
columns: [{ name: 'id', type: 'string' }],
},
},
}),
);
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/customers.json', {
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers were queried.',
frequencyTier: 'low',
commonFilters: [],
commonJoins: [],
staleSince: null,
},
});
await writeText(
workdir,
'wiki/global/historic-sql-old-template.md',
[
'---',
YAML.stringify({
summary: 'Old template page',
tags: ['historic-sql', 'query-pattern'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders'],
fingerprints: ['old:1'],
}).trimEnd(),
'---',
'',
'Old body',
'',
].join('\n'),
);
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.staleTablesMarked).toBe(1);
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
const staleAction = result.actions.find((action) => action.target === 'sl' && action.key === 'orders');
expect(staleAction).toEqual(expect.objectContaining({ target: 'sl', key: 'orders' }));
expect(staleAction?.rawPaths).toBeUndefined();
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
expect(shard.tables.orders.usage).toEqual({
ownerNote: 'keep analyst annotation',
narrative: 'No recent historic SQL usage was observed in the latest snapshot.',
frequencyTier: 'unused',
commonFilters: [],
commonGroupBys: [],
commonJoins: [],
staleSince: '2026-05-11T00:00:00.000Z',
});
await expect(readFile(join(workdir, 'wiki/global/historic-sql-old-template.md'), 'utf-8')).resolves.toContain(
'Old body',
);
});
it('does not mark stale or archive pages when override replay has no current-run evidence', async () => {
const workdir = await tempWorkdir();
await writeText(
workdir,
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
usage: {
narrative: 'Orders were active before.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status'],
commonJoins: [],
},
columns: [{ name: 'id', type: 'string' }],
},
},
}),
);
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/override-sync/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
const result = await projectHistoricSqlEvidence({
workdir,
connectionId: 'warehouse',
syncId: 'override-sync',
runId: 'override-run',
overrideReplay: {
priorJobId: 'prior-job',
priorRunId: 'prior-run',
priorSyncId: 'prior-sync',
evictionRawPaths: ['tables/public/orders.json'],
},
});
expect(result.tableUsageMerged).toBe(0);
expect(result.staleTablesMarked).toBe(0);
expect(result.patternPagesWritten).toBe(0);
expect(result.stalePatternPagesMarked).toBe(0);
expect(result.archivedPatternPages).toBe(0);
expect(result.touchedSources).toEqual([]);
expect(result.changedWikiPageKeys).toEqual([]);
expect(result.actions).toEqual([]);
});
});

View file

@ -1,36 +0,0 @@
import { describe, expect, it } from 'vitest';
import { compileHistoricSqlRedactionPatterns, redactHistoricSqlText } from './redaction.js';
describe('historic-SQL redaction', () => {
it('redacts regex matches and supports the (?i) case-insensitive prefix', () => {
const redactors = compileHistoricSqlRedactionPatterns([
'sk_live_[A-Za-z0-9]+',
'(?i)secret_token_[a-z0-9]+',
]);
const sql =
"select * from public.api_events where api_key = 'sk_live_abc123' and note = 'Secret_Token_9f'"; // pragma: allowlist secret
expect(redactHistoricSqlText(sql, redactors)).toBe(
"select * from public.api_events where api_key = '[REDACTED]' and note = '[REDACTED]'",
);
});
it('returns the original SQL text when no redaction patterns are configured', () => {
const sql = "select * from public.orders where status = 'paid'";
expect(redactHistoricSqlText(sql, compileHistoricSqlRedactionPatterns([]))).toBe(sql);
});
it('throws a config-focused error for invalid redaction regex patterns', () => {
expect(() => compileHistoricSqlRedactionPatterns(['[broken'])).toThrow(
'Invalid historicSql.redactionPatterns entry "[broken"',
);
});
it('throws a config-focused error for empty redaction regex patterns', () => {
expect(() => compileHistoricSqlRedactionPatterns([' '])).toThrow(
'Invalid historicSql.redactionPatterns entry " "',
);
});
});

View file

@ -1,74 +0,0 @@
import { describe, expect, it } from 'vitest';
import { z } from 'zod';
import {
patternOutputSchema,
patternsArraySchema,
tableUsageOutputSchema,
} from './skill-schemas.js';
describe('historic-sql skill schemas', () => {
it('accepts table usage output and preserves future keys', () => {
const parsed = tableUsageOutputSchema.parse({
narrative: 'Orders are queried for paid/refunded lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
analystNote: 'preserve me',
});
expect(parsed).toMatchObject({
narrative: 'Orders are queried for paid/refunded lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
analystNote: 'preserve me',
});
});
it('rejects invalid frequency tiers', () => {
const result = tableUsageOutputSchema.safeParse({
narrative: 'Orders are queried often.',
frequencyTier: 'sometimes',
commonFilters: [],
commonJoins: [],
});
expect(result.success).toBe(false);
});
it('accepts pattern outputs used for wiki projection', () => {
const parsed = patternsArraySchema.parse([
{
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Teams inspect order status by customer and month.',
definitionSql: 'select status, count(*) from public.orders group by status',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['template_1', 'template_2'],
},
]);
expect(parsed[0]).toEqual({
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Teams inspect order status by customer and month.',
definitionSql: 'select status, count(*) from public.orders group by status',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['template_1', 'template_2'],
});
});
it('exports zod schemas that can produce JSON schema for prompt prefixes', () => {
const tableUsageJsonSchema = z.toJSONSchema(tableUsageOutputSchema);
const patternJsonSchema = z.toJSONSchema(patternOutputSchema);
expect(tableUsageJsonSchema).toMatchObject({ type: 'object' });
expect(patternJsonSchema).toMatchObject({ type: 'object' });
});
});

View file

@ -1,148 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { HistoricSqlGrantsMissingError } from './errors.js';
import { SnowflakeHistoricSqlQueryHistoryReader } from './snowflake-query-history-reader.js';
interface FakeQueryResult {
headers: string[];
rows: unknown[][];
totalRows: number;
error?: string;
}
function queryClient(results: FakeQueryResult[]) {
const executeQuery = vi.fn(async (_query: string) => {
const next = results.shift();
if (!next) {
throw new Error('unexpected query');
}
return next;
});
return { executeQuery };
}
function firstQuery(client: ReturnType<typeof queryClient>): string {
const call = client.executeQuery.mock.calls[0];
if (!call) {
throw new Error('expected query client to be called');
}
return call[0];
}
describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
it('probes SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', async () => {
const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).resolves.toEqual({ warnings: [], info: [] });
expect(client.executeQuery).toHaveBeenCalledWith(
'SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY LIMIT 1',
);
});
it('turns probe result errors into HistoricSqlGrantsMissingError', async () => {
const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Object does not exist or not authorized' }]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'snowflake',
remediation: 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;',
});
});
it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => {
const client = {
executeQuery: vi.fn(async () => {
throw new Error('permission denied');
}),
};
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
it('fetches aggregated Snowflake query templates', async () => {
const client = queryClient([
{
headers: [
'template_id',
'canonical_sql',
'executions',
'distinct_users',
'first_seen',
'last_seen',
'p50_ms',
'p95_ms',
'error_rate',
'rows_produced',
'top_users',
],
rows: [
[
'hash-1',
'select status from orders',
42,
3,
'2026-05-01T00:00:00.000Z',
'2026-05-11T00:00:00.000Z',
12,
40,
0.05,
100,
JSON.stringify([{ user: 'ANALYST', executions: 1 }]),
],
],
totalRows: 1,
},
]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
const rows = [];
for await (const row of reader.fetchAggregated(
client,
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
{ dialect: 'snowflake', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
)) {
rows.push(row);
}
const sql = firstQuery(client);
expect(sql).toContain('SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
expect(sql).toContain('COUNT(*) AS executions');
expect(sql).toContain('GROUP BY query_hash');
expect(sql).toContain('HAVING COUNT(*) >= 5');
expect(rows).toMatchObject([
{
templateId: 'hash-1',
stats: {
executions: 42,
errorRate: 0.05,
},
topUsers: [{ user: 'ANALYST', executions: 1 }],
},
]);
});
it('throws a clear error when the query client cannot execute SQL', async () => {
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(async () => {
for await (const _row of reader.fetchAggregated(
{},
{ start: new Date(), end: new Date() },
{
dialect: 'snowflake',
minExecutions: 5,
windowDays: 90,
enabledTables: [],
filters: { dropTrivialProbes: true },
redactionPatterns: [],
staleArchiveAfterDays: 90,
},
)) {
throw new Error('unreachable');
}
}).rejects.toThrow('Historic SQL Snowflake reader requires a query client with executeQuery(query)');
});
});

View file

@ -1,436 +0,0 @@
import { mkdtemp, readFile, readdir } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import { stageHistoricSqlAggregatedSnapshot } from './stage-unified.js';
import type { AggregatedTemplate, HistoricSqlReader } from './types.js';
async function tempDir(): Promise<string> {
return mkdtemp(join(tmpdir(), 'historic-sql-unified-stage-'));
}
async function readJson<T>(root: string, relPath: string): Promise<T> {
return JSON.parse(await readFile(join(root, relPath), 'utf-8')) as T;
}
function aggregate(overrides: Partial<AggregatedTemplate> & { templateId: string; canonicalSql: string }): AggregatedTemplate {
return {
templateId: overrides.templateId,
canonicalSql: overrides.canonicalSql,
dialect: overrides.dialect ?? 'postgres',
stats: overrides.stats ?? {
executions: 42,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 20,
p95RuntimeMs: 80,
errorRate: 0,
rowsProduced: 100,
},
topUsers: overrides.topUsers ?? [{ user: 'analyst', executions: 40 }],
};
}
describe('stageHistoricSqlAggregatedSnapshot', () => {
it('batch parses templates and writes stable table and patterns artifacts', async () => {
const stagedDir = await tempDir();
const reader: HistoricSqlReader = {
async probe() {
return { warnings: ['pg_stat_statements.track is none; aggregation still proceeds'], info: [] };
},
async *fetchAggregated() {
yield aggregate({
templateId: 'orders-by-status',
canonicalSql: 'select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id where o.created_at >= $1 group by o.status',
});
yield aggregate({
templateId: 'service-account-only',
canonicalSql: 'select * from public.orders where id = $1',
stats: {
executions: 20,
distinctUsers: 1,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 5,
p95RuntimeMs: 10,
errorRate: 0,
rowsProduced: 1,
},
topUsers: [{ user: 'svc_loader', executions: 20 }],
});
yield aggregate({
templateId: 'bad-parse',
canonicalSql: 'select broken from',
});
},
};
const sqlAnalysis: SqlAnalysisPort = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(async () => new Map([
[
'orders-by-status',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: {
select: ['status'],
where: ['created_at'],
join: ['customer_id'],
groupBy: ['status'],
},
},
],
['bad-parse', { tablesTouched: [], columnsByClause: {}, error: 'parse failed' }],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({
stagedDir,
connectionId: 'warehouse',
queryClient: {},
reader,
sqlAnalysis,
pullConfig: {
dialect: 'postgres',
filters: {
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
},
},
now: new Date('2026-05-11T12:00:00.000Z'),
});
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledTimes(1);
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
[
{
id: 'orders-by-status',
sql: 'select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id where o.created_at >= $1 group by o.status',
},
{ id: 'bad-parse', sql: 'select broken from' },
],
'postgres',
);
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['public.customers.json', 'public.orders.json']);
const manifest = await readJson<Record<string, unknown>>(stagedDir, 'manifest.json');
expect(manifest).toMatchObject({
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
snapshotRowCount: 3,
touchedTableCount: 2,
parseFailures: 1,
warnings: ['parse_failed:bad-parse'],
probeWarnings: ['pg_stat_statements.track is none; aggregation still proceeds'],
staleArchiveAfterDays: 90,
});
const orders = await readJson<Record<string, any>>(stagedDir, 'tables/public.orders.json');
expect(orders).toMatchObject({
table: 'public.orders',
stats: {
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
errorRateBucket: 'none',
p95RuntimeBucket: '<100ms',
recencyBucket: 'current',
},
columnsByClause: {
select: [['status', 'high']],
where: [['created_at', 'high']],
join: [['customer_id', 'high']],
groupBy: [['status', 'high']],
},
observedJoins: [{ withTable: 'public.customers', on: ['customer_id'], freq: 'high' }],
topTemplates: [
{
id: 'orders-by-status',
topUsers: [{ user: 'analyst' }],
},
],
});
expect(orders.topTemplates[0].canonicalSql).toContain('group by o.status');
const patterns = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
expect(patterns.templates).toEqual([
{
id: 'orders-by-status',
canonicalSql: expect.stringContaining('public.orders'),
tablesTouched: ['public.customers', 'public.orders'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
]);
});
it('redacts configured SQL substrings in staged artifacts while analyzing original SQL', async () => {
const stagedDir = await tempDir();
const originalSql =
"select * from public.api_events where api_key = 'sk_live_abc123' and note = 'Secret_Token_9f'"; // pragma: allowlist secret
const reader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {
yield aggregate({
templateId: 'api-events-with-secret',
canonicalSql: originalSql,
stats: {
executions: 15,
distinctUsers: 2,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 12,
p95RuntimeMs: 25,
errorRate: 0,
rowsProduced: 15,
},
});
},
};
const sqlAnalysis: SqlAnalysisPort = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(async () => new Map([
[
'api-events-with-secret',
{
tablesTouched: ['public.api_events'],
columnsByClause: {
select: [],
where: ['api_key', 'note'],
join: [],
groupBy: [],
},
},
],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({
stagedDir,
connectionId: 'warehouse',
queryClient: {},
reader,
sqlAnalysis,
pullConfig: {
dialect: 'postgres',
redactionPatterns: ['sk_live_[A-Za-z0-9]+', '(?i)secret_token_[a-z0-9]+'],
},
now: new Date('2026-05-11T12:00:00.000Z'),
});
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
[{ id: 'api-events-with-secret', sql: originalSql }],
'postgres',
);
const tableJson = await readFile(join(stagedDir, 'tables/public.api_events.json'), 'utf-8');
const patternsJson = await readFile(join(stagedDir, 'patterns-input.json'), 'utf-8');
expect(tableJson).not.toContain('sk_live_abc123');
expect(tableJson).not.toContain('Secret_Token_9f');
expect(patternsJson).not.toContain('sk_live_abc123');
expect(patternsJson).not.toContain('Secret_Token_9f');
expect(tableJson).toContain('[REDACTED]');
expect(patternsJson).toContain('[REDACTED]');
});
it('limits staged table artifacts to configured enabled tables', async () => {
const stagedDir = await tempDir();
const reader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {
yield aggregate({
templateId: 'selected-qualified',
canonicalSql: 'select count(*) from orbit_analytics.int_active_contract_arr',
});
yield aggregate({
templateId: 'selected-unqualified',
canonicalSql: 'select count(*) from int_customer_health_signals',
});
yield aggregate({
templateId: 'unselected',
canonicalSql: 'select count(*) from orbit_raw.accounts',
});
},
};
const sqlAnalysis: SqlAnalysisPort = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(async () => new Map([
[
'selected-qualified',
{
tablesTouched: ['orbit_analytics.int_active_contract_arr'],
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
},
],
[
'selected-unqualified',
{
tablesTouched: ['int_customer_health_signals'],
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
},
],
[
'unselected',
{
tablesTouched: ['orbit_raw.accounts'],
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
},
],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({
stagedDir,
connectionId: 'warehouse',
queryClient: {},
reader,
sqlAnalysis,
pullConfig: {
dialect: 'postgres',
enabledTables: [
'orbit_analytics.int_active_contract_arr',
'orbit_analytics.int_customer_health_signals',
],
},
now: new Date('2026-05-11T12:00:00.000Z'),
});
expect(await readdir(join(stagedDir, 'tables'))).toEqual([
'int_customer_health_signals.json',
'orbit_analytics.int_active_contract_arr.json',
]);
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
expect(manifest.touchedTableCount).toBe(2);
const patterns = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
expect(patterns.templates.map((entry: any) => entry.id)).toEqual(['selected-qualified', 'selected-unqualified']);
});
it('preserves full patterns audit input and writes bounded cross-table pattern shards', async () => {
const stagedDir = await tempDir();
const largeSql = `select * from public.orders o join public.customers c on c.id = o.customer_id where payload = '${'x'.repeat(8000)}'`;
const reader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {
yield aggregate({
templateId: 'orders-customers-a',
canonicalSql: largeSql,
stats: {
executions: 25,
distinctUsers: 4,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 15,
p95RuntimeMs: 90,
errorRate: 0,
rowsProduced: 250,
},
});
yield aggregate({
templateId: 'orders-customers-b',
canonicalSql: largeSql.replace('payload', 'payload_b'),
stats: {
executions: 22,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 20,
p95RuntimeMs: 95,
errorRate: 0,
rowsProduced: 220,
},
});
yield aggregate({
templateId: 'orders-single-table',
canonicalSql: 'select count(*) from public.orders',
stats: {
executions: 30,
distinctUsers: 2,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 10,
p95RuntimeMs: 20,
errorRate: 0,
rowsProduced: 30,
},
});
},
};
const sqlAnalysis: SqlAnalysisPort = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(async () => new Map([
[
'orders-customers-a',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: {
select: [],
where: ['payload'],
join: ['customer_id', 'id'],
groupBy: [],
},
},
],
[
'orders-customers-b',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: {
select: [],
where: ['payload_b'],
join: ['customer_id', 'id'],
groupBy: [],
},
},
],
[
'orders-single-table',
{
tablesTouched: ['public.orders'],
columnsByClause: {
select: [],
where: [],
join: [],
groupBy: [],
},
},
],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({
stagedDir,
connectionId: 'warehouse',
queryClient: {},
reader,
sqlAnalysis,
pullConfig: { dialect: 'postgres' },
now: new Date('2026-05-11T12:00:00.000Z'),
});
const audit = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
expect(audit.templates.map((entry: any) => entry.id)).toEqual([
'orders-customers-a',
'orders-customers-b',
'orders-single-table',
]);
const firstShard = await readJson<Record<string, any>>(stagedDir, 'patterns-input/part-0001.json');
expect(firstShard.templates.map((entry: any) => entry.id)).toEqual(['orders-customers-a', 'orders-customers-b']);
expect(firstShard.templates.some((entry: any) => entry.id === 'orders-single-table')).toBe(false);
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
expect(manifest.warnings).toEqual([]);
});
});

View file

@ -1,110 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
aggregatedTemplateSchema,
historicSqlUnifiedPullConfigSchema,
stagedManifestSchema,
stagedPatternsInputSchema,
stagedTableInputSchema,
} from './types.js';
describe('historic-sql unified contracts', () => {
it('parses minExecutions and service-account filters', () => {
expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minExecutions: 9 })).toMatchObject({
dialect: 'postgres',
minExecutions: 9,
redactionPatterns: [],
staleArchiveAfterDays: 90,
});
expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minExecutions: 9 })).not.toHaveProperty(
'windowDays',
);
expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minExecutions: 9 })).not.toHaveProperty(
'concurrency',
);
const parsed = historicSqlUnifiedPullConfigSchema.parse({
dialect: 'postgres',
minExecutions: 7,
filters: {
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
},
});
expect(parsed.minExecutions).toBe(7);
expect(parsed.filters.serviceAccounts).toEqual({ patterns: ['^svc_'], mode: 'exclude' });
});
it('validates aggregate templates from warehouse readers', () => {
const parsed = aggregatedTemplateSchema.parse({
templateId: 'pg:123',
canonicalSql: 'select status, count(*) from public.orders group by status',
dialect: 'postgres',
stats: {
executions: 42,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 12.5,
p95RuntimeMs: 40,
errorRate: 0,
rowsProduced: 100,
},
topUsers: [{ user: 'analyst', executions: 40 }],
});
expect(parsed.templateId).toBe('pg:123');
expect(parsed.topUsers).toEqual([{ user: 'analyst', executions: 40 }]);
});
it('validates staged table, patterns, and manifest artifacts', () => {
expect(
stagedTableInputSchema.parse({
table: 'public.orders',
stats: {
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
errorRateBucket: 'none',
p95RuntimeBucket: '<100ms',
recencyBucket: 'current',
},
columnsByClause: {
select: [['status', 'high']],
where: [['created_at', 'mid']],
},
observedJoins: [{ withTable: 'public.customers', on: ['customer_id'], freq: 'high' }],
topTemplates: [{ id: 'pg:123', canonicalSql: 'select * from public.orders', topUsers: [{ user: 'analyst' }] }],
}).table,
).toBe('public.orders');
expect(
stagedPatternsInputSchema.parse({
templates: [
{
id: 'pg:123',
canonicalSql: 'select * from public.orders',
tablesTouched: ['public.orders'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
],
}).templates,
).toHaveLength(1);
expect(
stagedManifestSchema.parse({
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 2,
touchedTableCount: 1,
parseFailures: 1,
warnings: ['parse_failed:bad'],
probeWarnings: [],
staleArchiveAfterDays: 90,
}).staleArchiveAfterDays,
).toBe(90);
});
});

View file

@ -1,107 +0,0 @@
import { mkdtemp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import type { KtxSchemaSnapshot } from '../../../scan/types.js';
import { chunkLiveDatabaseStagedDir } from './chunk.js';
import { liveDatabaseTablePath, writeLiveDatabaseSnapshot } from './stage.js';
function snapshot(): KtxSchemaSnapshot {
return {
connectionId: 'conn-1',
driver: 'postgres',
extractedAt: '2026-04-27T00:00:00.000Z',
scope: { schemas: ['public'] },
metadata: {},
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
{
name: 'customers',
catalog: null,
db: 'public',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
],
};
}
describe('chunkLiveDatabaseStagedDir', () => {
it('emits one work unit per table on the first run', async () => {
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-chunk-'));
await writeLiveDatabaseSnapshot(dir, snapshot());
const result = await chunkLiveDatabaseStagedDir(dir);
expect(result.workUnits.map((wu) => wu.unitKey)).toEqual([
'live-database-public-customers',
'live-database-public-orders',
]);
expect(result.workUnits[0]?.dependencyPaths).toEqual(['connection.json', 'foreign-keys.json']);
expect(result.workUnits[0]?.peerFileIndex).toContain(
liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' }),
);
});
it('keeps only changed tables during incremental syncs and records table evictions', async () => {
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-diff-'));
await writeLiveDatabaseSnapshot(dir, snapshot());
const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' });
const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' });
const result = await chunkLiveDatabaseStagedDir(dir, {
added: [],
modified: [ordersPath],
deleted: [customersPath],
unchanged: ['connection.json', 'foreign-keys.json'],
});
expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']);
expect(result.eviction?.deletedRawPaths).toEqual([customersPath]);
});
it('fans out all table work units when the foreign-key index changes', async () => {
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-fk-'));
await writeLiveDatabaseSnapshot(dir, snapshot());
const result = await chunkLiveDatabaseStagedDir(dir, {
added: [],
modified: ['foreign-keys.json'],
deleted: [],
unchanged: [],
});
expect(result.workUnits).toHaveLength(2);
});
});

View file

@ -1,262 +0,0 @@
import { once } from 'node:events';
import { createServer } from 'node:http';
import { describe, expect, it, vi } from 'vitest';
import { tableRefSet } from '../../../scan/table-ref.js';
import { createDaemonLiveDatabaseIntrospection } from './daemon-introspection.js';
const daemonResponse = {
connection_id: 'warehouse',
extracted_at: '2026-04-28T10:00:00+00:00',
metadata: { driver: 'postgres', schemas: ['public'] },
tables: [
{
catalog: 'warehouse',
db: 'public',
name: 'customers',
comment: null,
columns: [{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: null }],
foreign_keys: [],
},
{
catalog: 'warehouse',
db: 'public',
name: 'orders',
comment: 'Order facts',
columns: [
{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: 'Order id' },
{ name: 'customer_id', type: 'integer', nullable: false, primary_key: false, comment: null },
],
foreign_keys: [
{
from_column: 'customer_id',
to_table: 'customers',
to_column: 'id',
constraint_name: 'orders_customer_id_fkey',
},
],
},
],
};
describe('createDaemonLiveDatabaseIntrospection', () => {
it('calls the database-introspect daemon command and maps the snapshot response', async () => {
const runJson = vi.fn(async () => daemonResponse);
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
},
},
schemas: ['public'],
runJson,
});
await expect(introspection.extractSchema('warehouse')).resolves.toEqual({
connectionId: 'warehouse',
driver: 'postgres',
extractedAt: '2026-04-28T10:00:00+00:00',
scope: { schemas: ['public'] },
metadata: { driver: 'postgres', schemas: ['public'] },
tables: [
{
catalog: 'warehouse',
db: 'public',
name: 'customers',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
{
catalog: 'warehouse',
db: 'public',
name: 'orders',
kind: 'table',
comment: 'Order facts',
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
{
name: 'customer_id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: null,
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fkey',
},
],
},
],
});
expect(runJson).toHaveBeenCalledWith('database-introspect', {
connection_id: 'warehouse',
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
schemas: ['public'],
statement_timeout_ms: 30_000,
connection_timeout_seconds: 5,
});
});
it('calls a running daemon HTTP endpoint when baseUrl is configured', async () => {
const requests: Array<{ url: string | undefined; body: unknown }> = [];
const server = createServer((request, response) => {
const chunks: Buffer[] = [];
request.on('data', (chunk: Buffer) => chunks.push(chunk));
request.on('end', () => {
requests.push({
url: request.url,
body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
});
response.writeHead(200, { 'content-type': 'application/json' });
response.end(JSON.stringify(daemonResponse));
});
});
server.listen(0, '127.0.0.1');
await once(server, 'listening');
try {
const address = server.address();
if (!address || typeof address === 'string') {
throw new Error('expected TCP server address');
}
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
},
},
baseUrl: `http://127.0.0.1:${address.port}`,
});
await expect(
introspection.extractSchema('warehouse', {
tableScope: tableRefSet([{ catalog: 'warehouse', db: 'public', name: 'orders' }]),
}),
).resolves.toMatchObject({
connectionId: 'warehouse',
tables: [{ name: 'customers' }, { name: 'orders' }],
});
expect(requests).toEqual([
{
url: '/database/introspect',
body: {
connection_id: 'warehouse',
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
schemas: ['public'],
statement_timeout_ms: 30_000,
connection_timeout_seconds: 5,
table_scope: [{ catalog: 'warehouse', db: 'public', name: 'orders' }],
},
},
]);
} finally {
server.close();
}
});
it('requires a configured postgres connection with a url', async () => {
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
},
},
runJson: vi.fn(async () => daemonResponse),
});
await expect(introspection.extractSchema('warehouse')).rejects.toThrow(
'Local live-database ingest requires connections.warehouse.url.',
);
});
it('rejects unsupported local connection drivers before calling the daemon', async () => {
const runJson = vi.fn(async () => daemonResponse);
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'snowflake',
url: 'snowflake://example',
},
},
runJson,
});
await expect(introspection.extractSchema('warehouse')).rejects.toThrow(
'Local live-database ingest cannot run driver "snowflake".',
);
expect(runJson).not.toHaveBeenCalled();
});
it('does not use connection enabled_tables as a response filter', async () => {
const runJson = vi.fn(async () => daemonResponse);
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
enabled_tables: ['public.orders'],
},
},
schemas: ['public'],
runJson,
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual(['public.customers', 'public.orders']);
expect(runJson).toHaveBeenCalledWith('database-introspect', expect.not.objectContaining({ table_scope: expect.anything() }));
});
it('passes through every table when enabled_tables is omitted or empty', async () => {
const runJson = vi.fn(async () => daemonResponse);
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
enabled_tables: [],
},
},
schemas: ['public'],
runJson,
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot.tables.map((table) => table.name)).toEqual(['customers', 'orders']);
});
});

View file

@ -1,111 +0,0 @@
import { mkdtemp, readdir, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { tableRefSet, type KtxTableRefKey } from '../../../scan/table-ref.js';
import { LiveDatabaseSourceAdapter } from './live-database.adapter.js';
describe('LiveDatabaseSourceAdapter', () => {
it('fetches a schema snapshot through the introspection port', async () => {
const extractSchema = vi.fn().mockResolvedValue({
connectionId: 'conn-1',
driver: 'postgres',
extractedAt: '2026-04-27T00:00:00.000Z',
scope: { schemas: ['public'] },
metadata: {},
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
],
});
const adapter = new LiveDatabaseSourceAdapter({
introspection: { extractSchema },
now: () => new Date('2026-04-27T00:00:00.000Z'),
});
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-adapter-'));
await adapter.fetch(undefined, dir, { connectionId: 'conn-1', sourceKey: 'live-database' });
expect(extractSchema).toHaveBeenCalledWith('conn-1', { tableScope: undefined });
await expect(adapter.detect(dir)).resolves.toBe(true);
const chunked = await adapter.chunk(dir);
expect(chunked.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']);
});
it('declares the live database source and skill', () => {
const adapter = new LiveDatabaseSourceAdapter({
introspection: { extractSchema: vi.fn() },
});
expect(adapter.source).toBe('live-database');
expect(adapter.skillNames).toEqual(['live_database_ingest']);
});
it('threads tableScope from fetch context into the introspection port without post-filtering', async () => {
const extractSchema = vi.fn(
async (_connectionId: string, _options?: { tableScope?: ReadonlySet<KtxTableRefKey> }) => ({
connectionId: 'warehouse',
driver: 'snowflake' as const,
extractedAt: '2026-05-22T00:00:00.000Z',
scope: {},
metadata: {},
tables: [
{
catalog: 'A',
db: 'MARTS',
name: 'IN_SCOPE',
kind: 'table' as const,
comment: null,
estimatedRows: 0,
columns: [],
foreignKeys: [],
},
{
catalog: 'A',
db: 'MARTS',
name: 'OUT_OF_SCOPE',
kind: 'table' as const,
comment: null,
estimatedRows: 0,
columns: [],
foreignKeys: [],
},
],
}),
);
const scope = tableRefSet([{ catalog: 'A', db: 'MARTS', name: 'IN_SCOPE' }]);
const adapter = new LiveDatabaseSourceAdapter({
introspection: { extractSchema },
});
const stagedDir = await mkdtemp(join(tmpdir(), 'ktx-livedb-scope-'));
try {
await adapter.fetch(undefined, stagedDir, {
connectionId: 'warehouse',
sourceKey: 'live-database',
tableScope: scope,
});
expect(extractSchema).toHaveBeenCalledWith('warehouse', { tableScope: scope });
const tables = await readdir(join(stagedDir, 'tables'));
expect(tables).toHaveLength(2);
} finally {
await rm(stagedDir, { recursive: true, force: true });
}
});
});

View file

@ -1,308 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
buildLiveDatabaseManifestShards,
type LiveDatabaseManifestExistingDescriptions,
type LiveDatabaseManifestJoinEntry,
type LiveDatabaseManifestShard,
} from './manifest.js';
function shardObject(shards: Map<string, LiveDatabaseManifestShard>): Record<string, LiveDatabaseManifestShard> {
return Object.fromEntries([...shards.entries()].sort(([a], [b]) => a.localeCompare(b)));
}
describe('buildLiveDatabaseManifestShards', () => {
it('builds shard objects with generated joins and preserved external descriptions', () => {
const existingDescriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>([
[
'orders',
{
table: { user: 'Pinned analyst description', db: 'Old db description' },
columns: new Map([['id', { user: 'Pinned id description', db: 'Old id description' }]]),
},
],
]);
const preservedJoins = new Map<string, LiveDatabaseManifestJoinEntry[]>([
[
'orders',
[
{
to: 'customers',
on: 'orders.account_id = customers.id',
relationship: 'many_to_one',
source: 'manual',
},
{
to: 'missing_accounts',
on: 'orders.account_id = missing_accounts.id',
relationship: 'many_to_one',
source: 'manual',
},
],
],
]);
const result = buildLiveDatabaseManifestShards({
connectionType: 'POSTGRESQL',
mapColumnType: (nativeType) => nativeType.toLowerCase(),
existingDescriptions,
existingPreservedJoins: preservedJoins,
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
descriptions: { db: 'Fresh db description', ai: 'Generated AI description' },
columns: [
{
name: 'id',
type: 'INTEGER',
pk: true,
nullable: false,
descriptions: { db: 'Fresh id description' },
},
{
name: 'customer_id',
type: 'INTEGER',
},
],
},
{
name: 'customers',
catalog: null,
db: 'public',
columns: [
{
name: 'id',
type: 'INTEGER',
pk: true,
nullable: false,
},
],
},
],
joins: [
{
fromTable: 'orders',
fromColumns: ['customer_id'],
toTable: 'customers',
toColumns: ['id'],
relationship: 'MANY_TO_ONE',
source: 'formal',
},
],
});
expect(result.tablesProcessed).toBe(2);
expect(shardObject(result.shards)).toEqual({
public: {
tables: {
orders: {
table: 'public.orders',
descriptions: {
user: 'Pinned analyst description',
db: 'Fresh db description',
ai: 'Generated AI description',
},
columns: [
{
name: 'id',
type: 'integer',
pk: true,
nullable: false,
descriptions: {
user: 'Pinned id description',
db: 'Fresh id description',
},
},
{
name: 'customer_id',
type: 'integer',
},
],
joins: [
{
to: 'customers',
on: 'orders.customer_id = customers.id',
relationship: 'many_to_one',
source: 'formal',
},
{
to: 'customers',
on: 'orders.account_id = customers.id',
relationship: 'many_to_one',
source: 'manual',
},
],
},
customers: {
table: 'public.customers',
columns: [
{
name: 'id',
type: 'integer',
pk: true,
nullable: false,
},
],
joins: [
{
to: 'orders',
on: 'customers.id = orders.customer_id',
relationship: 'one_to_many',
source: 'formal',
},
],
},
},
},
});
});
it('uses warehouse and schema shard keys for snowflake-style connections', () => {
const result = buildLiveDatabaseManifestShards({
connectionType: 'SNOWFLAKE',
mapColumnType: (nativeType) => nativeType.toLowerCase(),
tables: [
{
name: 'accounts',
catalog: 'ANALYTICS',
db: 'CORE',
columns: [{ name: 'id', type: 'NUMBER' }],
},
],
joins: [],
});
expect(shardObject(result.shards)).toEqual({
'ANALYTICS.CORE': {
tables: {
accounts: {
table: 'ANALYTICS.CORE.accounts',
columns: [{ name: 'id', type: 'number' }],
},
},
},
});
});
it('preserves external usage keys while replacing historic SQL managed keys', () => {
const existingUsage = new Map([
[
'orders',
{
narrative: 'Old generated usage narrative.',
frequencyTier: 'low' as const,
commonFilters: ['old_status'],
commonJoins: [],
ownerNote: 'Pinned analyst note',
},
],
]);
const result = buildLiveDatabaseManifestShards({
connectionType: 'POSTGRESQL',
mapColumnType: (nativeType) => nativeType.toLowerCase(),
existingUsage,
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
usage: {
narrative: 'Fresh generated usage narrative.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['created_at'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
},
columns: [{ name: 'id', type: 'INTEGER' }],
},
],
joins: [],
});
expect(shardObject(result.shards)).toEqual({
public: {
tables: {
orders: {
table: 'public.orders',
usage: {
ownerNote: 'Pinned analyst note',
narrative: 'Fresh generated usage narrative.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['created_at'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
},
columns: [{ name: 'id', type: 'integer' }],
},
},
},
});
});
it('renders ordered multi-column joins in both directions', () => {
const result = buildLiveDatabaseManifestShards({
connectionType: 'POSTGRESQL',
mapColumnType: (nativeType) => nativeType,
tables: [
{
name: 'order_lines',
catalog: null,
db: 'public',
columns: [
{ name: 'order_id', type: 'integer' },
{ name: 'line_number', type: 'integer' },
],
},
{
name: 'order_line_allocations',
catalog: null,
db: 'public',
columns: [
{ name: 'order_id', type: 'integer' },
{ name: 'line_number', type: 'integer' },
],
},
],
joins: [
{
fromTable: 'order_line_allocations',
fromColumns: ['order_id', 'line_number'],
toTable: 'order_lines',
toColumns: ['order_id', 'line_number'],
relationship: 'many_to_one',
source: 'inferred',
},
],
});
expect(shardObject(result.shards)).toMatchObject({
public: {
tables: {
order_line_allocations: {
joins: [
{
to: 'order_lines',
on: 'order_line_allocations.order_id = order_lines.order_id AND order_line_allocations.line_number = order_lines.line_number',
relationship: 'many_to_one',
source: 'inferred',
},
],
},
order_lines: {
joins: [
{
to: 'order_line_allocations',
on: 'order_lines.order_id = order_line_allocations.order_id AND order_lines.line_number = order_line_allocations.line_number',
relationship: 'one_to_many',
source: 'inferred',
},
],
},
},
},
});
});
});

View file

@ -1,178 +0,0 @@
import { mkdtemp, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import {
detectLiveDatabaseStagedDir,
LIVE_DATABASE_FOREIGN_KEYS_FILE,
LIVE_DATABASE_META_FILE,
LIVE_DATABASE_WARNINGS_FILE,
liveDatabaseTablePath,
readLiveDatabaseTableFiles,
writeLiveDatabaseSnapshot,
} from './stage.js';
import type { KtxSchemaSnapshot } from '../../../scan/types.js';
function snapshot(): KtxSchemaSnapshot {
return {
connectionId: 'conn-1',
driver: 'postgres',
extractedAt: '2026-04-27T00:00:00.000Z',
scope: { schemas: ['public'] },
metadata: { dialect: 'postgres' },
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
kind: 'table',
comment: 'Orders placed by customers',
estimatedRows: 200,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
{
name: 'customer_id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: false,
comment: null,
},
{
name: 'total',
nativeType: 'numeric',
normalizedType: 'numeric',
dimensionType: 'number',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: 'public',
toTable: 'customers',
toColumn: 'id',
constraintName: null,
},
],
},
{
name: 'customers',
catalog: null,
db: 'public',
kind: 'table',
comment: null,
estimatedRows: 50,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
],
};
}
describe('live-database staged snapshot files', () => {
it('writes deterministic metadata, table, and foreign-key files', async () => {
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-stage-'));
await writeLiveDatabaseSnapshot(dir, snapshot());
await expect(readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8')).resolves.toContain('"connectionId": "conn-1"');
await expect(readFile(join(dir, LIVE_DATABASE_FOREIGN_KEYS_FILE), 'utf8')).resolves.toContain(
'"fromTable": "orders"',
);
const connectionJson = await readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8');
expect(connectionJson).toContain('"driver": "postgres"');
expect(connectionJson).toContain('"schemas"');
const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' });
const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' });
expect(ordersPath).toMatch(/^tables\/[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.json$/);
await expect(readFile(join(dir, ordersPath), 'utf8')).resolves.toContain('"name": "orders"');
await expect(readFile(join(dir, customersPath), 'utf8')).resolves.toContain('"name": "customers"');
const ordersJson = await readFile(join(dir, ordersPath), 'utf8');
expect(ordersJson).toContain('"kind": "table"');
expect(ordersJson).toContain('"estimatedRows": 200');
expect(ordersJson).toContain('"nativeType": "integer"');
expect(ordersJson).toContain('"normalizedType": "integer"');
expect(ordersJson).not.toContain('"type": "integer"');
const tableFiles = await readLiveDatabaseTableFiles(dir);
expect(tableFiles.map((file) => file.table.name)).toEqual(['customers', 'orders']);
expect(await detectLiveDatabaseStagedDir(dir)).toBe(true);
});
it('redacts sensitive snapshot metadata before writing connection metadata', async () => {
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-redacted-stage-'));
await writeLiveDatabaseSnapshot(dir, {
...snapshot(),
metadata: {
dialect: 'postgres',
url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret
serviceAccountJson: {
client_email: 'reader@example.test',
private_key: 'pem-value', // pragma: allowlist secret
},
},
});
const connectionJson = await readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8');
expect(connectionJson).toContain('"dialect": "postgres"');
expect(connectionJson).toContain('"client_email": "reader@example.test"');
expect(connectionJson).toContain('"url": "<redacted>"');
expect(connectionJson).toContain('"private_key": "<redacted>"');
expect(connectionJson).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret
expect(connectionJson).not.toContain('pem-value');
});
it('writes redacted scan warnings next to live database metadata', async () => {
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-warning-stage-'));
await writeLiveDatabaseSnapshot(dir, {
...snapshot(),
warnings: [
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
recoverable: true,
metadata: {
schema: 'public',
kind: 'primary_key',
url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret
},
},
],
});
const warningsJson = await readFile(join(dir, LIVE_DATABASE_WARNINGS_FILE), 'utf8');
expect(warningsJson).toContain('"constraint_discovery_unauthorized"');
expect(warningsJson).toContain('"schema": "public"');
expect(warningsJson).toContain('"url": "<redacted>"');
expect(warningsJson).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret
});
it('returns false for a directory that is missing live database metadata', async () => {
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-empty-'));
expect(await detectLiveDatabaseStagedDir(dir)).toBe(false);
});
});

View file

@ -1,154 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { chunkLookerStagedDir } from './chunk.js';
import { writeLookerEvidenceDocuments } from './evidence-documents.js';
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
const abs = join(stagedDir, relPath);
await mkdir(join(abs, '..'), { recursive: true });
await writeFile(abs, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
async function writeSmallFixture(stagedDir: string): Promise<void> {
await writeJson(stagedDir, 'sync-config.json', {
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
fetchedAt: '2026-04-30T12:30:00.000Z',
});
await writeJson(stagedDir, 'lookml_models.json', {
models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
});
await writeJson(stagedDir, 'explores/b2b/sales_pipeline.json', {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
joins: [],
});
await writeJson(stagedDir, 'dashboards/10.json', {
lookerId: '10',
title: 'Sales Pipeline',
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T12:00:00.000Z',
tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
});
await writeJson(stagedDir, 'looks/20.json', {
lookerId: '20',
title: 'Open Pipeline',
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T12:00:00.000Z',
query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
});
await writeJson(stagedDir, 'folders/tree.json', {
folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }],
});
await writeJson(stagedDir, 'users/3.json', { id: '3', displayName: 'Ada Lovelace', email: null });
await writeJson(stagedDir, 'signals/dashboard_usage.json', [
{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8 },
]);
await writeJson(stagedDir, 'signals/look_usage.json', [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5 }]);
await writeJson(stagedDir, 'signals/scheduled_plans.json', [
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
]);
await writeJson(stagedDir, 'signals/favorites.json', [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
]);
await writeLookerEvidenceDocuments(stagedDir);
}
describe('chunkLookerStagedDir', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-chunk-'));
await writeSmallFixture(stagedDir);
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('emits one WU per explore, dashboard, and Look with readable dependencies', async () => {
const result = await chunkLookerStagedDir(stagedDir);
expect(result.reconcileNotes).toEqual([
expect.stringContaining('emit_artifact_resolution with actionType="subsumed"'),
]);
expect(result.workUnits.map((wu) => wu.unitKey).sort()).toEqual([
'looker-dashboard-10',
'looker-explore-b2b-sales_pipeline',
'looker-look-20',
]);
const dashboard = result.workUnits.find((wu) => wu.unitKey === 'looker-dashboard-10');
expect(dashboard?.rawFiles).toEqual([
'dashboards/10.json',
'evidence/dashboards/10/metadata.json',
'evidence/dashboards/10/page.md',
]);
expect(dashboard?.notes).toContain('context_candidate_write');
expect(dashboard?.notes).not.toContain('wiki_write');
expect(dashboard?.dependencyPaths.sort()).toEqual([
'explores/b2b/sales_pipeline.json',
'folders/tree.json',
'signals/dashboard_usage.json',
'signals/favorites.json',
'signals/scheduled_plans.json',
'users/3.json',
]);
const explore = result.workUnits.find((wu) => wu.unitKey === 'looker-explore-b2b-sales_pipeline');
expect(explore?.rawFiles).toEqual([
'explores/b2b/sales_pipeline.json',
'evidence/explores/b2b/sales_pipeline/metadata.json',
'evidence/explores/b2b/sales_pipeline/page.md',
]);
expect(explore?.dependencyPaths).toEqual(['lookml_models.json']);
});
it('keeps downstream dashboard and Look WUs when an explore dependency changes', async () => {
const result = await chunkLookerStagedDir(stagedDir, {
added: [],
modified: ['explores/b2b/sales_pipeline.json'],
deleted: [],
unchanged: [
'dashboards/10.json',
'looks/20.json',
'lookml_models.json',
'folders/tree.json',
'users/3.json',
'signals/dashboard_usage.json',
'signals/look_usage.json',
'signals/scheduled_plans.json',
'signals/favorites.json',
],
});
expect(result.workUnits.map((wu) => wu.unitKey).sort()).toEqual([
'looker-dashboard-10',
'looker-explore-b2b-sales_pipeline',
'looker-look-20',
]);
expect(result.workUnits.find((wu) => wu.unitKey === 'looker-dashboard-10')?.rawFiles).toEqual([
'dashboards/10.json',
'evidence/dashboards/10/metadata.json',
'evidence/dashboards/10/page.md',
]);
});
it('returns an EvictionUnit for deleted runtime entity raw paths', async () => {
const result = await chunkLookerStagedDir(stagedDir, {
added: [],
modified: [],
deleted: ['looks/20.json'],
unchanged: ['dashboards/10.json', 'explores/b2b/sales_pipeline.json'],
});
expect(result.eviction).toEqual({ deletedRawPaths: ['looks/20.json'] });
});
});

View file

@ -1,14 +0,0 @@
import { readFile } from 'node:fs/promises';
import { describe, expect, it } from 'vitest';
describe('LookerClient boundary', () => {
it('does not import server or NestJS modules', async () => {
const source = await readFile(new URL('./client.ts', import.meta.url), 'utf-8');
expect(source).not.toMatch(/@nestjs\/common/);
expect(source).not.toMatch(/DataSourceClient/);
expect(source).not.toMatch(/\.\.\/interfaces/);
expect(source).not.toMatch(/\.\.\/types/);
expect(source).not.toMatch(/server\/src/);
});
});

View file

@ -1,473 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { LookerClient, type LookerSdkPort } from './client.js';
const clientSecretParam = 'client_secret'; // pragma: allowlist secret
function params(): Record<string, unknown> {
return {
base_url: 'https://example.looker.com',
client_id: 'id',
[clientSecretParam]: 'credential', // pragma: allowlist secret
};
}
function sdk(overrides: Partial<LookerSdkPort> = {}): LookerSdkPort {
const port: LookerSdkPort = {
me: vi.fn().mockResolvedValue({ id: '1', display_name: 'API User', email: 'api@example.com' }),
search_dashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
dashboard: vi.fn().mockResolvedValue({
id: '10',
title: 'Revenue Dashboard',
description: 'Revenue concepts',
folder_id: '20',
user_id: '1',
updated_at: '2026-04-30T00:00:00.000Z',
dashboard_elements: [
{
id: '99',
title: 'ARR',
look_id: null,
query: {
id: 'q1',
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr', 'opportunities.stage'],
filters: { 'opportunities.stage': 'open' },
sorts: ['opportunities.arr desc'],
limit: '500',
},
},
],
}),
search_looks: vi.fn().mockResolvedValue([{ id: '30' }]),
search_scheduled_plans: vi.fn().mockResolvedValue([]),
look: vi.fn().mockResolvedValue({
id: '30',
title: 'Open Pipeline ARR',
description: 'ARR for open opportunities',
folder_id: '20',
user_id: '1',
updated_at: '2026-04-30T00:00:00.000Z',
query: {
id: 'q2',
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr'],
filters: { 'opportunities.stage': 'open' },
},
}),
all_folders: vi.fn().mockResolvedValue([{ id: '20', name: 'Executive', parent_id: null }]),
all_users: vi.fn().mockResolvedValue([{ id: '1', display_name: 'API User', email: 'api@example.com' }]),
all_groups: vi.fn().mockResolvedValue([{ id: '2', name: 'Finance' }]),
all_connections: vi.fn().mockResolvedValue([
{
name: 'b2b_sandbox_bq',
host: 'warehouse.example.com',
database: 'analytics',
schema: 'public',
dialect_name: 'bigquery_standard_sql',
},
]),
all_lookml_models: vi
.fn()
.mockResolvedValue([
{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] },
]),
lookml_model_explore: vi.fn().mockResolvedValue({
name: 'sales_pipeline',
label: 'Sales Pipeline',
description: 'Opportunity pipeline',
sql_table_name: 'proj.dataset.opportunities AS opportunities',
connection_name: 'b2b_sandbox_bq',
view_name: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '$' + '{TABLE}.stage' }],
measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '$' + '{TABLE}.arr' }],
},
joins: [
{
name: 'accounts',
type: 'left_outer',
relationship: 'many_to_one',
sql_table_name: 'proj.dataset.accounts',
sql_on: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
from: null,
},
],
}),
run_inline_query: vi.fn().mockResolvedValue('[]'),
logout: vi.fn().mockResolvedValue(undefined),
...overrides,
};
return port;
}
describe('LookerClient', () => {
it('validates credentials with me()', async () => {
const client = new LookerClient(params(), { sdkFactory: () => sdk() });
await expect(client.testConnection()).resolves.toEqual({
success: true,
metadata: { userId: '1', displayName: 'API User', email: 'api@example.com' },
});
});
it('does not warn to console when optional prioritization inputs fail by default', async () => {
const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined);
const fakeSdk = sdk({
search_dashboards: vi.fn().mockRejectedValue(new Error('dashboards unavailable')),
search_looks: vi.fn().mockRejectedValue(new Error('looks unavailable')),
});
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
await expect(client.getSignals()).resolves.toMatchObject({
dashboardUsage: [],
lookUsage: [],
scheduledPlans: [],
favorites: [],
});
expect(warn).not.toHaveBeenCalled();
});
it('maps dashboards, looks, folders, models, explores, users, and groups to staged DTOs', async () => {
const fakeSdk = sdk();
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);
await expect(client.getDashboard('10')).resolves.toMatchObject({
lookerId: '10',
title: 'Revenue Dashboard',
tiles: [{ id: '99', query: { model: 'b2b', view: 'sales_pipeline' } }],
});
await expect(client.listLooks()).resolves.toEqual([{ id: '30', updatedAt: null }]);
await expect(client.getLook('30')).resolves.toMatchObject({
lookerId: '30',
title: 'Open Pipeline ARR',
query: { model: 'b2b', view: 'sales_pipeline' },
});
await expect(client.listFolders()).resolves.toEqual({
folders: [{ id: '20', name: 'Executive', parentId: null, path: ['Executive'] }],
});
await expect(client.listLookmlModels()).resolves.toEqual({
models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
});
await expect(client.listLookerConnections()).resolves.toEqual([
{
name: 'b2b_sandbox_bq',
host: 'warehouse.example.com',
database: 'analytics',
schema: 'public',
dialect: 'bigquery_standard_sql',
},
]);
await expect(client.getExplore('b2b', 'sales_pipeline')).resolves.toMatchObject({
modelName: 'b2b',
exploreName: 'sales_pipeline',
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: { dimensions: [{ name: 'opportunities.stage' }], measures: [{ name: 'opportunities.arr' }] },
joins: [
{
name: 'accounts',
rawSqlTableName: 'proj.dataset.accounts',
sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
from: null,
targetTable: null,
},
],
targetWarehouseConnectionId: null,
targetTable: null,
});
expect(fakeSdk.dashboard).toHaveBeenCalledWith(
'10',
'id,title,description,folder_id,user_id,updated_at,dashboard_elements(id,title,look_id,query(id,model,view,fields,filters,sorts,limit,dynamic_fields))',
);
expect(fakeSdk.look).toHaveBeenCalledWith(
'30',
'id,title,description,folder_id,user_id,updated_at,query(id,model,view,fields,filters,sorts,limit,dynamic_fields)',
);
expect(fakeSdk.lookml_model_explore).toHaveBeenCalledWith(
'b2b',
'sales_pipeline',
'name,label,description,sql_table_name,connection_name,view_name,fields,joins(name,type,relationship,sql_table_name,sql_on,from)',
);
expect(fakeSdk.all_connections).toHaveBeenCalledWith('name,host,database,schema,dialect_name');
});
it('returns empty usage signals when system activity access fails', async () => {
const client = new LookerClient(params(), {
sdkFactory: () =>
sdk({
run_inline_query: vi.fn().mockRejectedValue(new Error('access denied')),
search_dashboards: vi.fn().mockResolvedValue([{ id: '10', favorite_count: 4 }]),
search_looks: vi.fn().mockResolvedValue([{ id: '30', favorite_count: 2 }]),
search_scheduled_plans: vi.fn().mockResolvedValue([]),
}),
});
await expect(client.getSignals()).resolves.toEqual({
dashboardUsage: [],
lookUsage: [],
scheduledPlans: [],
favorites: [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
{ contentId: '30', contentType: 'look', favoriteCount: 2 },
],
});
});
it('paginates dashboard and Look searches', async () => {
const dashboardPageOne = Array.from({ length: 500 }, (_, index) => ({ id: String(index + 1) }));
const lookPageOne = Array.from({ length: 500 }, (_, index) => ({ id: String(index + 1001) }));
const fakeSdk = sdk({
search_dashboards: vi
.fn()
.mockResolvedValueOnce(dashboardPageOne)
.mockResolvedValueOnce([{ id: '501' }]),
search_looks: vi
.fn()
.mockResolvedValueOnce(lookPageOne)
.mockResolvedValueOnce([{ id: '1501' }]),
});
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
await expect(client.listDashboards()).resolves.toHaveLength(501);
await expect(client.listLooks()).resolves.toHaveLength(501);
expect(fakeSdk.search_dashboards).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
deleted: false,
fields: 'id,updated_at',
limit: 500,
offset: 0,
sorts: 'id',
}),
);
expect(fakeSdk.search_dashboards).toHaveBeenNthCalledWith(
2,
expect.objectContaining({
limit: 500,
offset: 500,
}),
);
expect(fakeSdk.search_looks).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
deleted: false,
fields: 'id,updated_at',
limit: 500,
offset: 0,
sorts: 'id',
}),
);
expect(fakeSdk.search_looks).toHaveBeenNthCalledWith(
2,
expect.objectContaining({
limit: 500,
offset: 500,
}),
);
});
it('returns updatedAt cursors from dashboard and Look listing rows', async () => {
const fakeSdk = sdk({
search_dashboards: vi.fn().mockResolvedValue([{ id: '10', updated_at: '2026-04-30T12:00:00.000Z' }]),
search_looks: vi.fn().mockResolvedValue([{ id: '30', updated_at: '2026-04-30T11:00:00.000Z' }]),
});
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' }]);
await expect(client.listLooks()).resolves.toEqual([{ id: '30', updatedAt: '2026-04-30T11:00:00.000Z' }]);
});
it('logs out the SDK session during cleanup', async () => {
const fakeSdk = sdk();
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
await client.testConnection();
await client.cleanup();
expect(fakeSdk.logout).toHaveBeenCalledTimes(1);
});
it('aggregates usage, scheduled-plan, and favorite signals', async () => {
const runInlineQuery = vi
.fn()
.mockResolvedValueOnce(
JSON.stringify([
{
'dashboard.id': '10',
'history.query_run_count': 3,
'history.created_date': '2026-04-30',
'user.id': 'user-1',
},
{
'dashboard.id': '10',
'history.query_run_count': '2',
'history.created_date': '2026-04-29',
'user.id': 'user-2',
},
]),
)
.mockResolvedValueOnce(
JSON.stringify([
{
'look.id': '30',
'history.query_run_count': 7,
'history.created_date': '2026-04-28',
'user.id': 'user-1',
},
]),
);
const fakeSdk = sdk({
run_inline_query: runInlineQuery,
search_dashboards: vi.fn().mockResolvedValueOnce([{ id: '10', favorite_count: 4 }]),
search_looks: vi.fn().mockResolvedValueOnce([{ id: '30', favorite_count: 2 }]),
search_scheduled_plans: vi.fn().mockResolvedValueOnce([
{
id: 'sp-dashboard',
dashboard_id: '10',
look_id: null,
enabled: true,
scheduled_plan_destination: [{ id: 'dest-1' }, { id: 'dest-2' }],
},
{
id: 'sp-look',
dashboard_id: null,
look_id: '30',
enabled: true,
scheduled_plan_destination: [{ id: 'dest-3' }],
},
]),
});
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
await expect(client.getSignals()).resolves.toEqual({
dashboardUsage: [
{
contentId: '10',
queryCount30d: 5,
uniqueUsers30d: 2,
lastRunAt: '2026-04-30',
topUsers: ['user-1', 'user-2'],
},
],
lookUsage: [
{
contentId: '30',
queryCount30d: 7,
uniqueUsers30d: 1,
lastRunAt: '2026-04-28',
topUsers: ['user-1'],
},
],
scheduledPlans: [
{
contentId: '10',
contentType: 'dashboard',
isScheduled: true,
scheduleCount: 1,
recipientCount: 2,
},
{
contentId: '30',
contentType: 'look',
isScheduled: true,
scheduleCount: 1,
recipientCount: 1,
},
],
favorites: [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
{ contentId: '30', contentType: 'look', favoriteCount: 2 },
],
});
expect(runInlineQuery).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
result_format: 'json',
body: expect.objectContaining({
model: 'system__activity',
view: 'history',
fields: ['dashboard.id', 'history.query_run_count', 'history.created_date', 'user.id'],
}),
}),
);
expect(fakeSdk.search_scheduled_plans).toHaveBeenCalledWith(
expect.objectContaining({
all_users: true,
fields: 'id,dashboard_id,look_id,enabled,scheduled_plan_destination',
limit: 500,
offset: 0,
sorts: 'id',
}),
);
});
it('retries a 429 response once using Retry-After seconds', async () => {
const sleep = vi.fn().mockResolvedValue(undefined);
const rateLimitError = new Error('rate limited');
Object.assign(rateLimitError, { statusCode: 429, headers: { 'retry-after': '2' } });
const fakeSdk = sdk({
search_dashboards: vi
.fn()
.mockRejectedValueOnce(rateLimitError)
.mockResolvedValueOnce([{ id: '10' }]),
});
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk, sleep });
await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);
expect(sleep).toHaveBeenCalledWith(2000);
expect(fakeSdk.search_dashboards).toHaveBeenCalledTimes(2);
});
it('does not retry non-429 errors', async () => {
const sleep = vi.fn().mockResolvedValue(undefined);
const error = new Error('broken dashboard');
Object.assign(error, { statusCode: 500 });
const fakeSdk = sdk({ dashboard: vi.fn().mockRejectedValue(error) });
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk, sleep });
await expect(client.getDashboard('10')).rejects.toThrow('broken dashboard');
expect(sleep).not.toHaveBeenCalled();
expect(fakeSdk.dashboard).toHaveBeenCalledTimes(1);
});
it('initializes the real @looker/sdk-node SDK with inline credentials without throwing', async () => {
const client = new LookerClient(params());
const result = await client.testConnection();
// Without injected sdkFactory the real SDK is constructed via InlineLookerSettings.
// This used to throw "Missing required configuration values like base_url" because
// the parent NodeSettingsIniFile constructor validated config before the override
// could supply credentials. Whatever happens now (auth/network failure against the
// bogus example URL is fine) — what must NOT happen is a synchronous SDK-init throw.
expect(result.success).toBe(false);
expect(result.error).toBeDefined();
expect(result.error).not.toMatch(/Missing required configuration values/i);
await client.cleanup();
});
it('strips trailing /api/4.0 from base_url so the SDK does not double-prefix it', async () => {
const clientWithSuffix = new LookerClient({
base_url: 'https://example.looker.com/api/4.0',
client_id: 'id',
[clientSecretParam]: 'credential', // pragma: allowlist secret
});
const result = await clientWithSuffix.testConnection();
expect(result.success).toBe(false);
// If base_url is double-prefixed the SDK would hit /api/4.0/api/4.0/login. Either
// the URL is correctly normalized (transport-level network failure) or we'd see a
// 404/HTML response — either way the stack must not be a config-validation throw.
expect(result.error).not.toMatch(/Missing required configuration values/i);
await clientWithSuffix.cleanup();
});
});

View file

@ -1,44 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createDaemonLookerTableIdentifierParser } from './daemon-table-identifier-parser.js';
describe('createDaemonLookerTableIdentifierParser', () => {
it('posts parse items to the daemon endpoint', async () => {
const requestJson = vi.fn(async () => ({
results: {
orders: {
ok: true,
catalog: null,
schema: 'public',
name: 'orders',
canonical_table: 'public.orders',
},
},
}));
const parser = createDaemonLookerTableIdentifierParser({
baseUrl: 'http://127.0.0.1:8765',
requestJson,
});
await expect(parser.parse([{ key: 'orders', sql_table_name: 'public.orders', dialect: 'postgres' }])).resolves.toEqual({
orders: {
ok: true,
catalog: null,
schema: 'public',
name: 'orders',
canonical_table: 'public.orders',
},
});
expect(requestJson).toHaveBeenCalledWith('/sql/parse-table-identifier', {
items: [{ key: 'orders', sql_table_name: 'public.orders', dialect: 'postgres' }],
});
});
it('rejects non-object daemon responses', async () => {
const parser = createDaemonLookerTableIdentifierParser({
baseUrl: 'http://127.0.0.1:8765',
requestJson: async () => ({ results: null }),
});
await expect(parser.parse([])).rejects.toThrow('ktx-daemon table identifier parser returned invalid results');
});
});

View file

@ -1,47 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { detectLookerStagedDir } from './detect.js';
async function touch(stagedDir: string, relPath: string, body = '{}\n'): Promise<void> {
const abs = join(stagedDir, relPath);
await mkdir(join(abs, '..'), { recursive: true });
await writeFile(abs, body, 'utf-8');
}
describe('detectLookerStagedDir', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-detect-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('returns true when sync-config.json and at least one runtime entity are present', async () => {
await touch(stagedDir, 'sync-config.json');
await touch(stagedDir, 'explores/b2b/sales_pipeline.json');
expect(await detectLookerStagedDir(stagedDir)).toBe(true);
});
it('returns true for dashboard-only staged dirs', async () => {
await touch(stagedDir, 'sync-config.json');
await touch(stagedDir, 'dashboards/10.json');
expect(await detectLookerStagedDir(stagedDir)).toBe(true);
});
it('returns false without sync-config.json', async () => {
await touch(stagedDir, 'looks/20.json');
expect(await detectLookerStagedDir(stagedDir)).toBe(false);
});
it('returns false when only control files are present', async () => {
await touch(stagedDir, 'sync-config.json');
await touch(stagedDir, 'lookml_models.json');
await touch(stagedDir, 'signals/dashboard_usage.json', '[]\n');
expect(await detectLookerStagedDir(stagedDir)).toBe(false);
});
});

View file

@ -1,188 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { dirname, join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { getLookerTriageSignals, writeLookerEvidenceDocuments } from './evidence-documents.js';
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
const target = join(root, relPath);
await mkdir(dirname(target), { recursive: true });
await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
async function readJson<T>(root: string, relPath: string): Promise<T> {
return JSON.parse(await readFile(join(root, relPath), 'utf-8')) as T;
}
describe('Looker evidence documents', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-evidence-docs-'));
await writeJson(stagedDir, 'explores/b2b/sales_pipeline.json', {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: 'Pipeline analysis explore.',
fields: {
dimensions: [
{ name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '${TABLE}.stage', description: null },
],
measures: [
{
name: 'opportunities.arr',
label: 'ARR',
type: 'sum',
sql: '${TABLE}.arr',
description: 'Annual recurring revenue.',
},
],
},
joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }],
});
await writeJson(stagedDir, 'dashboards/10.json', {
lookerId: '10',
title: 'Sales Pipeline Overview',
description: 'Executive dashboard for open pipeline ARR.',
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T10:00:00.000Z',
tiles: [
{
id: '100',
title: 'Open Pipeline ARR',
lookId: null,
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr', 'opportunities.stage'],
filters: { 'opportunities.stage': 'open' },
sorts: ['opportunities.arr desc'],
limit: '500',
},
},
],
});
await writeJson(stagedDir, 'looks/20.json', {
lookerId: '20',
title: 'Active Opportunity Pipeline',
description: 'Saved Look for active opportunity pipeline review.',
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T11:00:00.000Z',
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr'],
filters: { 'opportunities.stage': 'open' },
sorts: [],
limit: '500',
},
});
await writeJson(stagedDir, 'signals/dashboard_usage.json', [
{
contentId: '10',
queryCount30d: 80,
uniqueUsers30d: 12,
lastRunAt: '2026-04-30T09:00:00.000Z',
topUsers: ['3'],
},
]);
await writeJson(stagedDir, 'signals/look_usage.json', [
{
contentId: '20',
queryCount30d: 2,
uniqueUsers30d: 1,
lastRunAt: '2026-04-29T09:00:00.000Z',
topUsers: ['3'],
},
]);
await writeJson(stagedDir, 'signals/scheduled_plans.json', [
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 2, recipientCount: 5 },
]);
await writeJson(stagedDir, 'signals/favorites.json', [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
]);
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('writes indexable metadata and markdown for explores, dashboards, and Looks', async () => {
await writeLookerEvidenceDocuments(stagedDir);
await expect(readJson(stagedDir, 'evidence/explores/b2b/sales_pipeline/metadata.json')).resolves.toMatchObject({
objectType: 'looker_explore',
id: 'looker:explore:b2b.sales_pipeline',
title: 'Sales Pipeline',
path: 'Looker / Explores / b2b.sales_pipeline',
properties: {
rawPath: 'explores/b2b/sales_pipeline.json',
modelName: 'b2b',
exploreName: 'sales_pipeline',
},
});
await expect(readJson(stagedDir, 'evidence/dashboards/10/metadata.json')).resolves.toMatchObject({
objectType: 'looker_dashboard',
id: 'looker:dashboard:10',
title: 'Sales Pipeline Overview',
path: 'Looker / Dashboards / Sales Pipeline Overview',
lastEditedAt: '2026-04-30T10:00:00.000Z',
properties: {
rawPath: 'dashboards/10.json',
lookerId: '10',
},
});
await expect(readJson(stagedDir, 'evidence/looks/20/metadata.json')).resolves.toMatchObject({
objectType: 'looker_look',
id: 'looker:look:20',
title: 'Active Opportunity Pipeline',
path: 'Looker / Looks / Active Opportunity Pipeline',
properties: {
rawPath: 'looks/20.json',
lookerId: '20',
},
});
const dashboardMarkdown = await readFile(join(stagedDir, 'evidence/dashboards/10/page.md'), 'utf-8');
expect(dashboardMarkdown).toContain('# Sales Pipeline Overview');
expect(dashboardMarkdown).toContain('Executive dashboard for open pipeline ARR.');
expect(dashboardMarkdown).toContain('## Tile: Open Pipeline ARR');
expect(dashboardMarkdown).toContain('- model: b2b');
expect(dashboardMarkdown).toContain('- explore: sales_pipeline');
expect(dashboardMarkdown).toContain('- opportunities.stage = open');
expect(dashboardMarkdown).not.toContain('80');
expect(dashboardMarkdown).not.toContain('queryCount30d');
expect(dashboardMarkdown).not.toContain('recipient');
expect(dashboardMarkdown).not.toContain('favorite');
expect(dashboardMarkdown).not.toContain('owner');
});
it('returns usage-aware triage signals without exposing usage as document prose', async () => {
await writeLookerEvidenceDocuments(stagedDir);
await expect(getLookerTriageSignals(stagedDir, 'looker:dashboard:10')).resolves.toEqual({
objectType: 'looker_dashboard',
propertyHints: {
contentType: 'dashboard',
queryCount30d: '80',
uniqueUsers30d: '12',
isScheduled: 'true',
favoriteCount: '4',
},
lastEditedAt: '2026-04-30T10:00:00.000Z',
});
await expect(getLookerTriageSignals(stagedDir, 'looker:look:20')).resolves.toEqual({
objectType: 'looker_look',
propertyHints: {
contentType: 'look',
queryCount30d: '2',
uniqueUsers30d: '1',
isScheduled: 'false',
favoriteCount: '0',
},
lastEditedAt: '2026-04-30T11:00:00.000Z',
});
});
});

View file

@ -1,74 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import type { FetchContext } from '../../types.js';
import type { LookerSdkPort } from './client.js';
import {
DefaultLookerClientFactory,
DefaultLookerConnectionClientFactory,
type LookerCredentialResolver,
} from './factory.js';
import type { LookerRuntimeClient } from './fetch.js';
import type { LookerPullConfig } from './types.js';
function sdk(): LookerSdkPort {
return {
me: vi.fn().mockResolvedValue({ id: '1', display_name: 'API User', email: 'api@example.com' }),
search_dashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
dashboard: vi.fn(),
search_looks: vi.fn().mockResolvedValue([]),
search_scheduled_plans: vi.fn().mockResolvedValue([]),
look: vi.fn(),
all_folders: vi.fn().mockResolvedValue([]),
all_users: vi.fn().mockResolvedValue([]),
all_groups: vi.fn().mockResolvedValue([]),
all_connections: vi.fn().mockResolvedValue([]),
all_lookml_models: vi.fn().mockResolvedValue([]),
lookml_model_explore: vi.fn(),
run_inline_query: vi.fn().mockResolvedValue('[]'),
logout: vi.fn().mockResolvedValue(undefined),
};
}
describe('DefaultLookerConnectionClientFactory', () => {
it('resolves credentials by Looker connection id and creates a KTX Looker client', async () => {
const fakeSdk = sdk();
const resolver: LookerCredentialResolver = {
resolve: vi.fn().mockResolvedValue({
base_url: 'https://example.looker.com',
client_id: 'id',
client_secret: 'credential', // pragma: allowlist secret
}),
};
const factory = new DefaultLookerConnectionClientFactory(resolver, { sdkFactory: () => fakeSdk });
const client = await factory.createClient('prod-looker');
await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);
expect(resolver.resolve).toHaveBeenCalledWith('prod-looker');
});
});
describe('DefaultLookerClientFactory', () => {
const ctx: FetchContext = { connectionId: 'ctx-looker', sourceKey: 'looker' };
it('uses pullConfig.lookerConnectionId when present', async () => {
const runtimeClient = { listDashboards: vi.fn() } as unknown as LookerRuntimeClient;
const inner = { createClient: vi.fn().mockResolvedValue(runtimeClient) };
const factory = new DefaultLookerClientFactory(inner);
const config = { lookerConnectionId: 'prod-looker' } as LookerPullConfig;
await expect(factory.createClient(config, ctx)).resolves.toBe(runtimeClient);
expect(inner.createClient).toHaveBeenCalledWith('prod-looker');
});
it('falls back to ctx.connectionId when pullConfig.lookerConnectionId is absent', async () => {
const runtimeClient = { listDashboards: vi.fn() } as unknown as LookerRuntimeClient;
const inner = { createClient: vi.fn().mockResolvedValue(runtimeClient) };
const factory = new DefaultLookerClientFactory(inner);
const config = {} as LookerPullConfig;
await expect(factory.createClient(config, ctx)).resolves.toBe(runtimeClient);
expect(inner.createClient).toHaveBeenCalledWith('ctx-looker');
});
});

View file

@ -1,77 +0,0 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { readLookerFetchReport, writeLookerFetchReport } from './fetch-report.js';
describe('Looker staged fetch report', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-fetch-report-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('returns null when a staged bundle has no fetch report', async () => {
await expect(readLookerFetchReport(stagedDir)).resolves.toBeNull();
});
it('round-trips partial fetch issues', async () => {
await writeLookerFetchReport(stagedDir, {
status: 'partial',
retryRecommended: true,
skipped: [
{
rawPath: 'dashboards/10.json',
entityType: 'dashboard',
entityId: '10',
severity: 'error',
statusCode: 429,
message: 'Looker API rate limit remained after retry',
retryRecommended: true,
},
],
warnings: [
{
rawPath: 'signals/dashboard_usage.json',
entityType: 'signals',
entityId: null,
severity: 'warning',
statusCode: 403,
message: 'system__activity unavailable',
retryRecommended: false,
},
],
});
await expect(readLookerFetchReport(stagedDir)).resolves.toEqual({
status: 'partial',
retryRecommended: true,
skipped: [
{
rawPath: 'dashboards/10.json',
entityType: 'dashboard',
entityId: '10',
severity: 'error',
statusCode: 429,
message: 'Looker API rate limit remained after retry',
retryRecommended: true,
},
],
warnings: [
{
rawPath: 'signals/dashboard_usage.json',
entityType: 'signals',
entityId: null,
severity: 'warning',
statusCode: 403,
message: 'system__activity unavailable',
retryRecommended: false,
},
],
});
});
});

View file

@ -1,645 +0,0 @@
import { mkdtemp, readdir, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { chunkLookerStagedDir } from './chunk.js';
import { fetchLookerRuntimeBundle, type LookerRuntimeClient } from './fetch.js';
const connectionId = '11111111-1111-4111-8111-111111111111';
function makeClient(): LookerRuntimeClient {
return {
listDashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
getDashboard: vi.fn().mockResolvedValue({
lookerId: '10',
title: 'Sales Pipeline',
description: 'Pipeline health',
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T12:00:00.000Z',
tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
}),
listLooks: vi.fn().mockResolvedValue([{ id: '20' }]),
getLook: vi.fn().mockResolvedValue({
lookerId: '20',
title: 'Open Pipeline',
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T12:00:00.000Z',
query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
}),
listFolders: vi
.fn()
.mockResolvedValue({ folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }] }),
listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: null }]),
listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Sales' }]),
listLookmlModels: vi.fn().mockResolvedValue({
models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
}),
getExplore: vi.fn().mockResolvedValue({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
joins: [],
}),
getSignals: vi.fn().mockResolvedValue({
dashboardUsage: [{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] }],
lookUsage: [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] }],
scheduledPlans: [
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
],
favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }],
}),
cleanup: vi.fn().mockResolvedValue(undefined),
};
}
describe('fetchLookerRuntimeBundle', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-fetch-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('writes dashboards, looks, folders, users, groups, models, explores, signals, and sync config', async () => {
const client = makeClient();
await fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId, instanceBaseUrl: 'https://example.looker.com' },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
expect(await readdir(join(stagedDir, 'dashboards'))).toEqual(['10.json']);
expect(await readdir(join(stagedDir, 'looks'))).toEqual(['20.json']);
expect(await readdir(join(stagedDir, 'users'))).toEqual(['3.json']);
expect(await readdir(join(stagedDir, 'groups'))).toEqual(['4.json']);
expect(await readdir(join(stagedDir, 'explores/b2b'))).toEqual(['sales_pipeline.json']);
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
expect(syncConfig).toEqual({
lookerConnectionId: connectionId,
fetchedAt: '2026-04-30T12:30:00.000Z',
instanceBaseUrl: 'https://example.looker.com',
previousCursors: {
dashboardsLastSyncedAt: null,
looksLastSyncedAt: null,
},
nextCursors: {
dashboardsLastSyncedAt: null,
looksLastSyncedAt: null,
},
});
const scope = JSON.parse(await readFile(join(stagedDir, 'looker-scope.json'), 'utf-8'));
expect(scope).toEqual({
mode: 'full',
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
fetchedRawPaths: ['dashboards/10.json', 'looks/20.json'],
});
const dashboardUsage = JSON.parse(await readFile(join(stagedDir, 'signals/dashboard_usage.json'), 'utf-8'));
expect(dashboardUsage).toEqual([
{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] },
]);
const lookUsage = JSON.parse(await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8'));
const scheduledPlans = JSON.parse(await readFile(join(stagedDir, 'signals/scheduled_plans.json'), 'utf-8'));
const favorites = JSON.parse(await readFile(join(stagedDir, 'signals/favorites.json'), 'utf-8'));
expect(lookUsage).toEqual([
{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] },
]);
expect(scheduledPlans).toEqual([
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
]);
expect(favorites).toEqual([{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }]);
});
it('stages only changed Dashboard and Look entity bodies during incremental pulls', async () => {
const client = makeClient();
vi.mocked(client.listDashboards).mockResolvedValue([
{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' },
{ id: '11', updatedAt: '2026-04-30T12:10:00.000Z' },
]);
vi.mocked(client.getDashboard).mockImplementation(async (id: string) => ({
lookerId: id,
title: `Dashboard ${id}`,
description: null,
folderId: '7',
ownerId: '3',
updatedAt: id === '11' ? '2026-04-30T12:10:00.000Z' : '2026-04-30T12:00:00.000Z',
tiles: [],
}));
vi.mocked(client.listLooks).mockResolvedValue([
{ id: '20', updatedAt: '2026-04-30T11:00:00.000Z' },
{ id: '21', updatedAt: null },
]);
vi.mocked(client.getLook).mockImplementation(async (id: string) => ({
lookerId: id,
title: `Look ${id}`,
description: null,
folderId: '7',
ownerId: '3',
updatedAt: id === '21' ? null : '2026-04-30T11:00:00.000Z',
query: null,
}));
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
dashboardUpdatedSince: '2026-04-30T12:00:00.000Z',
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
expect(client.getDashboard).toHaveBeenCalledTimes(1);
expect(client.getDashboard).toHaveBeenCalledWith('11');
expect(client.getLook).toHaveBeenCalledTimes(1);
expect(client.getLook).toHaveBeenCalledWith('21');
await expect(readdir(join(stagedDir, 'dashboards'))).resolves.toEqual(['11.json']);
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['21.json']);
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
expect(syncConfig.previousCursors).toEqual({
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
});
expect(syncConfig.nextCursors).toEqual({
dashboardsLastSyncedAt: '2026-04-30T12:10:00.000Z',
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
});
const scope = JSON.parse(await readFile(join(stagedDir, 'looker-scope.json'), 'utf-8'));
expect(scope).toEqual({
mode: 'incremental',
knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json', 'looks/21.json'],
fetchedRawPaths: ['dashboards/11.json', 'looks/21.json'],
});
});
it('falls back to empty signal files when the client has no signal support', async () => {
const client = makeClient();
delete client.getSignals;
await fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
expect(JSON.parse(await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8'))).toEqual([]);
});
it('stamps explore warehouse targets from pull config and reports unmapped Looker connections', async () => {
const client = makeClient();
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
vi.mocked(client.listLookmlModels).mockResolvedValue({
models: [
{
name: 'b2b',
label: 'B2B',
explores: [
{ name: 'sales_pipeline', label: 'Sales Pipeline' },
{ name: 'marketing', label: 'Marketing' },
],
},
],
});
vi.mocked(client.getExplore).mockImplementation(async (_modelName: string, exploreName: string) => {
if (exploreName === 'marketing') {
return {
modelName: 'b2b',
exploreName: 'marketing',
label: 'Marketing',
description: null,
rawSqlTableName: 'proj.dataset.marketing',
connectionName: 'missing_mapping',
viewName: 'marketing',
fields: {
dimensions: [{ name: 'marketing.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'marketing.spend', label: null, type: null, sql: null, description: null }],
},
joins: [],
targetWarehouseConnectionId: null,
targetTable: null,
};
}
return {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
},
joins: [
{
name: 'accounts',
type: 'left_outer',
relationship: 'many_to_one',
rawSqlTableName: 'proj.dataset.accounts',
sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
from: null,
targetTable: null,
},
],
targetWarehouseConnectionId: null,
targetTable: null,
};
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: {
'b2b.sales_pipeline': {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
'b2b.sales_pipeline.accounts': {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'accounts',
canonicalTable: 'proj.dataset.accounts',
},
},
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
const salesPipeline = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8'));
expect(salesPipeline).toMatchObject({
connectionName: 'b2b_sandbox_bq',
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
joins: [
{
name: 'accounts',
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'accounts',
canonicalTable: 'proj.dataset.accounts',
},
},
],
});
const marketing = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/marketing.json'), 'utf-8'));
expect(marketing).toMatchObject({
connectionName: 'missing_mapping',
targetWarehouseConnectionId: null,
targetTable: {
ok: false,
reason: 'no_connection_mapping',
},
});
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
expect(report.status).toBe('partial');
expect(report.skipped).toEqual([]);
expect(report.warnings).toEqual([
{
rawPath: 'looker_connection_mappings/missing_mapping',
entityType: 'looker_connection_mapping',
entityId: 'missing_mapping',
severity: 'warning',
statusCode: null,
message: 'Looker connection missing_mapping is not mapped to a warehouse connection; 1 explore will be wiki-only.',
retryRecommended: false,
kind: 'unmapped_looker_connection',
details: {
lookerConnectionName: 'missing_mapping',
affectedExplores: ['b2b.marketing'],
},
},
]);
});
it('reports parsed target table failures without retrying the Looker fetch', async () => {
const client = makeClient();
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
vi.mocked(client.getExplore).mockResolvedValue({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: '$' + '{derived.SQL_TABLE_NAME}',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
},
joins: [],
targetWarehouseConnectionId: null,
targetTable: null,
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: {
'b2b.sales_pipeline': {
ok: false,
reason: 'looker_template_unresolved',
detail: 'Looker template markers cannot be resolved before parsing.',
},
},
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
const explore = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8'));
expect(explore).toMatchObject({
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: false,
reason: 'looker_template_unresolved',
},
});
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
expect(report).toMatchObject({
status: 'partial',
retryRecommended: false,
skipped: [],
warnings: [
{
rawPath: 'looker_connection_mappings/b2b_sandbox_bq',
entityType: 'looker_connection_mapping',
entityId: 'b2b_sandbox_bq',
severity: 'warning',
statusCode: null,
message:
'Looker explore b2b.sales_pipeline has sql_table_name that cannot be mapped to a physical warehouse table: looker_template_unresolved.',
retryRecommended: false,
kind: 'looker_template_unresolved',
details: {
lookerConnectionName: 'b2b_sandbox_bq',
rawSqlTableName: '$' + '{derived.SQL_TABLE_NAME}',
reason: 'looker_template_unresolved',
},
},
],
});
});
it('propagates parent explore warehouse targets onto Dashboard tile and Look queries', async () => {
const client = makeClient();
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
vi.mocked(client.getExplore).mockResolvedValue({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
},
joins: [],
targetWarehouseConnectionId: null,
targetTable: null,
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: {
'b2b.sales_pipeline': {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
},
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
const dashboard = JSON.parse(await readFile(join(stagedDir, 'dashboards/10.json'), 'utf-8'));
expect(dashboard.tiles[0].query).toMatchObject({
model: 'b2b',
view: 'sales_pipeline',
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
});
const look = JSON.parse(await readFile(join(stagedDir, 'looks/20.json'), 'utf-8'));
expect(look.query).toMatchObject({
model: 'b2b',
view: 'sales_pipeline',
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
});
});
it('records skipped detail entities and keeps cursors pinned for affected entity types', async () => {
const client = makeClient();
vi.mocked(client.listDashboards).mockResolvedValue([
{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' },
{ id: '11', updatedAt: '2026-04-30T12:10:00.000Z' },
]);
vi.mocked(client.getDashboard).mockImplementation(async (id: string) => {
if (id === '11') {
const error = new Error('Looker API rate limit remained after retry');
Object.assign(error, { statusCode: 429 });
throw error;
}
return {
lookerId: id,
title: `Dashboard ${id}`,
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T12:00:00.000Z',
tiles: [],
};
});
vi.mocked(client.listLooks).mockResolvedValue([{ id: '20', updatedAt: '2026-04-30T11:15:00.000Z' }]);
vi.mocked(client.getLook).mockResolvedValue({
lookerId: '20',
title: 'Look 20',
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T11:15:00.000Z',
query: null,
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
dashboardUpdatedSince: '2026-04-30T12:00:00.000Z',
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
await expect(readdir(join(stagedDir, 'dashboards'))).rejects.toMatchObject({ code: 'ENOENT' });
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['20.json']);
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
expect(syncConfig.nextCursors).toEqual({
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
looksLastSyncedAt: '2026-04-30T11:15:00.000Z',
});
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
expect(report).toEqual({
status: 'partial',
retryRecommended: true,
skipped: [
{
rawPath: 'dashboards/11.json',
entityType: 'dashboard',
entityId: '11',
severity: 'error',
statusCode: 429,
message: 'Looker API rate limit remained after retry',
retryRecommended: true,
},
],
warnings: [],
});
});
it('continues without explore bootstrap when LookML model listing is denied', async () => {
const client = makeClient();
const error = new Error('LookML model access denied');
Object.assign(error, { statusCode: 403 });
vi.mocked(client.listLookmlModels).mockRejectedValue(error);
await fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
await expect(readdir(join(stagedDir, 'dashboards'))).resolves.toEqual(['10.json']);
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['20.json']);
await expect(readFile(join(stagedDir, 'lookml_models.json'), 'utf-8')).resolves.toBe('{\n "models": []\n}\n');
await expect(readdir(join(stagedDir, 'explores'))).rejects.toMatchObject({ code: 'ENOENT' });
expect(client.getExplore).not.toHaveBeenCalled();
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
expect(report).toEqual({
status: 'success',
retryRecommended: false,
skipped: [],
warnings: [
{
rawPath: 'lookml_models.json',
entityType: 'lookml_models',
entityId: null,
severity: 'warning',
statusCode: 403,
message: 'LookML model access denied',
retryRecommended: false,
},
],
});
const chunked = await chunkLookerStagedDir(stagedDir);
expect(chunked.workUnits.map((wu) => wu.unitKey).sort()).toEqual(['looker-dashboard-10', 'looker-look-20']);
expect(chunked.workUnits.flatMap((wu) => wu.dependencyPaths)).not.toContain('explores/b2b/sales_pipeline.json');
});
it('cleans up the Looker client after a successful fetch', async () => {
const client = makeClient();
await fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
expect(client.cleanup).toHaveBeenCalledTimes(1);
});
it('cleans up the Looker client when fetch throws', async () => {
const client = makeClient();
vi.mocked(client.listDashboards).mockRejectedValue(new Error('Looker API unavailable'));
await expect(
fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
}),
).rejects.toThrow('Looker API unavailable');
expect(client.cleanup).toHaveBeenCalledTimes(1);
});
});

Some files were not shown because too many files have changed in this diff Show more