Merge remote-tracking branch 'origin/snowflake-multiple-schemas' into snowflake-ingest-perf-report

# Conflicts:
#	packages/cli/src/connectors/snowflake/connector.ts
#	packages/cli/src/context/scan/local-scan.test.ts
#	packages/cli/src/context/scan/local-scan.ts
#	packages/cli/src/setup-databases.test.ts
#	packages/cli/src/setup-databases.ts
This commit is contained in:
Andrey Avtomonov 2026-05-23 02:29:55 +02:00
commit 101f80e33b
119 changed files with 8366 additions and 360 deletions

View file

@ -271,6 +271,25 @@ use `PascalCase` without the suffix.
- Regex may be used for non-structural sanitization, but not to interpret SQL
structure.
## Telemetry
**ktx** ships anonymous PostHog telemetry. When adding commands or events:
- **MUST NOT**: Add fields that carry user data — file paths, hostnames,
environment values, SQL text, schema/table/column names, error messages,
argv, or secrets. Schemas use Zod `.strict()`, so unknown fields throw at
runtime; the privacy rule is enforced by the schema, not by goodwill.
- **MUST**: Add new event types in `packages/cli/src/telemetry/events.ts`.
`pnpm run build` mirrors the catalog into the Python daemon schema; a
pytest checks Node ↔ Python parity.
- **SHOULD**: Let Commander's `preAction` hook auto-emit the `command` event
for any new CLI command — do not call `trackTelemetryEvent` manually for
command-level success/failure.
- **MUST**: Update the public overview at
`docs-site/content/docs/community/telemetry.mdx` whenever the *category*
of collected data changes. Adding another event with no new field types
needs no docs change.
## Documentation and Specs
- Keep public documentation in `README.md`, package READMEs, example READMEs,

View file

@ -93,6 +93,13 @@ ktx context built: yes
Agent integration ready: yes (codex:project)
```
## Telemetry
**ktx** collects anonymous usage telemetry from interactive CLI runs to improve
setup, command reliability, and data-agent workflows. See
[Telemetry](https://docs.kaelio.com/ktx/docs/community/telemetry) for the event
catalog, privacy details, and opt-out options.
## Common Commands
| Command | Purpose |

View file

@ -1,5 +1,5 @@
{
"title": "Community",
"defaultOpen": true,
"pages": ["support", "contributing"]
"pages": ["support", "contributing", "telemetry"]
}

View file

@ -0,0 +1,42 @@
---
title: Telemetry
description: Understand what anonymous usage telemetry ktx collects and how to opt out.
---
**ktx** collects anonymous, aggregated usage telemetry from interactive CLI
runs so maintainers can see which commands work, where setup fails, and which
parts of the data-agent workflow need improvement. Telemetry is enabled by
default (opt-out) and is disabled automatically in CI and non-interactive runs.
## Opt out
Use any of these mechanisms to disable telemetry:
| Mechanism | Effect |
|-----------|--------|
| `export KTX_TELEMETRY_DISABLED=1` | Disables telemetry for the shell and child processes |
| `export DO_NOT_TRACK=1` | Standard do-not-track environment variable |
| `CI=1` | Automatic in CI |
| Non-TTY output | Automatic for pipes and scripts |
| Edit `~/.ktx/telemetry.json` and set `"enabled": false` | Persistent for the machine |
## What we collect
High-level signals only: which commands run, how long they take, whether they
succeed or fail, and basic environment metadata (CLI version, Node version, OS
platform). For project-level analysis, **ktx** sends a salted hash of the
project directory — never the raw path.
## What we never collect
- File paths, hostnames, environment variable values, or command arguments
- `ktx.yaml` contents, connection passwords, API keys, or tokens
- Schema names, table names, column names, SQL text, or query results
- Error messages or stack traces
- Git remote URLs, Git user email, OS user, or hostname
## Storage and retention
Telemetry is sent to PostHog, a third-party product-analytics service used by
the **ktx** maintainers. Raw event data is retained for 90 days. Aggregated
counts may be retained indefinitely.

View file

@ -129,20 +129,18 @@ connections:
account: xy12345
warehouse: ANALYTICS_WH
database: PROD
schema_name: PUBLIC
schema_names:
- PUBLIC
- SALES
- MARKETING
username: KTX_SERVICE
password: env:SNOWFLAKE_PASSWORD
role: ANALYST
```
For multiple schemas:
```yaml
schema_names:
- PUBLIC
- ANALYTICS
- STAGING
```
`ktx setup` discovers schemas after the connection is verified and writes the
selected list to `schema_names`. You can also set this field manually. For a
single schema, `schema_name: PUBLIC` is accepted as an equivalent shorthand.
### Authentication

View file

@ -11,6 +11,8 @@
"packages/cli": {
"entry": [
"src/print-command-tree.ts!",
"src/telemetry/schema-writer.ts!",
"src/telemetry/index.ts!",
"scripts/**/*.mjs",
"src/**/*.test-utils.ts",
"src/**/acceptance-fixtures.ts",

View file

@ -29,7 +29,7 @@
},
"scripts": {
"assets:demo": "node scripts/build-demo-assets.mjs",
"build": "tsc -p tsconfig.json && node scripts/copy-runtime-assets.mjs && node ../../scripts/prepare-cli-bin.mjs",
"build": "tsc -p tsconfig.json && node dist/telemetry/schema-writer.js src/telemetry/events.schema.json ../../python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json && node scripts/copy-runtime-assets.mjs && node ../../scripts/prepare-cli-bin.mjs",
"clean": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\"",
"docs:commands": "pnpm run build && node dist/print-command-tree.js",
"smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000",
@ -69,6 +69,7 @@
"openai": "^6.37.0",
"p-limit": "^7.3.0",
"pg": "^8.20.0",
"posthog-node": "^5.0.0",
"react": "^19.2.6",
"simple-git": "3.36.0",
"snowflake-sdk": "^2.4.1",

View file

@ -0,0 +1,133 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { runCommanderKtxCli } from './cli-program.js';
import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
/**
 * Creates an in-memory CLI io double for tests.
 *
 * Everything written to `io.stdout` / `io.stderr` is appended to a private
 * buffer; the returned `stdout()` / `stderr()` accessors read those buffers.
 *
 * @param stdoutIsTTY - value exposed as `io.stdout.isTTY` (defaults to `true`).
 * @returns the io double plus accessors for the captured output.
 */
function makeIo(stdoutIsTTY = true): { io: KtxCliIo; stdout: () => string; stderr: () => string } {
  const captured = { out: '', err: '' };
  const io: KtxCliIo = {
    stdout: {
      isTTY: stdoutIsTTY,
      write: (chunk) => {
        captured.out += chunk;
      },
    },
    stderr: {
      write: (chunk) => {
        captured.err += chunk;
      },
    },
  };
  return {
    io,
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
const info: KtxCliPackageInfo = { name: '@kaelio/ktx', version: '0.4.1' };
// Exercises telemetry emission through the real `runCommanderKtxCli` entry point.
// Every assertion inspects captured stderr for `[telemetry]` lines; these tests
// stub `KTX_TELEMETRY_DEBUG=1`, which presumably makes the emitter print events
// to stderr instead of sending them — confirm against the telemetry module.
describe('runCommanderKtxCli telemetry', () => {
let tempDir: string;
// NOTE(review): this captures the `process.env` object reference, not a copy.
const originalEnv = process.env;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-telemetry-'));
// A minimal ktx.yaml so the temp dir counts as an existing project.
await writeFile(join(tempDir, 'ktx.yaml'), '{}\n', 'utf-8');
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
// HOME points at the temp dir — presumably so per-user telemetry state is not
// read from or written to the real home directory (TODO confirm).
vi.stubEnv('HOME', tempDir);
// Blank out the opt-out switches so the host environment cannot disable telemetry.
vi.stubEnv('CI', '');
vi.stubEnv('KTX_TELEMETRY_DISABLED', '');
vi.stubEnv('DO_NOT_TRACK', '');
});
afterEach(async () => {
vi.unstubAllEnvs();
// NOTE(review): nothing in this suite replaces `process.env`, so this assigns
// the same reference back; restoration appears to rely on `vi.unstubAllEnvs()`.
process.env = originalEnv;
await rm(tempDir, { recursive: true, force: true });
});
it('emits debug command telemetry for registered actions', async () => {
// Subcommand help exits 0 and must not produce any telemetry output.
const io = makeIo(true);
await expect(
runCommanderKtxCli(
['--project-dir', tempDir, 'status', '--help'],
io.io,
{},
info,
{ runInit: async () => 0 },
),
).resolves.toBe(0);
expect(io.stderr()).not.toContain('[telemetry]');
// A real action run (`status --json`) with a stubbed `doctor` dependency.
const statusIo = makeIo(true);
const deps: KtxCliDeps = { doctor: async () => 0 };
await expect(
runCommanderKtxCli(
['--project-dir', tempDir, 'status', '--json'],
statusIo.io,
deps,
info,
{ runInit: async () => 0 },
),
).resolves.toBe(0);
expect(statusIo.stderr()).toContain('[telemetry]');
expect(statusIo.stderr()).toContain('"event":"install_first_run"');
expect(statusIo.stderr()).toContain('"event":"command"');
expect(statusIo.stderr()).toContain('"commandPath":["ktx","status"]');
expect(statusIo.stderr()).toContain('"event":"project_stack_snapshot"');
expect(statusIo.stderr()).toContain('"connectionCount"');
// Privacy check: the raw project path must never appear in emitted output.
expect(statusIo.stderr()).not.toContain(tempDir);
// The telemetry notice must be printed before the first telemetry line.
const noticeIndex = statusIo.stderr().indexOf('ktx collects anonymous usage data');
const firstTelemetryIndex = statusIo.stderr().indexOf('[telemetry]');
expect(noticeIndex).toBeGreaterThanOrEqual(0);
expect(firstTelemetryIndex).toBeGreaterThan(noticeIndex);
});
it('emits aborted telemetry when project validation aborts after preAction starts', async () => {
// The directory exists but has no ktx.yaml, so the command exits 1.
const missingProjectDir = join(tempDir, 'missing');
await mkdir(missingProjectDir, { recursive: true });
const io = makeIo(true);
await expect(
runCommanderKtxCli(
['--project-dir', missingProjectDir, 'connection'],
io.io,
{},
info,
{ runInit: async () => 0 },
),
).resolves.toBe(1);
expect(io.stderr()).toContain('[telemetry]');
expect(io.stderr()).toContain('"outcome":"aborted"');
expect(io.stderr()).toContain('"hasProject":false');
expect(io.stderr()).toContain('"projectGroupAttached":false');
// Privacy check: the raw directory path must not leak into the output.
expect(io.stderr()).not.toContain(missingProjectDir);
});
it('does not import or emit telemetry for help, version, bare non-TTY, or unknown top-level command', async () => {
// Each of these invocations must finish without any `[telemetry]` output.
const helpIo = makeIo(true);
await expect(runCommanderKtxCli(['--help'], helpIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0);
expect(helpIo.stderr()).not.toContain('[telemetry]');
const versionIo = makeIo(true);
await expect(runCommanderKtxCli(['--version'], versionIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0);
expect(versionIo.stderr()).not.toContain('[telemetry]');
// Bare invocation with a non-TTY stdout.
const bareIo = makeIo(false);
await expect(runCommanderKtxCli([], bareIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0);
expect(bareIo.stderr()).not.toContain('[telemetry]');
const unknownIo = makeIo(true);
await expect(runCommanderKtxCli(['unknown'], unknownIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(1);
expect(unknownIo.stderr()).not.toContain('[telemetry]');
});
});

View file

@ -1,6 +1,6 @@
import type { Command } from '@commander-js/extra-typings';
import { Command, type CommandUnknownOpts } from '@commander-js/extra-typings';
import { describe, expect, it } from 'vitest';
import { buildKtxProgram } from './cli-program.js';
import { buildKtxProgram, collectCommandFlagsPresent } from './cli-program.js';
import type { KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
function stubIo(): KtxCliIo {
@ -55,3 +55,31 @@ describe('buildKtxProgram', () => {
expect(wrote).toBe('');
});
});
// Verifies that `collectCommandFlagsPresent` reports only options whose value
// came from the command line, across the subcommand and its parent program.
describe('collectCommandFlagsPresent', () => {
it('records only CLI-sourced flags and ignores positional content that looks like a flag', async () => {
let captured: Record<string, boolean> | undefined;
// Root program: `--json` has a default (false) and is not passed below.
const program = new Command()
.name('ktx')
.option('--project-dir <dir>', 'project directory')
.option('--json', 'json output', false);
program
.command('sql')
.argument('<sql...>')
.requiredOption('-c, --connection <id>', 'connection id')
.option('--max-rows <n>', 'cap rows')
.action(function () {
// `this` is the executing `sql` command; capture its flag map for the assertions.
captured = collectCommandFlagsPresent(this as unknown as CommandUnknownOpts);
});
// Everything after `--` is passed as positional SQL text, including the
// flag-shaped `--customer_table` token.
await program.parseAsync(
['--project-dir', '/tmp/p', 'sql', '-c', 'warehouse', '--', '--customer_table', 'SELECT', '1'],
{ from: 'user' },
);
// Only the two options actually supplied on the command line are recorded.
expect(captured).toEqual({ projectDir: true, connection: true });
expect(captured).not.toHaveProperty('customer_table');
expect(captured).not.toHaveProperty('json');
expect(captured).not.toHaveProperty('maxRows');
});
});

View file

@ -1,6 +1,6 @@
import { existsSync } from 'node:fs';
import { join } from 'node:path';
import { Command, InvalidArgumentError } from '@commander-js/extra-typings';
import { Command, type CommandUnknownOpts, InvalidArgumentError } from '@commander-js/extra-typings';
import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
import { registerConnectionCommands } from './commands/connection-commands.js';
import { registerIngestCommands } from './commands/ingest-commands.js';
@ -14,6 +14,7 @@ import { registerAdminCommands } from './admin.js';
import { renderMissingProjectMessage } from './doctor.js';
import { findNearestKtxProjectDir, resolveKtxProjectDir } from './project-resolver.js';
import { profileMark, profileSpan } from './startup-profile.js';
import type { CommandOutcome } from './telemetry/index.js';
profileMark('module:cli-program');
@ -43,6 +44,8 @@ export interface BuildKtxProgramOptions {
packageInfo: KtxCliPackageInfo;
runInit: (args: { projectDir: string; force: boolean }, io: KtxCliIo) => Promise<number>;
setExitCode?: (code: number) => void;
argv?: string[];
setTelemetryModule?: (telemetry: typeof import('./telemetry/index.js')) => void;
}
type CommanderExitLike = { exitCode: number; code: string; message: string };
@ -327,6 +330,25 @@ function formatCliError(error: unknown): string {
return error instanceof Error ? error.message : String(error);
}
/**
 * Maps the result of a CLI parse/run to a telemetry outcome.
 *
 * @param error - the value thrown while parsing/running, or a falsy value when nothing was thrown.
 * @param exitCode - the exit code the CLI settled on.
 * @returns `'aborted'` for a missing-project abort, `'error'` for any other
 *   thrown value or non-zero exit code, and `'ok'` otherwise.
 */
function commandOutcomeForParseResult(error: unknown, exitCode: number): CommandOutcome {
  if (!error) {
    // Nothing was thrown: the exit code alone decides the outcome.
    return exitCode === 0 ? 'ok' : 'error';
  }
  if (isKtxProjectMissingAbortError(error)) {
    return 'aborted';
  }
  return 'error';
}
/**
 * Decides whether a command's telemetry should be attached to a project group.
 *
 * @param path - the command path segments; `path[1]` is treated as the root command.
 * @param hasProject - whether a project already exists for this invocation.
 * @returns `true` when a project exists, or when either the root command or
 *   the space-joined full path is listed in `COMMANDS_THAT_CREATE_PROJECT`.
 */
function shouldAttachCommandProjectGroup(path: string[], hasProject: boolean): boolean {
  if (hasProject) return true;

  const [, rootCommand] = path;
  if (rootCommand !== undefined && COMMANDS_THAT_CREATE_PROJECT.has(rootCommand)) {
    return true;
  }
  return COMMANDS_THAT_CREATE_PROJECT.has(path.join(' '));
}
function firstTopLevelCommandToken(argv: string[]): string | null {
for (let index = 0; index < argv.length; index += 1) {
const arg = argv[index];
@ -390,11 +412,43 @@ async function runBareInteractiveCommand(
return 0;
}
/**
 * Collects the attribute names of every option whose value source is `'cli'`,
 * walking from `command` up through each `parent` command.
 *
 * Only option names are recorded (each mapped to `true`); option values are
 * never read.
 *
 * @param command - the command to start from.
 * @returns a map of option attribute name to `true`.
 * @internal
 */
export function collectCommandFlagsPresent(command: CommandUnknownOpts): Record<string, boolean> {
  const present: Record<string, boolean> = {};
  for (let node: CommandUnknownOpts | null = command; node; node = node.parent) {
    for (const option of node.options) {
      const attribute = option.attributeName();
      if (node.getOptionValueSource(attribute) !== 'cli') {
        continue;
      }
      present[attribute] = true;
    }
  }
  return present;
}
export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
const program = createBaseProgram(options.packageInfo, options.io);
program.hook('preAction', (_thisCommand, actionCommand) => {
writeProjectDir(options.io, actionCommand as CommandPathNode);
ensureProjectAvailable(options.io, actionCommand as CommandPathNode);
program.hook('preAction', async (_thisCommand, actionCommand) => {
const telemetry = await import('./telemetry/index.js');
options.setTelemetryModule?.(telemetry);
await telemetry.showTelemetryNoticeIfNeeded(options.io, options.packageInfo);
const commandNode = actionCommand as CommandPathNode;
const path = commandPath(commandNode);
const projectDir = resolveCommandProjectDir(commandNode);
const hasProject = ktxYamlExists(projectDir);
const attachProjectGroup = shouldAttachCommandProjectGroup(path, hasProject);
telemetry.beginCommandSpan({
commandPath: path,
flagsPresent: collectCommandFlagsPresent(commandNode as unknown as CommandUnknownOpts),
projectDir: attachProjectGroup ? projectDir : undefined,
hasProject,
attachProjectGroup,
startedAt: performance.now(),
});
writeProjectDir(options.io, commandNode);
ensureProjectAvailable(options.io, commandNode);
});
const context: KtxCliCommandContext = {
@ -435,14 +489,19 @@ export async function runCommanderKtxCli(
): Promise<number> {
profileMark('commander:entry');
let exitCode = 0;
let telemetryModule: typeof import('./telemetry/index.js') | undefined;
const program = buildKtxProgram({
io,
deps,
packageInfo: info,
runInit: options.runInit,
argv,
setExitCode: (code: number) => {
exitCode = code;
},
setTelemetryModule: (telemetry) => {
telemetryModule = telemetry;
},
});
profileMark('commander:program-built');
const context: KtxCliCommandContext = {
@ -477,17 +536,29 @@ export async function runCommanderKtxCli(
return 1;
}
let parseError: unknown;
try {
await profileSpan('commander:parseAsync', () => program.parseAsync(argv, { from: 'user' }));
} catch (error) {
parseError = error;
if (isKtxProjectMissingAbortError(error)) {
return 1;
exitCode = 1;
} else if (isCommanderExit(error)) {
exitCode = error.exitCode === 0 ? 0 : 1;
} else {
io.stderr.write(`${formatCliError(error)}\n`);
exitCode = 1;
}
if (isCommanderExit(error)) {
return error.exitCode === 0 ? 0 : 1;
} finally {
if (telemetryModule) {
const completed = telemetryModule.completeCommandSpan({
completedAt: performance.now(),
outcome: commandOutcomeForParseResult(parseError, exitCode),
error: parseError,
});
await telemetryModule.emitCompletedCommand({ completed, packageInfo: info, io });
await telemetryModule.shutdownTelemetryEmitter();
}
io.stderr.write(`${formatCliError(error)}\n`);
return 1;
}
return exitCode;

View file

@ -2,6 +2,7 @@ import type { Command } from '@commander-js/extra-typings';
import type { KtxCliCommandContext } from '../cli-program.js';
import { resolveCommandProjectDir, resolveCommandProjectDirOverride } from '../cli-program.js';
import { findNearestKtxProjectDir } from '../project-resolver.js';
import { emitProjectStackSnapshot } from '../telemetry/index.js';
function outputMode(options: { json?: boolean }): 'plain' | 'json' {
return options.json === true ? 'json' : 'plain';
@ -58,11 +59,12 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC
);
return;
}
const projectDir = resolveCommandProjectDir(command);
context.setExitCode(
await runner(
{
command: 'project',
projectDir: resolveCommandProjectDir(command),
projectDir,
outputMode: outputMode(options),
verbose: options.verbose === true,
fast: options.fast === true,
@ -71,6 +73,11 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC
context.io,
),
);
await emitProjectStackSnapshot({
projectDir,
io: context.io,
packageInfo: context.packageInfo,
});
},
);
}

View file

@ -20,6 +20,7 @@ function makeIo() {
return {
io: {
stdout: {
isTTY: true,
write: (chunk: string) => {
stdout += chunk;
},
@ -72,6 +73,7 @@ describe('runKtxConnection', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
@ -137,6 +139,27 @@ describe('runKtxConnection', () => {
expect(io.stdout()).toContain('Status: ok');
});
it('emits debug telemetry for connection tests without project paths', async () => {
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, {
warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },
});
const { connector } = nativeConnector('postgres');
const io = makeIo();
const code = await runKtxConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, {
createScanConnector: vi.fn(async () => connector),
});
expect(code).toBe(0);
expect(io.stderr()).toContain('"event":"connection_test"');
expect(io.stderr()).toContain('"driver":"postgres"');
expect(io.stderr()).not.toContain(projectDir);
});
it('reports the connector error and still cleans up when native testConnection fails', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });

View file

@ -14,6 +14,9 @@ import type { KtxCliIo } from './index.js';
import { bold, dim, green, red, SYMBOLS } from './io/symbols.js';
import { createKtxCliScanConnector } from './local-scan-connectors.js';
import { profileMark } from './startup-profile.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:connection');
@ -300,6 +303,30 @@ interface ConnectionTestRow {
detail: string;
}
/**
 * Emits a `connection_test` telemetry event for one connection test attempt.
 *
 * The event fields are the driver, whether the connection is a demo connection,
 * the outcome and the duration. An `errorClass` field is added only when an
 * error was supplied and `scrubErrorClass` returned a truthy value for it.
 *
 * @param input.project - the project the connection belongs to.
 * @param input.connectionId - key of the connection in the project config.
 * @param input.driver - driver name to report.
 * @param input.outcome - `'ok'` or `'error'`.
 * @param input.durationMs - how long the test took.
 * @param input.error - the thrown value, if any.
 * @param input.io - CLI io handed through to the telemetry emitter.
 */
async function emitConnectionTest(input: {
  project: KtxLocalProject;
  connectionId: string;
  driver: string;
  outcome: 'ok' | 'error';
  durationMs: number;
  error?: unknown;
  io: KtxCliIo;
}): Promise<void> {
  const { project, connectionId, driver, outcome, durationMs, error, io } = input;
  const errorClass = error ? scrubErrorClass(error) : undefined;
  const errorFields = errorClass ? { errorClass } : {};
  await emitTelemetryEvent({
    name: 'connection_test',
    projectDir: project.projectDir,
    io,
    fields: {
      driver,
      isDemoConnection: isDemoConnection(connectionId, project.config.connections[connectionId]),
      outcome,
      durationMs,
      ...errorFields,
    },
  });
}
function visualWidth(text: string): number {
// styleText wraps content in ANSI escape sequences; strip them before measuring.
return text.replace(/\[[0-9;]*m/g, '').length;
@ -352,8 +379,17 @@ async function runTestAll(
const rows = await Promise.all(
entries.map(async ([connectionId, connection]): Promise<ConnectionTestRow> => {
const declaredDriver = String(connection.driver ?? '').trim().toLowerCase() || 'unknown';
const startedAt = performance.now();
try {
const outcome = await testConnectionByDriver(project, connectionId, deps);
await emitConnectionTest({
project,
connectionId,
driver: outcome.driver || declaredDriver,
outcome: 'ok',
durationMs: Math.max(0, performance.now() - startedAt),
io,
});
return {
connectionId,
driver: outcome.driver || declaredDriver,
@ -361,6 +397,15 @@ async function runTestAll(
detail: `${outcome.detailKey}: ${outcome.detailValue}`,
};
} catch (error) {
await emitConnectionTest({
project,
connectionId,
driver: declaredDriver,
outcome: 'error',
durationMs: Math.max(0, performance.now() - startedAt),
error,
io,
});
return {
connectionId,
driver: declaredDriver,
@ -403,7 +448,35 @@ export async function runKtxConnection(
return await runTestAll(project, io, deps);
}
const { driver, detailKey, detailValue } = await testConnectionByDriver(project, args.connectionId, deps);
const startedAt = performance.now();
let driver = normalizedConnectionDriver(project, args.connectionId) || 'unknown';
let detailKey: string;
let detailValue: string;
try {
const outcome = await testConnectionByDriver(project, args.connectionId, deps);
driver = outcome.driver;
detailKey = outcome.detailKey;
detailValue = outcome.detailValue;
await emitConnectionTest({
project,
connectionId: args.connectionId,
driver,
outcome: 'ok',
durationMs: Math.max(0, performance.now() - startedAt),
io,
});
} catch (error) {
await emitConnectionTest({
project,
connectionId: args.connectionId,
driver,
outcome: 'error',
durationMs: Math.max(0, performance.now() - startedAt),
error,
io,
});
throw error;
}
io.stdout.write(`Connection test passed: ${args.connectionId}\n`);
io.stdout.write(`Driver: ${driver}\n`);
io.stdout.write(`${detailKey}: ${detailValue}\n`);

View file

@ -1,6 +1,7 @@
import { describe, expect, it, vi } from 'vitest';
import { bigQueryConnectionConfigFromConfig, isKtxBigQueryConnectionConfig, type KtxBigQueryClient, KtxBigQueryScanConnector, type KtxBigQueryClientFactory, type KtxBigQueryDataset, type KtxBigQueryQueryJob, type KtxBigQueryTableRef } from '../../connectors/bigquery/connector.js';
import { createBigQueryLiveDatabaseIntrospection } from '../../connectors/bigquery/live-database-introspection.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function fakeClientFactory(): KtxBigQueryClientFactory {
const queryResults = vi.fn(async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
@ -234,6 +235,59 @@ describe('KtxBigQueryScanConnector', () => {
await connector.cleanup();
});
// With a `tableScope` naming only `orders`, introspection must return just that
// table and must not fetch metadata for the out-of-scope `customers` table.
it('limits introspection to tables in tableScope', async () => {
// Metadata fetch for the in-scope table (`orders`).
const ordersGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{
metadata: {
type: 'TABLE',
numRows: '12',
schema: { fields: [{ name: 'id', type: 'INT64', mode: 'REQUIRED' }] },
},
},
]);
// Metadata fetch for the out-of-scope table (`customers`); asserted below to be unused.
const skippedGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{ metadata: { type: 'TABLE', numRows: '1', schema: { fields: [] } } },
]);
// Fake client: one dataset (`analytics`) that lists both tables.
const clientFactory: KtxBigQueryClientFactory = {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }]]),
dataset: vi.fn(
(): KtxBigQueryDataset => ({
get: vi.fn(async () => [{ id: 'analytics' }]),
getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [
[
{ id: 'orders', get: ordersGet },
{ id: 'customers', get: skippedGet },
],
]),
}),
),
// Query jobs return no rows; presumably this serves the connector's
// primary-key lookup (TODO confirm against the connector).
createQueryJob: vi.fn(async (): ReturnType<KtxBigQueryClient['createQueryJob']> => [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[],
undefined,
{ schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
],
},
]),
})),
};
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory,
});
// Scope contains only `project-1.analytics.orders`.
const scope = tableRefSet([{ catalog: 'project-1', db: 'analytics', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
expect(ordersGet).toHaveBeenCalledTimes(1);
expect(skippedGet).not.toHaveBeenCalled();
});
it('constructs for discovery without dataset scope and lists tables through one region information schema query', async () => {
const createQueryJob = vi.fn(
async (

View file

@ -2,6 +2,7 @@ import { BigQuery, type TableField } from '@google-cloud/bigquery';
import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../context/connections/bigquery-identifiers.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
@ -289,7 +290,10 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
const tables: KtxSchemaTable[] = [];
const datasetIds = this.requireDatasetIdsForScan();
for (const datasetId of datasetIds) {
tables.push(...(await this.introspectDataset(datasetId)));
const scopedNames = input.tableScope
? scopedTableNames(input.tableScope, { catalog: this.resolved.projectId, db: datasetId })
: null;
tables.push(...(await this.introspectDataset(datasetId, scopedNames)));
}
return {
connectionId: this.connectionId,
@ -362,7 +366,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
if (!datasetId) {
return 0;
}
const tables = await this.introspectDataset(datasetId);
const tables = await this.introspectDataset(datasetId, null);
return tables.find((table) => table.name === tableName)?.estimatedRows ?? 0;
}
@ -463,12 +467,15 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
return firstNumber(rows[0]?.[header]);
}
private async introspectDataset(datasetId: string): Promise<KtxSchemaTable[]> {
private async introspectDataset(datasetId: string, scopedNames: readonly string[] | null): Promise<KtxSchemaTable[]> {
if (scopedNames && scopedNames.length === 0) return [];
const dataset = this.getClient().dataset(datasetId);
const [tableRefs] = await dataset.getTables();
const scopeSet = scopedNames ? new Set(scopedNames) : null;
const filteredTableRefs = scopeSet ? tableRefs.filter((tableRef) => scopeSet.has(tableRef.id ?? '')) : tableRefs;
const primaryKeys = await this.primaryKeys(datasetId);
const tables: KtxSchemaTable[] = [];
for (const tableRef of tableRefs) {
for (const tableRef of filteredTableRefs) {
const tableName = tableRef.id || '';
const [table] = await tableRef.get();
const fields = table.metadata.schema?.fields ?? [];

View file

@ -1,4 +1,7 @@
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
import type {
LiveDatabaseIntrospectionOptions,
LiveDatabaseIntrospectionPort,
} from '../../context/ingest/adapters/live-database/types.js';
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
import {
KtxBigQueryScanConnector,
@ -16,7 +19,7 @@ export function createBigQueryLiveDatabaseIntrospection(
options: CreateBigQueryLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
return {
async extractSchema(connectionId: string) {
async extractSchema(connectionId: string, introspectionOptions?: LiveDatabaseIntrospectionOptions) {
const connection = options.connections[connectionId] as KtxBigQueryConnectionConfig | undefined;
const connector = new KtxBigQueryScanConnector({
connectionId,
@ -25,7 +28,14 @@ export function createBigQueryLiveDatabaseIntrospection(
now: options.now,
});
try {
return await connector.introspect({ connectionId, driver: 'bigquery' }, { runId: `bigquery-${connectionId}` });
return await connector.introspect(
{
connectionId,
driver: 'bigquery',
...(introspectionOptions?.tableScope ? { tableScope: introspectionOptions.tableScope } : {}),
},
{ runId: `bigquery-${connectionId}` },
);
} finally {
await connector.cleanup();
}

View file

@ -1,6 +1,7 @@
import { describe, expect, it, vi } from 'vitest';
import { clickHouseClientConfigFromConfig, isKtxClickHouseConnectionConfig, KtxClickHouseScanConnector, type KtxClickHouseClientFactory } from '../../connectors/clickhouse/connector.js';
import { createClickHouseLiveDatabaseIntrospection } from '../../connectors/clickhouse/live-database-introspection.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function result<T>(payload: T) {
return {
@ -238,6 +239,57 @@ describe('KtxClickHouseScanConnector', () => {
]);
});
// With a `tableScope`, the `system.tables` query must carry a table-name
// filter and a `table_names` query parameter.
it('limits introspection to tables in tableScope', async () => {
// Records every query issued so the SQL text and parameters can be asserted.
const queries: Array<{ query: string; query_params?: Record<string, unknown> }> = [];
const clientFactory: KtxClickHouseClientFactory = {
createClient: vi.fn(() => ({
// Fake `query`: picks a canned response from the system table named in the SQL.
query: vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
queries.push({ query: input.query, query_params: input.query_params });
if (input.query.includes('FROM system.tables')) {
return result([{ database: 'analytics', name: 'events', engine: 'MergeTree', comment: '' }]);
}
if (input.query.includes('FROM system.columns')) {
return result([
{
database: 'analytics',
table: 'events',
name: 'id',
type: 'UInt64',
comment: '',
is_in_primary_key: 1,
},
]);
}
if (input.query.includes('FROM system.parts')) {
return result([{ database: 'analytics', table: 'events', row_count: '2' }]);
}
// Any other SQL is a test failure.
throw new Error(`Unexpected SQL: ${input.query}`);
}),
close: vi.fn(async () => undefined),
})),
};
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory,
});
// ClickHouse refs use a null catalog; scope contains only `analytics.events`.
const scope = tableRefSet([{ catalog: null, db: 'analytics', name: 'events' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'clickhouse', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['events']);
// The table listing query must be filtered by the scoped table names.
const tablesQuery = queries.find((query) => query.query.includes('FROM system.tables'));
expect(tablesQuery?.query).toContain('AND name IN {table_names:Array(String)}');
expect(tablesQuery?.query_params).toEqual({ databases: ['analytics'], table_names: ['events'] });
});
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxClickHouseScanConnector({

View file

@ -1,6 +1,7 @@
import { createClient } from '@clickhouse/client';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { readFileSync } from 'node:fs';
import { Agent as HttpsAgent } from 'node:https';
import { homedir } from 'node:os';
@ -285,24 +286,42 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
this.assertConnection(input.connectionId);
const databases = configuredClickHouseDatabases(this.connection, this.clientConfig.database);
let allScopedTables: string[] | null = null;
if (input.tableScope) {
allScopedTables = [];
for (const database of databases) {
allScopedTables.push(...scopedTableNames(input.tableScope, { catalog: null, db: database }));
}
if (allScopedTables.length === 0) {
return this.emptySnapshot(databases);
}
}
const queryParams: Record<string, unknown> = { databases };
const tableNameClause = allScopedTables ? 'AND name IN {table_names:Array(String)}' : '';
const columnTableNameClause = allScopedTables ? 'AND table IN {table_names:Array(String)}' : '';
if (allScopedTables) {
queryParams.table_names = allScopedTables;
}
const tables = await this.queryEachRow<ClickHouseTableRow>(
`
SELECT database, name, engine, comment
FROM system.tables
WHERE database IN {databases:Array(String)}
AND engine NOT IN ('Dictionary')
${tableNameClause}
ORDER BY database, name
`,
{ databases },
queryParams,
);
const columns = await this.queryEachRow<ClickHouseColumnRow>(
`
SELECT database, table, name, type, comment, is_in_primary_key
FROM system.columns
WHERE database IN {databases:Array(String)}
${columnTableNameClause}
ORDER BY database, table, position
`,
{ databases },
queryParams,
);
const rowCounts = await this.queryEachRow<ClickHouseRowCountRow>(
`
@ -310,9 +329,10 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
FROM system.parts
WHERE database IN {databases:Array(String)}
AND active = 1
${columnTableNameClause}
GROUP BY database, table
`,
{ databases },
queryParams,
);
const columnsByTable = new Map<string, ClickHouseColumnRow[]>();
for (const column of columns) {
@ -347,6 +367,23 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
};
}
/**
 * Builds a schema snapshot that contains no tables for this ClickHouse connection.
 *
 * `introspect` returns this when a table scope was supplied but none of the
 * scoped tables belong to the configured databases.
 *
 * @param databases Database names the connector is configured to scan; echoed
 *   back as the snapshot scope and in the metadata.
 * @returns A snapshot with zeroed table/column counts and an empty `tables` list.
 */
private emptySnapshot(databases: string[]): KtxSchemaSnapshot {
  const extractedAt = this.now().toISOString();
  const { database, host } = this.clientConfig;
  return {
    connectionId: this.connectionId,
    driver: 'clickhouse',
    extractedAt,
    scope: { schemas: databases },
    metadata: { database, databases, host, table_count: 0, total_columns: 0 },
    tables: [],
  };
}
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
this.assertConnection(input.connectionId);
const result = await this.query(

View file

@ -1,4 +1,7 @@
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
import type {
LiveDatabaseIntrospectionOptions,
LiveDatabaseIntrospectionPort,
} from '../../context/ingest/adapters/live-database/types.js';
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
import {
KtxClickHouseScanConnector,
@ -18,7 +21,7 @@ export function createClickHouseLiveDatabaseIntrospection(
options: CreateClickHouseLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
return {
async extractSchema(connectionId: string) {
async extractSchema(connectionId: string, introspectionOptions?: LiveDatabaseIntrospectionOptions) {
const connection = options.connections[connectionId] as KtxClickHouseConnectionConfig | undefined;
const connector = new KtxClickHouseScanConnector({
connectionId,
@ -29,7 +32,11 @@ export function createClickHouseLiveDatabaseIntrospection(
});
try {
return await connector.introspect(
{ connectionId, driver: 'clickhouse' },
{
connectionId,
driver: 'clickhouse',
...(introspectionOptions?.tableScope ? { tableScope: introspectionOptions.tableScope } : {}),
},
{ runId: `clickhouse-${connectionId}` },
);
} finally {

View file

@ -2,6 +2,7 @@ import { describe, expect, it, vi } from 'vitest';
import type { FieldPacket, RowDataPacket } from 'mysql2/promise';
import { createMysqlLiveDatabaseIntrospection } from '../../connectors/mysql/live-database-introspection.js';
import { isKtxMysqlConnectionConfig, KtxMysqlScanConnector, mysqlConnectionPoolConfigFromConfig, type KtxMysqlPoolFactory } from '../../connectors/mysql/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function mysqlResult(rows: Record<string, unknown>[], fields: Array<{ name: string; type?: number }>): [RowDataPacket[], FieldPacket[]] {
return [rows as RowDataPacket[], fields as FieldPacket[]];
@ -275,6 +276,71 @@ describe('KtxMysqlScanConnector', () => {
]);
});
// Verifies that a tableScope passed to introspect() is pushed down into the
// INFORMATION_SCHEMA.TABLES query as a TABLE_NAME filter with bound parameters.
it('limits introspection to tables in tableScope', async () => {
// Every statement sent through the fake connection is recorded here, together
// with its bind parameters, so the assertions below can inspect them.
const queries: Array<{ sql: string; params?: unknown }> = [];
const poolFactory: KtxMysqlPoolFactory = {
createPool: vi.fn(() => ({
getConnection: vi.fn(async () => ({
query: vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => {
queries.push({ sql, params });
// Table listing: a single base table named "orders".
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return mysqlResult(
[
{
TABLE_SCHEMA: 'analytics',
TABLE_NAME: 'orders',
TABLE_TYPE: 'BASE TABLE',
TABLE_COMMENT: '',
TABLE_ROWS: 2,
},
],
[],
);
}
// Column listing: a single non-nullable "id" column on "orders".
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return mysqlResult(
[
{
TABLE_SCHEMA: 'analytics',
TABLE_NAME: 'orders',
COLUMN_NAME: 'id',
DATA_TYPE: 'int',
IS_NULLABLE: 'NO',
COLUMN_COMMENT: '',
},
],
[],
);
}
// Every other statement gets an empty result set.
return mysqlResult([], []);
}),
release: vi.fn(),
})),
end: vi.fn(async () => undefined),
})),
};
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory,
});
// Scope introspection to exactly one table in the configured database.
const scope = tableRefSet([{ catalog: null, db: 'analytics', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
// The tables query must carry one placeholder for the single scoped table, and
// its parameters must list the database first, followed by the table name.
const tablesQuery = queries.find((query) => query.sql.includes('INFORMATION_SCHEMA.TABLES'));
expect(tablesQuery?.sql).toMatch(/TABLE_NAME IN \(\?\)/);
expect(tablesQuery?.params).toEqual(['analytics', 'orders']);
});
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const poolFactory = fakePoolFactory();
const connector = new KtxMysqlScanConnector({

View file

@ -4,6 +4,7 @@ import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxTableListEntry, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { KtxMysqlDialect } from './dialect.js';
export interface KtxMysqlConnectionConfig {
@ -335,23 +336,37 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
this.assertConnection(input.connectionId);
const databases = configuredMysqlSchemas(this.connection, this.poolConfig.database);
const placeholders = databases.map(() => '?').join(', ');
let allScopedTables: string[] | null = null;
if (input.tableScope) {
allScopedTables = [];
for (const database of databases) {
allScopedTables.push(...scopedTableNames(input.tableScope, { catalog: null, db: database }));
}
if (allScopedTables.length === 0) {
return this.emptySnapshot(databases);
}
}
const tableNameClause = allScopedTables
? `AND TABLE_NAME IN (${allScopedTables.map(() => '?').join(', ')})`
: '';
const tableNameParams = allScopedTables ?? [];
const tables = await this.queryRaw<MysqlTableRow>(
`
SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, TABLE_ROWS
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA IN (${placeholders}) AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
WHERE TABLE_SCHEMA IN (${placeholders}) AND TABLE_TYPE IN ('BASE TABLE', 'VIEW') ${tableNameClause}
ORDER BY TABLE_SCHEMA, TABLE_NAME
`,
databases,
[...databases, ...tableNameParams],
);
const columns = await this.queryRaw<MysqlColumnRow>(
`
SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COLUMN_COMMENT
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA IN (${placeholders})
WHERE TABLE_SCHEMA IN (${placeholders}) ${tableNameClause}
ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION
`,
databases,
[...databases, ...tableNameParams],
);
const primaryKeys = await this.queryRaw<MysqlPrimaryKeyRow>(
`
@ -359,9 +374,10 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
WHERE TABLE_SCHEMA IN (${placeholders})
AND CONSTRAINT_NAME = 'PRIMARY'
${tableNameClause}
ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION
`,
databases,
[...databases, ...tableNameParams],
);
const foreignKeys = await this.queryRaw<MysqlForeignKeyRow>(
`
@ -369,9 +385,10 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
WHERE TABLE_SCHEMA IN (${placeholders})
AND REFERENCED_TABLE_NAME IS NOT NULL
${tableNameClause}
ORDER BY TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME
`,
databases,
[...databases, ...tableNameParams],
);
const columnsByTable = groupByTable(columns, this.poolConfig.database);
@ -403,6 +420,23 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
};
}
/**
 * Builds a schema snapshot that contains no tables for this MySQL connection.
 *
 * `introspect` returns this when a table scope was supplied but none of the
 * scoped tables belong to the configured schemas.
 *
 * @param databases Schema (database) names the connector is configured to scan;
 *   echoed back as the snapshot scope and as `metadata.schemas`.
 * @returns A snapshot with zeroed table/column counts and an empty `tables` list.
 */
private emptySnapshot(databases: string[]): KtxSchemaSnapshot {
  const extractedAt = this.now().toISOString();
  const { database, host } = this.poolConfig;
  return {
    connectionId: this.connectionId,
    driver: 'mysql',
    extractedAt,
    scope: { schemas: databases },
    metadata: { database, schemas: databases, host, table_count: 0, total_columns: 0 },
    tables: [],
  };
}
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
this.assertConnection(input.connectionId);
const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));

View file

@ -1,4 +1,7 @@
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
import type {
LiveDatabaseIntrospectionOptions,
LiveDatabaseIntrospectionPort,
} from '../../context/ingest/adapters/live-database/types.js';
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
import {
KtxMysqlScanConnector,
@ -18,7 +21,7 @@ export function createMysqlLiveDatabaseIntrospection(
options: CreateMysqlLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
return {
async extractSchema(connectionId: string) {
async extractSchema(connectionId: string, introspectionOptions?: LiveDatabaseIntrospectionOptions) {
const connection = options.connections[connectionId] as KtxMysqlConnectionConfig | undefined;
const connector = new KtxMysqlScanConnector({
connectionId,
@ -28,7 +31,14 @@ export function createMysqlLiveDatabaseIntrospection(
now: options.now,
});
try {
return await connector.introspect({ connectionId, driver: 'mysql' }, { runId: `mysql-${connectionId}` });
return await connector.introspect(
{
connectionId,
driver: 'mysql',
...(introspectionOptions?.tableScope ? { tableScope: introspectionOptions.tableScope } : {}),
},
{ runId: `mysql-${connectionId}` },
);
} finally {
await connector.cleanup();
}

View file

@ -1,6 +1,7 @@
import { describe, expect, it, vi } from 'vitest';
import { createPostgresLiveDatabaseIntrospection } from '../../connectors/postgres/live-database-introspection.js';
import { isKtxPostgresConnectionConfig, KtxPostgresScanConnector, postgresPoolConfigFromConfig, type KtxPostgresPoolFactory } from '../../connectors/postgres/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
interface FakeQueryResult {
rows: Record<string, unknown>[];
@ -259,6 +260,63 @@ describe('KtxPostgresScanConnector', () => {
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
});
// Verifies that a tableScope passed to introspect() is pushed down into the
// pg_catalog table query as a `c.relname = ANY($2)` filter.
it('limits introspection to tables in tableScope', async () => {
// Every statement sent through the fake client is recorded here, together with
// its bind parameters, so the assertions below can inspect them.
const queries: Array<{ sql: string; params?: unknown[] }> = [];
const poolFactory: KtxPostgresPoolFactory = {
createPool() {
return {
async connect() {
return {
query: vi.fn(async (sql: string, params?: unknown[]) => {
queries.push({ sql, params });
// Table listing: a single ordinary table named "orders".
if (sql.includes('FROM pg_catalog.pg_class c')) {
return { rows: [{ table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null }] };
}
// Column listing: a single non-nullable "id" column on "orders".
if (sql.includes('FROM pg_catalog.pg_attribute a')) {
return {
rows: [
{
table_name: 'orders',
column_name: 'id',
data_type: 'integer',
is_nullable: false,
column_comment: null,
},
],
};
}
// Every other statement gets an empty result set.
return { rows: [] };
}),
release: vi.fn(),
};
},
end: vi.fn(async () => undefined),
};
},
};
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory,
});
// Scope introspection to exactly one table in the configured schema.
const scope = tableRefSet([{ catalog: null, db: 'public', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'postgres', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
// The scoped names travel as ONE array-valued parameter ($2) after the schema
// name ($1), rather than as one placeholder per table.
const tablesQuery = queries.find((query) => query.sql.includes('FROM pg_catalog.pg_class c'));
expect(tablesQuery?.sql).toMatch(/c\.relname = ANY\(\$2\)/);
expect(tablesQuery?.params).toEqual(['public', ['orders']]);
});
it('adapts native PostgreSQL snapshots to live-database introspection for local ingest', async () => {
const introspection = createPostgresLiveDatabaseIntrospection({
connections: {

View file

@ -3,6 +3,7 @@ import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { Pool } from 'pg';
import { KtxPostgresDialect } from './dialect.js';
@ -379,7 +380,9 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
const schemas = schemasFromConnection(this.connection);
const allTables: KtxSchemaTable[] = [];
for (const schema of schemas) {
const tables = await this.loadSchemaTables(schema);
const scopedNames = input.tableScope ? scopedTableNames(input.tableScope, { catalog: null, db: schema }) : null;
if (scopedNames && scopedNames.length === 0) continue;
const tables = await this.loadSchemaTables(schema, scopedNames);
allTables.push(...tables);
}
return {
@ -543,7 +546,11 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
}
}
private async loadSchemaTables(schema: string): Promise<KtxSchemaTable[]> {
private async loadSchemaTables(schema: string, scopedNames: readonly string[] | null): Promise<KtxSchemaTable[]> {
if (scopedNames && scopedNames.length === 0) return [];
const pgCatalogScopeClause = scopedNames ? 'AND c.relname = ANY($2)' : '';
const tableConstraintScopeClause = scopedNames ? 'AND tc.table_name = ANY($2)' : '';
const scopeValues = scopedNames ? [scopedNames] : [];
const tables = await this.queryRaw<PostgresTableRow>(
`
SELECT
@ -557,9 +564,10 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
ON d.objoid = c.oid AND d.objsubid = 0
WHERE n.nspname = $1
AND c.relkind IN ('r', 'v')
${pgCatalogScopeClause}
ORDER BY c.relname
`,
[schema],
[schema, ...scopeValues],
);
const columns = await this.queryRaw<PostgresColumnRow>(
`
@ -578,9 +586,10 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
AND c.relkind IN ('r', 'v')
AND a.attnum > 0
AND NOT a.attisdropped
${pgCatalogScopeClause}
ORDER BY c.relname, a.attnum
`,
[schema],
[schema, ...scopeValues],
);
const primaryKeys = await this.queryRaw<PostgresPrimaryKeyRow>(
`
@ -591,9 +600,10 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
AND tc.table_schema = kcu.table_schema
WHERE tc.constraint_type = 'PRIMARY KEY'
AND tc.table_schema = $1
${tableConstraintScopeClause}
ORDER BY tc.table_name, kcu.ordinal_position
`,
[schema],
[schema, ...scopeValues],
);
const foreignKeys = await this.queryRaw<PostgresForeignKeyRow>(
`
@ -613,9 +623,10 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
AND ccu.table_schema = tc.table_schema
WHERE tc.constraint_type = 'FOREIGN KEY'
AND tc.table_schema = $1
${tableConstraintScopeClause}
ORDER BY tc.table_name, kcu.column_name
`,
[schema],
[schema, ...scopeValues],
);
const columnsByTable = groupByTable(columns);

View file

@ -1,4 +1,7 @@
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
import type {
LiveDatabaseIntrospectionOptions,
LiveDatabaseIntrospectionPort,
} from '../../context/ingest/adapters/live-database/types.js';
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
import {
KtxPostgresScanConnector,
@ -18,7 +21,7 @@ export function createPostgresLiveDatabaseIntrospection(
options: CreatePostgresLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
return {
async extractSchema(connectionId: string) {
async extractSchema(connectionId: string, introspectionOptions?: LiveDatabaseIntrospectionOptions) {
const connection = options.connections[connectionId] as KtxPostgresConnectionConfig | undefined;
const connector = new KtxPostgresScanConnector({
connectionId,
@ -28,7 +31,14 @@ export function createPostgresLiveDatabaseIntrospection(
now: options.now,
});
try {
return await connector.introspect({ connectionId, driver: 'postgres' }, { runId: `postgres-${connectionId}` });
return await connector.introspect(
{
connectionId,
driver: 'postgres',
...(introspectionOptions?.tableScope ? { tableScope: introspectionOptions.tableScope } : {}),
},
{ runId: `postgres-${connectionId}` },
);
} finally {
await connector.cleanup();
}

View file

@ -3,11 +3,13 @@ import { describe, expect, it, vi } from 'vitest';
const createPool = vi.hoisted(() => vi.fn());
vi.mock('snowflake-sdk', () => ({
default: { createPool },
createPool,
}));
import { createSnowflakeLiveDatabaseIntrospection } from '../../connectors/snowflake/live-database-introspection.js';
import { isKtxSnowflakeConnectionConfig, KtxSnowflakeScanConnector, snowflakeConnectionConfigFromConfig, type KtxSnowflakeDriver, type KtxSnowflakeDriverFactory } from '../../connectors/snowflake/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function fakeDriverFactory(): KtxSnowflakeDriverFactory {
const driver: KtxSnowflakeDriver = {
@ -289,6 +291,110 @@ describe('KtxSnowflakeScanConnector', () => {
]);
});
// Verifies that a failing primary-key lookup does not abort introspection:
// tables are still returned, no column is flagged as a primary key, and a
// warning naming the database and schema is logged.
it('continues introspection when primary-key discovery is not authorized', async () => {
const driverFactory = fakeDriverFactory();
// Pull the driver-building implementation out of the fake factory so a single
// driver instance can be built and its `query` mock overridden below.
const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).getMockImplementation() as
| (() => KtxSnowflakeDriver)
| undefined;
if (!driver) throw new Error('driver mock missing');
const built = driver();
// The primary-key query (identified by TABLE_CONSTRAINTS) fails with an
// authorization-style error; any other SQL reaching `query` is unexpected.
(built.query as ReturnType<typeof vi.fn>).mockImplementation(async (sql: string) => {
if (sql.includes('TABLE_CONSTRAINTS')) {
throw new Error(
"SQL compilation error: Object 'ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE' does not exist or not authorized.",
);
}
throw new Error(`Unexpected SQL: ${sql}`);
});
// Make the factory hand out the patched driver instead of building a fresh one.
(driverFactory.createDriver as ReturnType<typeof vi.fn>).mockReturnValue(built);
// Silence console.warn and capture its calls for the assertion below.
const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined);
try {
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'snowflake' },
{ runId: 'scan-run-pk-skip' },
);
// NOTE(review): the expected table names presumably come from the metadata
// stub inside fakeDriverFactory, which is defined outside this view — confirm.
expect(snapshot.tables.map((table) => table.name).sort()).toEqual(['ORDERS', 'ORDER_SUMMARY']);
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(warn).toHaveBeenCalledWith(
expect.stringContaining('Snowflake primary-key discovery skipped for ANALYTICS.PUBLIC'),
);
} finally {
// Always restore console.warn so later tests see the real implementation.
warn.mockRestore();
}
});
// Verifies that a tableScope passed to introspect() is forwarded to the driver's
// getSchemaMetadata call and to the primary-key query as bound table names.
it('limits introspection to tables in tableScope', async () => {
// Every statement sent through the fake driver's `query` is recorded here,
// together with its bind parameters.
const queries: Array<{ sql: string; params?: unknown }> = [];
// Metadata stub: returns the ORDERS table only when the scoped names it is
// given include 'ORDERS'; otherwise it returns no tables.
const getSchemaMetadata = vi.fn(async (_schemaName?: string, scopedNames?: readonly string[] | null) =>
scopedNames?.includes('ORDERS')
? [
{
name: 'ORDERS',
catalog: 'ANALYTICS',
db: 'MARTS',
rowCount: 10,
comment: null,
columns: [{ name: 'ID', type: 'NUMBER', nullable: false, comment: null }],
},
]
: [],
);
const driverFactory: KtxSnowflakeDriverFactory = {
createDriver: vi.fn(() => ({
test: vi.fn(async () => ({ success: true })),
// All SQL succeeds with an empty result; only the statement text matters here.
query: vi.fn(async (sql: string, params?: unknown) => {
queries.push({ sql, params });
return { headers: [], rows: [], totalRows: 0, rowCount: 0 };
}),
getSchemaMetadata,
listSchemas: vi.fn(async () => []),
listTables: vi.fn(async () => []),
cleanup: vi.fn(async () => undefined),
})),
};
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'MARTS',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
// Scope introspection to exactly one table; the catalog is the configured database.
const scope = tableRefSet([{ catalog: 'ANALYTICS', db: 'MARTS', name: 'ORDERS' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'snowflake', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['ORDERS']);
expect(getSchemaMetadata).toHaveBeenCalledWith('MARTS', ['ORDERS']);
// The primary-key query must carry one placeholder for the single table, and
// its parameters must be ordered schema, database, then table name.
const primaryKeysQuery = queries.find((query) => query.sql.includes('TABLE_CONSTRAINTS'));
expect(primaryKeysQuery?.sql).toMatch(/AND tc\.TABLE_NAME IN \(\?\)/);
expect(primaryKeysQuery?.params).toEqual(['MARTS', 'ANALYTICS', 'ORDERS']);
});
it('supports read-only query, sampling, distinct values, row counts, schema listing, and cleanup', async () => {
const driverFactory = fakeDriverFactory();
const connector = new KtxSnowflakeScanConnector({

View file

@ -4,9 +4,12 @@ import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
import * as snowflake from 'snowflake-sdk';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import snowflake from 'snowflake-sdk';
import type { Bind, Binds, Connection, ConnectionOptions } from 'snowflake-sdk';
import { KtxSnowflakeDialect } from './dialect.js';
import { assertSafeSnowflakeIdentifier, quoteSnowflakeIdentifier } from './identifiers.js';
import { configureSnowflakeSdkLogger } from './sdk-logger.js';
export interface KtxSnowflakeConnectionConfig {
driver?: string;
@ -58,7 +61,7 @@ export interface KtxSnowflakeRawTableMetadata {
export interface KtxSnowflakeDriver {
test(): Promise<{ success: boolean; error?: string }>;
query(sql: string, params?: unknown): Promise<KtxQueryResult>;
getSchemaMetadata(schemaName?: string): Promise<KtxSnowflakeRawTableMetadata[]>;
getSchemaMetadata(schemaName?: string, scopedTableNames?: readonly string[] | null): Promise<KtxSnowflakeRawTableMetadata[]>;
listSchemas(): Promise<string[]>;
listTables(schemas?: string[]): Promise<KtxTableListEntry[]>;
cleanup(): Promise<void>;
@ -81,6 +84,12 @@ export interface KtxSnowflakeSdkOptionsProvider {
export interface KtxSnowflakeScanConnectorOptions {
connectionId: string;
connection: KtxSnowflakeConnectionConfig | undefined;
/**
* KTX project directory. When provided, snowflake-sdk's logger is redirected to
* `<projectDir>/.ktx/logs/snowflake.log` so its JSON output does not bleed into
* the CLI's TTY. Tests that use a fake driverFactory can leave this undefined.
*/
projectDir?: string;
driverFactory?: KtxSnowflakeDriverFactory;
sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
env?: NodeJS.ProcessEnv;
@ -148,7 +157,8 @@ function schemaNames(connection: KtxSnowflakeConnectionConfig, env: NodeJS.Proce
.filter((schema) => schema.trim().length > 0)
.map((schema) => resolveStringReference(schema, env));
}
return [stringConfigValue(connection, 'schema_name', env) ?? 'PUBLIC'];
const single = stringConfigValue(connection, 'schema_name', env);
return single ? [single] : [];
}
function firstNumber(value: unknown): number | null {
@ -178,7 +188,7 @@ function normalizeSnowflakeValue(value: unknown, columnType?: string): unknown {
return value;
}
function toSnowflakeBind(value: unknown): snowflake.Bind {
function toSnowflakeBind(value: unknown): Bind {
if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
return value;
}
@ -188,7 +198,7 @@ function toSnowflakeBind(value: unknown): snowflake.Bind {
return String(value);
}
function toSnowflakeBinds(params: unknown[] | undefined): snowflake.Binds | undefined {
function toSnowflakeBinds(params: unknown[] | undefined): Binds | undefined {
return params?.map((value) => toSnowflakeBind(value));
}
@ -319,24 +329,32 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver {
}
}
async getSchemaMetadata(schemaName = this.resolved.schemas[0] ?? 'PUBLIC'): Promise<KtxSnowflakeRawTableMetadata[]> {
async getSchemaMetadata(
schemaName = this.resolved.schemas[0] ?? 'PUBLIC',
scopedTableNames: readonly string[] | null = null,
): Promise<KtxSnowflakeRawTableMetadata[]> {
const scopeClause =
scopedTableNames && scopedTableNames.length > 0
? `AND TABLE_NAME IN (${scopedTableNames.map(() => '?').join(', ')})`
: '';
const scopeParams = scopedTableNames ?? [];
const tablesResult = await this.query(
`
SELECT TABLE_NAME, TABLE_TYPE, COMMENT, ROW_COUNT
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ? ${scopeClause}
ORDER BY TABLE_NAME
`,
[schemaName, this.resolved.database],
[schemaName, this.resolved.database, ...scopeParams],
);
const columnsResult = await this.query(
`
SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COMMENT, ORDINAL_POSITION
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ? ${scopeClause}
ORDER BY TABLE_NAME, ORDINAL_POSITION
`,
[schemaName, this.resolved.database],
[schemaName, this.resolved.database, ...scopeParams],
);
const columnsByTable = new Map<string, KtxSnowflakeRawColumnMetadata[]>();
for (const row of columnsResult.rows) {
@ -430,12 +448,13 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver {
if (patch?.close) {
this.closeSdkOptions.push(patch.close);
}
const baseConfig: snowflake.ConnectionOptions = {
const sessionSchema = this.resolved.schemas[0];
const baseConfig: ConnectionOptions = {
account: this.resolved.account,
username: this.resolved.username,
warehouse: this.resolved.warehouse,
database: this.resolved.database,
schema: this.resolved.schemas[0] ?? 'PUBLIC',
...(sessionSchema ? { schema: sessionSchema } : {}),
role: this.resolved.role,
clientSessionKeepAlive: true,
clientSessionKeepAliveHeartbeatFrequency: 900,
@ -447,9 +466,9 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver {
}
private async executeSnowflakeQuery(
connection: snowflake.Connection,
connection: Connection,
sqlText: string,
binds?: snowflake.Binds,
binds?: Binds,
): Promise<{ headers: string[]; headerTypes?: string[]; rows: unknown[][] }> {
return new Promise((resolveQuery, rejectQuery) => {
connection.execute({
@ -509,6 +528,9 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
this.driverFactory = options.driverFactory ?? new DefaultSnowflakeDriverFactory();
this.now = options.now ?? (() => new Date());
this.id = `snowflake:${options.connectionId}`;
if (options.projectDir) {
configureSnowflakeSdkLogger(options.projectDir);
}
}
async testConnection(): Promise<{ success: boolean; error?: string }> {
@ -519,7 +541,11 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
this.assertConnection(input.connectionId);
const tables: KtxSchemaTable[] = [];
for (const schemaName of this.resolved.schemas) {
const rawTables = await this.getDriver().getSchemaMetadata(schemaName);
const scopedNames = input.tableScope
? scopedTableNames(input.tableScope, { catalog: this.resolved.database, db: schemaName })
: null;
if (scopedNames && scopedNames.length === 0) continue;
const rawTables = await this.getDriver().getSchemaMetadata(schemaName, scopedNames);
const primaryKeys = await this.primaryKeys(rawTables.map((table) => table.name), schemaName);
tables.push(...rawTables.map((table) => this.toSchemaTable(table, primaryKeys)));
}
@ -652,32 +678,41 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
}
private async primaryKeys(tableNames: string[], schemaName: string): Promise<Map<string, Set<string>>> {
if (tableNames.length === 0) {
return new Map();
}
const result = await this.getDriver().query(
`
SELECT tc.TABLE_NAME, kcu.COLUMN_NAME
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
AND tc.TABLE_CATALOG = kcu.TABLE_CATALOG
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
AND tc.TABLE_SCHEMA = ?
AND tc.TABLE_CATALOG = ?
ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
`,
[schemaName, this.resolved.database],
);
const grouped = new Map<string, Set<string>>();
for (const tableName of tableNames) {
grouped.set(tableName, new Set());
}
for (const row of result.rows) {
const tableName = String(row[0]);
const columnName = String(row[1]);
grouped.get(tableName)?.add(columnName);
if (tableNames.length === 0) {
return grouped;
}
const tableNamePlaceholders = tableNames.map(() => '?').join(', ');
try {
const result = await this.getDriver().query(
`
SELECT tc.TABLE_NAME, kcu.COLUMN_NAME
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
AND tc.TABLE_CATALOG = kcu.TABLE_CATALOG
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
AND tc.TABLE_SCHEMA = ?
AND tc.TABLE_CATALOG = ?
AND tc.TABLE_NAME IN (${tableNamePlaceholders})
ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
`,
[schemaName, this.resolved.database, ...tableNames],
);
for (const row of result.rows) {
const tableName = String(row[0]);
const columnName = String(row[1]);
grouped.get(tableName)?.add(columnName);
}
} catch (error) {
const detail = error instanceof Error ? error.message : String(error);
console.warn(
`Snowflake primary-key discovery skipped for ${this.resolved.database}.${schemaName}: ${detail.replace(/\s+/g, ' ').trim()}`,
);
}
return grouped;
}

View file

@ -0,0 +1,31 @@
import { KtxSnowflakeScanConnector, type KtxSnowflakeScanConnectorOptions } from './connector.js';
export type KtxSnowflakeHistoricSqlQueryClientOptions = KtxSnowflakeScanConnectorOptions;
/**
 * Small query client that owns a `KtxSnowflakeScanConnector` and exposes only
 * what callers need: run a SQL string and release the connector afterwards.
 */
export class KtxSnowflakeHistoricSqlQueryClient {
  private readonly connectionId: string;
  private readonly connector: KtxSnowflakeScanConnector;

  /**
   * @param options Connector options; `connectionId` is remembered so every
   *   query is issued against the same connection.
   */
  constructor(options: KtxSnowflakeHistoricSqlQueryClientOptions) {
    this.connectionId = options.connectionId;
    this.connector = new KtxSnowflakeScanConnector(options);
  }

  /**
   * Runs `sql` through the connector's `executeReadOnly` and returns only the
   * tabular part of the result.
   *
   * @param sql SQL text to execute.
   * @returns The `headers`, `rows`, and `totalRows` fields of the connector result.
   */
  async executeQuery(sql: string): Promise<{ headers: string[]; rows: unknown[][]; totalRows: number }> {
    const request = { connectionId: this.connectionId, sql };
    // The second argument is an empty placeholder object cast to `never`.
    const { headers, rows, totalRows } = await this.connector.executeReadOnly(request, {} as never);
    return { headers, rows, totalRows };
  }

  /** Delegates to the underlying connector's `cleanup`. */
  async cleanup(): Promise<void> {
    await this.connector.cleanup();
  }
}

View file

@ -1,4 +1,7 @@
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
import type {
LiveDatabaseIntrospectionOptions,
LiveDatabaseIntrospectionPort,
} from '../../context/ingest/adapters/live-database/types.js';
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
import {
KtxSnowflakeScanConnector,
@ -9,6 +12,7 @@ import {
interface CreateSnowflakeLiveDatabaseIntrospectionOptions {
connections: Record<string, KtxProjectConnectionConfig>;
projectDir?: string;
driverFactory?: KtxSnowflakeDriverFactory;
sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
now?: () => Date;
@ -18,18 +22,23 @@ export function createSnowflakeLiveDatabaseIntrospection(
options: CreateSnowflakeLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
return {
async extractSchema(connectionId: string) {
async extractSchema(connectionId: string, introspectionOptions?: LiveDatabaseIntrospectionOptions) {
const connection = options.connections[connectionId] as KtxSnowflakeConnectionConfig | undefined;
const connector = new KtxSnowflakeScanConnector({
connectionId,
connection,
...(options.projectDir ? { projectDir: options.projectDir } : {}),
driverFactory: options.driverFactory,
sdkOptionsProvider: options.sdkOptionsProvider,
now: options.now,
});
try {
return await connector.introspect(
{ connectionId, driver: 'snowflake' },
{
connectionId,
driver: 'snowflake',
...(introspectionOptions?.tableScope ? { tableScope: introspectionOptions.tableScope } : {}),
},
{ runId: `snowflake-${connectionId}` },
);
} finally {

View file

@ -0,0 +1,57 @@
import { mkdtempSync, rmSync, statSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
const { configure } = vi.hoisted(() => ({ configure: vi.fn() }));
vi.mock('snowflake-sdk', () => ({
default: { configure },
}));
import {
configureSnowflakeSdkLogger,
resetSnowflakeSdkLoggerConfigurationForTests,
} from './sdk-logger.js';
// Exercises configureSnowflakeSdkLogger against a mocked `snowflake-sdk` so the
// assertions can observe exactly how (and how often) the SDK is configured.
describe('configureSnowflakeSdkLogger', () => {
  let projectDir: string;

  beforeEach(() => {
    // Start every test with a clean mock, no remembered log path, and a fresh project dir.
    configure.mockReset();
    resetSnowflakeSdkLoggerConfigurationForTests();
    projectDir = mkdtempSync(join(tmpdir(), 'ktx-snowflake-logger-'));
  });

  afterEach(() => {
    rmSync(projectDir, { recursive: true, force: true });
  });

  it('routes logs to <projectDir>/.ktx/logs/snowflake.log with console output disabled', () => {
    const logsDir = resolve(projectDir, '.ktx', 'logs');
    const expectedLogPath = resolve(projectDir, '.ktx', 'logs', 'snowflake.log');

    const actualLogPath = configureSnowflakeSdkLogger(projectDir);

    expect(actualLogPath).toBe(expectedLogPath);
    expect(configure).toHaveBeenCalledTimes(1);
    expect(configure).toHaveBeenCalledWith({
      logFilePath: expectedLogPath,
      additionalLogToConsole: false,
    });
    // The log directory must have been created as a side effect.
    expect(statSync(logsDir).isDirectory()).toBe(true);
  });

  it('is idempotent for the same projectDir', () => {
    for (let call = 0; call < 2; call += 1) {
      configureSnowflakeSdkLogger(projectDir);
    }
    expect(configure).toHaveBeenCalledTimes(1);
  });

  it('reconfigures when projectDir changes', () => {
    const secondProjectDir = mkdtempSync(join(tmpdir(), 'ktx-snowflake-logger-other-'));
    try {
      configureSnowflakeSdkLogger(projectDir);
      configureSnowflakeSdkLogger(secondProjectDir);
      expect(configure).toHaveBeenCalledTimes(2);
    } finally {
      // Remove the extra temp dir even when an assertion above fails.
      rmSync(secondProjectDir, { recursive: true, force: true });
    }
  });
});

View file

@ -0,0 +1,32 @@
import { mkdirSync } from 'node:fs';
import { resolve } from 'node:path';
import snowflake from 'snowflake-sdk';
let configuredLogFilePath: string | null = null;
/**
 * Points the snowflake-sdk logger at `<projectDir>/.ktx/logs/snowflake.log` and
 * disables its console mirroring, so the SDK's JSON log output stays out of the
 * CLI's TTY (where it would pollute the setup wizard and break the in-place
 * progress repainter in `context-build-view.ts`).
 *
 * The last configured path is remembered for the lifetime of the process:
 * calling again with a projectDir that resolves to the same log file does
 * nothing, while a different projectDir reconfigures the SDK.
 *
 * @param projectDir Project root under which `.ktx/logs` is created if missing.
 * @returns The absolute path of the log file.
 */
export function configureSnowflakeSdkLogger(projectDir: string): string {
  const logsDirectory = resolve(projectDir, '.ktx', 'logs');
  const targetLogFile = resolve(logsDirectory, 'snowflake.log');

  if (configuredLogFilePath !== targetLogFile) {
    // Ensure the directory exists before handing the file path to the SDK.
    mkdirSync(logsDirectory, { recursive: true });
    snowflake.configure({ logFilePath: targetLogFile, additionalLogToConsole: false });
    configuredLogFilePath = targetLogFile;
  }

  return targetLogFile;
}
/**
 * Forgets the remembered log file path so that the next
 * `configureSnowflakeSdkLogger` call configures the SDK logger again, even for
 * a projectDir that was already configured. Intended for use in tests.
 *
 * @internal
 */
export function resetSnowflakeSdkLoggerConfigurationForTests(): void {
configuredLogFilePath = null;
}

View file

@ -6,6 +6,7 @@ import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { createSqliteLiveDatabaseIntrospection } from '../../connectors/sqlite/live-database-introspection.js';
import { isKtxSqliteConnectionConfig, KtxSqliteScanConnector, sqliteDatabasePathFromConfig } from '../../connectors/sqlite/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
describe('KtxSqliteScanConnector', () => {
let tempDir: string;
@ -196,6 +197,19 @@ describe('KtxSqliteScanConnector', () => {
).resolves.toBeNull();
});
it('limits introspection to tables in tableScope', async () => {
const connector = new KtxSqliteScanConnector({
connectionId: 'warehouse',
connection: { driver: 'sqlite', path: dbPath },
});
const scope = tableRefSet([{ catalog: null, db: null, name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlite', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
});
it('adapts native SQLite snapshots to live-database introspection for local ingest', async () => {
const introspection = createSqliteLiveDatabaseIntrospection({
projectDir: tempDir,

View file

@ -6,6 +6,7 @@ import { fileURLToPath } from 'node:url';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { normalizeQueryRows } from '../../context/connections/query-executor.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { KtxSqliteDialect } from './dialect.js';
export interface KtxSqliteConnectionConfig {
@ -181,11 +182,16 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
this.assertConnection(input.connectionId);
const database = this.database();
const rawTables = database
.prepare(
`SELECT name, type FROM sqlite_master WHERE type IN ('table', 'view') AND name NOT LIKE 'sqlite_%' ORDER BY name`,
)
.all() as SqliteMasterRow[];
const scopedNames = input.tableScope ? scopedTableNames(input.tableScope, { catalog: null, db: null }) : null;
const scopeClause = scopedNames ? `AND name IN (${scopedNames.map(() => '?').join(', ')})` : '';
const rawTables =
scopedNames && scopedNames.length === 0
? []
: (database
.prepare(
`SELECT name, type FROM sqlite_master WHERE type IN ('table', 'view') AND name NOT LIKE 'sqlite_%' ${scopeClause} ORDER BY name`,
)
.all(...(scopedNames ?? [])) as SqliteMasterRow[]);
const tables = rawTables.map((table) => this.readTable(database, table));
const fileStats = existsSync(this.dbPath) ? statSync(this.dbPath) : null;
return {

View file

@ -1,4 +1,7 @@
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
import type {
LiveDatabaseIntrospectionOptions,
LiveDatabaseIntrospectionPort,
} from '../../context/ingest/adapters/live-database/types.js';
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
import { KtxSqliteScanConnector, type KtxSqliteConnectionConfig } from './connector.js';
@ -12,7 +15,7 @@ export function createSqliteLiveDatabaseIntrospection(
options: CreateSqliteLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
return {
async extractSchema(connectionId: string) {
async extractSchema(connectionId: string, introspectionOptions?: LiveDatabaseIntrospectionOptions) {
const connection = options.connections[connectionId] as KtxSqliteConnectionConfig | undefined;
const connector = new KtxSqliteScanConnector({
connectionId,
@ -21,7 +24,14 @@ export function createSqliteLiveDatabaseIntrospection(
now: options.now,
});
try {
return await connector.introspect({ connectionId, driver: 'sqlite' }, { runId: `sqlite-${connectionId}` });
return await connector.introspect(
{
connectionId,
driver: 'sqlite',
...(introspectionOptions?.tableScope ? { tableScope: introspectionOptions.tableScope } : {}),
},
{ runId: `sqlite-${connectionId}` },
);
} finally {
await connector.cleanup();
}

View file

@ -1,6 +1,7 @@
import { describe, expect, it, vi } from 'vitest';
import { createSqlServerLiveDatabaseIntrospection } from '../../connectors/sqlserver/live-database-introspection.js';
import { isKtxSqlServerConnectionConfig, KtxSqlServerScanConnector, sqlServerConnectionPoolConfigFromConfig, type KtxSqlServerPoolFactory, type KtxSqlServerQueryResult } from '../../connectors/sqlserver/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function recordset<T extends Record<string, unknown>>(
rows: T[],
@ -290,6 +291,55 @@ describe('KtxSqlServerScanConnector', () => {
await connector.cleanup();
});
it('limits introspection to tables in tableScope', async () => {
const queries: string[] = [];
const inputs: Array<{ name: string; value: unknown }> = [];
const request = {
input: vi.fn((name: string, value: unknown) => {
inputs.push({ name, value });
return request;
}),
query: vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
queries.push(sql);
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return result([{ table_name: 'orders', table_type: 'BASE TABLE' }], ['table_name', 'table_type']);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return result(
[{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' }],
['table_name', 'column_name', 'data_type', 'is_nullable'],
);
}
return result([], []);
}),
};
const poolFactory: KtxSqlServerPoolFactory = {
createPool: vi.fn(async () => ({
request: () => request,
close: vi.fn(async () => undefined),
})),
};
const connector = new KtxSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory,
});
const scope = tableRefSet([{ catalog: 'analytics', db: 'dbo', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlserver', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
expect(queries.find((query) => query.includes('INFORMATION_SCHEMA.TABLES'))).toMatch(/TABLE_NAME IN \(@table_0\)/);
expect(inputs).toEqual(expect.arrayContaining([{ name: 'table_0', value: 'orders' }]));
});
it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => {
const introspection = createSqlServerLiveDatabaseIntrospection({
connections: {

View file

@ -1,5 +1,6 @@
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
@ -121,6 +122,20 @@ function sqlRecordset(
return recordset;
}
/**
 * Builds an optional SQL filter fragment that restricts a metadata query to a
 * fixed list of table names, together with the named parameters it references.
 *
 * @param scopedNames Table names to allow. `null` means "no scoping" and
 *   produces an empty clause; an empty array means "no tables are in scope".
 * @param columnExpression SQL expression holding the table name in the target
 *   query (for example `TABLE_NAME` or `o.name`). It is interpolated verbatim,
 *   so it must be a trusted, code-supplied expression.
 * @returns `clause` (either empty or starting with `AND`) and `params`, keyed
 *   `table_0`, `table_1`, ... in the order of `scopedNames`.
 */
function tableScopeSql(
  scopedNames: readonly string[] | null,
  columnExpression: string,
): { clause: string; params: Record<string, unknown> } {
  if (!scopedNames) return { clause: '', params: {} };
  // An empty scope must match nothing. `IN ()` is a syntax error, so emit an
  // always-false predicate instead of relying on every caller to guard first.
  if (scopedNames.length === 0) return { clause: 'AND 1 = 0', params: {} };
  const params: Record<string, unknown> = {};
  const placeholders = scopedNames.map((name, index) => {
    const key = `table_${index}`;
    params[key] = name;
    return `@${key}`;
  });
  return { clause: `AND ${columnExpression} IN (${placeholders.join(', ')})`, params };
}
class DefaultSqlServerPoolFactory implements KtxSqlServerPoolFactory {
async createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool> {
const pool = await new sql.ConnectionPool(config as sql.config).connect();
@ -314,7 +329,10 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
this.assertConnection(input.connectionId);
const tables: KtxSchemaTable[] = [];
for (const schemaName of this.schemas) {
tables.push(...(await this.introspectSchema(schemaName)));
const scopedNames = input.tableScope
? scopedTableNames(input.tableScope, { catalog: this.poolConfig.database, db: schemaName })
: null;
tables.push(...(await this.introspectSchema(schemaName, scopedNames)));
}
return {
connectionId: this.connectionId,
@ -461,16 +479,19 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
}
}
private async introspectSchema(schemaName: string): Promise<KtxSchemaTable[]> {
private async introspectSchema(schemaName: string, scopedNames: readonly string[] | null): Promise<KtxSchemaTable[]> {
if (scopedNames && scopedNames.length === 0) return [];
const tableScope = tableScopeSql(scopedNames, 'TABLE_NAME');
const tables = await this.queryRaw<{ table_name: string; table_type: string }>(
`
SELECT TABLE_NAME AS table_name, TABLE_TYPE AS table_type
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = @schemaName
AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
${tableScope.clause}
ORDER BY TABLE_NAME
`,
{ schemaName },
{ schemaName, ...tableScope.params },
);
const columns = await this.queryRaw<{
table_name: string;
@ -482,15 +503,16 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
SELECT TABLE_NAME AS table_name, COLUMN_NAME AS column_name, DATA_TYPE AS data_type, IS_NULLABLE AS is_nullable
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = @schemaName
${tableScope.clause}
ORDER BY TABLE_NAME, ORDINAL_POSITION
`,
{ schemaName },
{ schemaName, ...tableScope.params },
);
const tableComments = await this.tableComments(schemaName);
const columnComments = await this.columnComments(schemaName);
const primaryKeys = await this.primaryKeys(schemaName);
const foreignKeys = await this.foreignKeys(schemaName);
const rowCounts = await this.rowCounts(schemaName);
const tableComments = await this.tableComments(schemaName, scopedNames);
const columnComments = await this.columnComments(schemaName, scopedNames);
const primaryKeys = await this.primaryKeys(schemaName, scopedNames);
const foreignKeys = await this.foreignKeys(schemaName, scopedNames);
const rowCounts = await this.rowCounts(schemaName, scopedNames);
const columnsByTable = groupByTable(columns);
const foreignKeysByTable = groupByTable(foreignKeys);
@ -508,7 +530,8 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
}));
}
private async tableComments(schemaName: string): Promise<Map<string, string>> {
private async tableComments(schemaName: string, scopedNames: readonly string[] | null): Promise<Map<string, string>> {
const tableScope = tableScopeSql(scopedNames, 'o.name');
const rows = await this.queryRaw<{ table_name: string; table_comment: string }>(
`
SELECT o.name AS table_name, CAST(ep.value AS NVARCHAR(MAX)) AS table_comment
@ -519,13 +542,15 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
AND ep.name = 'MS_Description'
WHERE s.name = @schemaName
AND o.type IN ('U', 'V')
${tableScope.clause}
`,
{ schemaName },
{ schemaName, ...tableScope.params },
);
return new Map(rows.map((row) => [row.table_name, row.table_comment]));
}
private async columnComments(schemaName: string): Promise<Map<string, string>> {
private async columnComments(schemaName: string, scopedNames: readonly string[] | null): Promise<Map<string, string>> {
const tableScope = tableScopeSql(scopedNames, 'o.name');
const rows = await this.queryRaw<{ table_name: string; column_name: string; column_comment: string }>(
`
SELECT o.name AS table_name, c.name AS column_name, CAST(ep.value AS NVARCHAR(MAX)) AS column_comment
@ -537,13 +562,18 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
AND ep.name = 'MS_Description'
WHERE s.name = @schemaName
AND o.type IN ('U', 'V')
${tableScope.clause}
`,
{ schemaName },
{ schemaName, ...tableScope.params },
);
return new Map(rows.map((row) => [`${row.table_name}.${row.column_name}`, row.column_comment]));
}
private async primaryKeys(schemaName: string): Promise<Map<string, Set<string>>> {
private async primaryKeys(
schemaName: string,
scopedNames: readonly string[] | null,
): Promise<Map<string, Set<string>>> {
const tableScope = tableScopeSql(scopedNames, 'tc.TABLE_NAME');
const rows = await this.queryRaw<{ table_name: string; column_name: string }>(
`
SELECT tc.TABLE_NAME AS table_name, kcu.COLUMN_NAME AS column_name
@ -553,9 +583,10 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
AND tc.TABLE_SCHEMA = @schemaName
${tableScope.clause}
ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
`,
{ schemaName },
{ schemaName, ...tableScope.params },
);
const grouped = new Map<string, Set<string>>();
for (const row of rows) {
@ -566,7 +597,10 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
return grouped;
}
private async foreignKeys(schemaName: string): Promise<
private async foreignKeys(
schemaName: string,
scopedNames: readonly string[] | null,
): Promise<
Array<{
table_name: string;
column_name: string;
@ -576,6 +610,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
constraint_name: string;
}>
> {
const tableScope = tableScopeSql(scopedNames, 'fk.TABLE_NAME');
return this.queryRaw(
`
SELECT
@ -596,13 +631,15 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
AND pk.CONSTRAINT_NAME = rc.UNIQUE_CONSTRAINT_NAME
AND pk.ORDINAL_POSITION = fk.ORDINAL_POSITION
WHERE fk.TABLE_SCHEMA = @schemaName
${tableScope.clause}
ORDER BY fk.TABLE_NAME, fk.COLUMN_NAME
`,
{ schemaName },
{ schemaName, ...tableScope.params },
);
}
private async rowCounts(schemaName: string): Promise<Map<string, number>> {
private async rowCounts(schemaName: string, scopedNames: readonly string[] | null): Promise<Map<string, number>> {
const tableScope = tableScopeSql(scopedNames, 't.name');
const rows = await this.queryRaw<{ table_name: string; row_count: unknown }>(
`
SELECT t.name AS table_name, SUM(p.rows) AS row_count
@ -611,9 +648,10 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
WHERE s.name = @schemaName
AND p.index_id IN (0, 1)
${tableScope.clause}
GROUP BY t.name
`,
{ schemaName },
{ schemaName, ...tableScope.params },
);
return new Map(rows.map((row) => [row.table_name, firstNumber(row.row_count) ?? 0]));
}

View file

@ -1,4 +1,7 @@
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
import type {
LiveDatabaseIntrospectionOptions,
LiveDatabaseIntrospectionPort,
} from '../../context/ingest/adapters/live-database/types.js';
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
import {
KtxSqlServerScanConnector,
@ -18,7 +21,7 @@ export function createSqlServerLiveDatabaseIntrospection(
options: CreateSqlServerLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
return {
async extractSchema(connectionId: string) {
async extractSchema(connectionId: string, introspectionOptions?: LiveDatabaseIntrospectionOptions) {
const connection = options.connections[connectionId] as KtxSqlServerConnectionConfig | undefined;
const connector = new KtxSqlServerScanConnector({
connectionId,
@ -29,7 +32,11 @@ export function createSqlServerLiveDatabaseIntrospection(
});
try {
return await connector.introspect(
{ connectionId, driver: 'sqlserver' },
{
connectionId,
driver: 'sqlserver',
...(introspectionOptions?.tableScope ? { tableScope: introspectionOptions.tableScope } : {}),
},
{ runId: `sqlserver-${connectionId}` },
);
} finally {

View file

@ -319,7 +319,8 @@ function renderPhaseRow(phase: PhaseState, frame: number, styled: boolean): stri
} else if (phase.status === 'skipped') {
trailing = styled ? dim('skipped') : 'skipped';
} else if (phase.status === 'failed') {
trailing = styled ? red('failed') : 'failed';
const label = styled ? red('failed') : 'failed';
trailing = phase.summary ? `${label} ${phase.summary}` : label;
}
const bar = `${segments.join(' ')} ${trailing}`.trimEnd();
return ` ${icon} ${name} ${bar}`;

View file

@ -106,7 +106,10 @@ describe('createPythonSemanticLayerComputePort', () => {
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
}));
const port = createPythonSemanticLayerComputePort({ runJson });
const port = createPythonSemanticLayerComputePort({
runJson,
projectId: 'hashed-project-id',
});
await expect(
port.query({
@ -125,6 +128,7 @@ describe('createPythonSemanticLayerComputePort', () => {
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
projectId: 'hashed-project-id',
});
});

View file

@ -90,6 +90,7 @@ export interface PythonSemanticLayerComputeOptions {
cwd?: string;
env?: NodeJS.ProcessEnv;
runJson?: KtxDaemonJsonRunner;
projectId?: string;
}
/** @internal */
@ -238,6 +239,7 @@ export function createPythonSemanticLayerComputePort(
const command = options.command ?? 'python';
const args = options.args ?? ['-m', 'ktx_daemon'];
const runJson = options.runJson ?? runProcessJson({ command, args, cwd: options.cwd, env: options.env });
const projectId = options.projectId;
return {
async query(input) {
@ -245,6 +247,7 @@ export function createPythonSemanticLayerComputePort(
sources: input.sources,
dialect: input.dialect,
query: input.query,
...(projectId ? { projectId } : {}),
});
return {
sql: typeof raw.sql === 'string' ? raw.sql : '',

View file

@ -1,6 +1,7 @@
import { once } from 'node:events';
import { createServer } from 'node:http';
import { describe, expect, it, vi } from 'vitest';
import { tableRefSet } from '../../../scan/table-ref.js';
import { createDaemonLiveDatabaseIntrospection } from './daemon-introspection.js';
const daemonResponse = {
@ -161,7 +162,11 @@ describe('createDaemonLiveDatabaseIntrospection', () => {
baseUrl: `http://127.0.0.1:${address.port}`,
});
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
await expect(
introspection.extractSchema('warehouse', {
tableScope: tableRefSet([{ catalog: 'warehouse', db: 'public', name: 'orders' }]),
}),
).resolves.toMatchObject({
connectionId: 'warehouse',
tables: [{ name: 'customers' }, { name: 'orders' }],
});
@ -176,6 +181,7 @@ describe('createDaemonLiveDatabaseIntrospection', () => {
schemas: ['public'],
statement_timeout_ms: 30_000,
connection_timeout_seconds: 5,
table_scope: [{ catalog: 'warehouse', db: 'public', name: 'orders' }],
},
},
]);
@ -217,7 +223,7 @@ describe('createDaemonLiveDatabaseIntrospection', () => {
expect(runJson).not.toHaveBeenCalled();
});
it('filters out tables not on the enabled_tables allowlist', async () => {
it('does not use connection enabled_tables as a response filter', async () => {
const runJson = vi.fn(async () => daemonResponse);
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
@ -232,7 +238,8 @@ describe('createDaemonLiveDatabaseIntrospection', () => {
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual(['public.orders']);
expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual(['public.customers', 'public.orders']);
expect(runJson).toHaveBeenCalledWith('database-introspect', expect.not.objectContaining({ table_scope: expect.anything() }));
});
it('passes through every table when enabled_tables is omitted or empty', async () => {

View file

@ -3,10 +3,10 @@ import { request as httpRequest } from 'node:http';
import { request as httpsRequest } from 'node:https';
import { URL } from 'node:url';
import type { KtxProjectConnectionConfig } from '../../../project/config.js';
import { filterSnapshotTables, resolveEnabledTables } from '../../../scan/enabled-tables.js';
import { tableRefFromKey } from '../../../scan/table-ref.js';
import type { KtxSchemaColumn, KtxSchemaForeignKey, KtxSchemaSnapshot, KtxSchemaTable } from '../../../scan/types.js';
import { inferKtxDimensionType, normalizeKtxNativeType } from '../../../scan/type-normalization.js';
import type { LiveDatabaseIntrospectionPort } from './types.js';
import type { LiveDatabaseIntrospectionOptions, LiveDatabaseIntrospectionPort } from './types.js';
type KtxDaemonDatabaseIntrospectionCommand = 'database-introspect';
@ -220,6 +220,18 @@ function mapDaemonSnapshot(
};
}
/**
 * Converts the optional table scope into a plain array of
 * `{ catalog, db, name }` objects, one per key in the scope set.
 *
 * @param options Introspection options; may be undefined or lack `tableScope`.
 * @returns The serialized scope, or `undefined` when no scope was supplied.
 */
function serializeTableScope(options: LiveDatabaseIntrospectionOptions | undefined): Array<{
  catalog: string | null;
  db: string | null;
  name: string;
}> | undefined {
  const scope = options?.tableScope;
  if (!scope) return undefined;
  // Each key is decoded back into its table reference and reduced to the three serialized fields.
  return Array.from(scope, (key) => {
    const { catalog, db, name } = tableRefFromKey(key);
    return { catalog, db, name };
  });
}
export function createDaemonLiveDatabaseIntrospection(
options: DaemonLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
@ -231,8 +243,9 @@ export function createDaemonLiveDatabaseIntrospection(
const now = options.now ?? (() => new Date());
return {
async extractSchema(connectionId: string): Promise<KtxSchemaSnapshot> {
async extractSchema(connectionId: string, introspectionOptions?: LiveDatabaseIntrospectionOptions): Promise<KtxSchemaSnapshot> {
const connection = requirePostgresConnection(options.connections, connectionId);
const tableScope = serializeTableScope(introspectionOptions);
const payload = {
connection_id: connectionId,
driver: normalizeDriver(connection.driver),
@ -240,17 +253,16 @@ export function createDaemonLiveDatabaseIntrospection(
schemas,
statement_timeout_ms: options.statementTimeoutMs ?? 30_000,
connection_timeout_seconds: options.connectionTimeoutSeconds ?? 5,
...(tableScope !== undefined ? { table_scope: tableScope } : {}),
};
const raw = requestJson
? await requestJson('/database/introspect', payload)
: await runJson('database-introspect', payload);
const snapshot = mapDaemonSnapshot(raw, {
return mapDaemonSnapshot(raw, {
connectionId,
extractedAt: now().toISOString(),
schemas,
});
const enabledTables = resolveEnabledTables(connection);
return enabledTables ? filterSnapshotTables(snapshot, enabledTables) : snapshot;
},
};
}

View file

@ -1,7 +1,8 @@
import { mkdtemp } from 'node:fs/promises';
import { mkdtemp, readdir, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { tableRefSet, type KtxTableRefKey } from '../../../scan/table-ref.js';
import { LiveDatabaseSourceAdapter } from './live-database.adapter.js';
describe('LiveDatabaseSourceAdapter', () => {
@ -43,7 +44,7 @@ describe('LiveDatabaseSourceAdapter', () => {
await adapter.fetch(undefined, dir, { connectionId: 'conn-1', sourceKey: 'live-database' });
expect(extractSchema).toHaveBeenCalledWith('conn-1');
expect(extractSchema).toHaveBeenCalledWith('conn-1', { tableScope: undefined });
await expect(adapter.detect(dir)).resolves.toBe(true);
const chunked = await adapter.chunk(dir);
expect(chunked.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']);
@ -56,4 +57,55 @@ describe('LiveDatabaseSourceAdapter', () => {
expect(adapter.source).toBe('live-database');
expect(adapter.skillNames).toEqual(['live_database_ingest']);
});
it('threads tableScope from fetch context into the introspection port without post-filtering', async () => {
const extractSchema = vi.fn(
async (_connectionId: string, _options?: { tableScope?: ReadonlySet<KtxTableRefKey> }) => ({
connectionId: 'warehouse',
driver: 'snowflake' as const,
extractedAt: '2026-05-22T00:00:00.000Z',
scope: {},
metadata: {},
tables: [
{
catalog: 'A',
db: 'MARTS',
name: 'IN_SCOPE',
kind: 'table' as const,
comment: null,
estimatedRows: 0,
columns: [],
foreignKeys: [],
},
{
catalog: 'A',
db: 'MARTS',
name: 'OUT_OF_SCOPE',
kind: 'table' as const,
comment: null,
estimatedRows: 0,
columns: [],
foreignKeys: [],
},
],
}),
);
const scope = tableRefSet([{ catalog: 'A', db: 'MARTS', name: 'IN_SCOPE' }]);
const adapter = new LiveDatabaseSourceAdapter({
introspection: { extractSchema },
});
const stagedDir = await mkdtemp(join(tmpdir(), 'ktx-livedb-scope-'));
try {
await adapter.fetch(undefined, stagedDir, {
connectionId: 'warehouse',
sourceKey: 'live-database',
tableScope: scope,
});
expect(extractSchema).toHaveBeenCalledWith('warehouse', { tableScope: scope });
const tables = await readdir(join(stagedDir, 'tables'));
expect(tables).toHaveLength(2);
} finally {
await rm(stagedDir, { recursive: true, force: true });
}
});
});

View file

@ -14,7 +14,8 @@ export class LiveDatabaseSourceAdapter implements SourceAdapter {
}
async fetch(_pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
const snapshot = await this.deps.introspection.extractSchema(ctx.connectionId);
const tableScope = ctx.tableScope;
const snapshot = await this.deps.introspection.extractSchema(ctx.connectionId, { tableScope });
await writeLiveDatabaseSnapshot(stagedDir, {
...snapshot,
connectionId: ctx.connectionId,

View file

@ -1,7 +1,12 @@
import type { KtxSchemaSnapshot } from '../../../scan/types.js';
import type { KtxTableRefKey } from '../../../scan/table-ref.js';
export interface LiveDatabaseIntrospectionOptions {
tableScope?: ReadonlySet<KtxTableRefKey>;
}
export interface LiveDatabaseIntrospectionPort {
extractSchema(connectionId: string): Promise<KtxSchemaSnapshot>;
extractSchema(connectionId: string, options?: LiveDatabaseIntrospectionOptions): Promise<KtxSchemaSnapshot>;
}
export interface LiveDatabaseSourceAdapterDeps {

View file

@ -9,6 +9,7 @@ import { sanitizeMemoryFlowError } from './memory-flow/live-buffer.js';
import type { MemoryFlowEventSink, MemoryFlowPlannedWorkUnit } from './memory-flow/types.js';
import { buildSyncId } from './raw-sources-paths.js';
import { SqliteLocalIngestStore } from './sqlite-local-ingest-store.js';
import type { KtxTableRefKey } from '../scan/table-ref.js';
import type { IngestTrigger, SourceAdapter, WorkUnit } from './types.js';
type LocalIngestStatus = 'running' | 'done' | 'error';
@ -62,6 +63,7 @@ export interface RunLocalStageOnlyIngestOptions {
now?: () => Date;
dryRun?: boolean;
memoryFlow?: MemoryFlowEventSink;
tableScope?: ReadonlySet<KtxTableRefKey>;
}
const LOCAL_AUTHOR = 'ktx';
@ -225,6 +227,7 @@ async function prepareLocalStagedDir(
stagedDir: string,
sourceDir: string | undefined,
connectionId: string,
tableScope: ReadonlySet<KtxTableRefKey> | undefined,
): Promise<string | null> {
await rm(stagedDir, { recursive: true, force: true });
await mkdir(stagedDir, { recursive: true });
@ -242,7 +245,7 @@ async function prepareLocalStagedDir(
);
}
const pullConfig = await localPullConfigForAdapter(project, adapter, connectionId);
await adapter.fetch(pullConfig, stagedDir, { connectionId, sourceKey: adapter.source });
await adapter.fetch(pullConfig, stagedDir, { connectionId, sourceKey: adapter.source, tableScope });
return null;
}
@ -274,7 +277,14 @@ async function runLocalStageOnlyIngestInner(options: RunLocalStageOnlyIngestOpti
assertCompatibleExistingRun(existingRun, runId, adapter.source, connectionId);
const stagedDir = join(options.project.projectDir, '.ktx/cache/local-ingest', runId, 'staged');
const sourceDir = await prepareLocalStagedDir(options.project, adapter, stagedDir, options.sourceDir, connectionId);
const sourceDir = await prepareLocalStagedDir(
options.project,
adapter,
stagedDir,
options.sourceDir,
connectionId,
options.tableScope,
);
const detected = await adapter.detect(stagedDir);
if (!detected) {

View file

@ -2,6 +2,7 @@ import type { KtxEmbeddingPort } from '../core/embedding.js';
import type { MemoryAction } from '../../context/memory/types.js';
import type { SemanticLayerService } from '../../context/sl/semantic-layer.service.js';
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
import type { KtxTableRefKey } from '../scan/table-ref.js';
import type { MemoryFlowEventSink } from './memory-flow/types.js';
import type { StageIndex } from './stages/stage-index.types.js';
import type { WorkUnitOutcome } from './stages/stage-3-work-units.js';
@ -52,6 +53,7 @@ export interface ChunkResult {
/** Context object handed to a source adapter's `fetch` call alongside the pull config and staged directory. */
export interface FetchContext {
/** Identifier of the connection the fetch runs for. */
connectionId: string;
/** Source key for the fetch; the local ingest runner passes the adapter's own `source` value here. */
sourceKey: string;
/**
 * Optional set of canonical table-ref keys. The local ingest runner forwards
 * its `tableScope` option here, and the live-database adapter passes it on to
 * schema extraction.
 */
tableScope?: ReadonlySet<KtxTableRefKey>;
/** Optional memory-flow event sink. NOTE(review): its use by adapters is not visible in this chunk. */
memoryFlow?: MemoryFlowEventSink;
}

View file

@ -1,7 +1,10 @@
import { randomUUID } from 'node:crypto';
import type { ToolAnnotations } from '@modelcontextprotocol/sdk/types.js';
import { z } from 'zod';
import type { KtxCliIo } from '../../cli-runtime.js';
import type { MemoryAgentInput } from '../../context/memory/types.js';
import { emitTelemetryEvent, mcpTelemetrySampleRate, shouldEmitMcpTelemetry } from '../../telemetry/index.js';
import { scrubErrorClass } from '../../telemetry/scrubber.js';
import type {
KtxMcpContextPorts,
KtxMcpProgressCallback,
@ -16,6 +19,8 @@ export interface RegisterKtxContextToolsDeps {
server: KtxMcpServerLike;
ports: KtxMcpContextPorts;
userContext: KtxMcpUserContext;
projectDir?: string;
io?: KtxCliIo;
}
const connectionIdSchema = z.string().min(1);
@ -509,8 +514,58 @@ function registerParsedTool<TSchema extends z.ZodType>(
});
}
/**
 * Wraps an MCP server so that every tool registered through the wrapper
 * reports an `mcp_request_completed` telemetry event when its handler settles.
 *
 * An event is emitted only when both `telemetry.io` and `telemetry.projectDir`
 * are set and `shouldEmitMcpTelemetry()` returns true. The event carries the
 * tool name, an `ok`/`error` outcome, the elapsed milliseconds, the sample
 * rate, and (for thrown errors) the scrubbed error class when one is available.
 *
 * Telemetry is best-effort: a failure while emitting never changes what the
 * caller observes. A successful handler result is still returned, and a
 * handler error is still rethrown unchanged.
 *
 * @param server - Server that receives the instrumented tool registrations.
 * @param telemetry - Project directory and CLI IO used when emitting events.
 * @returns A server-like object whose `registerTool` delegates to `server`.
 */
function instrumentMcpServer(
  server: KtxMcpServerLike,
  telemetry: { projectDir?: string; io?: KtxCliIo },
): KtxMcpServerLike {
  return {
    registerTool(name, config, handler) {
      server.registerTool(name, config, async (input, context) => {
        const startedAt = performance.now();

        let result: Awaited<ReturnType<typeof handler>>;
        try {
          // Only the handler call lives in this try block, so a telemetry
          // failure can never be mistaken for a tool failure.
          result = await handler(input, context);
        } catch (error) {
          try {
            if (telemetry.io && telemetry.projectDir && shouldEmitMcpTelemetry()) {
              const errorClass = scrubErrorClass(error);
              await emitTelemetryEvent({
                name: 'mcp_request_completed',
                projectDir: telemetry.projectDir,
                io: telemetry.io,
                fields: {
                  toolName: name,
                  outcome: 'error',
                  ...(errorClass ? { errorClass } : {}),
                  durationMs: Math.max(0, performance.now() - startedAt),
                  sampleRate: mcpTelemetrySampleRate(),
                },
              });
            }
          } catch {
            // Best-effort telemetry: never let an emission failure mask the
            // handler's own error, which is rethrown below.
          }
          throw error;
        }

        try {
          if (telemetry.io && telemetry.projectDir && shouldEmitMcpTelemetry()) {
            // A handler may report failure in-band via `isError: true`
            // instead of throwing; count that as an error outcome.
            const isError =
              typeof result === 'object' && result !== null && 'isError' in result && result.isError === true;
            await emitTelemetryEvent({
              name: 'mcp_request_completed',
              projectDir: telemetry.projectDir,
              io: telemetry.io,
              fields: {
                toolName: name,
                outcome: isError ? 'error' : 'ok',
                durationMs: Math.max(0, performance.now() - startedAt),
                sampleRate: mcpTelemetrySampleRate(),
              },
            });
          }
        } catch {
          // Best-effort telemetry: the tool call already succeeded, so its
          // result is still returned to the caller.
        }
        return result;
      });
    },
  };
}
export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void {
const { ports, server, userContext } = deps;
const { ports, userContext } = deps;
const server = instrumentMcpServer(deps.server, { projectDir: deps.projectDir, io: deps.io });
if (ports.connections) {
const connections = ports.connections;

View file

@ -3,7 +3,7 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js';
import { describe, expect, it, vi } from 'vitest';
import { afterEach, describe, expect, it, vi } from 'vitest';
import { createLocalProjectMemoryIngest } from '../../context/memory/local-memory.js';
import { detectCaptureSignals } from '../../context/memory/capture-signals.js';
import type { MemoryAgentInput } from '../../context/memory/types.js';
@ -47,6 +47,19 @@ function makeFakeServer() {
};
}
/**
 * Builds a fake CLI IO for tests: stdout is a TTY that discards writes, and
 * stderr accumulates every written chunk so it can be read back through
 * `stderrText()`.
 */
function makeIo() {
  let captured = '';
  const stdout = { isTTY: true, write() {} };
  const stderr = {
    write(chunk: string) {
      captured = `${captured}${chunk}`;
    },
  };
  return { stdout, stderr, stderrText: () => captured };
}
function getTool(tools: RegisteredTool[], name: string): RegisteredTool {
const found = tools.find((tool) => tool.name === name);
if (!found) {
@ -153,6 +166,11 @@ async function listToolsThroughSdk(contextTools: KtxMcpContextPorts) {
}
describe('createKtxMcpServer', () => {
afterEach(() => {
vi.unstubAllEnvs();
vi.restoreAllMocks();
});
it('registers annotations and output schemas for every retained tool', async () => {
const fake = makeFakeServer();
createKtxMcpServer({
@ -227,6 +245,37 @@ describe('createKtxMcpServer', () => {
});
});
it('emits sampled debug telemetry for MCP tool requests', async () => {
vi.spyOn(Math, 'random').mockReturnValue(0);
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const fake = makeFakeServer();
const io = makeIo();
const projectDir = '/tmp/ktx-mcp-telemetry';
createKtxMcpServer({
server: fake.server,
userContext: { userId: 'local-user' },
projectDir,
io,
contextTools: {
knowledge: {
search: vi.fn<KtxKnowledgeMcpPort['search']>().mockResolvedValue({ results: [], totalFound: 0 }),
read: vi.fn<KtxKnowledgeMcpPort['read']>().mockResolvedValue(null),
},
},
});
await expect(getTool(fake.tools, 'wiki_search').handler({ query: 'revenue recognition', limit: 5 })).resolves.toMatchObject({
structuredContent: { results: [], totalFound: 0 },
});
expect(io.stderrText()).toContain('"event":"mcp_request_completed"');
expect(io.stderrText()).toContain('"toolName":"wiki_search"');
expect(io.stderrText()).toContain('"sampleRate":0.1');
expect(io.stderrText()).not.toContain(projectDir);
});
it('registers parser-gated sql_execution when the host provides a SQL execution port', async () => {
const fake = makeFakeServer();
const response: KtxSqlExecutionResponse = {

View file

@ -9,6 +9,8 @@ export function createKtxMcpServer(deps: KtxMcpServerDeps): KtxMcpServerDeps['se
server: deps.server,
ports: deps.contextTools,
userContext: deps.userContext,
projectDir: deps.projectDir,
io: deps.io,
});
}
@ -26,6 +28,8 @@ export function createDefaultKtxMcpServer(
server: server as KtxMcpServerLike,
userContext: deps.userContext,
contextTools: deps.contextTools,
projectDir: deps.projectDir,
io: deps.io,
});
return server;
}

View file

@ -1,4 +1,5 @@
import type { MemoryIngestService } from '../../context/memory/memory-runs.js';
import type { KtxCliIo } from '../../cli-runtime.js';
import type { KtxEntityDetailsInput, KtxEntityDetailsResponse } from '../scan/entity-details.js';
import type { KtxDiscoverDataInput, KtxDiscoverDataResponse } from '../../context/search/discover.js';
import type { KtxDictionarySearchInput, KtxDictionarySearchResponse } from '../../context/sl/dictionary-search.js';
@ -171,4 +172,6 @@ export interface KtxMcpServerDeps {
server: KtxMcpServerLike;
userContext: KtxMcpUserContext;
contextTools?: KtxMcpContextPorts;
projectDir?: string;
io?: KtxCliIo;
}

View file

@ -563,11 +563,11 @@ export class KtxDescriptionGenerator {
}
}
const sampleTable = input.connector.sampleTable;
const connector = input.connector;
let sampleData: KtxTableSampleResult | null = null;
let fallbackReason: 'capability_missing' | 'sampling_failed' | 'empty_sample' | null = null;
if (!sampleTable) {
if (!connector.sampleTable) {
fallbackReason = 'capability_missing';
this.logger?.warn('KTX scan connector does not support table sampling; falling back to metadata-only prompt', {
connectorId: input.connector.id,
@ -584,7 +584,7 @@ export class KtxDescriptionGenerator {
try {
sampleData = await retryAsync(
() =>
sampleTable(
connector.sampleTable!(
{
connectionId: input.connectionId,
table: tableRef,
@ -934,11 +934,11 @@ export class KtxDescriptionGenerator {
});
columnValues = [];
} else {
const sampleColumn = input.connector.sampleColumn;
const connector = input.connector;
try {
const sample = await retryAsync(
() =>
sampleColumn(
connector.sampleColumn!(
{
connectionId: input.connectionId,
table: tableRef,

View file

@ -1,17 +1,63 @@
import type { KtxSchemaSnapshot } from './types.js';
import { tableRefSet, type KtxTableRefKey } from './table-ref.js';
import type { KtxTableRef } from './types.js';
export function resolveEnabledTables(connection: Record<string, unknown> | undefined): Set<string> | null {
/**
* Parses the `enabled_tables` field on a connection into a scope of
* fully-qualified table refs. Returns `null` when the field is absent or
* empty (meaning "no scope — include every table in the resolved schemas").
*
* Accepted entry forms:
* "catalog.db.name" fully qualified
* "db.name" schema-qualified (catalog = null; legacy / Postgres-shape)
* "name" bare (catalog = db = null; SQLite-shape)
* { catalog?, db?, name } escape hatch for identifiers containing dots
*
* The setup wizard writes the fully-qualified form going forward; the lenient
* parser keeps existing project configs working.
*/
export function resolveEnabledTables(
connection: Record<string, unknown> | undefined,
): ReadonlySet<KtxTableRefKey> | null {
const raw = connection?.enabled_tables;
if (!Array.isArray(raw) || raw.length === 0) return null;
return new Set(raw.filter((v): v is string => typeof v === 'string'));
const refs: KtxTableRef[] = [];
for (const value of raw) {
const parsed = parseEnabledTableEntry(value);
if (parsed) refs.push(parsed);
}
if (refs.length === 0) return null;
return tableRefSet(refs);
}
export function filterSnapshotTables(snapshot: KtxSchemaSnapshot, enabledTables: Set<string>): KtxSchemaSnapshot {
return {
...snapshot,
tables: snapshot.tables.filter((table) => {
const key = table.db ? `${table.db}.${table.name}` : table.name;
return enabledTables.has(key);
}),
};
/**
 * Converts one raw `enabled_tables` entry into a table ref.
 *
 * Strings are delegated to `parseDottedEntry`. Non-array objects must carry a
 * non-empty string `name`; their `catalog` and `db` are kept only when they
 * are strings and otherwise become `null`. Every other value yields `null`.
 */
function parseEnabledTableEntry(value: unknown): KtxTableRef | null {
  if (typeof value === 'string') {
    return parseDottedEntry(value);
  }

  const isObjectEntry = typeof value === 'object' && value !== null && !Array.isArray(value);
  if (!isObjectEntry) {
    return null;
  }

  const record = value as { catalog?: unknown; db?: unknown; name?: unknown };
  const tableName = record.name;
  if (typeof tableName !== 'string' || tableName === '') {
    return null;
  }

  const stringOrNull = (segment: unknown): string | null => (typeof segment === 'string' ? segment : null);
  return {
    catalog: stringOrNull(record.catalog),
    db: stringOrNull(record.db),
    name: tableName,
  };
}
/**
 * Parses a dotted `enabled_tables` string into a table ref.
 *
 * Accepted shapes after trimming the whole string:
 *   "catalog.db.name" -> all three segments
 *   "db.name"         -> catalog is null
 *   "name"            -> catalog and db are null
 *
 * Returns `null` for blank input, for more than three segments, and for an
 * empty table name (e.g. "public." or "."). The object form of an entry
 * already rejects empty names, so the dotted form does the same instead of
 * producing a ref that can never match a real table. An empty qualifier
 * segment (e.g. ".users") is normalized to `null`, which is how `KtxTableRef`
 * represents an absent catalog or db.
 *
 * Segments are split on every dot; identifiers that contain dots must use the
 * object form of the entry instead.
 */
function parseDottedEntry(value: string): KtxTableRef | null {
  const trimmed = value.trim();
  if (trimmed.length === 0) return null;

  const parts = trimmed.split('.');
  if (parts.length > 3) return null;

  // The table name is always the last segment and must not be empty.
  const name = parts[parts.length - 1]!;
  if (name.length === 0) return null;

  const orNull = (segment: string): string | null => (segment.length > 0 ? segment : null);
  if (parts.length === 3) {
    return { catalog: orNull(parts[0]!), db: orNull(parts[1]!), name };
  }
  if (parts.length === 2) {
    return { catalog: null, db: orNull(parts[0]!), name };
  }
  return { catalog: null, db: null, name };
}

View file

@ -6,14 +6,14 @@ import YAML from 'yaml';
import type { SourceAdapter } from '../../context/ingest/types.js';
import type { KtxLlmRuntimePort } from '../../context/llm/runtime-port.js';
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../../context/project/project.js';
import { filterSnapshotTables, resolveEnabledTables } from './enabled-tables.js';
import { resolveEnabledTables } from './enabled-tables.js';
import { getLocalScanReport, getLocalScanStatus, runLocalScan } from './local-scan.js';
import { tableRefKey, tableRefSet, type KtxTableRefKey } from './table-ref.js';
import type {
KtxQueryResult,
KtxReadOnlyQueryInput,
KtxScanConnector,
KtxSchemaSnapshot,
KtxSchemaTable,
} from './types.js';
function relationshipSqlResult(
@ -336,6 +336,73 @@ describe('local scan', () => {
});
});
it('passes enabled_tables as fetch context tableScope and does not post-filter staged snapshots', async () => {
project.config.connections.warehouse = {
...project.config.connections.warehouse,
enabled_tables: ['public.orders'],
};
let capturedTableScope: ReadonlySet<KtxTableRefKey> | undefined;
const adapter: SourceAdapter = {
source: 'live-database',
skillNames: ['live_database_ingest'],
async fetch(_pullConfig, stagedDir, ctx) {
capturedTableScope = ctx.tableScope;
await mkdir(join(stagedDir, 'tables'), { recursive: true });
await writeFile(
join(stagedDir, 'connection.json'),
'{"connectionId":"warehouse","driver":"postgres","scope":{"schemas":["public"]},"metadata":{}}\n',
'utf-8',
);
await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8');
await writeFile(
join(stagedDir, 'tables', 'customers.json'),
'{"name":"customers","catalog":null,"db":"public","kind":"table","comment":null,"estimatedRows":100,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":true,"comment":null}],"foreignKeys":[]}\n',
'utf-8',
);
await writeFile(
join(stagedDir, 'tables', 'orders.json'),
'{"name":"orders","catalog":null,"db":"public","kind":"table","comment":null,"estimatedRows":1000,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":true,"comment":null}],"foreignKeys":[]}\n',
'utf-8',
);
},
async detect() {
return true;
},
async chunk() {
return {
workUnits: [
{
unitKey: 'live-database-public-customers',
rawFiles: ['tables/customers.json'],
dependencyPaths: ['connection.json', 'foreign-keys.json'],
peerFileIndex: [],
},
{
unitKey: 'live-database-public-orders',
rawFiles: ['tables/orders.json'],
dependencyPaths: ['connection.json', 'foreign-keys.json'],
peerFileIndex: [],
},
],
};
},
};
const result = await runLocalScan({
project,
adapters: [adapter],
connectionId: 'warehouse',
jobId: 'scan-strict-scope-fetch',
now: () => new Date('2026-05-22T00:00:00.000Z'),
});
expect([...(capturedTableScope ?? [])]).toEqual([...tableRefSet([{ catalog: null, db: 'public', name: 'orders' }])]);
expect(result.report.diffSummary.tablesAdded).toBe(2);
const structuralManifest = await readFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8');
expect(structuralManifest).toContain('customers:');
expect(structuralManifest).toContain('orders:');
});
it('runs a structural database scan when live-database is not listed in ktx.yaml', async () => {
await writeDatabaseConfigWithoutIngestAdapters(project.projectDir);
project = await loadKtxProject({ projectDir: project.projectDir });
@ -698,6 +765,142 @@ describe('local scan', () => {
expect(result.report.warnings).toEqual([]);
});
it('keeps prototype connector methods when enabled_tables is configured', async () => {
project.config.connections.warehouse = {
...project.config.connections.warehouse,
enabled_tables: ['public.customers', 'public.orders'],
};
const scopedAdapter: SourceAdapter = {
source: 'live-database',
skillNames: ['live_database_ingest'],
async fetch(_pullConfig, stagedDir) {
await mkdir(join(stagedDir, 'tables'), { recursive: true });
await writeFile(
join(stagedDir, 'connection.json'),
'{"connectionId":"warehouse","driver":"postgres","scope":{"schemas":["public"]},"metadata":{}}\n',
'utf-8',
);
await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8');
await writeFile(
join(stagedDir, 'tables', 'customers.json'),
'{"name":"customers","catalog":null,"db":"public","kind":"table","comment":null,"estimatedRows":100,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":true,"comment":null}],"foreignKeys":[]}\n',
'utf-8',
);
await writeFile(
join(stagedDir, 'tables', 'orders.json'),
'{"name":"orders","catalog":null,"db":"public","kind":"table","comment":null,"estimatedRows":1000,"columns":[{"name":"customer_id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":false,"comment":null}],"foreignKeys":[]}\n',
'utf-8',
);
},
async detect() {
return true;
},
async chunk() {
return {
workUnits: [
{
unitKey: 'live-database-public-customers',
rawFiles: ['tables/customers.json'],
dependencyPaths: ['connection.json', 'foreign-keys.json'],
peerFileIndex: [],
},
{
unitKey: 'live-database-public-orders',
rawFiles: ['tables/orders.json'],
dependencyPaths: ['connection.json', 'foreign-keys.json'],
peerFileIndex: [],
},
],
};
},
};
class FakeClassConnector implements KtxScanConnector {
readonly id = 'test:warehouse';
readonly driver = 'postgres' as const;
readonly capabilities = {
structuralIntrospection: true as const,
tableSampling: false,
columnSampling: false,
columnStats: true,
readOnlySql: true,
nestedAnalysis: false,
eventStreamDiscovery: false,
formalForeignKeys: false,
estimatedRowCounts: true,
};
async introspect(): Promise<KtxSchemaSnapshot> {
return {
connectionId: 'warehouse',
driver: 'postgres',
extractedAt: '2026-05-22T00:00:00.000Z',
scope: { schemas: ['public'] },
metadata: {},
tables: [
{
catalog: null,
db: 'public',
name: 'customers',
kind: 'table',
comment: null,
estimatedRows: 100,
foreignKeys: [],
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
},
{
catalog: null,
db: 'public',
name: 'orders',
kind: 'table',
comment: null,
estimatedRows: 1000,
foreignKeys: [],
columns: [
{
name: 'customer_id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: false,
comment: null,
},
],
},
],
};
}
async executeReadOnly(input: KtxReadOnlyQueryInput): Promise<KtxQueryResult> {
return relationshipSqlResult(input);
}
}
const result = await runLocalScan({
project,
adapters: [scopedAdapter],
connectionId: 'warehouse',
mode: 'relationships',
detectRelationships: true,
connector: new FakeClassConnector(),
jobId: 'scan-prototype-connector-scope',
now: () => new Date('2026-05-22T00:00:00.000Z'),
});
expect(result.report.relationships.accepted).toBe(1);
expect(result.report.warnings).toEqual([]);
});
it('threads scan relationship settings into relationship-only local scans', async () => {
project.config.scan.enrichment = { mode: 'deterministic' };
project.config.scan.relationships = {
@ -1664,69 +1867,18 @@ describe('resolveEnabledTables', () => {
expect(resolveEnabledTables({ driver: 'postgres', enabled_tables: [] })).toBeNull();
});
it('returns Set of enabled table names', () => {
it('returns a canonical set of enabled table refs', () => {
const result = resolveEnabledTables({
driver: 'postgres',
enabled_tables: ['public.users', 'public.orders'],
});
expect(result).toBeInstanceOf(Set);
expect(result!.size).toBe(2);
expect(result!.has('public.users')).toBe(true);
expect(result!.has('public.orders')).toBe(true);
expect(result!.has(tableRefKey({ catalog: null, db: 'public', name: 'users' }))).toBe(true);
expect(result!.has(tableRefKey({ catalog: null, db: 'public', name: 'orders' }))).toBe(true);
});
it('returns null for undefined connection', () => {
expect(resolveEnabledTables(undefined)).toBeNull();
});
});
describe('filterSnapshotTables', () => {
function makeSnapshot(tables: Array<{ db: string; name: string }>): KtxSchemaSnapshot {
return {
connectionId: 'test',
driver: 'postgres',
extractedAt: '2026-01-01T00:00:00Z',
scope: {},
metadata: {},
tables: tables.map(
(t): KtxSchemaTable => ({
catalog: null,
db: t.db,
name: t.name,
kind: 'table',
comment: null,
estimatedRows: null,
columns: [],
foreignKeys: [],
}),
),
};
}
it('keeps only enabled tables', () => {
const snapshot = makeSnapshot([
{ db: 'public', name: 'users' },
{ db: 'public', name: 'orders' },
{ db: 'public', name: 'logs' },
]);
const enabled = new Set(['public.users', 'public.orders']);
const filtered = filterSnapshotTables(snapshot, enabled);
expect(filtered.tables).toHaveLength(2);
expect(filtered.tables.map((t) => t.name)).toEqual(['users', 'orders']);
});
it('returns empty tables when none match', () => {
const snapshot = makeSnapshot([{ db: 'public', name: 'users' }]);
const enabled = new Set(['public.orders']);
const filtered = filterSnapshotTables(snapshot, enabled);
expect(filtered.tables).toHaveLength(0);
});
it('preserves other snapshot fields', () => {
const snapshot = makeSnapshot([{ db: 'public', name: 'users' }]);
const enabled = new Set(['public.users']);
const filtered = filterSnapshotTables(snapshot, enabled);
expect(filtered.connectionId).toBe('test');
expect(filtered.driver).toBe('postgres');
});
});

View file

@ -10,7 +10,7 @@ import type { KtxProjectLlmConfig, KtxScanEnrichmentConfig, KtxScanRelationshipC
import type { KtxLocalProject } from '../../context/project/project.js';
import { ktxLocalStateDbPath } from '../project/local-state-db.js';
import { redactKtxScanReport } from './credentials.js';
import { filterSnapshotTables, resolveEnabledTables } from './enabled-tables.js';
import { resolveEnabledTables } from './enabled-tables.js';
import { completedKtxScanEnrichmentStateSummary } from './enrichment-state.js';
import { failedKtxScanEnrichmentSummary, ktxScanErrorMessage } from './enrichment-summary.js';
import {
@ -25,9 +25,7 @@ import type {
KtxConnectionDriver,
KtxProgressPort,
KtxScanConnector,
KtxScanContext,
KtxScanEnrichmentStateSummary,
KtxScanInput,
KtxScanMode,
KtxScanReport,
KtxScanTrigger,
@ -370,17 +368,6 @@ async function readScanReport(
}
}
function createFilteredConnector(connector: KtxScanConnector, enabledTables: Set<string>): KtxScanConnector {
return {
...connector,
async introspect(input: KtxScanInput, ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
const snapshot = await connector.introspect(input, ctx);
return filterSnapshotTables(snapshot, enabledTables);
},
};
}
function withInternalLiveDatabaseAdapter(project: KtxLocalProject): KtxLocalProject {
if (project.config.ingest.adapters.includes(LIVE_DATABASE_ADAPTER)) {
return project;
@ -411,8 +398,8 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise<LocalS
throw new Error(`Connection "${options.connectionId}" is not configured in ktx.yaml`);
}
const driver = normalizeDriver(connection.driver);
const enabledTables = resolveEnabledTables(connection);
const connector = rawConnector && enabledTables ? createFilteredConnector(rawConnector, enabledTables) : rawConnector;
const tableScope = resolveEnabledTables(connection) ?? undefined;
const connector = rawConnector;
const adapters =
options.adapters ??
createDefaultLocalIngestAdapters(options.project, { databaseIntrospectionUrl: options.databaseIntrospectionUrl });
@ -444,6 +431,7 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise<LocalS
jobId: options.jobId,
now: options.now,
dryRun: options.dryRun,
tableScope,
});
await options.progress?.update(0.55, scanChangeSummary(scanDiffSummaryFromRecord(record)));
let report = reportFromIngest({
@ -480,28 +468,13 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise<LocalS
rawSourcesDir: report.artifactPaths.rawSourcesDir,
extractedAtFallback: report.createdAt,
});
const structuralSnapshot = enabledTables ? filterSnapshotTables(rawSnapshot, enabledTables) : rawSnapshot;
if (enabledTables && structuralSnapshot.tables.length < rawSnapshot.tables.length) {
const excluded = rawSnapshot.tables.length - structuralSnapshot.tables.length;
let remaining = excluded;
const ds = report.diffSummary;
const subFrom = (field: 'tablesAdded' | 'tablesUnchanged' | 'tablesModified') => {
const take = Math.min(remaining, ds[field]);
ds[field] -= take;
remaining -= take;
};
subFrom('tablesAdded');
subFrom('tablesUnchanged');
subFrom('tablesModified');
await options.progress?.update(0.6, scanChangeSummary(report.diffSummary));
}
enrichmentSnapshot = structuralSnapshot;
enrichmentSnapshot = rawSnapshot;
const manifestArtifacts = await writeLocalScanManifestShards({
project: options.project,
connectionId: options.connectionId,
syncId: record.syncId,
driver,
snapshot: structuralSnapshot,
snapshot: rawSnapshot,
dryRun: false,
});
report.artifactPaths.manifestShards = manifestArtifacts.manifestShards;

View file

@ -229,6 +229,9 @@ function sampleAggregateSql(driver: KtxConnectionDriver, innerSql: string): stri
if (driver === 'clickhouse') {
return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`;
}
if (driver === 'snowflake') {
return `(SELECT LISTAGG(CAST(value AS VARCHAR), '\\x1f') FROM (${innerSql}) AS relationship_profile_values)`;
}
return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}

View file

@ -0,0 +1,67 @@
import { describe, expect, it } from 'vitest';
import {
scopedTableNames,
tableRefFromKey,
tableRefKey,
tableRefSet,
type KtxTableRefKey,
} from './table-ref.js';
describe('tableRefKey roundtrip', () => {
  // Encode to a key and decode it again; a lossless codec returns an equal ref.
  const roundtrip = (ref: Parameters<typeof tableRefKey>[0]) => tableRefFromKey(tableRefKey(ref));

  it('encodes and decodes a three-part ref', () => {
    const fullyQualified = { catalog: 'ANALYTICS', db: 'MARTS', name: 'LISTINGS' };
    expect(roundtrip(fullyQualified)).toEqual(fullyQualified);
  });

  it('treats null catalog/db as the empty segment', () => {
    const schemaQualified = { catalog: null, db: 'public', name: 'users' };
    expect(roundtrip(schemaQualified)).toEqual(schemaQualified);
  });

  it('roundtrips a bare-name ref', () => {
    const bare = { catalog: null, db: null, name: 'orders' };
    expect(roundtrip(bare)).toEqual(bare);
  });
});
describe('tableRefSet', () => {
  it('produces a set with member-equality on canonical keys', () => {
    const listings = { catalog: 'ANALYTICS', db: 'MARTS', name: 'LISTINGS' };
    const items = { catalog: 'ANALYTICS', db: 'MARTS', name: 'ITEMS' };
    const keys = tableRefSet([listings, items]);

    expect(keys.size).toBe(2);

    // Membership is decided by the encoded key, so a structurally equal ref
    // built separately is found while an unknown table is not.
    const presentKey = tableRefKey({ catalog: 'ANALYTICS', db: 'MARTS', name: 'LISTINGS' });
    const absentKey = tableRefKey({ catalog: 'ANALYTICS', db: 'MARTS', name: 'OTHER' });
    expect(keys.has(presentKey)).toBe(true);
    expect(keys.has(absentKey)).toBe(false);
  });
});
describe('scopedTableNames', () => {
  it('projects to the requested (catalog, db) namespace', () => {
    const keys = tableRefSet([
      { catalog: 'ANALYTICS', db: 'MARTS', name: 'LISTINGS' },
      { catalog: 'ANALYTICS', db: 'MARTS', name: 'ITEMS' },
      { catalog: 'ANALYTICS', db: 'STAGING', name: 'LISTINGS' },
    ]);

    const martsNames = scopedTableNames(keys, { catalog: 'ANALYTICS', db: 'MARTS' });
    const stagingNames = scopedTableNames(keys, { catalog: 'ANALYTICS', db: 'STAGING' });

    // Sorted because only membership matters for the two-name result.
    expect(martsNames.sort()).toEqual(['ITEMS', 'LISTINGS']);
    expect(stagingNames).toEqual(['LISTINGS']);
  });

  it('treats null in the scope entry as a wildcard for that segment', () => {
    const keys = tableRefSet([{ catalog: null, db: 'public', name: 'users' }]);
    const names = scopedTableNames(keys, { catalog: 'any-catalog', db: 'public' });
    expect(names).toEqual(['users']);
  });

  it('returns empty when no scope entry matches the namespace', () => {
    const keys = tableRefSet([{ catalog: 'A', db: 'B', name: 'C' }]);
    const names = scopedTableNames(keys, { catalog: 'X', db: 'Y' });
    expect(names).toEqual([]);
  });

  it('dedupes when the same name appears under different catalog projections', () => {
    const keys: ReadonlySet<KtxTableRefKey> = tableRefSet([
      { catalog: null, db: 'public', name: 'users' },
      { catalog: 'A', db: 'public', name: 'users' },
    ]);
    const names = scopedTableNames(keys, { catalog: 'A', db: 'public' });
    expect(names).toEqual(['users']);
  });
});

View file

@ -0,0 +1,53 @@
import type { KtxTableRef } from './types.js';
/**
 * Canonical, branded string form of a {@link KtxTableRef}.
 *
 * A JS `Set` of objects compares members by identity, so scopes are stored as
 * sets of these string keys instead, which gives value-based membership.
 * Encode with {@link tableRefKey}; decode with {@link tableRefFromKey}.
 */
export type KtxTableRefKey = string & { readonly __brand: 'KtxTableRefKey' };

/** ASCII unit separator (U+001F) placed between the catalog, db, and name segments of a key. */
const SEPARATOR = '\x1f';

/**
 * Encodes a table ref as `catalog<SEP>db<SEP>name`; a `null` catalog or db is
 * written as an empty segment.
 *
 * @internal
 */
export function tableRefKey(ref: KtxTableRef): KtxTableRefKey {
  const catalogSegment = ref.catalog ?? '';
  const dbSegment = ref.db ?? '';
  return (catalogSegment + SEPARATOR + dbSegment + SEPARATOR + ref.name) as KtxTableRefKey;
}

/**
 * Decodes a key produced by {@link tableRefKey}. Empty or missing catalog/db
 * segments come back as `null`; a missing name segment comes back as `''`.
 *
 * @internal
 */
export function tableRefFromKey(key: KtxTableRefKey): KtxTableRef {
  const segments = key.split(SEPARATOR);
  const qualifierOrNull = (segment: string | undefined): string | null =>
    segment === undefined || segment === '' ? null : segment;
  return {
    catalog: qualifierOrNull(segments[0]),
    db: qualifierOrNull(segments[1]),
    name: segments[2] ?? '',
  };
}
/** Builds a scope from table refs by encoding each ref to its canonical key; refs with equal keys collapse to one member. */
export function tableRefSet(refs: readonly KtxTableRef[]): ReadonlySet<KtxTableRefKey> {
  const keys = new Set<KtxTableRefKey>();
  for (const ref of refs) {
    keys.add(tableRefKey(ref));
  }
  return keys;
}
/**
 * Lists the distinct bare table names in `scope` that belong to the given
 * (catalog, db) namespace, in the order they are first encountered.
 *
 * A segment matches when either side is `null`/absent or both are equal, so a
 * legacy two-part `"db.name"` entry (catalog `null`) matches any catalog, and
 * a `null` db behaves the same way.
 */
export function scopedTableNames(
  scope: ReadonlySet<KtxTableRefKey>,
  namespace: { catalog?: string | null; db?: string | null },
): string[] {
  const catalogFilter = namespace.catalog ?? null;
  const dbFilter = namespace.db ?? null;
  const segmentMatches = (wanted: string | null, actual: string | null): boolean =>
    wanted === null || actual === null || actual === wanted;

  const matched = new Set<string>();
  scope.forEach((key) => {
    const { catalog, db, name } = tableRefFromKey(key);
    if (segmentMatches(catalogFilter, catalog) && segmentMatches(dbFilter, db)) {
      matched.add(name);
    }
  });
  return Array.from(matched);
}

View file

@ -1,3 +1,5 @@
import type { KtxTableRefKey } from './table-ref.js';
export type KtxConnectionDriver =
| 'sqlite'
| 'postgres'
@ -137,6 +139,14 @@ export interface KtxScanInput {
connectionId: string;
driver: KtxConnectionDriver;
scope?: KtxSchemaScope;
/**
* Restricts introspection to a specific set of fully-qualified tables.
* `undefined` means "all tables within {@link scope}". Connectors that honor
* this field should push the filter into their metadata queries. Callers do
* not post-filter, so a connector that ignores `tableScope` will over-fetch
* and surface the extra tables in output.
*/
tableScope?: ReadonlySet<KtxTableRefKey>;
mode?: KtxScanMode;
dryRun?: boolean;
detectRelationships?: boolean;

View file

@ -2,7 +2,7 @@ import { access, readFile, rm, stat } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { afterEach, describe, expect, it } from 'vitest';
import { afterEach, describe, expect, it, vi } from 'vitest';
import {
DEMO_ADAPTER,
DEMO_CONNECTION_ID,
@ -22,10 +22,27 @@ async function readPackagedJson<T>(relativePath: string): Promise<T> {
return JSON.parse(await readFile(packagedDemoAssetPath(relativePath), 'utf-8')) as T;
}
/**
 * Creates a stand-in CLI IO for tests. stdout reports itself as a TTY and
 * ignores writes; stderr appends each chunk to an in-memory buffer that
 * `stderrText()` returns.
 */
function makeIo() {
  let buffer = '';
  const appendToBuffer = (chunk: string) => {
    buffer = buffer + chunk;
  };
  return {
    stdout: {
      isTTY: true,
      write() {},
    },
    stderr: {
      write(chunk: string) {
        appendToBuffer(chunk);
      },
    },
    stderrText: () => buffer,
  };
}
describe('demo assets', () => {
const projectDir = join(tmpdir(), `ktx-demo-assets-${process.pid}`);
afterEach(async () => {
vi.unstubAllEnvs();
await rm(projectDir, { recursive: true, force: true });
});
@ -125,6 +142,19 @@ describe('demo assets', () => {
await expect(ensureDemoProject({ projectDir, force: true })).resolves.toMatchObject({ projectDir });
});
// Verifies that creating the demo project emits a `connection_added` event and
// that the emitted text never contains the project directory path.
it('emits debug telemetry when the demo connection is created', async () => {
// Both env stubs are reverted by the afterEach hook (vi.unstubAllEnvs).
// NOTE(review): presumably KTX_TELEMETRY_DEBUG=1 makes telemetry print event
// payloads to stderr and an empty CI keeps CI detection from suppressing it —
// confirm against the telemetry module.
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const io = makeIo();
await ensureDemoProject({ projectDir, force: false, io, cliVersion: '0.2.0' });
expect(io.stderrText()).toContain('"event":"connection_added"');
expect(io.stderrText()).toContain('"driver":"sqlite"');
expect(io.stderrText()).toContain('"isDemoConnection":true');
// Privacy guard: the local path must not leak into the telemetry output.
expect(io.stderrText()).not.toContain(projectDir);
});
it('copies the seeded project assets used by the setup wizard tour', async () => {
await ensureSeededDemoProject({ projectDir, force: false });

View file

@ -4,6 +4,7 @@ import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { randomBytes } from 'node:crypto';
import { getKtxCliPackageInfo, type KtxCliIo } from './cli-runtime.js';
interface DemoProjectResult {
projectDir: string;
@ -15,6 +16,8 @@ interface DemoProjectResult {
/** Options accepted by `ensureDemoProject`. */
interface EnsureDemoProjectOptions {
/** Directory the demo project files are written into. */
projectDir: string;
/**
 * NOTE(review): presumably allows replacing an already existing demo project —
 * confirm against the body of `ensureDemoProject`.
 */
force: boolean;
/**
 * Optional CLI IO. When provided, `ensureDemoProject` emits a
 * `connection_added` telemetry event through it; when omitted, no event is
 * emitted.
 */
io?: KtxCliIo;
/**
 * Version reported in the package info attached to that telemetry event.
 * Falls back to the version returned by `getKtxCliPackageInfo()`.
 */
cliVersion?: string;
}
/** @internal */
@ -143,6 +146,19 @@ export async function ensureDemoProject(options: EnsureDemoProjectOptions): Prom
await copyFile(join(assetDir(), 'manifest.json'), join(projectDir, 'manifest.json'));
const replayPath = await copyPackagedReplay(projectDir);
await writeFile(configPath, demoConfig(databasePath), 'utf-8');
if (options.io) {
const { emitTelemetryEvent } = await import('./telemetry/index.js');
await emitTelemetryEvent({
name: 'connection_added',
projectDir,
io: options.io,
packageInfo: { ...getKtxCliPackageInfo(), version: options.cliVersion ?? getKtxCliPackageInfo().version },
fields: {
driver: 'sqlite',
isDemoConnection: true,
},
});
}
return { projectDir, configPath, databasePath, replayPath };
}

View file

@ -8,12 +8,13 @@ import { writeLocalKnowledgePage } from './context/wiki/local-knowledge.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { runKtxKnowledge } from './knowledge.js';
function makeIo() {
function makeIo(options: { isTTY?: boolean } = {}) {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
isTTY: options.isTTY,
write: (chunk: string) => {
stdout += chunk;
},
@ -72,6 +73,7 @@ describe('runKtxKnowledge', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
@ -96,6 +98,26 @@ describe('runKtxKnowledge', () => {
expect(searchIo.stdout()).toContain('metrics-revenue');
});
// Verifies that a wiki search emits `wiki_query_completed` carrying the query
// length, while the raw query text never appears in the telemetry output.
it('emits debug telemetry for wiki search without query text', async () => {
// Both env stubs are reverted by the afterEach hook (vi.unstubAllEnvs).
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await seedWikiPage(projectDir);
// stdout is marked as a TTY for this run.
// NOTE(review): presumably telemetry is only emitted for interactive runs — confirm.
const searchIo = makeIo({ isTTY: true });
await expect(
runKtxKnowledge(
{ command: 'search', projectDir, query: 'revenue recognition', userId: 'local', cliVersion: '0.0.0-test' },
searchIo.io,
),
).resolves.toBe(0);
expect(searchIo.stderr()).toContain('"event":"wiki_query_completed"');
expect(searchIo.stderr()).toContain('"queryLength"');
// Privacy guard: only the length of the query may be reported, not its text.
expect(searchIo.stderr()).not.toContain('revenue recognition');
});
it('prints wiki search rank badges in pretty output', async () => {
const projectDir = join(tempDir, 'rank-project');
await initKtxProject({ projectDir });

View file

@ -8,6 +8,7 @@ import {
} from './embedding-resolution.js';
import { resolveOutputMode } from './io/mode.js';
import { createRankBadgeFormatter, printList, type PrintListColumn } from './io/print-list.js';
import { emitTelemetryEvent } from './telemetry/index.js';
export type KtxKnowledgeArgs =
| { command: 'list'; projectDir: string; userId: string; output?: string; json?: boolean; cliVersion: string }
@ -108,6 +109,7 @@ export async function runKtxKnowledge(
io: KtxKnowledgeIo = process,
deps: KtxKnowledgeDeps = {},
): Promise<number> {
const startedAt = performance.now();
try {
const project = await loadKtxProject({ projectDir: args.projectDir });
if (args.command === 'list') {
@ -135,6 +137,17 @@ export async function runKtxKnowledge(
embeddingService,
limit: args.limit,
});
await emitTelemetryEvent({
name: 'wiki_query_completed',
projectDir: args.projectDir,
io,
fields: {
queryLength: args.query.length,
resultCount: results.length,
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'ok',
},
});
if (args.debug) {
writeWikiSearchDebug(io, {
mode: project.config.storage.search,
@ -167,6 +180,19 @@ export async function runKtxKnowledge(
}
return 0;
} catch (error) {
if (args.command === 'search') {
await emitTelemetryEvent({
name: 'wiki_query_completed',
projectDir: args.projectDir,
io,
fields: {
queryLength: args.query.length,
resultCount: 0,
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'error',
},
});
}
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
return 1;
}

View file

@ -15,7 +15,10 @@ import { BigQueryHistoricSqlQueryHistoryReader } from './context/ingest/adapters
import { createDaemonLiveDatabaseIntrospection } from './context/ingest/adapters/live-database/daemon-introspection.js';
import { createDefaultLocalIngestAdapters, type DefaultLocalIngestAdaptersOptions } from './context/ingest/local-adapters.js';
import type { HistoricSqlReader } from './context/ingest/adapters/historic-sql/types.js';
import type { LiveDatabaseIntrospectionPort } from './context/ingest/adapters/live-database/types.js';
import type {
LiveDatabaseIntrospectionOptions,
LiveDatabaseIntrospectionPort,
} from './context/ingest/adapters/live-database/types.js';
import { LiveDatabaseSourceAdapter } from './context/ingest/adapters/live-database/live-database.adapter.js';
import { PostgresPgssReader } from './context/ingest/adapters/historic-sql/postgres-pgss-reader.js';
import { SnowflakeHistoricSqlQueryHistoryReader } from './context/ingest/adapters/historic-sql/snowflake-query-history-reader.js';
@ -116,38 +119,39 @@ function createKtxCliLiveDatabaseIntrospection(
connections: project.config.connections,
});
return {
async extractSchema(connectionId: string) {
async extractSchema(connectionId: string, options?: LiveDatabaseIntrospectionOptions) {
const connection = project.config.connections[connectionId];
if (isKtxPostgresConnectionConfig(connection)) {
return postgres.extractSchema(connectionId);
return postgres.extractSchema(connectionId, options);
}
if (isKtxSqliteConnectionConfig(connection)) {
return sqlite.extractSchema(connectionId);
return sqlite.extractSchema(connectionId, options);
}
if (isKtxMysqlConnectionConfig(connection)) {
return mysql.extractSchema(connectionId);
return mysql.extractSchema(connectionId, options);
}
if (isKtxClickHouseConnectionConfig(connection)) {
return clickhouse.extractSchema(connectionId);
return clickhouse.extractSchema(connectionId, options);
}
if (isKtxSqlServerConnectionConfig(connection)) {
return sqlserver.extractSchema(connectionId);
return sqlserver.extractSchema(connectionId, options);
}
if (isKtxBigQueryConnectionConfig(connection)) {
return bigquery.extractSchema(connectionId);
return bigquery.extractSchema(connectionId, options);
}
if (hasSnowflakeDriver(connection)) {
const { createSnowflakeLiveDatabaseIntrospection } = await import('./connectors/snowflake/live-database-introspection.js');
const { isKtxSnowflakeConnectionConfig } = await import('./connectors/snowflake/connector.js');;
if (!isKtxSnowflakeConnectionConfig(connection)) {
return daemon.extractSchema(connectionId);
return daemon.extractSchema(connectionId, options);
}
const snowflake = createSnowflakeLiveDatabaseIntrospection({
connections: project.config.connections,
projectDir: project.projectDir,
});
return snowflake.extractSchema(connectionId);
return snowflake.extractSchema(connectionId, options);
}
return daemon.extractSchema(connectionId);
return daemon.extractSchema(connectionId, options);
},
};
}
@ -263,6 +267,7 @@ async function createEphemeralSnowflakeHistoricSqlClient(
const connector = new connectorModule.KtxSnowflakeScanConnector({
connectionId,
connection,
projectDir: project.projectDir,
});
try {
const result = await connector.executeReadOnly({ connectionId, sql: query }, {} as never);

View file

@ -64,7 +64,7 @@ export async function createKtxCliScanConnector(
if (!isKtxSnowflakeConnectionConfig(connection)) {
throw invalidConnectionConfigError(connectionId, driver);
}
return new KtxSnowflakeScanConnector({ connectionId, connection });
return new KtxSnowflakeScanConnector({ connectionId, connection, projectDir: project.projectDir });
}
throw new Error(
`Connection "${connectionId}" uses driver "${driver}", which has no native standalone KTX scan connector. Supported drivers: ${SUPPORTED_DRIVERS}.`,

View file

@ -12,6 +12,7 @@ import {
type ManagedPythonRuntimeLayoutOptions,
type ManagedPythonRuntimeStatus,
} from './managed-python-runtime.js';
import { readExistingTelemetryProjectId } from './telemetry/identity.js';
export type KtxManagedPythonInstallPolicy = 'prompt' | 'auto' | 'never';
@ -49,6 +50,7 @@ export interface ManagedPythonCommandOptions extends ManagedPythonCommandDeps {
export interface ManagedPythonSemanticLayerComputeOptions extends ManagedPythonCommandOptions {
createPythonCompute?: typeof createPythonSemanticLayerComputePort;
projectDir?: string;
}
/** @internal */
@ -133,8 +135,12 @@ export async function createManagedPythonSemanticLayerComputePort(
...(options.spinner ? { spinner: options.spinner } : {}),
});
const createPythonCompute = options.createPythonCompute ?? createPythonSemanticLayerComputePort;
const projectId = options.projectDir
? await readExistingTelemetryProjectId({ projectDir: options.projectDir })
: undefined;
return createPythonCompute({
command: runtime.manifest.python.daemonExecutable,
args: [],
...(projectId ? { projectId } : {}),
});
}

View file

@ -73,6 +73,8 @@ export async function createKtxMcpServerFactory(input: {
name: 'ktx',
version: input.cliVersion,
userContext: { userId: 'local' },
projectDir: input.projectDir,
io,
contextTools: {
...contextTools,
...(memoryIngest ? { memoryIngest } : {}),

View file

@ -1,5 +1,9 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from './context/project/config.js';
import { describe, expect, it, vi } from 'vitest';
import { initKtxProject } from './context/project/project.js';
import { afterEach, describe, expect, it, vi } from 'vitest';
import {
buildPublicIngestPlan,
type KtxPublicIngestDeps,
@ -395,6 +399,10 @@ describe('buildPublicIngestPlan', () => {
});
describe('runKtxPublicIngest', () => {
afterEach(() => {
vi.unstubAllEnvs();
});
it('maps fast and deep database targets to scan internals', async () => {
const io = makeIo();
const project = deepReadyProject({
@ -423,6 +431,32 @@ describe('runKtxPublicIngest', () => {
);
});
// Verifies that a public ingest run emits both `ingest_completed` and
// `project_stack_snapshot`, and that the project directory never appears in
// the telemetry output.
it('emits debug telemetry for ingest targets and project snapshots without project paths', async () => {
// Both env stubs are reverted by the afterEach hook (vi.unstubAllEnvs).
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
// Uses a real temporary project directory; it is removed in the finally block below.
const projectDir = await mkdtemp(join(tmpdir(), 'ktx-public-ingest-telemetry-'));
try {
await initKtxProject({ projectDir });
const io = makeIo({ isTTY: true });
const project = projectWithConnections({
warehouse: { driver: 'sqlite', path: join(projectDir, 'warehouse.sqlite') },
});
// loadProject and runScan are stubbed, so no real database is scanned.
const code = await runKtxPublicIngest(
{ command: 'run', projectDir, targetConnectionId: 'warehouse', all: false, json: false, inputMode: 'disabled' },
io.io,
{ loadProject: vi.fn(async () => project), runScan: vi.fn(async () => 0) },
);
expect(code).toBe(0);
expect(io.stderr()).toContain('"event":"ingest_completed"');
expect(io.stderr()).toContain('"event":"project_stack_snapshot"');
// Privacy guard: the local path must not leak into the telemetry output.
expect(io.stderr()).not.toContain(projectDir);
} finally {
await rm(projectDir, { recursive: true, force: true });
}
});
it('runs query history after schema ingest with current-run window override', async () => {
const io = makeIo();
const runtimeIo = makeIo({ isTTY: true });
@ -908,7 +942,7 @@ describe('runKtxPublicIngest', () => {
expect(io.stdout()).not.toContain('Debug:');
});
it('prints query-history retry guidance for query-history facet failures', async () => {
it('skips the query-history facet but keeps the target green when query-history fails', async () => {
const io = makeIo();
const project = deepReadyProject({
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
@ -935,11 +969,13 @@ describe('runKtxPublicIngest', () => {
io.io,
{ loadProject: vi.fn(async () => project), runScan, runIngest },
),
).resolves.toBe(1);
).resolves.toBe(0);
expect(io.stdout()).toMatch(/warehouse\s+done\s+failed\s+skipped\s+skipped/);
expect(io.stdout()).toContain('Ingest finished with skipped query history');
expect(io.stdout()).toMatch(/warehouse\s+done\s+skipped\s+skipped\s+skipped/);
expect(io.stdout()).toContain('Skipped query history:');
expect(io.stdout()).toContain(
'warehouse failed: Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history',
'Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history',
);
expect(io.stdout()).not.toContain('warehouse failed: Error:');
expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history');
@ -973,8 +1009,9 @@ describe('runKtxPublicIngest', () => {
io.io,
{ loadProject: vi.fn(async () => project), runScan, runIngest },
),
).resolves.toBe(1);
).resolves.toBe(0);
expect(io.stdout()).toContain('Ingest finished with skipped query history');
expect(io.stdout()).toContain('Missing bundled Python runtime manifest');
expect(io.stdout()).toContain(
'In a source checkout, build the local runtime assets with: pnpm run artifacts:build',

View file

@ -21,6 +21,8 @@ import { publicIngestOutputLine } from './public-ingest-copy.js';
import { resolvePublicIngestRuntimeRequirements } from './runtime-requirements.js';
import type { KtxScanArgs, KtxScanDeps } from './scan.js';
import { profileMark } from './startup-profile.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js';
profileMark('module:public-ingest');
@ -599,17 +601,97 @@ function markTargetResult(
};
}
/**
 * Produces the result for a target whose query-history facet could not run.
 *
 * Starts from the result `markTargetResult` builds for status `'done'` and
 * rewrites every `query-history` step to `'skipped'`, attaching `detail` to
 * it. All other steps are passed through untouched.
 *
 * @param target Planned ingest target the result belongs to.
 * @param args Arguments of the `run` command being executed.
 * @param detail Text stored on the skipped query-history step.
 * @returns The target result with its query-history step marked as skipped.
 */
function markTargetWithSkippedQueryHistory(
  target: KtxPublicIngestPlanTarget,
  args: Extract<KtxPublicIngestArgs, { command: 'run' }>,
  detail: string,
): KtxPublicIngestTargetResult {
  const doneResult = markTargetResult(target, args, 'done');
  const steps = doneResult.steps.map((step) => {
    if (step.operation !== 'query-history') {
      return step;
    }
    return { ...step, status: 'skipped' as const, detail };
  });
  return { ...doneResult, steps };
}
/**
 * Builds the detail text for a failed query-history run.
 *
 * The captured ingest output (an empty string when none was captured) is
 * reduced with `capturedFailureMessage`, and the outcome is handed to
 * `failureDetailWithRetry` with `'query-history'` as the failed operation.
 *
 * @param input.target Planned ingest target that failed.
 * @param input.args Arguments of the `run` command being executed.
 * @param input.capturedOutput Output captured from the ingest run, if any.
 * @returns The detail string produced by `failureDetailWithRetry`.
 */
function queryHistoryFailureDetail(input: {
  target: KtxPublicIngestPlanTarget;
  args: Extract<KtxPublicIngestArgs, { command: 'run' }>;
  capturedOutput?: string;
}): string {
  const { target, args, capturedOutput } = input;
  const failureDetail = capturedFailureMessage(capturedOutput ?? '');
  return failureDetailWithRetry({
    target,
    args,
    failedOperation: 'query-history',
    failureDetail,
  });
}
/**
 * Reports whether any step of the target result has status `'failed'`.
 *
 * @param result Result of one ingest target.
 * @returns `true` when at least one step failed, otherwise `false`.
 */
function resultFailed(result: KtxPublicIngestTargetResult): boolean {
  for (const step of result.steps) {
    if (step.status === 'failed') {
      return true;
    }
  }
  return false;
}
/**
 * Extracts the skipped query-history entry of a target result, if it has one.
 *
 * Only the first `query-history` step that is `'skipped'` and has a defined
 * `detail` is considered. A skipped step without detail is ignored, which
 * keeps facets that were never requested out of the summary.
 *
 * @param result Result of one ingest target.
 * @returns The connection id together with the step detail, or `null` when no
 *   such step exists or its detail is empty.
 */
function resultSkippedQueryHistory(
  result: KtxPublicIngestTargetResult,
): { connectionId: string; detail: string } | null {
  for (const step of result.steps) {
    const isSkippedQueryHistory = step.operation === 'query-history' && step.status === 'skipped';
    if (!isSkippedQueryHistory || step.detail === undefined) {
      continue;
    }
    // The first matching step decides the outcome; an empty detail yields null.
    return step.detail ? { connectionId: result.connectionId, detail: step.detail } : null;
  }
  return null;
}
/**
 * Maps a row count onto the coarse bucket label reported in telemetry.
 *
 * Only the bucket is reported, never the exact count. Each bucket's upper
 * bound is exclusive, so exactly 10,000 rows falls into `'<100k'`.
 *
 * @param rowCount Number of rows to classify. Defaults to 0, so callers that
 *   do not know the row count can call `rowsBucket()` and get `'<10k'`.
 *   Negative and `NaN` values are also classified as `'<10k'`.
 * @returns The label of the bucket that contains `rowCount`.
 */
function rowsBucket(rowCount = 0): '<10k' | '<100k' | '<1M' | '<10M' | '>=10M' {
  // NaN fails every `<` comparison and would otherwise land in the largest bucket.
  if (Number.isNaN(rowCount) || rowCount < 10000) {
    return '<10k';
  }
  if (rowCount < 100000) {
    return '<100k';
  }
  if (rowCount < 1000000) {
    return '<1M';
  }
  if (rowCount < 10000000) {
    return '<10M';
  }
  return '>=10M';
}
/**
 * Emits the `ingest_completed` telemetry event for one ingest target.
 *
 * The event carries the target's driver, whether the connection is the demo
 * connection, the elapsed time since `startedAt`, and `'error'` or `'ok'`
 * depending on whether any step of the result failed.
 *
 * NOTE(review): `schemaCount`, `tableCount` and `columnCount` are always sent
 * as 0 and `rowsBucket()` is called without a row count, so these fields do
 * not describe the ingested data — confirm whether real values should be
 * wired in.
 *
 * @param input.args Arguments of the `run` command; supplies the project directory.
 * @param input.project Loaded project; supplies the connection configuration.
 * @param input.target Ingest target the event describes.
 * @param input.result Outcome of executing that target.
 * @param input.startedAt `performance.now()` reading taken before the target ran.
 * @param input.io CLI IO handed to the telemetry emitter.
 */
async function emitIngestCompleted(input: {
  args: Extract<KtxPublicIngestArgs, { command: 'run' }>;
  project: KtxPublicIngestProject;
  target: KtxPublicIngestPlanTarget;
  result: KtxPublicIngestTargetResult;
  startedAt: number;
  io: KtxCliIo;
}): Promise<void> {
  const { args, project, target, result, startedAt, io } = input;
  const outcome = resultFailed(result) ? 'error' : 'ok';
  const connectionConfig = project.config.connections[target.connectionId];
  await emitTelemetryEvent({
    name: 'ingest_completed',
    projectDir: args.projectDir,
    io,
    fields: {
      driver: target.driver,
      isDemoConnection: isDemoConnection(target.connectionId, connectionConfig),
      schemaCount: 0,
      tableCount: 0,
      columnCount: 0,
      rowsBucket: rowsBucket(),
      // Clamped at zero so a clock anomaly can never report a negative duration.
      durationMs: Math.max(0, performance.now() - startedAt),
      outcome,
    },
  });
}
/**
 * Looks up the status of the first step that matches `operation`.
 *
 * @param result Result of one ingest target.
 * @param operation Name of the step to look for.
 * @returns The step's status, or `'not-run'` when the result has no such step
 *   (or the step carries no status).
 */
function stepStatus(result: KtxPublicIngestTargetResult, operation: KtxPublicIngestStepName): string {
  for (const step of result.steps) {
    if (step.operation === operation) {
      return step.status ?? 'not-run';
    }
  }
  return 'not-run';
}
function renderPlainResults(results: KtxPublicIngestTargetResult[], io: KtxCliIo): void {
const failures = results.filter(resultFailed);
io.stdout.write(failures.length > 0 ? 'Ingest finished with partial failures\n' : 'Ingest finished\n');
const skippedQueryHistory = results.map(resultSkippedQueryHistory).filter((entry) => entry !== null) as Array<{
connectionId: string;
detail: string;
}>;
const headerSuffix =
failures.length > 0
? ' with partial failures'
: skippedQueryHistory.length > 0
? ' with skipped query history'
: '';
io.stdout.write(`Ingest finished${headerSuffix}\n`);
io.stdout.write('\n');
io.stdout.write('Source Database schema Query history Source ingest Memory update\n');
for (const result of results) {
@ -624,17 +706,22 @@ function renderPlainResults(results: KtxPublicIngestTargetResult[], io: KtxCliIo
);
}
if (failures.length === 0) {
return;
if (failures.length > 0) {
io.stdout.write('\nFailed sources:\n');
for (const result of failures) {
const failedStep = result.steps.find((step) => step.status === 'failed');
if (!failedStep) {
continue;
}
io.stdout.write(` ${failedStep.detail ?? `${result.connectionId} failed.`}\n`);
}
}
io.stdout.write('\nFailed sources:\n');
for (const result of failures) {
const failedStep = result.steps.find((step) => step.status === 'failed');
if (!failedStep) {
continue;
if (skippedQueryHistory.length > 0) {
io.stdout.write('\nSkipped query history:\n');
for (const { detail } of skippedQueryHistory) {
io.stdout.write(` ${detail}\n`);
}
io.stdout.write(` ${failedStep.detail ?? `${result.connectionId} failed.`}\n`);
}
}
@ -814,14 +901,13 @@ export async function executePublicIngestTarget(
? await runIngest(ingestArgs, ingestIo, ingestDeps)
: await runIngest(ingestArgs, ingestIo);
if (qhExitCode !== 0) {
deps.onPhaseEnd?.('query-history', 'failed');
return markTargetResult(
const detail = queryHistoryFailureDetail({
target,
args,
'failed',
'query-history',
capturedIngestIo ? capturedFailureMessage(capturedIngestIo.capturedOutput()) : undefined,
);
capturedOutput: capturedIngestIo ? capturedIngestIo.capturedOutput() : undefined,
});
deps.onPhaseEnd?.('query-history', 'failed', detail);
return markTargetWithSkippedQueryHistory(target, args, detail);
}
deps.onPhaseEnd?.('query-history', 'done');
}
@ -928,7 +1014,10 @@ export async function runKtxPublicIngest(
}
for (const target of plan.targets) {
results.push(await executePublicIngestTarget(target, args, io, deps));
const startedAt = performance.now();
const result = await executePublicIngestTarget(target, args, io, deps);
results.push(result);
await emitIngestCompleted({ args, project, target, result, startedAt, io });
}
if (args.json) {
@ -937,5 +1026,7 @@ export async function runKtxPublicIngest(
renderPlainResults(results, io);
}
await emitProjectStackSnapshot({ projectDir: args.projectDir, io });
return results.some(resultFailed) ? 1 : 0;
}

View file

@ -320,6 +320,7 @@ describe('runKtxScan', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
@ -384,6 +385,44 @@ describe('runKtxScan', () => {
expect(io.stdout()).not.toContain('/~');
});
// Verifies that a successful scan emits `scan_completed` with a table count
// and that the project directory never appears in the telemetry output.
it('emits debug telemetry for completed scans without project paths', async () => {
// Both env stubs are reverted by the afterEach hook (vi.unstubAllEnvs).
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
await initKtxProject({ projectDir: tempDir });
// The local scan is stubbed to return a finished run carrying the shared `report` fixture.
const runLocalScan = vi.fn(
async (): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
const io = makeIo({ isTTY: true });
const code = await runKtxScan(
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
databaseIntrospectionUrl: 'http://127.0.0.1:8765',
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
);
expect(code).toBe(0);
expect(io.stderr()).toContain('"event":"scan_completed"');
expect(io.stderr()).toContain('"tableCount"');
// Privacy guard: the local path must not leak into the telemetry output.
expect(io.stderr()).not.toContain(tempDir);
});
it('passes KTX daemon options to local ingest adapters when no explicit daemon URL is set', async () => {
await initKtxProject({ projectDir: tempDir });
const createLocalIngestAdapters = vi.fn(() => []);

View file

@ -8,6 +8,8 @@ import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
import { createKtxCliScanConnector } from './local-scan-connectors.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import { profileMark } from './startup-profile.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:scan');
@ -62,6 +64,14 @@ function totalTableCount(report: KtxScanReport): number {
return tableChangeCount(report) + report.diffSummary.tablesUnchanged;
}
/**
 * Counts the columns the scan created or updated.
 *
 * NOTE(review): only created and updated columns are summed, so a re-scan
 * that changes no columns yields 0 — confirm this is the intended meaning of
 * the `columnCount` telemetry field.
 *
 * @param report Report of a finished scan.
 * @returns `columnsCreated + columnsUpdated` from the structural sync stats.
 */
function scanColumnCount(report: KtxScanReport): number {
  const { columnsCreated, columnsUpdated } = report.structuralSyncStats;
  return columnsCreated + columnsUpdated;
}
/**
 * Counts every relationship candidate recorded in the scan report, regardless
 * of verdict: accepted, queued for review, and rejected ones are all included.
 *
 * @param report Report of a finished scan.
 * @returns Sum of the accepted, review and rejected relationship counts.
 */
function inferredFkCount(report: KtxScanReport): number {
  const { accepted, review, rejected } = report.relationships;
  return accepted + review + rejected;
}
function writeScanIdentity(report: KtxScanReport, io: KtxCliIo): void {
io.stdout.write(`Run: ${report.runId}\n`);
io.stdout.write(`Connection: ${report.connectionId}\n`);
@ -311,6 +321,7 @@ export function createCliScanProgress(
}
export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps: KtxScanDeps = {}): Promise<number> {
const startedAt = performance.now();
try {
const project = await loadKtxProject({ projectDir: args.projectDir });
const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider;
@ -347,6 +358,20 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
...(progress ? { progress } : {}),
});
cliProgress?.flush();
await emitTelemetryEvent({
name: 'scan_completed',
projectDir: args.projectDir,
io,
fields: {
driver: result.report.driver,
tableCount: totalTableCount(result.report),
columnCount: scanColumnCount(result.report),
inferredFkCount: inferredFkCount(result.report),
declaredFkCount: 0,
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'ok',
},
});
writeRunSummary(result.report, args.projectDir, io);
} finally {
cliProgress?.flush();
@ -354,6 +379,22 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
}
return 0;
} catch (error) {
const errorClass = scrubErrorClass(error);
await emitTelemetryEvent({
name: 'scan_completed',
projectDir: args.projectDir,
io,
fields: {
driver: 'unknown',
tableCount: 0,
columnCount: 0,
inferredFkCount: 0,
declaredFkCount: 0,
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'error',
...(errorClass ? { errorClass } : {}),
},
});
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
return 1;
}

View file

@ -145,6 +145,7 @@ describe('setup databases step', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
@ -378,6 +379,34 @@ describe('setup databases step', () => {
});
});
// Verifies that writing a database connection during setup emits
// `connection_added` for a non-demo postgres connection, and that the project
// directory never appears in the telemetry output.
it('emits debug telemetry when setup writes a database connection', async () => {
// Both env stubs are reverted by the afterEach hook (vi.unstubAllEnvs).
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const io = makeIo();
// Scripted prompt answers: choose the 'url' option, then supply the text values in order.
const prompts = makePromptAdapter({
selectValues: ['url'],
textValues: ['', 'env:DATABASE_URL'],
});
// Connection test and scan are stubbed to succeed (exit code 0).
const result = await runKtxSetupDatabasesStep(
{
projectDir: tempDir,
inputMode: 'auto',
databaseDrivers: ['postgres'],
databaseSchemas: [],
skipDatabases: false,
},
io.io,
{ prompts, testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0) },
);
expect(result.status).toBe('ready');
expect(io.stderr()).toContain('"event":"connection_added"');
expect(io.stderr()).toContain('"driver":"postgres"');
expect(io.stderr()).toContain('"isDemoConnection":false');
// Privacy guard: the local path must not leak into the telemetry output.
expect(io.stderr()).not.toContain(tempDir);
});
it('tells users Escape goes back in free-text connection prompts', async () => {
const prompts = makePromptAdapter({
selectValues: ['url'],
@ -517,7 +546,7 @@ describe('setup databases step', () => {
{
driver: 'snowflake',
selectValues: ['password', 'no'],
textValues: ['', 'env:SNOWFLAKE_ACCOUNT', 'ANALYTICS_WH', 'ANALYTICS', '', 'env:SNOWFLAKE_USER', ''],
textValues: ['', 'env:SNOWFLAKE_ACCOUNT', 'ANALYTICS_WH', 'ANALYTICS', 'env:SNOWFLAKE_USER', ''],
passwordValues: ['env:SNOWFLAKE_PASSWORD'],
expectedTextPrompts: [
{
@ -534,11 +563,6 @@ describe('setup databases step', () => {
{
message: 'Snowflake database name',
},
{
message: 'Snowflake schema\nPress Enter for PUBLIC, or enter a schema name.',
placeholder: 'PUBLIC',
initialValue: 'PUBLIC',
},
{
message: 'Snowflake username',
},
@ -573,6 +597,8 @@ describe('setup databases step', () => {
prompts,
testConnection: vi.fn(async () => 0),
scanConnection: vi.fn(async () => 0),
listSchemas: vi.fn(async () => []),
listTables: vi.fn(async () => []),
},
);
@ -746,6 +772,8 @@ describe('setup databases step', () => {
});
const testConnection = vi.fn(async () => 0);
const scanConnection = vi.fn(async () => 0);
const listSchemas = vi.fn(async () => []);
const listTables = vi.fn(async () => []);
const result = await runKtxSetupDatabasesStep(
{
@ -756,7 +784,7 @@ describe('setup databases step', () => {
disableQueryHistory: true,
},
makeIo().io,
{ prompts, testConnection, scanConnection },
{ prompts, testConnection, scanConnection, listSchemas, listTables },
);
expect(result).toEqual({
@ -1663,6 +1691,62 @@ describe('setup databases step', () => {
expect(io.stdout()).toContain('✓ orbit_analytics, orbit_raw');
});
// Verifies the interactive fallback when schema discovery fails: setup warns
// on stderr, asks for a comma-separated schema list, and hands the typed
// schemas to the scope picker as both the available and the preselected set.
it('falls back to comma-separated free-text when listSchemas fails interactively', async () => {
const io = makeIo();
// The last text value is the answer to the free-text schema prompt.
const prompts = makePromptAdapter({
selectValues: ['url'],
textValues: ['', 'env:DATABASE_URL', 'orbit_analytics, orbit_raw'],
});
const testConnection = vi.fn(async () => 0);
const scanConnection = vi.fn(async () => 0);
// Schema discovery always rejects, which is what triggers the fallback.
const listSchemas = vi.fn(async () => {
throw new Error('permission denied to list schemas');
});
// Table listing returns one `events` table for each requested schema.
const listTables = vi.fn(async (_projectDir: string, _connectionId: string, schemas?: string[]) =>
(schemas ?? []).map((schema) => ({ schema, name: 'events', kind: 'table' as const })),
);
const pickers = makePickerStubs({
scopes: [
{
schemas: ['orbit_analytics', 'orbit_raw'],
tables: ['orbit_analytics.events', 'orbit_raw.events'],
},
],
});
const result = await runKtxSetupDatabasesStep(
{
projectDir: tempDir,
inputMode: 'auto',
databaseDrivers: ['postgres'],
databaseSchemas: [],
skipDatabases: false,
},
io.io,
{
prompts,
testConnection,
scanConnection,
listSchemas,
listTables,
pickDatabaseScope: pickers.pickDatabaseScope,
},
);
expect(result.status).toBe('ready');
expect(io.stderr()).toContain('Could not discover postgresql schemas');
expect(vi.mocked(prompts.text).mock.calls.map(([options]) => options.message)).toContain(
textInputPrompt(
'Enter schemas for postgres-warehouse as a comma-separated list (e.g. SALES, MARKETING).',
),
);
// The typed schemas must reach the picker as options, initial selection and suggestion.
expect(pickers.scopeCalls[0]).toMatchObject({
schemas: ['orbit_analytics', 'orbit_raw'],
initialSchemas: ['orbit_analytics', 'orbit_raw'],
schemaSuggestion: { suggested: new Set(['orbit_analytics', 'orbit_raw']) },
});
});
it('passes schemas and a lazy table callback to the scope picker instead of eager table discovery', async () => {
const listSchemas = vi.fn(async () => ['analytics', 'raw']);
const listTables = vi.fn(async (_projectDir: string, _connectionId: string, schemas?: string[]) =>
@ -1986,6 +2070,7 @@ describe('setup databases step', () => {
it('writes query history config for supported Snowflake databases after validation succeeds', async () => {
const io = makeIo();
const historicSqlProbe = vi.fn(async () => ({ ok: true, lines: [] }));
const result = await runKtxSetupDatabasesStep(
{
projectDir: tempDir,
@ -2003,13 +2088,21 @@ describe('setup databases step', () => {
{
testConnection: vi.fn(async () => 0),
scanConnection: vi.fn(async () => 0),
historicSqlProbe,
prompts: makePromptAdapter({
selectValues: ['password'],
textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'PUBLIC', 'reader', ''],
textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'reader', ''],
passwordValues: ['env:SNOWFLAKE_PASSWORD'],
}),
},
);
expect(historicSqlProbe).toHaveBeenCalledWith(
expect.objectContaining({
projectDir: tempDir,
connectionId: 'snowflake',
dialect: 'snowflake',
}),
);
expect(result.status).toBe('ready');
const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
@ -2060,7 +2153,6 @@ describe('setup databases step', () => {
'env:SNOWFLAKE_ACCOUNT',
'WH',
'ANALYTICS',
'PUBLIC',
'reader',
'~/.ssh/snowflake_rsa_key.p8',
'',
@ -2078,7 +2170,6 @@ describe('setup databases step', () => {
account: 'env:SNOWFLAKE_ACCOUNT',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
privateKey: 'file:~/.ssh/snowflake_rsa_key.p8', // pragma: allowlist secret
passphrase: 'env:SNOWFLAKE_KEY_PASS', // pragma: allowlist secret
@ -2446,7 +2537,53 @@ describe('setup databases step', () => {
expect(io.stdout()).toContain('Query history probe...');
expect(io.stdout()).not.toContain('Historic SQL probe...');
expect(io.stdout()).toContain('pg_stat_statements extension is not installed');
expect(io.stdout()).toContain('Setup written; first ingest run will fail until fixed.');
expect(io.stdout()).toContain('Setup written; query history will be skipped until fixed.');
});
// Verifies that a failing Snowflake query-history probe does not block setup:
// the step still ends as 'ready', the probe's failure lines (including the
// GRANT remediation) are printed, and the user is told query history will be
// skipped until the problem is fixed.
it('prints a non-blocking Snowflake query history probe failure with the grants remediation', async () => {
const io = makeIo();
// Stubbed probe reporting the missing ACCOUNT_USAGE access together with its fix.
const historicSqlProbe = vi.fn(async () => ({
ok: false,
lines: [
' FAIL Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY',
' Fix: Run (as ACCOUNTADMIN): GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;',
],
}));
const result = await runKtxSetupDatabasesStep(
{
projectDir: tempDir,
inputMode: 'disabled',
databaseDrivers: ['snowflake'],
databaseConnectionId: 'warehouse',
databaseSchemas: [],
enableQueryHistory: true,
skipDatabases: false,
},
io.io,
{
testConnection: vi.fn(async () => 0),
scanConnection: vi.fn(async () => 0),
historicSqlProbe,
prompts: makePromptAdapter({
textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'reader', ''],
passwordValues: ['env:SNOWFLAKE_PASSWORD'],
}),
},
);
// The probe failure is reported but must not change the step outcome.
expect(result.status).toBe('ready');
expect(historicSqlProbe).toHaveBeenCalledWith(
expect.objectContaining({
projectDir: tempDir,
connectionId: 'warehouse',
dialect: 'snowflake',
}),
);
expect(io.stdout()).toContain('Query history probe...');
expect(io.stdout()).toContain('Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
expect(io.stdout()).toContain('GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE');
expect(io.stdout()).toContain('Setup written; query history will be skipped until fixed.');
});
it('does not run the query history probe when the regular connection test fails', async () => {

View file

@ -19,6 +19,8 @@ import { withMultiselectNavigation, withTextInputNavigation } from './prompt-nav
import { runKtxScan } from './scan.js';
import { applySetupDatabaseContextDepth } from './setup-database-context-depth.js';
import { writeProjectLocalSecretReference } from './setup-secrets.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import {
createKtxSetupPromptAdapter,
type KtxSetupPromptOption,
@ -341,6 +343,13 @@ function historicSqlProbeFailureLines(error: unknown): string[] {
];
}
if (error instanceof Error && error.name === 'HistoricSqlGrantsMissingError') {
const dialect = (error as { dialect?: unknown }).dialect;
if (dialect === 'snowflake') {
return [
' FAIL Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY',
' Fix: Run (as ACCOUNTADMIN): GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;',
];
}
return [
' FAIL Postgres connection role lacks pg_read_all_stats',
' Fix: Run: GRANT pg_read_all_stats TO <connection role>;',
@ -353,10 +362,18 @@ function historicSqlProbeFailureLines(error: unknown): string[] {
}
async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Promise<KtxSetupHistoricSqlProbeResult> {
if (input.dialect !== 'postgres') {
return { ok: true, lines: [] };
if (input.dialect === 'postgres') {
return probePostgresHistoricSql(input);
}
if (input.dialect === 'snowflake') {
return probeSnowflakeHistoricSql(input);
}
return { ok: true, lines: [] };
}
async function probePostgresHistoricSql(
input: KtxSetupHistoricSqlProbeInput,
): Promise<KtxSetupHistoricSqlProbeResult> {
const project = await loadKtxProject({ projectDir: input.projectDir });
const connection = project.config.connections[input.connectionId];
const [{ PostgresPgssReader }, { KtxPostgresHistoricSqlQueryClient }, { isKtxPostgresConnectionConfig }] =
@ -394,6 +411,46 @@ async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Pr
}
}
/**
 * Probes whether the configured Snowflake connection can read query history.
 *
 * Loads the project, resolves the connection by id and lazily imports the Snowflake
 * reader, query client and connector modules. A connection that is not a native
 * Snowflake connection yields a failed result without creating a client.
 *
 * @param input Project directory and connection id to probe.
 * @returns `ok: true` with an "accessible" line followed by one line per reader warning,
 *   or `ok: false` with the lines describing the failure.
 */
async function probeSnowflakeHistoricSql(
  input: KtxSetupHistoricSqlProbeInput,
): Promise<KtxSetupHistoricSqlProbeResult> {
  const { projectDir, connectionId } = input;
  const project = await loadKtxProject({ projectDir });
  const connection = project.config.connections[connectionId];

  const [readerModule, clientModule, connectorModule] = await Promise.all([
    import('./context/ingest/adapters/historic-sql/snowflake-query-history-reader.js'),
    import('./connectors/snowflake/historic-sql-query-client.js'),
    import('./connectors/snowflake/connector.js'),
  ]);

  if (!connectorModule.isKtxSnowflakeConnectionConfig(connection)) {
    const notSnowflake = ` FAIL Connection ${connectionId} is not a native Snowflake connection.`;
    return { ok: false, lines: [notSnowflake] };
  }

  const client = new clientModule.KtxSnowflakeHistoricSqlQueryClient({
    connectionId,
    connection,
    projectDir,
  });

  try {
    const reader = new readerModule.SnowflakeHistoricSqlQueryHistoryReader();
    const probeResult = await reader.probe(client);
    const warningLines = probeResult.warnings.map((warning: string) => ` ! ${warning}`);
    return {
      ok: true,
      lines: [' OK SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY accessible', ...warningLines],
    };
  } catch (error) {
    // Any probe error is translated into user-facing failure lines instead of propagating.
    return { ok: false, lines: historicSqlProbeFailureLines(error) };
  } finally {
    // The client is always cleaned up, whether the probe succeeded or failed.
    await client.cleanup();
  }
}
async function defaultListSchemas(projectDir: string, connectionId: string): Promise<string[]> {
const project = await loadKtxProject({ projectDir });
const connection = project.config.connections[connectionId];
@ -457,7 +514,7 @@ async function defaultListSchemas(projectDir: string, connectionId: string): Pro
if (driver === 'snowflake') {
const { KtxSnowflakeScanConnector, isKtxSnowflakeConnectionConfig } = await import('./connectors/snowflake/connector.js');;
if (!isKtxSnowflakeConnectionConfig(connection)) return [];
const connector = new KtxSnowflakeScanConnector({ connectionId, connection });
const connector = new KtxSnowflakeScanConnector({ connectionId, connection, projectDir });
try {
return await connector.listSchemas();
} finally {
@ -533,7 +590,7 @@ async function defaultListTables(
if (driver === 'snowflake') {
const { KtxSnowflakeScanConnector, isKtxSnowflakeConnectionConfig } = await import('./connectors/snowflake/connector.js');;
if (!isKtxSnowflakeConnectionConfig(connection)) return [];
const connector = new KtxSnowflakeScanConnector({ connectionId, connection });
const connector = new KtxSnowflakeScanConnector({ connectionId, connection, projectDir });
try {
return await connector.listTables(schemas);
} finally {
@ -952,12 +1009,6 @@ async function buildConnectionConfig(input: {
stringConfigField(input.existingConnection, 'database'),
);
if (database === undefined) return 'back';
const schemaName = await promptText(
prompts,
'Snowflake schema\nPress Enter for PUBLIC, or enter a schema name.',
stringConfigField(input.existingConnection, 'schema_name') ?? 'PUBLIC',
);
if (schemaName === undefined) return 'back';
const username = await promptText(
prompts,
'Snowflake username',
@ -1012,14 +1063,13 @@ async function buildConnectionConfig(input: {
if (role === undefined) return 'back';
if (authMethod === 'password') {
const resolvedPasswordRef = passwordRef ?? stringConfigField(input.existingConnection, 'password');
if (!account || !warehouse || !database || !schemaName || !username || !resolvedPasswordRef) return null;
if (!account || !warehouse || !database || !username || !resolvedPasswordRef) return null;
return {
driver: 'snowflake',
authMethod: 'password',
account,
warehouse,
database,
schema_name: schemaName,
username,
password: resolvedPasswordRef,
...(role ? { role } : {}),
@ -1028,7 +1078,7 @@ async function buildConnectionConfig(input: {
const resolvedPrivateKey = privateKeyInput
? normalizeFileReference(privateKeyInput)
: stringConfigField(input.existingConnection, 'privateKey');
if (!account || !warehouse || !database || !schemaName || !username || !resolvedPrivateKey) return null;
if (!account || !warehouse || !database || !username || !resolvedPrivateKey) return null;
const resolvedPassphrase = passphraseRef ?? stringConfigField(input.existingConnection, 'passphrase');
return {
driver: 'snowflake',
@ -1036,7 +1086,6 @@ async function buildConnectionConfig(input: {
account,
warehouse,
database,
schema_name: schemaName,
username,
privateKey: resolvedPrivateKey,
...(resolvedPassphrase ? { passphrase: resolvedPassphrase } : {}),
@ -1334,6 +1383,7 @@ async function writeConnectionConfig(input: {
projectDir: string;
connectionId: string;
connection: KtxProjectConnectionConfig;
io?: KtxCliIo;
}): Promise<void> {
const project = await loadKtxProject({ projectDir: input.projectDir });
const migratedConnections = Object.fromEntries(
@ -1351,6 +1401,17 @@ async function writeConnectionConfig(input: {
},
};
await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8');
if (input.io) {
await emitTelemetryEvent({
name: 'connection_added',
projectDir: input.projectDir,
io: input.io,
fields: {
driver: String(nextConnection.driver ?? 'unknown').toLowerCase(),
isDemoConnection: isDemoConnection(input.connectionId, nextConnection),
},
});
}
const queryHistory = queryHistoryConfigRecord(nextConnection);
if (queryHistory?.enabled === true) {
@ -1462,6 +1523,21 @@ async function writeScopeConfig(input: {
});
}
/**
 * Asks the user to type the scope (e.g. schemas or datasets) for a connection as a
 * comma-separated list.
 *
 * Each entry is trimmed, empty entries are dropped and duplicates are removed. Dropping
 * empty entries matters because `''.split(',')` yields `['']` and a trailing comma yields
 * a trailing `''`; without the filter those blank names would be returned as real scope
 * values.
 *
 * @param input.prompts Prompt adapter used to ask the question.
 * @param input.connectionId Connection id shown in the prompt message.
 * @param input.spec Scope discovery spec; `nounPlural` selects the wording and the example.
 * @returns The cleaned list (possibly empty), or `undefined` when the prompt itself
 *   returned `undefined`.
 */
async function promptCommaSeparatedScope(input: {
  prompts: KtxSetupDatabasesPromptAdapter;
  connectionId: string;
  spec: ScopeDiscoverySpec;
}): Promise<string[] | undefined> {
  const example =
    input.spec.nounPlural === 'datasets' ? 'sales, marketing' : 'SALES, MARKETING';
  const value = await promptText(
    input.prompts,
    `Enter ${input.spec.nounPlural} for ${input.connectionId} as a comma-separated list (e.g. ${example}).`,
  );
  if (value === undefined) return undefined;
  return unique(
    value
      .split(',')
      .map((part) => part.trim())
      // Skip blank segments produced by empty input, doubled commas or a trailing comma.
      .filter((part) => part.length > 0),
  );
}
async function maybeConfigureDatabaseScope(input: {
projectDir: string;
connectionId: string;
@ -1531,28 +1607,48 @@ async function maybeConfigureDatabaseScope(input: {
writeSetupSection(input.io, 'Discovering tables', [`Connecting to ${input.connectionId}`]);
const schemas = unique(
cliSchemas.length > 0
? cliSchemas
: await (async (): Promise<string[]> => {
if (!spec) return [];
try {
return await (input.deps.listSchemas ?? defaultListSchemas)(input.projectDir, input.connectionId);
} catch (error) {
const detail = error instanceof Error ? error.message : String(error);
input.io.stderr.write(
`Could not discover ${spec.promptLabel.toLowerCase()} for ${input.connectionId}; ${detail}\n`,
);
return [];
}
})(),
);
let effectiveCliSchemas = cliSchemas;
let listedSchemas: string[];
if (cliSchemas.length > 0) {
listedSchemas = cliSchemas;
} else if (!spec) {
listedSchemas = [];
} else {
try {
listedSchemas = await (input.deps.listSchemas ?? defaultListSchemas)(
input.projectDir,
input.connectionId,
);
} catch (error) {
const detail = error instanceof Error ? error.message : String(error);
input.io.stderr.write(
`Could not discover ${spec.promptLabel.toLowerCase()} for ${input.connectionId}; ${detail}\n`,
);
const typed = await promptCommaSeparatedScope({
prompts: input.prompts,
connectionId: input.connectionId,
spec,
});
if (typed === undefined) return 'back';
effectiveCliSchemas = typed;
listedSchemas = typed;
if (typed.length > 0) {
await writeScopeConfig({
projectDir: input.projectDir,
connectionId: input.connectionId,
values: typed,
spec,
});
}
}
}
const schemas = unique(listedSchemas);
if (spec && schemas.length === 0) {
return 'ready';
}
const schemaSuggestion =
cliSchemas.length > 0
? { excluded: new Set<string>(), suggested: new Set(cliSchemas) }
effectiveCliSchemas.length > 0
? { excluded: new Set<string>(), suggested: new Set(effectiveCliSchemas) }
: spec?.suggest(schemas) ?? { excluded: new Set<string>(), suggested: new Set<string>() };
const existingEnabled =
hasExistingTables && input.forcePrompt === true
@ -1570,7 +1666,7 @@ async function maybeConfigureDatabaseScope(input: {
schemaSuggestion,
existing: { enabledTables: existingEnabled },
supportsSchemaScope: spec !== undefined,
initialSchemas: cliSchemas.length > 0 ? cliSchemas : undefined,
initialSchemas: effectiveCliSchemas.length > 0 ? effectiveCliSchemas : undefined,
prompts: input.prompts,
listTablesForSchemas: (selectedSchemas) =>
(input.deps.listTables ?? defaultListTables)(input.projectDir, input.connectionId, selectedSchemas),
@ -1607,6 +1703,7 @@ async function maybeConfigureDatabaseScope(input: {
projectDir: input.projectDir,
connectionId: input.connectionId,
connection: { ...currentConnection, enabled_tables: enabledTables },
io: input.io,
});
if (spec && activeSchemas.length > 0) {
@ -1674,7 +1771,12 @@ async function maybeRunHistoricSqlSetupProbe(input: {
const connection = project.config.connections[input.connectionId];
const queryHistory = queryHistoryConfigRecord(connection) ?? historicSqlConfigRecord(connection);
const driver = normalizeDriver(connection?.driver);
if (queryHistory?.enabled !== true || driver !== 'postgres') {
if (queryHistory?.enabled !== true) {
return;
}
const dialect: 'postgres' | 'snowflake' | null =
driver === 'postgres' ? 'postgres' : driver === 'snowflake' ? 'snowflake' : null;
if (!dialect) {
return;
}
@ -1683,13 +1785,13 @@ async function maybeRunHistoricSqlSetupProbe(input: {
const result = await probe({
projectDir: input.projectDir,
connectionId: input.connectionId,
dialect: 'postgres',
dialect,
});
for (const line of result.lines) {
input.io.stdout.write(`${line}\n`);
}
if (!result.ok) {
input.io.stdout.write('│ Setup written; first ingest run will fail until fixed.\n');
input.io.stdout.write('│ Setup written; query history will be skipped until fixed.\n');
}
}
@ -2039,6 +2141,7 @@ async function runPrimarySourceFullEdit(input: {
},
driver,
}),
io: input.io,
});
const validated = await validateAndScanConnection({
@ -2274,6 +2377,7 @@ export async function runKtxSetupDatabasesStep(
projectDir: args.projectDir,
connectionId: connectionChoice.connectionId,
connection: withContextDepth,
io,
});
} else {
const existing = project.config.connections[connectionChoice.connectionId];
@ -2299,6 +2403,7 @@ export async function runKtxSetupDatabasesStep(
projectDir: args.projectDir,
connectionId: connectionChoice.connectionId,
connection: withContextDepth,
io,
});
}
@ -2382,6 +2487,7 @@ export async function runKtxSetupDatabasesStep(
projectDir: args.projectDir,
connectionId: connectionChoice.connectionId,
connection: withContextDepth,
io,
});
setupStatus = await validateAndScanConnection({
projectDir: args.projectDir,

View file

@ -339,7 +339,7 @@ export interface DemoTourDeps {
}
export async function runDemoTour(
args: { inputMode: 'auto' | 'disabled' },
args: { inputMode: 'auto' | 'disabled'; cliVersion?: string },
io: KtxCliIo,
deps: DemoTourDeps = {},
): Promise<number> {
@ -347,7 +347,7 @@ export async function runDemoTour(
const ensureProject = deps.ensureProject ?? ensureSeededDemoProject;
const projectDir = defaultDemoProjectDir();
await ensureProject({ projectDir, force: false });
await ensureProject({ projectDir, force: false, io, cliVersion: args.cliVersion });
io.stdout.write(renderDemoBanner(projectDir) + '\n');
io.stdout.write(`\n│ ${dim('Press Enter to continue, Escape to go back')}\n└\n`);

View file

@ -80,6 +80,7 @@ describe('setup sources step', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
@ -170,6 +171,34 @@ describe('setup sources step', () => {
expect(runInitialIngest).toHaveBeenCalledWith(projectDir, 'analytics_dbt', io.io, { inputMode: 'disabled' });
});
// Verifies that writing a source connection during setup emits a `connection_added`
// telemetry event, and that the emitted payload does not contain the project path.
it('emits debug telemetry when setup writes a source connection', async () => {
// Turn on telemetry debug output and blank out CI for the duration of this test.
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
await addPrimarySource();
const io = makeIo();
const result = await runKtxSetupSourcesStep(
{
projectDir,
inputMode: 'disabled',
source: 'dbt',
sourceConnectionId: 'analytics_dbt',
sourcePath: '/repo/dbt',
sourceProjectName: 'analytics',
runInitialSourceIngest: false,
skipSources: false,
},
io.io,
// dbt validation is stubbed to succeed so the step reaches the config write.
{ validateDbt: vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })) },
);
expect(result.status).toBe('ready');
// The debug event is expected on stderr, with the driver and demo flag but no project path.
expect(io.stderr()).toContain('"event":"connection_added"');
expect(io.stderr()).toContain('"driver":"dbt"');
expect(io.stderr()).toContain('"isDemoConnection":false');
expect(io.stderr()).not.toContain(projectDir);
});
it('writes Metabase config and validates mapping through existing mapping path', async () => {
await addPrimarySource();
const validateMetabase = vi.fn(async () => ({ ok: true as const, detail: 'user=admin@example.com' }));

View file

@ -22,6 +22,8 @@ import { runKtxSourceMapping } from './source-mapping.js';
import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js';
import { runKtxPublicIngest } from './public-ingest.js';
import { writeProjectLocalSecretReference } from './setup-secrets.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import {
createKtxSetupPromptAdapter,
type KtxSetupPromptOption,
@ -325,6 +327,7 @@ async function writeSourceConnection(
connectionId: string,
connection: KtxProjectConnectionConfig,
adapter: string,
io?: KtxCliIo,
): Promise<() => Promise<void>> {
assertSafeConnectionId(connectionId);
const project = await loadKtxProject({ projectDir });
@ -345,6 +348,17 @@ async function writeSourceConnection(
},
};
await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8');
if (io) {
await emitTelemetryEvent({
name: 'connection_added',
projectDir,
io,
fields: {
driver: String(connection.driver ?? adapter).toLowerCase(),
isDemoConnection: isDemoConnection(connectionId, connection),
},
});
}
return async () => {
const latest = await loadKtxProject({ projectDir });
const connections = { ...latest.config.connections };
@ -1736,6 +1750,7 @@ async function saveValidateAndMaybeBuildSource(input: {
connectionId,
connection,
sourceAdapter(input.source),
input.io,
);
if (input.sourceChoice.kind === 'existing') {

View file

@ -23,6 +23,7 @@ function makeIo() {
return {
io: {
stdout: {
isTTY: false,
write: (chunk: string) => {
stdout += chunk;
},
@ -91,6 +92,7 @@ describe('setup status', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
@ -528,6 +530,43 @@ describe('setup status', () => {
expect(output).not.toContain('Finish agent setup');
});
// Runs a full non-interactive setup with every optional step skipped and checks that
// `setup_step` telemetry events are emitted without leaking the project directory.
it('emits debug telemetry for setup steps without project paths', async () => {
// Turn on telemetry debug output and blank out CI for the duration of this test.
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const testIo = makeIo();
// Mark stdout as a TTY for this run.
testIo.io.stdout.isTTY = true;
await expect(
runKtxSetup(
{
command: 'run',
projectDir: tempDir,
mode: 'auto',
agents: false,
skipAgents: true,
inputMode: 'disabled',
yes: true,
cliVersion: '0.2.0',
skipLlm: true,
skipEmbeddings: true,
skipDatabases: true,
skipSources: true,
databaseSchemas: [],
},
testIo.io,
{
// Runtime and context steps are stubbed to report 'ready' without doing real work.
runtime: async () => runtimeReady(tempDir),
context: async () => ({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' }),
},
),
).resolves.toBe(0);
// Events for at least the project and models steps are expected on stderr; the temp path is not.
expect(testIo.stderr()).toContain('"event":"setup_step"');
expect(testIo.stderr()).toContain('"step":"project"');
expect(testIo.stderr()).toContain('"step":"models"');
expect(testIo.stderr()).not.toContain(tempDir);
});
it('prints the setup shell intro for auto-created run mode', async () => {
const testIo = makeIo();
@ -1047,7 +1086,7 @@ describe('setup status', () => {
).resolves.toBe(0);
expect(runDemoTour).toHaveBeenCalledWith(
{ inputMode: 'auto' },
{ inputMode: 'auto', cliVersion: '0.2.0' },
testIo.io,
expect.objectContaining({}),
);

View file

@ -6,7 +6,7 @@ import { savedMemoryCountsForReport } from './context/ingest/reports.js';
import { ktxLocalStateDbPath } from './context/project/local-state-db.js';
import { loadKtxProject, type KtxLocalProject } from './context/project/project.js';
import { readKtxSetupState } from './context/project/setup-config.js';
import type { KtxCliIo } from './cli-runtime.js';
import { getKtxCliPackageInfo, type KtxCliIo } from './cli-runtime.js';
import { formatSetupNextStepLines } from './next-steps.js';
import { runtimeInstallPolicyFromFlags } from './managed-python-command.js';
import { readManagedPythonRuntimeStatus } from './managed-python-runtime.js';
@ -179,6 +179,16 @@ type KtxSetupFlowStatus =
| 'back'
| 'missing-input'
| 'failed';
type TelemetrySetupStep =
| 'project'
| 'runtime'
| 'models'
| 'embeddings'
| 'databases'
| 'sources'
| 'context'
| 'agents'
| 'demo-tour';
export interface KtxSetupEntryMenuPromptAdapter {
select(options: { message: string; options: KtxSetupPromptOption[] }): Promise<string>;
@ -196,6 +206,36 @@ function createEntryMenuPromptAdapter(): KtxSetupEntryMenuPromptAdapter {
});
}
/**
 * Maps a setup step status onto the outcome value reported in telemetry.
 *
 * `ready` becomes `completed`, `skipped` stays `skipped`, and every other status
 * is reported as `abandoned`.
 */
function setupTelemetryOutcome(
  status: KtxSetupFlowStatus | Extract<Awaited<ReturnType<typeof runKtxSetupProjectStep>>, { status: string }>['status'],
): 'completed' | 'skipped' | 'abandoned' {
  switch (status) {
    case 'ready':
      return 'completed';
    case 'skipped':
      return 'skipped';
    default:
      return 'abandoned';
  }
}
/**
 * Emits one `setup_step` telemetry event for a finished setup step.
 *
 * @param input.projectDir Project directory passed through to the telemetry emitter.
 * @param input.step Name of the setup step being reported.
 * @param input.status Step result; translated to an outcome by `setupTelemetryOutcome`.
 * @param input.startedAt `performance.now()` timestamp captured when the step started.
 * @param input.io CLI io handed to the telemetry emitter.
 * @param input.cliVersion Optional version that overrides the one from the package info.
 */
async function recordSetupStep(input: {
  projectDir: string;
  step: TelemetrySetupStep;
  status: KtxSetupFlowStatus | Extract<Awaited<ReturnType<typeof runKtxSetupProjectStep>>, { status: string }>['status'];
  startedAt: number;
  io: KtxCliIo;
  cliVersion?: string;
}): Promise<void> {
  // Imported dynamically, so the telemetry module is only loaded once a step is recorded.
  const { emitTelemetryEvent } = await import('./telemetry/index.js');
  // Read the package info once and reuse it for both the spread and the version fallback.
  const packageInfo = getKtxCliPackageInfo();
  await emitTelemetryEvent({
    name: 'setup_step',
    projectDir: input.projectDir,
    io: input.io,
    packageInfo: { ...packageInfo, version: input.cliVersion ?? packageInfo.version },
    fields: {
      step: input.step,
      outcome: setupTelemetryOutcome(input.status),
      // Clamp so the reported duration is never negative.
      durationMs: Math.max(0, performance.now() - input.startedAt),
    },
  });
}
async function runKtxSetupEntryMenu(
status: KtxSetupStatus,
deps: KtxSetupEntryMenuDeps = {},
@ -229,11 +269,21 @@ async function runKtxSetupDemoFromEntryMenu(
deps: KtxSetupDeps,
): Promise<number> {
const { runDemoTour } = await import('./setup-demo-tour.js');
return await runDemoTour(
{ inputMode: args.inputMode },
const startedAt = performance.now();
const result = await runDemoTour(
{ inputMode: args.inputMode, cliVersion: args.cliVersion },
io,
{ agents: deps.agents },
);
await recordSetupStep({
projectDir: args.projectDir,
step: 'demo-tour',
status: result === 0 ? 'ready' : 'failed',
startedAt,
io,
cliVersion: args.cliVersion,
});
return result;
}
function embeddingsReady(status: KtxSetupStatus['embeddings']): boolean {
@ -564,6 +614,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
}
const projectMode = entryAction === 'new-project' ? 'prompt-new' : args.mode;
const projectStepStartedAt = performance.now();
projectResult = await runKtxSetupProjectStep(
{
projectDir: args.projectDir,
@ -575,6 +626,14 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
io,
deps.project,
);
await recordSetupStep({
projectDir: projectResult.projectDir,
step: 'project',
status: projectResult.status,
startedAt: projectStepStartedAt,
io,
cliVersion: args.cliVersion,
});
if (projectResult.status === 'back') {
continue;
@ -640,6 +699,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
const step = setupSteps[stepIndex];
if (!step) break;
const stepStartedAt = performance.now();
let stepResult: { status: KtxSetupFlowStatus };
if (step === 'models') {
const modelRunner =
@ -792,6 +852,15 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
}
}
await recordSetupStep({
projectDir: projectResult.projectDir,
step,
status: stepResult.status,
startedAt: stepStartedAt,
io,
cliVersion: args.cliVersion,
});
if (stepResult.status === 'failed') {
await cleanupCreatedProjectScaffold(projectResult.createdProjectCleanup);
return 1;

View file

@ -18,12 +18,13 @@ const ORDERS_YAML = [
'',
].join('\n');
function makeIo() {
function makeIo(options: { isTTY?: boolean } = {}) {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
isTTY: options.isTTY,
write: (chunk: string) => {
stdout += chunk;
},
@ -63,6 +64,7 @@ describe('runKtxSl', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
@ -289,6 +291,43 @@ joins: []
expect(stderr.write).not.toHaveBeenCalled();
});
// Compiles (without executing) a semantic-layer query against a stubbed compute port and
// checks that an `sl_query_completed` event is emitted without the project path in it.
it('emits debug telemetry for sl query without project paths', async () => {
// Turn on telemetry debug output and blank out CI for the duration of this test.
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const projectDir = join(tempDir, 'project');
await seedSlSource({ projectDir });
const io = makeIo({ isTTY: true });
// Stub compute port: `query` returns a fixed compiled result; the other methods are unused mocks.
const createSemanticLayerCompute = vi.fn(() => ({
query: vi.fn(async () => ({
sql: 'select count(*) as order_count from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: {},
})),
validateSources: vi.fn(),
generateSources: vi.fn(),
}));
const code = await runKtxSl(
{
command: 'query',
projectDir,
connectionId: 'warehouse',
query: { measures: ['orders.order_count'], dimensions: [] },
format: 'json',
execute: false,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
io.io,
{ createSemanticLayerCompute },
);
expect(code).toBe(0);
// The debug event is expected on stderr and must not include the project directory.
expect(io.stderr()).toContain('"event":"sl_query_completed"');
expect(io.stderr()).not.toContain(projectDir);
});
it('runs sl query from a JSON query file', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });
@ -413,6 +452,7 @@ joins: []
cliVersion: '0.2.0',
installPolicy: 'auto',
io: { stdout, stderr },
projectDir,
});
expect(stdout.write).toHaveBeenCalledWith('select count(*) as order_count from public.orders\n');
});

View file

@ -1,4 +1,5 @@
import { readFile } from 'node:fs/promises';
import type { KtxCliIo } from './cli-runtime.js';
import { createDefaultLocalQueryExecutor } from './context/connections/local-query-executor.js';
import type { KtxSqlQueryExecutorPort } from './context/connections/query-executor.js';
import { KtxIngestEmbeddingPortAdapter } from './context/llm/embedding-port.js';
@ -18,6 +19,8 @@ import {
type KtxManagedPythonInstallPolicy,
} from './managed-python-command.js';
import { profileMark } from './startup-profile.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:sl');
@ -56,10 +59,7 @@ export type KtxSlArgs =
runtimeInstallPolicy: KtxManagedPythonInstallPolicy;
};
interface KtxSlIo {
stdout: { write(chunk: string): void };
stderr: { write(chunk: string): void };
}
type KtxSlIo = KtxCliIo;
interface KtxSlDeps {
loadProject?: typeof loadKtxProject;
@ -70,6 +70,7 @@ interface KtxSlDeps {
cliVersion: string;
installPolicy: KtxManagedPythonInstallPolicy;
io: KtxSlIo;
projectDir?: string;
}) => Promise<KtxSemanticLayerComputePort>;
createQueryExecutor?: () => KtxSqlQueryExecutorPort;
}
@ -85,6 +86,14 @@ function resolutionToEmbeddingPort(resolution: EmbeddingProviderResolution): Ktx
return null;
}
/** Returns how many measures the query lists, or 0 when `measures` is not an array. */
function queryMeasureCount(query: SemanticLayerQueryInput): number {
  const measures = query.measures;
  if (!Array.isArray(measures)) {
    return 0;
  }
  return measures.length;
}
/** Returns how many dimensions the query lists, or 0 when `dimensions` is not an array. */
function queryDimensionCount(query: SemanticLayerQueryInput): number {
  const dimensions = query.dimensions;
  if (!Array.isArray(dimensions)) {
    return 0;
  }
  return dimensions.length;
}
async function printSlSources(input: {
rows: ReadonlyArray<LocalSlSourceSummary>;
command: 'sl list';
@ -177,6 +186,8 @@ async function readSlQueryFile(path: string): Promise<SemanticLayerQueryInput> {
}
export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: KtxSlDeps = {}): Promise<number> {
const startedAt = performance.now();
let queryForTelemetry: SemanticLayerQueryInput | undefined;
try {
const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir });
if (args.command === 'list') {
@ -234,6 +245,18 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
connectionId: args.connectionId,
sourceName: args.sourceName,
});
await emitTelemetryEvent({
name: 'sl_validate_completed',
projectDir: args.projectDir,
io,
fields: {
sourceCount: source ? 1 : 0,
modelCount: 0,
validationErrorCount: result.valid ? 0 : result.errors.length,
outcome: result.valid ? 'ok' : 'error',
durationMs: Math.max(0, performance.now() - startedAt),
},
});
if (!result.valid) {
for (const error of result.errors) {
io.stderr.write(`${error}\n`);
@ -248,12 +271,14 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
if (!query) {
throw new Error('sl query requires query input from --query-file or at least one --measure');
}
queryForTelemetry = query;
const compute = deps.createSemanticLayerCompute
? deps.createSemanticLayerCompute()
: await (deps.createManagedSemanticLayerCompute ?? createManagedPythonSemanticLayerComputePort)({
cliVersion: args.cliVersion,
installPolicy: args.runtimeInstallPolicy,
io,
projectDir: args.projectDir,
});
const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() : undefined;
const result = await compileLocalSlQuery(project as KtxLocalProject, {
@ -264,6 +289,19 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
maxRows: args.maxRows,
queryExecutor,
});
await emitTelemetryEvent({
name: 'sl_query_completed',
projectDir: args.projectDir,
io,
fields: {
mode: args.execute ? 'execute' : 'compile',
referencedSourceCount: result.plan && typeof result.plan === 'object' ? 1 : 0,
referencedDimensionCount: queryDimensionCount(query),
referencedMeasureCount: queryMeasureCount(query),
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'ok',
},
});
if (args.format === 'sql') {
io.stdout.write(`${result.sql}\n`);
return 0;
@ -274,6 +312,39 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
const _exhaustive: never = args;
throw new Error(`Unsupported sl command: ${JSON.stringify(_exhaustive)}`);
} catch (error) {
if (args.command === 'validate') {
const errorClass = scrubErrorClass(error);
await emitTelemetryEvent({
name: 'sl_validate_completed',
projectDir: args.projectDir,
io,
fields: {
sourceCount: 0,
modelCount: 0,
validationErrorCount: 0,
outcome: 'error',
...(errorClass ? { errorClass } : {}),
durationMs: Math.max(0, performance.now() - startedAt),
},
});
}
if (args.command === 'query') {
const errorClass = scrubErrorClass(error);
await emitTelemetryEvent({
name: 'sl_query_completed',
projectDir: args.projectDir,
io,
fields: {
mode: args.execute ? 'execute' : 'compile',
referencedSourceCount: 0,
referencedDimensionCount: queryForTelemetry ? queryDimensionCount(queryForTelemetry) : 0,
referencedMeasureCount: queryForTelemetry ? queryMeasureCount(queryForTelemetry) : 0,
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'error',
...(errorClass ? { errorClass } : {}),
},
});
}
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
return 1;
}

View file

@ -8,12 +8,13 @@ import type { SqlAnalysisPort } from './context/sql-analysis/ports.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { runKtxSql } from './sql.js';
function makeIo() {
function makeIo(options: { isTTY?: boolean } = {}) {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
isTTY: options.isTTY,
write: (chunk: string) => {
stdout += chunk;
},
@ -32,7 +33,7 @@ function makeIo() {
function makeSqlAnalysis(result: Awaited<ReturnType<SqlAnalysisPort['validateReadOnly']>>): SqlAnalysisPort {
return {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(),
analyzeBatch: vi.fn(async () => new Map([['cli-sql', { tablesTouched: ['orders'], columnsByClause: {} }]])),
validateReadOnly: vi.fn(async () => result),
};
}
@ -76,6 +77,7 @@ describe('runKtxSql', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
await rm(tempDir, { recursive: true, force: true });
});
@ -130,6 +132,39 @@ describe('runKtxSql', () => {
expect(io.stderr()).toBe('');
});
// Executes a read-only statement through stubbed analysis and connector dependencies and
// checks that `sql_completed` telemetry carries the query verb but not the SQL text itself.
it('emits debug telemetry for SQL without raw query text', async () => {
// Turn on telemetry debug output and blank out CI for the duration of this test.
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } });
const io = makeIo({ isTTY: true });
await expect(
runKtxSql(
{
command: 'execute',
projectDir,
connectionId: 'warehouse',
sql: 'select count(*) from orders',
maxRows: 10,
output: 'json',
json: true,
cliVersion: '0.0.0-test',
},
io.io,
{
// Read-only validation is stubbed to pass and the connector is replaced by a test double.
createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
createScanConnector: vi.fn(async () => makeConnector()),
},
),
).resolves.toBe(0);
// Only the classified verb may appear on stderr; the raw statement text must not.
expect(io.stderr()).toContain('"event":"sql_completed"');
expect(io.stderr()).toContain('"queryVerb":"select"');
expect(io.stderr()).not.toContain('select count(*)');
});
it('prints JSON output', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });

View file

@ -6,6 +6,9 @@ import { type KtxOutputMode, resolveOutputMode } from './io/mode.js';
import { createKtxCliScanConnector } from './local-scan-connectors.js';
import { createManagedDaemonSqlAnalysisPort } from './managed-python-http.js';
import { profileMark } from './startup-profile.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:sql');
@ -54,6 +57,27 @@ function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDia
return map[normalized] ?? 'postgres';
}
/**
 * Classifies a SQL statement by its leading keyword, for telemetry.
 *
 * Only the coarse verb is returned, never any part of the statement text. The keyword
 * is the first run of ASCII letters after optional leading whitespace and opening
 * parentheses, and it must end at a word boundary. This keeps statements such as
 * `select*from t`, `SELECT(1)`, `(select 1)` and `show;` correctly classified even
 * though the keyword is not followed by whitespace, while `selection` or `select_all`
 * still fall through to `other`.
 *
 * @param sql Statement text to classify.
 * @returns `select`, `explain`, `show` or `with` when the statement starts with that
 *   keyword (case-insensitive); otherwise `other`, including for empty input.
 */
function queryVerb(sql: string): 'select' | 'explain' | 'show' | 'with' | 'other' {
  const first = /^[\s(]*([a-z]+)\b/i.exec(sql)?.[1]?.toLowerCase();
  if (first === 'select' || first === 'explain' || first === 'show' || first === 'with') {
    return first;
  }
  return 'other';
}
/**
 * Best-effort count of the tables a SQL statement touches.
 *
 * Submits the statement to the analysis port as a single-item batch and reads back the
 * entry for that item. A missing entry, or any error raised while analysing, results in 0
 * rather than an exception.
 *
 * @param port SQL analysis port used for the batch analysis.
 * @param sql Statement to analyse.
 * @param dialect Dialect passed to the analysis port.
 * @returns Number of tables touched, or 0 when it cannot be determined.
 */
async function safeReferencedTableCount(
  port: SqlAnalysisPort,
  sql: string,
  dialect: SqlAnalysisDialect,
): Promise<number> {
  try {
    const analyses = await port.analyzeBatch([{ id: 'cli-sql', sql }], dialect);
    const entry = analyses.get('cli-sql');
    const tableCount = entry?.tablesTouched.length;
    return tableCount ?? 0;
  } catch {
    // Deliberately swallowed: a failed analysis must not fail the caller.
    return 0;
  }
}
function formatValue(value: unknown): string {
if (value === null || value === undefined) return '';
if (typeof value === 'string') return value;
@ -119,14 +143,19 @@ function resultOutput(connectionId: string, result: KtxQueryResult): SqlExecutio
}
export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: KtxSqlDeps = {}): Promise<number> {
const startedAt = performance.now();
let driver = 'unknown';
let demoConnection = false;
try {
const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir });
const connection = project.config.connections[args.connectionId];
if (!connection) {
throw new Error(`Connection "${args.connectionId}" is not configured in ktx.yaml`);
}
driver = String(connection.driver ?? 'unknown').toLowerCase();
demoConnection = isDemoConnection(args.connectionId, connection);
const sqlAnalysis =
const createSqlAnalysis =
deps.createSqlAnalysis ??
(() =>
createManagedDaemonSqlAnalysisPort({
@ -135,10 +164,13 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps:
installPolicy: 'auto',
io,
}));
const validation = await sqlAnalysis().validateReadOnly(args.sql, sqlAnalysisDialectForDriver(connection.driver));
const analysisPort = createSqlAnalysis();
const dialect = sqlAnalysisDialectForDriver(connection.driver);
const validation = await analysisPort.validateReadOnly(args.sql, dialect);
if (!validation.ok) {
throw new Error(validation.error ?? 'SQL is not read-only.');
}
const referencedTableCount = await safeReferencedTableCount(analysisPort, args.sql, dialect);
const createScanConnector = deps.createScanConnector ?? createKtxCliScanConnector;
let connector: KtxScanConnector | null = null;
@ -157,11 +189,39 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps:
);
const mode = resolveOutputMode({ explicit: args.output, json: args.json, io });
printSqlResult(resultOutput(args.connectionId, result), mode, io);
await emitTelemetryEvent({
name: 'sql_completed',
projectDir: args.projectDir,
io,
fields: {
driver,
isDemoConnection: demoConnection,
queryVerb: queryVerb(args.sql),
referencedTableCount,
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'ok',
},
});
return 0;
} finally {
await cleanupConnector(connector);
}
} catch (error) {
const errorClass = scrubErrorClass(error);
await emitTelemetryEvent({
name: 'sql_completed',
projectDir: args.projectDir,
io,
fields: {
driver,
isDemoConnection: demoConnection,
queryVerb: queryVerb(args.sql),
referencedTableCount: 0,
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'error',
...(errorClass ? { errorClass } : {}),
},
});
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
return 1;
}

View file

@ -0,0 +1,37 @@
import { describe, expect, it } from 'vitest';
import { beginCommandSpan, completeCommandSpan, resetCommandSpan } from './command-hook.js';
// Covers the command span lifecycle: a span is begun, then completed into an event payload.
describe('telemetry command hook', () => {
it('builds a completed command event from a span', () => {
// Clear any span left behind by an earlier test before starting a new one.
resetCommandSpan();
beginCommandSpan({
commandPath: ['ktx', 'status'],
flagsPresent: { projectDir: true, json: true },
projectDir: '/tmp/private',
hasProject: true,
attachProjectGroup: true,
startedAt: 100,
});
// durationMs is completedAt - startedAt (125 - 100); attachProjectGroup is
// reported under the name projectGroupAttached.
expect(
completeCommandSpan({
completedAt: 125,
outcome: 'ok',
}),
).toEqual({
commandPath: ['ktx', 'status'],
durationMs: 25,
outcome: 'ok',
flagsPresent: { projectDir: true, json: true },
hasProject: true,
projectDir: '/tmp/private',
projectGroupAttached: true,
});
});
it('returns undefined when no preAction span exists', () => {
// With no span begun, completing must yield nothing rather than a partial event.
resetCommandSpan();
expect(completeCommandSpan({ completedAt: 200, outcome: 'ok' })).toBeUndefined();
});
});

View file

@ -0,0 +1,59 @@
import { scrubErrorClass } from './scrubber.js';
/** How a command run ended, as reported on the completed span. */
export type CommandOutcome = 'ok' | 'error' | 'aborted';
/** Data captured when a command starts and held until it completes. */
interface CommandSpan {
commandPath: string[];
flagsPresent: Record<string, boolean>;
projectDir?: string;
hasProject: boolean;
attachProjectGroup: boolean;
// Start timestamp; must be in the same unit/clock as the `completedAt` passed on completion.
startedAt: number;
}
/** Result of completing a span: the begin-time data plus duration, outcome and optional error class. */
export interface CompletedCommandSpan {
commandPath: string[];
durationMs: number;
outcome: CommandOutcome;
errorClass?: string;
flagsPresent: Record<string, boolean>;
hasProject: boolean;
projectDir?: string;
projectGroupAttached: boolean;
}
// Module-level slot holding the single in-flight span; undefined when none is active.
let activeCommandSpan: CommandSpan | undefined;
/**
 * Records `input` as the active command span.
 *
 * The object is stored by reference (not copied), and any span that was
 * already active is replaced.
 *
 * @param input - Span data captured at command start.
 */
export function beginCommandSpan(input: CommandSpan): void {
  activeCommandSpan = input;
}
/**
 * Completes the active command span and returns the finished payload.
 *
 * The active span is always cleared, so a second call without a new
 * `beginCommandSpan` returns `undefined`.
 *
 * @param input - Completion timestamp, outcome and the optional error that ended the command.
 * @returns The completed span, or `undefined` when no span was active. `durationMs` is
 *   `completedAt - startedAt`, clamped to be non-negative; `errorClass` is present only
 *   when an error was supplied and scrubbing it produced a value.
 */
export function completeCommandSpan(input: {
  completedAt: number;
  outcome: CommandOutcome;
  error?: unknown;
}): CompletedCommandSpan | undefined {
  // Take the span out of the module slot before doing anything else.
  const finished = activeCommandSpan;
  activeCommandSpan = undefined;
  if (!finished) {
    return undefined;
  }

  const elapsedMs = input.completedAt - finished.startedAt;
  const errorClass = input.error ? scrubErrorClass(input.error) : undefined;
  const errorFields = errorClass ? { errorClass } : {};

  return {
    commandPath: finished.commandPath,
    durationMs: Math.max(0, elapsedMs),
    outcome: input.outcome,
    ...errorFields,
    flagsPresent: finished.flagsPresent,
    hasProject: finished.hasProject,
    projectDir: finished.projectDir,
    projectGroupAttached: finished.attachProjectGroup,
  };
}
/**
 * Discards the active command span, if any, without producing a completed event.
 *
 * @internal
 */
export function resetCommandSpan(): void {
  activeCommandSpan = undefined;
}

View file

@ -0,0 +1,33 @@
import { describe, expect, it } from 'vitest';
import { isDemoConnection } from './demo-detect.js';
// Each case varies exactly one of: connection id, driver, or database file name.
describe('isDemoConnection', () => {
it('detects only the packaged Orbit SQLite demo recipe', () => {
// Baseline: demo id + sqlite driver + demo.db file name.
expect(
isDemoConnection('orbit_demo', {
driver: 'sqlite',
path: '/tmp/ktx-demo/demo.db',
}),
).toBe(true);
// Wrong driver.
expect(
isDemoConnection('orbit_demo', {
driver: 'postgres',
path: '/tmp/ktx-demo/demo.db',
}),
).toBe(false);
// Wrong connection id.
expect(
isDemoConnection('warehouse', {
driver: 'sqlite',
path: '/tmp/ktx-demo/demo.db',
}),
).toBe(false);
// Wrong file name in the same directory.
expect(
isDemoConnection('orbit_demo', {
driver: 'sqlite',
path: '/tmp/ktx-demo/private.db',
}),
).toBe(false);
});
});

View file

@ -0,0 +1,15 @@
import { basename } from 'node:path';
import type { KtxProjectConnectionConfig } from '../context/project/config.js';
import { DEMO_CONNECTION_ID } from '../demo-assets.js';
/**
 * Reports whether a configured connection is the packaged demo connection.
 *
 * All three conditions must hold: the id equals `DEMO_CONNECTION_ID`, the
 * driver is `'sqlite'`, and the file name of `path` is `demo.db`. Only the
 * file name is compared, so the containing directory does not matter. A
 * missing connection or a non-string `path` is never a demo connection.
 *
 * @param connectionId - Id the connection is configured under.
 * @param connection - Connection configuration, or `undefined` when absent.
 * @returns `true` only for the demo connection described above.
 */
export function isDemoConnection(
  connectionId: string,
  connection: KtxProjectConnectionConfig | undefined,
): boolean {
  if (!connection) {
    return false;
  }
  if (connectionId !== DEMO_CONNECTION_ID) {
    return false;
  }
  if (connection.driver !== 'sqlite') {
    return false;
  }
  const rawPath = connection.path;
  const fileName = basename(typeof rawPath === 'string' ? rawPath : '');
  return fileName === 'demo.db';
}

View file

@ -0,0 +1,123 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
import {
__resetTelemetryEmitterForTests,
shutdownTelemetryEmitter,
trackTelemetryEvent,
} from './emitter.js';
import type { BuiltTelemetryEvent } from './events.js';
// Shared recorders: every capture() call on the mocked client lands in `captures`,
// and `shutdown` is a spy standing in for the client's shutdown().
const captures: unknown[] = [];
const shutdown = vi.fn(async () => {});
// Placeholder project API key passed by tests that supply explicit live config.
function liveConfigId(): string {
return 'fixture';
}
// Replace the real posthog-node module with a constructor that returns the recorders above.
vi.mock('posthog-node', () => ({
PostHog: vi.fn().mockImplementation(function () {
return {
capture: (event: unknown) => captures.push(event),
shutdown,
};
}),
}));
// Minimal well-formed `command` event used as the payload in every test.
function commandEvent(): BuiltTelemetryEvent<'command'> {
return {
name: 'command',
properties: {
cliVersion: '0.4.1',
nodeVersion: 'v22.0.0',
osPlatform: 'darwin',
osRelease: '25.0.0',
arch: 'arm64',
runtime: 'node',
isCi: false,
commandPath: ['ktx', 'status'],
durationMs: 1,
outcome: 'ok',
flagsPresent: {},
hasProject: true,
projectGroupAttached: true,
},
};
}
describe('telemetry emitter', () => {
// Reset recorders and the emitter's cached client so tests do not observe each other.
beforeEach(() => {
captures.length = 0;
shutdown.mockClear();
__resetTelemetryEmitterForTests();
});
it('prints debug payloads without importing or sending to PostHog', async () => {
const stderr: string[] = [];
await trackTelemetryEvent({
event: commandEvent(),
distinctId: 'install-1',
projectId: 'project-1',
env: { KTX_TELEMETRY_DEBUG: '1' },
stderr: { write: (chunk) => stderr.push(chunk) },
});
expect(stderr.join('')).toContain('[telemetry]');
expect(stderr.join('')).toContain('"event":"command"');
// Debug mode writes to stderr only; nothing reaches the mocked client.
expect(captures).toEqual([]);
});
it('sends to PostHog by default once config constants are populated', async () => {
// No projectApiKey/host supplied: the emitter falls back to its built-in defaults.
await trackTelemetryEvent({
event: commandEvent(),
distinctId: 'install-1',
projectId: 'project-1',
env: {},
stderr: { write: () => {} },
});
expect(captures).toHaveLength(1);
expect(captures[0]).toMatchObject({
distinctId: 'install-1',
event: 'command',
groups: { project: 'project-1' },
});
});
it('captures with distinctId, properties, and groups when live config is supplied', async () => {
await trackTelemetryEvent({
event: commandEvent(),
distinctId: 'install-1',
projectId: 'project-1',
projectApiKey: liveConfigId(),
host: 'https://us.i.posthog.com',
env: {},
stderr: { write: () => {} },
});
expect(captures).toHaveLength(1);
expect(captures[0]).toMatchObject({
distinctId: 'install-1',
event: 'command',
groups: { project: 'project-1' },
properties: {
cliVersion: '0.4.1',
commandPath: ['ktx', 'status'],
},
});
});
it('shuts down the client without throwing', async () => {
// A client must exist before shutdown has anything to do, so send one event first.
await trackTelemetryEvent({
event: commandEvent(),
distinctId: 'install-1',
projectApiKey: liveConfigId(),
host: 'https://us.i.posthog.com',
env: {},
stderr: { write: () => {} },
});
await expect(shutdownTelemetryEmitter()).resolves.toBeUndefined();
expect(shutdown).toHaveBeenCalledTimes(1);
});
});

View file

@ -0,0 +1,125 @@
import type { BuiltTelemetryEvent } from './events.js';
/** Environment variables the emitter reads. */
export interface TelemetryEmitterEnv {
// Debug mode is on only when this is exactly '1'.
KTX_TELEMETRY_DEBUG?: string;
// Overrides the default ingestion host unless an explicit host is passed by the caller.
KTX_TELEMETRY_ENDPOINT?: string;
}
/** Minimal writable target used for debug output. */
export interface TelemetrySink {
write(chunk: string): void;
}
// Structural subset of the PostHog client that this module calls.
type PostHogClient = {
capture(event: {
distinctId: string;
event: string;
properties: Record<string, unknown>;
groups?: Record<string, string>;
}): void;
shutdown(): Promise<void> | void;
};
// PostHog public project ingestion key — safe to embed; capture-only, no read access.
const POSTHOG_PROJECT_API_KEY = 'phc_xbvZpbu8ZNLnogTbY7MEMWhCF2rzzApYsDndjKaRBXXx'; // pragma: allowlist secret
// Default ingestion host, used when neither an explicit host nor KTX_TELEMETRY_ENDPOINT is given.
const POSTHOG_HOST = 'https://us.i.posthog.com';
// Upper bound, in milliseconds, on how long shutdown waits for the client.
const SHUTDOWN_TIMEOUT_MS = 1500;
// Cached client promise, created on first use; resolves to null when the client could not be created.
let clientPromise: Promise<PostHogClient | null> | undefined;
/**
 * Resolves the ingestion host.
 *
 * Precedence: explicit argument, then `KTX_TELEMETRY_ENDPOINT`, then the
 * built-in default. Only `undefined`/`null` fall through; an empty string is
 * returned as-is.
 */
function telemetryHost(env: TelemetryEmitterEnv, explicitHost?: string): string {
  if (explicitHost != null) {
    return explicitHost;
  }
  const fromEnv = env.KTX_TELEMETRY_ENDPOINT;
  if (fromEnv != null) {
    return fromEnv;
  }
  return POSTHOG_HOST;
}
/**
 * Resolves the project API key: the explicit value when one is given
 * (including an empty string), otherwise the embedded default key.
 */
function telemetryProjectApiKey(explicitProjectApiKey?: string): string {
  if (explicitProjectApiKey != null) {
    return explicitProjectApiKey;
  }
  return POSTHOG_PROJECT_API_KEY;
}
/**
 * Reports whether live sending is possible: both the project API key and the
 * host must contain something other than whitespace.
 */
function liveTelemetryConfigured(projectApiKey: string, host: string): boolean {
  if (projectApiKey.trim() === '') {
    return false;
  }
  return host.trim() !== '';
}
/**
 * Returns the shared PostHog client, creating it on first use.
 *
 * Returns `null` without touching the cache when the key or host is blank.
 * Otherwise the client is created once from a dynamic `posthog-node` import and
 * the resulting promise is cached; later calls reuse it regardless of the
 * arguments they pass. A failure to import or construct the client is cached
 * as `null`.
 */
async function getPostHogClient(projectApiKey: string, host: string): Promise<PostHogClient | null> {
  if (!liveTelemetryConfigured(projectApiKey, host)) {
    return null;
  }
  if (clientPromise === undefined) {
    const loadClient = async (): Promise<PostHogClient | null> => {
      try {
        const { PostHog } = await import('posthog-node');
        // flushAt: 1 / flushInterval: 0 are passed so events are not held back for batching.
        return new PostHog(projectApiKey, { host, flushAt: 1, flushInterval: 0 });
      } catch {
        return null;
      }
    };
    clientPromise = loadClient();
  }
  return await clientPromise;
}
/** Debug mode is on only when `KTX_TELEMETRY_DEBUG` is exactly the string `'1'`. */
function debugEnabled(env: TelemetryEmitterEnv): boolean {
  const flag = env.KTX_TELEMETRY_DEBUG;
  return flag === '1';
}
/**
 * Writes one event to the sink as a single `[telemetry] <json>` line.
 *
 * The JSON carries `distinctId`, `event`, `properties` and, when a project id
 * is present, `groups.project`. Without a project id the `groups` key is
 * omitted from the output.
 */
function writeDebugPayload(input: {
  event: BuiltTelemetryEvent;
  distinctId: string;
  projectId?: string;
  stderr: TelemetrySink;
}): void {
  const payload = {
    distinctId: input.distinctId,
    event: input.event.name,
    properties: input.event.properties,
    groups: input.projectId ? { project: input.projectId } : undefined,
  };
  const line = `[telemetry] ${JSON.stringify(payload)}\n`;
  input.stderr.write(line);
}
/**
 * Emits one telemetry event.
 *
 * In debug mode the payload is written to `input.stderr` and nothing is sent.
 * Otherwise the event is captured on the shared PostHog client; when no client
 * is available the call is a no-op. Errors thrown by `capture` are swallowed,
 * so this function does not reject because of a capture failure.
 *
 * @param input.event - Built event (name plus validated properties).
 * @param input.distinctId - Identifier the event is attributed to.
 * @param input.projectId - Optional project group id; attached as `groups.project` when set.
 * @param input.env - Environment to read settings from; defaults to `process.env`.
 * @param input.stderr - Sink for debug output.
 * @param input.projectApiKey - Optional override of the embedded project key.
 * @param input.host - Optional override of the ingestion host.
 */
export async function trackTelemetryEvent(input: {
  event: BuiltTelemetryEvent;
  distinctId: string;
  projectId?: string;
  env?: TelemetryEmitterEnv;
  stderr: TelemetrySink;
  projectApiKey?: string;
  host?: string;
}): Promise<void> {
  const env = input.env ?? process.env;

  // Debug mode short-circuits before any client is created.
  if (debugEnabled(env)) {
    writeDebugPayload(input);
    return;
  }

  const client = await getPostHogClient(
    telemetryProjectApiKey(input.projectApiKey),
    telemetryHost(env, input.host),
  );
  if (!client) {
    return;
  }

  const groups = input.projectId ? { project: input.projectId } : undefined;
  try {
    client.capture({
      distinctId: input.distinctId,
      event: input.event.name,
      properties: input.event.properties,
      groups,
    });
  } catch {
    // Telemetry is best-effort: a failing capture must never surface to the caller.
  }
}
/**
 * Shuts down the shared PostHog client, waiting at most `SHUTDOWN_TIMEOUT_MS`.
 *
 * Does nothing when no client was ever created (or creation failed). Never
 * rejects because of the client: both a rejected promise and a synchronous
 * throw from `client.shutdown()` are swallowed.
 *
 * The timeout timer is cleared as soon as the race settles. Without that, a
 * pending timer would keep the Node event loop alive for the remainder of the
 * timeout even though shutdown had already finished.
 */
export async function shutdownTelemetryEmitter(): Promise<void> {
  const client = await clientPromise;
  if (!client) {
    return;
  }

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<void>((resolve) => {
    timer = setTimeout(resolve, SHUTDOWN_TIMEOUT_MS);
  });
  // Calling shutdown() inside an async function turns a synchronous throw into a
  // rejection, which the catch below then swallows like any other failure.
  const flush = (async (): Promise<void> => {
    await client.shutdown();
  })().catch(() => undefined);

  try {
    await Promise.race([flush, timeout]);
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Drops the cached client promise so the next event creates a fresh client.
 * The previously cached client, if any, is not shut down.
 *
 * @internal
 */
export function __resetTelemetryEmitterForTests(): void {
  clientPromise = undefined;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,141 @@
import { describe, expect, it } from 'vitest';
import { buildTelemetryEvent, type TelemetryCommonEnvelope } from './events.js';
// Substrings that must never appear anywhere in a serialized telemetry payload:
// path prefixes, host markers, SQL keywords, '@', and credential-like words.
// Matching is plain case-sensitive substring search, so short entries such as
// 'key' or '@' also constrain field names and values used in the fixtures.
const BLACKLIST = [
'/Users/',
'/home/',
'C:\\',
'localhost',
'.local',
'kaelio.com',
'select ',
'SELECT ',
'INSERT',
'CREATE',
'@',
'password',
'secret',
'token',
'key',
];
// Common envelope shared by every event built in this file.
const envelope: TelemetryCommonEnvelope = {
cliVersion: '0.4.1',
nodeVersion: 'v22.0.0',
osPlatform: 'darwin',
osRelease: '25.0.0',
arch: 'arm64',
runtime: 'node',
isCi: false,
};
// Builds one representative event per CLI event type, serializes them together,
// and checks that none of the forbidden substrings occurs in the result.
describe('telemetry privacy snapshot', () => {
it('does not emit known private substrings from phase 1 event payloads', () => {
const events = [
buildTelemetryEvent('install_first_run', envelope, {}),
buildTelemetryEvent('command', envelope, {
commandPath: ['ktx', 'sql'],
durationMs: 10,
outcome: 'error',
errorClass: 'KtxProjectMissingAbortError',
flagsPresent: {
'project-dir': true,
connection: true,
c: true,
},
hasProject: false,
projectGroupAttached: false,
}),
buildTelemetryEvent('setup_step', envelope, {
step: 'databases',
outcome: 'completed',
durationMs: 42,
}),
buildTelemetryEvent('connection_added', envelope, {
driver: 'postgres',
isDemoConnection: false,
}),
buildTelemetryEvent('connection_test', envelope, {
driver: 'postgres',
isDemoConnection: false,
outcome: 'error',
errorClass: 'KtxConnectionTestAbortError',
durationMs: 34,
serverVersion: '16',
}),
buildTelemetryEvent('project_stack_snapshot', envelope, {
connectors: [
{ driver: 'sqlite', isDemo: true },
{ driver: 'postgres', isDemo: false },
],
connectionCount: 2,
hasSl: true,
hasWiki: true,
hasMcp: true,
hasManagedRuntime: true,
}),
buildTelemetryEvent('ingest_completed', envelope, {
driver: 'postgres',
isDemoConnection: false,
schemaCount: 2,
tableCount: 4,
columnCount: 20,
rowsBucket: '<100k',
durationMs: 100,
outcome: 'ok',
}),
buildTelemetryEvent('scan_completed', envelope, {
driver: 'postgres',
tableCount: 4,
columnCount: 20,
inferredFkCount: 2,
declaredFkCount: 1,
durationMs: 70,
outcome: 'ok',
}),
buildTelemetryEvent('sl_validate_completed', envelope, {
sourceCount: 1,
modelCount: 3,
validationErrorCount: 0,
outcome: 'ok',
durationMs: 15,
}),
buildTelemetryEvent('sl_query_completed', envelope, {
mode: 'compile',
referencedSourceCount: 1,
referencedDimensionCount: 2,
referencedMeasureCount: 1,
durationMs: 18,
outcome: 'ok',
}),
buildTelemetryEvent('sql_completed', envelope, {
driver: 'postgres',
isDemoConnection: false,
queryVerb: 'select',
referencedTableCount: 3,
durationMs: 20,
outcome: 'ok',
}),
buildTelemetryEvent('wiki_query_completed', envelope, {
// Only the length of the query is recorded; the private-looking text itself never enters the payload.
queryLength: 'select private_table from /Users/alice'.length,
resultCount: 2,
durationMs: 8,
outcome: 'ok',
}),
buildTelemetryEvent('mcp_request_completed', envelope, {
toolName: 'sl_query',
outcome: 'error',
errorClass: 'KtxProjectMissingAbortError',
durationMs: 12,
sampleRate: 0.1,
}),
];
// One serialized blob for all events keeps the check to a single pass per forbidden substring.
const payload = JSON.stringify(events);
for (const forbidden of BLACKLIST) {
expect(payload).not.toContain(forbidden);
}
});
});

View file

@ -0,0 +1,165 @@
import { describe, expect, it } from 'vitest';
import {
buildTelemetryEvent,
telemetryEventCatalog,
telemetryEventSchemas,
type TelemetryCommonEnvelope,
} from './events.js';
// Common envelope fixture reused by every schema test in this file.
const envelope: TelemetryCommonEnvelope = {
cliVersion: '0.4.1',
nodeVersion: 'v22.0.0',
osPlatform: 'darwin',
osRelease: '25.0.0',
arch: 'arm64',
runtime: 'node',
isCi: false,
};
describe('telemetry event schemas', () => {
it('catalogs all v1 telemetry events', () => {
// toEqual on an array also pins the order of the catalog entries.
expect(telemetryEventCatalog.map((event) => event.name)).toEqual([
'install_first_run',
'command',
'setup_step',
'connection_added',
'connection_test',
'project_stack_snapshot',
'ingest_completed',
'scan_completed',
'sl_validate_completed',
'sl_query_completed',
'sql_completed',
'wiki_query_completed',
'mcp_request_completed',
'daemon_started',
'daemon_stopped',
'sl_plan_completed',
'sql_gen_completed',
]);
});
it('builds strict daemon telemetry events', () => {
// Daemon events reuse the common envelope with runtime 'daemon-py'.
const daemonEnvelope = {
...envelope,
runtime: 'daemon-py' as const,
nodeVersion: '3.13.0',
};
expect(
buildTelemetryEvent('sl_plan_completed', daemonEnvelope, {
outcome: 'ok',
stage: 'transpile',
durationMs: 25,
sourceCount: 2,
joinCount: 1,
}),
).toMatchObject({
name: 'sl_plan_completed',
properties: {
runtime: 'daemon-py',
outcome: 'ok',
stage: 'transpile',
sourceCount: 2,
joinCount: 1,
},
});
// An unmodeled `sql` field must be rejected by the strict schema.
expect(() =>
telemetryEventSchemas.sql_gen_completed.parse({
...daemonEnvelope,
outcome: 'ok',
dialect: 'postgres',
durationMs: 4,
sql: 'select * from private_table',
}),
).toThrow();
});
it('builds a strict install_first_run event', () => {
// install_first_run carries the envelope only, with no event-specific fields.
expect(buildTelemetryEvent('install_first_run', envelope, {})).toEqual({
name: 'install_first_run',
properties: envelope,
});
});
it('builds a strict command event with project grouping fields', () => {
expect(
buildTelemetryEvent('command', envelope, {
commandPath: ['ktx', 'status'],
durationMs: 12,
outcome: 'ok',
flagsPresent: { json: true },
hasProject: true,
projectGroupAttached: true,
}),
).toEqual({
name: 'command',
properties: {
...envelope,
commandPath: ['ktx', 'status'],
durationMs: 12,
outcome: 'ok',
flagsPresent: { json: true },
hasProject: true,
projectGroupAttached: true,
},
});
});
it('rejects unmodeled event properties', () => {
// `tableName` is not part of the command schema, so parsing must throw.
expect(() =>
telemetryEventSchemas.command.parse({
...envelope,
commandPath: ['ktx', 'status'],
durationMs: 12,
outcome: 'ok',
flagsPresent: {},
hasProject: true,
projectGroupAttached: true,
tableName: 'private_table',
}),
).toThrow();
});
it('builds strict Phase 2 events without private names or text', () => {
expect(
buildTelemetryEvent('connection_test', envelope, {
driver: 'postgres',
isDemoConnection: false,
outcome: 'ok',
durationMs: 34,
serverVersion: '16',
}),
).toMatchObject({
name: 'connection_test',
properties: {
driver: 'postgres',
isDemoConnection: false,
outcome: 'ok',
durationMs: 34,
serverVersion: '16',
},
});
// Raw SQL text is not a modeled field of sql_completed and must be rejected.
expect(() =>
telemetryEventSchemas.sql_completed.parse({
...envelope,
driver: 'postgres',
isDemoConnection: false,
queryVerb: 'select',
referencedTableCount: 1,
durationMs: 10,
outcome: 'ok',
sql: 'select * from private_table',
}),
).toThrow();
});
it('rejects raw private field names that are not in the telemetry schemas', () => {
// Case-sensitive substring checks over the serialized schemas, plus a direct
// check of the sql_completed field names.
expect(JSON.stringify(telemetryEventSchemas)).not.toContain('tableName');
expect(Object.keys(telemetryEventSchemas.sql_completed.shape)).not.toContain('sql');
expect(JSON.stringify(telemetryEventSchemas)).not.toContain('path');
});
});

View file

@ -0,0 +1,387 @@
import { arch, platform, release } from 'node:os';
import { z } from 'zod';
// Fields attached to every telemetry event. Like all schemas below it is
// `.strict()`, so any property that is not declared makes `parse` throw.
const telemetryCommonEnvelopeSchema = z
.object({
cliVersion: z.string(),
nodeVersion: z.string(),
osPlatform: z.string(),
osRelease: z.string(),
arch: z.string(),
runtime: z.enum(['node', 'daemon-py']),
isCi: z.boolean(),
})
.strict();
// install_first_run carries the common envelope and nothing else.
const installFirstRunSchema = telemetryCommonEnvelopeSchema.strict();
// command: one CLI command run. Its outcome additionally allows 'aborted'.
const commandSchema = telemetryCommonEnvelopeSchema
.extend({
commandPath: z.array(z.string()).min(1),
durationMs: z.number().nonnegative(),
outcome: z.enum(['ok', 'error', 'aborted']),
errorClass: z.string().optional(),
flagsPresent: z.record(z.string(), z.boolean()),
hasProject: z.boolean(),
projectGroupAttached: z.boolean(),
})
.strict();
// Two-valued outcome shared by most of the completion events below.
const outcomeSchema = z.enum(['ok', 'error']);
// setup_step: uses its own outcome vocabulary (completed / skipped / abandoned).
const setupStepSchema = telemetryCommonEnvelopeSchema
.extend({
step: z.enum([
'project',
'runtime',
'models',
'embeddings',
'secrets',
'databases',
'database-context-depth',
'sources',
'context',
'agents',
'demo-tour',
]),
outcome: z.enum(['completed', 'skipped', 'abandoned']),
durationMs: z.number().nonnegative(),
})
.strict();
const connectionAddedSchema = telemetryCommonEnvelopeSchema
.extend({
driver: z.string(),
isDemoConnection: z.boolean(),
})
.strict();
const connectionTestSchema = telemetryCommonEnvelopeSchema
.extend({
driver: z.string(),
isDemoConnection: z.boolean(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
durationMs: z.number().nonnegative(),
serverVersion: z.string().optional(),
})
.strict();
// project_stack_snapshot: the nested connector objects are strict as well.
const projectStackSnapshotSchema = telemetryCommonEnvelopeSchema
.extend({
connectors: z.array(z.object({ driver: z.string(), isDemo: z.boolean() }).strict()),
connectionCount: z.number().int().nonnegative(),
hasSl: z.boolean(),
hasWiki: z.boolean(),
hasMcp: z.boolean(),
hasManagedRuntime: z.boolean(),
})
.strict();
// Row counts are reported only as one of these coarse buckets, never as an exact number.
const rowsBucketSchema = z.enum(['<10k', '<100k', '<1M', '<10M', '>=10M']);
const ingestCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
driver: z.string(),
isDemoConnection: z.boolean(),
schemaCount: z.number().int().nonnegative(),
tableCount: z.number().int().nonnegative(),
columnCount: z.number().int().nonnegative(),
rowsBucket: rowsBucketSchema,
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
})
.strict();
const scanCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
driver: z.string(),
tableCount: z.number().int().nonnegative(),
columnCount: z.number().int().nonnegative(),
inferredFkCount: z.number().int().nonnegative(),
declaredFkCount: z.number().int().nonnegative(),
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
})
.strict();
const slValidateCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
sourceCount: z.number().int().nonnegative(),
modelCount: z.number().int().nonnegative(),
validationErrorCount: z.number().int().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
durationMs: z.number().nonnegative(),
})
.strict();
const slQueryCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
mode: z.enum(['compile', 'execute']),
referencedSourceCount: z.number().int().nonnegative(),
referencedDimensionCount: z.number().int().nonnegative(),
referencedMeasureCount: z.number().int().nonnegative(),
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
})
.strict();
// sql_completed: only the coarse verb and a table count are modeled; there is no field for SQL text.
const sqlCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
driver: z.string(),
isDemoConnection: z.boolean(),
queryVerb: z.enum(['select', 'explain', 'show', 'with', 'other']),
referencedTableCount: z.number().int().nonnegative(),
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
})
.strict();
// wiki_query_completed: records the query length, not the query. Unlike the
// other completion events it declares no errorClass field.
const wikiQueryCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
queryLength: z.number().int().nonnegative(),
resultCount: z.number().int().nonnegative(),
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
})
.strict();
// mcp_request_completed: sampleRate is pinned to the literal 0.1.
const mcpRequestCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
toolName: z.string(),
outcome: outcomeSchema,
durationMs: z.number().nonnegative(),
errorClass: z.string().optional(),
sampleRate: z.literal(0.1),
})
.strict();
const daemonStartedSchema = telemetryCommonEnvelopeSchema
.extend({
daemonVersion: z.string(),
pythonVersion: z.string(),
runtimeVersion: z.string(),
startupDurationMs: z.number().nonnegative(),
})
.strict();
const daemonStoppedSchema = telemetryCommonEnvelopeSchema
.extend({
reason: z.enum(['signal', 'request', 'crash']),
uptimeMs: z.number().nonnegative(),
})
.strict();
// NOTE(review): the two schemas below spell out z.enum(['ok', 'error']) inline;
// it has the same members as outcomeSchema above.
const slPlanCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
outcome: z.enum(['ok', 'error']),
stage: z.enum(['parse', 'resolve', 'compile', 'transpile']),
errorClass: z.string().optional(),
durationMs: z.number().nonnegative(),
sourceCount: z.number().int().nonnegative(),
joinCount: z.number().int().nonnegative(),
})
.strict();
const sqlGenCompletedSchema = telemetryCommonEnvelopeSchema
.extend({
outcome: z.enum(['ok', 'error']),
dialect: z.string(),
errorClass: z.string().optional(),
durationMs: z.number().nonnegative(),
})
.strict();
/**
 * Lookup from event name to its strict Zod schema. The keys of this object
 * define the `TelemetryEventName` type, and `buildTelemetryEvent` uses it to
 * validate every event.
 *
 * @internal
 */
export const telemetryEventSchemas = {
install_first_run: installFirstRunSchema,
command: commandSchema,
setup_step: setupStepSchema,
connection_added: connectionAddedSchema,
connection_test: connectionTestSchema,
project_stack_snapshot: projectStackSnapshotSchema,
ingest_completed: ingestCompletedSchema,
scan_completed: scanCompletedSchema,
sl_validate_completed: slValidateCompletedSchema,
sl_query_completed: slQueryCompletedSchema,
sql_completed: sqlCompletedSchema,
wiki_query_completed: wikiQueryCompletedSchema,
mcp_request_completed: mcpRequestCompletedSchema,
daemon_started: daemonStartedSchema,
daemon_stopped: daemonStoppedSchema,
sl_plan_completed: slPlanCompletedSchema,
sql_gen_completed: sqlGenCompletedSchema,
} as const;
/**
 * Human-readable catalog of every event: name, description, and the
 * event-specific field names (common envelope fields are not listed).
 * Entries appear in the same order as the keys of `telemetryEventSchemas`.
 * Nothing in this file ties the two together, so a field added to a schema
 * must also be added here by hand.
 *
 * @internal
 */
export const telemetryEventCatalog = [
{
name: 'install_first_run',
description: 'Emitted once when ~/.ktx/telemetry.json is created.',
fields: [],
},
{
name: 'command',
description: 'Emitted once for each Commander action that reaches preAction.',
fields: [
'commandPath',
'durationMs',
'outcome',
'errorClass',
'flagsPresent',
'hasProject',
'projectGroupAttached',
],
},
{
name: 'setup_step',
description: 'Emitted after an interactive setup step completes, skips, or aborts.',
fields: ['step', 'outcome', 'durationMs'],
},
{
name: 'connection_added',
description: 'Emitted when setup writes a database, source, or demo connection.',
fields: ['driver', 'isDemoConnection'],
},
{
name: 'connection_test',
description: 'Emitted after ktx connection test completes.',
fields: ['driver', 'isDemoConnection', 'outcome', 'errorClass', 'durationMs', 'serverVersion'],
},
{
name: 'project_stack_snapshot',
description: 'Emitted after commands that can summarize the local project stack.',
fields: ['connectors', 'connectionCount', 'hasSl', 'hasWiki', 'hasMcp', 'hasManagedRuntime'],
},
{
name: 'ingest_completed',
description: 'Emitted after a public ingest target completes.',
fields: [
'driver',
'isDemoConnection',
'schemaCount',
'tableCount',
'columnCount',
'rowsBucket',
'durationMs',
'outcome',
'errorClass',
],
},
{
name: 'scan_completed',
description: 'Emitted after schema scan or relationship inference completes.',
fields: [
'driver',
'tableCount',
'columnCount',
'inferredFkCount',
'declaredFkCount',
'durationMs',
'outcome',
'errorClass',
],
},
{
name: 'sl_validate_completed',
description: 'Emitted after ktx sl validate completes.',
fields: ['sourceCount', 'modelCount', 'validationErrorCount', 'outcome', 'errorClass', 'durationMs'],
},
{
name: 'sl_query_completed',
description: 'Emitted after ktx sl query compiles or executes.',
fields: [
'mode',
'referencedSourceCount',
'referencedDimensionCount',
'referencedMeasureCount',
'durationMs',
'outcome',
'errorClass',
],
},
{
name: 'sql_completed',
description: 'Emitted after ktx sql completes validation and execution.',
fields: [
'driver',
'isDemoConnection',
'queryVerb',
'referencedTableCount',
'durationMs',
'outcome',
'errorClass',
],
},
{
name: 'wiki_query_completed',
description: 'Emitted after a wiki query completes.',
fields: ['queryLength', 'resultCount', 'durationMs', 'outcome'],
},
{
name: 'mcp_request_completed',
description: 'Emitted for sampled MCP tool requests.',
fields: ['toolName', 'outcome', 'durationMs', 'errorClass', 'sampleRate'],
},
{
name: 'daemon_started',
description: 'Emitted when the long-lived ktx-daemon HTTP server starts.',
fields: ['daemonVersion', 'pythonVersion', 'runtimeVersion', 'startupDurationMs'],
},
{
name: 'daemon_stopped',
description: 'Emitted when the long-lived ktx-daemon HTTP server shuts down.',
fields: ['reason', 'uptimeMs'],
},
{
name: 'sl_plan_completed',
description: 'Emitted after a daemon semantic-layer planning pass completes.',
fields: ['outcome', 'stage', 'errorClass', 'durationMs', 'sourceCount', 'joinCount'],
},
{
name: 'sql_gen_completed',
description: 'Emitted after daemon SQL generation completes.',
fields: ['outcome', 'dialect', 'errorClass', 'durationMs'],
},
] as const;
/** Union of all event names, derived from the keys of `telemetryEventSchemas`. */
export type TelemetryEventName = keyof typeof telemetryEventSchemas;
/** Shape of the fields common to every event. */
export type TelemetryCommonEnvelope = z.infer<typeof telemetryCommonEnvelopeSchema>;
/** Full property shape (envelope plus event-specific fields) for a given event name. */
export type TelemetryEventProperties<Name extends TelemetryEventName> = z.infer<
(typeof telemetryEventSchemas)[Name]
>;
/** A validated event: its name together with the properties parsed by that event's schema. */
export interface BuiltTelemetryEvent<Name extends TelemetryEventName = TelemetryEventName> {
name: Name;
properties: TelemetryEventProperties<Name>;
}
/**
 * Builds the common envelope for events emitted by the Node CLI.
 *
 * `cliVersion` and `isCi` come from the caller; the Node version, OS platform,
 * OS release and CPU architecture are read from the current process, and
 * `runtime` is always `'node'`.
 *
 * @param input - CLI version string and whether the run is in CI.
 * @returns The envelope to merge into every event's properties.
 */
export function buildCommonEnvelope(input: { cliVersion: string; isCi: boolean }): TelemetryCommonEnvelope {
  const { cliVersion, isCi } = input;
  const host = {
    osPlatform: platform(),
    osRelease: release(),
    arch: arch(),
  };
  return {
    cliVersion,
    nodeVersion: process.version,
    ...host,
    runtime: 'node',
    isCi,
  };
}
/**
 * Builds a validated telemetry event.
 *
 * The envelope and the event-specific fields are merged (fields win on a name
 * clash) and parsed with the schema registered for `name`. Because the schemas
 * are strict, an undeclared property or an invalid value makes this throw.
 *
 * @param name - Event name; selects the schema.
 * @param envelope - Common envelope fields.
 * @param fields - Event-specific fields, i.e. the event's properties minus the envelope.
 * @returns The event name paired with the parsed properties.
 * @throws When the merged properties do not satisfy the event's schema.
 */
export function buildTelemetryEvent<Name extends TelemetryEventName>(
  name: Name,
  envelope: TelemetryCommonEnvelope,
  fields: Omit<TelemetryEventProperties<Name>, keyof TelemetryCommonEnvelope>,
): BuiltTelemetryEvent<Name> {
  const merged = { ...envelope, ...fields };
  const properties = telemetryEventSchemas[name].parse(merged) as TelemetryEventProperties<Name>;
  return { name, properties };
}

View file

@ -0,0 +1,209 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import {
computeTelemetryProjectId,
loadTelemetryIdentity,
readExistingTelemetryProjectId,
TELEMETRY_NOTICE,
type TelemetryIdentityEnv,
} from './identity.js';
/**
 * Creates a fake CLI io pair for tests.
 *
 * `io.stdout` reports the given TTY flag and discards writes; `io.stderr`
 * appends every chunk to an internal buffer. The returned `stderr()` accessor
 * yields everything written to `io.stderr` so far.
 */
function makeIo(stdoutIsTTY = true) {
  let captured = '';
  const stdout = { isTTY: stdoutIsTTY, write: () => {} };
  const stderr = {
    write: (chunk: string) => {
      captured += chunk;
    },
  };
  return {
    io: { stdout, stderr },
    stderr: () => captured,
  };
}
describe('telemetry identity', () => {
let homeDir: string;
let env: TelemetryIdentityEnv;
beforeEach(async () => {
homeDir = await mkdtemp(join(tmpdir(), 'ktx-telemetry-home-'));
env = {};
});
afterEach(async () => {
await rm(homeDir, { recursive: true, force: true });
});
it('creates the telemetry file and one-line notice on first interactive enabled load', async () => {
const testIo = makeIo(true);
const identity = await loadTelemetryIdentity({
homeDir,
env,
stdoutIsTTY: true,
stderr: testIo.io.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});
expect(identity.enabled).toBe(true);
expect(identity.installId).toMatch(/^[0-9a-f-]{36}$/);
expect(identity.createdFile).toBe(true);
expect(identity.noticeShown).toBe(true);
expect(testIo.stderr()).toBe(`${TELEMETRY_NOTICE}\n`);
const stored = JSON.parse(await readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')) as {
enabled: boolean;
noticeShownVersion: number;
};
expect(stored.enabled).toBe(true);
expect(stored.noticeShownVersion).toBe(1);
});
it('emits the notice without ANSI when NO_COLOR is set', async () => {
const testIo = makeIo(true);
await loadTelemetryIdentity({
homeDir,
env: { NO_COLOR: '1' },
stdoutIsTTY: true,
stderr: testIo.io.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});
expect(testIo.stderr()).toBe(`${TELEMETRY_NOTICE}\n`);
});
it('does not create a file when env disables telemetry', async () => {
const identity = await loadTelemetryIdentity({
homeDir,
env: { KTX_TELEMETRY_DISABLED: '1' },
stdoutIsTTY: true,
stderr: makeIo(true).io.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});
expect(identity.enabled).toBe(false);
await expect(readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')).rejects.toThrow();
});
it('does not create a file for CI or non-TTY command invocations', async () => {
await expect(
loadTelemetryIdentity({
homeDir,
env: { CI: '1' },
stdoutIsTTY: true,
stderr: makeIo(true).io.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
}),
).resolves.toMatchObject({ enabled: false, createdFile: false });
await expect(
loadTelemetryIdentity({
homeDir,
env: {},
stdoutIsTTY: false,
stderr: makeIo(false).io.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
}),
).resolves.toMatchObject({ enabled: false, createdFile: false });
});
it('honors persistent enabled false', async () => {
await mkdir(join(homeDir, '.ktx'), { recursive: true });
await writeFile(
join(homeDir, '.ktx', 'telemetry.json'),
JSON.stringify(
{
installId: '00000000-0000-4000-8000-000000000000',
enabled: false,
noticeShownAt: '2026-05-22T14:33:02.000Z',
noticeShownVersion: 1,
createdAt: '2026-05-22T14:33:02.000Z',
},
null,
2,
) + '\n',
'utf-8',
);
await expect(
loadTelemetryIdentity({
homeDir,
env,
stdoutIsTTY: true,
stderr: makeIo(true).io.stderr,
now: () => new Date('2026-05-22T15:00:00.000Z'),
}),
).resolves.toMatchObject({
installId: '00000000-0000-4000-8000-000000000000',
enabled: false,
createdFile: false,
});
});
// An unparseable telemetry.json is treated like a missing one: the loader resolves normally
// with a freshly created, enabled identity instead of throwing.
it('recreates a corrupted file instead of surfacing an error to users', async () => {
await mkdir(join(homeDir, '.ktx'), { recursive: true });
// Deliberately invalid JSON.
await writeFile(join(homeDir, '.ktx', 'telemetry.json'), '{bad json', 'utf-8');
const identity = await loadTelemetryIdentity({
homeDir,
env,
stdoutIsTTY: true,
stderr: makeIo(true).io.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});
expect(identity.enabled).toBe(true);
expect(identity.createdFile).toBe(true);
});
// The project id must be a 64-character lowercase hex digest that does not contain the
// directory name, is stable for the same inputs, and changes when the install id changes.
it('derives a salted project hash without exposing the path', () => {
const projectDir = resolve('/tmp/acme-private-project');
const projectId = computeTelemetryProjectId('00000000-0000-4000-8000-000000000000', projectDir);
expect(projectId).toMatch(/^[a-f0-9]{64}$/);
expect(projectId).not.toContain('acme');
// Same install id + same directory => same id.
expect(computeTelemetryProjectId('00000000-0000-4000-8000-000000000000', projectDir)).toBe(projectId);
// Different install id => different id for the same directory.
expect(computeTelemetryProjectId('11111111-1111-4111-8111-111111111111', projectDir)).not.toBe(projectId);
});
// readExistingTelemetryProjectId only consumes an identity that already exists on disk:
// with an enabled record it yields a hex project id, and with the opt-out variable set it
// yields undefined.
it('reads an existing project id for Python telemetry without creating identity', async () => {
// Seed an enabled identity file.
await mkdir(join(homeDir, '.ktx'), { recursive: true });
await writeFile(
join(homeDir, '.ktx', 'telemetry.json'),
JSON.stringify(
{
installId: '00000000-0000-4000-8000-000000000000',
enabled: true,
noticeShownAt: '2026-05-22T14:33:02.000Z',
noticeShownVersion: 1,
createdAt: '2026-05-22T14:33:02.000Z',
},
null,
2,
) + '\n',
'utf-8',
);
// No opt-out variables: a 64-character hex digest is returned.
await expect(
readExistingTelemetryProjectId({
homeDir,
projectDir: '/tmp/acme-private-project',
env: {},
}),
).resolves.toMatch(/^[a-f0-9]{64}$/);
// Opt-out variable set: nothing is returned even though the file exists and is enabled.
await expect(
readExistingTelemetryProjectId({
homeDir,
projectDir: '/tmp/acme-private-project',
env: { KTX_TELEMETRY_DISABLED: '1' },
}),
).resolves.toBeUndefined();
});
});

View file

@ -0,0 +1,151 @@
import { createHash, randomUUID } from 'node:crypto';
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { homedir } from 'node:os';
import { dirname, join, resolve } from 'node:path';
import { z } from 'zod';
/**
 * One-line notice written to stderr when a telemetry identity file is created for the
 * first time. It names the opt-out environment variable.
 *
 * @internal
 */
export const TELEMETRY_NOTICE =
'ktx collects anonymous usage data to improve the product. Opt out: set KTX_TELEMETRY_DISABLED=1.';
/** Value recorded as `noticeShownVersion` when a new identity file is written. */
const NOTICE_VERSION = 1;
/**
 * Shape of the persisted telemetry identity file.
 *
 * The object is `.strict()`, so a file carrying any key not listed here fails validation.
 */
const telemetryFileSchema = z
.object({
// Random per-install identifier; must be a UUID.
installId: z.uuid(),
// Persistent on/off switch stored in the file.
enabled: z.boolean(),
// Timestamp string and notice version recorded when the notice was shown (both optional).
noticeShownAt: z.string().optional(),
noticeShownVersion: z.number().int().optional(),
// Timestamp string recorded when the file was created.
createdAt: z.string(),
})
.strict();
/**
 * Subset of environment variables consulted by the telemetry identity code.
 *
 * @internal
 */
export interface TelemetryIdentityEnv {
// Any non-empty value of the next three variables disables telemetry.
KTX_TELEMETRY_DISABLED?: string;
DO_NOT_TRACK?: string;
CI?: string;
// Consulted only when deciding how to style the first-run notice.
NO_COLOR?: string;
TERM?: string;
}
/**
 * Produces the text of the first-run notice as it should be written to the terminal.
 *
 * When `NO_COLOR` is set or `TERM` is `dumb`, the notice is returned untouched.
 *
 * NOTE(review): as written, the styled branch also returns the notice text unchanged —
 * confirm whether terminal styling sequences were intended there.
 *
 * @param notice - Plain notice text.
 * @param env - Environment used to detect plain-output terminals.
 * @returns The notice text to print.
 */
function styleNotice(notice: string, env: TelemetryIdentityEnv): string {
  const plainOutputRequested = Boolean(env.NO_COLOR) || env.TERM === 'dumb';
  if (plainOutputRequested) {
    return notice;
  }
  return `${notice}`;
}
/** Inputs for {@link loadTelemetryIdentity}. */
export interface LoadTelemetryIdentityOptions {
// Home directory that holds `.ktx/telemetry.json`; defaults to the OS home directory.
homeDir?: string;
// Environment to consult; defaults to `process.env`.
env?: TelemetryIdentityEnv;
// Telemetry is treated as disabled unless this is exactly `true`.
stdoutIsTTY: boolean;
// Sink that receives the first-run notice.
stderr: { write(chunk: string): void };
// Clock used for the timestamps written into a new identity file; defaults to `new Date()`.
now?: () => Date;
}
/** Result of {@link loadTelemetryIdentity}. */
export interface TelemetryIdentityState {
// Install identifier, when one could be read from disk or was just generated.
installId?: string;
// Whether telemetry may be emitted for this invocation.
enabled: boolean;
// True only when this call wrote a new identity file.
createdFile: boolean;
// True only when this call wrote the first-run notice to stderr.
noticeShown: boolean;
// Location of the identity file that was consulted.
path: string;
}
/**
 * Resolves where the telemetry identity file lives for a given home directory.
 *
 * @param homeDir - Directory that contains (or will contain) the `.ktx` folder.
 * @returns `<homeDir>/.ktx/telemetry.json`, joined with the platform separator.
 */
function telemetryPath(homeDir: string): string {
  const stateDir = join(homeDir, '.ktx');
  return join(stateDir, 'telemetry.json');
}
/**
 * Tells whether the environment alone switches telemetry off.
 *
 * Any non-empty value of `KTX_TELEMETRY_DISABLED`, `DO_NOT_TRACK`, or `CI` counts as an opt-out.
 *
 * @param env - Environment to inspect.
 * @returns `true` when at least one of the opt-out variables is set to a non-empty string.
 */
function envDisablesTelemetry(env: TelemetryIdentityEnv): boolean {
  const optOutSignals = [env.KTX_TELEMETRY_DISABLED, env.DO_NOT_TRACK, env.CI];
  return optOutSignals.some((value) => Boolean(value));
}
/**
 * Loads and validates the telemetry identity file.
 *
 * Every failure mode — unreadable file, invalid JSON, or a payload that does not satisfy
 * `telemetryFileSchema` — is collapsed into `null` rather than thrown.
 *
 * @param path - Location of the identity file.
 * @returns The validated file contents, or `null` when no usable file exists.
 */
async function readTelemetryFile(path: string): Promise<z.infer<typeof telemetryFileSchema> | null> {
  try {
    const rawText = await readFile(path, 'utf-8');
    const decoded: unknown = JSON.parse(rawText);
    return telemetryFileSchema.parse(decoded);
  } catch {
    return null;
  }
}
/**
 * Persists the telemetry identity as pretty-printed JSON followed by a newline.
 *
 * The parent directory is created first (recursively) if it does not exist. Errors from
 * either filesystem call propagate to the caller.
 *
 * @param path - Destination file.
 * @param value - Identity record to serialize.
 */
async function writeTelemetryFile(path: string, value: z.infer<typeof telemetryFileSchema>): Promise<void> {
  const parentDir = dirname(path);
  await mkdir(parentDir, { recursive: true });
  const serialized = `${JSON.stringify(value, null, 2)}\n`;
  await writeFile(path, serialized, 'utf-8');
}
/**
 * Determines the telemetry identity for the current invocation, creating it on first use.
 *
 * Outcomes, in order of precedence:
 * 1. The environment opts out, or stdout is not a TTY: telemetry is disabled. Any install id
 *    already on disk is still reported, but nothing is written.
 * 2. A valid identity file exists: its `installId` and `enabled` flag are returned as stored.
 * 3. No usable file exists (missing, unreadable, or invalid): a new identity is generated and
 *    written, and the notice is printed to `options.stderr`. If the write fails, telemetry is
 *    reported as disabled and no notice is printed.
 *
 * @param options - Home directory, environment, TTY flag, stderr sink, and optional clock.
 * @returns The identity state for this invocation.
 */
export async function loadTelemetryIdentity(options: LoadTelemetryIdentityOptions): Promise<TelemetryIdentityState> {
  const env = options.env ?? process.env;
  const path = telemetryPath(options.homeDir ?? homedir());
  const suppressed = envDisablesTelemetry(env) || options.stdoutIsTTY !== true;

  // Every outcome starts from whatever is already on disk.
  const stored = await readTelemetryFile(path);

  if (suppressed) {
    return {
      installId: stored?.installId,
      enabled: false,
      createdFile: false,
      noticeShown: false,
      path,
    };
  }

  if (stored) {
    return {
      installId: stored.installId,
      enabled: stored.enabled,
      createdFile: false,
      noticeShown: false,
      path,
    };
  }

  // First run (or unusable file): mint a fresh identity stamped with the current time.
  const clock = options.now ?? (() => new Date());
  const timestamp = clock().toISOString();
  const fresh = {
    installId: randomUUID(),
    enabled: true,
    noticeShownAt: timestamp,
    noticeShownVersion: NOTICE_VERSION,
    createdAt: timestamp,
  };

  try {
    await writeTelemetryFile(path, fresh);
  } catch {
    // The identity could not be persisted, so stay silent and disabled for this run.
    return {
      enabled: false,
      createdFile: false,
      noticeShown: false,
      path,
    };
  }

  // The notice is printed only after the identity was stored successfully.
  options.stderr.write(`${styleNotice(TELEMETRY_NOTICE, env)}\n`);
  return {
    installId: fresh.installId,
    enabled: true,
    createdFile: true,
    noticeShown: true,
    path,
  };
}
/**
 * Derives an identifier for a project directory, salted with the install id.
 *
 * The directory is resolved to an absolute path, combined with the install id as
 * `<installId>:<absolutePath>`, and hashed with SHA-256.
 *
 * @param installId - Install identifier used as the salt.
 * @param projectDir - Project directory; relative paths are resolved against the current
 *   working directory.
 * @returns The lowercase hex SHA-256 digest (64 characters).
 */
export function computeTelemetryProjectId(installId: string, projectDir: string): string {
  const absoluteProjectDir = resolve(projectDir);
  const hasher = createHash('sha256');
  hasher.update(`${installId}:${absoluteProjectDir}`);
  return hasher.digest('hex');
}
/**
 * Computes the project id from an identity that already exists on disk, without ever
 * creating or modifying the identity file.
 *
 * @param options.projectDir - Project directory to derive the id for.
 * @param options.homeDir - Home directory holding `.ktx/telemetry.json`; defaults to the OS
 *   home directory.
 * @param options.env - Environment to consult; defaults to `process.env`. Only
 *   `KTX_TELEMETRY_DISABLED` and `DO_NOT_TRACK` are checked here.
 * @returns The project id, or `undefined` when the environment opts out, no valid identity
 *   file exists, or the stored identity has `enabled: false`.
 */
export async function readExistingTelemetryProjectId(options: {
  projectDir: string;
  homeDir?: string;
  env?: Pick<TelemetryIdentityEnv, 'KTX_TELEMETRY_DISABLED' | 'DO_NOT_TRACK'>;
}): Promise<string | undefined> {
  const env = options.env ?? process.env;
  const optedOut = Boolean(env.KTX_TELEMETRY_DISABLED || env.DO_NOT_TRACK);
  if (optedOut) {
    return undefined;
  }

  const identityFile = telemetryPath(options.homeDir ?? homedir());
  const stored = await readTelemetryFile(identityFile);
  if (stored === null || !stored.enabled) {
    return undefined;
  }

  return computeTelemetryProjectId(stored.installId, options.projectDir);
}

View file

@ -0,0 +1,146 @@
import { getKtxCliPackageInfo, type KtxCliIo, type KtxCliPackageInfo } from '../cli-runtime.js';
import { loadKtxProject } from '../context/project/project.js';
import {
beginCommandSpan,
completeCommandSpan,
type CommandOutcome,
type CompletedCommandSpan,
} from './command-hook.js';
import { shutdownTelemetryEmitter, trackTelemetryEvent } from './emitter.js';
import {
buildCommonEnvelope,
buildTelemetryEvent,
type TelemetryCommonEnvelope,
type TelemetryEventName,
type TelemetryEventProperties,
} from './events.js';
import { computeTelemetryProjectId, loadTelemetryIdentity } from './identity.js';
import { buildProjectStackSnapshotFields } from './project-snapshot.js';
export { beginCommandSpan, completeCommandSpan, shutdownTelemetryEmitter };
export type { CommandOutcome, CompletedCommandSpan };
/**
 * Loads the telemetry identity and, when this call created it, reports an
 * `install_first_run` event.
 *
 * Nothing is emitted when telemetry is disabled, when the identity file already existed, or
 * when no install id is available.
 *
 * @param io - CLI I/O handles; `stdout.isTTY` gates telemetry and `stderr` is passed through
 *   to the identity loader and the emitter.
 * @param packageInfo - Provides the CLI version placed in the event envelope.
 */
export async function showTelemetryNoticeIfNeeded(io: KtxCliIo, packageInfo: KtxCliPackageInfo): Promise<void> {
  const { enabled, createdFile, installId } = await loadTelemetryIdentity({
    stdoutIsTTY: io.stdout.isTTY === true,
    stderr: io.stderr,
    env: process.env,
  });

  const isFirstRun = enabled && createdFile;
  if (!isFirstRun || !installId) {
    return;
  }

  const envelope = buildCommonEnvelope({
    cliVersion: packageInfo.version,
    isCi: Boolean(process.env.CI),
  });
  const event = buildTelemetryEvent('install_first_run', envelope, {});

  await trackTelemetryEvent({
    event,
    distinctId: installId,
    env: process.env,
    stderr: io.stderr,
  });
}
/**
 * Event-specific properties for `Name`: the full property set of the event minus the keys
 * that belong to the common envelope.
 */
type TelemetryEventFields<Name extends TelemetryEventName> = Omit<
TelemetryEventProperties<Name>,
keyof TelemetryCommonEnvelope
>;
// Project directories for which a stack snapshot has already been attempted in this process.
const emittedProjectSnapshots = new Set<string>();
/** Fraction of processes that are sampled in for MCP telemetry. */
const MCP_SAMPLE_RATE = 0.1 as const;
/** Memoized sampling decision; `undefined` until the first call to `shouldEmitMcpTelemetry`. */
let mcpSampled: boolean | undefined;

/**
 * Decides whether this process is sampled in for MCP telemetry.
 *
 * The random draw happens on the first call only; later calls return the same answer.
 *
 * @returns `true` when the process was sampled in.
 */
export function shouldEmitMcpTelemetry(): boolean {
  if (mcpSampled === undefined) {
    mcpSampled = Math.random() < MCP_SAMPLE_RATE;
  }
  return mcpSampled;
}

/**
 * Exposes the sampling rate used by `shouldEmitMcpTelemetry`.
 *
 * @returns The constant rate, `0.1`.
 */
export function mcpTelemetrySampleRate(): 0.1 {
  return MCP_SAMPLE_RATE;
}
/**
 * Builds and sends a single telemetry event, provided telemetry is enabled for this run.
 *
 * The identity is loaded on every call. The event is dropped when telemetry is disabled or
 * no install id is available. When `projectDir` is given, a project id derived from the
 * install id and that directory is attached.
 *
 * @param input.name - Event name.
 * @param input.fields - Event-specific fields (common envelope fields are added here).
 * @param input.io - CLI I/O handles used for TTY detection and stderr.
 * @param input.packageInfo - CLI package info; looked up via `getKtxCliPackageInfo()` when omitted.
 * @param input.projectDir - Optional project directory used to derive the project id.
 */
export async function emitTelemetryEvent<Name extends TelemetryEventName>(input: {
  name: Name;
  fields: TelemetryEventFields<Name>;
  io: KtxCliIo;
  packageInfo?: KtxCliPackageInfo;
  projectDir?: string;
}): Promise<void> {
  const { io, projectDir } = input;
  const identity = await loadTelemetryIdentity({
    stdoutIsTTY: io.stdout.isTTY === true,
    stderr: io.stderr,
    env: process.env,
  });

  const installId = identity.enabled ? identity.installId : undefined;
  if (!installId) {
    return;
  }

  const packageInfo = input.packageInfo ?? getKtxCliPackageInfo();
  const projectId = projectDir ? computeTelemetryProjectId(installId, projectDir) : undefined;
  const envelope = buildCommonEnvelope({
    cliVersion: packageInfo.version,
    isCi: Boolean(process.env.CI),
  });

  await trackTelemetryEvent({
    event: buildTelemetryEvent(input.name, envelope, input.fields),
    distinctId: installId,
    projectId,
    env: process.env,
    stderr: io.stderr,
  });
}
/**
 * Emits a `project_stack_snapshot` event for a project directory, at most once per process
 * for each distinct `projectDir` string.
 *
 * The directory is marked as handled before the project is loaded, so a failed load is not
 * retried later in the same process. A project that fails to load produces no event.
 *
 * @param input.projectDir - Project directory to snapshot.
 * @param input.io - CLI I/O handles forwarded to the emitter.
 * @param input.packageInfo - Optional CLI package info forwarded to the emitter.
 */
export async function emitProjectStackSnapshot(input: {
  projectDir: string;
  io: KtxCliIo;
  packageInfo?: KtxCliPackageInfo;
}): Promise<void> {
  const { projectDir, io, packageInfo } = input;

  if (emittedProjectSnapshots.has(projectDir)) {
    return;
  }
  emittedProjectSnapshots.add(projectDir);

  let project: Awaited<ReturnType<typeof loadKtxProject>>;
  try {
    project = await loadKtxProject({ projectDir });
  } catch {
    // Telemetry must not fail the command: an unloadable project simply yields no snapshot.
    return;
  }

  const fields = await buildProjectStackSnapshotFields(project);
  await emitTelemetryEvent({
    name: 'project_stack_snapshot',
    fields,
    projectDir,
    io,
    packageInfo,
  });
}
/**
 * Emits the `command` event for a finished command span.
 *
 * The span's `projectDir` is never sent as an event field. It is used only to derive the
 * project id, and only when the span has `projectGroupAttached` set.
 *
 * @param input.completed - Completed span, or `undefined` when there is nothing to report.
 * @param input.packageInfo - CLI package info forwarded to the emitter.
 * @param input.io - CLI I/O handles forwarded to the emitter.
 */
export async function emitCompletedCommand(input: {
  completed: CompletedCommandSpan | undefined;
  packageInfo: KtxCliPackageInfo;
  io: KtxCliIo;
}): Promise<void> {
  const { completed, io, packageInfo } = input;
  if (!completed) {
    return;
  }

  // Separate the directory from the fields so the path itself is never part of the payload.
  const { projectDir: spanProjectDir, ...eventFields } = completed;
  const projectDir = completed.projectGroupAttached ? spanProjectDir : undefined;

  await emitTelemetryEvent({
    name: 'command',
    fields: eventFields,
    projectDir,
    io,
    packageInfo,
  });
}

View file

@ -0,0 +1,78 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { buildProjectStackSnapshotFields } from './project-snapshot.js';
// Exercises buildProjectStackSnapshotFields against a throwaway project directory and checks
// that the resulting telemetry fields contain only coarse facts (drivers, counts, booleans).
describe('buildProjectStackSnapshotFields', () => {
let projectDir: string;
// Each test gets its own temporary project directory, removed afterwards.
beforeEach(async () => {
projectDir = await mkdtemp(join(tmpdir(), 'ktx-stack-snapshot-'));
});
afterEach(async () => {
await rm(projectDir, { recursive: true, force: true });
});
it('summarizes connectors and project capabilities without names or paths', async () => {
// Lay out one semantic-layer YAML file, one wiki Markdown file, and a root-level .mcp.json
// so that hasSl, hasWiki and hasMcp are all expected to be true.
await mkdir(join(projectDir, 'semantic-layer', 'warehouse'), { recursive: true });
await mkdir(join(projectDir, 'wiki', 'global'), { recursive: true });
await writeFile(join(projectDir, 'semantic-layer', 'warehouse', 'orders.yaml'), 'name: orders\n');
await writeFile(join(projectDir, 'wiki', 'global', 'revenue.md'), '# Revenue\n');
await writeFile(join(projectDir, '.mcp.json'), '{"mcpServers":{"ktx":{}}}\n');
const fields = await buildProjectStackSnapshotFields({
projectDir,
config: {
// Two connections: the expectations below treat the sqlite one as a demo connection and
// the postgres one as a regular connection.
connections: {
orbit_demo: { driver: 'sqlite', path: join(projectDir, 'demo.db') },
warehouse: { driver: 'postgres', readonly: true },
},
ingest: {
adapters: [],
embeddings: { backend: 'sentence-transformers', dimensions: 384 },
workUnits: { stepBudget: 40, maxConcurrency: 1, failureMode: 'continue' },
},
llm: { provider: { backend: 'none' }, models: {}, promptCaching: {} },
scan: {
enrichment: { mode: 'none' },
relationships: {
enabled: true,
llmProposals: true,
validationRequiredForManifest: true,
acceptThreshold: 0.85,
reviewThreshold: 0.55,
maxLlmTablesPerBatch: 40,
maxCandidatesPerColumn: 25,
profileSampleRows: 10000,
profileConcurrency: 4,
validationConcurrency: 4,
},
},
storage: {
state: 'sqlite',
search: 'sqlite-fts5',
git: { auto_commit: true, author: 'ktx <ktx@example.com>' },
},
agent: { run_research: { enabled: false, max_iterations: 20, default_toolset: [] } },
memory: { auto_commit: true },
},
});
// Exact payload: connection ids are absent, only driver names and demo flags remain.
expect(fields).toEqual({
connectors: [
{ driver: 'sqlite', isDemo: true },
{ driver: 'postgres', isDemo: false },
],
connectionCount: 2,
hasSl: true,
hasWiki: true,
hasMcp: true,
hasManagedRuntime: true,
});
// Privacy guard: neither the project path nor a connection id may appear in the payload.
expect(JSON.stringify(fields)).not.toContain(projectDir);
expect(JSON.stringify(fields)).not.toContain('warehouse');
});
});

View file

@ -0,0 +1,67 @@
import { readdir } from 'node:fs/promises';
import { join } from 'node:path';
import type { KtxProjectConfig } from '../context/project/config.js';
import { resolveProjectRuntimeRequirements } from '../runtime-requirements.js';
import { isDemoConnection } from './demo-detect.js';
/**
 * Reports whether `dir`, or any directory nested beneath it, contains a regular file whose
 * extension is listed in `extensions`.
 *
 * The extension is everything from the last `.` in the file name onwards (for example
 * `.yaml`), so entries in `extensions` must include the leading dot. A file name without any
 * dot has no extension and never matches.
 *
 * @param dir - Directory to search recursively.
 * @param extensions - Accepted extensions, each including the leading dot.
 * @returns `true` as soon as a matching file is found; `false` when there is none or when
 *   `dir` cannot be read.
 */
async function hasFileWithExtension(dir: string, extensions: Set<string>): Promise<boolean> {
  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    // A missing or unreadable directory simply means "nothing found".
    return false;
  }

  for (const entry of entries) {
    if (entry.isDirectory()) {
      if (await hasFileWithExtension(join(dir, entry.name), extensions)) {
        return true;
      }
      continue;
    }
    if (!entry.isFile()) {
      continue;
    }
    // lastIndexOf yields -1 for names without a dot; slicing from -1 would wrongly treat the
    // final character of the name as its extension, so that case is excluded explicitly.
    const dotIndex = entry.name.lastIndexOf('.');
    if (dotIndex >= 0 && extensions.has(entry.name.slice(dotIndex))) {
      return true;
    }
  }
  return false;
}
/**
 * Checks whether `dir` directly contains a regular file with one of the given names.
 *
 * Only the immediate children of `dir` are considered; subdirectories are not searched.
 *
 * @param dir - Directory to list.
 * @param filenames - Exact file names to look for.
 * @returns `true` when a matching regular file exists; `false` otherwise, including when
 *   `dir` cannot be read.
 */
async function hasFileNamed(dir: string, filenames: Set<string>): Promise<boolean> {
  let listing;
  try {
    listing = await readdir(dir, { withFileTypes: true });
  } catch {
    return false;
  }

  for (const item of listing) {
    if (filenames.has(item.name) && item.isFile()) {
      return true;
    }
  }
  return false;
}
/**
 * Heuristically detects an MCP configuration in a project.
 *
 * The checks run in order and stop at the first hit:
 * 1. any `.json` file anywhere under `<projectDir>/.ktx`,
 * 2. any `.json` file anywhere under `<projectDir>/.cursor`,
 * 3. a `.mcp.json` file directly inside `projectDir`.
 *
 * @param projectDir - Project root to inspect.
 * @returns `true` when any of the checks succeeds.
 */
async function hasMcpConfig(projectDir: string): Promise<boolean> {
  const jsonOnly = new Set(['.json']);
  for (const configDir of ['.ktx', '.cursor']) {
    if (await hasFileWithExtension(join(projectDir, configDir), jsonOnly)) {
      return true;
    }
  }
  return hasFileNamed(projectDir, new Set(['.mcp.json']));
}
/**
 * Summarizes a project's stack for the `project_stack_snapshot` telemetry event.
 *
 * The summary contains, per configured connection, the normalized driver name (trimmed,
 * lower-cased, `'unknown'` when missing or blank) and a demo flag. It also carries the
 * connection count and booleans for semantic-layer YAML files, wiki Markdown/MDX files, an
 * MCP configuration, and whether any runtime requirement features were resolved. Connection
 * ids are used only to compute the demo flag and are not copied into the result.
 *
 * @param input.projectDir - Project root whose directories are inspected.
 * @param input.config - Loaded project configuration.
 * @returns The snapshot fields.
 */
export async function buildProjectStackSnapshotFields(input: {
  projectDir: string;
  config: KtxProjectConfig;
}) {
  const { projectDir, config } = input;

  const connectors = Object.entries(config.connections).map(([connectionId, connection]) => {
    const normalizedDriver = String(connection.driver ?? 'unknown').trim().toLowerCase();
    return {
      driver: normalizedDriver || 'unknown',
      isDemo: isDemoConnection(connectionId, connection),
    };
  });

  const runtimeRequirements = resolveProjectRuntimeRequirements(config, {
    env: process.env,
  });

  const semanticLayerDir = join(projectDir, 'semantic-layer');
  const wikiDir = join(projectDir, 'wiki');
  const hasSl = await hasFileWithExtension(semanticLayerDir, new Set(['.yaml', '.yml']));
  const hasWiki = await hasFileWithExtension(wikiDir, new Set(['.md', '.mdx']));
  const hasMcp = await hasMcpConfig(projectDir);

  return {
    connectors,
    connectionCount: connectors.length,
    hasSl,
    hasWiki,
    hasMcp,
    hasManagedRuntime: runtimeRequirements.features.length > 0,
  };
}

View file

@ -0,0 +1,26 @@
import { describe, expect, it } from 'vitest';
import { buildTelemetrySchemaArtifact } from './schema-writer.js';
// Checks the shape of the exported schema artifact: the JSON Schema dialect, the list of
// common envelope fields, the presence of known events in the catalog, and that a per-event
// definition is a closed object.
describe('telemetry schema writer', () => {
it('exports a schema artifact with the full catalog and strict metadata', () => {
const artifact = buildTelemetrySchemaArtifact();
expect(artifact.$schema).toBe('https://json-schema.org/draft/2020-12/schema');
// Exact order matters here because toEqual compares arrays positionally.
expect(artifact['x-ktx-common-fields']).toEqual([
'cliVersion',
'nodeVersion',
'osPlatform',
'osRelease',
'arch',
'runtime',
'isCi',
]);
// Spot-check two event names in the catalog.
expect(artifact['x-ktx-catalog'].map((event) => event.name)).toContain('daemon_started');
expect(artifact['x-ktx-catalog'].map((event) => event.name)).toContain('sql_gen_completed');
// The event definition must reject unknown properties.
expect(artifact.$defs.sql_gen_completed).toMatchObject({
type: 'object',
additionalProperties: false,
});
});
});

View file

@ -0,0 +1,63 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { z } from 'zod';
import { telemetryEventCatalog, telemetryEventSchemas } from './events.js';
// Field names published in the artifact under `x-ktx-common-fields`.
const commonFields = ['cliVersion', 'nodeVersion', 'osPlatform', 'osRelease', 'arch', 'runtime', 'isCi'] as const;
/** Shape of the JSON document produced by `buildTelemetrySchemaArtifact`. */
export interface TelemetrySchemaArtifact {
// Fixed JSON Schema header values.
$schema: 'https://json-schema.org/draft/2020-12/schema';
title: 'ktx telemetry events';
type: 'object';
additionalProperties: false;
// Extension keys: names of the common envelope fields and the event catalog.
'x-ktx-common-fields': string[];
'x-ktx-catalog': Array<{ name: string; description: string; fields: readonly string[] }>;
// One JSON Schema definition per event, keyed by event name.
$defs: Record<string, unknown>;
}
/**
 * Assembles the telemetry schema artifact from the in-code event catalog.
 *
 * The catalog entries are reduced to name, description and field list. Every entry of
 * `telemetryEventSchemas` is converted to a draft 2020-12 JSON Schema and stored under
 * `$defs` by event name.
 *
 * @returns The complete artifact, ready to be serialized.
 * @internal
 */
export function buildTelemetrySchemaArtifact(): TelemetrySchemaArtifact {
  const catalog = telemetryEventCatalog.map(({ name, description, fields }) => ({
    name,
    description,
    fields,
  }));

  const definitionEntries = Object.entries(telemetryEventSchemas).map(([name, schema]) => [
    name,
    z.toJSONSchema(schema, { target: 'draft-2020-12' }),
  ]);

  return {
    $schema: 'https://json-schema.org/draft/2020-12/schema',
    title: 'ktx telemetry events',
    type: 'object',
    additionalProperties: false,
    'x-ktx-common-fields': [...commonFields],
    'x-ktx-catalog': catalog,
    $defs: Object.fromEntries(definitionEntries),
  };
}
/**
 * Writes the schema artifact to `path` as pretty-printed JSON followed by a newline.
 *
 * The path is resolved to an absolute location and its parent directory is created
 * (recursively) before the file is written.
 *
 * @param path - Destination file; may be relative to the current working directory.
 */
async function writeTelemetrySchemaArtifact(path: string): Promise<void> {
  const destination = resolve(path);
  await mkdir(dirname(destination), { recursive: true });
  const serialized = `${JSON.stringify(buildTelemetrySchemaArtifact(), null, 2)}\n`;
  await writeFile(destination, serialized, 'utf-8');
}
/**
 * Command-line entry point: writes the schema artifact to every target path given after the
 * first two `argv` entries.
 *
 * Targets are written one after another, in the order given.
 *
 * @param argv - Full argument vector, as in `process.argv`.
 * @throws Error with a usage message when no target path is supplied.
 */
async function main(argv: string[]): Promise<void> {
  const [, , ...targets] = argv;
  if (targets.length === 0) {
    throw new Error('Usage: node dist/telemetry/schema-writer.js <target> [target...]');
  }

  for (const target of targets) {
    await writeTelemetrySchemaArtifact(target);
  }
}
// Run `main` only when this module is the script that was launched, not when it is imported.
// The outer check requires that the module URL survives a round trip through a file path and
// that an entry script argument (process.argv[1]) is present.
if (import.meta.url === pathToFileURL(fileURLToPath(import.meta.url)).href && process.argv[1]) {
// Compare the module URL with the URL of the resolved entry script.
const invoked = pathToFileURL(resolve(process.argv[1])).href;
if (import.meta.url === invoked) {
await main(process.argv);
}
}

View file

@ -0,0 +1,25 @@
import { describe, expect, it } from 'vitest';
import { scrubErrorClass } from './scrubber.js';
// Local error subclass used to check that an ordinary class name passes through the scrubber.
class KtxProjectMissingAbortError extends Error {}
// scrubErrorClass is expected to return the constructor name only when it looks like a plain
// class identifier, and undefined for anything that could carry user data.
describe('scrubErrorClass', () => {
it('keeps normal JavaScript class names', () => {
expect(scrubErrorClass(new KtxProjectMissingAbortError('missing'))).toBe('KtxProjectMissingAbortError');
});
// Names that look like a filesystem path, a URL, an email address, or that are 81 characters
// long must be dropped.
it('drops path-like, URL-like, email-like, and long values', () => {
expect(scrubErrorClass({ constructor: { name: '/Users/alice/project' } })).toBeUndefined();
expect(scrubErrorClass({ constructor: { name: 'https://example.test/error' } })).toBeUndefined();
expect(scrubErrorClass({ constructor: { name: 'alice@example.test' } })).toBeUndefined();
expect(scrubErrorClass({ constructor: { name: 'A'.repeat(81) } })).toBeUndefined();
});
// Names starting with a lowercase letter or containing a space, plain strings, and null
// must also be dropped.
it('drops lowercase, spaced, and non-error-like values', () => {
expect(scrubErrorClass({ constructor: { name: 'lowercaseError' } })).toBeUndefined();
expect(scrubErrorClass({ constructor: { name: 'Bad Error' } })).toBeUndefined();
expect(scrubErrorClass('plain string')).toBeUndefined();
expect(scrubErrorClass(null)).toBeUndefined();
});
});

Some files were not shown because too many files have changed in this diff Show more