ktx/packages/cli/src/connection.ts

489 lines
17 KiB
TypeScript
Raw Normal View History

chore(workspace): gate dead-code with knip production mode (#196) * refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. 
- Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. 
* refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. 
Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory } from './context/ingest/adapters/metabase/client.js';
import { DefaultLookerConnectionClientFactory } from './context/ingest/adapters/looker/factory.js';
import type { LookerClient } from './context/ingest/adapters/looker/client.js';
import type { MetabaseRuntimeClient } from './context/ingest/adapters/metabase/client-port.js';
import { type NotionBotInfo, NotionClient } from './context/ingest/adapters/notion/notion-client.js';
import { createLocalLookerCredentialResolver } from './context/ingest/adapters/looker/local-looker.adapter.js';
import { metabaseRuntimeConfigFromLocalConnection } from './context/ingest/adapters/metabase/local-metabase.adapter.js';
import { testRepoConnection } from './context/ingest/repo-fetch.js';
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from './context/connections/notion-config.js';
import { resolveKtxConfigReference } from './context/core/config-reference.js';
import { type KtxLocalProject, loadKtxProject } from './context/project/project.js';
import type { KtxScanConnector } from './context/scan/types.js';
2026-05-10 23:51:24 +02:00
import type { KtxCliIo } from './index.js';
import { bold, dim, green, red, SYMBOLS } from './io/symbols.js';
2026-05-10 23:51:24 +02:00
import { createKtxCliScanConnector } from './local-scan-connectors.js';
2026-05-10 23:12:26 +02:00
import { profileMark } from './startup-profile.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
2026-05-10 23:12:26 +02:00
profileMark('module:connection');
2026-05-10 23:51:24 +02:00
/**
 * Arguments accepted by `runKtxConnection`, discriminated by `command`.
 *
 * - `list` prints the configured connections.
 * - `test` checks the single connection named by `connectionId`.
 * - `test-all` checks every configured connection.
 */
export type KtxConnectionArgs =
  | { command: 'list'; projectDir: string }
  | { command: 'test'; projectDir: string; connectionId: string }
  | { command: 'test-all'; projectDir: string };
2026-05-10 23:12:26 +02:00
// Narrow views of the real clients: each port exposes only the members the
// connection tests in this file call, so substitutes need to implement just those.
type MetabaseTestPort = Pick<MetabaseRuntimeClient, 'testConnection' | 'getDatabases' | 'cleanup'>;
type LookerTestPort = Pick<LookerClient, 'testConnection'>;
type NotionTestPort = Pick<NotionClient, 'retrieveBotUser'>;
// Signature of the repository reachability check, reused for the injectable dependency.
type TestRepoConnection = typeof testRepoConnection;
/**
 * Optional collaborators for the connection tests.
 *
 * Every member may be omitted; `testConnectionByDriver` falls back to the
 * default implementation for any dependency that is not supplied.
 */
export interface KtxConnectionDeps {
  /** Builds the scan connector used for the natively supported database drivers. */
  createScanConnector?: typeof createKtxCliScanConnector;
  /** Builds the client used to test a `metabase` connection. */
  createMetabaseClient?: (project: KtxLocalProject, connectionId: string) => Promise<MetabaseTestPort>;
  /** Builds the client used to test a `looker` connection. */
  createLookerClient?: (project: KtxLocalProject, connectionId: string) => Promise<LookerTestPort>;
  /** Builds the client used to test a `notion` connection. */
  createNotionClient?: (project: KtxLocalProject, connectionId: string) => Promise<NotionTestPort>;
  /** Repository check used for the `dbt`, `metricflow` and `lookml` drivers. */
  testRepoConnection?: TestRepoConnection;
}
/**
 * Driver names listed in the "no test implementation" error message,
 * in the order they are shown to the user.
 */
const SUPPORTED_TEST_DRIVERS = [
  // Databases and warehouses
  'sqlite', 'postgres', 'mysql', 'clickhouse', 'sqlserver', 'bigquery', 'snowflake',
  // Hosted tools
  'metabase', 'looker', 'notion',
  // Git-repository backed sources
  'dbt', 'metricflow', 'lookml',
];
2026-05-10 23:12:26 +02:00
/**
 * Reads the `driver` of a configured connection as a trimmed, lower-cased string.
 *
 * @returns The normalized driver, or `''` when the connection or its driver is absent.
 */
function normalizedConnectionDriver(project: KtxLocalProject, connectionId: string): string {
  const rawDriver = project.config.connections[connectionId]?.driver ?? '';
  const asText = String(rawDriver);
  return asText.trim().toLowerCase();
}
2026-05-10 23:12:26 +02:00
/**
 * Tests a connection through its scan connector.
 *
 * @param project - Loaded project whose configuration holds the connection.
 * @param connectionId - Identifier of the connection to test.
 * @param createScanConnector - Factory that builds the connector for the connection.
 * @returns The driver reported by the connector.
 * @throws Error when the connector has no `testConnection`, or when the test
 *   reports failure (using the connector's error text when provided).
 */
async function testNativeConnection(
  project: KtxLocalProject,
  connectionId: string,
  createScanConnector: typeof createKtxCliScanConnector,
): Promise<{ driver: string }> {
  let connector: KtxScanConnector | null = null;
  try {
    connector = await createScanConnector(project, connectionId);
    if (!connector.testConnection) {
      throw new Error(`Connector for "${connectionId}" does not implement testConnection`);
    }
    const result = await connector.testConnection();
    if (!result.success) {
      throw new Error(result.error ?? 'connection test failed');
    }
    return { driver: connector.driver };
  } finally {
    // Release the connector on success and failure alike, but only if one was created.
    if (connector?.cleanup) {
      await connector.cleanup();
    }
  }
}
/**
 * Default Metabase client factory: resolves runtime configuration from the
 * project's local connection entries and uses the default client settings.
 */
async function createDefaultMetabaseClient(
  project: KtxLocalProject,
  connectionId: string,
): Promise<MetabaseTestPort> {
  const clientFactory = new DefaultMetabaseConnectionClientFactory((id) => {
    const localConnection = project.config.connections[id];
    return metabaseRuntimeConfigFromLocalConnection(id, localConnection);
  }, DEFAULT_METABASE_CLIENT_CONFIG);
  return clientFactory.createClient(connectionId);
}
/**
 * Verifies a Metabase connection and counts its non-sample databases.
 *
 * @returns The number of databases whose `is_sample` flag is not `true`.
 * @throws Error when the connection test fails or no non-sample database is returned.
 */
async function testMetabaseConnection(
  project: KtxLocalProject,
  connectionId: string,
  createClient: (project: KtxLocalProject, connectionId: string) => Promise<MetabaseTestPort>,
): Promise<{ databaseCount: number }> {
  let metabase: MetabaseTestPort | null = null;
  try {
    metabase = await createClient(project, connectionId);

    const probe = await metabase.testConnection();
    if (!probe.success) {
      const reason = probe.error ?? probe.message ?? 'unknown error';
      throw new Error(`Metabase connection test failed: ${reason}`);
    }

    const allDatabases = await metabase.getDatabases();
    const usable = allDatabases.filter((entry) => entry.is_sample !== true);
    const databaseCount = usable.length;
    if (databaseCount === 0) {
      throw new Error('Metabase auth worked but no usable databases were returned');
    }
    return { databaseCount };
  } finally {
    // Runs whether or not the checks above succeeded; skipped if no client was created.
    await metabase?.cleanup();
  }
}
/**
 * Default Looker client factory, using credentials resolved from the local project.
 */
async function createDefaultLookerClient(
  project: KtxLocalProject,
  connectionId: string,
): Promise<LookerTestPort> {
  const credentialResolver = createLocalLookerCredentialResolver(project);
  const clientFactory = new DefaultLookerConnectionClientFactory(credentialResolver);
  const lookerClient = await clientFactory.createClient(connectionId);
  // The created client is exposed only through the narrow test port.
  return lookerClient as unknown as LookerTestPort;
}
/**
 * Verifies a Looker connection and reports which user it ran as.
 *
 * @returns The trimmed display name, else the user id, else `'unknown'`.
 * @throws Error when the connection test reports failure.
 */
async function testLookerConnection(
  project: KtxLocalProject,
  connectionId: string,
  createClient: (project: KtxLocalProject, connectionId: string) => Promise<LookerTestPort>,
): Promise<{ user: string }> {
  const looker = await createClient(project, connectionId);
  const probe = await looker.testConnection();
  if (!probe.success) {
    throw new Error(`Looker connection test failed: ${probe.error ?? 'unknown error'}`);
  }

  const { displayName, userId } = (probe.metadata ?? {}) as { displayName?: string | null; userId?: string };
  // A whitespace-only label collapses to 'unknown' rather than falling through to the next field.
  const label = (displayName ?? userId ?? 'unknown').trim();
  return { user: label || 'unknown' };
}
/**
 * Default Notion client factory: parses the connection's Notion configuration,
 * resolves its auth token, and builds a client with that token.
 *
 * @throws Error when the connection is not present in the project configuration.
 */
async function createDefaultNotionClient(
  project: KtxLocalProject,
  connectionId: string,
): Promise<NotionTestPort> {
  const entry = project.config.connections[connectionId];
  if (!entry) {
    throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
  }
  const notionConfig = parseNotionConnectionConfig(entry);
  const authToken = await resolveNotionConnectionAuthToken(notionConfig);
  return new NotionClient(authToken);
}
/**
 * Picks a human-readable label for a Notion bot: its trimmed name when
 * non-empty, otherwise its trimmed id, otherwise `'unknown'`.
 */
function describeNotionBot(bot: NotionBotInfo): string {
  const trimmedText = (value: unknown): string => (typeof value === 'string' ? value.trim() : '');
  return trimmedText(bot.name) || trimmedText(bot.id) || 'unknown';
}
/**
 * Verifies a Notion connection by retrieving the bot user behind it.
 *
 * @returns A display label for the bot, as produced by `describeNotionBot`.
 */
async function testNotionConnection(
  project: KtxLocalProject,
  connectionId: string,
  createClient: (project: KtxLocalProject, connectionId: string) => Promise<NotionTestPort>,
): Promise<{ bot: string }> {
  const notion = await createClient(project, connectionId);
  const botUser = await notion.retrieveBotUser();
  const label = describeNotionBot(botUser);
  return { bot: label };
}
/** Repository details extracted from a git-backed connection entry. */
interface GitConnectionFields {
  /** Trimmed, non-empty repository URL. */
repoUrl: string;
  /** Literal token if set, otherwise the resolved token reference, otherwise `null`. */
authToken: string | null;
}
/**
 * Extracts the repository URL and auth token from a git-backed connection.
 *
 * For `metricflow`, fields are read from the nested `metricflow` object when it
 * is present; otherwise they are read from the connection entry itself. The
 * `dbt` driver reads the URL from `repo_url`; other drivers read `repoUrl`.
 *
 * @throws Error when the connection is not configured or the repository URL is missing.
 */
function extractGitConnectionFields(
  project: KtxLocalProject,
  connectionId: string,
  driver: string,
): GitConnectionFields {
  const connection = project.config.connections[connectionId];
  if (!connection) {
    throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
  }

  // Non-string and blank values are treated as absent.
  const nonEmptyString = (value: unknown): string | null => {
    if (typeof value !== 'string') {
      return null;
    }
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : null;
  };

  let settings = connection as Record<string, unknown>;
  if (driver === 'metricflow' && typeof connection.metricflow === 'object' && connection.metricflow !== null) {
    settings = connection.metricflow as Record<string, unknown>;
  }

  const repoField = driver === 'dbt' ? 'repo_url' : 'repoUrl';
  const repoUrl = nonEmptyString(settings[repoField]);
  if (!repoUrl) {
    throw new Error(`Connection "${connectionId}" (driver: ${driver}) is missing ${repoField}`);
  }

  const inlineToken = nonEmptyString(settings.auth_token);
  const tokenRef = nonEmptyString(settings.auth_token_ref);
  // The reference is resolved whenever it is present, even if an inline token wins below.
  const referencedToken = tokenRef ? resolveKtxConfigReference(tokenRef, process.env) : null;
  return { repoUrl, authToken: inlineToken ?? referencedToken ?? null };
}
/**
 * Checks the repository behind a git-backed connection.
 *
 * @param runTest - Repository check to run with the extracted URL and token.
 * @returns The repository URL that was checked.
 * @throws Error when the check result is not ok, prefixed with the driver name.
 */
async function testGitRepoConnection(
  project: KtxLocalProject,
  connectionId: string,
  driver: string,
  runTest: TestRepoConnection,
): Promise<{ repoUrl: string }> {
  const fields = extractGitConnectionFields(project, connectionId, driver);
  const check = await runTest({ repoUrl: fields.repoUrl, authToken: fields.authToken });
  if (check.ok) {
    return { repoUrl: fields.repoUrl };
  }
  throw new Error(`${driver} repository check failed: ${check.error}`);
}
/** Result of a successful connection test: the driver plus one labelled detail. */
interface DriverTestOutcome {
  /** Driver that handled the test. */
driver: string;
  /** Label for the detail, e.g. 'Databases', 'User', 'Bot', 'Repo' or 'Status'. */
detailKey: string;
  /** Value shown next to `detailKey`. */
detailValue: string;
}
/**
 * Runs the connection test that matches the connection's driver.
 *
 * @param project - Loaded project whose configuration holds the connection.
 * @param connectionId - Identifier of the connection to test.
 * @param deps - Optional collaborators; any that are omitted fall back to the defaults.
 * @returns The driver that handled the test plus one labelled detail.
 * @throws Error when the connection is not configured, has no driver, uses a
 *   driver without a test implementation, or when the underlying test fails.
 */
async function testConnectionByDriver(
  project: KtxLocalProject,
  connectionId: string,
  deps: KtxConnectionDeps,
): Promise<DriverTestOutcome> {
  // Report an unknown connection id as such, instead of the misleading
  // "has no `driver` field" message that a missing entry would otherwise produce.
  if (!project.config.connections[connectionId]) {
    throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
  }

  const driver = normalizedConnectionDriver(project, connectionId);
  if (!driver) {
    throw new Error(`Connection "${connectionId}" has no \`driver\` field in ktx.yaml`);
  }

  switch (driver) {
    case 'metabase': {
      const result = await testMetabaseConnection(
        project,
        connectionId,
        deps.createMetabaseClient ?? createDefaultMetabaseClient,
      );
      return { driver, detailKey: 'Databases', detailValue: String(result.databaseCount) };
    }
    case 'looker': {
      const result = await testLookerConnection(
        project,
        connectionId,
        deps.createLookerClient ?? createDefaultLookerClient,
      );
      return { driver, detailKey: 'User', detailValue: result.user };
    }
    case 'notion': {
      const result = await testNotionConnection(
        project,
        connectionId,
        deps.createNotionClient ?? createDefaultNotionClient,
      );
      return { driver, detailKey: 'Bot', detailValue: result.bot };
    }
    case 'dbt':
    case 'metricflow':
    case 'lookml': {
      const result = await testGitRepoConnection(
        project,
        connectionId,
        driver,
        deps.testRepoConnection ?? testRepoConnection,
      );
      return { driver, detailKey: 'Repo', detailValue: result.repoUrl };
    }
    case 'sqlite':
    case 'sqlite3':
    case 'postgres':
    case 'postgresql':
    case 'mysql':
    case 'clickhouse':
    case 'sqlserver':
    case 'bigquery':
    case 'snowflake': {
      const result = await testNativeConnection(
        project,
        connectionId,
        deps.createScanConnector ?? createKtxCliScanConnector,
      );
      // The connector's own driver name is reported rather than the configured spelling.
      return { driver: result.driver, detailKey: 'Status', detailValue: 'ok' };
    }
    default:
      throw new Error(
        `Connection "${connectionId}" uses driver "${driver}", which has no test implementation in ktx. Supported: ${SUPPORTED_TEST_DRIVERS.join(', ')}.`,
      );
  }
}
/** One line of the `test-all` report. */
interface ConnectionTestRow {
connectionId: string;
  /** Driver shown in the report; 'unknown' when the connection declares none. */
driver: string;
  /** Whether the connection test passed. */
ok: boolean;
  /** On success a "key: value" detail; on failure the error message. */
detail: string;
}
/**
 * Emits a `connection_test` telemetry event for one test attempt.
 *
 * The event carries the driver, whether the connection is a demo connection,
 * the outcome and duration, and a scrubbed error class when an error was given.
 */
async function emitConnectionTest(input: {
  project: KtxLocalProject;
  connectionId: string;
  driver: string;
  outcome: 'ok' | 'error';
  durationMs: number;
  error?: unknown;
  io: KtxCliIo;
}): Promise<void> {
  const { project, connectionId, driver, outcome, durationMs, io } = input;

  // Only the scrubbed class of the error is attached, and only when one was supplied.
  const errorClass = input.error ? scrubErrorClass(input.error) : undefined;
  const connectionEntry = project.config.connections[connectionId];

  await emitTelemetryEvent({
    name: 'connection_test',
    projectDir: project.projectDir,
    io,
    fields: {
      driver,
      isDemoConnection: isDemoConnection(connectionId, connectionEntry),
      outcome,
      durationMs,
      ...(errorClass ? { errorClass } : {}),
    },
  });
}
/**
 * Measures how many characters of `text` are visible once ANSI colour/style
 * sequences are removed.
 *
 * @returns The `length` of the text without `ESC [ ... m` sequences.
 */
function visualWidth(text: string): number {
  // styleText wraps content in ANSI escape sequences; strip them before measuring.
  // The pattern is anchored on the ESC byte so the whole sequence is removed and
  // ordinary bracketed text such as "[1m" is left alone.
  return text.replace(/\x1b\[[0-9;]*m/g, '').length;
}
/**
 * Right-pads `text` with spaces until its visible width reaches `width`.
 * Text that is already wide enough is returned unchanged.
 */
function padVisual(text: string, width: number): string {
  const missing = width - visualWidth(text);
  if (missing > 0) {
    return text + ' '.repeat(missing);
  }
  return text;
}
/**
 * Writes the `test-all` report to stdout: a heading, one aligned row per
 * connection, and a summary line with passed/failed counts.
 */
function renderTestAll(io: KtxCliIo, rows: ReadonlyArray<ConnectionTestRow>): void {
  const out = io.stdout;
  out.write(`${bold('connection test --all')}\n`);

  if (rows.length === 0) {
    out.write(`\n No connections configured. Run \`ktx setup\` to add one.\n\n`);
    return;
  }
  out.write('\n');

  const okLabel = green('✓ ok');
  const failLabel = red('✗ failed');

  // Column widths come from the widest entry so every row lines up.
  const idWidth = rows.reduce((widest, row) => Math.max(widest, row.connectionId.length), 0);
  const driverWidth = rows.reduce((widest, row) => Math.max(widest, row.driver.length), 0);
  const statusWidth = Math.max(visualWidth(okLabel), visualWidth(failLabel));

  let failed = 0;
  rows.forEach((row) => {
    const id = bold(padVisual(row.connectionId, idWidth));
    const driver = dim(padVisual(row.driver, driverWidth));
    const status = padVisual(row.ok ? okLabel : failLabel, statusWidth);
    const detail = dim(row.detail);
    out.write(` ${id} ${driver} ${status} ${detail}\n`);
    if (!row.ok) {
      failed += 1;
    }
  });
  const passed = rows.length - failed;

  out.write('\n');
  const summaryParts = [`${rows.length} tested`, green(`${passed} passed`)];
  if (failed !== 0) {
    // The failed count is only shown when something actually failed.
    summaryParts.push(red(`${failed} failed`));
  }
  const summary = summaryParts.join(` ${dim(SYMBOLS.middot)} `);
  out.write(`${summary}\n`);
}
/**
 * Tests every configured connection, emits telemetry for each attempt, and
 * renders the combined report.
 *
 * @returns `1` when at least one connection failed, otherwise `0`.
 */
async function runTestAll(
  project: KtxLocalProject,
  io: KtxCliIo,
  deps: KtxConnectionDeps,
): Promise<number> {
  // Tests one connection and converts the result, pass or fail, into a report row.
  const testOne = async (connectionId: string, declaredDriver: string): Promise<ConnectionTestRow> => {
    const startedAt = performance.now();
    const elapsedMs = (): number => Math.max(0, performance.now() - startedAt);
    try {
      const outcome = await testConnectionByDriver(project, connectionId, deps);
      const driver = outcome.driver || declaredDriver;
      await emitConnectionTest({
        project,
        connectionId,
        driver,
        outcome: 'ok',
        durationMs: elapsedMs(),
        io,
      });
      return { connectionId, driver, ok: true, detail: `${outcome.detailKey}: ${outcome.detailValue}` };
    } catch (error) {
      await emitConnectionTest({
        project,
        connectionId,
        driver: declaredDriver,
        outcome: 'error',
        durationMs: elapsedMs(),
        error,
        io,
      });
      const detail = error instanceof Error ? error.message : String(error);
      return { connectionId, driver: declaredDriver, ok: false, detail };
    }
  };

  const sortedEntries = Object.entries(project.config.connections).sort(([a], [b]) => a.localeCompare(b));
  // All connections are tested concurrently; rows keep the sorted order.
  const rows = await Promise.all(
    sortedEntries.map(async ([connectionId, connection]) => {
      const declaredDriver = String(connection.driver ?? '').trim().toLowerCase() || 'unknown';
      return testOne(connectionId, declaredDriver);
    }),
  );

  renderTestAll(io, rows);
  const anyFailed = rows.some((row) => !row.ok);
  return anyFailed ? 1 : 0;
}
2026-05-10 23:51:24 +02:00
/**
 * Entry point for the connection commands (`list`, `test`, `test-all`).
 *
 * @param args - Which command to run, the project directory, and for `test` the connection id.
 * @param io - Streams to write to; defaults to the current process.
 * @param deps - Optional collaborators for the connection tests.
 * @returns `0` on success; `1` when a test fails or any error is thrown. Errors
 *   are written to stderr rather than propagated.
 */
export async function runKtxConnection(
  args: KtxConnectionArgs,
  io: KtxCliIo = process,
  deps: KtxConnectionDeps = {},
): Promise<number> {
  try {
    const project = await loadKtxProject({ projectDir: args.projectDir });

    if (args.command === 'list') {
      const entries = Object.entries(project.config.connections).sort(([a], [b]) => a.localeCompare(b));
      if (entries.length === 0) {
        io.stdout.write('No connections configured. Run `ktx setup` to add one.\n');
        return 0;
      }
      // Columns are at least as wide as their headers.
      const idWidth = Math.max('ID'.length, ...entries.map(([id]) => id.length));
      const driverWidth = Math.max(
        'DRIVER'.length,
        ...entries.map(([, c]) => (c.driver ?? 'unknown').length),
      );
      io.stdout.write(`${'ID'.padEnd(idWidth)} ${'DRIVER'.padEnd(driverWidth)}\n`);
      for (const [id, connection] of entries) {
        io.stdout.write(`${id.padEnd(idWidth)} ${(connection.driver ?? 'unknown').padEnd(driverWidth)}\n`);
      }
      return 0;
    }

    if (args.command === 'test-all') {
      return await runTestAll(project, io, deps);
    }

    const startedAt = performance.now();
    // Start from the configured driver so a failure is still reported with it;
    // a successful test replaces it with the driver returned by the test.
    let driver = normalizedConnectionDriver(project, args.connectionId) || 'unknown';
    let detailKey: string;
    let detailValue: string;
    try {
      const outcome = await testConnectionByDriver(project, args.connectionId, deps);
      driver = outcome.driver;
      detailKey = outcome.detailKey;
      detailValue = outcome.detailValue;
      await emitConnectionTest({
        project,
        connectionId: args.connectionId,
        driver,
        outcome: 'ok',
        durationMs: Math.max(0, performance.now() - startedAt),
        io,
      });
    } catch (error) {
      await emitConnectionTest({
        project,
        connectionId: args.connectionId,
        driver,
        outcome: 'error',
        durationMs: Math.max(0, performance.now() - startedAt),
        error,
        io,
      });
      // Rethrow so the outer handler prints the message and returns 1.
      throw error;
    }

    io.stdout.write(`Connection test passed: ${args.connectionId}\n`);
    io.stdout.write(`Driver: ${driver}\n`);
    io.stdout.write(`${detailKey}: ${detailValue}\n`);
    return 0;
  } catch (error) {
    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
    return 1;
  }
}