Improve setup daemon diagnostics

This commit is contained in:
Andrey Avtomonov 2026-05-25 16:14:22 +02:00
parent 853ab10be2
commit 3a3086b7cd
11 changed files with 236 additions and 24 deletions

View file

@ -89,3 +89,41 @@ enough reason to fix it even when the local code "works."
(`loadX` vs `loadHigherX`, `createY` vs `createDefaultY`, `xClient`
vs `xService`), assume callers will pick the wrong one. Unify, or
document inline why both must exist.
## Dispatch and contract leaks across per-variant layers
Layers with multiple per-variant implementations (warehouse drivers,
dialects, LLM providers, ingest adapters, historic-SQL probes) drift
toward parallel switches and informal contracts. The patterns below
look locally reasonable per file but multiply with the number of
variants times the number of consumers — every fix has to be applied
N times, and silent drift between variants is invisible until a user
hits it.
- **MUST NOT**: Maintain two or more files that switch on the same
enum or string union to dispatch to per-variant behavior. Promote
the dispatch to a single registry table keyed by the union, exposed
through one resolution function. If you find yourself writing the
third such switch, the second one was already a bug.
- **MUST**: When every variant of an abstraction implements the same
method, the method belongs on the shared interface. An informal
contract that every implementation happens to satisfy is a leak
waiting to happen — callers will reach for the concrete class
instead of the contract, and the next variant added will silently
forget to implement it.
- **MUST**: When a layer has both a thin shared interface and rich
per-variant concrete classes, they must agree. Either widen the
interface so callers never need the concrete class, or make the
concrete class private (test-only `/** @internal */` JSDoc plus a
boundary check in `scripts/check-boundaries.mjs`). A class that is
public AND has methods the interface does not expose is the exact
configuration that produces leaks.
The warehouse driver / dialect layer in
`packages/cli/src/connectors/<driver>/` plus
`packages/cli/src/context/connections/{dialects,drivers}.ts` is the
canonical worked example: per-driver dialect classes carry
`/** @internal */`, `scripts/check-boundaries.mjs` enforces the import
boundary, and dispatch lives in the two registry files. Apply the
same shape to any other per-variant layer that grows beyond two
implementations.

View file

@ -228,11 +228,14 @@ async function runStageTwoTreePicker(input: {
? initialSelectionForExisting(args.existing.enabledTables, byId)
: initialSelectionFromDefaults(selectedSchemas, schemaIds);
const initialState = buildInitialState({
tree,
existingSelectedIds: initialSelection,
skipEmptyAction: 'save-empty',
});
const initialState = {
...buildInitialState({
tree,
existingSelectedIds: initialSelection,
skipEmptyAction: 'save-empty',
}),
expanded: new Set(schemaIds),
};
const schemaWordPlural = schemaCount === 1 ? args.schemaNoun : args.schemaNounPlural;
const subtitleLines = [

View file

@ -0,0 +1,28 @@
export function describeError(error: unknown): string {
if (!(error instanceof Error)) {
const text = String(error);
return text.length > 0 ? text : 'unknown error';
}
const parts: string[] = [];
if (error.message.length > 0) {
parts.push(error.message);
}
const seen = new Set<unknown>([error]);
let cause: unknown = error.cause;
while (cause && !seen.has(cause)) {
seen.add(cause);
if (cause instanceof Error) {
if (cause.message.length > 0) {
parts.push(cause.message);
}
cause = cause.cause;
} else {
const text = String(cause);
if (text.length > 0) {
parts.push(text);
}
break;
}
}
return parts.length > 0 ? parts.join(': ') : 'unknown error';
}

View file

@ -1,3 +1,4 @@
import { describeError } from '../error-message.js';
import { createKtxEmbeddingProvider, type KtxEmbeddingProviderDeps } from './embedding-provider.js';
import type { KtxEmbeddingConfig } from './types.js';
@ -48,7 +49,6 @@ export async function runKtxEmbeddingHealthCheck(
}
return { ok: true };
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
return { ok: false, message: redactHealthCheckMessage(message, config) };
return { ok: false, message: redactHealthCheckMessage(describeError(error), config) };
}
}

View file

@ -4,6 +4,7 @@ import { createServer } from 'node:net';
import { setTimeout as delay } from 'node:timers/promises';
import { promisify } from 'node:util';
import { z } from 'zod';
import { describeError } from './error-message.js';
import {
installManagedPythonRuntime,
managedPythonDaemonLayout,
@ -16,6 +17,17 @@ import {
} from './managed-python-runtime.js';
import { sanitizeChildProxyEnv } from './proxy-env.js';
export class ManagedPythonDaemonStartError extends Error {
readonly detail: string;
readonly stderrLog: string;
constructor(detail: string, stderrLog: string) {
super(`KTX daemon failed to start: ${detail}. stderr: ${stderrLog}`);
this.name = 'ManagedPythonDaemonStartError';
this.detail = detail;
this.stderrLog = stderrLog;
}
}
export interface ManagedPythonDaemonState {
schemaVersion: 1;
pid: number;
@ -237,7 +249,7 @@ async function healthOk(input: {
}
return { ok: true };
} catch (error) {
return { ok: false, detail: error instanceof Error ? error.message : String(error) };
return { ok: false, detail: describeError(error) };
}
}
@ -328,7 +340,7 @@ async function waitForHealth(input: {
return;
}
lastDetail = finalHealth.detail;
throw new Error(`KTX daemon failed to start: ${lastDetail}. stderr: ${input.state.stderrLog}`);
throw new ManagedPythonDaemonStartError(lastDetail, input.state.stderrLog);
}
async function removeState(layout: ManagedPythonDaemonLayout): Promise<void> {
@ -721,13 +733,21 @@ export async function startManagedPythonDaemon(
stdoutLog: layout.daemonStdoutPath,
stderrLog: layout.daemonStderrPath,
};
await waitForHealth({
state,
cliVersion: options.cliVersion,
fetch: fetchImpl,
timeoutMs: options.startupTimeoutMs ?? 10_000,
pollIntervalMs: options.pollIntervalMs ?? 100,
});
try {
await waitForHealth({
state,
cliVersion: options.cliVersion,
fetch: fetchImpl,
timeoutMs: options.startupTimeoutMs ?? 30_000,
pollIntervalMs: options.pollIntervalMs ?? 100,
});
} catch (error) {
if (processAlive(state.pid)) {
killProcess(state.pid);
}
await removeState(layout);
throw error;
}
await writeState(layout.daemonStatePath, state);
return { status: 'started', layout, state, baseUrl: baseUrl(state) };
} finally {

View file

@ -113,13 +113,13 @@ export interface KtxSetupDatabasesDeps {
}
const DRIVER_OPTIONS: Array<{ value: KtxSetupDatabaseDriver; label: string }> = [
{ value: 'sqlite', label: 'SQLite' },
{ value: 'postgres', label: 'PostgreSQL' },
{ value: 'bigquery', label: 'BigQuery' },
{ value: 'snowflake', label: 'Snowflake' },
{ value: 'mysql', label: 'MySQL' },
{ value: 'clickhouse', label: 'ClickHouse' },
{ value: 'sqlserver', label: 'SQL Server' },
{ value: 'bigquery', label: 'BigQuery' },
{ value: 'snowflake', label: 'Snowflake' },
{ value: 'sqlite', label: 'SQLite' },
];
const DRIVER_LABELS = Object.fromEntries(DRIVER_OPTIONS.map((option) => [option.value, option.label])) as Record<

View file

@ -12,6 +12,7 @@ import {
managedLocalEmbeddingHealthConfig,
type ManagedLocalEmbeddingsDaemon,
} from './managed-local-embeddings.js';
import { ManagedPythonDaemonStartError } from './managed-python-daemon.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import { withTextInputNavigation } from './prompt-navigation.js';
import { envCredentialReference, writeProjectLocalSecretReference } from './setup-secrets.js';
@ -419,7 +420,12 @@ export async function runKtxSetupEmbeddingsStep(
io,
});
} catch (error) {
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
if (error instanceof ManagedPythonDaemonStartError) {
const tail = await readLocalEmbeddingDaemonStderrTail(error.stderrLog);
io.stderr.write(`${localEmbeddingSetupMessage(error.detail, tail)}\n`);
} else {
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
}
return { status: 'failed', projectDir: args.projectDir };
}
}

View file

@ -161,6 +161,7 @@ describe('pickDatabaseScope', () => {
'public.events',
'public.sessions',
]);
expect([...(capture.state?.expanded ?? [])].sort()).toEqual(['analytics', 'public']);
expect(capture.state?.byId.get('public.events')?.title).toBe('events (view)');
});

View file

@ -3,6 +3,7 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
ManagedPythonDaemonStartError,
readManagedPythonDaemonStatus,
startManagedPythonDaemon,
stopAllManagedPythonDaemons,
@ -244,6 +245,76 @@ describe('KTX daemon lifecycle', () => {
});
});
it('kills the spawned daemon when the startup health check times out', async () => {
const spawnDaemon = makeSpawn(7777);
const killProcess = vi.fn();
const fetch = vi.fn<ManagedPythonDaemonFetch>().mockRejectedValue(new Error('fetch failed'));
await expect(
startManagedPythonDaemon({
...daemonOptionsBase(tempDir),
features: ['core'],
installRuntime: vi.fn(async () => installResult(tempDir)),
spawnDaemon,
fetch,
processAlive: vi.fn(() => true),
killProcess,
allocatePort: vi.fn(async () => 61234),
now: () => new Date('2026-05-11T00:00:00.000Z'),
startupTimeoutMs: 5,
pollIntervalMs: 1,
}),
).rejects.toBeInstanceOf(ManagedPythonDaemonStartError);
expect(killProcess).toHaveBeenCalledWith(7777);
await expect(readFile(layout(tempDir).daemonStatePath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
});
it('surfaces the underlying fetch cause in the startup failure message', async () => {
const cause = new Error('connect ECONNREFUSED 127.0.0.1:61234');
const fetchError = new Error('fetch failed');
(fetchError as Error & { cause?: unknown }).cause = cause;
const error = await startManagedPythonDaemon({
...daemonOptionsBase(tempDir),
features: ['core'],
installRuntime: vi.fn(async () => installResult(tempDir)),
spawnDaemon: makeSpawn(7778),
fetch: vi.fn<ManagedPythonDaemonFetch>().mockRejectedValue(fetchError),
processAlive: vi.fn(() => false),
killProcess: vi.fn(),
allocatePort: vi.fn(async () => 61234),
now: () => new Date('2026-05-11T00:00:00.000Z'),
startupTimeoutMs: 5,
pollIntervalMs: 1,
}).catch((value: unknown) => value);
expect(error).toBeInstanceOf(ManagedPythonDaemonStartError);
const startError = error as ManagedPythonDaemonStartError;
expect(startError.detail).toContain('fetch failed');
expect(startError.detail).toContain('ECONNREFUSED');
expect(startError.message).toContain('ECONNREFUSED');
});
it('exposes the daemon stderr log path on startup failure', async () => {
const error = await startManagedPythonDaemon({
...daemonOptionsBase(tempDir),
features: ['core'],
installRuntime: vi.fn(async () => installResult(tempDir)),
spawnDaemon: makeSpawn(7779),
fetch: vi.fn<ManagedPythonDaemonFetch>().mockRejectedValue(new Error('fetch failed')),
processAlive: vi.fn(() => false),
killProcess: vi.fn(),
allocatePort: vi.fn(async () => 61234),
now: () => new Date('2026-05-11T00:00:00.000Z'),
startupTimeoutMs: 5,
pollIntervalMs: 1,
}).catch((value: unknown) => value);
expect(error).toBeInstanceOf(ManagedPythonDaemonStartError);
expect((error as ManagedPythonDaemonStartError).stderrLog).toBe(layout(tempDir).daemonStderrPath);
});
it('reuses a healthy daemon with the requested feature set', async () => {
await mkdir(layout(tempDir).daemonStateDir, { recursive: true });
await writeFile(layout(tempDir).daemonStatePath, `${JSON.stringify(runningState(tempDir), null, 2)}\n`);

View file

@ -164,13 +164,13 @@ describe('setup databases step', () => {
'Which databases should KTX connect to?\n' +
'Use Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.',
options: [
{ value: 'sqlite', label: 'SQLite' },
{ value: 'postgres', label: 'PostgreSQL' },
{ value: 'bigquery', label: 'BigQuery' },
{ value: 'snowflake', label: 'Snowflake' },
{ value: 'mysql', label: 'MySQL' },
{ value: 'clickhouse', label: 'ClickHouse' },
{ value: 'sqlserver', label: 'SQL Server' },
{ value: 'bigquery', label: 'BigQuery' },
{ value: 'snowflake', label: 'Snowflake' },
{ value: 'sqlite', label: 'SQLite' },
],
required: true,
});
@ -381,12 +381,16 @@ describe('setup databases step', () => {
it('emits debug telemetry when setup writes a database connection', async () => {
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('KTX_TELEMETRY_DISABLED', '');
vi.stubEnv('DO_NOT_TRACK', '');
vi.stubEnv('CI', '');
const io = makeIo();
const prompts = makePromptAdapter({
selectValues: ['url'],
textValues: ['', 'env:DATABASE_URL'],
});
const listSchemas = vi.fn(async () => []);
const listTables = vi.fn(async () => []);
const result = await runKtxSetupDatabasesStep(
{
@ -397,7 +401,13 @@ describe('setup databases step', () => {
skipDatabases: false,
},
io.io,
{ prompts, testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0) },
{
prompts,
testConnection: vi.fn(async () => 0),
scanConnection: vi.fn(async () => 0),
listSchemas,
listTables,
},
);
expect(result.status).toBe('ready');

View file

@ -5,6 +5,7 @@ import { initKtxProject } from '../src/context/project/project.js';
import { parseKtxProjectConfig } from '../src/context/project/config.js';
import { readKtxSetupState, writeKtxSetupState } from '../src/context/project/setup-config.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { ManagedPythonDaemonStartError } from '../src/managed-python-daemon.js';
import { type KtxSetupEmbeddingsPromptAdapter, runKtxSetupEmbeddingsStep } from '../src/setup-embeddings.js';
const EMBEDDING_OPTION_PROMPT_MESSAGE = [
@ -366,6 +367,40 @@ describe('setup embeddings step', () => {
expect(io.stderr()).not.toContain('daemon traceback line 5');
});
it('prints the daemon stderr tail when the daemon fails to start', async () => {
const io = makeIo();
const stderrLog = join(tempDir, '.ktx', 'runtime', 'daemon.stderr.log');
await mkdir(join(tempDir, '.ktx', 'runtime'), { recursive: true });
await writeFile(
stderrLog,
Array.from({ length: 45 }, (_value, index) => `daemon startup traceback ${index + 1}`).join('\n'),
);
const result = await runKtxSetupEmbeddingsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
skipEmbeddings: false,
},
io.io,
{
env: {},
ensureLocalEmbeddings: vi.fn(async () => {
throw new ManagedPythonDaemonStartError('fetch failed: connect ECONNREFUSED 127.0.0.1:61234', stderrLog);
}),
},
);
expect(result.status).toBe('failed');
expect(io.stderr()).toContain('Local embedding health check failed: fetch failed: connect ECONNREFUSED');
expect(io.stderr()).toContain('Recent KTX daemon stderr:');
expect(io.stderr()).toContain('daemon startup traceback 6');
expect(io.stderr()).toContain('daemon startup traceback 45');
expect(io.stderr()).not.toContain('daemon startup traceback 5');
});
it('does not print daemon stderr diagnostics when the log is unavailable or empty', async () => {
const io = makeIo();