feat(telemetry): anonymous posthog usage telemetry across node cli and python daemon (#205)

* feat: add telemetry phase 1

* feat: add node telemetry event catalog

* feat: add telemetry event helpers

* feat: emit setup and connection telemetry

* feat: emit connection and stack telemetry

* feat: emit ingest and scan telemetry

* feat: emit query telemetry

* feat: emit sampled mcp telemetry

* docs: expand telemetry event catalog

* feat: add telemetry schema sync artifact

* feat: pass telemetry project id to semantic daemon

* feat: add daemon telemetry foundation

* feat: emit semantic daemon telemetry

* feat: emit daemon lifecycle telemetry

* docs: document full telemetry event catalog

* feat(telemetry): dim first-run notice

* feat(telemetry): show first-run notice before command output

* feat(telemetry): wire ktx PostHog project for live ingestion

* docs(telemetry): drop posthog project name and host from storage section

* docs(telemetry): trim to general overview and disclaimer

* docs(agents): add short telemetry guidelines

* feat(telemetry): enable posthog geoip enrichment

* docs(telemetry): drop ip-geoip note from public overview

* refactor(telemetry): drop no-op groupIdentify, rely on capture groups field

* fix(telemetry): respect CI kill switch in python daemon identity

* fix(sql): route table-count analysis to existing analyze-batch endpoint

* fix(telemetry): emit install_first_run from notice path and derive flagsPresent from commander

* fix(telemetry): read package info via getKtxCliPackageInfo to satisfy boundary check

* fix(telemetry): make python identity env={} bypass os.environ and unset CI in tests

* fix(telemetry): unset CI kill switch in cli-program-telemetry tests
This commit is contained in:
Andrey Avtomonov 2026-05-22 18:18:47 +02:00 committed by GitHub
parent c87d14a554
commit b0dd13ce7c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
73 changed files with 6576 additions and 48 deletions

View file

@ -1,4 +1,5 @@
import { readFile } from 'node:fs/promises';
import type { KtxCliIo } from './cli-runtime.js';
import { createDefaultLocalQueryExecutor } from './context/connections/local-query-executor.js';
import type { KtxSqlQueryExecutorPort } from './context/connections/query-executor.js';
import { KtxIngestEmbeddingPortAdapter } from './context/llm/embedding-port.js';
@ -18,6 +19,8 @@ import {
type KtxManagedPythonInstallPolicy,
} from './managed-python-command.js';
import { profileMark } from './startup-profile.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:sl');
@ -56,10 +59,7 @@ export type KtxSlArgs =
runtimeInstallPolicy: KtxManagedPythonInstallPolicy;
};
interface KtxSlIo {
stdout: { write(chunk: string): void };
stderr: { write(chunk: string): void };
}
type KtxSlIo = KtxCliIo;
interface KtxSlDeps {
loadProject?: typeof loadKtxProject;
@ -70,6 +70,7 @@ interface KtxSlDeps {
cliVersion: string;
installPolicy: KtxManagedPythonInstallPolicy;
io: KtxSlIo;
projectDir?: string;
}) => Promise<KtxSemanticLayerComputePort>;
createQueryExecutor?: () => KtxSqlQueryExecutorPort;
}
@ -85,6 +86,14 @@ function resolutionToEmbeddingPort(resolution: EmbeddingProviderResolution): Ktx
return null;
}
/**
 * Counts the measures referenced by a semantic-layer query.
 *
 * @param query - The query input whose `measures` property is inspected.
 * @returns The length of `query.measures` when it is an array; otherwise 0
 *   (the property is missing or holds a non-array value).
 */
function queryMeasureCount(query: SemanticLayerQueryInput): number {
  const { measures } = query;
  // Anything that is not an array counts as "no measures".
  if (!Array.isArray(measures)) {
    return 0;
  }
  return measures.length;
}
/**
 * Counts the dimensions referenced by a semantic-layer query.
 *
 * @param query - The query input whose `dimensions` property is inspected.
 * @returns The length of `query.dimensions` when it is an array; otherwise 0
 *   (the property is missing or holds a non-array value).
 */
function queryDimensionCount(query: SemanticLayerQueryInput): number {
  const { dimensions } = query;
  // Anything that is not an array counts as "no dimensions".
  if (!Array.isArray(dimensions)) {
    return 0;
  }
  return dimensions.length;
}
async function printSlSources(input: {
rows: ReadonlyArray<LocalSlSourceSummary>;
command: 'sl list';
@ -177,6 +186,8 @@ async function readSlQueryFile(path: string): Promise<SemanticLayerQueryInput> {
}
export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: KtxSlDeps = {}): Promise<number> {
const startedAt = performance.now();
let queryForTelemetry: SemanticLayerQueryInput | undefined;
try {
const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir });
if (args.command === 'list') {
@ -234,6 +245,18 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
connectionId: args.connectionId,
sourceName: args.sourceName,
});
await emitTelemetryEvent({
name: 'sl_validate_completed',
projectDir: args.projectDir,
io,
fields: {
sourceCount: source ? 1 : 0,
modelCount: 0,
validationErrorCount: result.valid ? 0 : result.errors.length,
outcome: result.valid ? 'ok' : 'error',
durationMs: Math.max(0, performance.now() - startedAt),
},
});
if (!result.valid) {
for (const error of result.errors) {
io.stderr.write(`${error}\n`);
@ -248,12 +271,14 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
if (!query) {
throw new Error('sl query requires query input from --query-file or at least one --measure');
}
queryForTelemetry = query;
const compute = deps.createSemanticLayerCompute
? deps.createSemanticLayerCompute()
: await (deps.createManagedSemanticLayerCompute ?? createManagedPythonSemanticLayerComputePort)({
cliVersion: args.cliVersion,
installPolicy: args.runtimeInstallPolicy,
io,
projectDir: args.projectDir,
});
const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() : undefined;
const result = await compileLocalSlQuery(project as KtxLocalProject, {
@ -264,6 +289,19 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
maxRows: args.maxRows,
queryExecutor,
});
await emitTelemetryEvent({
name: 'sl_query_completed',
projectDir: args.projectDir,
io,
fields: {
mode: args.execute ? 'execute' : 'compile',
referencedSourceCount: result.plan && typeof result.plan === 'object' ? 1 : 0,
referencedDimensionCount: queryDimensionCount(query),
referencedMeasureCount: queryMeasureCount(query),
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'ok',
},
});
if (args.format === 'sql') {
io.stdout.write(`${result.sql}\n`);
return 0;
@ -274,6 +312,39 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
const _exhaustive: never = args;
throw new Error(`Unsupported sl command: ${JSON.stringify(_exhaustive)}`);
} catch (error) {
if (args.command === 'validate') {
const errorClass = scrubErrorClass(error);
await emitTelemetryEvent({
name: 'sl_validate_completed',
projectDir: args.projectDir,
io,
fields: {
sourceCount: 0,
modelCount: 0,
validationErrorCount: 0,
outcome: 'error',
...(errorClass ? { errorClass } : {}),
durationMs: Math.max(0, performance.now() - startedAt),
},
});
}
if (args.command === 'query') {
const errorClass = scrubErrorClass(error);
await emitTelemetryEvent({
name: 'sl_query_completed',
projectDir: args.projectDir,
io,
fields: {
mode: args.execute ? 'execute' : 'compile',
referencedSourceCount: 0,
referencedDimensionCount: queryForTelemetry ? queryDimensionCount(queryForTelemetry) : 0,
referencedMeasureCount: queryForTelemetry ? queryMeasureCount(queryForTelemetry) : 0,
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'error',
...(errorClass ? { errorClass } : {}),
},
});
}
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
return 1;
}