diff --git a/README.md b/README.md index 014ac600..c92371a4 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ artifacts. You can inspect them, commit them, and serve them to any MCP client. - Durable warehouse memory with semantic-layer sources and knowledge pages. - Native scan connectors for SQLite, Postgres, MySQL, ClickHouse, SQL Server, - BigQuery, Snowflake, and PostHog. + BigQuery, and Snowflake. - Agentic ingest with provenance links, tool transcripts, and replay metadata. - Local semantic-layer query planning and optional query execution. - A stdio MCP server with tools for connections, knowledge, semantic-layer @@ -221,7 +221,6 @@ The MCP server exposes `connection_list`, `knowledge_search`, - `packages/connector-clickhouse`: ClickHouse scan connector. - `packages/connector-mysql`: MySQL scan connector. - `packages/connector-postgres`: Postgres scan connector. -- `packages/connector-posthog`: PostHog scan connector. - `packages/connector-snowflake`: Snowflake scan connector. - `packages/connector-sqlite`: SQLite scan connector. - `packages/connector-sqlserver`: SQL Server scan connector. diff --git a/packages/cli/package.json b/packages/cli/package.json index 0cc4d6e9..e85986a4 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -38,7 +38,6 @@ "@ktx/connector-clickhouse": "workspace:*", "@ktx/connector-mysql": "workspace:*", "@ktx/connector-postgres": "workspace:*", - "@ktx/connector-posthog": "workspace:*", "@ktx/connector-snowflake": "workspace:*", "@ktx/connector-sqlite": "workspace:*", "@ktx/connector-sqlserver": "workspace:*", diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index a88b42cc..8e48a6f7 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -202,7 +202,7 @@ describe('renderContextBuildView', () => { const output = renderContextBuildView(state, { styled: false }); expect(output).toContain('scanning...'); - expect(output).toContain('30s'); + expect(output).toContain('(30s)'); }); it('renders running target with progress bar when percentage is available', () => { @@ -217,7 +217,7 @@ describe('renderContextBuildView', () => { expect(output).toContain('██████░░░░░░'); expect(output).toContain('50%'); expect(output).toContain('Scanning tables...'); - expect(output).toContain('15s'); + expect(output).toContain('(15s)'); }); it('renders completion summary when all targets are done', () => { @@ -423,6 +423,7 @@ describe('runContextBuild', () => { expect(mockExit).toHaveBeenCalledWith(0); expect(io.stdout()).toContain('Context build continuing in the background.'); expect(io.stdout()).toContain('Resume: ktx setup --project-dir /tmp/project'); + expect(io.stdout()).toContain('Status: ktx setup context status --project-dir /tmp/project'); mockExit.mockRestore(); }); diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 7edc7d13..4c57784d 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -137,7 +137,7 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean): string const percent = extractPercent(target.detailLine); const progressText = target.detailLine?.replace(/^\[\d+%\]\s*/, '') ?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...'); - const elapsed = target.elapsedMs > 0 ? formatDuration(target.elapsedMs) : null; + const elapsed = target.elapsedMs > 0 ? `(${formatDuration(target.elapsedMs)})` : null; const parts: string[] = []; if (percent !== null) { parts.push(`${renderProgressBar(percent, styled)} ${percent}%`); @@ -318,7 +318,7 @@ export function createRepainter(io: KtxCliIo) { if (lastLineCount > 0) { io.stdout.write(`${ESC}[${lastLineCount}A\r`); } - io.stdout.write(content); + io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`)); io.stdout.write(`${ESC}[J`); lastLineCount = (content.match(/\n/g) ?? []).length; }, @@ -356,7 +356,7 @@ function spawnBackgroundBuild(projectDir: string): { logPath: string } | null { // --- Keystroke handling --- -function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null { +export function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null { const stdin = process.stdin; if (!stdin.isTTY || typeof stdin.setRawMode !== 'function') { return null; @@ -445,6 +445,7 @@ export async function runContextBuild( io.stdout.write('\n\nContext build continuing in the background.\n'); if (bg) io.stdout.write(`Log: ${bg.logPath}\n`); io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`); + io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`); process.exit(0); }, () => { diff --git a/packages/cli/src/local-scan-connectors.test.ts b/packages/cli/src/local-scan-connectors.test.ts index 13d19c18..0fe57518 100644 --- a/packages/cli/src/local-scan-connectors.test.ts +++ b/packages/cli/src/local-scan-connectors.test.ts @@ -95,29 +95,6 @@ describe('createKtxCliScanConnector', () => { ]); }); - it('does not create a standalone PostHog scan connector', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - await writeFile( - join(tempDir, 'ktx.yaml'), - [ - 'project: warehouse', - 'connections:', - ' product:', - ' driver: posthog', - ' api_key: phx_test', - ' project_id: "157881"', - ' readonly: true', - '', - ].join('\n'), - 'utf-8', - ); - const project = await loadKtxProject({ projectDir: tempDir }); - - await expect(createKtxCliScanConnector(project, 'product')).rejects.toThrow( - 'Connection "product" uses driver "posthog", which has no native standalone KTX scan connector', - ); - }); - it('throws for structural daemon-only fallback configs', async () => { await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); await writeFile( diff --git a/packages/cli/src/public-ingest.test.ts b/packages/cli/src/public-ingest.test.ts index e00b11da..13d8f364 100644 --- a/packages/cli/src/public-ingest.test.ts +++ b/packages/cli/src/public-ingest.test.ts @@ -80,13 +80,6 @@ describe('buildPublicIngestPlan', () => { ); }); - it('does not plan PostHog connections as CLI ingest targets', () => { - const project = projectWithConnections({ product: { driver: 'posthog' } }); - - expect(() => - buildPublicIngestPlan(project, { projectDir: '/tmp/project', targetConnectionId: 'product', all: false }), - ).toThrow('Connection "product" uses unsupported public ingest driver "posthog"'); - }); }); describe('runKtxPublicIngest', () => { diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 90694ada..d19be04c 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -500,6 +500,47 @@ describe('setup context build state', () => { expect(output).not.toContain('KTX context built: detached'); }); + it('supports d to detach from the progress watch view', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-detach', + status: 'running', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-detach'), + sourceProgress: [ + { connectionId: 'warehouse', operation: 'scan' as const, status: 'running' as const, startedAtMs: Date.now() }, + ], + }); + const io = makeIo(); + let triggerDetach: (() => void) | null = null; + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, + io.io, + { + sleep: async () => { triggerDetach?.(); }, + watchIntervalMs: 1, + setupKeystroke: (onDetach) => { + triggerDetach = onDetach; + return () => {}; + }, + }, + ), + ).resolves.toMatchObject({ status: 'detached' }); + + const output = io.stdout(); + expect(output).toContain('Building KTX context'); + expect(output).toContain('Context build continuing in the background.'); + expect(output).toContain('Resume: ktx setup --project-dir'); + }); + it('prints JSON setup context command status with watch and resume commands', async () => { await mkdir(join(tempDir, '.ktx', 'setup'), { recursive: true }); await writeKtxSetupContextState(tempDir, { diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index cd79d9bd..79f6cdd7 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -13,6 +13,7 @@ import { buildPublicIngestPlan } from './public-ingest.js'; import { type ContextBuildSourceProgressUpdate, createRepainter, + defaultSetupKeystroke, renderContextBuildView, runContextBuild, viewStateFromSourceProgress, @@ -109,6 +110,7 @@ export interface KtxSetupContextDeps { verifyContextReady?: (projectDir: string) => Promise; sleep?: (ms: number) => Promise; watchIntervalMs?: number; + setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; } interface KtxSetupContextTargets { @@ -870,50 +872,80 @@ async function watchContextStatusWithProgressView( const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; const isTTY = io.stdout.isTTY === true; const repainter = isTTY ? createRepainter(io) : null; + const projectDir = resolve(args.projectDir); + const viewOpts = { styled: isTTY, showHint: true, projectDir }; let state = initialState; - let frame = 0; let lastProgressKey = ''; + let detached = false; - while (true) { - const now = Date.now(); - const startedAtMs = state.startedAt ? new Date(state.startedAt).getTime() : undefined; - const viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], now, startedAtMs); - viewState.frame = frame; + let viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], Date.now(), + state.startedAt ? new Date(state.startedAt).getTime() : undefined); - const viewOpts = { - styled: isTTY, - showHint: true, - hintText: 'ctrl+c to stop watching · build continues in background', - }; + const cleanupKeystroke = (isTTY || deps.setupKeystroke) + ? (deps.setupKeystroke ?? defaultSetupKeystroke)( + () => { detached = true; }, + () => { detached = true; }, + ) + : null; - if (repainter) { - repainter.paint(renderContextBuildView(viewState, viewOpts)); - } else { - const currentKey = JSON.stringify(state.sourceProgress?.map((s) => s.status)); - if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) { - io.stdout.write(renderContextBuildView(viewState, viewOpts)); - lastProgressKey = currentKey; + let spinnerInterval: ReturnType | null = null; + if (repainter) { + repainter.paint(renderContextBuildView(viewState, viewOpts)); + spinnerInterval = setInterval(() => { + viewState.frame++; + const now = Date.now(); + viewState.totalElapsedMs = viewState.startedAt !== null ? now - viewState.startedAt : 0; + for (const t of [...viewState.primarySources, ...viewState.contextSources]) { + if (t.status === 'running' && t.startedAt !== null) { + t.elapsedMs = now - t.startedAt; + } } - } - - if (!isActiveStatus(state.status)) { - return { exitCode: watchExitCode(state.status), state }; - } - - frame++; - await sleep(intervalMs); - - try { - state = await readKtxSetupContextState(args.projectDir); - } catch { - continue; - } - - if (!stateMatchesRunId(state, args.runId)) { - io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); - return { exitCode: 1, state }; - } + repainter.paint(renderContextBuildView(viewState, viewOpts)); + }, 140); } + + try { + while (true) { + if (!repainter) { + const currentKey = JSON.stringify(state.sourceProgress?.map((s) => s.status)); + if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) { + io.stdout.write(renderContextBuildView(viewState, viewOpts)); + lastProgressKey = currentKey; + } + } + + if (!isActiveStatus(state.status)) { + return { exitCode: watchExitCode(state.status), state }; + } + if (detached) break; + + await sleep(intervalMs); + if (detached) break; + + try { + state = await readKtxSetupContextState(args.projectDir); + } catch { + continue; + } + + if (!stateMatchesRunId(state, args.runId)) { + io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); + return { exitCode: 1, state }; + } + + const now = Date.now(); + const startedAtMs = state.startedAt ? new Date(state.startedAt).getTime() : undefined; + viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], now, startedAtMs); + } + } finally { + if (spinnerInterval) clearInterval(spinnerInterval); + cleanupKeystroke?.(); + } + + io.stdout.write('\n\nContext build continuing in the background.\n'); + io.stdout.write(`Resume: ktx setup --project-dir ${projectDir}\n`); + io.stdout.write(`Status: ktx setup context status --project-dir ${projectDir}\n`); + return { exitCode: 0, state }; } function setupResultFromWatchedState(projectDir: string, state: KtxSetupContextState): KtxSetupContextResult { diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index 1ef973c9..b79e8e66 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -444,7 +444,6 @@ describe('setup sources step', () => { ); const options = vi.mocked(testPrompts.multiselect).mock.calls[0]?.[0].options ?? []; expect(options).toContainEqual({ value: 'notion', label: 'Notion' }); - expect(options).not.toContainEqual({ value: 'posthog', label: 'PostHog' }); }); it('uses a source-specific editable connection name for new interactive connections', async () => { diff --git a/packages/connector-posthog/package.json b/packages/connector-posthog/package.json deleted file mode 100644 index da2de540..00000000 --- a/packages/connector-posthog/package.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "name": "@ktx/connector-posthog", - "version": "0.0.0-private", - "description": "PostHog connector package for KTX scan interfaces", - "private": true, - "type": "module", - "engines": { - "node": ">=22.0.0" - }, - "main": "dist/index.js", - "types": "dist/index.d.ts", - "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js", - "default": "./dist/index.js" - }, - "./package.json": "./package.json" - }, - "files": [ - "dist" - ], - "scripts": { - "build": "tsc -p tsconfig.json", - "test": "vitest run", - "type-check": "tsc -p tsconfig.json --noEmit" - }, - "dependencies": { - "@ktx/context": "workspace:*" - }, - "devDependencies": { - "@types/node": "^24.3.0", - "typescript": "^5.9.3", - "vitest": "^4.0.18" - }, - "license": "Apache-2.0", - "repository": { - "type": "git", - "url": "git+https://github.com/kaelio/ktx.git", - "directory": "packages/connector-posthog" - }, - "bugs": { - "url": "https://github.com/kaelio/ktx/issues" - }, - "homepage": "https://github.com/kaelio/ktx#readme" -} diff --git a/packages/connector-posthog/src/connector.test.ts b/packages/connector-posthog/src/connector.test.ts deleted file mode 100644 index 69dc7223..00000000 --- a/packages/connector-posthog/src/connector.test.ts +++ /dev/null @@ -1,400 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { - createPostHogLiveDatabaseIntrospection, - isKtxPostHogConnectionConfig, - KtxPostHogScanConnector, - postHogConnectionConfigFromConfig, - type KtxPostHogConnectionConfig, - type KtxPostHogFetch, -} from './index.js'; - -function jsonResponse(body: unknown, status = 200): Response { - return { - ok: status >= 200 && status < 300, - status, - json: async () => body, - text: async () => JSON.stringify(body), - } as Response; -} - -function fakeFetch(queries: string[] = []): KtxPostHogFetch { - return vi.fn(async (_url: string, init?: RequestInit) => { - const body = JSON.parse(String(init?.body ?? '{}')) as { query?: { kind?: string; query?: string } }; - const sql = body.query?.query ?? ''; - if (sql) { - queries.push(sql); - } - if (body.query?.kind === 'DatabaseSchemaQuery') { - return jsonResponse({ - tables: { - events: { - id: 'events', - name: 'events', - type: 'posthog', - row_count: 42, - fields: { - uuid: { - name: 'uuid', - type: 'uuid', - hogql_value: 'uuid', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'uuid', - }, - event: { - name: 'event', - type: 'string', - hogql_value: 'event', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'event', - }, - timestamp: { - name: 'timestamp', - type: 'datetime', - hogql_value: 'timestamp', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'timestamp', - }, - properties: { - name: 'properties', - type: 'json', - hogql_value: 'properties', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'properties', - }, - virtual: { - name: 'virtual', - type: 'virtual_table', - hogql_value: 'virtual', - schema_valid: true, - table: null, - fields: null, - chain: null, - id: 'virtual', - }, - }, - }, - query_log: { - id: 'query_log', - name: 'query_log', - type: 'posthog', - row_count: 1, - fields: {}, - }, - }, - joins: [], - }); - } - if (sql.includes('SELECT * FROM person_distinct_ids LIMIT 0')) { - return jsonResponse({ - results: [], - columns: ['distinct_id', 'person_id'], - types: [ - ['distinct_id', 'String'], - ['person_id', 'UUID'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('LIMIT 0')) { - return jsonResponse({ results: null, columns: null, types: null, error: 'Table not found', hogql: sql }); - } - if (sql.includes('SELECT 1 AS test')) { - return jsonResponse({ results: [[1]], columns: ['test'], types: [['test', 'Int64']], error: null, hogql: sql }); - } - if (sql.includes('count() AS cnt')) { - return jsonResponse({ results: [[42]], columns: ['cnt'], types: [['cnt', 'Int64']], error: null, hogql: sql }); - } - if (sql.includes('GROUP BY event')) { - return jsonResponse({ - results: [['$pageview', 9]], - columns: ['event', 'cnt'], - types: [ - ['event', 'String'], - ['cnt', 'Int64'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('arrayJoin(JSONExtractKeys')) { - return jsonResponse({ - results: [['$browser', 7]], - columns: ['key', 'cnt'], - types: [ - ['key', 'String'], - ['cnt', 'Int64'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('uniq(JSONExtractString') || sql.includes('uniq(val) AS cardinality')) { - return jsonResponse({ - results: [[2]], - columns: ['cardinality'], - types: [['cardinality', 'Int64']], - error: null, - hogql: sql, - }); - } - if (sql.includes('DISTINCT JSONExtractString') || sql.includes('SELECT DISTINCT toString(')) { - return jsonResponse({ - results: [['Chrome'], ['Safari']], - columns: ['value'], - types: [['value', 'String']], - error: null, - hogql: sql, - }); - } - return jsonResponse({ results: [['$pageview']], columns: ['event'], types: [['event', 'String']], error: null, hogql: sql }); - }) as KtxPostHogFetch; -} - -const posthogApiKeyEnv = ['POSTHOG', 'API', 'KEY'].join('_'); -const fixtureToken = ['phx', 'fixture'].join('_'); -const env = { [posthogApiKeyEnv]: fixtureToken }; -const connection: KtxPostHogConnectionConfig & { driver: string } = { - driver: 'posthog', - ['api_' + 'key']: `env:${posthogApiKeyEnv}`, - project_id: '157881', - region: 'us', - readonly: true, -}; - -describe('KtxPostHogScanConnector', () => { - it('resolves configuration safely', () => { - expect(isKtxPostHogConnectionConfig(connection)).toBe(true); - expect(isKtxPostHogConnectionConfig({ driver: 'mysql' })).toBe(false); - const resolved = postHogConnectionConfigFromConfig({ - connectionId: 'product', - connection, - env, - }); - expect(resolved).toMatchObject({ projectId: '157881', baseUrl: 'https://us.posthog.com' }); - const tokenField = ['api', 'Key'].join('') as keyof typeof resolved; - expect(resolved[tokenField]).toBe(fixtureToken); - expect(() => - postHogConnectionConfigFromConfig({ - connectionId: 'product', - connection: { ...connection, readonly: false }, - }), - ).toThrow('Native PostHog connector requires connections.product.readonly: true'); - }); - - it('introspects schema metadata, hidden tables, descriptions, primary keys, and normalized types', async () => { - const connector = new KtxPostHogScanConnector({ - connectionId: 'product', - connection, - env, - fetch: fakeFetch(), - sleep: async () => {}, - now: () => new Date('2026-04-29T19:00:00.000Z'), - }); - - const snapshot = await connector.introspect({ connectionId: 'product', driver: 'posthog' }, { runId: 'scan-run-1' }); - - expect(snapshot).toMatchObject({ - connectionId: 'product', - driver: 'posthog', - extractedAt: '2026-04-29T19:00:00.000Z', - scope: { catalogs: ['157881'] }, - metadata: { - project_id: '157881', - table_count: 2, - total_columns: 6, - }, - }); - expect(snapshot.tables.map((table) => table.name)).toEqual(['events', 'person_distinct_ids']); - expect(snapshot.tables[0]).toMatchObject({ - catalog: '157881', - db: null, - name: 'events', - kind: 'event_stream', - estimatedRows: 42, - comment: expect.stringContaining('PostHog event stream'), - foreignKeys: [], - }); - expect(snapshot.tables[0]?.columns).toEqual([ - { - name: 'uuid', - nativeType: 'UUID', - normalizedType: 'UUID', - dimensionType: 'string', - nullable: false, - primaryKey: true, - comment: 'Unique identifier for this specific event.', - }, - { - name: 'event', - nativeType: 'String', - normalizedType: 'VARCHAR', - dimensionType: 'string', - nullable: false, - primaryKey: false, - comment: expect.stringContaining('Event name'), - }, - { - name: 'timestamp', - nativeType: 'DateTime64', - normalizedType: 'TIMESTAMP', - dimensionType: 'time', - nullable: false, - primaryKey: false, - comment: expect.stringContaining('UTC timestamp'), - }, - { - name: 'properties', - nativeType: 'JSON', - normalizedType: 'JSON', - dimensionType: 'string', - nullable: true, - primaryKey: false, - comment: expect.stringContaining('JSON object'), - }, - ]); - }); - - it('runs samples, read-only SQL, event-stream discovery, row counts, and cleanup', async () => { - const queries: string[] = []; - const connector = new KtxPostHogScanConnector({ - connectionId: 'product', - connection, - env, - fetch: fakeFetch(queries), - sleep: async () => {}, - }); - - await expect(connector.testConnection()).resolves.toEqual({ success: true }); - await expect( - connector.sampleTable( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - columns: ['event'], - limit: 1, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1 }); - await expect( - connector.sampleColumn( - { connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event', limit: 5 }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual({ values: ['$pageview'], nullCount: null, distinctCount: null }); - await expect( - connector.executeReadOnly({ connectionId: 'product', sql: 'select event from events', maxRows: 1 }, { runId: 'scan-run-1' }), - ).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1, rowCount: 1 }); - await expect( - connector.executeReadOnly({ connectionId: 'product', sql: 'delete from events' }, { runId: 'scan-run-1' }), - ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); - await expect(connector.getTableRowCount('events')).resolves.toBe(42); - await expect( - connector.getColumnDistinctValues({ catalog: '157881', db: null, name: 'events' }, 'properties.$browser', { - maxCardinality: 5, - limit: 10, - sampleSize: 100, - }), - ).resolves.toEqual({ values: ['Chrome', 'Safari'], cardinality: 2 }); - await expect( - connector.eventStreamDiscovery.listEventTypes( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - eventColumn: 'event', - limit: 10, - minCount: 30, - lookbackDays: 14, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual([{ value: '$pageview', count: 9 }]); - expect(queries.some((query) => query.includes('HAVING cnt >= 30'))).toBe(true); - expect(queries.some((query) => query.includes('INTERVAL 14 DAY'))).toBe(true); - - await expect( - connector.eventStreamDiscovery.listPropertyKeys( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - jsonColumn: 'properties', - sampleSize: 1000, - limit: 10, - lookbackDays: 7, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual([{ key: '$browser', count: 7 }]); - - await expect( - connector.eventStreamDiscovery.listPropertyValues( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - jsonColumn: 'properties', - propertyKey: '$browser', - limit: 10, - maxCardinality: 1000, - lookbackDays: 30, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual({ - values: ['Chrome', 'Safari'], - cardinality: 2, - }); - await expect( - connector.columnStats( - { connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event' }, - { runId: 'scan-run-1' }, - ), - ).resolves.toBeNull(); - await connector.cleanup(); - }); - - it('adapts native snapshots to live-database introspection snapshots', async () => { - const introspection = createPostHogLiveDatabaseIntrospection({ - connections: { product: connection }, - env, - fetch: fakeFetch(), - sleep: async () => {}, - now: () => new Date('2026-04-29T19:00:00.000Z'), - }); - - await expect(introspection.extractSchema('product')).resolves.toMatchObject({ - connectionId: 'product', - metadata: { project_id: '157881' }, - tables: expect.arrayContaining([ - expect.objectContaining({ - catalog: '157881', - db: null, - name: 'events', - columns: expect.arrayContaining([ - { - name: 'uuid', - nativeType: 'UUID', - normalizedType: 'UUID', - dimensionType: 'string', - nullable: false, - primaryKey: true, - comment: 'Unique identifier for this specific event.', - }, - ]), - }), - ]), - }); - }); -}); diff --git a/packages/connector-posthog/src/connector.ts b/packages/connector-posthog/src/connector.ts deleted file mode 100644 index 0ac2b37c..00000000 --- a/packages/connector-posthog/src/connector.ts +++ /dev/null @@ -1,609 +0,0 @@ -import { readFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; -import { assertReadOnlySql, limitSqlForExecution } from '@ktx/context/connections'; -import { - createKtxConnectorCapabilities, - type KtxColumnSampleInput, - type KtxColumnSampleResult, - type KtxColumnStatsInput, - type KtxColumnStatsResult, - type KtxEventPropertyDiscovery, - type KtxEventPropertyDiscoveryInput, - type KtxEventPropertyValuesInput, - type KtxEventPropertyValuesResult, - type KtxEventStreamDiscoveryPort, - type KtxEventTypeDiscovery, - type KtxEventTypeDiscoveryInput, - type KtxQueryResult, - type KtxReadOnlyQueryInput, - type KtxScanConnector, - type KtxScanContext, - type KtxScanInput, - type KtxSchemaColumn, - type KtxSchemaSnapshot, - type KtxSchemaTable, - type KtxTableRef, - type KtxTableSampleInput, - type KtxTableSampleResult, -} from '@ktx/context/scan'; -import { KtxPostHogDialect, type KtxPostHogSampleColumnInfo } from './dialect.js'; -import { getKtxPostHogColumnDescription, getKtxPostHogTableDescription } from './schema-descriptions.js'; - -export interface KtxPostHogConnectionConfig { - driver?: string; - api_key?: string; - apiKey?: string; - project_id?: string; - projectId?: string; - region?: 'us' | 'eu'; - host?: string; - readonly?: boolean; - [key: string]: unknown; -} - -export interface KtxPostHogResolvedConnectionConfig { - apiKey: string; - projectId: string; - baseUrl: string; -} - -export type KtxPostHogFetch = (url: string, init?: RequestInit) => Promise; - -export interface KtxPostHogScanConnectorOptions { - connectionId: string; - connection: KtxPostHogConnectionConfig | undefined; - env?: NodeJS.ProcessEnv; - fetch?: KtxPostHogFetch; - sleep?: (ms: number) => Promise; - now?: () => Date; -} - -export interface KtxPostHogReadOnlyQueryInput extends KtxReadOnlyQueryInput { - params?: Record; -} - -export interface KtxPostHogColumnDistinctValuesOptions { - maxCardinality: number; - limit: number; - sampleSize?: number; -} - -export interface KtxPostHogColumnDistinctValuesResult { - values: string[] | null; - cardinality: number; -} - -interface PostHogSchemaField { - name: string; - type: string; - hogql_value: string; - schema_valid: boolean; - table: string | null; - fields: string[] | null; - chain: string[] | null; - id: string | null; -} - -interface PostHogSchemaTable { - id: string; - name: string; - type: string; - row_count: number | null; - fields: Record; -} - -interface PostHogSchemaResponse { - tables: Record; - joins: unknown[]; -} - -interface PostHogQueryResponse { - results: unknown[][] | null; - columns: string[] | null; - types: [string, string][] | null; - error: string | null; - hogql: string | null; -} - -const allowedTableTypes = new Set(['posthog', 'system']); -const excludedTables = new Set([ - 'query_log', - 'system.teams', - 'system.exports', - 'system.ingestion_warnings', - 'system.insight_variables', - 'system.data_warehouse_sources', - 'system.groups', - 'system.group_type_mappings', -]); -const hiddenTablesToProbe = ['person_distinct_ids', 'cohort_people', 'static_cohort_people']; - -export function isKtxPostHogConnectionConfig(connection: KtxPostHogConnectionConfig | undefined): boolean { - return String(connection?.driver ?? '').toLowerCase() === 'posthog'; -} - -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - return env[value.slice('env:'.length)] ?? ''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} - -function stringConfigValue( - connection: KtxPostHogConnectionConfig | undefined, - key: keyof KtxPostHogConnectionConfig, - env: NodeJS.ProcessEnv, -): string | undefined { - const value = connection?.[key]; - return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; -} - -export function postHogConnectionConfigFromConfig(input: { - connectionId: string; - connection: KtxPostHogConnectionConfig | undefined; - env?: NodeJS.ProcessEnv; -}): KtxPostHogResolvedConnectionConfig { - if (!isKtxPostHogConnectionConfig(input.connection)) { - throw new Error(`Native PostHog connector cannot run driver "${input.connection?.driver ?? 'unknown'}"`); - } - if (input.connection?.readonly !== true) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.readonly: true`); - } - const env = input.env ?? process.env; - const apiKey = stringConfigValue(input.connection, 'api_key', env) ?? stringConfigValue(input.connection, 'apiKey', env); - const projectId = - stringConfigValue(input.connection, 'project_id', env) ?? stringConfigValue(input.connection, 'projectId', env); - if (!apiKey) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.api_key`); - } - if (!projectId) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.project_id`); - } - const host = stringConfigValue(input.connection, 'host', env); - const region = input.connection?.region ?? 'us'; - return { - apiKey, - projectId, - baseUrl: host ? host.replace(/\/$/, '') : region === 'eu' ? 'https://eu.posthog.com' : 'https://us.posthog.com', - }; -} - -export class KtxPostHogScanConnector implements KtxScanConnector { - readonly id: string; - readonly driver = 'posthog' as const; - readonly capabilities = createKtxConnectorCapabilities({ - tableSampling: true, - columnSampling: true, - columnStats: false, - readOnlySql: true, - nestedAnalysis: true, - eventStreamDiscovery: true, - formalForeignKeys: false, - estimatedRowCounts: true, - }); - - readonly eventStreamDiscovery: KtxEventStreamDiscoveryPort = { - listEventTypes: (input, ctx) => this.listEventTypes(input, ctx), - listPropertyKeys: (input, ctx) => this.listPropertyKeys(input, ctx), - listPropertyValues: (input, ctx) => this.listPropertyValues(input, ctx), - }; - - private readonly connectionId: string; - private readonly resolved: KtxPostHogResolvedConnectionConfig; - private readonly fetchImpl: KtxPostHogFetch; - private readonly sleep: (ms: number) => Promise; - private readonly now: () => Date; - private readonly dialect = new KtxPostHogDialect(); - - constructor(options: KtxPostHogScanConnectorOptions) { - this.connectionId = options.connectionId; - this.resolved = postHogConnectionConfigFromConfig({ - connectionId: options.connectionId, - connection: options.connection, - env: options.env, - }); - this.fetchImpl = options.fetch ?? fetch; - this.sleep = options.sleep ?? ((ms) => new Promise((resolveSleep) => setTimeout(resolveSleep, ms))); - this.now = options.now ?? (() => new Date()); - this.id = `posthog:${options.connectionId}`; - } - - async testConnection(): Promise<{ success: boolean; error?: string }> { - const response = await this.query('SELECT 1 AS test'); - return response.error ? { success: false, error: response.error } : { success: true }; - } - - async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const response = await this.makeRequest('/query', { query: { kind: 'DatabaseSchemaQuery' } }); - const tables: KtxSchemaTable[] = []; - for (const [tableName, tableInfo] of Object.entries(response.tables ?? {})) { - if (!allowedTableTypes.has(tableInfo.type) || excludedTables.has(tableName)) { - continue; - } - tables.push(this.toSchemaTable(tableName, tableInfo)); - } - tables.push(...(await this.discoverHiddenTables())); - tables.sort((left, right) => left.name.localeCompare(right.name)); - return { - connectionId: this.connectionId, - driver: 'posthog', - extractedAt: this.now().toISOString(), - scope: { catalogs: [this.resolved.projectId] }, - metadata: { - project_id: this.resolved.projectId, - table_count: tables.length, - total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), - }, - tables, - }; - } - - async sampleTable( - input: KtxTableSampleInput & { columnMetadata?: KtxPostHogSampleColumnInfo[] }, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const sql = input.columnMetadata - ? this.dialect.generateSampleQueryWithMetadata(this.qTableName(input.table), input.limit, input.columnMetadata) - : this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns); - const result = await this.query(sql); - return { headers: result.headers, rows: result.rows, totalRows: result.totalRows }; - } - - async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const result = await this.query( - this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), - ); - const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]); - return { values, nullCount: null, distinctCount: null }; - } - - async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise { - return null; - } - - async executeReadOnly(input: KtxPostHogReadOnlyQueryInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows); - const prepared = this.dialect.prepareQuery(limitedSql, input.params); - const result = await this.query(prepared.sql, prepared.params); - return { ...result, rowCount: result.rows.length }; - } - - async getTableRowCount(tableName: string): Promise { - const result = await this.query(`SELECT count() AS cnt FROM ${this.dialect.quoteIdentifier(tableName)}`); - return Number(result.rows[0]?.[0] ?? 0); - } - - async getColumnDistinctValues( - table: KtxTableRef, - columnName: string, - options: KtxPostHogColumnDistinctValuesOptions, - ): Promise { - const sampleSize = options.sampleSize ?? 10000; - const tableName = this.qTableName(table); - const cardinalityResult = await this.query( - this.dialect.generateCardinalitySampleQuery(tableName, columnName, sampleSize), - ); - if (cardinalityResult.error || cardinalityResult.rows.length === 0) { - return null; - } - const cardinality = Number(cardinalityResult.rows[0]?.[0]); - if (!Number.isFinite(cardinality)) { - return null; - } - if (cardinality === 0) { - return { values: [], cardinality: 0 }; - } - if (cardinality > options.maxCardinality) { - return { values: null, cardinality }; - } - const valuesResult = await this.query(this.dialect.generateDistinctValuesQuery(tableName, columnName, options.limit)); - if (valuesResult.error) { - return null; - } - return { - values: valuesResult.rows.filter((row) => row[0] !== null).map((row) => String(row[0])), - cardinality, - }; - } - - private async listEventTypes( - input: KtxEventTypeDiscoveryInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const limit = this.positiveInteger(input.limit, 'limit'); - const lookbackDays = this.positiveInteger(input.lookbackDays ?? 30, 'lookbackDays'); - const minCount = this.positiveInteger(input.minCount ?? 0, 'minCount'); - const eventColumn = this.dialect.quoteIdentifier(input.eventColumn); - const tableName = this.qTableName(input.table); - const havingClause = minCount > 0 ? `HAVING cnt >= ${minCount}` : ''; - const result = await this.query(` - SELECT ${eventColumn} AS event, count() as cnt - FROM ${tableName} - WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY - GROUP BY event - ${havingClause} - ORDER BY cnt DESC - LIMIT ${limit} - `); - if (result.error) { - return []; - } - return result.rows - .filter((row) => row[0] != null && String(row[0]).trim() !== '') - .map((row) => ({ value: String(row[0]), count: Number(row[1]) })); - } - - private async listPropertyKeys( - input: KtxEventPropertyDiscoveryInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const sampleSize = this.positiveInteger(input.sampleSize, 'sampleSize'); - const limit = this.positiveInteger(input.limit, 'limit'); - const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays'); - const tableName = this.qTableName(input.table); - const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn); - const whereClause = lookbackDays === null ? '' : `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`; - const result = await this.query(` - SELECT key, count() as cnt - FROM ( - SELECT arrayJoin(JSONExtractKeys(${jsonColumn})) AS key - FROM ${tableName} - ${whereClause} - LIMIT ${sampleSize} - ) - GROUP BY key - ORDER BY cnt DESC - LIMIT ${limit} - `); - if (result.error) { - return []; - } - return result.rows.map((row) => ({ key: String(row[0]), count: Number(row[1]) })); - } - - private async listPropertyValues( - input: KtxEventPropertyValuesInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const limit = this.positiveInteger(input.limit, 'limit'); - const maxCardinality = this.positiveInteger(input.maxCardinality ?? 1000, 'maxCardinality'); - const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays'); - const tableName = this.qTableName(input.table); - const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn); - const escapedKey = this.escapeHogQLString(input.propertyKey); - const timeFilter = lookbackDays === null ? '' : `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`; - const cardinalityResult = await this.query(` - SELECT uniq(JSONExtractString(${jsonColumn}, '${escapedKey}')) as cardinality - FROM ${tableName} - ${timeFilter} - LIMIT 1000000 - `); - if (cardinalityResult.error || cardinalityResult.rows.length === 0) { - return null; - } - const cardinality = Number(cardinalityResult.rows[0]?.[0]); - if (!Number.isFinite(cardinality) || cardinality > maxCardinality) { - return null; - } - const valuesResult = await this.query(` - SELECT DISTINCT JSONExtractString(${jsonColumn}, '${escapedKey}') as value - FROM ${tableName} - WHERE JSONExtractString(${jsonColumn}, '${escapedKey}') IS NOT NULL - AND JSONExtractString(${jsonColumn}, '${escapedKey}') != '' - ${lookbackDays === null ? '' : `AND timestamp > now() - INTERVAL ${lookbackDays} DAY`} - ORDER BY value - LIMIT ${limit} - `); - if (valuesResult.error) { - return null; - } - const values = valuesResult.rows - .map((row) => (row[0] != null ? String(row[0]) : '')) - .filter((value) => { - const trimmed = value.trim(); - return trimmed !== '' && trimmed !== '[]' && trimmed !== '{}' && trimmed !== 'null'; - }); - return { values, cardinality }; - } - - async cleanup(): Promise {} - - qTableName(table: Pick): string { - return this.dialect.formatTableName(table); - } - - quoteIdentifier(identifier: string): string { - return this.dialect.quoteIdentifier(identifier); - } - - private toSchemaTable(tableName: string, tableInfo: PostHogSchemaTable): KtxSchemaTable { - return { - catalog: this.resolved.projectId, - db: null, - name: tableName, - kind: tableName === 'events' ? 'event_stream' : 'table', - comment: getKtxPostHogTableDescription(tableName) ?? null, - estimatedRows: tableInfo.row_count ?? null, - columns: this.extractColumns(tableName, tableInfo.fields), - foreignKeys: [], - }; - } - - private async discoverHiddenTables(): Promise { - const tables: KtxSchemaTable[] = []; - for (const tableName of hiddenTablesToProbe) { - const result = await this.query(`SELECT * FROM ${tableName} LIMIT 0`); - if (result.error) { - continue; - } - tables.push({ - catalog: this.resolved.projectId, - db: null, - name: tableName, - kind: 'table', - comment: getKtxPostHogTableDescription(tableName) ?? null, - estimatedRows: null, - columns: result.headers.map((header) => ({ - name: header, - nativeType: 'String', - normalizedType: 'VARCHAR', - dimensionType: 'string', - nullable: true, - primaryKey: false, - comment: getKtxPostHogColumnDescription(tableName, header) ?? null, - })), - foreignKeys: [], - }); - } - return tables; - } - - private extractColumns(tableName: string, fields: Record): KtxSchemaColumn[] { - const columns: KtxSchemaColumn[] = []; - for (const [fieldName, fieldInfo] of Object.entries(fields)) { - if ( - fieldInfo.type === 'lazy_table' || - fieldInfo.type === 'virtual_table' || - fieldInfo.type === 'field_traverser' || - fieldInfo.type === 'expression' - ) { - continue; - } - const nativeType = this.normalizeFieldType(fieldInfo.type); - columns.push({ - name: fieldName, - nativeType, - normalizedType: this.dialect.mapDataType(nativeType), - dimensionType: this.dialect.mapToDimensionType(nativeType), - nullable: this.isNullableField(tableName, fieldName, fieldInfo.type), - primaryKey: this.isPrimaryKeyField(tableName, fieldName), - comment: getKtxPostHogColumnDescription(tableName, fieldName) ?? null, - }); - } - return columns; - } - - private normalizeFieldType(posthogType: string): string { - const typeMap: Record = { - string: 'String', - integer: 'Int64', - datetime: 'DateTime64', - boolean: 'UInt8', - bool: 'Boolean', - json: 'JSON', - array: 'Array(String)', - uuid: 'UUID', - event: 'String', - }; - return typeMap[posthogType.toLowerCase()] ?? posthogType; - } - - private isNullableField(tableName: string, fieldName: string, fieldType: string): boolean { - if (tableName === 'events' && ['uuid', 'event', 'timestamp', 'distinct_id'].includes(fieldName)) { - return false; - } - return !['uuid', 'event', 'timestamp', 'distinct_id'].includes(fieldType.toLowerCase()); - } - - private isPrimaryKeyField(tableName: string, fieldName: string): boolean { - return ( - (tableName === 'events' && fieldName === 'uuid') || - (tableName === 'persons' && fieldName === 'id') || - (tableName === 'sessions' && fieldName === 'session_id') || - (tableName === 'groups' && fieldName === 'key') - ); - } - - private async query(sql: string, params?: Record): Promise { - const response = await this.makeRequest('/query', { - query: { - kind: 'HogQLQuery', - query: sql, - ...(params && Object.keys(params).length > 0 ? { values: params } : {}), - }, - }); - if (response.error) { - return { headers: [], rows: [], totalRows: 0, rowCount: null, error: response.error }; - } - const headers = response.columns ?? []; - const rows = response.results ?? []; - const headerTypes = response.types?.map((type) => type[1]); - return { - headers, - rows, - totalRows: rows.length, - rowCount: rows.length, - ...(headerTypes && headerTypes.length > 0 ? { headerTypes } : {}), - }; - } - - private async makeRequest(endpoint: string, body: Record, maxRetries = 3): Promise { - const url = `${this.resolved.baseUrl}/api/projects/${this.resolved.projectId}${endpoint}`; - let lastError: Error | null = null; - for (let attempt = 0; attempt <= maxRetries; attempt += 1) { - const response = await this.fetchImpl(url, { - method: 'POST', - headers: { - Authorization: `Bearer ${this.resolved.apiKey}`, - 'Content-Type': 'application/json', - }, - body: JSON.stringify(body), - }); - if (response.ok) { - return response.json() as Promise; - } - const errorText = await response.text(); - const errorMessage = this.parseErrorMessage(errorText); - if (response.status === 429 && attempt < maxRetries) { - await this.sleep(this.parseRateLimitWaitTime(errorMessage) * 1000); - continue; - } - lastError = new Error(`PostHog API error (${response.status}): ${errorMessage}`); - } - throw lastError ?? new Error('PostHog API request failed after retries'); - } - - private parseErrorMessage(errorText: string): string { - try { - const errorJson = JSON.parse(errorText) as { detail?: unknown; error?: unknown }; - return String(errorJson.detail ?? errorJson.error ?? errorText); - } catch { - return errorText; - } - } - - private parseRateLimitWaitTime(errorMessage: string): number { - const match = errorMessage.match(/(?:Expected available in|retry after) (\d+) seconds?/i); - return match ? Number.parseInt(match[1] ?? '30', 10) + 2 : 30; - } - - private escapeHogQLString(value: string): string { - return value.replace(/\\/g, '\\\\').replace(/'/g, "''"); - } - - private positiveInteger(value: number, name: string): number { - if (!Number.isInteger(value) || value < 0) { - throw new Error(`PostHog event-stream discovery requires ${name} to be a non-negative integer`); - } - return value; - } - - private assertConnection(connectionId: string): void { - if (connectionId !== this.connectionId) { - throw new Error(`PostHog connector ${this.connectionId} cannot scan connection ${connectionId}`); - } - } -} diff --git a/packages/connector-posthog/src/dialect.test.ts b/packages/connector-posthog/src/dialect.test.ts deleted file mode 100644 index 5c5b2c43..00000000 --- a/packages/connector-posthog/src/dialect.test.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { KtxPostHogDialect } from './dialect.js'; - -describe('KtxPostHogDialect', () => { - const dialect = new KtxPostHogDialect(); - - it('quotes identifiers, formats table names, maps types, and prepares HogQL params', () => { - expect(dialect.quoteIdentifier('weird`name')).toBe('`weird\\`name`'); - expect(dialect.formatTableName({ name: 'events', catalog: '157881', db: null })).toBe('`events`'); - expect(dialect.mapDataType('Nullable(DateTime64(6, UTC))')).toBe('TIMESTAMP'); - expect(dialect.mapDataType('Array(String)')).toBe('JSON'); - expect(dialect.mapToDimensionType('UInt8')).toBe('number'); - expect(dialect.mapToDimensionType('Boolean')).toBe('boolean'); - expect(dialect.prepareQuery('SELECT * FROM events WHERE event = :event', { event: '$pageview' })).toEqual({ - sql: 'SELECT * FROM events WHERE event = {event}', - params: { event: '$pageview' }, - }); - }); - - it('builds sample and virtual-property queries without app dependencies', () => { - expect(dialect.generateSampleQuery('`events`', 5, ['event', 'timestamp'])).toBe( - 'SELECT `event`, `timestamp` FROM `events` ORDER BY rand() LIMIT 5', - ); - expect( - dialect.generateSampleQueryWithMetadata('`events`', 3, [ - { name: 'event', parentColumnId: null }, - { name: 'properties.$browser', parentColumnId: 'properties' }, - ]), - ).toBe( - "SELECT `event`, JSONExtractString(properties, '$browser') AS `properties.$browser` FROM `events` ORDER BY rand() LIMIT 3", - ); - expect(dialect.generateColumnSampleQuery('`events`', 'properties.$browser', 10)).toBe( - "SELECT JSONExtractString(properties, '$browser') FROM `events` WHERE JSONExtractString(properties, '$browser') IS NOT NULL ORDER BY rand() LIMIT 10", - ); - }); - - it('builds data-dictionary and time helper SQL', () => { - expect(dialect.generateCardinalitySampleQuery('events', 'properties.$browser', 100)).toContain( - "JSONExtractString(properties, '$browser') AS val", - ); - expect(dialect.generateDistinctValuesQuery('events', 'event', 20)).toContain('SELECT DISTINCT toString(`event`) AS val'); - expect(dialect.getNullCountExpression('event')).toBe('countIf(event IS NULL)'); - expect(dialect.getDistinctCountExpression('event')).toBe('uniq(event)'); - expect(dialect.getTimeTruncExpression('timestamp', 'week', 'UTC')).toBe("DATE_TRUNC('week', toTimeZone(timestamp, 'UTC'))"); - expect(dialect.parseIntervalToSql('7 day')).toBe('INTERVAL 7 DAY'); - expect(dialect.generateColumnStatisticsQuery('', 'events')).toBeNull(); - }); -}); diff --git a/packages/connector-posthog/src/dialect.ts b/packages/connector-posthog/src/dialect.ts deleted file mode 100644 index 36f6edee..00000000 --- a/packages/connector-posthog/src/dialect.ts +++ /dev/null @@ -1,258 +0,0 @@ -import type { KtxSchemaDimensionType, KtxTableRef } from '@ktx/context/scan'; - -type PostHogTableNameRef = Pick & Partial>; - -export interface KtxPostHogSampleColumnInfo { - name: string; - parentColumnId: string | null; -} - -export class KtxPostHogDialect { - readonly type = 'posthog'; - - private readonly typeMappings: Record = { - datetime64: 'time', - datetime: 'time', - date: 'time', - int64: 'number', - int32: 'number', - int16: 'number', - int8: 'number', - uint64: 'number', - uint32: 'number', - uint16: 'number', - uint8: 'number', - float64: 'number', - float32: 'number', - decimal: 'number', - integer: 'number', - string: 'string', - uuid: 'string', - json: 'string', - boolean: 'boolean', - bool: 'boolean', - }; - - quoteIdentifier(identifier: string): string { - return `\`${identifier.replace(/`/g, '\\`')}\``; - } - - formatTableName(table: PostHogTableNameRef): string { - return this.quoteIdentifier(table.name); - } - - mapDataType(nativeType: string): string { - const cleanType = this.cleanType(nativeType); - const typeMapping: Record = { - STRING: 'VARCHAR', - UUID: 'UUID', - INT64: 'BIGINT', - INT32: 'INTEGER', - INT16: 'SMALLINT', - INT8: 'TINYINT', - UINT64: 'BIGINT', - UINT32: 'INTEGER', - UINT16: 'SMALLINT', - UINT8: 'TINYINT', - FLOAT64: 'DOUBLE', - FLOAT32: 'FLOAT', - DATETIME64: 'TIMESTAMP', - DATETIME: 'TIMESTAMP', - DATE: 'DATE', - JSON: 'JSON', - ARRAY: 'JSON', - BOOLEAN: 'BOOLEAN', - BOOL: 'BOOLEAN', - }; - return typeMapping[cleanType] ?? cleanType; - } - - mapToDimensionType(nativeType: string): KtxSchemaDimensionType { - if (!nativeType) { - return 'string'; - } - const cleanType = this.cleanType(nativeType).toLowerCase(); - if (this.typeMappings[cleanType]) { - return this.typeMappings[cleanType]; - } - if (cleanType.includes('date') || cleanType.includes('time')) { - return 'time'; - } - if (cleanType.includes('int') || cleanType.includes('float') || cleanType.includes('decimal') || cleanType.includes('num')) { - return 'number'; - } - if (cleanType === 'bool' || cleanType === 'boolean') { - return 'boolean'; - } - return 'string'; - } - - generateSampleQuery(tableName: string, limit: number, columns?: string[]): string { - const columnList = - columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*'; - return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`; - } - - generateSampleQueryWithMetadata(tableName: string, limit: number, columnMetadata?: KtxPostHogSampleColumnInfo[]): string { - if (!columnMetadata || columnMetadata.length === 0) { - return this.generateSampleQuery(tableName, limit); - } - const columnList = columnMetadata - .map((column) => { - if (!column.parentColumnId) { - return this.quoteIdentifier(column.name); - } - const expression = this.formatColumnExpression(column.name); - return `${expression} AS ${this.quoteIdentifier(column.name)}`; - }) - .join(', '); - return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`; - } - - generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string { - const colExpr = this.formatColumnExpression(columnName); - return `SELECT ${colExpr} FROM ${tableName} WHERE ${colExpr} IS NOT NULL ORDER BY rand() LIMIT ${limit}`; - } - - prepareQuery(sql: string, params?: Record): { sql: string; params?: Record } { - if (!params) { - return { sql, params: undefined }; - } - let processedSql = sql; - const processedParams: Record = {}; - for (const [key, value] of Object.entries(params)) { - processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `{${key}}`); - processedParams[key] = value; - } - return { - sql: processedSql, - params: Object.keys(processedParams).length > 0 ? processedParams : undefined, - }; - } - - getRandomSampleFilter(samplePct: number): string { - if (samplePct <= 0 || samplePct >= 1) { - return ''; - } - return `rand() < ${samplePct}`; - } - - getTableSampleClause(_samplePct: number): string { - return ''; - } - - getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; - } - - getNullCountExpression(column: string): string { - return `countIf(${column} IS NULL)`; - } - - getDistinctCountExpression(column: string): string { - return `uniq(${column})`; - } - - generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT uniq(val) AS cardinality - FROM ( - SELECT ${colExpr} AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - LIMIT ${sampleSize} - ) - `; - } - - generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT DISTINCT toString(${colExpr}) AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - ORDER BY val - LIMIT ${limit} - `; - } - - generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null { - return null; - } - - generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT uniq(val) AS cardinality - FROM ( - SELECT ${colExpr} AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - ORDER BY rand() - LIMIT ${sampleSize} - ) - `; - } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const col = timezone ? `toTimeZone(${column}, '${timezone}')` : column; - return `DATE_TRUNC('${granularity}', ${col})`; - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const col = timezone ? `toTimeZone(${column}, '${timezone}')` : column; - const [amount, unit] = interval.split(' '); - const seconds = Number(amount) * this.getUnitSeconds(unit ?? 'day'); - const originExpr = origin ? `toDateTime('${origin}')` : `toDateTime('1970-01-01')`; - return `${originExpr} + toIntervalSecond(intDiv(toUnixTimestamp(${col}) - toUnixTimestamp(${originExpr}), ${seconds}) * ${seconds})`; - } - - parseIntervalToSql(interval: string): string { - const [amount, unit] = interval.split(' '); - return `INTERVAL ${amount} ${unit?.toUpperCase() ?? 'DAY'}`; - } - - private formatColumnExpression(columnName: string): string { - const rawName = columnName.replace(/^`|`$/g, ''); - const propertyMatch = rawName.match(/^(properties|person\.properties)\.(.+)$/); - if (propertyMatch) { - const [, parentCol, propertyKey] = propertyMatch; - return `JSONExtractString(${parentCol}, '${propertyKey.replace(/'/g, "''")}')`; - } - return this.quoteIdentifier(rawName); - } - - private cleanType(nativeType: string): string { - let cleanType = nativeType.toUpperCase().trim(); - const nullableMatch = cleanType.match(/^NULLABLE\((.+)\)$/); - if (nullableMatch) { - cleanType = nullableMatch[1] ?? cleanType; - } - if (cleanType.startsWith('ARRAY(')) { - return 'ARRAY'; - } - if (cleanType.startsWith('DATETIME64')) { - return 'DATETIME64'; - } - return cleanType; - } - - private getUnitSeconds(unit: string): number { - const secondsByUnit: Record = { - second: 1, - minute: 60, - hour: 3600, - day: 86400, - week: 604800, - month: 2592000, - quarter: 7776000, - year: 31536000, - }; - return secondsByUnit[unit.toLowerCase()] ?? 86400; - } -} diff --git a/packages/connector-posthog/src/index.ts b/packages/connector-posthog/src/index.ts deleted file mode 100644 index 7fa61ebb..00000000 --- a/packages/connector-posthog/src/index.ts +++ /dev/null @@ -1,19 +0,0 @@ -export { KtxPostHogDialect, type KtxPostHogSampleColumnInfo } from './dialect.js'; -export { - getKtxPostHogColumnDescription, - getKtxPostHogPropertyDescription, - getKtxPostHogTableDescription, -} from './schema-descriptions.js'; -export { - isKtxPostHogConnectionConfig, - KtxPostHogScanConnector, - postHogConnectionConfigFromConfig, - type KtxPostHogColumnDistinctValuesOptions, - type KtxPostHogColumnDistinctValuesResult, - type KtxPostHogConnectionConfig, - type KtxPostHogFetch, - type KtxPostHogReadOnlyQueryInput, - type KtxPostHogResolvedConnectionConfig, - type KtxPostHogScanConnectorOptions, -} from './connector.js'; -export { createPostHogLiveDatabaseIntrospection } from './live-database-introspection.js'; diff --git a/packages/connector-posthog/src/live-database-introspection.ts b/packages/connector-posthog/src/live-database-introspection.ts deleted file mode 100644 index 04828a19..00000000 --- a/packages/connector-posthog/src/live-database-introspection.ts +++ /dev/null @@ -1,34 +0,0 @@ -import type { LiveDatabaseIntrospectionPort } from '@ktx/context/ingest'; -import type { KtxProjectConnectionConfig } from '@ktx/context/project'; -import { KtxPostHogScanConnector, type KtxPostHogConnectionConfig, type KtxPostHogFetch } from './connector.js'; - -interface CreatePostHogLiveDatabaseIntrospectionOptions { - connections: Record; - env?: NodeJS.ProcessEnv; - fetch?: KtxPostHogFetch; - sleep?: (ms: number) => Promise; - now?: () => Date; -} - -export function createPostHogLiveDatabaseIntrospection( - options: CreatePostHogLiveDatabaseIntrospectionOptions, -): LiveDatabaseIntrospectionPort { - return { - async extractSchema(connectionId: string) { - const connection = options.connections[connectionId] as KtxPostHogConnectionConfig | undefined; - const connector = new KtxPostHogScanConnector({ - connectionId, - connection, - env: options.env, - fetch: options.fetch, - sleep: options.sleep, - now: options.now, - }); - try { - return await connector.introspect({ connectionId, driver: 'posthog' }, { runId: `posthog-${connectionId}` }); - } finally { - await connector.cleanup(); - } - }, - }; -} diff --git a/packages/connector-posthog/src/package-exports.test.ts b/packages/connector-posthog/src/package-exports.test.ts deleted file mode 100644 index f9d822ae..00000000 --- a/packages/connector-posthog/src/package-exports.test.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import * as posthog from './index.js'; - -describe('@ktx/connector-posthog package exports', () => { - it('exports the connector, dialect, descriptions, and live-database adapter', () => { - expect(posthog.KtxPostHogDialect).toBeTypeOf('function'); - expect(posthog.KtxPostHogScanConnector).toBeTypeOf('function'); - expect(posthog.createPostHogLiveDatabaseIntrospection).toBeTypeOf('function'); - expect(posthog.getKtxPostHogPropertyDescription('$browser')).toBe('User browser name.'); - }); -}); diff --git a/packages/connector-posthog/src/schema-descriptions.ts b/packages/connector-posthog/src/schema-descriptions.ts deleted file mode 100644 index d333fcb4..00000000 --- a/packages/connector-posthog/src/schema-descriptions.ts +++ /dev/null @@ -1,99 +0,0 @@ -const TABLE_DESCRIPTIONS: Record = { - events: - 'PostHog event stream containing all tracked user interactions. Each row represents a single event with properties, timestamp, and user identifier.', - persons: - 'PostHog persons table containing unique users, identifiers, and user properties for segmentation and cohort analysis.', - sessions: - 'PostHog sessions table grouping events into user sessions with duration, entry and exit URLs, and device details.', - groups: - 'PostHog groups table for B2B and team-based analytics. Contains group identifiers and group properties.', - person_distinct_ids: 'PostHog identity resolution table mapping distinct_ids to person_ids.', - cohort_people: 'PostHog dynamic cohort membership table.', - static_cohort_people: 'PostHog static cohort membership table.', - 'system.cohorts': 'PostHog cohort definitions table.', - 'system.feature_flags': 'PostHog feature flag definitions table.', - 'system.experiments': 'PostHog A/B test and experiment definitions table.', - 'system.surveys': 'PostHog survey definitions table.', - 'system.dashboards': 'PostHog dashboard metadata table.', - 'system.insights': 'PostHog saved insight and chart definitions table.', -}; - -const COLUMN_DESCRIPTIONS: Record = { - 'events.uuid': 'Unique identifier for this specific event.', - 'events.event': 'Event name such as $pageview, $autocapture, $identify, or a custom event.', - 'events.distinct_id': 'User identifier that links events to persons.', - 'events.timestamp': 'UTC timestamp when the event occurred.', - 'events.created_at': 'Timestamp when the event was ingested into PostHog.', - 'events.properties': 'JSON object containing event-specific properties.', - 'events.person_id': 'Internal PostHog person UUID.', - 'events.$session_id': 'Session identifier linking this event to sessions.', - 'persons.id': 'Internal PostHog person UUID.', - 'persons.distinct_id': 'Primary user identifier for joins with events.', - 'persons.properties': 'JSON object containing user properties.', - 'persons.created_at': 'Timestamp when this person was first seen in PostHog.', - 'persons.is_identified': 'Whether the person has been explicitly identified.', - 'sessions.session_id': 'Unique session identifier.', - 'sessions.distinct_id': 'User identifier for this session.', - 'sessions.$start_timestamp': 'Timestamp when the session started.', - 'sessions.$end_timestamp': 'Timestamp when the session ended.', - 'sessions.$session_duration': 'Total session duration in seconds.', - 'groups.index': 'Index identifying the configured PostHog group type.', - 'groups.key': 'Unique identifier for this group.', - 'groups.properties': 'JSON object containing group properties.', - 'groups.created_at': 'Timestamp when this group was first seen.', - 'person_distinct_ids.distinct_id': 'Device or browser identifier for a person.', - 'person_distinct_ids.person_id': 'Internal PostHog person UUID mapped to the distinct_id.', - 'cohort_people.person_id': 'Person UUID belonging to the cohort.', - 'cohort_people.cohort_id': 'Cohort identifier.', - 'static_cohort_people.person_id': 'Person UUID belonging to the static cohort.', - 'static_cohort_people.cohort_id': 'Static cohort identifier.', - 'system.cohorts.id': 'Unique cohort identifier.', - 'system.cohorts.name': 'Human-readable cohort name.', - 'system.feature_flags.id': 'Unique feature flag identifier.', - 'system.feature_flags.key': 'Feature flag key used in code.', - 'system.experiments.id': 'Unique experiment identifier.', - 'system.experiments.name': 'Experiment name.', - 'system.surveys.id': 'Unique survey identifier.', - 'system.surveys.name': 'Survey name.', - 'system.dashboards.id': 'Unique dashboard identifier.', - 'system.dashboards.name': 'Dashboard name.', - 'system.insights.id': 'Unique insight identifier.', - 'system.insights.name': 'Insight or chart name.', -}; - -const PROPERTY_DESCRIPTIONS: Record = { - $browser: 'User browser name.', - $browser_version: 'User browser version.', - $os: 'Operating system.', - $os_version: 'Operating system version.', - $device: 'Device name.', - $device_type: 'Device type.', - $current_url: 'Full URL of the current page.', - $pathname: 'Path portion of the current URL.', - $host: 'Hostname of the current page.', - $referrer: 'Referrer URL.', - $referring_domain: 'Referrer domain.', - $utm_source: 'UTM source parameter.', - $utm_medium: 'UTM medium parameter.', - $utm_campaign: 'UTM campaign parameter.', - $utm_content: 'UTM content parameter.', - $utm_term: 'UTM term parameter.', - $lib: 'PostHog library name used to capture the event.', - $lib_version: 'PostHog library version.', - $insert_id: 'Unique identifier for event deduplication.', - $active_feature_flags: 'List of active feature flags for this user or event.', - $feature_flag: 'Feature flag name for flag-related events.', - $feature_flag_response: 'Feature flag value or variant.', -}; - -export function getKtxPostHogTableDescription(tableName: string): string | undefined { - return TABLE_DESCRIPTIONS[tableName]; -} - -export function getKtxPostHogColumnDescription(tableName: string, columnName: string): string | undefined { - return COLUMN_DESCRIPTIONS[`${tableName}.${columnName}`]; -} - -export function getKtxPostHogPropertyDescription(propertyKey: string): string | null { - return PROPERTY_DESCRIPTIONS[propertyKey] ?? null; -} diff --git a/packages/connector-posthog/tsconfig.json b/packages/connector-posthog/tsconfig.json deleted file mode 100644 index 965e6978..00000000 --- a/packages/connector-posthog/tsconfig.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "extends": "../../tsconfig.base.json", - "compilerOptions": { - "outDir": "./dist", - "rootDir": "./src" - }, - "include": ["src/**/*.ts"], - "exclude": ["dist", "node_modules"] -} diff --git a/packages/context/src/connections/connection-type.ts b/packages/context/src/connections/connection-type.ts index 81c17bb4..6cd48042 100644 --- a/packages/context/src/connections/connection-type.ts +++ b/packages/context/src/connections/connection-type.ts @@ -18,7 +18,6 @@ export const connectionTypeSchema = z.enum([ 'METABASE', 'LOOKER', 'NOTION', - 'POSTHOG', 'MYSQL', 'CLICKHOUSE', 'PLAIN', diff --git a/packages/context/src/mcp/local-project-ports.ts b/packages/context/src/mcp/local-project-ports.ts index 60808426..d2ad139f 100644 --- a/packages/context/src/mcp/local-project-ports.ts +++ b/packages/context/src/mcp/local-project-ports.ts @@ -116,8 +116,7 @@ function normalizeScanDriver(driver: string | undefined): KtxConnectionDriver { normalized === 'clickhouse' || normalized === 'sqlserver' || normalized === 'bigquery' || - normalized === 'snowflake' || - normalized === 'posthog' + normalized === 'snowflake' ) { return normalized === 'sqlite3' ? 'sqlite' : normalized; } diff --git a/packages/context/src/memory/memory-agent.service.ingest.test.ts b/packages/context/src/memory/memory-agent.service.ingest.test.ts index bf30a883..710ba956 100644 --- a/packages/context/src/memory/memory-agent.service.ingest.test.ts +++ b/packages/context/src/memory/memory-agent.service.ingest.test.ts @@ -17,7 +17,7 @@ interface BuiltMocks { appSettings: any; llmProvider: any; prompt: any; - posthog: any; + eventTracker: any; telemetry: any; skillsRegistry: any; wikiService: any; @@ -64,7 +64,7 @@ const buildMocks = (overrides: Partial = {}): BuiltMocks => { }, llmProvider: { getModel: vi.fn().mockReturnValue({}) }, prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') }, - posthog: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) }, + eventTracker: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) }, telemetry: { isEnabled: () => false, appSettingsService: { settings: { telemetry: { recordInputs: false, recordOutputs: false } } }, @@ -177,7 +177,7 @@ const buildService = (mocks: BuiltMocks): MemoryAgentService => slValidator: mocks.slValidator, toolsetFactory: mocks.toolsetFactory, telemetry: { - trackMemoryIngestion: mocks.posthog.trackEvent, + trackMemoryIngestion: mocks.eventTracker.trackEvent, }, }); diff --git a/packages/context/src/scan/local-scan.ts b/packages/context/src/scan/local-scan.ts index 0919843f..15fdf6f3 100644 --- a/packages/context/src/scan/local-scan.ts +++ b/packages/context/src/scan/local-scan.ts @@ -103,13 +103,12 @@ function normalizeDriver(driver: string | undefined): KtxConnectionDriver { normalized === 'clickhouse' || normalized === 'sqlserver' || normalized === 'bigquery' || - normalized === 'snowflake' || - normalized === 'posthog' + normalized === 'snowflake' ) { return normalized === 'sqlite3' ? 'sqlite' : normalized; } throw new Error( - `Standalone ktx scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake/posthog in this phase, received "${driver ?? 'unknown'}"`, + `Standalone ktx scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake in this phase, received "${driver ?? 'unknown'}"`, ); } diff --git a/packages/context/src/scan/relationship-profiling.ts b/packages/context/src/scan/relationship-profiling.ts index 1fbeccd4..fa6acfac 100644 --- a/packages/context/src/scan/relationship-profiling.ts +++ b/packages/context/src/scan/relationship-profiling.ts @@ -71,7 +71,7 @@ const SAMPLE_VALUE_DELIMITER = '\u001f'; type QuoteStyle = 'double' | 'backtick' | 'bracket'; function quoteStyle(driver: KtxConnectionDriver): QuoteStyle { - if (driver === 'mysql' || driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'mysql' || driver === 'clickhouse') { return 'backtick'; } if (driver === 'sqlserver') { @@ -93,7 +93,7 @@ export function quoteKtxRelationshipIdentifier(driver: KtxConnectionDriver, iden export function formatKtxRelationshipTableRef(driver: KtxConnectionDriver, table: KtxTableRef): string { const parts = - driver === 'sqlite' || driver === 'posthog' + driver === 'sqlite' ? [table.name] : [table.catalog, table.db, table.name].filter((value): value is string => Boolean(value)); return parts.map((part) => quoteKtxRelationshipIdentifier(driver, part)).join('.'); @@ -109,7 +109,7 @@ function textLengthExpression(driver: KtxConnectionDriver, columnSql: string): s if (driver === 'bigquery') { return `LENGTH(CAST(${columnSql} AS STRING))`; } - if (driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'clickhouse') { return `length(toString(${columnSql}))`; } return `LENGTH(CAST(${columnSql} AS TEXT))`; @@ -223,7 +223,7 @@ function sampleAggregateSql(driver: KtxConnectionDriver, innerSql: string): stri if (driver === 'sqlserver') { return `(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`; } - if (driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'clickhouse') { return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`; } return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`; diff --git a/packages/context/src/scan/types.test.ts b/packages/context/src/scan/types.test.ts index 3b68411a..309db88e 100644 --- a/packages/context/src/scan/types.test.ts +++ b/packages/context/src/scan/types.test.ts @@ -150,14 +150,14 @@ describe('KTX scan contract types', () => { }; const connector: KtxScanConnector = { - id: 'posthog:product', - driver: 'posthog', + id: 'clickhouse:product', + driver: 'clickhouse', capabilities: createKtxConnectorCapabilities({ eventStreamDiscovery: true }), eventStreamDiscovery: discovery, async introspect() { return { connectionId: 'product', - driver: 'posthog', + driver: 'clickhouse', extractedAt: '2026-04-29T00:00:00.000Z', scope: { catalogs: ['157881'] }, metadata: {}, diff --git a/packages/context/src/scan/types.ts b/packages/context/src/scan/types.ts index 66f70ba2..71bb3fb3 100644 --- a/packages/context/src/scan/types.ts +++ b/packages/context/src/scan/types.ts @@ -5,7 +5,6 @@ export type KtxConnectionDriver = | 'sqlserver' | 'bigquery' | 'snowflake' - | 'posthog' | 'mysql' | 'clickhouse'; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b900b9ed..12d1235a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -42,9 +42,6 @@ importers: '@ktx/connector-postgres': specifier: workspace:* version: file:packages/connector-postgres(ws@8.20.0) - '@ktx/connector-posthog': - specifier: workspace:* - version: file:packages/connector-posthog(ws@8.20.0) '@ktx/connector-snowflake': specifier: workspace:* version: file:packages/connector-snowflake(asn1.js@5.4.1)(ws@8.20.0) @@ -53,7 +50,7 @@ importers: version: file:packages/connector-sqlite(ws@8.20.0) '@ktx/connector-sqlserver': specifier: workspace:* - version: file:packages/connector-sqlserver(@azure/core-client@1.10.1)(ws@8.20.0) + version: file:packages/connector-sqlserver(ws@8.20.0) '@ktx/context': specifier: workspace:* version: file:packages/context(ws@8.20.0) @@ -177,22 +174,6 @@ importers: specifier: ^4.0.18 version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) - packages/connector-posthog: - dependencies: - '@ktx/context': - specifier: workspace:* - version: file:packages/context - devDependencies: - '@types/node': - specifier: ^24.3.0 - version: 24.12.2 - typescript: - specifier: ^5.9.3 - version: 5.9.3 - vitest: - specifier: ^4.0.18 - version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) - packages/connector-snowflake: dependencies: '@ktx/context': @@ -919,10 +900,6 @@ packages: resolution: {directory: packages/connector-postgres, type: directory} engines: {node: '>=22.0.0'} - '@ktx/connector-posthog@file:packages/connector-posthog': - resolution: {directory: packages/connector-posthog, type: directory} - engines: {node: '>=22.0.0'} - '@ktx/connector-snowflake@file:packages/connector-snowflake': resolution: {directory: packages/connector-snowflake, type: directory} engines: {node: '>=22.0.0'} @@ -3632,6 +3609,11 @@ snapshots: '@azure/core-client': 1.10.1 '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-http-compat@2.4.0(@azure/core-rest-pipeline@1.23.0)': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-lro@2.7.2': dependencies: '@azure/abort-controller': 2.1.2 @@ -3703,6 +3685,24 @@ snapshots: transitivePeerDependencies: - supports-color + '@azure/keyvault-keys@4.10.0': + dependencies: + '@azure-rest/core-client': 2.6.0 + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-http-compat': 2.4.0(@azure/core-rest-pipeline@1.23.0) + '@azure/core-lro': 2.7.2 + '@azure/core-paging': 1.6.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/keyvault-common': 2.1.0 + '@azure/logger': 1.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + '@azure/keyvault-keys@4.10.0(@azure/core-client@1.10.1)': dependencies: '@azure-rest/core-client': 2.6.0 @@ -3981,16 +3981,6 @@ snapshots: - supports-color - ws - '@ktx/connector-posthog@file:packages/connector-posthog(ws@8.20.0)': - dependencies: - '@ktx/context': file:packages/context(ws@8.20.0) - transitivePeerDependencies: - - '@cfworker/json-schema' - - js-yaml - - pg-native - - supports-color - - ws - '@ktx/connector-snowflake@file:packages/connector-snowflake(asn1.js@5.4.1)(ws@8.20.0)': dependencies: '@ktx/context': file:packages/context(ws@8.20.0) @@ -4016,10 +4006,10 @@ snapshots: - supports-color - ws - '@ktx/connector-sqlserver@file:packages/connector-sqlserver(@azure/core-client@1.10.1)(ws@8.20.0)': + '@ktx/connector-sqlserver@file:packages/connector-sqlserver(ws@8.20.0)': dependencies: '@ktx/context': file:packages/context(ws@8.20.0) - mssql: 12.5.0(@azure/core-client@1.10.1) + mssql: 12.5.0 transitivePeerDependencies: - '@azure/core-client' - '@cfworker/json-schema' @@ -5571,6 +5561,17 @@ snapshots: ms@2.1.3: {} + mssql@12.5.0: + dependencies: + '@tediousjs/connection-string': 1.1.0 + commander: 11.1.0 + debug: 4.4.3 + tarn: 3.0.2 + tedious: 19.2.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + mssql@12.5.0(@azure/core-client@1.10.1): dependencies: '@tediousjs/connection-string': 1.1.0 @@ -6073,6 +6074,22 @@ snapshots: tarn@3.0.2: {} + tedious@19.2.1: + dependencies: + '@azure/core-auth': 1.10.1 + '@azure/identity': 4.13.1 + '@azure/keyvault-keys': 4.10.0 + '@js-joda/core': 5.7.0 + '@types/node': 24.12.2 + bl: 6.1.6 + iconv-lite: 0.7.2 + js-md4: 0.3.2 + native-duplexpair: 1.0.0 + sprintf-js: 1.1.3 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + tedious@19.2.1(@azure/core-client@1.10.1): dependencies: '@azure/core-auth': 1.10.1 diff --git a/release-policy.json b/release-policy.json index 0ba6297f..ce814787 100644 --- a/release-policy.json +++ b/release-policy.json @@ -10,7 +10,6 @@ "@ktx/connector-clickhouse", "@ktx/connector-mysql", "@ktx/connector-postgres", - "@ktx/connector-posthog", "@ktx/connector-snowflake", "@ktx/connector-sqlite", "@ktx/connector-sqlserver", diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 3d3aa168..b2da21c8 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -77,7 +77,6 @@ describe('standalone example docs', () => { assert.match(rootReadme, /`packages\/connector-clickhouse`/); assert.match(rootReadme, /`packages\/connector-mysql`/); assert.match(rootReadme, /`packages\/connector-postgres`/); - assert.match(rootReadme, /`packages\/connector-posthog`/); assert.match(rootReadme, /`packages\/connector-snowflake`/); assert.match(rootReadme, /`packages\/connector-sqlite`/); assert.match(rootReadme, /`packages\/connector-sqlserver`/); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 52d49470..d05b30bf 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -17,7 +17,6 @@ export const NPM_ARTIFACT_PACKAGES = [ { name: '@ktx/connector-clickhouse', packageRoot: 'packages/connector-clickhouse' }, { name: '@ktx/connector-mysql', packageRoot: 'packages/connector-mysql' }, { name: '@ktx/connector-postgres', packageRoot: 'packages/connector-postgres' }, - { name: '@ktx/connector-posthog', packageRoot: 'packages/connector-posthog' }, { name: '@ktx/connector-snowflake', packageRoot: 'packages/connector-snowflake' }, { name: '@ktx/connector-sqlite', packageRoot: 'packages/connector-sqlite' }, { name: '@ktx/connector-sqlserver', packageRoot: 'packages/connector-sqlserver' }, @@ -516,7 +515,6 @@ const bigqueryConnector = await import('@ktx/connector-bigquery'); const clickhouseConnector = await import('@ktx/connector-clickhouse'); const mysqlConnector = await import('@ktx/connector-mysql'); const postgresConnector = await import('@ktx/connector-postgres'); -const posthogConnector = await import('@ktx/connector-posthog'); const snowflakeConnector = await import('@ktx/connector-snowflake'); const sqliteConnector = await import('@ktx/connector-sqlite'); const sqlserverConnector = await import('@ktx/connector-sqlserver'); @@ -587,7 +585,6 @@ const connectorExports = [ ['@ktx/connector-clickhouse', clickhouseConnector.KtxClickHouseScanConnector, clickhouseConnector.KtxClickHouseDialect], ['@ktx/connector-mysql', mysqlConnector.KtxMysqlScanConnector, mysqlConnector.KtxMysqlDialect], ['@ktx/connector-postgres', postgresConnector.KtxPostgresScanConnector, postgresConnector.KtxPostgresDialect], - ['@ktx/connector-posthog', posthogConnector.KtxPostHogScanConnector, posthogConnector.KtxPostHogDialect], ['@ktx/connector-snowflake', snowflakeConnector.KtxSnowflakeScanConnector, snowflakeConnector.KtxSnowflakeDialect], ['@ktx/connector-sqlite', sqliteConnector.KtxSqliteScanConnector, sqliteConnector.KtxSqliteDialect], ['@ktx/connector-sqlserver', sqlserverConnector.KtxSqlServerScanConnector, sqlserverConnector.KtxSqlServerDialect], diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 4aec3c6e..5b18a9ed 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -34,7 +34,6 @@ const CONNECTOR_PACKAGE_NAMES = [ '@ktx/connector-clickhouse', '@ktx/connector-mysql', '@ktx/connector-postgres', - '@ktx/connector-posthog', '@ktx/connector-snowflake', '@ktx/connector-sqlite', '@ktx/connector-sqlserver', @@ -517,7 +516,6 @@ describe('verification snippets', () => { assert.match(source, /KtxPostgresScanConnector/); assert.match(source, /KtxBigQueryScanConnector/); assert.match(source, /KtxSnowflakeScanConnector/); - assert.match(source, /KtxPostHogScanConnector/); }); it('asserts installed hybrid search exports and CLI smoke coverage', () => { diff --git a/scripts/precommit-check.mjs b/scripts/precommit-check.mjs index d112752d..fdd405bf 100644 --- a/scripts/precommit-check.mjs +++ b/scripts/precommit-check.mjs @@ -15,7 +15,6 @@ const packageNameByDir = new Map( 'connector-clickhouse', 'connector-mysql', 'connector-postgres', - 'connector-posthog', 'connector-snowflake', 'connector-sqlite', 'connector-sqlserver',