mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
* feat(cli): profile ingest runs to find where wall-clock time goes Add opt-in profiling for `ktx ingest`. Each timed phase, work unit, and agent loop now records durationMs / step count / token usage in the trace, and a post-run aggregator rolls them up into a "where did the time go" report printed to stderr. Enable per run with KTX_PROFILE_INGEST (1/true -> human table, json -> raw structured profile) or persistently via `ingest.profile` in ktx.yaml. The json form emits raw milliseconds, token counts, and a summary.headline one-line diagnosis so coding agents can parse it directly; json wins when both env and config request profiling. - runtime-port: RunLoopMetrics (totalMs, usage, stepCount, stepBoundariesMs) plus onMetrics callbacks on text/object generation - ai-sdk + claude-code runtimes: capture per-loop timing and token usage - work-unit-executor and stages 3/4: thread metrics into trace events - ingest-bundle.runner: time worktree / triage / clustering / index / reconcile / squash phases and emit the profile in a finally block (best-effort; never affects the run outcome) - ingest-profile: new trace+transcript aggregator with table/json formatters - config: ingest.profile flag; docs: profiling section in ktx-ingest.mdx * fix(cli): flush tool-call logs before reading ingest profile Tool transcripts are appended fire-and-forget so the agent hot path never blocks on logging. The ingest profiler read them before the writes settled, so per-work-unit toolMs (and the model-vs-tool split derived from it) could be incomplete. Track in-flight appends and expose flushToolCallLogs() — bounded by a timeout so it can never hang — and flush before the profiler reads the transcript.
79 lines
2.8 KiB
TypeScript
79 lines
2.8 KiB
TypeScript
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
|
import { tmpdir } from 'node:os';
|
|
import { join } from 'node:path';
|
|
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
|
|
|
import { buildProjectStackSnapshotFields } from '../../src/telemetry/project-snapshot.js';
|
|
|
|
/**
 * Suite for `buildProjectStackSnapshotFields`.
 *
 * The single case builds a throwaway project directory, passes a full config
 * object with two connections, and checks that the returned snapshot contains
 * only coarse facts (driver kinds, a demo flag, a connection count and
 * capability booleans) and that neither connection names nor filesystem paths
 * appear anywhere in its serialized form.
 */
describe('buildProjectStackSnapshotFields', () => {
  // Absolute path of the per-test temporary project directory.
  let projectDir: string;

  // Every test runs against a fresh, uniquely named directory under the OS temp dir.
  beforeEach(async () => {
    projectDir = await mkdtemp(join(tmpdir(), 'ktx-stack-snapshot-'));
  });

  // `force: true` keeps cleanup from throwing if the directory is already gone.
  afterEach(async () => {
    await rm(projectDir, { recursive: true, force: true });
  });

  it('summarizes connectors and project capabilities without names or paths', async () => {
    // Fixture layout: one semantic-layer YAML file, one wiki page and an .mcp.json.
    // NOTE(review): presumably these are what drive hasSl / hasWiki / hasMcp in the
    // expected result below — confirm against the implementation.
    await mkdir(join(projectDir, 'semantic-layer', 'warehouse'), { recursive: true });
    await mkdir(join(projectDir, 'wiki', 'global'), { recursive: true });
    await writeFile(join(projectDir, 'semantic-layer', 'warehouse', 'orders.yaml'), 'name: orders\n');
    await writeFile(join(projectDir, 'wiki', 'global', 'revenue.md'), '# Revenue\n');
    await writeFile(join(projectDir, '.mcp.json'), '{"mcpServers":{"ktx":{}}}\n');

    // The config literal is kept inline so its fields are contextually typed
    // against the function's parameter type.
    const fields = await buildProjectStackSnapshotFields({
      projectDir,
      config: {
        connections: {
          // This connection's `path` lives inside projectDir, so the path-leak
          // assertion below also covers it.
          orbit_demo: { driver: 'sqlite', path: join(projectDir, 'demo.db') },
          warehouse: { driver: 'postgres', readonly: true },
        },
        ingest: {
          adapters: [],
          embeddings: { backend: 'sentence-transformers', dimensions: 384 },
          workUnits: { stepBudget: 40, maxConcurrency: 1, failureMode: 'continue' },
          profile: false,
        },
        llm: { provider: { backend: 'none' }, models: {}, promptCaching: {} },
        scan: {
          enrichment: { mode: 'none' },
          relationships: {
            enabled: true,
            llmProposals: true,
            validationRequiredForManifest: true,
            acceptThreshold: 0.85,
            reviewThreshold: 0.55,
            maxLlmTablesPerBatch: 40,
            maxCandidatesPerColumn: 25,
            profileSampleRows: 10000,
            profileConcurrency: 4,
            validationConcurrency: 4,
          },
        },
        storage: {
          state: 'sqlite',
          search: 'sqlite-fts5',
          git: { auto_commit: true, author: 'ktx <ktx@example.com>' },
        },
        agent: { run_research: { enabled: false, max_iterations: 20, default_toolset: [] } },
        memory: { auto_commit: true },
      },
    });

    // Exact shape: connectors are reported by driver and demo flag only.
    expect(fields).toEqual({
      connectors: [
        { driver: 'sqlite', isDemo: true },
        { driver: 'postgres', isDemo: false },
      ],
      connectionCount: 2,
      hasSl: true,
      hasWiki: true,
      hasMcp: true,
      hasManagedRuntime: true,
    });

    // Leak checks on the serialized snapshot: no filesystem path and neither
    // of the two configured connection names may appear.
    const serialized = JSON.stringify(fields);
    expect(serialized).not.toContain(projectDir);
    expect(serialized).not.toContain('warehouse');
    expect(serialized).not.toContain('orbit_demo');
  });
});
|