From 66536f8937fdef5c527c1934e62e326767ddd897 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Thu, 21 May 2026 14:07:48 +0200 Subject: [PATCH] feat(cli): add --fast flag and Local data section to ktx status Add --fast to skip checks requiring external communication (Claude Code auth probe and Postgres pg_stat_statements probe); skipped checks render as `-` and carry `"status": "skipped"` in JSON output. Always show a new Local data section sourced from .ktx/db.sqlite (ingest run counts and last-completed per connection, knowledge page counts by scope, semantic layer source/dictionary value counts) plus on-disk sizes for .ktx/db.sqlite, .ktx/cache/, and raw-sources/, and Markdown/YAML file counts for wiki/global/ and semantic-layer/. Wrap the remaining slow probes in a @clack/prompts spinner when stdout is a TTY. --- .../content/docs/cli-reference/ktx-status.mdx | 15 +- packages/cli/src/commands/status-commands.ts | 4 +- packages/cli/src/doctor.ts | 11 +- packages/cli/src/index.test.ts | 2 +- packages/cli/src/status-project.test.ts | 287 +++++++++++- packages/cli/src/status-project.ts | 434 +++++++++++++++++- 6 files changed, 740 insertions(+), 13 deletions(-) diff --git a/docs-site/content/docs/cli-reference/ktx-status.mdx b/docs-site/content/docs/cli-reference/ktx-status.mdx index 17c07a09..c86c12e0 100644 --- a/docs-site/content/docs/cli-reference/ktx-status.mdx +++ b/docs-site/content/docs/cli-reference/ktx-status.mdx @@ -21,6 +21,7 @@ ktx status [options] | `--json` | Print JSON output | `false` | | `-v`, `--verbose` | Show every check, including passing ones | `false` | | `--validate` | Only validate the `ktx.yaml` schema; skip readiness checks | `false` | +| `--fast` | Skip checks that require external communication (Postgres query-history probe, Claude Code auth probe) | `false` | | `--no-input` | Disable interactive terminal input | - | ## Examples @@ -38,6 +39,9 @@ ktx status --verbose # Validate ktx.yaml without running readiness checks ktx status --validate +# Skip slow probes 
(Postgres pg_stat_statements, Claude Code auth) +ktx status --fast + # Check a project from another directory ktx status --project-dir ./analytics ``` @@ -49,7 +53,16 @@ ktx status --project-dir ./analytics For `llm.provider.backend: claude-code`, `ktx status` checks that the local Claude Code session is usable. If auth fails, run the Claude Code CLI login -flow, then rerun `ktx status`. +flow, then rerun `ktx status`. Use `--fast` to skip this probe (useful in CI +or offline contexts); skipped checks render as `-` and carry +`"status": "skipped"` in JSON output. + +A `Local data` section summarises what the project has accumulated locally: +ingest run counts, last completed timestamp per connection, knowledge page +counts by scope, semantic-layer source and dictionary value counts, the +on-disk size of `.ktx/db.sqlite`, `.ktx/cache/`, and `raw-sources/`, and the +Markdown and YAML file counts for `wiki/global/` and `semantic-layer/`. These are read from `.ktx/db.sqlite` and local file +stats, and are always shown (they do not require external communication). 
```json { diff --git a/packages/cli/src/commands/status-commands.ts b/packages/cli/src/commands/status-commands.ts index 62c857f9..e2adf8f1 100644 --- a/packages/cli/src/commands/status-commands.ts +++ b/packages/cli/src/commands/status-commands.ts @@ -18,10 +18,11 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC .option('--json', 'Print JSON output', false) .option('-v, --verbose', 'Show every check, including passing ones', false) .option('--validate', 'Only validate the ktx.yaml schema; skip readiness checks', false) + .option('--fast', 'Skip checks that require external communication (DB probes, auth probes)', false) .option('--no-input', 'Disable interactive terminal input') .action( async ( - options: { json?: boolean; verbose?: boolean; validate?: boolean; input?: boolean }, + options: { json?: boolean; verbose?: boolean; validate?: boolean; fast?: boolean; input?: boolean }, command, ) => { const runner = context.deps.doctor ?? (await import('../doctor.js')).runKtxDoctor; @@ -64,6 +65,7 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC projectDir: resolveCommandProjectDir(command), outputMode: outputMode(options), verbose: options.verbose === true, + fast: options.fast === true, ...inputMode(options), }, context.io, diff --git a/packages/cli/src/doctor.ts b/packages/cli/src/doctor.ts index efb87e2b..40ab4a72 100644 --- a/packages/cli/src/doctor.ts +++ b/packages/cli/src/doctor.ts @@ -42,6 +42,7 @@ export type KtxDoctorArgs = outputMode: KtxDoctorOutputMode; inputMode?: KtxDoctorInputMode; verbose?: boolean; + fast?: boolean; } | { command: 'validate'; @@ -619,7 +620,15 @@ export async function runKtxDoctor( return 1; } const project = await loadKtxProject({ projectDir: args.projectDir }); - const projectStatus = await buildProjectStatus(project, { ...deps, configIssues: validation.issues }); + const fast = args.fast ?? 
false; + const useSpinner = + !fast && args.outputMode === 'plain' && io.stdout.isTTY === true; + const projectStatus = await buildProjectStatus(project, { + ...deps, + configIssues: validation.issues, + fast, + useSpinner, + }); const verbose = args.verbose ?? false; const toolchainChecks = verbose ? await runSetupChecks() : undefined; if (args.outputMode === 'json') { diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index cd5b3239..00ae6ca8 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -1064,7 +1064,7 @@ describe('runKtxCli', () => { expect(setup).not.toHaveBeenCalled(); expect(doctor).toHaveBeenCalledWith( - { command: 'project', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled', verbose: false }, + { command: 'project', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled', verbose: false, fast: false }, statusIo.io, ); expect(statusIo.stderr()).toBe(''); diff --git a/packages/cli/src/status-project.test.ts b/packages/cli/src/status-project.test.ts index 749af664..84f7b48f 100644 --- a/packages/cli/src/status-project.test.ts +++ b/packages/cli/src/status-project.test.ts @@ -1,6 +1,14 @@ -import { describe, expect, it } from 'vitest'; +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { buildDefaultKtxProjectConfig, type KtxLocalProject, type KtxProjectConfig } from '@ktx/context/project'; -import { buildProjectStatus } from './status-project.js'; +import { + buildLocalStatsStatus, + buildProjectStatus, + renderProjectStatus, +} from './status-project.js'; function projectWithConfig(config: KtxProjectConfig): KtxLocalProject { return { @@ -124,3 +132,278 @@ describe('buildProjectStatus embeddings', () => { expect(status.verdictReason).toMatch(/embedding credentials missing/); 
}); }); + +function withPostgresQueryHistory(config: KtxProjectConfig): KtxProjectConfig { + return { + ...config, + connections: { + ...config.connections, + analytics: { + driver: 'postgres', + url: 'env:ANALYTICS_DATABASE_URL', + context: { queryHistory: { enabled: true } }, + } as KtxProjectConfig['connections'][string], + }, + }; +} + +describe('buildProjectStatus --fast', () => { + it('skips claude-code probe and Postgres query-history probe', async () => { + let claudeProbeCalls = 0; + let pgProbeCalls = 0; + const project = projectWithConfig(withPostgresQueryHistory(baseProjectConfig())); + + const status = await buildProjectStatus(project, { + env: { ANALYTICS_DATABASE_URL: 'postgres://example' }, + fast: true, + claudeCodeAuthProbe: async () => { + claudeProbeCalls += 1; + return { ok: true }; + }, + postgresQueryHistoryProbe: async () => { + pgProbeCalls += 1; + throw new Error('should not be called'); + }, + }); + + expect(claudeProbeCalls).toBe(0); + expect(pgProbeCalls).toBe(0); + expect(status.llm.status).toBe('skipped'); + expect(status.llm.detail).toMatch(/--fast/); + expect(status.queryHistory).toHaveLength(1); + expect(status.queryHistory[0]).toMatchObject({ + connection: 'analytics', + status: 'skipped', + }); + expect(status.verdict).not.toBe('blocked'); + }); + + it('does not call probes lazily when fast and reports skipped in render', async () => { + const project = projectWithConfig(withPostgresQueryHistory(baseProjectConfig())); + const status = await buildProjectStatus(project, { + env: { ANALYTICS_DATABASE_URL: 'postgres://example' }, + fast: true, + claudeCodeAuthProbe: stubClaudeCodeAuthProbe, + postgresQueryHistoryProbe: async () => { + throw new Error('should not be called'); + }, + }); + const rendered = renderProjectStatus(status, { verbose: false, useColor: false }); + expect(rendered).toContain('auth probe skipped (--fast)'); + expect(rendered).toContain('pg_stat_statements probe skipped (--fast)'); + }); +}); + 
+describe('buildLocalStatsStatus', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-status-stats-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + function projectIn(dir: string): KtxLocalProject { + return { + projectDir: dir, + configPath: join(dir, 'ktx.yaml'), + config: baseProjectConfig(), + coreConfig: {} as KtxLocalProject['coreConfig'], + git: {} as KtxLocalProject['git'], + fileStore: {} as KtxLocalProject['fileStore'], + }; + } + + it('returns unavailable when .ktx/db.sqlite is missing', async () => { + const stats = await buildLocalStatsStatus(projectIn(tempDir)); + expect(stats.unavailable).toMatch(/no \.ktx\/db\.sqlite/); + expect(stats.ingest.totalCompletedRuns).toBe(0); + expect(stats.projectDir.dbSqliteBytes).toBeNull(); + }); + + it('reads counts from a seeded SQLite DB and walks projectDir folders', async () => { + await mkdir(join(tempDir, '.ktx'), { recursive: true }); + await mkdir(join(tempDir, '.ktx', 'cache'), { recursive: true }); + await writeFile(join(tempDir, '.ktx', 'cache', 'a.bin'), Buffer.alloc(2048)); + await mkdir(join(tempDir, 'raw-sources', 'analytics'), { recursive: true }); + await writeFile(join(tempDir, 'raw-sources', 'analytics', 'snap.json'), 'x'.repeat(100)); + await writeFile(join(tempDir, 'raw-sources', 'analytics', 'snap.bin'), Buffer.alloc(512)); + await mkdir(join(tempDir, 'wiki', 'global', 'sub'), { recursive: true }); + await writeFile(join(tempDir, 'wiki', 'global', 'one.md'), '# one'); + await writeFile(join(tempDir, 'wiki', 'global', 'sub', 'two.md'), '# two'); + await mkdir(join(tempDir, 'semantic-layer'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'orders.yaml'), 'name: orders'); + await writeFile(join(tempDir, 'semantic-layer', 'users.yml'), 'name: users'); + + const dbPath = join(tempDir, '.ktx', 'db.sqlite'); + const db = new Database(dbPath); + db.exec(` + CREATE TABLE 
local_ingest_reports ( + run_id TEXT PRIMARY KEY, + adapter TEXT NOT NULL, + connection_id TEXT NOT NULL, + status TEXT NOT NULL, + completed_at TEXT NOT NULL, + raw_content_hashes_json TEXT NOT NULL, + body_json TEXT NOT NULL + ); + INSERT INTO local_ingest_reports VALUES + ('r1', 'live-database', 'analytics', 'done', '2026-04-01T10:00:00Z', '{}', '{}'), + ('r2', 'live-database', 'analytics', 'done', '2026-05-10T10:00:00Z', '{}', '{}'), + ('r3', 'notion', 'docs', 'done', '2026-05-01T10:00:00Z', '{}', '{}'), + ('r4', 'notion', 'docs', 'error', '2026-05-02T10:00:00Z', '{}', '{}'); + + CREATE TABLE knowledge_pages ( + path TEXT PRIMARY KEY, + key TEXT NOT NULL, + scope TEXT NOT NULL, + scope_id TEXT, + summary TEXT NOT NULL, + content TEXT NOT NULL, + tags TEXT NOT NULL, + search_text TEXT NOT NULL DEFAULT '', + embedding_json TEXT + ); + INSERT INTO knowledge_pages VALUES + ('a.md', 'a', 'GLOBAL', NULL, '', '', '[]', '', NULL), + ('b.md', 'b', 'GLOBAL', NULL, '', '', '[]', '', NULL), + ('c.md', 'c', 'PROJECT', NULL, '', '', '[]', '', NULL); + + CREATE TABLE local_sl_sources ( + connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + search_text TEXT NOT NULL, + embedding_json TEXT, + content_hash TEXT, + updated_at TEXT NOT NULL, + PRIMARY KEY (connection_id, source_name) + ); + INSERT INTO local_sl_sources VALUES + ('analytics', 'orders', '', NULL, NULL, '2026-05-10T10:00:00Z'), + ('analytics', 'users', '', NULL, NULL, '2026-05-10T10:00:00Z'); + + CREATE TABLE local_sl_dictionary_values ( + connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + column_name TEXT NOT NULL, + value TEXT NOT NULL, + value_lower TEXT NOT NULL, + cardinality INTEGER, + updated_at TEXT NOT NULL, + PRIMARY KEY (connection_id, source_name, column_name, value) + ); + INSERT INTO local_sl_dictionary_values VALUES + ('analytics', 'orders', 'status', 'open', 'open', 1, '2026-05-10T10:00:00Z'), + ('analytics', 'orders', 'status', 'closed', 'closed', 1, '2026-05-10T10:00:00Z'); + `); + 
db.close(); + + const stats = await buildLocalStatsStatus(projectIn(tempDir)); + expect(stats.unavailable).toBeUndefined(); + expect(stats.ingest.totalCompletedRuns).toBe(3); + expect(stats.ingest.perConnection).toEqual([ + { connectionId: 'analytics', adapter: 'live-database', lastCompletedAt: '2026-05-10T10:00:00Z' }, + { connectionId: 'docs', adapter: 'notion', lastCompletedAt: '2026-05-01T10:00:00Z' }, + ]); + expect(stats.knowledgePages).toEqual([ + { scope: 'GLOBAL', count: 2 }, + { scope: 'PROJECT', count: 1 }, + ]); + expect(stats.semanticLayer).toEqual([ + { connectionId: 'analytics', sourceCount: 2, dictionaryValueCount: 2 }, + ]); + expect(stats.projectDir.dbSqliteBytes).toBeGreaterThan(0); + expect(stats.projectDir.ktxCacheBytes).toBe(2048); + expect(stats.projectDir.rawSources).toEqual({ fileCount: 2, bytes: 612 }); + expect(stats.projectDir.wikiGlobalMarkdownCount).toBe(2); + expect(stats.projectDir.semanticLayerYamlCount).toBe(2); + }); + + it('tolerates a SQLite DB missing some tables', async () => { + await mkdir(join(tempDir, '.ktx'), { recursive: true }); + const dbPath = join(tempDir, '.ktx', 'db.sqlite'); + const db = new Database(dbPath); + db.exec(` + CREATE TABLE local_ingest_reports ( + run_id TEXT PRIMARY KEY, + adapter TEXT NOT NULL, + connection_id TEXT NOT NULL, + status TEXT NOT NULL, + completed_at TEXT NOT NULL, + raw_content_hashes_json TEXT NOT NULL, + body_json TEXT NOT NULL + ); + INSERT INTO local_ingest_reports VALUES + ('r1', 'live-database', 'analytics', 'done', '2026-05-10T10:00:00Z', '{}', '{}'); + `); + db.close(); + + const stats = await buildLocalStatsStatus(projectIn(tempDir)); + expect(stats.unavailable).toBeUndefined(); + expect(stats.ingest.totalCompletedRuns).toBe(1); + expect(stats.knowledgePages).toEqual([]); + expect(stats.semanticLayer).toEqual([]); + }); +}); + +describe('renderProjectStatus Local data', () => { + it('renders the Local data section with seeded stats', async () => { + const project = 
projectWithConfig(baseProjectConfig()); + const status = await buildProjectStatus(project, { claudeCodeAuthProbe: stubClaudeCodeAuthProbe }); + status.localStats = { + ingest: { + totalCompletedRuns: 3, + perConnection: [ + { connectionId: 'analytics', adapter: 'live-database', lastCompletedAt: new Date(Date.now() - 60 * 60 * 1000).toISOString() }, + ], + }, + knowledgePages: [ + { scope: 'GLOBAL', count: 2 }, + { scope: 'PROJECT', count: 1 }, + ], + semanticLayer: [ + { connectionId: 'analytics', sourceCount: 12, dictionaryValueCount: 200 }, + ], + projectDir: { + dbSqliteBytes: 4096, + ktxCacheBytes: 1_048_576, + rawSources: { fileCount: 5, bytes: 200 }, + wikiGlobalMarkdownCount: 7, + semanticLayerYamlCount: 3, + }, + }; + const rendered = renderProjectStatus(status, { useColor: false }); + expect(rendered).toContain('Local data'); + expect(rendered).toContain('3 completed runs'); + expect(rendered).toContain('GLOBAL=2'); + expect(rendered).toContain('PROJECT=1'); + expect(rendered).toContain('12 sources · 200 dictionary values'); + expect(rendered).toContain('db=4.00 KiB'); + expect(rendered).toContain('cache=1.00 MiB'); + expect(rendered).toContain('wiki=7 md'); + expect(rendered).toContain('semantic-layer=3 yaml'); + }); + + it('renders unavailable note when DB is missing', async () => { + const project = projectWithConfig(baseProjectConfig()); + const status = await buildProjectStatus(project, { claudeCodeAuthProbe: stubClaudeCodeAuthProbe }); + status.localStats = { + ingest: { totalCompletedRuns: 0, perConnection: [] }, + knowledgePages: [], + semanticLayer: [], + projectDir: { + dbSqliteBytes: null, + ktxCacheBytes: 0, + rawSources: { fileCount: 0, bytes: 0 }, + wikiGlobalMarkdownCount: 0, + semanticLayerYamlCount: 0, + }, + unavailable: 'no .ktx/db.sqlite yet', + }; + const rendered = renderProjectStatus(status, { useColor: false }); + expect(rendered).toContain('Local data'); + expect(rendered).toContain('no .ktx/db.sqlite yet'); + }); +}); diff --git 
a/packages/cli/src/status-project.ts b/packages/cli/src/status-project.ts index 8e8662dd..cf85cf13 100644 --- a/packages/cli/src/status-project.ts +++ b/packages/cli/src/status-project.ts @@ -1,4 +1,6 @@ -import { basename } from 'node:path'; +import type { Dirent } from 'node:fs'; +import { stat as statAsync, readdir as readdirAsync } from 'node:fs/promises'; +import { basename, join } from 'node:path'; import { runClaudeCodeAuthProbe } from '@ktx/context'; import type { KtxConfigIssue, @@ -8,6 +10,7 @@ import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig, } from '@ktx/context/project'; +import { ktxLocalStateDbPath } from '@ktx/context/project'; import type { PostgresPgssProbeResult } from '@ktx/context/ingest'; import { formatClaudeCodePromptCachingFix, @@ -24,7 +27,7 @@ import { } from './io/symbols.js'; import { KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js'; -type ProjectStatusLevel = 'ok' | 'warn' | 'fail'; +type ProjectStatusLevel = 'ok' | 'warn' | 'fail' | 'skipped'; type ProjectVerdict = 'ready' | 'partial' | 'blocked'; interface ProjectStatusLine { @@ -99,6 +102,42 @@ function hasOwnField(value: Record, key: string): boolean { return Object.prototype.hasOwnProperty.call(value, key); } +interface LocalStatsIngestPerConnection { + connectionId: string; + adapter: string; + lastCompletedAt: string; +} + +interface LocalStatsSemanticLayerEntry { + connectionId: string; + sourceCount: number; + dictionaryValueCount: number; +} + +interface LocalStatsKnowledgeEntry { + scope: string; + count: number; +} + +interface LocalStatsProjectDir { + dbSqliteBytes: number | null; + ktxCacheBytes: number; + rawSources: { fileCount: number; bytes: number }; + wikiGlobalMarkdownCount: number; + semanticLayerYamlCount: number; +} + +export interface LocalStatsStatus { + ingest: { + totalCompletedRuns: number; + perConnection: LocalStatsIngestPerConnection[]; + }; + knowledgePages: LocalStatsKnowledgeEntry[]; + semanticLayer: LocalStatsSemanticLayerEntry[]; + 
projectDir: LocalStatsProjectDir; + unavailable?: string; +} + export interface ProjectStatus { projectName: string; projectDir: string; @@ -110,6 +149,7 @@ export interface ProjectStatus { queryHistory: QueryHistoryStatus[]; pipeline: PipelineStatus; warnings: WarningItem[]; + localStats: LocalStatsStatus; verdict: ProjectVerdict; verdictReason: string; nextActions: string[]; @@ -152,6 +192,8 @@ async function buildLlmStatus( projectDir: string; env: NodeJS.ProcessEnv; claudeCodeAuthProbe?: ClaudeCodeAuthProbe; + fast?: boolean; + useSpinner?: boolean; }, ): Promise { const env = options.env; @@ -208,8 +250,18 @@ async function buildLlmStatus( } if (backend === 'claude-code') { const modelName = model ?? 'sonnet'; + if (options.fast === true) { + return { + backend, + model: modelName, + status: 'skipped', + detail: 'auth probe skipped (--fast)', + }; + } const probe = options.claudeCodeAuthProbe ?? runClaudeCodeAuthProbe; - const auth = await probe({ projectDir: options.projectDir, model: modelName, env }); + const auth = await withSpinner(options.useSpinner === true, 'Probing Claude Code authentication', () => + probe({ projectDir: options.projectDir, model: modelName, env }), + ); if (auth.ok) { return { backend, @@ -461,8 +513,22 @@ async function buildQueryHistoryStatus( continue; } + if (options.fast === true) { + statuses.push({ + connection: connectionId, + dialect: 'postgres', + status: 'skipped', + detail: 'pg_stat_statements probe skipped (--fast)', + }); + continue; + } + try { - const result = await probe({ projectDir: project.projectDir, connectionId, connection, env }); + const result = await withSpinner( + options.useSpinner === true, + `Probing pg_stat_statements on ${connectionId}`, + () => probe({ projectDir: project.projectDir, connectionId, connection, env }), + ); statuses.push({ connection: connectionId, dialect: 'postgres', @@ -641,7 +707,7 @@ function buildVerdict( reasons.push('embedding credentials missing'); } } - const missing = 
connections.filter((c) => c.status !== 'ok').length; + const missing = connections.filter((c) => c.status !== 'ok' && c.status !== 'skipped').length; if (missing > 0) reasons.push(`${missing} connection${missing === 1 ? '' : 's'} need configuration`); const queryHistoryWarnings = queryHistory.filter((entry) => entry.status === 'warn').length; if (queryHistoryWarnings > 0) { @@ -669,6 +735,27 @@ export interface BuildProjectStatusOptions { postgresQueryHistoryProbe?: PostgresQueryHistoryProbe; claudeCodeAuthProbe?: ClaudeCodeAuthProbe; configIssues?: KtxConfigIssue[]; + fast?: boolean; + useSpinner?: boolean; +} + +async function withSpinner( + useSpinner: boolean, + label: string, + run: () => Promise, +): Promise { + if (!useSpinner) return run(); + const { spinner } = await import('@clack/prompts'); + const s = spinner(); + s.start(label); + try { + const result = await run(); + s.stop(label); + return result; + } catch (error) { + s.stop(`${label} — failed`); + throw error; + } } function buildConfigStatus(issues: KtxConfigIssue[] | undefined): ConfigStatus { @@ -683,6 +770,219 @@ function buildConfigStatus(issues: KtxConfigIssue[] | undefined): ConfigStatus { }; } +interface DirSummary { + fileCount: number; + bytes: number; +} + +async function summarizeDir( + dir: string, + filter?: (entry: Dirent, fullPath: string) => boolean, + maxDepth = 10, +): Promise { + let fileCount = 0; + let bytes = 0; + const walk = async (current: string, depth: number): Promise => { + if (depth > maxDepth) return; + let entries: Dirent[]; + try { + entries = await readdirAsync(current, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { + const full = join(current, entry.name); + if (entry.isDirectory()) { + await walk(full, depth + 1); + continue; + } + if (!entry.isFile()) continue; + if (filter && !filter(entry, full)) continue; + try { + const s = await statAsync(full); + fileCount += 1; + bytes += s.size; + } catch { + // skip individual stat 
failures + } + } + }; + await walk(dir, 0); + return { fileCount, bytes }; +} + +function isMarkdownEntry(entry: Dirent): boolean { + return entry.isFile() && /\.mdx?$/i.test(entry.name); +} + +function isYamlEntry(entry: Dirent): boolean { + return entry.isFile() && /\.ya?ml$/i.test(entry.name); +} + +async function fileSizeOrNull(filePath: string): Promise { + try { + const s = await statAsync(filePath); + return s.isFile() ? s.size : null; + } catch { + return null; + } +} + +function tryQuery(run: () => T, fallback: T): T { + try { + return run(); + } catch { + return fallback; + } +} + +export async function buildLocalStatsStatus(project: KtxLocalProject): Promise { + const dbPath = ktxLocalStateDbPath(project); + const dbSqliteBytes = await fileSizeOrNull(dbPath); + + const projectDirSummary: LocalStatsProjectDir = { + dbSqliteBytes, + ktxCacheBytes: (await summarizeDir(join(project.projectDir, '.ktx', 'cache'))).bytes, + rawSources: await summarizeDir(join(project.projectDir, 'raw-sources')), + wikiGlobalMarkdownCount: ( + await summarizeDir(join(project.projectDir, 'wiki', 'global'), isMarkdownEntry) + ).fileCount, + semanticLayerYamlCount: ( + await summarizeDir(join(project.projectDir, 'semantic-layer'), isYamlEntry) + ).fileCount, + }; + + if (dbSqliteBytes === null) { + return { + ingest: { totalCompletedRuns: 0, perConnection: [] }, + knowledgePages: [], + semanticLayer: [], + projectDir: projectDirSummary, + unavailable: 'no .ktx/db.sqlite yet', + }; + } + + let database: import('better-sqlite3').Database | null = null; + try { + const { default: Database } = await import('better-sqlite3'); + database = new Database(dbPath, { readonly: true, fileMustExist: true }); + const db = database; + + const totalCompletedRuns = tryQuery( + () => + ( + db + .prepare(`SELECT COUNT(*) AS n FROM local_ingest_reports WHERE status = 'done'`) + .get() as { n: number } | undefined + )?.n ?? 
0, + 0, + ); + + const ingestRows = tryQuery( + () => + db + .prepare( + `SELECT connection_id, adapter, MAX(completed_at) AS last_completed_at + FROM local_ingest_reports + WHERE status = 'done' + GROUP BY connection_id, adapter`, + ) + .all() as Array<{ connection_id: string; adapter: string; last_completed_at: string }>, + [] as Array<{ connection_id: string; adapter: string; last_completed_at: string }>, + ); + const perConnectionMap = new Map(); + for (const row of ingestRows) { + const existing = perConnectionMap.get(row.connection_id); + if (!existing || row.last_completed_at > existing.lastCompletedAt) { + perConnectionMap.set(row.connection_id, { + connectionId: row.connection_id, + adapter: row.adapter, + lastCompletedAt: row.last_completed_at, + }); + } + } + const perConnection = [...perConnectionMap.values()].sort((left, right) => + left.connectionId.localeCompare(right.connectionId), + ); + + const knowledgeRows = tryQuery( + () => + db + .prepare( + `SELECT scope, COUNT(*) AS n FROM knowledge_pages GROUP BY scope ORDER BY scope`, + ) + .all() as Array<{ scope: string; n: number }>, + [] as Array<{ scope: string; n: number }>, + ); + const knowledgePages: LocalStatsKnowledgeEntry[] = knowledgeRows.map((row) => ({ + scope: row.scope, + count: row.n, + })); + + const sourceRows = tryQuery( + () => + db + .prepare( + `SELECT connection_id, COUNT(*) AS n FROM local_sl_sources GROUP BY connection_id`, + ) + .all() as Array<{ connection_id: string; n: number }>, + [] as Array<{ connection_id: string; n: number }>, + ); + const dictionaryRows = tryQuery( + () => + db + .prepare( + `SELECT connection_id, COUNT(*) AS n FROM local_sl_dictionary_values GROUP BY connection_id`, + ) + .all() as Array<{ connection_id: string; n: number }>, + [] as Array<{ connection_id: string; n: number }>, + ); + const slMap = new Map(); + for (const row of sourceRows) { + slMap.set(row.connection_id, { + connectionId: row.connection_id, + sourceCount: row.n, + 
dictionaryValueCount: 0, + }); + } + for (const row of dictionaryRows) { + const existing = slMap.get(row.connection_id) ?? { + connectionId: row.connection_id, + sourceCount: 0, + dictionaryValueCount: 0, + }; + existing.dictionaryValueCount = row.n; + slMap.set(row.connection_id, existing); + } + const semanticLayer = [...slMap.values()].sort((left, right) => + left.connectionId.localeCompare(right.connectionId), + ); + + return { + ingest: { totalCompletedRuns, perConnection }, + knowledgePages, + semanticLayer, + projectDir: projectDirSummary, + }; + } catch (error) { + return { + ingest: { totalCompletedRuns: 0, perConnection: [] }, + knowledgePages: [], + semanticLayer: [], + projectDir: projectDirSummary, + unavailable: failureDetail(error), + }; + } finally { + if (database) { + try { + database.close(); + } catch { + // ignore close failures + } + } + } +} + export async function buildProjectStatus(project: KtxLocalProject, options: BuildProjectStatusOptions = {}): Promise { const env = options.env ?? 
process.env; const config = project.config; @@ -692,6 +992,8 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil projectDir: project.projectDir, env, claudeCodeAuthProbe: options.claudeCodeAuthProbe, + fast: options.fast, + useSpinner: options.useSpinner, }); const embeddings = buildEmbeddingsStatus(config.ingest.embeddings, env); const storage = buildStorageStatus(config); @@ -701,6 +1003,7 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil const queryHistory = await buildQueryHistoryStatus(project, options); const pipeline = buildPipelineStatus(config); const warnings = buildWarnings(config, connections, llm, embeddings); + const localStats = await buildLocalStatsStatus(project); const { verdict, reason, nextActions } = buildVerdict(llm, embeddings, connections, queryHistory, warnings); return { @@ -714,6 +1017,7 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil queryHistory, pipeline, warnings, + localStats, verdict, verdictReason: reason, nextActions, @@ -742,11 +1046,51 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil // ─── Rendering ────────────────────────────────────────────────────────────── -const SYMBOL: Record = { ok: '✓', warn: '⚠', fail: '✗' }; +const SYMBOL: Record = { ok: '✓', warn: '⚠', fail: '✗', skipped: '-' }; function colorForLevel(useColor: boolean, level: ProjectStatusLevel, text: string): string { if (!useColor) return text; - return level === 'ok' ? green(text) : level === 'warn' ? 
yellow(text) : red(text); + if (level === 'ok') return green(text); + if (level === 'warn') return yellow(text); + if (level === 'fail') return red(text); + return _dim(text); +} + +function formatBytes(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + const units = ['KiB', 'MiB', 'GiB', 'TiB']; + let value = bytes / 1024; + let unitIndex = 0; + while (value >= 1024 && unitIndex < units.length - 1) { + value /= 1024; + unitIndex += 1; + } + const precision = value >= 100 ? 0 : value >= 10 ? 1 : 2; + return `${value.toFixed(precision)} ${units[unitIndex]}`; +} + +const RELATIVE_TIME_DIVISIONS: Array<{ amount: number; name: Intl.RelativeTimeFormatUnit }> = [ + { amount: 60, name: 'second' }, + { amount: 60, name: 'minute' }, + { amount: 24, name: 'hour' }, + { amount: 7, name: 'day' }, + { amount: 4.34524, name: 'week' }, + { amount: 12, name: 'month' }, + { amount: Number.POSITIVE_INFINITY, name: 'year' }, +]; + +function formatRelativeFromNow(iso: string): string { + const parsed = Date.parse(iso); + if (Number.isNaN(parsed)) return iso; + const formatter = new Intl.RelativeTimeFormat('en', { numeric: 'auto' }); + let duration = (parsed - Date.now()) / 1000; + for (const division of RELATIVE_TIME_DIVISIONS) { + if (Math.abs(duration) < division.amount) { + return formatter.format(Math.round(duration), division.name); + } + duration /= division.amount; + } + return iso; } @@ -758,6 +1102,79 @@ function abbreviateHome(filePath: string, env: NodeJS.ProcessEnv): string { return filePath; } +function renderLocalStats( + lines: string[], + stats: LocalStatsStatus, + dim: (text: string) => string, + bold: (text: string) => string, +): void { + lines.push(` ${bold('Local data')}`); + if (stats.unavailable) { + lines.push(` ${dim(`(—) ${stats.unavailable}`)}`); + lines.push(''); + return; + } + + const localLabelWidth = Math.max( + 'Ingest'.length, + 'Knowledge'.length, + 'Semantic layer'.length, + 'Disk'.length, + ); + const lLabel = (text: string) => 
text.padEnd(localLabelWidth); + + const ingest = stats.ingest; + const ingestSummary = + ingest.totalCompletedRuns === 0 + ? dim('no completed runs yet') + : `${ingest.totalCompletedRuns} completed run${ingest.totalCompletedRuns === 1 ? '' : 's'}`; + lines.push(` ${lLabel('Ingest')} ${ingestSummary}`); + if (ingest.perConnection.length > 0) { + const nameWidth = Math.max(...ingest.perConnection.map((entry) => entry.connectionId.length)); + const adapterWidth = Math.max(...ingest.perConnection.map((entry) => entry.adapter.length)); + for (const entry of ingest.perConnection) { + lines.push( + ` ${entry.connectionId.padEnd(nameWidth)} ${dim(entry.adapter.padEnd(adapterWidth))} ${dim(`last ${formatRelativeFromNow(entry.lastCompletedAt)}`)}`, + ); + } + } + + if (stats.knowledgePages.length === 0) { + lines.push(` ${lLabel('Knowledge')} ${dim('no pages yet')}`); + } else { + const knowledgeText = stats.knowledgePages + .map((entry) => `${entry.scope}=${entry.count}`) + .join(` ${dim('·')} `); + lines.push(` ${lLabel('Knowledge')} ${knowledgeText}`); + } + + if (stats.semanticLayer.length === 0) { + lines.push(` ${lLabel('Semantic layer')} ${dim('no indexed sources yet')}`); + } else { + const nameWidth = Math.max(...stats.semanticLayer.map((entry) => entry.connectionId.length)); + let firstLine = true; + for (const entry of stats.semanticLayer) { + const prefix = firstLine ? lLabel('Semantic layer') : ' '.repeat(localLabelWidth); + lines.push( + ` ${prefix} ${entry.connectionId.padEnd(nameWidth)} ${dim(`${entry.sourceCount} source${entry.sourceCount === 1 ? '' : 's'} · ${entry.dictionaryValueCount} dictionary value${entry.dictionaryValueCount === 1 ? '' : 's'}`)}`, + ); + firstLine = false; + } + } + + const disk = stats.projectDir; + const diskBits: string[] = []; + diskBits.push(`db=${disk.dbSqliteBytes === null ? 
'–' : formatBytes(disk.dbSqliteBytes)}`); + diskBits.push(`cache=${formatBytes(disk.ktxCacheBytes)}`); + diskBits.push( + `raw-sources=${disk.rawSources.fileCount} file${disk.rawSources.fileCount === 1 ? '' : 's'} (${formatBytes(disk.rawSources.bytes)})`, + ); + diskBits.push(`wiki=${disk.wikiGlobalMarkdownCount} md`); + diskBits.push(`semantic-layer=${disk.semanticLayerYamlCount} yaml`); + lines.push(` ${lLabel('Disk')} ${dim(diskBits.join(` ${dim('·')} `))}`); + lines.push(''); +} + export interface RenderProjectStatusOptions { verbose?: boolean; useColor?: boolean; @@ -859,6 +1276,9 @@ export function renderProjectStatus(status: ProjectStatus, options: RenderProjec lines.push(` ${pLabel('Research agent')} ${agentDetail}`); lines.push(''); + // Local data + renderLocalStats(lines, status.localStats, dim, bold); + // Warnings if (status.warnings.length > 0) { lines.push(` ${bold('Warnings')}`);