From 0915b21a1a0a4f00853b5a78abd60ca74327c971 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Thu, 21 May 2026 15:13:07 +0200 Subject: [PATCH] feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(CASE WHEN embedding_json IS NOT NULL THEN 1 ELSE 0 END)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The now-unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed. 
--- packages/cli/src/status-project.test.ts | 54 +++++++++-------- packages/cli/src/status-project.ts | 80 +++++++++++-------------- 2 files changed, 64 insertions(+), 70 deletions(-) diff --git a/packages/cli/src/status-project.test.ts b/packages/cli/src/status-project.test.ts index fdf4bf4e..9f8c879e 100644 --- a/packages/cli/src/status-project.test.ts +++ b/packages/cli/src/status-project.test.ts @@ -268,9 +268,9 @@ describe('buildLocalStatsStatus', () => { embedding_json TEXT ); INSERT INTO knowledge_pages VALUES - ('a.md', 'a', 'GLOBAL', NULL, '', '', '[]', '', NULL), + ('a.md', 'a', 'GLOBAL', NULL, '', '', '[]', '', '[0.1,0.2]'), ('b.md', 'b', 'GLOBAL', NULL, '', '', '[]', '', NULL), - ('c.md', 'c', 'PROJECT', NULL, '', '', '[]', '', NULL); + ('c.md', 'c', 'PROJECT', NULL, '', '', '[]', '', '[0.3,0.4]'); CREATE TABLE local_sl_sources ( connection_id TEXT NOT NULL, @@ -282,7 +282,7 @@ describe('buildLocalStatsStatus', () => { PRIMARY KEY (connection_id, source_name) ); INSERT INTO local_sl_sources VALUES - ('analytics', 'orders', '', NULL, NULL, '2026-05-10T10:00:00Z'), + ('analytics', 'orders', '', '[0.1,0.2]', NULL, '2026-05-10T10:00:00Z'), ('analytics', 'users', '', NULL, NULL, '2026-05-10T10:00:00Z'); CREATE TABLE local_sl_dictionary_values ( @@ -308,18 +308,21 @@ describe('buildLocalStatsStatus', () => { { connectionId: 'analytics', adapter: 'live-database', lastCompletedAt: '2026-05-10T10:00:00Z' }, { connectionId: 'docs', adapter: 'notion', lastCompletedAt: '2026-05-01T10:00:00Z' }, ]); - expect(stats.knowledgePages).toEqual([ - { scope: 'GLOBAL', count: 2 }, - { scope: 'PROJECT', count: 1 }, + expect(stats.wikiPages).toEqual([ + { scope: 'GLOBAL', count: 2, embeddedCount: 1 }, + { scope: 'PROJECT', count: 1, embeddedCount: 1 }, ]); expect(stats.semanticLayer).toEqual([ - { connectionId: 'analytics', sourceCount: 2, dictionaryValueCount: 2 }, + { + connectionId: 'analytics', + sourceCount: 2, + embeddedSourceCount: 1, + dictionaryValueCount: 2, + }, ]); 
expect(stats.projectDir.dbSqliteBytes).toBeGreaterThan(0); expect(stats.projectDir.ktxCacheBytes).toBe(2048); expect(stats.projectDir.rawSources).toEqual({ fileCount: 2, bytes: 612 }); - expect(stats.projectDir.wikiGlobalMarkdownCount).toBe(2); - expect(stats.projectDir.semanticLayerYamlCount).toBe(2); }); it('tolerates a SQLite DB missing some tables', async () => { @@ -344,7 +347,7 @@ describe('buildLocalStatsStatus', () => { const stats = await buildLocalStatsStatus(projectIn(tempDir)); expect(stats.unavailable).toBeUndefined(); expect(stats.ingest.totalCompletedRuns).toBe(1); - expect(stats.knowledgePages).toEqual([]); + expect(stats.wikiPages).toEqual([]); expect(stats.semanticLayer).toEqual([]); }); }); @@ -360,31 +363,36 @@ describe('renderProjectStatus Local data', () => { { connectionId: 'analytics', adapter: 'live-database', lastCompletedAt: new Date(Date.now() - 60 * 60 * 1000).toISOString() }, ], }, - knowledgePages: [ - { scope: 'GLOBAL', count: 2 }, - { scope: 'PROJECT', count: 1 }, + wikiPages: [ + { scope: 'GLOBAL', count: 2, embeddedCount: 2 }, + { scope: 'PROJECT', count: 1, embeddedCount: 0 }, ], semanticLayer: [ - { connectionId: 'analytics', sourceCount: 12, dictionaryValueCount: 200 }, + { + connectionId: 'analytics', + sourceCount: 12, + embeddedSourceCount: 10, + dictionaryValueCount: 200, + }, ], projectDir: { dbSqliteBytes: 4096, ktxCacheBytes: 1_048_576, rawSources: { fileCount: 5, bytes: 200 }, - wikiGlobalMarkdownCount: 7, - semanticLayerYamlCount: 3, }, }; const rendered = renderProjectStatus(status, { useColor: false }); expect(rendered).toContain('Local data'); + expect(rendered).toContain('Wiki'); + expect(rendered).not.toContain('Knowledge'); expect(rendered).toContain('3 completed runs'); - expect(rendered).toContain('GLOBAL=2'); - expect(rendered).toContain('PROJECT=1'); - expect(rendered).toContain('12 sources · 200 dictionary values'); + expect(rendered).toContain('GLOBAL=2 (2 embedded)'); + 
expect(rendered).toContain('PROJECT=1 (0 embedded)'); + expect(rendered).toContain('12 sources (10 embedded) · 200 dictionary values'); expect(rendered).toContain('db=4.00 KiB'); expect(rendered).toContain('cache=1.00 MiB'); - expect(rendered).toContain('wiki=7 md'); - expect(rendered).toContain('semantic-layer=3 yaml'); + expect(rendered).not.toMatch(/wiki=\d+ md/); + expect(rendered).not.toMatch(/semantic-layer=\d+ yaml/); }); it('renders unavailable note when DB is missing', async () => { @@ -392,14 +400,12 @@ describe('renderProjectStatus Local data', () => { const status = await buildProjectStatus(project, { claudeCodeAuthProbe: stubClaudeCodeAuthProbe }); status.localStats = { ingest: { totalCompletedRuns: 0, perConnection: [] }, - knowledgePages: [], + wikiPages: [], semanticLayer: [], projectDir: { dbSqliteBytes: null, ktxCacheBytes: 0, rawSources: { fileCount: 0, bytes: 0 }, - wikiGlobalMarkdownCount: 0, - semanticLayerYamlCount: 0, }, unavailable: 'no .ktx/db.sqlite yet', }; diff --git a/packages/cli/src/status-project.ts b/packages/cli/src/status-project.ts index 1b0f8094..9e257157 100644 --- a/packages/cli/src/status-project.ts +++ b/packages/cli/src/status-project.ts @@ -1,4 +1,3 @@ -import type { Dirent } from 'node:fs'; import { stat as statAsync, readdir as readdirAsync } from 'node:fs/promises'; import { basename, join } from 'node:path'; import { runClaudeCodeAuthProbe } from './context/llm/claude-code-runtime.js'; @@ -105,20 +104,20 @@ interface LocalStatsIngestPerConnection { interface LocalStatsSemanticLayerEntry { connectionId: string; sourceCount: number; + embeddedSourceCount: number; dictionaryValueCount: number; } -interface LocalStatsKnowledgeEntry { +interface LocalStatsWikiEntry { scope: string; count: number; + embeddedCount: number; } interface LocalStatsProjectDir { dbSqliteBytes: number | null; ktxCacheBytes: number; rawSources: { fileCount: number; bytes: number }; - wikiGlobalMarkdownCount: number; - semanticLayerYamlCount: 
number; } /** @internal */ @@ -127,7 +126,7 @@ export interface LocalStatsStatus { totalCompletedRuns: number; perConnection: LocalStatsIngestPerConnection[]; }; - knowledgePages: LocalStatsKnowledgeEntry[]; + wikiPages: LocalStatsWikiEntry[]; semanticLayer: LocalStatsSemanticLayerEntry[]; projectDir: LocalStatsProjectDir; unavailable?: string; @@ -774,16 +773,12 @@ interface DirSummary { bytes: number; } -async function summarizeDir( - dir: string, - filter?: (entry: Dirent, fullPath: string) => boolean, - maxDepth = 10, -): Promise { +async function summarizeDir(dir: string, maxDepth = 10): Promise { let fileCount = 0; let bytes = 0; const walk = async (current: string, depth: number): Promise => { if (depth > maxDepth) return; - let entries: Dirent[]; + let entries; try { entries = await readdirAsync(current, { withFileTypes: true }); } catch { @@ -796,7 +791,6 @@ async function summarizeDir( continue; } if (!entry.isFile()) continue; - if (filter && !filter(entry, full)) continue; try { const s = await statAsync(full); fileCount += 1; @@ -810,14 +804,6 @@ async function summarizeDir( return { fileCount, bytes }; } -function isMarkdownEntry(entry: Dirent): boolean { - return entry.isFile() && /\.mdx?$/i.test(entry.name); -} - -function isYamlEntry(entry: Dirent): boolean { - return entry.isFile() && /\.ya?ml$/i.test(entry.name); -} - async function fileSizeOrNull(filePath: string): Promise { try { const s = await statAsync(filePath); @@ -844,18 +830,12 @@ export async function buildLocalStatsStatus(project: KtxLocalProject): Promise db .prepare( - `SELECT scope, COUNT(*) AS n FROM knowledge_pages GROUP BY scope ORDER BY scope`, + `SELECT scope, COUNT(*) AS n, SUM(CASE WHEN embedding_json IS NOT NULL THEN 1 ELSE 0 END) AS embedded + FROM knowledge_pages + GROUP BY scope + ORDER BY scope`, ) - .all() as Array<{ scope: string; n: number }>, - [] as Array<{ scope: string; n: number }>, + .all() as Array<{ scope: string; n: number; embedded: number | null }>, + [] as 
Array<{ scope: string; n: number; embedded: number | null }>, ); - const knowledgePages: LocalStatsKnowledgeEntry[] = knowledgeRows.map((row) => ({ + const wikiPages: LocalStatsWikiEntry[] = wikiRows.map((row) => ({ scope: row.scope, count: row.n, + embeddedCount: row.embedded ?? 0, })); const sourceRows = tryQuery( () => db .prepare( - `SELECT connection_id, COUNT(*) AS n FROM local_sl_sources GROUP BY connection_id`, + `SELECT connection_id, COUNT(*) AS n, SUM(CASE WHEN embedding_json IS NOT NULL THEN 1 ELSE 0 END) AS embedded + FROM local_sl_sources + GROUP BY connection_id`, ) - .all() as Array<{ connection_id: string; n: number }>, - [] as Array<{ connection_id: string; n: number }>, + .all() as Array<{ connection_id: string; n: number; embedded: number | null }>, + [] as Array<{ connection_id: string; n: number; embedded: number | null }>, ); const dictionaryRows = tryQuery( () => @@ -942,6 +928,7 @@ export async function buildLocalStatsStatus(project: KtxLocalProject): Promise `${entry.scope}=${entry.count}`) + const wikiText = stats.wikiPages + .map((entry) => `${entry.scope}=${entry.count} ${dim(`(${entry.embeddedCount} embedded)`)}`) .join(` ${dim('·')} `); - lines.push(` ${lLabel('Knowledge')} ${knowledgeText}`); + lines.push(` ${lLabel('Wiki')} ${wikiText}`); } if (stats.semanticLayer.length === 0) { @@ -1154,8 +1142,10 @@ function renderLocalStats( let firstLine = true; for (const entry of stats.semanticLayer) { const prefix = firstLine ? lLabel('Semantic layer') : ' '.repeat(localLabelWidth); + const sourcesText = `${entry.sourceCount} source${entry.sourceCount === 1 ? '' : 's'} (${entry.embeddedSourceCount} embedded)`; + const dictText = `${entry.dictionaryValueCount} dictionary value${entry.dictionaryValueCount === 1 ? '' : 's'}`; lines.push( - ` ${prefix} ${entry.connectionId.padEnd(nameWidth)} ${dim(`${entry.sourceCount} source${entry.sourceCount === 1 ? '' : 's'} · ${entry.dictionaryValueCount} dictionary value${entry.dictionaryValueCount === 1 ? 
'' : 's'}`)}`, + ` ${prefix} ${entry.connectionId.padEnd(nameWidth)} ${dim(`${sourcesText} · ${dictText}`)}`, ); firstLine = false; } @@ -1168,8 +1158,6 @@ function renderLocalStats( diskBits.push( `raw-sources=${disk.rawSources.fileCount} file${disk.rawSources.fileCount === 1 ? '' : 's'} (${formatBytes(disk.rawSources.bytes)})`, ); - diskBits.push(`wiki=${disk.wikiGlobalMarkdownCount} md`); - diskBits.push(`semantic-layer=${disk.semanticLayerYamlCount} yaml`); lines.push(` ${lLabel('Disk')} ${dim(diskBits.join(` ${dim('·')} `))}`); lines.push(''); }