ktx/packages/cli/src/knowledge.test.ts

233 lines
6.9 KiB
TypeScript
Raw Permalink Normal View History

2026-05-10 23:12:26 +02:00
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
2026-05-10 23:51:24 +02:00
import { initKtxProject } from '@ktx/context/project';
2026-05-11 22:59:45 +02:00
import type { KtxEmbeddingPort } from '@ktx/context';
2026-05-10 23:12:26 +02:00
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
2026-05-10 23:51:24 +02:00
import { runKtxKnowledge } from './knowledge.js';
2026-05-10 23:12:26 +02:00
function makeIo() {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
write: (chunk: string) => {
stdout += chunk;
},
},
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
},
},
stdout: () => stdout,
stderr: () => stderr,
};
}
2026-05-11 22:59:45 +02:00
class FakeEmbeddingPort implements KtxEmbeddingPort {
readonly maxBatchSize = 16;
async computeEmbedding(text: string): Promise<number[]> {
const lower = text.toLowerCase();
return lower.includes('revenue') || lower.includes('arr') ? [1, 0] : [0, 1];
}
async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
return Promise.all(texts.map((text) => this.computeEmbedding(text)));
}
}
2026-05-10 23:51:24 +02:00
describe('runKtxKnowledge', () => {
2026-05-10 23:12:26 +02:00
let tempDir: string;
beforeEach(async () => {
2026-05-10 23:51:24 +02:00
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-knowledge-'));
2026-05-10 23:12:26 +02:00
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('writes, reads, lists, and searches wiki pages', async () => {
2026-05-10 23:12:26 +02:00
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
const writeIo = makeIo();
await expect(
runKtxKnowledge(
{
command: 'write',
projectDir,
key: 'metrics-revenue',
scope: 'GLOBAL',
userId: 'local',
summary: 'Revenue',
content: 'Revenue is paid order value.',
tags: ['finance'],
refs: [],
slRefs: ['orders'],
},
writeIo.io,
),
).resolves.toBe(0);
expect(writeIo.stdout()).toContain('Wrote wiki/global/metrics-revenue.md');
const readIo = makeIo();
await expect(
runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local' }, readIo.io),
).resolves.toBe(0);
expect(readIo.stdout()).toContain('# metrics-revenue');
expect(readIo.stdout()).toContain('Revenue is paid order value.');
2026-05-10 23:12:26 +02:00
const listIo = makeIo();
2026-05-10 23:51:24 +02:00
await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local' }, listIo.io)).resolves.toBe(0);
expect(listIo.stdout()).toContain('GLOBAL\tmetrics-revenue\tRevenue');
2026-05-10 23:12:26 +02:00
const searchIo = makeIo();
await expect(
2026-05-10 23:51:24 +02:00
runKtxKnowledge({ command: 'search', projectDir, query: 'paid order', userId: 'local' }, searchIo.io),
2026-05-10 23:12:26 +02:00
).resolves.toBe(0);
expect(searchIo.stdout()).toContain('metrics-revenue');
});
it('prints wiki list, search, and read as public JSON envelopes', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await expect(
runKtxKnowledge(
{
command: 'write',
projectDir,
key: 'metrics-revenue',
scope: 'GLOBAL',
userId: 'local',
summary: 'Revenue',
content: 'Revenue is paid order value.',
tags: ['finance'],
refs: [],
slRefs: ['orders'],
},
makeIo().io,
),
).resolves.toBe(0);
const listIo = makeIo();
await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local', json: true }, listIo.io)).resolves.toBe(
0,
);
expect(JSON.parse(listIo.stdout())).toMatchObject({
kind: 'list',
data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] },
meta: { command: 'wiki list' },
});
const searchIo = makeIo();
await expect(
runKtxKnowledge(
{ command: 'search', projectDir, query: 'paid order', userId: 'local', json: true, limit: 5 },
searchIo.io,
),
).resolves.toBe(0);
expect(JSON.parse(searchIo.stdout())).toMatchObject({
kind: 'list',
data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] },
meta: { command: 'wiki search' },
});
const readIo = makeIo();
await expect(
runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local', json: true }, readIo.io),
).resolves.toBe(0);
expect(JSON.parse(readIo.stdout())).toMatchObject({
kind: 'wiki.page',
data: {
key: 'metrics-revenue',
summary: 'Revenue',
content: 'Revenue is paid order value.',
},
});
});
it('rejects slash-delimited write keys with a flat-key suggestion', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
const writeIo = makeIo();
await expect(
runKtxKnowledge(
{
command: 'write',
projectDir,
key: 'orbit/company-overview',
scope: 'GLOBAL',
userId: 'local',
summary: 'Orbit',
content: 'Orbit overview.',
tags: [],
refs: [],
slRefs: [],
},
writeIo.io,
),
).resolves.toBe(1);
expect(writeIo.stderr()).toContain(
'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".',
);
expect(writeIo.stdout()).toBe('');
2026-05-10 23:12:26 +02:00
});
it('explains empty search results for a project without wiki pages', async () => {
const projectDir = join(tempDir, 'empty-project');
await initKtxProject({ projectDir });
2026-05-10 23:12:26 +02:00
const searchIo = makeIo();
await expect(
2026-05-10 23:51:24 +02:00
runKtxKnowledge({ command: 'search', projectDir, query: 'revenue', userId: 'local' }, searchIo.io),
2026-05-10 23:12:26 +02:00
).resolves.toBe(0);
expect(searchIo.stdout()).toBe('');
expect(searchIo.stderr()).toContain('No local wiki pages found');
feat: merge ingest and scan * docs: add CLI component reuse guidance * docs: add unified ingest ux design * Refine unified ingest UX design after adversarial review iteration 1 * Refine unified ingest UX design after adversarial review iteration 2 * Refine unified ingest UX design after adversarial review iteration 3 * feat(cli): route public connection ingest command * feat(cli): hide standalone scan from public help * feat(cli): plan public ingest depth and query history * feat(cli): execute public database ingest facets * feat(ingest): read connection query history config * fix(cli): use public ingest wording * fix(config): stop generating ingest adapter allow lists * docs: document public ingest command * test: align ingest surface expectations * docs: add unified ingest public CLI surface plan * feat(cli): preflight deep public ingest readiness * feat(setup): store query history in connection context * feat(setup): store database context depth * feat(setup): verify context readiness by database depth * fix(setup): keep context build foreground only * fix(config): reject reserved ingest connection ids * test: close unified ingest v1 expectations * docs: add unified ingest v1 closure plan * fix(ingest): bypass adapter allow-list for public source ingest * fix(ingest): honor query history window intent * fix(ingest): hide scan internals from public database ingest * feat(ingest): use foreground view for interactive public ingest * fix(setup): use schema context and query history wording * test(cli): verify unified ingest public output * docs: add unified ingest v1 public output closure plan * fix(setup): forward query history flags * fix(setup): prompt for postgres query history * fix(status): report query history readiness * fix(ingest): remove legacy public guidance * fix(ingest): polish foreground retry copy * docs(examples): use unified query history wording * chore(ingest): finish public query history cleanup * docs: add unified ingest v1 query history status cleanup plan * test(docs): cover unified ingest public docs * docs: align ingest CLI reference with unified UX * docs: update context build guides for unified ingest * docs: update setup and primary source ingest wording * docs: stop advertising adapter-backed example ingest * docs: close unified ingest public docs gaps * docs: add unified ingest v1 docs site closure plan * fix: render unified ingest foreground warnings * fix: explain query history schema order * fix: add public ingest retry guidance * fix: align setup next steps with unified ingest * fix: remove scan wording from demo progress * test: verify unified ingest ux closure * docs: add unified ingest v1 foreground and retry closure plan * fix(cli): preserve query-history pull config in public ingest * fix(cli): omit hidden commands from docs command tree * test(cli): close unified ingest final public surface checks * docs: add unified ingest v1 final public surface closure plan * fix(cli): use public source labels in ingest reports * fix(cli): suppress low-level public ingest output * test(cli): verify unified ingest public plain output * docs: add unified ingest v1 public plain output closure plan * fix(cli): add public ingest copy sanitizers * fix(cli): sanitize public ingest progress copy * fix(cli): rename setup schema scope prompt * docs(plan): add progress copy closure; test: align setup back-nav fixture Adds the iter9 plan and updates the setup back-navigation test fixture to pass disableQueryHistory plus listSchemas/listTables stubs that the unified ingest setup step now requires. * docs(plan): add final ux labels plan with narrowed label scans * fix(cli): aggregate unsupported query-history warnings * fix(cli): align setup database labels * test(cli): fix setup database test type-check * fix(cli): remove primary-source wording from setup output * test(cli): verify unified ingest setup closure * docs(plan): add unified ingest v1 verification copy closure plan * fix(cli): remove top-level scan command * fix(cli): remove legacy ingest and wiki commands * Merge scan into ingest flow * feat(cli): split ingest progress into per-phase rows, rename work units to tasks Each database target in the unified ingest dashboard now renders one row per real subprocess (Schema, then Query history when enabled) instead of a single combined bar. Each phase has its own monotonic 0-100% bar so the progress never snaps back to zero when historic-sql starts after scan completes. Completed phases keep their final bar, summary, and elapsed time visible as an inline audit trail; queued and skipped phases are shown explicitly. Also rename user-facing "work units" / "Failed work units" to "tasks" / "Failed tasks" in ingest output and parseIngestSummary. The parser still accepts the legacy "Work units:" wording in captured output for backward compat. Internal memory-flow event names and type fields are left alone. * Fix test harness failures * Fix CI smoke checks --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-14 01:43:06 +02:00
expect(searchIo.stderr()).toContain('ktx ingest <connectionId>');
2026-05-10 23:12:26 +02:00
});
2026-05-11 22:59:45 +02:00
it('uses configured embeddings for semantic wiki search', async () => {
const projectDir = join(tempDir, 'semantic-project');
await initKtxProject({ projectDir });
await expect(
runKtxKnowledge(
{
command: 'write',
projectDir,
key: 'active-contract-arr-open-tickets',
scope: 'GLOBAL',
userId: 'local',
summary: 'Active Contract ARR Ranked by Open Support Ticket Count',
content: 'Accounts ranked by annual recurring contract value and support ticket load.',
tags: ['historic-sql'],
refs: [],
slRefs: [],
},
makeIo().io,
),
).resolves.toBe(0);
2026-05-11 22:59:45 +02:00
const searchIo = makeIo();
await expect(
runKtxKnowledge(
{ command: 'search', projectDir, query: 'revenue', userId: 'local' },
searchIo.io,
{ embeddingService: new FakeEmbeddingPort() },
),
).resolves.toBe(0);
expect(searchIo.stdout()).toContain('active-contract-arr-open-tickets');
2026-05-11 22:59:45 +02:00
expect(searchIo.stderr()).toBe('');
});
2026-05-10 23:12:26 +02:00
});