feat(cli): add ktx admin reindex (#160)

* feat(cli): add admin reindex

* fix: keep lexical-only reindex incremental
This commit is contained in:
Andrey Avtomonov 2026-05-20 01:36:54 +02:00 committed by GitHub
parent 3db3e724cb
commit 6dbb0c8b3a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 1640 additions and 393 deletions

View file

@ -0,0 +1,2 @@
// Public surface of the reindex module: result/option types plus the scope
// discovery and reindex entry points.
export type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
export { discoverReindexScopes, reindexLocalIndexes } from './reindex.js';

View file

@ -0,0 +1,196 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxEmbeddingPort } from '../core/index.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../project/index.js';
import { SqliteKnowledgeIndex } from '../wiki/sqlite-knowledge-index.js';
import { reindexLocalIndexes } from './reindex.js';
/**
 * Deterministic embedding stub for tests: every text maps to the two-element
 * vector `[text.length, 1]`, so no real embedding provider is needed.
 */
class FakeEmbeddingPort implements KtxEmbeddingPort {
  readonly maxBatchSize = 8;

  /** Embeds a single text as `[length, 1]`. */
  async computeEmbedding(text: string): Promise<number[]> {
    const length = text.length;
    return [length, 1];
  }

  /** Embeds each text independently, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const vectors: number[][] = [];
    for (const entry of texts) {
      vectors.push([entry.length, 1]);
    }
    return vectors;
  }
}
/**
 * Initializes a ktx project in `tempDir` (with `force: true`) and returns the
 * loaded project handle for use in a test.
 */
async function createProject(tempDir: string): Promise<KtxLocalProject> {
  const projectDir = tempDir;
  await initKtxProject({ projectDir, force: true });
  return loadKtxProject({ projectDir });
}
// Integration-style tests for reindexLocalIndexes. Each test runs against a
// freshly initialized project in its own temporary directory.
describe('reindexLocalIndexes', () => {
let tempDir: string;
// A unique temp directory per test keeps index state from leaking between cases.
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-reindex-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// With both source roots removed there is nothing to discover, so the summary
// has no scopes and all-zero totals.
it('returns an empty summary when no wiki or semantic-layer directories exist', async () => {
const project = await createProject(tempDir);
await rm(join(project.projectDir, 'wiki'), { recursive: true, force: true });
await rm(join(project.projectDir, 'semantic-layer'), { recursive: true, force: true });
await expect(reindexLocalIndexes(project, { force: false, embeddingService: null })).resolves.toMatchObject({
scopes: [],
totals: { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 },
force: false,
embeddingsAvailable: false,
});
});
// Directories without any source files still count as scopes; the expected
// order is wiki global, wiki user scopes, then semantic-layer connections.
it('discovers empty directories as zero-row scopes', async () => {
const project = await createProject(tempDir);
await mkdir(join(project.projectDir, 'wiki/user/local'), { recursive: true });
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(summary.scopes.map((scope) => scope.label)).toEqual(['global', 'user/local', 'warehouse']);
expect(summary.totals.scanned).toBe(0);
});
// One wiki page plus one semantic-layer source, indexed with a working
// embedding port: both are scanned, updated, and embedded.
it('indexes mixed wiki and SL sources and reports totals', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
'utf-8',
);
const summary = await reindexLocalIndexes(project, {
force: false,
embeddingService: new FakeEmbeddingPort(),
});
expect(summary.scopes).toHaveLength(2);
expect(summary.totals).toMatchObject({ scanned: 2, updated: 2, deleted: 0, embeddingsRecomputed: 2 });
expect(summary.embeddingsAvailable).toBe(true);
});
// Regression guard for incremental behavior without embeddings: a second run
// over unchanged sources must scan them but report zero updates.
it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
'utf-8',
);
const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(first.totals).toMatchObject({
scanned: 2,
updated: 2,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(second.totals).toMatchObject({
scanned: 2,
updated: 0,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
expect(second.scopes.map((scope) => [scope.label, scope.updated])).toEqual([
['global', 0],
['warehouse', 0],
]);
});
// Seeds the index with a row that has no backing file, then forces a rebuild:
// the stale row must be gone, and `deleted` is still reported as 0 under force.
it('force clears stale rows before rebuilding each discovered scope', async () => {
const project = await createProject(tempDir);
const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
wikiIndex.sync([
{
path: 'wiki/global/stale.md',
key: 'stale',
scope: 'GLOBAL',
scopeId: null,
summary: 'Stale',
content: 'Stale content',
tags: [],
embedding: [1, 0],
},
]);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
const summary = await reindexLocalIndexes(project, {
force: true,
embeddingService: new FakeEmbeddingPort(),
});
expect(summary.force).toBe(true);
expect(summary.totals).toMatchObject({ scanned: 1, updated: 1, deleted: 0 });
expect(wikiIndex.search('Stale', 10)).toEqual([]);
});
// A malformed YAML source fails only its own scope; the wiki scope still
// completes without an error.
it('captures a per-scope error and continues other scopes', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/broken.yaml'), 'not: [valid', 'utf-8');
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML');
});
// When an embedding service is configured but every call throws, the scope is
// reported with the failure count and a human-readable error string.
it('marks a scope errored when configured embeddings fail', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
const embeddingService: KtxEmbeddingPort = {
maxBatchSize: 8,
async computeEmbedding() {
throw new Error('embedding provider unavailable');
},
async computeEmbeddingsBulk() {
throw new Error('embedding provider unavailable');
},
};
const summary = await reindexLocalIndexes(project, { force: false, embeddingService });
expect(summary.scopes[0]).toMatchObject({
label: 'global',
embeddingsFailed: 1,
error: '1 embedding recomputation failed',
});
});
});

View file

@ -0,0 +1,162 @@
import { readdir, stat } from 'node:fs/promises';
import { join, relative } from 'node:path';
import { ktxLocalStateDbPath, type KtxLocalProject } from '../project/index.js';
import { loadLocalSlSourceRecords, SlSearchService, SqliteSlSourcesIndex } from '../sl/index.js';
import { KnowledgeWikiService, SqliteKnowledgeIndex } from '../wiki/index.js';
import type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
/**
 * A reindexable unit found on disk, discriminated by `kind`:
 * the global wiki, a per-user wiki (identified by `scopeId`), or a
 * semantic-layer connection (identified by `connectionId`).
 * `label` is the display name reported in results.
 */
type DiscoveredScope =
| { kind: 'wiki'; scope: 'GLOBAL'; scopeId: null; label: 'global' }
| { kind: 'wiki'; scope: 'USER'; scopeId: string; label: `user/${string}` }
| { kind: 'sl'; connectionId: string; label: string };
/**
 * All-zero counters. Used as the seed when summing per-scope results and as
 * the counters reported for a scope that failed with an exception.
 */
const ZERO: ReindexWorkResult = {
scanned: 0,
updated: 0,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
};
/**
 * Reports whether `path` resolves to a directory.
 *
 * Any failure from `stat` (missing path, permission problem, ...) is treated as
 * "not a directory" rather than propagated.
 */
async function directoryExists(path: string): Promise<boolean> {
  let isDir = false;
  try {
    const info = await stat(path);
    isDir = info.isDirectory();
  } catch {
    // Deliberate best-effort: an unreadable or missing path counts as absent.
  }
  return isDir;
}
/**
 * Lists the names of the immediate subdirectories of `path`, sorted with
 * `String.prototype.localeCompare`.
 *
 * Returns an empty list when `path` does not exist (`ENOENT`) or exists but is
 * not a directory (`ENOTDIR`). The latter matches `directoryExists`, which also
 * treats a non-directory as absent, so a stray file at a scope root no longer
 * aborts discovery of every other scope.
 *
 * @param path Directory to inspect.
 * @returns Sorted subdirectory names (not full paths).
 * @throws Any other filesystem error from `readdir` (e.g. `EACCES`).
 */
async function childDirectories(path: string): Promise<string[]> {
  try {
    const entries = await readdir(path, { withFileTypes: true });
    return entries
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name)
      .sort((left, right) => left.localeCompare(right));
  } catch (error) {
    const code = (error as NodeJS.ErrnoException).code;
    // Missing path or a non-directory: there are simply no child directories.
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      return [];
    }
    throw error;
  }
}
/**
 * Finds every reindexable scope under the project directory.
 *
 * Order of the result: the global wiki (if `wiki/global` is a directory), then
 * one user wiki per subdirectory of `wiki/user`, then one semantic-layer scope
 * per subdirectory of `semantic-layer` except the reserved `_schema` directory.
 *
 * @param project Project whose `projectDir` is scanned.
 * @returns Discovered scopes in the order described above.
 */
export async function discoverReindexScopes(project: KtxLocalProject): Promise<DiscoveredScope[]> {
  const root = project.projectDir;
  const discovered: DiscoveredScope[] = [];

  const hasGlobalWiki = await directoryExists(join(root, 'wiki/global'));
  if (hasGlobalWiki) {
    discovered.push({ kind: 'wiki', scope: 'GLOBAL', scopeId: null, label: 'global' });
  }

  const userIds = await childDirectories(join(root, 'wiki/user'));
  for (const userId of userIds) {
    discovered.push({ kind: 'wiki', scope: 'USER', scopeId: userId, label: `user/${userId}` });
  }

  const connectionIds = await childDirectories(join(root, 'semantic-layer'));
  for (const connectionId of connectionIds) {
    if (connectionId === '_schema') {
      continue;
    }
    discovered.push({ kind: 'sl', connectionId, label: connectionId });
  }

  return discovered;
}
/**
 * Renders a caught value as a single-line message.
 *
 * `Error` instances with a specific name (anything other than empty or the
 * default `'Error'`) are rendered as `Name: message`; plain errors yield just
 * the message; non-`Error` values are stringified.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    const hasSpecificName = Boolean(error.name) && error.name !== 'Error';
    return hasSpecificName ? `${error.name}: ${error.message}` : error.message;
  }
  return String(error);
}
/**
 * Returns a new counter object holding the field-wise sum of `left` and
 * `right`. Neither argument is mutated; only the five counter fields are read.
 */
function addTotals(left: ReindexWorkResult, right: ReindexWorkResult): ReindexWorkResult {
  const scanned = left.scanned + right.scanned;
  const updated = left.updated + right.updated;
  const deleted = left.deleted + right.deleted;
  const embeddingsRecomputed = left.embeddingsRecomputed + right.embeddingsRecomputed;
  const embeddingsFailed = left.embeddingsFailed + right.embeddingsFailed;
  return { scanned, updated, deleted, embeddingsRecomputed, embeddingsFailed };
}
/**
 * Whole milliseconds elapsed since `startedAt`, a `process.hrtime.bigint()`
 * reading in nanoseconds. Sub-millisecond remainders are truncated by the
 * bigint division.
 */
function durationSince(startedAt: bigint): number {
  const elapsedNs = process.hrtime.bigint() - startedAt;
  const elapsedMs = elapsedNs / 1_000_000n;
  return Number(elapsedMs);
}
/**
 * Builds the error text for a scope whose embedding recomputation failed.
 *
 * @returns `undefined` when nothing failed; otherwise a message such as
 * `1 embedding recomputation failed` or `3 embedding recomputations failed`.
 */
function embeddingFailureError(work: ReindexWorkResult): string | undefined {
  const failed = work.embeddingsFailed;
  if (failed === 0) {
    return undefined;
  }
  const pluralSuffix = failed === 1 ? '' : 's';
  return `${failed} embedding recomputation${pluralSuffix} failed`;
}
/**
 * Rebuilds the local wiki and semantic-layer indexes for every scope found on
 * disk and reports what happened per scope and in total.
 *
 * Scopes are processed sequentially. A scope that throws is recorded with zero
 * counters and an `error` message, and processing continues with the next
 * scope. When an embedding service is configured and some embeddings failed,
 * the scope keeps its counters but also carries an `error`.
 *
 * @param project Project whose sources and local state database are used.
 * @param options `force` clears each scope's index rows before re-syncing (and
 *   reports `deleted` as 0 for that scope); `embeddingService` is handed to the
 *   wiki and semantic-layer services and may be `null`.
 * @returns Per-scope results, summed totals, the database path relative to the
 *   project directory, and overall duration in milliseconds.
 */
export async function reindexLocalIndexes(
  project: KtxLocalProject,
  options: ReindexOptions,
): Promise<ReindexSummary> {
  const startedAt = process.hrtime.bigint();
  const { force, embeddingService } = options;
  const dbPath = ktxLocalStateDbPath(project);
  const scopes = await discoverReindexScopes(project);
  const wikiIndex = new SqliteKnowledgeIndex({ dbPath });
  const slIndex = new SqliteSlSourcesIndex({ dbPath });
  const wikiService = new KnowledgeWikiService(project.fileStore, embeddingService, wikiIndex, project.git);
  const slService = new SlSearchService(embeddingService, slIndex);
  const results: ReindexScopeResult[] = [];

  // After a forced clear the sync cannot meaningfully report deletions, so the
  // counter is pinned to zero for every scope in that mode.
  const deletedOverride = force ? { deleted: 0 } : {};
  // Only surfaces an error when embeddings were actually expected to work.
  const embeddingFailure = (work: ReindexWorkResult) =>
    embeddingService && work.embeddingsFailed > 0 ? { error: embeddingFailureError(work) } : {};

  for (const scope of scopes) {
    const scopeStartedAt = process.hrtime.bigint();
    try {
      if (scope.kind === 'wiki') {
        if (force) {
          wikiIndex.clear(scope.scope, scope.scopeId);
        }
        const work: ReindexWorkResult = await wikiService.syncIndex(scope.scope, scope.scopeId);
        results.push({
          kind: 'wiki',
          label: scope.label,
          scope: scope.scope === 'GLOBAL' ? 'global' : 'user',
          scopeId: scope.scopeId,
          ...work,
          ...deletedOverride,
          ...embeddingFailure(work),
          durationMs: durationSince(scopeStartedAt),
        });
      } else {
        if (force) {
          await slIndex.clear(scope.connectionId);
        }
        const records = await loadLocalSlSourceRecords(project, { connectionId: scope.connectionId });
        const sources = records.map((record) => record.source);
        const work: ReindexWorkResult = await slService.indexSources(scope.connectionId, sources);
        results.push({
          kind: 'sl',
          label: scope.label,
          connectionId: scope.connectionId,
          ...work,
          ...deletedOverride,
          ...embeddingFailure(work),
          durationMs: durationSince(scopeStartedAt),
        });
      }
    } catch (error) {
      // Record the failure for this scope only; remaining scopes still run.
      results.push({
        kind: scope.kind,
        label: scope.label,
        ...(scope.kind === 'wiki'
          ? { scope: scope.scope === 'GLOBAL' ? 'global' : 'user', scopeId: scope.scopeId }
          : { connectionId: scope.connectionId }),
        ...ZERO,
        durationMs: durationSince(scopeStartedAt),
        error: errorMessage(error),
      });
    }
  }

  const totals = results.reduce(addTotals, ZERO);
  return {
    scopes: results,
    totals,
    dbPath: relative(project.projectDir, dbPath) || dbPath,
    force,
    embeddingsAvailable: embeddingService !== null,
    durationMs: durationSince(startedAt),
  };
}

View file

@ -0,0 +1,33 @@
import type { KtxEmbeddingPort } from '../core/index.js';
/** Options accepted by `reindexLocalIndexes`. */
export interface ReindexOptions {
/** When true, each scope's existing index rows are cleared before it is re-synced. */
force: boolean;
/** Embedding provider handed to the indexing services; `null` means embeddings are unavailable. */
embeddingService: KtxEmbeddingPort | null;
}
/** Counters describing the work done by one indexing pass (or a sum of several). */
export interface ReindexWorkResult {
/** Number of sources examined. */
scanned: number;
/** Number of index rows written; unchanged sources on a repeated run are not counted. */
updated: number;
/** Number of index rows removed; reported as 0 for scopes rebuilt with `force`. */
deleted: number;
/** Number of embeddings successfully recomputed. */
embeddingsRecomputed: number;
/** Number of embedding recomputations that failed. */
embeddingsFailed: number;
}
/** Outcome of reindexing a single scope: its identity, counters, timing, and any error. */
export interface ReindexScopeResult extends ReindexWorkResult {
/** Which index the scope belongs to: the wiki or the semantic layer (`sl`). */
kind: 'wiki' | 'sl';
/** Display name: `global`, `user/<id>`, or the semantic-layer connection id. */
label: string;
/** Wiki scopes only: whether this is the global or a per-user wiki. */
scope?: 'global' | 'user';
/** Wiki scopes only: the user id, or `null` for the global wiki. */
scopeId?: string | null;
/** Semantic-layer scopes only: the connection id. */
connectionId?: string;
/** Elapsed time for this scope in whole milliseconds. */
durationMs: number;
/** Set when the scope threw or when configured embeddings failed. */
error?: string;
}
/** Overall result returned by `reindexLocalIndexes`. */
export interface ReindexSummary {
/** One entry per discovered scope, in processing order. */
scopes: ReindexScopeResult[];
/** Field-wise sum of the counters across all scopes. */
totals: ReindexWorkResult;
/** Path of the local state database, relative to the project directory when that is non-empty. */
dbPath: string;
/** Echo of the `force` option used for this run. */
force: boolean;
/** True when a non-null embedding service was supplied. */
embeddingsAvailable: boolean;
/** Elapsed time for the whole run in whole milliseconds. */
durationMs: number;
}