mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
feat(cli): add ktx admin reindex (#160)
* feat(cli): add admin reindex * fix: keep lexical-only reindex incremental
This commit is contained in:
parent
3db3e724cb
commit
6dbb0c8b3a
53 changed files with 1640 additions and 393 deletions
2
packages/context/src/index-sync/index.ts
Normal file
2
packages/context/src/index-sync/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
|
||||
export { discoverReindexScopes, reindexLocalIndexes } from './reindex.js';
|
||||
196
packages/context/src/index-sync/reindex.test.ts
Normal file
196
packages/context/src/index-sync/reindex.test.ts
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxEmbeddingPort } from '../core/index.js';
|
||||
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../project/index.js';
|
||||
import { SqliteKnowledgeIndex } from '../wiki/sqlite-knowledge-index.js';
|
||||
import { reindexLocalIndexes } from './reindex.js';
|
||||
|
||||
class FakeEmbeddingPort implements KtxEmbeddingPort {
|
||||
readonly maxBatchSize = 8;
|
||||
|
||||
async computeEmbedding(text: string): Promise<number[]> {
|
||||
return [text.length, 1];
|
||||
}
|
||||
|
||||
async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
|
||||
return texts.map((text) => [text.length, 1]);
|
||||
}
|
||||
}
|
||||
|
||||
async function createProject(tempDir: string): Promise<KtxLocalProject> {
|
||||
await initKtxProject({ projectDir: tempDir, force: true });
|
||||
return loadKtxProject({ projectDir: tempDir });
|
||||
}
|
||||
|
||||
describe('reindexLocalIndexes', () => {
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-reindex-'));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('returns an empty summary when no wiki or semantic-layer directories exist', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await rm(join(project.projectDir, 'wiki'), { recursive: true, force: true });
|
||||
await rm(join(project.projectDir, 'semantic-layer'), { recursive: true, force: true });
|
||||
|
||||
await expect(reindexLocalIndexes(project, { force: false, embeddingService: null })).resolves.toMatchObject({
|
||||
scopes: [],
|
||||
totals: { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 },
|
||||
force: false,
|
||||
embeddingsAvailable: false,
|
||||
});
|
||||
});
|
||||
|
||||
it('discovers empty directories as zero-row scopes', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await mkdir(join(project.projectDir, 'wiki/user/local'), { recursive: true });
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
|
||||
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
|
||||
expect(summary.scopes.map((scope) => scope.label)).toEqual(['global', 'user/local', 'warehouse']);
|
||||
expect(summary.totals.scanned).toBe(0);
|
||||
});
|
||||
|
||||
it('indexes mixed wiki and SL sources and reports totals', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await writeFile(
|
||||
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
|
||||
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
const summary = await reindexLocalIndexes(project, {
|
||||
force: false,
|
||||
embeddingService: new FakeEmbeddingPort(),
|
||||
});
|
||||
|
||||
expect(summary.scopes).toHaveLength(2);
|
||||
expect(summary.totals).toMatchObject({ scanned: 2, updated: 2, deleted: 0, embeddingsRecomputed: 2 });
|
||||
expect(summary.embeddingsAvailable).toBe(true);
|
||||
});
|
||||
|
||||
it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await writeFile(
|
||||
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
|
||||
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
expect(first.totals).toMatchObject({
|
||||
scanned: 2,
|
||||
updated: 2,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
|
||||
const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
|
||||
expect(second.totals).toMatchObject({
|
||||
scanned: 2,
|
||||
updated: 0,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
expect(second.scopes.map((scope) => [scope.label, scope.updated])).toEqual([
|
||||
['global', 0],
|
||||
['warehouse', 0],
|
||||
]);
|
||||
});
|
||||
|
||||
it('force clears stale rows before rebuilding each discovered scope', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
|
||||
wikiIndex.sync([
|
||||
{
|
||||
path: 'wiki/global/stale.md',
|
||||
key: 'stale',
|
||||
scope: 'GLOBAL',
|
||||
scopeId: null,
|
||||
summary: 'Stale',
|
||||
content: 'Stale content',
|
||||
tags: [],
|
||||
embedding: [1, 0],
|
||||
},
|
||||
]);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
const summary = await reindexLocalIndexes(project, {
|
||||
force: true,
|
||||
embeddingService: new FakeEmbeddingPort(),
|
||||
});
|
||||
|
||||
expect(summary.force).toBe(true);
|
||||
expect(summary.totals).toMatchObject({ scanned: 1, updated: 1, deleted: 0 });
|
||||
expect(wikiIndex.search('Stale', 10)).toEqual([]);
|
||||
});
|
||||
|
||||
it('captures a per-scope error and continues other scopes', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/broken.yaml'), 'not: [valid', 'utf-8');
|
||||
|
||||
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
|
||||
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
|
||||
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML');
|
||||
});
|
||||
|
||||
it('marks a scope errored when configured embeddings fail', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
const embeddingService: KtxEmbeddingPort = {
|
||||
maxBatchSize: 8,
|
||||
async computeEmbedding() {
|
||||
throw new Error('embedding provider unavailable');
|
||||
},
|
||||
async computeEmbeddingsBulk() {
|
||||
throw new Error('embedding provider unavailable');
|
||||
},
|
||||
};
|
||||
|
||||
const summary = await reindexLocalIndexes(project, { force: false, embeddingService });
|
||||
|
||||
expect(summary.scopes[0]).toMatchObject({
|
||||
label: 'global',
|
||||
embeddingsFailed: 1,
|
||||
error: '1 embedding recomputation failed',
|
||||
});
|
||||
});
|
||||
});
|
||||
162
packages/context/src/index-sync/reindex.ts
Normal file
162
packages/context/src/index-sync/reindex.ts
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
import { readdir, stat } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import { ktxLocalStateDbPath, type KtxLocalProject } from '../project/index.js';
|
||||
import { loadLocalSlSourceRecords, SlSearchService, SqliteSlSourcesIndex } from '../sl/index.js';
|
||||
import { KnowledgeWikiService, SqliteKnowledgeIndex } from '../wiki/index.js';
|
||||
import type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
|
||||
|
||||
type DiscoveredScope =
|
||||
| { kind: 'wiki'; scope: 'GLOBAL'; scopeId: null; label: 'global' }
|
||||
| { kind: 'wiki'; scope: 'USER'; scopeId: string; label: `user/${string}` }
|
||||
| { kind: 'sl'; connectionId: string; label: string };
|
||||
|
||||
const ZERO: ReindexWorkResult = {
|
||||
scanned: 0,
|
||||
updated: 0,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
};
|
||||
|
||||
async function directoryExists(path: string): Promise<boolean> {
|
||||
try {
|
||||
return (await stat(path)).isDirectory();
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async function childDirectories(path: string): Promise<string[]> {
|
||||
try {
|
||||
const entries = await readdir(path, { withFileTypes: true });
|
||||
return entries
|
||||
.filter((entry) => entry.isDirectory())
|
||||
.map((entry) => entry.name)
|
||||
.sort((left, right) => left.localeCompare(right));
|
||||
} catch (error) {
|
||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
return [];
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
export async function discoverReindexScopes(project: KtxLocalProject): Promise<DiscoveredScope[]> {
|
||||
const scopes: DiscoveredScope[] = [];
|
||||
if (await directoryExists(join(project.projectDir, 'wiki/global'))) {
|
||||
scopes.push({ kind: 'wiki', scope: 'GLOBAL', scopeId: null, label: 'global' });
|
||||
}
|
||||
for (const userId of await childDirectories(join(project.projectDir, 'wiki/user'))) {
|
||||
scopes.push({ kind: 'wiki', scope: 'USER', scopeId: userId, label: `user/${userId}` });
|
||||
}
|
||||
for (const connectionId of await childDirectories(join(project.projectDir, 'semantic-layer'))) {
|
||||
if (connectionId !== '_schema') {
|
||||
scopes.push({ kind: 'sl', connectionId, label: connectionId });
|
||||
}
|
||||
}
|
||||
return scopes;
|
||||
}
|
||||
|
||||
function errorMessage(error: unknown): string {
|
||||
if (!(error instanceof Error)) {
|
||||
return String(error);
|
||||
}
|
||||
return error.name && error.name !== 'Error' ? `${error.name}: ${error.message}` : error.message;
|
||||
}
|
||||
|
||||
function addTotals(left: ReindexWorkResult, right: ReindexWorkResult): ReindexWorkResult {
|
||||
return {
|
||||
scanned: left.scanned + right.scanned,
|
||||
updated: left.updated + right.updated,
|
||||
deleted: left.deleted + right.deleted,
|
||||
embeddingsRecomputed: left.embeddingsRecomputed + right.embeddingsRecomputed,
|
||||
embeddingsFailed: left.embeddingsFailed + right.embeddingsFailed,
|
||||
};
|
||||
}
|
||||
|
||||
function durationSince(startedAt: bigint): number {
|
||||
return Number((process.hrtime.bigint() - startedAt) / 1_000_000n);
|
||||
}
|
||||
|
||||
function embeddingFailureError(work: ReindexWorkResult): string | undefined {
|
||||
if (work.embeddingsFailed === 0) {
|
||||
return undefined;
|
||||
}
|
||||
return `${work.embeddingsFailed} embedding recomputation${work.embeddingsFailed === 1 ? '' : 's'} failed`;
|
||||
}
|
||||
|
||||
export async function reindexLocalIndexes(
|
||||
project: KtxLocalProject,
|
||||
options: ReindexOptions,
|
||||
): Promise<ReindexSummary> {
|
||||
const startedAt = process.hrtime.bigint();
|
||||
const dbPath = ktxLocalStateDbPath(project);
|
||||
const scopes = await discoverReindexScopes(project);
|
||||
const wikiIndex = new SqliteKnowledgeIndex({ dbPath });
|
||||
const slIndex = new SqliteSlSourcesIndex({ dbPath });
|
||||
const wikiService = new KnowledgeWikiService(project.fileStore, options.embeddingService, wikiIndex, project.git);
|
||||
const slService = new SlSearchService(options.embeddingService, slIndex);
|
||||
const results: ReindexScopeResult[] = [];
|
||||
|
||||
for (const scope of scopes) {
|
||||
const scopeStartedAt = process.hrtime.bigint();
|
||||
try {
|
||||
let work: ReindexWorkResult;
|
||||
if (scope.kind === 'wiki') {
|
||||
if (options.force) {
|
||||
wikiIndex.clear(scope.scope, scope.scopeId);
|
||||
}
|
||||
work = await wikiService.syncIndex(scope.scope, scope.scopeId);
|
||||
results.push({
|
||||
kind: 'wiki',
|
||||
label: scope.label,
|
||||
scope: scope.scope === 'GLOBAL' ? 'global' : 'user',
|
||||
scopeId: scope.scopeId,
|
||||
...work,
|
||||
...(options.force ? { deleted: 0 } : {}),
|
||||
...(options.embeddingService && work.embeddingsFailed > 0 ? { error: embeddingFailureError(work) } : {}),
|
||||
durationMs: durationSince(scopeStartedAt),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (options.force) {
|
||||
await slIndex.clear(scope.connectionId);
|
||||
}
|
||||
const records = await loadLocalSlSourceRecords(project, { connectionId: scope.connectionId });
|
||||
work = await slService.indexSources(
|
||||
scope.connectionId,
|
||||
records.map((record) => record.source),
|
||||
);
|
||||
results.push({
|
||||
kind: 'sl',
|
||||
label: scope.label,
|
||||
connectionId: scope.connectionId,
|
||||
...work,
|
||||
...(options.force ? { deleted: 0 } : {}),
|
||||
...(options.embeddingService && work.embeddingsFailed > 0 ? { error: embeddingFailureError(work) } : {}),
|
||||
durationMs: durationSince(scopeStartedAt),
|
||||
});
|
||||
} catch (error) {
|
||||
results.push({
|
||||
kind: scope.kind,
|
||||
label: scope.label,
|
||||
...(scope.kind === 'wiki'
|
||||
? { scope: scope.scope === 'GLOBAL' ? 'global' : 'user', scopeId: scope.scopeId }
|
||||
: { connectionId: scope.connectionId }),
|
||||
...ZERO,
|
||||
durationMs: durationSince(scopeStartedAt),
|
||||
error: errorMessage(error),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
scopes: results,
|
||||
totals: results.reduce(addTotals, ZERO),
|
||||
dbPath: relative(project.projectDir, dbPath) || dbPath,
|
||||
force: options.force,
|
||||
embeddingsAvailable: options.embeddingService !== null,
|
||||
durationMs: durationSince(startedAt),
|
||||
};
|
||||
}
|
||||
33
packages/context/src/index-sync/types.ts
Normal file
33
packages/context/src/index-sync/types.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import type { KtxEmbeddingPort } from '../core/index.js';
|
||||
|
||||
export interface ReindexOptions {
|
||||
force: boolean;
|
||||
embeddingService: KtxEmbeddingPort | null;
|
||||
}
|
||||
|
||||
export interface ReindexWorkResult {
|
||||
scanned: number;
|
||||
updated: number;
|
||||
deleted: number;
|
||||
embeddingsRecomputed: number;
|
||||
embeddingsFailed: number;
|
||||
}
|
||||
|
||||
export interface ReindexScopeResult extends ReindexWorkResult {
|
||||
kind: 'wiki' | 'sl';
|
||||
label: string;
|
||||
scope?: 'global' | 'user';
|
||||
scopeId?: string | null;
|
||||
connectionId?: string;
|
||||
durationMs: number;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export interface ReindexSummary {
|
||||
scopes: ReindexScopeResult[];
|
||||
totals: ReindexWorkResult;
|
||||
dbPath: string;
|
||||
force: boolean;
|
||||
embeddingsAvailable: boolean;
|
||||
durationMs: number;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue