feat(cli): add ktx admin reindex (#160)

* feat(cli): add admin reindex

* fix: keep lexical-only reindex incremental
This commit is contained in:
Andrey Avtomonov 2026-05-20 01:36:54 +02:00 committed by GitHub
parent 3db3e724cb
commit 6dbb0c8b3a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 1640 additions and 393 deletions

View file

@ -4,9 +4,9 @@ import { KnowledgeWikiService, type WikiFrontmatter } from './knowledge-wiki.ser
function makeService() {
const pagesRepository: Record<string, ReturnType<typeof vi.fn>> = {
upsertPage: vi.fn().mockResolvedValue(undefined),
deleteByKey: vi.fn().mockResolvedValue(undefined),
deleteByScope: vi.fn().mockResolvedValue(undefined),
deleteStale: vi.fn().mockResolvedValue(undefined),
deleteByKey: vi.fn().mockResolvedValue(0),
deleteByScope: vi.fn().mockResolvedValue(0),
deleteStale: vi.fn().mockResolvedValue(0),
getExistingSearchTexts: vi.fn().mockResolvedValue(new Map()),
applyDiffTransactional: vi.fn().mockResolvedValue(undefined),
};
@ -50,6 +50,87 @@ function makeService() {
const fm: WikiFrontmatter = { summary: 'sum', usage_mode: 'auto' };
describe('KnowledgeWikiService.syncIndex result stats', () => {
  type ServiceMocks = ReturnType<typeof makeService>;

  /** Makes the mocked file store expose exactly one GLOBAL page with the given raw markdown. */
  function stubRevenuePage(configService: ServiceMocks['configService'], content: string): void {
    configService.listFiles.mockResolvedValue({ files: ['wiki/global/revenue.md'] });
    configService.readFile.mockResolvedValue({ content });
  }

  /** Builds a service whose embedding port (second constructor argument) is null. */
  function makeLexicalOnlyService() {
    const { pagesRepository, configService, gitService, logger } = makeService();
    const service = new KnowledgeWikiService(
      configService as any,
      null,
      pagesRepository as any,
      gitService as any,
      logger as any,
    );
    return { service, pagesRepository, configService };
  }

  it('reports scanned, updated, deleted, and embedding counts', async () => {
    const { service, pagesRepository, embeddingService, configService } = makeService();
    stubRevenuePage(
      configService,
      '---\nsummary: Revenue\nusage_mode: auto\ntags:\n - finance\n---\n\nPaid orders.\n',
    );
    // The index only knows a page that is no longer on disk.
    const indexedRows = new Map([['old-page', { searchText: 'old', hasEmbedding: true }]]);
    pagesRepository.getExistingSearchTexts.mockResolvedValue(indexedRows);
    embeddingService.computeEmbeddingsBulk.mockResolvedValue([[0.1, 0.2, 0.3]]);
    pagesRepository.deleteStale.mockResolvedValue(1);

    const stats = await service.syncIndex('GLOBAL', null);

    expect(stats).toEqual({
      scanned: 1,
      updated: 1,
      deleted: 1,
      embeddingsRecomputed: 1,
      embeddingsFailed: 0,
    });
  });

  it('indexes lexical rows when embeddings are not configured', async () => {
    const { service, pagesRepository, configService } = makeLexicalOnlyService();
    stubRevenuePage(configService, '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n');
    pagesRepository.getExistingSearchTexts.mockResolvedValue(new Map());
    pagesRepository.deleteStale.mockResolvedValue(0);

    const { embeddingsRecomputed, embeddingsFailed } = await service.syncIndex('GLOBAL', null);

    expect(embeddingsRecomputed).toBe(0);
    expect(embeddingsFailed).toBe(0);
    expect(pagesRepository.upsertPage).toHaveBeenCalledWith(
      expect.objectContaining({ pageKey: 'revenue', embedding: null }),
    );
  });

  it('does not update unchanged lexical-only wiki rows on repeated sync', async () => {
    const { service, pagesRepository, configService } = makeLexicalOnlyService();
    stubRevenuePage(configService, '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n');
    // The row is already indexed with matching search text but without an embedding.
    const indexedRows = new Map([
      ['revenue', { searchText: 'revenue\nRevenue\nPaid orders.', hasEmbedding: false }],
    ]);
    pagesRepository.getExistingSearchTexts.mockResolvedValue(indexedRows);
    pagesRepository.deleteStale.mockResolvedValue(0);

    const stats = await service.syncIndex('GLOBAL', null);

    expect(stats).toEqual({
      scanned: 1,
      updated: 0,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });
    expect(pagesRepository.upsertPage).not.toHaveBeenCalled();
    expect(pagesRepository.deleteStale).toHaveBeenCalledWith('GLOBAL', null, ['revenue']);
  });
});
describe('KnowledgeWikiService.forWorktree isolation', () => {
it('syncSinglePage in worktree scope does not call pagesRepository.upsertPage', async () => {
const { service, pagesRepository, embeddingService } = makeService();

View file

@ -2,6 +2,7 @@ import { createHash } from 'node:crypto';
import YAML from 'yaml';
import type { KtxEmbeddingPort, KtxFileStorePort, KtxLogger } from '../core/index.js';
import { noopLogger } from '../core/index.js';
import type { ReindexWorkResult } from '../index-sync/types.js';
import { assertFlatWikiKey, isFlatWikiKey } from './keys.js';
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
import type { KnowledgeGitDiffPort, KnowledgeIndexPort, UpsertPageParams } from './ports.js';
@ -16,7 +17,7 @@ export class KnowledgeWikiService {
constructor(
private readonly configService: KtxFileStorePort,
private readonly embeddingService: KtxEmbeddingPort,
private readonly embeddingService: KtxEmbeddingPort | null,
private readonly pagesRepository: KnowledgeIndexPort,
private readonly gitService: KnowledgeGitDiffPort,
private readonly logger: KtxLogger = noopLogger,
@ -246,10 +247,12 @@ export class KnowledgeWikiService {
const searchText = buildKnowledgeSearchText(pageKey, frontmatter.summary, content, frontmatter.tags);
let embedding: number[] | null = null;
try {
embedding = await this.embeddingService.computeEmbedding(searchText);
} catch (err) {
this.logger.warn(`Embedding failed for page "${pageKey}": ${err instanceof Error ? err.message : String(err)}`);
if (this.embeddingService) {
try {
embedding = await this.embeddingService.computeEmbedding(searchText);
} catch (err) {
this.logger.warn(`Embedding failed for page "${pageKey}": ${err instanceof Error ? err.message : String(err)}`);
}
}
await this.pagesRepository.upsertPage({
@ -269,14 +272,21 @@ export class KnowledgeWikiService {
* Full sync: load all pages from disk for a scope, reindex changed pages, clean stale entries.
* Mirrors SlSearchService.indexSources() pattern.
*/
async syncIndex(scope: string, scopeId?: string | null): Promise<void> {
async syncIndex(scope: string, scopeId?: string | null): Promise<ReindexWorkResult> {
const pageKeys = await this.listPageKeys(scope, scopeId);
const existing = await this.pagesRepository.getExistingSearchTexts(scope, scopeId ?? null);
if (pageKeys.length === 0) {
await this.pagesRepository.deleteByScope(scope, scopeId ?? null);
return;
const deleted = await this.pagesRepository.deleteByScope(scope, scopeId ?? null);
return {
scanned: 0,
updated: 0,
deleted,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
};
}
// Load and parse all pages
const pages: Array<{ pageKey: string; frontmatter: WikiFrontmatter; content: string; searchText: string }> = [];
for (const key of pageKeys) {
const page = await this.readPage(scope, scopeId, key);
@ -286,58 +296,58 @@ export class KnowledgeWikiService {
}
}
// Detect changes
const existing = await this.pagesRepository.getExistingSearchTexts(scope, scopeId ?? null);
const changedPages = pages.filter((p) => {
const ex = existing.get(p.pageKey);
return !ex || ex.searchText !== p.searchText || !ex.hasEmbedding;
const embeddingService = this.embeddingService;
const changedPages = pages.filter((page) => {
const previous = existing.get(page.pageKey);
return (
!previous ||
previous.searchText !== page.searchText ||
(embeddingService !== null && !previous.hasEmbedding)
);
});
if (changedPages.length === 0) {
// Still clean up stale
await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
this.logger.log(`Wiki sync ${scope}: all ${pages.length} pages up to date`);
return;
}
let embeddings: (number[] | null)[] = changedPages.map(() => null);
let embeddingsRecomputed = 0;
let embeddingsFailed = 0;
// Compute embeddings for changed pages (batched)
const changedTexts = changedPages.map((p) => p.searchText);
let embeddings: (number[] | null)[];
try {
const batchSize = this.embeddingService.maxBatchSize;
const all: number[][] = [];
for (let i = 0; i < changedTexts.length; i += batchSize) {
const batch = changedTexts.slice(i, i + batchSize);
const batchEmb = await this.embeddingService.computeEmbeddingsBulk(batch);
all.push(...batchEmb);
if (embeddingService && changedPages.length > 0) {
try {
const changedTexts = changedPages.map((page) => page.searchText);
const all: number[][] = [];
for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) {
const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize);
all.push(...(await embeddingService.computeEmbeddingsBulk(batch)));
}
embeddings = all;
embeddingsRecomputed = all.length;
} catch (err) {
this.logger.warn(`Embedding batch failed during sync: ${err instanceof Error ? err.message : String(err)}`);
embeddingsFailed = changedPages.length;
}
embeddings = all;
} catch (err) {
this.logger.warn(`Embedding batch failed during sync: ${err instanceof Error ? err.message : String(err)}`);
embeddings = changedPages.map(() => null);
}
// Upsert changed pages
for (let i = 0; i < changedPages.length; i++) {
const p = changedPages[i];
for (let i = 0; i < changedPages.length; i += 1) {
const page = changedPages[i]!;
await this.pagesRepository.upsertPage({
scope,
scopeId: scopeId ?? null,
pageKey: p.pageKey,
summary: p.frontmatter.summary,
usageMode: p.frontmatter.usage_mode,
sortOrder: p.frontmatter.sort_order ?? 0,
searchText: p.searchText,
embedding: embeddings[i],
pageKey: page.pageKey,
summary: page.frontmatter.summary,
usageMode: page.frontmatter.usage_mode,
sortOrder: page.frontmatter.sort_order ?? 0,
searchText: page.searchText,
embedding: embeddings[i] ?? null,
});
}
// Clean stale entries
await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
this.logger.log(
`Wiki sync ${scope}: ${changedPages.length}/${pages.length} reindexed, ${pages.length - changedPages.length} unchanged`,
);
const deleted = await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
return {
scanned: pages.length,
updated: changedPages.length,
deleted,
embeddingsRecomputed,
embeddingsFailed,
};
}
/**
@ -388,12 +398,14 @@ export class KnowledgeWikiService {
parsed.frontmatter.tags,
);
let embedding: number[] | null = null;
try {
embedding = await this.embeddingService.computeEmbedding(searchText);
} catch (err) {
this.logger.warn(
`[wiki.sync] embedding failed for ${parsedPath.pageKey}: ${err instanceof Error ? err.message : String(err)}`,
);
if (this.embeddingService) {
try {
embedding = await this.embeddingService.computeEmbedding(searchText);
} catch (err) {
this.logger.warn(
`[wiki.sync] embedding failed for ${parsedPath.pageKey}: ${err instanceof Error ? err.message : String(err)}`,
);
}
}
const contentHash = createHash('sha256').update(content).digest('hex');
upserts.push({

View file

@ -33,9 +33,9 @@ export interface KnowledgeIndexPort {
scope: string,
scopeId: string | null,
): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>>;
deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<void>;
deleteByScope(scope: string, scopeId: string | null): Promise<void>;
deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<void>;
deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<number>;
deleteByScope(scope: string, scopeId: string | null): Promise<number>;
deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<number>;
findPageByKey(
scope: string,
scopeId: string | null,

View file

@ -65,6 +65,35 @@ describe('SqliteKnowledgeIndex', () => {
expect(index.search('churn', 10)).toEqual([]);
});
it('clear removes one wiki scope and leaves other scopes intact', async () => {
  const index = new SqliteKnowledgeIndex({ dbPath });

  // The same page key lives in three scopes: GLOBAL, USER/local and USER/alex.
  const globalPage = page({ path: 'wiki/global/revenue.md', key: 'revenue', scope: 'GLOBAL', scopeId: null });
  const localPage = page({
    path: 'wiki/user/local/revenue.md',
    key: 'revenue',
    scope: 'USER',
    scopeId: 'local',
    summary: 'Local revenue',
    content: 'Local revenue notes.',
  });
  const alexPage = page({
    path: 'wiki/user/alex/revenue.md',
    key: 'revenue',
    scope: 'USER',
    scopeId: 'alex',
    summary: 'Alex revenue',
    content: 'Alex revenue notes.',
  });
  index.sync([globalPage, localPage, alexPage]);

  // Only the USER/local row may disappear.
  const removedCount = index.clear('USER', 'local');
  expect(removedCount).toBe(1);

  const localHits = index.search('Local', 10);
  const alexHits = index.search('Alex', 10);
  const globalHits = index.search('definition', 10);
  expect(localHits).toEqual([]);
  expect(alexHits).toEqual([expect.objectContaining({ path: 'wiki/user/alex/revenue.md' })]);
  expect(globalHits).toEqual([expect.objectContaining({ path: 'wiki/global/revenue.md' })]);
});
it('exposes existing search text and embedding state for incremental refresh', () => {
const index = new SqliteKnowledgeIndex({ dbPath });
index.sync([page({ path: 'wiki/global/revenue.md', key: 'revenue', embedding: [1, 0] })]);

View file

@ -3,6 +3,7 @@ import { dirname } from 'node:path';
import Database from 'better-sqlite3';
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
import type { LocalKnowledgeScope } from './local-knowledge.js';
import type { KnowledgeIndexPageListing, UpsertPageParams } from './ports.js';
export interface SqliteKnowledgeIndexOptions {
dbPath: string;
@ -12,6 +13,7 @@ export interface SqliteKnowledgeIndexPage {
path: string;
key: string;
scope: LocalKnowledgeScope;
scopeId?: string | null;
summary: string;
content: string;
tags: string[];
@ -106,6 +108,7 @@ export class SqliteKnowledgeIndex {
path TEXT PRIMARY KEY,
key TEXT NOT NULL,
scope TEXT NOT NULL,
scope_id TEXT,
summary TEXT NOT NULL,
content TEXT NOT NULL,
tags TEXT NOT NULL,
@ -129,6 +132,9 @@ export class SqliteKnowledgeIndex {
if (!columnNames.has('embedding_json')) {
this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN embedding_json TEXT');
}
if (!columnNames.has('scope_id')) {
this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN scope_id TEXT');
}
}
sync(pages: SqliteKnowledgeIndexPage[]): void {
@ -142,11 +148,12 @@ export class SqliteKnowledgeIndex {
? this.db.prepare('DELETE FROM knowledge_pages_fts')
: this.db.prepare(`DELETE FROM knowledge_pages_fts WHERE path NOT IN (${keepPaths.map(() => '?').join(', ')})`);
const upsertPage = this.db.prepare(`
INSERT INTO knowledge_pages (path, key, scope, summary, content, tags, search_text, embedding_json)
VALUES (@path, @key, @scope, @summary, @content, @tags, @searchText, @embeddingJson)
INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
ON CONFLICT(path) DO UPDATE SET
key = excluded.key,
scope = excluded.scope,
scope_id = excluded.scope_id,
summary = excluded.summary,
content = excluded.content,
tags = excluded.tags,
@ -168,6 +175,7 @@ export class SqliteKnowledgeIndex {
path: page.path,
key: page.key,
scope: page.scope,
scopeId: page.scopeId ?? null,
summary: page.summary,
content: searchText,
tags: page.tags.join(' '),
@ -275,4 +283,201 @@ export class SqliteKnowledgeIndex {
score: scoreFromRank(row.rawScore),
}));
}
/**
 * Derives the `path` value (the primary key of `knowledge_pages`) for a page.
 *
 * GLOBAL pages map to `wiki/global/<key>.md`; every other scope maps to
 * `wiki/user/<scopeId>/<key>.md`, with a missing scope id falling back to `local`.
 */
private pathForPage(scope: string, scopeId: string | null, pageKey: string): string {
  if (scope === 'GLOBAL') {
    return `wiki/global/${pageKey}.md`;
  }
  const owner = scopeId ?? 'local';
  return `wiki/user/${owner}/${pageKey}.md`;
}
/**
 * Inserts or overwrites one page row and rebuilds its full-text row.
 *
 * The row path is derived from scope, scope id and page key. The page's search
 * text is stored both as `content` and as `search_text`, tags are written as an
 * empty string, and a missing or empty embedding is stored as NULL. The main-table
 * upsert and the FTS delete + insert run inside one transaction.
 */
async upsertPage(params: UpsertPageParams): Promise<void> {
  const vector = params.embedding;
  const record = {
    path: this.pathForPage(params.scope, params.scopeId, params.pageKey),
    key: params.pageKey,
    scope: params.scope,
    scopeId: params.scopeId,
    summary: params.summary,
    content: params.searchText,
    tags: '',
    searchText: params.searchText,
    embeddingJson: vector && vector.length > 0 ? JSON.stringify(vector) : null,
  };

  const persist = this.db.transaction(() => {
    const upsertRow = this.db.prepare(
      `
      INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
      VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
      ON CONFLICT(path) DO UPDATE SET
      key = excluded.key,
      scope = excluded.scope,
      scope_id = excluded.scope_id,
      summary = excluded.summary,
      content = excluded.content,
      tags = excluded.tags,
      search_text = excluded.search_text,
      embedding_json = excluded.embedding_json
      `,
    );
    upsertRow.run(record);

    // Replace the full-text row by deleting any previous one and inserting afresh.
    const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = @path');
    dropFtsRow.run(record);
    const insertFtsRow = this.db.prepare(
      `
      INSERT INTO knowledge_pages_fts (path, key, summary, content, tags)
      VALUES (@path, @key, @summary, @content, @tags)
      `,
    );
    insertFtsRow.run(record);
  });
  persist();
}
/**
 * Loads the indexed search text and embedding presence for every page in a scope.
 *
 * @param scope   Scope to read.
 * @param scopeId Scope id; matched with `IS`, so `null` selects rows whose scope_id is NULL.
 * @returns Map from page key to its stored search text and whether `embedding_json` is non-NULL.
 */
async getExistingSearchTexts(
  scope: string,
  scopeId: string | null,
): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>> {
  const query = this.db.prepare(
    `
    SELECT key, search_text, embedding_json
    FROM knowledge_pages
    WHERE scope = ?
    AND scope_id IS ?
    ORDER BY key ASC
    `,
  );
  const rows = query.all(scope, scopeId) as Array<{
    key: string;
    search_text: string;
    embedding_json: string | null;
  }>;

  const byKey = new Map<string, { searchText: string; hasEmbedding: boolean }>();
  for (const { key, search_text, embedding_json } of rows) {
    byKey.set(key, { searchText: search_text, hasEmbedding: embedding_json !== null });
  }
  return byKey;
}
/**
 * Removes every indexed page of a scope whose key is not listed in `keepKeys`.
 *
 * Stale rows are identified by their stored `path` and deleted by that same path,
 * so the reported count always equals the rows actually removed from
 * `knowledge_pages` (re-deriving the path from the key could miss rows and still
 * count them). The keep list is compared in memory, which avoids binding one SQL
 * variable per key, and the read plus all deletes share a single transaction.
 *
 * @param scope    Scope to clean.
 * @param scopeId  Scope id; matched with `IS`, so `null` selects rows whose scope_id is NULL.
 * @param keepKeys Page keys that must survive. An empty list removes the whole scope.
 * @returns Number of rows deleted from `knowledge_pages`.
 */
async deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<number> {
  const keep = new Set(keepKeys);
  const purge = this.db.transaction((): number => {
    const scoped = this.db
      .prepare('SELECT path, key FROM knowledge_pages WHERE scope = ? AND scope_id IS ?')
      .all(scope, scopeId) as Array<{ path: string; key: string }>;
    const stalePaths = scoped.filter((row) => !keep.has(row.key)).map((row) => row.path);
    if (stalePaths.length === 0) {
      return 0;
    }

    const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?');
    const dropPageRow = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?');
    let removed = 0;
    for (const path of stalePaths) {
      dropFtsRow.run(path);
      removed += Number(dropPageRow.run(path).changes);
    }
    return removed;
  });
  return purge();
}
/**
 * Async wrapper around {@link clear}: removes every indexed page of one scope.
 *
 * @returns The count reported by `clear`.
 */
async deleteByScope(scope: string, scopeId: string | null): Promise<number> {
  const removedCount = this.clear(scope, scopeId);
  return removedCount;
}
/**
 * Deletes a single page, addressed by the path derived from scope, scope id and key.
 *
 * The full-text row and the main row are removed in one transaction.
 *
 * @returns Number of rows removed from `knowledge_pages` (0 when the page was not indexed).
 */
async deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<number> {
  const targetPath = this.pathForPage(scope, scopeId, pageKey);
  const dropRow = this.db.transaction((path: string): number => {
    this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?').run(path);
    const { changes } = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?').run(path);
    return Number(changes);
  });
  return dropRow(targetPath);
}
/**
 * Removes every indexed page of one scope from both the main table and the FTS table.
 *
 * Both deletes are set-based and run in a single transaction, so the rows that are
 * selected and the rows that are removed cannot diverge, and no statement is
 * re-prepared per row. The FTS rows go first because they are located through the
 * main table's scope columns.
 *
 * @param scope   Scope to clear.
 * @param scopeId Scope id; matched with `IS`, so `null` selects rows whose scope_id is NULL.
 * @returns Number of rows deleted from `knowledge_pages`.
 */
clear(scope: string, scopeId: string | null): number {
  const dropFtsRows = this.db.prepare(
    `
    DELETE FROM knowledge_pages_fts
    WHERE path IN (SELECT path FROM knowledge_pages WHERE scope = ? AND scope_id IS ?)
    `,
  );
  const dropPageRows = this.db.prepare('DELETE FROM knowledge_pages WHERE scope = ? AND scope_id IS ?');
  const wipe = this.db.transaction((): number => {
    dropFtsRows.run(scope, scopeId);
    return Number(dropPageRows.run(scope, scopeId).changes);
  });
  return wipe();
}
/**
 * Applies a batch of page upserts followed by page deletes as ONE transaction.
 *
 * Either the whole diff is written or, if any statement throws, none of it is
 * (the returned promise rejects with that error). Transaction callbacks here are
 * synchronous, so the async `upsertPage` / `deleteByKey` cannot be awaited inside
 * one; the statements below therefore mirror what those two methods execute and
 * must be kept in step with them.
 *
 * @param params.runId   Accepted but not used by this implementation.
 * @param params.upserts Pages to insert or overwrite (applied first, in order).
 * @param params.deletes Pages to remove (applied after all upserts, in order).
 */
async applyDiffTransactional(params: {
  runId: string;
  upserts: UpsertPageParams[];
  deletes: Array<{ scope: string; scopeId: string | null; pageKey: string }>;
}): Promise<void> {
  void params.runId;

  const upsertRow = this.db.prepare(
    `
    INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
    VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
    ON CONFLICT(path) DO UPDATE SET
    key = excluded.key,
    scope = excluded.scope,
    scope_id = excluded.scope_id,
    summary = excluded.summary,
    content = excluded.content,
    tags = excluded.tags,
    search_text = excluded.search_text,
    embedding_json = excluded.embedding_json
    `,
  );
  const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = @path');
  const insertFtsRow = this.db.prepare(
    `
    INSERT INTO knowledge_pages_fts (path, key, summary, content, tags)
    VALUES (@path, @key, @summary, @content, @tags)
    `,
  );
  const dropPageRow = this.db.prepare('DELETE FROM knowledge_pages WHERE path = @path');

  const applyAll = this.db.transaction(() => {
    for (const page of params.upserts) {
      const row = {
        path: this.pathForPage(page.scope, page.scopeId, page.pageKey),
        key: page.pageKey,
        scope: page.scope,
        scopeId: page.scopeId,
        summary: page.summary,
        content: page.searchText,
        tags: '',
        searchText: page.searchText,
        embeddingJson: page.embedding && page.embedding.length > 0 ? JSON.stringify(page.embedding) : null,
      };
      upsertRow.run(row);
      // Replace the full-text row: delete any previous one, then insert afresh.
      dropFtsRow.run(row);
      insertFtsRow.run(row);
    }
    for (const page of params.deletes) {
      const target = { path: this.pathForPage(page.scope, page.scopeId, page.pageKey) };
      dropFtsRow.run(target);
      dropPageRow.run(target);
    }
  });
  applyAll();
}
/**
 * Looks up one indexed page by the path derived from scope, scope id and key.
 *
 * @returns `{ id, page_key }` where `id` is the stored path, or `null` when no row matches.
 */
async findPageByKey(
  scope: string,
  scopeId: string | null,
  pageKey: string,
): Promise<{ id?: string; page_key: string } | null> {
  const targetPath = this.pathForPage(scope, scopeId, pageKey);
  const lookup = this.db.prepare('SELECT path, key FROM knowledge_pages WHERE path = ?');
  const hit = lookup.get(targetPath) as { path: string; key: string } | undefined;
  if (!hit) {
    return null;
  }
  return { id: hit.path, page_key: hit.key };
}
/**
 * Lists the pages a user can see: all GLOBAL pages plus USER pages whose scope id
 * equals `userId`, ordered by scope and then key.
 *
 * The stored path is exposed as `id`, and the whitespace-separated `tags` column is
 * split back into an array with empty entries dropped.
 */
async listPagesForUser(userId: string): Promise<KnowledgeIndexPageListing[]> {
  type ListingRow = {
    path: string;
    key: string;
    scope: string;
    scope_id: string | null;
    summary: string;
    tags: string;
  };

  const query = this.db.prepare(
    `
    SELECT path, key, scope, scope_id, summary, tags
    FROM knowledge_pages
    WHERE scope = 'GLOBAL'
    OR (scope = 'USER' AND scope_id = ?)
    ORDER BY scope ASC, key ASC
    `,
  );
  const rows = query.all(userId) as ListingRow[];

  const listings: KnowledgeIndexPageListing[] = [];
  for (const row of rows) {
    const tagList = row.tags.split(/\s+/).filter(Boolean);
    listings.push({
      id: row.path,
      page_key: row.key,
      summary: row.summary,
      scope: row.scope,
      scope_id: row.scope_id,
      tags: tagList,
    });
  }
  return listings;
}
/**
 * Counts the indexed USER-scope pages whose scope id equals `userId`.
 */
async getUserPageCount(userId: string): Promise<number> {
  const counter = this.db.prepare(
    "SELECT COUNT(*) AS count FROM knowledge_pages WHERE scope = 'USER' AND scope_id = ?",
  );
  const { count } = counter.get(userId) as { count: number };
  return count;
}
/**
 * Intentionally does nothing: this index performs no work when asked to record
 * page usage, and resolves immediately.
 */
async incrementUsageCount(): Promise<void> {
  // No-op.
}
/**
 * Searches the index on behalf of one user and returns only pages that user may see
 * (the set returned by `listPagesForUser`).
 *
 * `_embedding` is ignored; ranking comes entirely from `search(queryText, …)`, whose
 * score is passed through as `rrfScore`.
 *
 * `search` applies its limit before the visibility filter, so rows belonging to other
 * users can occupy result slots. To still return up to `limit` visible hits, the
 * search is repeated with a doubled fetch size until enough visible hits are found,
 * the index has no more matches, or a small fixed number of rounds is reached (which
 * bounds the extra work).
 *
 * @param userId    User whose GLOBAL + own USER pages are eligible.
 * @param queryText Text query forwarded to `search`.
 * @param limit     Maximum number of hits to return.
 */
async searchRRF(
  userId: string,
  _embedding: number[] | null,
  queryText: string,
  limit: number,
): Promise<Array<{ pageKey: string; summary: string; rrfScore: number }>> {
  const visiblePages = new Map((await this.listPagesForUser(userId)).map((page) => [page.id, page]));

  // Runs one search and keeps only rows the user is allowed to see, in rank order.
  const collect = (fetchLimit: number) => {
    const rows = this.search(queryText, fetchLimit);
    const hits: Array<{ pageKey: string; summary: string; rrfScore: number }> = [];
    for (const row of rows) {
      const page = visiblePages.get(row.path);
      if (page) {
        hits.push({ pageKey: page.page_key, summary: page.summary, rrfScore: row.score });
      }
    }
    // Fewer rows than requested means the index has no further matches.
    return { hits, exhausted: rows.length < fetchLimit };
  };

  const maxExtraRounds = 6;
  // There can never be more hits than visible pages, so stop asking once that many are found.
  const wanted = Math.min(limit, visiblePages.size);
  let fetchLimit = limit;
  let attempt = collect(fetchLimit);
  for (let round = 0; round < maxExtraRounds && attempt.hits.length < wanted && !attempt.exhausted; round += 1) {
    fetchLimit *= 2;
    attempt = collect(fetchLimit);
  }

  // Non-positive limits never over-fetch; return the single pass unchanged, as before.
  return limit > 0 ? attempt.hits.slice(0, limit) : attempt.hits;
}
}