feat(cli): add ktx admin reindex (#160)

* feat(cli): add admin reindex

* fix: keep lexical-only reindex incremental
This commit is contained in:
Andrey Avtomonov 2026-05-20 01:36:54 +02:00 committed by GitHub
parent 3db3e724cb
commit 6dbb0c8b3a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 1640 additions and 393 deletions

View file

@ -0,0 +1,2 @@
export type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
export { discoverReindexScopes, reindexLocalIndexes } from './reindex.js';

View file

@ -0,0 +1,196 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxEmbeddingPort } from '../core/index.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../project/index.js';
import { SqliteKnowledgeIndex } from '../wiki/sqlite-knowledge-index.js';
import { reindexLocalIndexes } from './reindex.js';
/**
 * Deterministic in-memory embedding port for tests.
 *
 * Every text maps to the two-element vector `[text.length, 1]`, so results are stable and no
 * external provider is needed.
 */
class FakeEmbeddingPort implements KtxEmbeddingPort {
  readonly maxBatchSize = 8;

  /** Embeds a single text as `[text.length, 1]`. */
  async computeEmbedding(text: string): Promise<number[]> {
    const vector: number[] = [text.length, 1];
    return vector;
  }

  /** Embeds each text independently, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const vectors: number[][] = [];
    for (const text of texts) {
      vectors.push([text.length, 1]);
    }
    return vectors;
  }
}
/**
 * Initializes a ktx project inside `tempDir` (with `force: true`) and returns the loaded project.
 */
async function createProject(tempDir: string): Promise<KtxLocalProject> {
  const projectDir = tempDir;
  await initKtxProject({ projectDir, force: true });
  const project = await loadKtxProject({ projectDir });
  return project;
}
// Integration-style tests for reindexLocalIndexes. Each test runs against a fresh temporary
// project directory that is removed again afterwards.
describe('reindexLocalIndexes', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-reindex-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// With both content roots removed there is nothing to discover: no scopes and all-zero totals.
it('returns an empty summary when no wiki or semantic-layer directories exist', async () => {
const project = await createProject(tempDir);
await rm(join(project.projectDir, 'wiki'), { recursive: true, force: true });
await rm(join(project.projectDir, 'semantic-layer'), { recursive: true, force: true });
await expect(reindexLocalIndexes(project, { force: false, embeddingService: null })).resolves.toMatchObject({
scopes: [],
totals: { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 },
force: false,
embeddingsAvailable: false,
});
});
// Directories without any files still appear as scopes, just with nothing scanned.
// NOTE(review): the expected 'global' scope is not created here, so project init presumably
// creates wiki/global — confirm against initKtxProject.
it('discovers empty directories as zero-row scopes', async () => {
const project = await createProject(tempDir);
await mkdir(join(project.projectDir, 'wiki/user/local'), { recursive: true });
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(summary.scopes.map((scope) => scope.label)).toEqual(['global', 'user/local', 'warehouse']);
expect(summary.totals.scanned).toBe(0);
});
// One wiki page plus one semantic-layer YAML source, indexed with a working embedding port:
// both scopes are reported and each item counts as scanned, updated and embedded.
it('indexes mixed wiki and SL sources and reports totals', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
'utf-8',
);
const summary = await reindexLocalIndexes(project, {
force: false,
embeddingService: new FakeEmbeddingPort(),
});
expect(summary.scopes).toHaveLength(2);
expect(summary.totals).toMatchObject({ scanned: 2, updated: 2, deleted: 0, embeddingsRecomputed: 2 });
expect(summary.embeddingsAvailable).toBe(true);
});
// Regression guard for incremental behaviour without embeddings: the second run over
// unchanged files must scan everything but update nothing.
it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
'utf-8',
);
const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(first.totals).toMatchObject({
scanned: 2,
updated: 2,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(second.totals).toMatchObject({
scanned: 2,
updated: 0,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
expect(second.scopes.map((scope) => [scope.label, scope.updated])).toEqual([
['global', 0],
['warehouse', 0],
]);
});
// A row is seeded straight into the index with no backing file. After a forced reindex it
// must no longer be searchable, and the summary reports deleted as 0.
it('force clears stale rows before rebuilding each discovered scope', async () => {
const project = await createProject(tempDir);
const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
wikiIndex.sync([
{
path: 'wiki/global/stale.md',
key: 'stale',
scope: 'GLOBAL',
scopeId: null,
summary: 'Stale',
content: 'Stale content',
tags: [],
embedding: [1, 0],
},
]);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
const summary = await reindexLocalIndexes(project, {
force: true,
embeddingService: new FakeEmbeddingPort(),
});
expect(summary.force).toBe(true);
expect(summary.totals).toMatchObject({ scanned: 1, updated: 1, deleted: 0 });
expect(wikiIndex.search('Stale', 10)).toEqual([]);
});
// A malformed YAML file in one scope must surface as that scope's error without affecting
// the result of the other scope.
it('captures a per-scope error and continues other scopes', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/broken.yaml'), 'not: [valid', 'utf-8');
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML');
});
// An embedding port whose calls always throw: the scope is reported with the failed
// embedding count and a matching error message.
it('marks a scope errored when configured embeddings fail', async () => {
const project = await createProject(tempDir);
await writeFile(
join(project.projectDir, 'wiki/global/revenue.md'),
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8',
);
const embeddingService: KtxEmbeddingPort = {
maxBatchSize: 8,
async computeEmbedding() {
throw new Error('embedding provider unavailable');
},
async computeEmbeddingsBulk() {
throw new Error('embedding provider unavailable');
},
};
const summary = await reindexLocalIndexes(project, { force: false, embeddingService });
expect(summary.scopes[0]).toMatchObject({
label: 'global',
embeddingsFailed: 1,
error: '1 embedding recomputation failed',
});
});
});

View file

@ -0,0 +1,162 @@
import { readdir, stat } from 'node:fs/promises';
import { join, relative } from 'node:path';
import { ktxLocalStateDbPath, type KtxLocalProject } from '../project/index.js';
import { loadLocalSlSourceRecords, SlSearchService, SqliteSlSourcesIndex } from '../sl/index.js';
import { KnowledgeWikiService, SqliteKnowledgeIndex } from '../wiki/index.js';
import type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
/**
 * One unit of reindex work found on disk, discriminated by `kind`:
 * the global wiki scope, a per-user wiki scope (`label` is `user/<scopeId>`), or a
 * semantic-layer connection (`label` is the connection id).
 */
type DiscoveredScope =
| { kind: 'wiki'; scope: 'GLOBAL'; scopeId: null; label: 'global' }
| { kind: 'wiki'; scope: 'USER'; scopeId: string; label: `user/${string}` }
| { kind: 'sl'; connectionId: string; label: string };
/**
 * All-zero work counters. Spread into the result of a scope that threw, and used as the
 * starting value when summing per-scope results into totals.
 */
const ZERO: ReindexWorkResult = {
scanned: 0,
updated: 0,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
};
/**
 * Reports whether `path` exists and is a directory.
 *
 * Any failure to stat the path (missing, unreadable, ...) is treated as "not a directory".
 */
async function directoryExists(path: string): Promise<boolean> {
  const info = await stat(path).catch(() => null);
  return info !== null && info.isDirectory();
}
/**
 * Lists the names of the immediate sub-directories of `path`, sorted with `localeCompare`.
 *
 * A missing path, or a path that exists but is not a directory (including one whose ancestor
 * is a regular file), yields an empty list. This matches `directoryExists`, which also treats
 * a non-directory as "absent", so a stray file at a scope root does not abort discovery.
 *
 * @param path - Directory to list.
 * @returns Sorted sub-directory names; empty when `path` is missing or not a directory.
 * @throws Any other filesystem error from `readdir` (for example a permission error).
 */
async function childDirectories(path: string): Promise<string[]> {
  try {
    const entries = await readdir(path, { withFileTypes: true });
    return entries
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name)
      .sort((left, right) => left.localeCompare(right));
  } catch (error) {
    const code = (error as NodeJS.ErrnoException).code;
    // ENOENT: nothing there. ENOTDIR: the path (or one of its parents) is a regular file.
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      return [];
    }
    throw error;
  }
}
/**
 * Finds every scope under the project directory that can be reindexed.
 *
 * The result is ordered: the global wiki scope (when `wiki/global` is a directory), then one
 * wiki scope per sub-directory of `wiki/user`, then one semantic-layer scope per sub-directory
 * of `semantic-layer` except `_schema`.
 */
export async function discoverReindexScopes(project: KtxLocalProject): Promise<DiscoveredScope[]> {
  const root = project.projectDir;
  const discovered: DiscoveredScope[] = [];

  const hasGlobalWiki = await directoryExists(join(root, 'wiki/global'));
  if (hasGlobalWiki) {
    discovered.push({ kind: 'wiki', scope: 'GLOBAL', scopeId: null, label: 'global' });
  }

  const userIds = await childDirectories(join(root, 'wiki/user'));
  userIds.forEach((userId) => {
    discovered.push({ kind: 'wiki', scope: 'USER', scopeId: userId, label: `user/${userId}` });
  });

  const connectionIds = await childDirectories(join(root, 'semantic-layer'));
  connectionIds
    .filter((connectionId) => connectionId !== '_schema')
    .forEach((connectionId) => {
      discovered.push({ kind: 'sl', connectionId, label: connectionId });
    });

  return discovered;
}
/**
 * Renders a thrown value as a single-line message.
 *
 * `Error` instances with a non-empty name other than `'Error'` are rendered as
 * `<name>: <message>`; other `Error`s as just their message; anything else via `String()`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    const hasSpecificName = Boolean(error.name) && error.name !== 'Error';
    return hasSpecificName ? `${error.name}: ${error.message}` : error.message;
  }
  return String(error);
}
/**
 * Sums two sets of work counters field by field into a new object.
 *
 * Only the five counter fields are carried over; any extra properties on the inputs are dropped.
 */
function addTotals(left: ReindexWorkResult, right: ReindexWorkResult): ReindexWorkResult {
  const scanned = left.scanned + right.scanned;
  const updated = left.updated + right.updated;
  const deleted = left.deleted + right.deleted;
  const embeddingsRecomputed = left.embeddingsRecomputed + right.embeddingsRecomputed;
  const embeddingsFailed = left.embeddingsFailed + right.embeddingsFailed;
  return { scanned, updated, deleted, embeddingsRecomputed, embeddingsFailed };
}
/**
 * Whole milliseconds elapsed since `startedAt`, a `process.hrtime.bigint()` reading in
 * nanoseconds. The sub-millisecond remainder is discarded by the bigint division.
 */
function durationSince(startedAt: bigint): number {
  const elapsedNanos = process.hrtime.bigint() - startedAt;
  const elapsedMillis = elapsedNanos / 1_000_000n;
  return Number(elapsedMillis);
}
/**
 * Builds the scope-level error text for failed embedding recomputations.
 *
 * @returns `undefined` when nothing failed, otherwise a message such as
 * `"1 embedding recomputation failed"` or `"3 embedding recomputations failed"`.
 */
function embeddingFailureError(work: ReindexWorkResult): string | undefined {
  const failed = work.embeddingsFailed;
  if (failed === 0) {
    return undefined;
  }
  const pluralSuffix = failed === 1 ? '' : 's';
  return `${failed} embedding recomputation${pluralSuffix} failed`;
}
/**
 * Rebuilds the local wiki and semantic-layer search indexes for every scope found on disk.
 *
 * Scopes are processed one at a time. A scope that throws is recorded with zeroed counters and
 * an `error` message, and the remaining scopes are still processed. A scope whose embedding
 * recomputation failed while an embedding service was configured keeps its counters and also
 * gets an `error`. With `options.force`, the scope's existing index rows are cleared before
 * it is re-synced and its `deleted` counter is reported as 0.
 *
 * @param project - Project whose directory, file store and git port are used.
 * @param options - `force` clears each scope first; `embeddingService` may be `null`, in which
 *   case no embeddings are computed.
 * @returns Per-scope results in discovery order, summed totals, the index database path
 *   (relative to the project directory when possible), and timings in milliseconds.
 */
export async function reindexLocalIndexes(
  project: KtxLocalProject,
  options: ReindexOptions,
): Promise<ReindexSummary> {
  const startedAt = process.hrtime.bigint();
  const dbPath = ktxLocalStateDbPath(project);
  const scopes = await discoverReindexScopes(project);
  const wikiIndex = new SqliteKnowledgeIndex({ dbPath });
  const slIndex = new SqliteSlSourcesIndex({ dbPath });
  const wikiService = new KnowledgeWikiService(project.fileStore, options.embeddingService, wikiIndex, project.git);
  const slService = new SlSearchService(options.embeddingService, slIndex);
  const results: ReindexScopeResult[] = [];

  for (const scope of scopes) {
    const scopeStartedAt = process.hrtime.bigint();
    try {
      let work: ReindexWorkResult;
      if (scope.kind === 'wiki') {
        if (options.force) {
          wikiIndex.clear(scope.scope, scope.scopeId);
        }
        work = await wikiService.syncIndex(scope.scope, scope.scopeId);
        results.push({
          kind: 'wiki',
          label: scope.label,
          scope: scope.scope === 'GLOBAL' ? 'global' : 'user',
          scopeId: scope.scopeId,
          ...work,
          // A forced run wipes the scope up front, so rows removed by that wipe are not
          // reported as stale deletions.
          ...(options.force ? { deleted: 0 } : {}),
          ...(options.embeddingService && work.embeddingsFailed > 0 ? { error: embeddingFailureError(work) } : {}),
          durationMs: durationSince(scopeStartedAt),
        });
        continue;
      }

      if (options.force) {
        await slIndex.clear(scope.connectionId);
      }
      const records = await loadLocalSlSourceRecords(project, { connectionId: scope.connectionId });
      work = await slService.indexSources(
        scope.connectionId,
        records.map((record) => record.source),
      );
      results.push({
        kind: 'sl',
        label: scope.label,
        connectionId: scope.connectionId,
        ...work,
        ...(options.force ? { deleted: 0 } : {}),
        ...(options.embeddingService && work.embeddingsFailed > 0 ? { error: embeddingFailureError(work) } : {}),
        durationMs: durationSince(scopeStartedAt),
      });
    } catch (error) {
      // Record the failure against this scope only; later scopes still get their turn.
      results.push({
        kind: scope.kind,
        label: scope.label,
        ...(scope.kind === 'wiki'
          ? { scope: scope.scope === 'GLOBAL' ? 'global' : 'user', scopeId: scope.scopeId }
          : { connectionId: scope.connectionId }),
        ...ZERO,
        durationMs: durationSince(scopeStartedAt),
        error: errorMessage(error),
      });
    }
  }

  return {
    scopes: results,
    // Seed with a copy: reduce() returns the seed itself when there are no results, and handing
    // the shared ZERO object to callers would let them mutate it for every later run.
    totals: results.reduce(addTotals, { ...ZERO }),
    dbPath: relative(project.projectDir, dbPath) || dbPath,
    force: options.force,
    embeddingsAvailable: options.embeddingService !== null,
    durationMs: durationSince(startedAt),
  };
}

View file

@ -0,0 +1,33 @@
import type { KtxEmbeddingPort } from '../core/index.js';
/** Options accepted by `reindexLocalIndexes`. */
export interface ReindexOptions {
/** When true, each scope's existing index rows are cleared before it is rebuilt. */
force: boolean;
/** Embedding provider to use, or `null` when no embeddings are available. */
embeddingService: KtxEmbeddingPort | null;
}
/** Counters describing what a single index sync did. */
export interface ReindexWorkResult {
/** Number of items (wiki pages or semantic-layer sources) examined. */
scanned: number;
/** Number of items written to the index because they were new or had changed. */
updated: number;
/** Number of index rows removed. */
deleted: number;
/** Number of embeddings that were computed successfully. */
embeddingsRecomputed: number;
/** Number of items whose embedding computation failed. */
embeddingsFailed: number;
}
/** Outcome of reindexing one discovered scope: its counters plus identification and timing. */
export interface ReindexScopeResult extends ReindexWorkResult {
/** Which index the scope belongs to: the wiki or the semantic layer (`sl`). */
kind: 'wiki' | 'sl';
/** Display label: `global`, `user/<id>`, or the connection id. */
label: string;
/** Wiki scopes only: whether this is the global scope or a user scope. */
scope?: 'global' | 'user';
/** Wiki scopes only: the user id, or `null` for the global scope. */
scopeId?: string | null;
/** Semantic-layer scopes only: the connection the sources belong to. */
connectionId?: string;
/** Time spent on this scope, in whole milliseconds. */
durationMs: number;
/** Set when the scope threw or when embedding recomputation failed; absent on success. */
error?: string;
}
/** Overall result of a reindex run across all discovered scopes. */
export interface ReindexSummary {
/** One entry per discovered scope, in discovery order. */
scopes: ReindexScopeResult[];
/** Field-wise sum of the counters of every entry in `scopes`. */
totals: ReindexWorkResult;
/** Path of the index database, relative to the project directory when possible. */
dbPath: string;
/** Echo of the `force` option the run was started with. */
force: boolean;
/** True when an embedding service was supplied for the run. */
embeddingsAvailable: boolean;
/** Total time for the whole run, in whole milliseconds. */
durationMs: number;
}

View file

@ -12,6 +12,7 @@ export * from './agent/index.js';
export * from './core/index.js';
export * from './daemon/index.js';
export * from './ingest/index.js';
export * from './index-sync/index.js';
export * from './llm/index.js';
export type {
CaptureSession,

View file

@ -380,16 +380,19 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
return result;
}
async deleteStale(): Promise<void> {
async deleteStale(): Promise<number> {
await this.syncAllPagesFromDisk();
return 0;
}
async deleteByScope(): Promise<void> {
async deleteByScope(): Promise<number> {
await this.syncAllPagesFromDisk();
return 0;
}
async deleteByKey(): Promise<void> {
async deleteByKey(): Promise<number> {
await this.syncAllPagesFromDisk();
return 0;
}
async findPageByKey(scope: string, scopeId: string | null, pageKey: string) {

View file

@ -205,11 +205,17 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
return new Map();
}
async deleteStale(): Promise<void> {}
async deleteStale(): Promise<number> {
return 0;
}
async deleteByScope(): Promise<void> {}
async deleteByScope(): Promise<number> {
return 0;
}
async deleteByKey(): Promise<void> {}
async deleteByKey(): Promise<number> {
return 0;
}
async findPageByKey(scope: string, scopeId: string | null, pageKey: string) {
const path = this.pagePath(scope, scopeId, pageKey);

View file

@ -40,9 +40,9 @@ export interface SlSourcesIndexPort {
sources: Array<{ sourceName: string; searchText: string; embedding: number[] | null; contentHash?: string | null }>,
): Promise<void>;
getExistingSearchTexts(connectionId: string): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>>;
deleteStale(connectionId: string, keepNames: string[]): Promise<void>;
deleteByConnection(connectionId: string): Promise<void>;
deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<void>;
deleteStale(connectionId: string, keepNames: string[]): Promise<number>;
deleteByConnection(connectionId: string): Promise<number>;
deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<number>;
search(
connectionId: string,
queryEmbedding: number[] | null,

View file

@ -223,4 +223,73 @@ describe('SlSearchService', () => {
},
]);
});
it('indexSources reports stats and supports lexical-only indexing', async () => {
const repository = {
upsertSources: vi.fn().mockResolvedValue(undefined),
getExistingSearchTexts: vi.fn().mockResolvedValue(
new Map([
['old_source', { searchText: 'old source', hasEmbedding: true }],
]),
),
deleteStale: vi.fn().mockResolvedValue(1),
deleteByConnection: vi.fn().mockResolvedValue(0),
deleteByConnectionAndName: vi.fn(),
search: vi.fn(),
};
const service = new SlSearchService(null, repository);
const source: SemanticLayerSource = {
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'number' }],
joins: [],
measures: [],
};
await expect(service.indexSources('warehouse', [source])).resolves.toEqual({
scanned: 1,
updated: 1,
deleted: 1,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
expect(repository.upsertSources).toHaveBeenCalledWith('warehouse', [
expect.objectContaining({ sourceName: 'orders', embedding: null }),
]);
});
it('does not update unchanged lexical-only SL rows on repeated sync', async () => {
const repository = {
upsertSources: vi.fn().mockResolvedValue(undefined),
getExistingSearchTexts: vi.fn().mockResolvedValue(
new Map([
['orders', { searchText: 'orders. table: public.orders. id (number)', hasEmbedding: false }],
]),
),
deleteStale: vi.fn().mockResolvedValue(0),
deleteByConnection: vi.fn().mockResolvedValue(0),
deleteByConnectionAndName: vi.fn(),
search: vi.fn(),
};
const service = new SlSearchService(null, repository);
const source: SemanticLayerSource = {
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'number' }],
joins: [],
measures: [],
};
await expect(service.indexSources('warehouse', [source])).resolves.toEqual({
scanned: 1,
updated: 0,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
expect(repository.upsertSources).toHaveBeenCalledWith('warehouse', []);
expect(repository.deleteStale).toHaveBeenCalledWith('warehouse', ['orders']);
});
});

View file

@ -1,5 +1,6 @@
import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
import { noopLogger } from '../core/index.js';
import type { ReindexWorkResult } from '../index-sync/types.js';
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
import type { SlSourcesIndexPort } from './ports.js';
@ -94,73 +95,71 @@ export function buildSemanticLayerSourceSearchText(
export class SlSearchService {
constructor(
private readonly embeddingService: KtxEmbeddingPort,
private readonly embeddingService: KtxEmbeddingPort | null,
private readonly slSourcesRepository: SlSourcesIndexPort,
private readonly logger: KtxLogger = noopLogger,
) {}
async indexSources(connectionId: string, sources: SemanticLayerSource[]): Promise<void> {
async indexSources(connectionId: string, sources: SemanticLayerSource[]): Promise<ReindexWorkResult> {
const existing = await this.slSourcesRepository.getExistingSearchTexts(connectionId);
if (sources.length === 0) {
await this.slSourcesRepository.deleteByConnection(connectionId);
return;
const deleted = await this.slSourcesRepository.deleteByConnection(connectionId);
return { scanned: 0, updated: 0, deleted, embeddingsRecomputed: 0, embeddingsFailed: 0 };
}
// Detect which sources actually changed by comparing search_text
const existing = await this.slSourcesRepository.getExistingSearchTexts(connectionId);
const searchTexts = sources.map((s) => this.buildSearchText(s));
const embeddingService = this.embeddingService;
const changedIndices: number[] = [];
for (let i = 0; i < sources.length; i++) {
const prev = existing.get(sources[i].name);
if (!prev || prev.searchText !== searchTexts[i] || !prev.hasEmbedding) {
for (let i = 0; i < sources.length; i += 1) {
const previous = existing.get(sources[i]!.name);
if (
!previous ||
previous.searchText !== searchTexts[i] ||
(embeddingService !== null && !previous.hasEmbedding)
) {
changedIndices.push(i);
}
}
if (changedIndices.length === 0) {
// Still clean up stale sources even if nothing changed
const keepNames = sources.map((s) => s.name);
await this.slSourcesRepository.deleteStale(connectionId, keepNames);
this.logger.log(`SL sources for connection ${connectionId}: all ${sources.length} up to date, 0 reindexed`);
return;
}
let changedEmbeddings: (number[] | null)[] = changedIndices.map(() => null);
let embeddingsRecomputed = 0;
let embeddingsFailed = 0;
// Compute embeddings only for changed sources
const changedTexts = changedIndices.map((i) => searchTexts[i]);
let changedEmbeddings: (number[] | null)[];
try {
const batchSize = this.embeddingService.maxBatchSize;
const allEmbeddings: number[][] = [];
for (let i = 0; i < changedTexts.length; i += batchSize) {
const batch = changedTexts.slice(i, i + batchSize);
const batchEmbeddings = await this.embeddingService.computeEmbeddingsBulk(batch);
allEmbeddings.push(...batchEmbeddings);
if (embeddingService && changedIndices.length > 0) {
try {
const changedTexts = changedIndices.map((index) => searchTexts[index]!);
const allEmbeddings: number[][] = [];
for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) {
const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize);
allEmbeddings.push(...(await embeddingService.computeEmbeddingsBulk(batch)));
}
changedEmbeddings = allEmbeddings;
embeddingsRecomputed = allEmbeddings.length;
} catch (error) {
this.logger.warn(
`Failed to compute SL source embeddings: ${error instanceof Error ? error.message : String(error)}`,
);
embeddingsFailed = changedIndices.length;
}
changedEmbeddings = allEmbeddings;
} catch (error) {
this.logger.warn(
`Failed to compute SL source embeddings: ${error instanceof Error ? error.message : String(error)}`,
);
changedEmbeddings = changedIndices.map(() => null);
}
const rows = changedIndices.map((srcIdx, i) => {
return {
sourceName: sources[srcIdx].name,
searchText: searchTexts[srcIdx],
embedding: changedEmbeddings[i],
};
});
const rows = changedIndices.map((sourceIndex, embeddingIndex) => ({
sourceName: sources[sourceIndex]!.name,
searchText: searchTexts[sourceIndex]!,
embedding: changedEmbeddings[embeddingIndex] ?? null,
}));
await this.slSourcesRepository.upsertSources(connectionId, rows);
// Remove sources that no longer exist in YAML
const keepNames = sources.map((s) => s.name);
await this.slSourcesRepository.deleteStale(connectionId, keepNames);
this.logger.log(
`SL sources for connection ${connectionId}: ${changedIndices.length}/${sources.length} reindexed, ${sources.length - changedIndices.length} unchanged`,
);
const keepNames = sources.map((source) => source.name);
const deleted = await this.slSourcesRepository.deleteStale(connectionId, keepNames);
return {
scanned: sources.length,
updated: changedIndices.length,
deleted,
embeddingsRecomputed,
embeddingsFailed,
};
}
async search(
@ -170,12 +169,14 @@ export class SlSearchService {
minRrfScore = 0,
): Promise<Array<{ sourceName: string; score: number; snippet?: string }>> {
let queryEmbedding: number[] | null = null;
try {
queryEmbedding = await this.embeddingService.computeEmbedding(query);
} catch (error) {
this.logger.warn(
`Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`,
);
if (this.embeddingService) {
try {
queryEmbedding = await this.embeddingService.computeEmbedding(query);
} catch (error) {
this.logger.warn(
`Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`,
);
}
}
const results = await this.slSourcesRepository.search(connectionId, queryEmbedding, query, limit, minRrfScore);

View file

@ -105,6 +105,33 @@ describe('SqliteSlSourcesIndex', () => {
expect(await index.search('finance', null, 'revenue', 10)).toEqual([]);
});
it('clear removes sources and dictionary rows for one connection only', async () => {
const index = new SqliteSlSourcesIndex({ dbPath });
await index.upsertSources('warehouse', [
{ sourceName: 'orders', searchText: 'orders revenue paid', embedding: null },
]);
await index.upsertSources('finance', [
{ sourceName: 'invoices', searchText: 'invoices revenue paid', embedding: null },
]);
await index.replaceDictionaryEntries('warehouse', [
{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'paid', cardinality: 1 },
]);
await index.replaceDictionaryEntries('finance', [
{ connectionId: 'finance', sourceName: 'invoices', columnName: 'status', value: 'paid', cardinality: 1 },
]);
await expect(index.clear('warehouse')).resolves.toBe(1);
expect(await index.search('warehouse', null, 'revenue', 10)).toEqual([]);
expect(await index.search('finance', null, 'revenue', 10)).toEqual([
expect.objectContaining({ sourceName: 'invoices' }),
]);
await expect(index.searchDictionaryCandidates({ connectionIds: ['warehouse'], queryText: 'paid', limit: 10 }))
.resolves.toEqual([]);
await expect(index.searchDictionaryCandidates({ connectionIds: ['finance'], queryText: 'paid', limit: 10 }))
.resolves.toEqual([expect.objectContaining({ connectionId: 'finance', sourceName: 'invoices' })]);
});
it('returns lane candidates with stable connection-scoped IDs', async () => {
const index = new SqliteSlSourcesIndex({ dbPath });

View file

@ -221,10 +221,9 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
);
}
async deleteStale(connectionId: string, keepNames: string[]): Promise<void> {
async deleteStale(connectionId: string, keepNames: string[]): Promise<number> {
if (keepNames.length === 0) {
await this.deleteByConnection(connectionId);
return;
return this.deleteByConnection(connectionId);
}
const placeholders = keepNames.map(() => '?').join(', ');
@ -257,18 +256,29 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
});
remove(stale.map((row) => row.source_name));
return stale.length;
}
async deleteByConnection(connectionId: string): Promise<void> {
async deleteByConnection(connectionId: string): Promise<number> {
return this.clear(connectionId);
}
async clear(connectionId: string): Promise<number> {
const rows = this.db
.prepare('SELECT source_name FROM local_sl_sources WHERE connection_id = ?')
.all(connectionId) as Array<{ source_name: string }>;
const remove = this.db.transaction(() => {
this.db.prepare('DELETE FROM local_sl_sources_fts WHERE connection_id = ?').run(connectionId);
this.db.prepare('DELETE FROM local_sl_sources WHERE connection_id = ?').run(connectionId);
this.db.prepare('DELETE FROM local_sl_dictionary_values_fts WHERE connection_id = ?').run(connectionId);
this.db.prepare('DELETE FROM local_sl_dictionary_values WHERE connection_id = ?').run(connectionId);
});
remove();
return rows.length;
}
async deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<void> {
this.deleteByConnectionAndNameSync(connectionId, sourceName);
async deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<number> {
return this.deleteByConnectionAndNameSync(connectionId, sourceName);
}
async replaceDictionaryEntries(connectionId: string, entries: SlDictionaryEntry[]): Promise<void> {
@ -537,7 +547,7 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
.filter((row) => row.rrfScore >= minRrfScore);
}
private deleteByConnectionAndNameSync(connectionId: string, sourceName: string): void {
private deleteByConnectionAndNameSync(connectionId: string, sourceName: string): number {
const remove = this.db.transaction(() => {
this.db
.prepare(
@ -548,7 +558,7 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
`,
)
.run(connectionId, sourceName);
this.db
const result = this.db
.prepare(
`
DELETE FROM local_sl_sources
@ -557,7 +567,8 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
`,
)
.run(connectionId, sourceName);
return Number(result.changes);
});
remove();
return remove();
}
}

View file

@ -4,9 +4,9 @@ import { KnowledgeWikiService, type WikiFrontmatter } from './knowledge-wiki.ser
function makeService() {
const pagesRepository: Record<string, ReturnType<typeof vi.fn>> = {
upsertPage: vi.fn().mockResolvedValue(undefined),
deleteByKey: vi.fn().mockResolvedValue(undefined),
deleteByScope: vi.fn().mockResolvedValue(undefined),
deleteStale: vi.fn().mockResolvedValue(undefined),
deleteByKey: vi.fn().mockResolvedValue(0),
deleteByScope: vi.fn().mockResolvedValue(0),
deleteStale: vi.fn().mockResolvedValue(0),
getExistingSearchTexts: vi.fn().mockResolvedValue(new Map()),
applyDiffTransactional: vi.fn().mockResolvedValue(undefined),
};
@ -50,6 +50,87 @@ function makeService() {
const fm: WikiFrontmatter = { summary: 'sum', usage_mode: 'auto' };
describe('KnowledgeWikiService.syncIndex result stats', () => {
it('reports scanned, updated, deleted, and embedding counts', async () => {
const { service, pagesRepository, embeddingService, configService } = makeService();
configService.listFiles.mockResolvedValue({ files: ['wiki/global/revenue.md'] });
configService.readFile.mockResolvedValue({
content: '---\nsummary: Revenue\nusage_mode: auto\ntags:\n - finance\n---\n\nPaid orders.\n',
});
pagesRepository.getExistingSearchTexts.mockResolvedValue(
new Map([
['old-page', { searchText: 'old', hasEmbedding: true }],
]),
);
embeddingService.computeEmbeddingsBulk.mockResolvedValue([[0.1, 0.2, 0.3]]);
pagesRepository.deleteStale.mockResolvedValue(1);
await expect(service.syncIndex('GLOBAL', null)).resolves.toEqual({
scanned: 1,
updated: 1,
deleted: 1,
embeddingsRecomputed: 1,
embeddingsFailed: 0,
});
});
it('indexes lexical rows when embeddings are not configured', async () => {
const { pagesRepository, configService, gitService, logger } = makeService();
const service = new KnowledgeWikiService(
configService as any,
null,
pagesRepository as any,
gitService as any,
logger as any,
);
configService.listFiles.mockResolvedValue({ files: ['wiki/global/revenue.md'] });
configService.readFile.mockResolvedValue({
content: '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
});
pagesRepository.getExistingSearchTexts.mockResolvedValue(new Map());
pagesRepository.deleteStale.mockResolvedValue(0);
const result = await service.syncIndex('GLOBAL', null);
expect(result.embeddingsRecomputed).toBe(0);
expect(result.embeddingsFailed).toBe(0);
expect(pagesRepository.upsertPage).toHaveBeenCalledWith(
expect.objectContaining({ pageKey: 'revenue', embedding: null }),
);
});
it('does not update unchanged lexical-only wiki rows on repeated sync', async () => {
const { pagesRepository, configService, gitService, logger } = makeService();
const service = new KnowledgeWikiService(
configService as any,
null,
pagesRepository as any,
gitService as any,
logger as any,
);
configService.listFiles.mockResolvedValue({ files: ['wiki/global/revenue.md'] });
configService.readFile.mockResolvedValue({
content: '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
});
pagesRepository.getExistingSearchTexts.mockResolvedValue(
new Map([
['revenue', { searchText: 'revenue\nRevenue\nPaid orders.', hasEmbedding: false }],
]),
);
pagesRepository.deleteStale.mockResolvedValue(0);
await expect(service.syncIndex('GLOBAL', null)).resolves.toEqual({
scanned: 1,
updated: 0,
deleted: 0,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
});
expect(pagesRepository.upsertPage).not.toHaveBeenCalled();
expect(pagesRepository.deleteStale).toHaveBeenCalledWith('GLOBAL', null, ['revenue']);
});
});
describe('KnowledgeWikiService.forWorktree isolation', () => {
it('syncSinglePage in worktree scope does not call pagesRepository.upsertPage', async () => {
const { service, pagesRepository, embeddingService } = makeService();

View file

@ -2,6 +2,7 @@ import { createHash } from 'node:crypto';
import YAML from 'yaml';
import type { KtxEmbeddingPort, KtxFileStorePort, KtxLogger } from '../core/index.js';
import { noopLogger } from '../core/index.js';
import type { ReindexWorkResult } from '../index-sync/types.js';
import { assertFlatWikiKey, isFlatWikiKey } from './keys.js';
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
import type { KnowledgeGitDiffPort, KnowledgeIndexPort, UpsertPageParams } from './ports.js';
@ -16,7 +17,7 @@ export class KnowledgeWikiService {
constructor(
private readonly configService: KtxFileStorePort,
private readonly embeddingService: KtxEmbeddingPort,
private readonly embeddingService: KtxEmbeddingPort | null,
private readonly pagesRepository: KnowledgeIndexPort,
private readonly gitService: KnowledgeGitDiffPort,
private readonly logger: KtxLogger = noopLogger,
@ -246,10 +247,12 @@ export class KnowledgeWikiService {
const searchText = buildKnowledgeSearchText(pageKey, frontmatter.summary, content, frontmatter.tags);
let embedding: number[] | null = null;
try {
embedding = await this.embeddingService.computeEmbedding(searchText);
} catch (err) {
this.logger.warn(`Embedding failed for page "${pageKey}": ${err instanceof Error ? err.message : String(err)}`);
if (this.embeddingService) {
try {
embedding = await this.embeddingService.computeEmbedding(searchText);
} catch (err) {
this.logger.warn(`Embedding failed for page "${pageKey}": ${err instanceof Error ? err.message : String(err)}`);
}
}
await this.pagesRepository.upsertPage({
@ -269,14 +272,21 @@ export class KnowledgeWikiService {
* Full sync: load all pages from disk for a scope, reindex changed pages, clean stale entries.
* Mirrors SlSearchService.indexSources() pattern.
*/
async syncIndex(scope: string, scopeId?: string | null): Promise<void> {
async syncIndex(scope: string, scopeId?: string | null): Promise<ReindexWorkResult> {
const pageKeys = await this.listPageKeys(scope, scopeId);
const existing = await this.pagesRepository.getExistingSearchTexts(scope, scopeId ?? null);
if (pageKeys.length === 0) {
await this.pagesRepository.deleteByScope(scope, scopeId ?? null);
return;
const deleted = await this.pagesRepository.deleteByScope(scope, scopeId ?? null);
return {
scanned: 0,
updated: 0,
deleted,
embeddingsRecomputed: 0,
embeddingsFailed: 0,
};
}
// Load and parse all pages
const pages: Array<{ pageKey: string; frontmatter: WikiFrontmatter; content: string; searchText: string }> = [];
for (const key of pageKeys) {
const page = await this.readPage(scope, scopeId, key);
@ -286,58 +296,58 @@ export class KnowledgeWikiService {
}
}
// Detect changes
const existing = await this.pagesRepository.getExistingSearchTexts(scope, scopeId ?? null);
const changedPages = pages.filter((p) => {
const ex = existing.get(p.pageKey);
return !ex || ex.searchText !== p.searchText || !ex.hasEmbedding;
const embeddingService = this.embeddingService;
const changedPages = pages.filter((page) => {
const previous = existing.get(page.pageKey);
return (
!previous ||
previous.searchText !== page.searchText ||
(embeddingService !== null && !previous.hasEmbedding)
);
});
if (changedPages.length === 0) {
// Still clean up stale
await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
this.logger.log(`Wiki sync ${scope}: all ${pages.length} pages up to date`);
return;
}
let embeddings: (number[] | null)[] = changedPages.map(() => null);
let embeddingsRecomputed = 0;
let embeddingsFailed = 0;
// Compute embeddings for changed pages (batched)
const changedTexts = changedPages.map((p) => p.searchText);
let embeddings: (number[] | null)[];
try {
const batchSize = this.embeddingService.maxBatchSize;
const all: number[][] = [];
for (let i = 0; i < changedTexts.length; i += batchSize) {
const batch = changedTexts.slice(i, i + batchSize);
const batchEmb = await this.embeddingService.computeEmbeddingsBulk(batch);
all.push(...batchEmb);
if (embeddingService && changedPages.length > 0) {
try {
const changedTexts = changedPages.map((page) => page.searchText);
const all: number[][] = [];
for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) {
const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize);
all.push(...(await embeddingService.computeEmbeddingsBulk(batch)));
}
embeddings = all;
embeddingsRecomputed = all.length;
} catch (err) {
this.logger.warn(`Embedding batch failed during sync: ${err instanceof Error ? err.message : String(err)}`);
embeddingsFailed = changedPages.length;
}
embeddings = all;
} catch (err) {
this.logger.warn(`Embedding batch failed during sync: ${err instanceof Error ? err.message : String(err)}`);
embeddings = changedPages.map(() => null);
}
// Upsert changed pages
for (let i = 0; i < changedPages.length; i++) {
const p = changedPages[i];
for (let i = 0; i < changedPages.length; i += 1) {
const page = changedPages[i]!;
await this.pagesRepository.upsertPage({
scope,
scopeId: scopeId ?? null,
pageKey: p.pageKey,
summary: p.frontmatter.summary,
usageMode: p.frontmatter.usage_mode,
sortOrder: p.frontmatter.sort_order ?? 0,
searchText: p.searchText,
embedding: embeddings[i],
pageKey: page.pageKey,
summary: page.frontmatter.summary,
usageMode: page.frontmatter.usage_mode,
sortOrder: page.frontmatter.sort_order ?? 0,
searchText: page.searchText,
embedding: embeddings[i] ?? null,
});
}
// Clean stale entries
await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
this.logger.log(
`Wiki sync ${scope}: ${changedPages.length}/${pages.length} reindexed, ${pages.length - changedPages.length} unchanged`,
);
const deleted = await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
return {
scanned: pages.length,
updated: changedPages.length,
deleted,
embeddingsRecomputed,
embeddingsFailed,
};
}
/**
@ -388,12 +398,14 @@ export class KnowledgeWikiService {
parsed.frontmatter.tags,
);
let embedding: number[] | null = null;
try {
embedding = await this.embeddingService.computeEmbedding(searchText);
} catch (err) {
this.logger.warn(
`[wiki.sync] embedding failed for ${parsedPath.pageKey}: ${err instanceof Error ? err.message : String(err)}`,
);
if (this.embeddingService) {
try {
embedding = await this.embeddingService.computeEmbedding(searchText);
} catch (err) {
this.logger.warn(
`[wiki.sync] embedding failed for ${parsedPath.pageKey}: ${err instanceof Error ? err.message : String(err)}`,
);
}
}
const contentHash = createHash('sha256').update(content).digest('hex');
upserts.push({

View file

@ -33,9 +33,9 @@ export interface KnowledgeIndexPort {
scope: string,
scopeId: string | null,
): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>>;
deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<void>;
deleteByScope(scope: string, scopeId: string | null): Promise<void>;
deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<void>;
deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<number>;
deleteByScope(scope: string, scopeId: string | null): Promise<number>;
deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<number>;
findPageByKey(
scope: string,
scopeId: string | null,

View file

@ -65,6 +65,35 @@ describe('SqliteKnowledgeIndex', () => {
expect(index.search('churn', 10)).toEqual([]);
});
it('clear removes one wiki scope and leaves other scopes intact', async () => {
  const index = new SqliteKnowledgeIndex({ dbPath });

  // Same page key stored three times: once globally and once per user scope id.
  const globalRevenue = page({ path: 'wiki/global/revenue.md', key: 'revenue', scope: 'GLOBAL', scopeId: null });
  const localRevenue = page({
    path: 'wiki/user/local/revenue.md',
    key: 'revenue',
    scope: 'USER',
    scopeId: 'local',
    summary: 'Local revenue',
    content: 'Local revenue notes.',
  });
  const alexRevenue = page({
    path: 'wiki/user/alex/revenue.md',
    key: 'revenue',
    scope: 'USER',
    scopeId: 'alex',
    summary: 'Alex revenue',
    content: 'Alex revenue notes.',
  });
  index.sync([globalRevenue, localRevenue, alexRevenue]);

  // Clearing USER/local must report exactly one removed row.
  const removed = index.clear('USER', 'local');
  expect(removed).toBe(1);

  // The cleared scope is no longer searchable; the other USER scope and GLOBAL still are.
  const localHits = index.search('Local', 10);
  expect(localHits).toEqual([]);
  const alexHits = index.search('Alex', 10);
  expect(alexHits).toEqual([expect.objectContaining({ path: 'wiki/user/alex/revenue.md' })]);
  const globalHits = index.search('definition', 10);
  expect(globalHits).toEqual([expect.objectContaining({ path: 'wiki/global/revenue.md' })]);
});
it('exposes existing search text and embedding state for incremental refresh', () => {
const index = new SqliteKnowledgeIndex({ dbPath });
index.sync([page({ path: 'wiki/global/revenue.md', key: 'revenue', embedding: [1, 0] })]);

View file

@ -3,6 +3,7 @@ import { dirname } from 'node:path';
import Database from 'better-sqlite3';
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
import type { LocalKnowledgeScope } from './local-knowledge.js';
import type { KnowledgeIndexPageListing, UpsertPageParams } from './ports.js';
export interface SqliteKnowledgeIndexOptions {
dbPath: string;
@ -12,6 +13,7 @@ export interface SqliteKnowledgeIndexPage {
path: string;
key: string;
scope: LocalKnowledgeScope;
scopeId?: string | null;
summary: string;
content: string;
tags: string[];
@ -106,6 +108,7 @@ export class SqliteKnowledgeIndex {
path TEXT PRIMARY KEY,
key TEXT NOT NULL,
scope TEXT NOT NULL,
scope_id TEXT,
summary TEXT NOT NULL,
content TEXT NOT NULL,
tags TEXT NOT NULL,
@ -129,6 +132,9 @@ export class SqliteKnowledgeIndex {
if (!columnNames.has('embedding_json')) {
this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN embedding_json TEXT');
}
if (!columnNames.has('scope_id')) {
this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN scope_id TEXT');
}
}
sync(pages: SqliteKnowledgeIndexPage[]): void {
@ -142,11 +148,12 @@ export class SqliteKnowledgeIndex {
? this.db.prepare('DELETE FROM knowledge_pages_fts')
: this.db.prepare(`DELETE FROM knowledge_pages_fts WHERE path NOT IN (${keepPaths.map(() => '?').join(', ')})`);
const upsertPage = this.db.prepare(`
INSERT INTO knowledge_pages (path, key, scope, summary, content, tags, search_text, embedding_json)
VALUES (@path, @key, @scope, @summary, @content, @tags, @searchText, @embeddingJson)
INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
ON CONFLICT(path) DO UPDATE SET
key = excluded.key,
scope = excluded.scope,
scope_id = excluded.scope_id,
summary = excluded.summary,
content = excluded.content,
tags = excluded.tags,
@ -168,6 +175,7 @@ export class SqliteKnowledgeIndex {
path: page.path,
key: page.key,
scope: page.scope,
scopeId: page.scopeId ?? null,
summary: page.summary,
content: searchText,
tags: page.tags.join(' '),
@ -275,4 +283,201 @@ export class SqliteKnowledgeIndex {
score: scoreFromRank(row.rawScore),
}));
}
/**
 * Builds the index path used as the primary key for a wiki page.
 *
 * `GLOBAL` pages map to `wiki/global/<pageKey>.md`; every other scope maps to
 * `wiki/user/<scopeId>/<pageKey>.md`, substituting `local` when `scopeId` is null.
 */
private pathForPage(scope: string, scopeId: string | null, pageKey: string): string {
  if (scope === 'GLOBAL') {
    return `wiki/global/${pageKey}.md`;
  }
  const owner = scopeId ?? 'local';
  return `wiki/user/${owner}/${pageKey}.md`;
}
/**
 * Inserts or replaces a single page in both `knowledge_pages` and its FTS mirror.
 *
 * The row path is derived from scope / scopeId / pageKey. The page's search text is
 * stored as both `content` and `search_text`, and `tags` is written as an empty string.
 * An absent or empty embedding is stored as NULL.
 *
 * @param params Page fields to write.
 */
async upsertPage(params: UpsertPageParams): Promise<void> {
  const { scope, scopeId, pageKey, summary, searchText, embedding } = params;
  const record = {
    path: this.pathForPage(scope, scopeId, pageKey),
    key: pageKey,
    scope,
    scopeId,
    summary,
    content: searchText,
    tags: '',
    searchText,
    embeddingJson: embedding && embedding.length > 0 ? JSON.stringify(embedding) : null,
  };

  // All three statements run in one transaction so the page row and FTS row stay in step.
  const persist = this.db.transaction(() => {
    const upsertRow = this.db.prepare(
      `
      INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
      VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
      ON CONFLICT(path) DO UPDATE SET
      key = excluded.key,
      scope = excluded.scope,
      scope_id = excluded.scope_id,
      summary = excluded.summary,
      content = excluded.content,
      tags = excluded.tags,
      search_text = excluded.search_text,
      embedding_json = excluded.embedding_json
      `,
    );
    upsertRow.run(record);

    // The FTS entry is replaced by deleting any existing row for the path and re-inserting.
    const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = @path');
    dropFtsRow.run(record);

    const insertFtsRow = this.db.prepare(
      `
      INSERT INTO knowledge_pages_fts (path, key, summary, content, tags)
      VALUES (@path, @key, @summary, @content, @tags)
      `,
    );
    insertFtsRow.run(record);
  });
  persist();
}
/**
 * Loads the indexed search text and embedding presence for every page in a scope.
 *
 * @param scope Scope to read (matched exactly).
 * @param scopeId Scope id to read; matched with `IS`, so null selects rows whose scope_id is NULL.
 * @returns Map keyed by page key. `hasEmbedding` is true when the stored `embedding_json` is not NULL.
 */
async getExistingSearchTexts(
  scope: string,
  scopeId: string | null,
): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>> {
  type IndexedRow = { key: string; search_text: string; embedding_json: string | null };

  const statement = this.db.prepare(
    `
    SELECT key, search_text, embedding_json
    FROM knowledge_pages
    WHERE scope = ?
    AND scope_id IS ?
    ORDER BY key ASC
    `,
  );

  const indexed = new Map<string, { searchText: string; hasEmbedding: boolean }>();
  for (const entry of statement.all(scope, scopeId) as IndexedRow[]) {
    indexed.set(entry.key, {
      searchText: entry.search_text,
      hasEmbedding: entry.embedding_json !== null,
    });
  }
  return indexed;
}
/**
 * Removes every page in the given scope whose key is not listed in `keepKeys`.
 *
 * An empty `keepKeys` clears the whole scope via `deleteByScope`.
 *
 * @param scope Scope to prune (matched exactly).
 * @param scopeId Scope id to prune; matched with `IS`, so null selects rows whose scope_id is NULL.
 * @param keepKeys Page keys that must survive.
 * @returns Number of rows actually deleted from `knowledge_pages`.
 */
async deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<number> {
  if (keepKeys.length === 0) {
    return this.deleteByScope(scope, scopeId);
  }

  // Filter in memory instead of binding one SQL parameter per kept key, so a large
  // wiki cannot exceed SQLite's bound-parameter limit.
  const keep = new Set(keepKeys);

  const purge = this.db.transaction(() => {
    const rows = this.db
      .prepare('SELECT path, key FROM knowledge_pages WHERE scope = ? AND scope_id IS ?')
      .all(scope, scopeId) as Array<{ path: string; key: string }>;
    const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?');
    const dropPageRow = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?');

    let removed = 0;
    for (const row of rows) {
      if (keep.has(row.key)) {
        continue;
      }
      // Delete by the stored path of the row that was found stale. Re-deriving the
      // path from scope/scopeId/key can point at a different row (a null scopeId is
      // rendered as "local") or at no row at all.
      dropFtsRow.run(row.path);
      removed += Number(dropPageRow.run(row.path).changes);
    }
    return removed;
  });

  return purge();
}
/**
 * Promise-returning wrapper around `clear` for the given scope.
 *
 * @returns The count reported by `clear`.
 */
async deleteByScope(scope: string, scopeId: string | null): Promise<number> {
  const removedCount = this.clear(scope, scopeId);
  return removedCount;
}
/**
 * Deletes one page, addressed by the path derived from scope / scopeId / pageKey,
 * from both the FTS table and `knowledge_pages` in a single transaction.
 *
 * @returns Number of rows removed from `knowledge_pages`.
 */
async deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<number> {
  const targetPath = this.pathForPage(scope, scopeId, pageKey);

  const dropBoth = this.db.transaction(() => {
    const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?');
    dropFtsRow.run(targetPath);

    const dropPageRow = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?');
    const outcome = dropPageRow.run(targetPath);
    return Number(outcome.changes);
  });

  return dropBoth();
}
/**
 * Removes every page of one scope from both `knowledge_pages` and the FTS table.
 *
 * @param scope Scope to clear (matched exactly).
 * @param scopeId Scope id to clear; matched with `IS`, so null selects rows whose scope_id is NULL.
 * @returns Number of page rows that were selected for removal.
 */
clear(scope: string, scopeId: string | null): number {
  // Prepare each statement once instead of once per path inside the loop.
  const selectPaths = this.db.prepare('SELECT path FROM knowledge_pages WHERE scope = ? AND scope_id IS ?');
  const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?');
  const dropPageRow = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?');

  // Read and delete inside the same transaction so the path list and the deletes
  // operate on one consistent snapshot.
  const remove = this.db.transaction(() => {
    const rows = selectPaths.all(scope, scopeId) as Array<{ path: string }>;
    for (const { path } of rows) {
      dropFtsRow.run(path);
      dropPageRow.run(path);
    }
    return rows.length;
  });

  return remove();
}
/**
 * Applies a batch of page upserts followed by page deletes as ONE atomic transaction.
 *
 * If any statement throws, the whole batch is rolled back and the returned promise
 * rejects, so the index never holds a partly applied diff. The writes are issued
 * directly here rather than through the async `upsertPage` / `deleteByKey` methods:
 * the transaction callback must be synchronous, and errors raised inside an async
 * call would surface as promise rejections that cannot trigger the rollback.
 * The row shape and SQL mirror `upsertPage` and `deleteByKey`.
 *
 * @param params.runId Accepted for interface compatibility; not used by this index.
 * @param params.upserts Pages to insert or replace (applied first, in order).
 * @param params.deletes Pages to remove (applied after the upserts, in order).
 */
async applyDiffTransactional(params: {
  runId: string;
  upserts: UpsertPageParams[];
  deletes: Array<{ scope: string; scopeId: string | null; pageKey: string }>;
}): Promise<void> {
  void params.runId;

  const upsertRow = this.db.prepare(
    `
    INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
    VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
    ON CONFLICT(path) DO UPDATE SET
    key = excluded.key,
    scope = excluded.scope,
    scope_id = excluded.scope_id,
    summary = excluded.summary,
    content = excluded.content,
    tags = excluded.tags,
    search_text = excluded.search_text,
    embedding_json = excluded.embedding_json
    `,
  );
  const insertFtsRow = this.db.prepare(
    `
    INSERT INTO knowledge_pages_fts (path, key, summary, content, tags)
    VALUES (@path, @key, @summary, @content, @tags)
    `,
  );
  const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?');
  const dropPageRow = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?');

  const applyAll = this.db.transaction(() => {
    for (const page of params.upserts) {
      const row = {
        path: this.pathForPage(page.scope, page.scopeId, page.pageKey),
        key: page.pageKey,
        scope: page.scope,
        scopeId: page.scopeId,
        summary: page.summary,
        content: page.searchText,
        tags: '',
        searchText: page.searchText,
        embeddingJson: page.embedding && page.embedding.length > 0 ? JSON.stringify(page.embedding) : null,
      };
      upsertRow.run(row);
      // Replace the FTS entry: drop any existing row for the path, then re-insert.
      dropFtsRow.run(row.path);
      insertFtsRow.run(row);
    }

    for (const page of params.deletes) {
      const path = this.pathForPage(page.scope, page.scopeId, page.pageKey);
      dropFtsRow.run(path);
      dropPageRow.run(path);
    }
  });

  applyAll();
}
/**
 * Looks up a page by the path derived from scope / scopeId / pageKey.
 *
 * @returns `{ id, page_key }` where `id` is the stored path, or null when no row has that path.
 */
async findPageByKey(
  scope: string,
  scopeId: string | null,
  pageKey: string,
): Promise<{ id?: string; page_key: string } | null> {
  const lookupPath = this.pathForPage(scope, scopeId, pageKey);
  const statement = this.db.prepare('SELECT path, key FROM knowledge_pages WHERE path = ?');
  const found = statement.get(lookupPath) as { path: string; key: string } | undefined;

  if (!found) {
    return null;
  }
  return { id: found.path, page_key: found.key };
}
/**
 * Lists every page visible to a user: all `GLOBAL` pages plus `USER` pages whose
 * scope_id equals `userId`, ordered by scope and then key.
 *
 * The stored path is exposed as `id`, and the whitespace-separated `tags` column is
 * split into an array with empty entries dropped.
 */
async listPagesForUser(userId: string): Promise<KnowledgeIndexPageListing[]> {
  type PageRow = {
    path: string;
    key: string;
    scope: string;
    scope_id: string | null;
    summary: string;
    tags: string;
  };

  const statement = this.db.prepare(
    `
    SELECT path, key, scope, scope_id, summary, tags
    FROM knowledge_pages
    WHERE scope = 'GLOBAL'
    OR (scope = 'USER' AND scope_id = ?)
    ORDER BY scope ASC, key ASC
    `,
  );
  const visibleRows = statement.all(userId) as PageRow[];

  return visibleRows.map((entry) => {
    const tagList = entry.tags.split(/\s+/).filter(Boolean);
    return {
      id: entry.path,
      page_key: entry.key,
      summary: entry.summary,
      scope: entry.scope,
      scope_id: entry.scope_id,
      tags: tagList,
    };
  });
}
/**
 * Counts the `USER`-scoped pages whose scope_id equals `userId`.
 */
async getUserPageCount(userId: string): Promise<number> {
  const statement = this.db.prepare(
    "SELECT COUNT(*) AS count FROM knowledge_pages WHERE scope = 'USER' AND scope_id = ?",
  );
  const { count } = statement.get(userId) as { count: number };
  return count;
}
// No-op: the body is empty, so calling this records nothing and resolves immediately.
async incrementUsageCount(): Promise<void> {}
/**
 * Lexical search restricted to the pages visible to `userId`.
 *
 * The embedding argument is ignored; ranking comes from `search`, and each hit's
 * score is returned as `rrfScore`.
 *
 * `search` knows nothing about page visibility, so passing `limit` straight through
 * would let hits on other users' pages consume result slots and return fewer than
 * `limit` rows even when more visible matches exist. For a positive `limit` the
 * lookup therefore over-fetches (bounded by the total number of indexed pages) and
 * the limit is applied after filtering. A non-positive `limit` is forwarded unchanged.
 *
 * @param userId User whose GLOBAL + own USER pages may be returned.
 * @param _embedding Unused by this index.
 * @param queryText Text query forwarded to `search`.
 * @param limit Maximum number of visible results to return.
 */
async searchRRF(
  userId: string,
  _embedding: number[] | null,
  queryText: string,
  limit: number,
): Promise<Array<{ pageKey: string; summary: string; rrfScore: number }>> {
  const allowedPages = new Map((await this.listPagesForUser(userId)).map((page) => [page.id, page]));

  let fetchLimit = limit;
  if (limit > 0) {
    const { total } = this.db.prepare('SELECT COUNT(*) AS total FROM knowledge_pages').get() as { total: number };
    fetchLimit = Math.max(limit, total);
  }

  const hits: Array<{ pageKey: string; summary: string; rrfScore: number }> = [];
  for (const row of this.search(queryText, fetchLimit)) {
    const page = allowedPages.get(row.path);
    if (!page) {
      continue;
    }
    hits.push({ pageKey: page.page_key, summary: page.summary, rrfScore: row.score });
    // Stop as soon as the caller's limit is satisfied with visible pages only.
    if (limit > 0 && hits.length >= limit) {
      break;
    }
  }
  return hits;
}
}