mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
feat(cli): add ktx admin reindex (#160)
* feat(cli): add admin reindex * fix: keep lexical-only reindex incremental
This commit is contained in:
parent
3db3e724cb
commit
6dbb0c8b3a
53 changed files with 1640 additions and 393 deletions
2
packages/context/src/index-sync/index.ts
Normal file
2
packages/context/src/index-sync/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
|
||||
export { discoverReindexScopes, reindexLocalIndexes } from './reindex.js';
|
||||
196
packages/context/src/index-sync/reindex.test.ts
Normal file
196
packages/context/src/index-sync/reindex.test.ts
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxEmbeddingPort } from '../core/index.js';
|
||||
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../project/index.js';
|
||||
import { SqliteKnowledgeIndex } from '../wiki/sqlite-knowledge-index.js';
|
||||
import { reindexLocalIndexes } from './reindex.js';
|
||||
|
||||
/**
 * Deterministic embedding stub for tests: every text maps to the
 * two-dimensional vector `[text.length, 1]`, so no provider is needed.
 */
class FakeEmbeddingPort implements KtxEmbeddingPort {
  readonly maxBatchSize = 8;

  /** Builds the fake vector for a single text. */
  private static vectorFor(text: string): number[] {
    return [text.length, 1];
  }

  /** Embeds one text. */
  async computeEmbedding(text: string): Promise<number[]> {
    return FakeEmbeddingPort.vectorFor(text);
  }

  /** Embeds many texts, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const vectors: number[][] = [];
    for (const text of texts) {
      vectors.push(FakeEmbeddingPort.vectorFor(text));
    }
    return vectors;
  }
}
|
||||
|
||||
/**
 * Initializes a fresh ktx project inside `tempDir` (overwriting any previous
 * one) and returns the loaded project handle.
 */
async function createProject(tempDir: string): Promise<KtxLocalProject> {
  const location = { projectDir: tempDir };
  await initKtxProject({ ...location, force: true });
  const project = await loadKtxProject(location);
  return project;
}
|
||||
|
||||
describe('reindexLocalIndexes', () => {
  // Fixture contents shared by several cases.
  const REVENUE_PAGE = '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n';
  const ORDERS_SOURCE =
    'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n';

  let tempDir: string;

  /** Writes the sample revenue page into the global wiki scope. */
  const writeRevenuePage = async (project: KtxLocalProject): Promise<void> => {
    await writeFile(join(project.projectDir, 'wiki/global/revenue.md'), REVENUE_PAGE, 'utf-8');
  };

  /** Ensures the `warehouse` connection directory exists, then writes one file into the project. */
  const writeWarehouseFile = async (project: KtxLocalProject, relativePath: string, body: string): Promise<void> => {
    await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
    await writeFile(join(project.projectDir, relativePath), body, 'utf-8');
  };

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-reindex-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('returns an empty summary when no wiki or semantic-layer directories exist', async () => {
    const project = await createProject(tempDir);
    await rm(join(project.projectDir, 'wiki'), { recursive: true, force: true });
    await rm(join(project.projectDir, 'semantic-layer'), { recursive: true, force: true });

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });

    expect(summary).toMatchObject({
      scopes: [],
      totals: { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 },
      force: false,
      embeddingsAvailable: false,
    });
  });

  it('discovers empty directories as zero-row scopes', async () => {
    const project = await createProject(tempDir);
    await mkdir(join(project.projectDir, 'wiki/user/local'), { recursive: true });
    await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
    const labels = summary.scopes.map((entry) => entry.label);

    expect(labels).toEqual(['global', 'user/local', 'warehouse']);
    expect(summary.totals.scanned).toBe(0);
  });

  it('indexes mixed wiki and SL sources and reports totals', async () => {
    const project = await createProject(tempDir);
    await writeRevenuePage(project);
    await writeWarehouseFile(project, 'semantic-layer/warehouse/orders.yaml', ORDERS_SOURCE);

    const summary = await reindexLocalIndexes(project, {
      force: false,
      embeddingService: new FakeEmbeddingPort(),
    });

    expect(summary.scopes).toHaveLength(2);
    expect(summary.totals).toMatchObject({ scanned: 2, updated: 2, deleted: 0, embeddingsRecomputed: 2 });
    expect(summary.embeddingsAvailable).toBe(true);
  });

  it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
    const project = await createProject(tempDir);
    await writeRevenuePage(project);
    await writeWarehouseFile(project, 'semantic-layer/warehouse/orders.yaml', ORDERS_SOURCE);

    // First pass indexes both sources without embeddings.
    const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
    expect(first.totals).toMatchObject({
      scanned: 2,
      updated: 2,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });

    // Second pass sees identical content and must leave every row untouched.
    const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });
    expect(second.totals).toMatchObject({
      scanned: 2,
      updated: 0,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });
    const updatedByLabel = second.scopes.map((entry) => [entry.label, entry.updated]);
    expect(updatedByLabel).toEqual([
      ['global', 0],
      ['warehouse', 0],
    ]);
  });

  it('force clears stale rows before rebuilding each discovered scope', async () => {
    const project = await createProject(tempDir);
    const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
    // Seed a row whose backing page does not exist on disk.
    const staleRow = {
      path: 'wiki/global/stale.md',
      key: 'stale',
      scope: 'GLOBAL',
      scopeId: null,
      summary: 'Stale',
      content: 'Stale content',
      tags: [],
      embedding: [1, 0],
    };
    wikiIndex.sync([staleRow]);
    await writeRevenuePage(project);

    const summary = await reindexLocalIndexes(project, {
      force: true,
      embeddingService: new FakeEmbeddingPort(),
    });

    expect(summary.force).toBe(true);
    expect(summary.totals).toMatchObject({ scanned: 1, updated: 1, deleted: 0 });
    expect(wikiIndex.search('Stale', 10)).toEqual([]);
  });

  it('captures a per-scope error and continues other scopes', async () => {
    const project = await createProject(tempDir);
    await writeRevenuePage(project);
    await writeWarehouseFile(project, 'semantic-layer/warehouse/broken.yaml', 'not: [valid');

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
    const resultFor = (label: string) => summary.scopes.find((entry) => entry.label === label);

    expect(resultFor('global')?.error).toBeUndefined();
    expect(resultFor('warehouse')?.error).toContain('YAML');
  });

  it('marks a scope errored when configured embeddings fail', async () => {
    const project = await createProject(tempDir);
    await writeRevenuePage(project);
    // Both entry points of the port reject, as an unreachable provider would.
    const unavailable = async (): Promise<never> => {
      throw new Error('embedding provider unavailable');
    };
    const embeddingService: KtxEmbeddingPort = {
      maxBatchSize: 8,
      computeEmbedding: unavailable,
      computeEmbeddingsBulk: unavailable,
    };

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService });

    expect(summary.scopes[0]).toMatchObject({
      label: 'global',
      embeddingsFailed: 1,
      error: '1 embedding recomputation failed',
    });
  });
});
|
||||
162
packages/context/src/index-sync/reindex.ts
Normal file
162
packages/context/src/index-sync/reindex.ts
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
import { readdir, stat } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import { ktxLocalStateDbPath, type KtxLocalProject } from '../project/index.js';
|
||||
import { loadLocalSlSourceRecords, SlSearchService, SqliteSlSourcesIndex } from '../sl/index.js';
|
||||
import { KnowledgeWikiService, SqliteKnowledgeIndex } from '../wiki/index.js';
|
||||
import type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
|
||||
|
||||
/** The shared wiki scope, discovered from `wiki/global`. */
type GlobalWikiScope = { kind: 'wiki'; scope: 'GLOBAL'; scopeId: null; label: 'global' };

/** A per-user wiki scope, discovered from a child directory of `wiki/user`. */
type UserWikiScope = { kind: 'wiki'; scope: 'USER'; scopeId: string; label: `user/${string}` };

/** A semantic-layer connection, discovered from a child directory of `semantic-layer`. */
type SemanticLayerScope = { kind: 'sl'; connectionId: string; label: string };

/** Any unit of work the reindexer can process, discriminated by `kind`. */
type DiscoveredScope = GlobalWikiScope | UserWikiScope | SemanticLayerScope;

/** All-zero counters: the fold seed for totals and the counters reported for a failed scope. */
const ZERO: ReindexWorkResult = { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 };
|
||||
|
||||
/**
 * Reports whether `path` currently resolves to a directory.
 * Any failure to stat the path (missing, unreadable, ...) counts as "no".
 */
async function directoryExists(path: string): Promise<boolean> {
  let isDirectory = false;
  try {
    const info = await stat(path);
    isDirectory = info.isDirectory();
  } catch {
    // Leave the default: a path that cannot be inspected is treated as absent.
  }
  return isDirectory;
}
|
||||
|
||||
/**
 * Lists the names of the immediate sub-directories of `path`, sorted with
 * `localeCompare` so discovery order is stable between runs.
 *
 * A path that does not exist (`ENOENT`) or that exists but is not a directory
 * (`ENOTDIR`, e.g. a stray file named `semantic-layer`) simply has no child
 * directories, so both cases yield an empty list instead of aborting the
 * whole reindex.
 *
 * @param path Directory to inspect.
 * @returns Sorted names of the child directories; empty when there are none.
 * @throws Any other filesystem error (for example a permission failure) is rethrown.
 */
async function childDirectories(path: string): Promise<string[]> {
  try {
    const entries = await readdir(path, { withFileTypes: true });
    return entries
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name)
      .sort((left, right) => left.localeCompare(right));
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      return [];
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Walks the project directory and lists every scope that can be reindexed,
 * in a fixed order: the global wiki (when `wiki/global` is a directory), one
 * user wiki per child directory of `wiki/user`, then one semantic-layer
 * connection per child directory of `semantic-layer`, skipping the reserved
 * `_schema` directory.
 */
export async function discoverReindexScopes(project: KtxLocalProject): Promise<DiscoveredScope[]> {
  const root = project.projectDir;
  const discovered: DiscoveredScope[] = [];

  const hasGlobalWiki = await directoryExists(join(root, 'wiki/global'));
  if (hasGlobalWiki) {
    discovered.push({ kind: 'wiki', scope: 'GLOBAL', scopeId: null, label: 'global' });
  }

  const userIds = await childDirectories(join(root, 'wiki/user'));
  discovered.push(
    ...userIds.map((userId): DiscoveredScope => ({ kind: 'wiki', scope: 'USER', scopeId: userId, label: `user/${userId}` })),
  );

  const connectionIds = await childDirectories(join(root, 'semantic-layer'));
  discovered.push(
    ...connectionIds
      .filter((connectionId) => connectionId !== '_schema')
      .map((connectionId): DiscoveredScope => ({ kind: 'sl', connectionId, label: connectionId })),
  );

  return discovered;
}
|
||||
|
||||
/**
 * Renders a caught value as a single-line message. `Error` instances yield
 * their message, prefixed with the error name when that name is something
 * more specific than the generic `Error`; anything else is stringified.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    const { name, message } = error;
    if (!name || name === 'Error') {
      return message;
    }
    return `${name}: ${message}`;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Adds two sets of work counters field by field and returns the sum as a new
 * object; neither argument is modified.
 */
function addTotals(left: ReindexWorkResult, right: ReindexWorkResult): ReindexWorkResult {
  const sumOf = (pick: (work: ReindexWorkResult) => number): number => pick(left) + pick(right);
  return {
    scanned: sumOf((work) => work.scanned),
    updated: sumOf((work) => work.updated),
    deleted: sumOf((work) => work.deleted),
    embeddingsRecomputed: sumOf((work) => work.embeddingsRecomputed),
    embeddingsFailed: sumOf((work) => work.embeddingsFailed),
  };
}
|
||||
|
||||
/**
 * Returns the whole milliseconds elapsed since `startedAt`, a reading
 * previously taken from `process.hrtime.bigint()`. Fractions are truncated.
 */
function durationSince(startedAt: bigint): number {
  const nanosPerMilli = 1_000_000n;
  const elapsedNanos = process.hrtime.bigint() - startedAt;
  return Number(elapsedNanos / nanosPerMilli);
}
|
||||
|
||||
/**
 * Builds the scope-level error text for failed embedding recomputations.
 *
 * @returns `undefined` when nothing failed, otherwise a message such as
 * `1 embedding recomputation failed` or `3 embedding recomputations failed`.
 */
function embeddingFailureError(work: ReindexWorkResult): string | undefined {
  const failures = work.embeddingsFailed;
  if (failures === 0) {
    return undefined;
  }
  const noun = failures === 1 ? 'embedding recomputation' : 'embedding recomputations';
  return `${failures} ${noun} failed`;
}
|
||||
|
||||
/** Extracts the identifying fields of a scope; they lead both success and failure rows. */
function scopeIdentity(
  scope: DiscoveredScope,
): Pick<ReindexScopeResult, 'kind' | 'label' | 'scope' | 'scopeId' | 'connectionId'> {
  if (scope.kind === 'wiki') {
    return {
      kind: 'wiki',
      label: scope.label,
      scope: scope.scope === 'GLOBAL' ? 'global' : 'user',
      scopeId: scope.scopeId,
    };
  }
  return { kind: 'sl', label: scope.label, connectionId: scope.connectionId };
}

/**
 * Rebuilds the local wiki and semantic-layer indexes for every scope found in
 * the project directory.
 *
 * Scopes are processed one after another. A scope that throws is recorded with
 * zeroed counters and an `error` message, and the remaining scopes still run.
 * With `options.force`, a scope's index rows are cleared before it is rebuilt
 * and its reported `deleted` count is forced to 0. When an embedding service is
 * configured and some embeddings failed, the scope keeps its counters but is
 * also flagged with an `error`.
 *
 * @param project Loaded local project whose directories are scanned.
 * @param options `force` toggle and the embedding service (`null` for lexical-only indexing).
 * @returns Per-scope results, summed totals, the database path (relative to the
 * project directory when possible) and timing information.
 */
export async function reindexLocalIndexes(
  project: KtxLocalProject,
  options: ReindexOptions,
): Promise<ReindexSummary> {
  const startedAt = process.hrtime.bigint();
  const dbPath = ktxLocalStateDbPath(project);
  const scopes = await discoverReindexScopes(project);
  const wikiIndex = new SqliteKnowledgeIndex({ dbPath });
  const slIndex = new SqliteSlSourcesIndex({ dbPath });
  const wikiService = new KnowledgeWikiService(project.fileStore, options.embeddingService, wikiIndex, project.git);
  const slService = new SlSearchService(options.embeddingService, slIndex);

  // Performs the actual (re)indexing for one scope and returns its raw counters.
  const indexScope = async (scope: DiscoveredScope): Promise<ReindexWorkResult> => {
    if (scope.kind === 'wiki') {
      if (options.force) {
        wikiIndex.clear(scope.scope, scope.scopeId);
      }
      return wikiService.syncIndex(scope.scope, scope.scopeId);
    }
    if (options.force) {
      await slIndex.clear(scope.connectionId);
    }
    const records = await loadLocalSlSourceRecords(project, { connectionId: scope.connectionId });
    const sources = records.map((record) => record.source);
    return slService.indexSources(scope.connectionId, sources);
  };

  const results: ReindexScopeResult[] = [];
  for (const scope of scopes) {
    const scopeStartedAt = process.hrtime.bigint();
    const identity = scopeIdentity(scope);
    try {
      const work = await indexScope(scope);
      const embeddingsBroken = options.embeddingService && work.embeddingsFailed > 0;
      results.push({
        ...identity,
        ...work,
        // After a forced clear the rows were wiped up front, so no deletions are reported.
        ...(options.force ? { deleted: 0 } : {}),
        ...(embeddingsBroken ? { error: embeddingFailureError(work) } : {}),
        durationMs: durationSince(scopeStartedAt),
      });
    } catch (error) {
      results.push({
        ...identity,
        ...ZERO,
        durationMs: durationSince(scopeStartedAt),
        error: errorMessage(error),
      });
    }
  }

  return {
    scopes: results,
    totals: results.reduce(addTotals, ZERO),
    dbPath: relative(project.projectDir, dbPath) || dbPath,
    force: options.force,
    embeddingsAvailable: options.embeddingService !== null,
    durationMs: durationSince(startedAt),
  };
}
|
||||
33
packages/context/src/index-sync/types.ts
Normal file
33
packages/context/src/index-sync/types.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import type { KtxEmbeddingPort } from '../core/index.js';
|
||||
|
||||
/** Caller-supplied settings for a reindex run. */
export interface ReindexOptions {
  /** When true, each scope's existing index rows are cleared before it is rebuilt. */
  force: boolean;
  /** Embedding provider, or `null` to index lexically without embeddings. */
  embeddingService: KtxEmbeddingPort | null;
}

/** Counters describing the work done for one scope, or summed over all scopes. */
export interface ReindexWorkResult {
  /** Number of sources examined. */
  scanned: number;
  /** Number of index rows written because they were new or changed. */
  updated: number;
  /** Number of stale index rows removed. */
  deleted: number;
  /** Number of embeddings successfully recomputed. */
  embeddingsRecomputed: number;
  /** Number of embeddings that could not be recomputed. */
  embeddingsFailed: number;
}

/** Outcome for a single scope: its counters plus identity, timing and error details. */
export interface ReindexScopeResult extends ReindexWorkResult {
  /** Which index the scope belongs to: the wiki or the semantic layer (`sl`). */
  kind: 'wiki' | 'sl';
  /** Display label: `global`, `user/<id>`, or the connection id. */
  label: string;
  /** Wiki scopes only: whether this is the global or a per-user wiki. */
  scope?: 'global' | 'user';
  /** Wiki scopes only: the user id, or `null` for the global wiki. */
  scopeId?: string | null;
  /** Semantic-layer scopes only: the connection that was indexed. */
  connectionId?: string;
  /** Time spent on this scope, in whole milliseconds. */
  durationMs: number;
  /** Set when the scope threw, or when configured embeddings failed. */
  error?: string;
}

/** Result of a whole reindex run. */
export interface ReindexSummary {
  /** One entry per discovered scope, in processing order. */
  scopes: Array<ReindexScopeResult>;
  /** Field-wise sum of the counters of all scopes. */
  totals: ReindexWorkResult;
  /** Path of the index database, relative to the project directory when possible. */
  dbPath: string;
  /** Echo of the `force` option the run was started with. */
  force: boolean;
  /** True when an embedding service was supplied. */
  embeddingsAvailable: boolean;
  /** Total run time, in whole milliseconds. */
  durationMs: number;
}
|
||||
|
|
@ -12,6 +12,7 @@ export * from './agent/index.js';
|
|||
export * from './core/index.js';
|
||||
export * from './daemon/index.js';
|
||||
export * from './ingest/index.js';
|
||||
export * from './index-sync/index.js';
|
||||
export * from './llm/index.js';
|
||||
export type {
|
||||
CaptureSession,
|
||||
|
|
|
|||
|
|
@ -380,16 +380,19 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
return result;
|
||||
}
|
||||
|
||||
async deleteStale(): Promise<void> {
|
||||
async deleteStale(): Promise<number> {
|
||||
await this.syncAllPagesFromDisk();
|
||||
return 0;
|
||||
}
|
||||
|
||||
async deleteByScope(): Promise<void> {
|
||||
async deleteByScope(): Promise<number> {
|
||||
await this.syncAllPagesFromDisk();
|
||||
return 0;
|
||||
}
|
||||
|
||||
async deleteByKey(): Promise<void> {
|
||||
async deleteByKey(): Promise<number> {
|
||||
await this.syncAllPagesFromDisk();
|
||||
return 0;
|
||||
}
|
||||
|
||||
async findPageByKey(scope: string, scopeId: string | null, pageKey: string) {
|
||||
|
|
|
|||
|
|
@ -205,11 +205,17 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
return new Map();
|
||||
}
|
||||
|
||||
async deleteStale(): Promise<void> {}
|
||||
async deleteStale(): Promise<number> {
|
||||
return 0;
|
||||
}
|
||||
|
||||
async deleteByScope(): Promise<void> {}
|
||||
async deleteByScope(): Promise<number> {
|
||||
return 0;
|
||||
}
|
||||
|
||||
async deleteByKey(): Promise<void> {}
|
||||
async deleteByKey(): Promise<number> {
|
||||
return 0;
|
||||
}
|
||||
|
||||
async findPageByKey(scope: string, scopeId: string | null, pageKey: string) {
|
||||
const path = this.pagePath(scope, scopeId, pageKey);
|
||||
|
|
|
|||
|
|
@ -40,9 +40,9 @@ export interface SlSourcesIndexPort {
|
|||
sources: Array<{ sourceName: string; searchText: string; embedding: number[] | null; contentHash?: string | null }>,
|
||||
): Promise<void>;
|
||||
getExistingSearchTexts(connectionId: string): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>>;
|
||||
deleteStale(connectionId: string, keepNames: string[]): Promise<void>;
|
||||
deleteByConnection(connectionId: string): Promise<void>;
|
||||
deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<void>;
|
||||
deleteStale(connectionId: string, keepNames: string[]): Promise<number>;
|
||||
deleteByConnection(connectionId: string): Promise<number>;
|
||||
deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<number>;
|
||||
search(
|
||||
connectionId: string,
|
||||
queryEmbedding: number[] | null,
|
||||
|
|
|
|||
|
|
@ -223,4 +223,73 @@ describe('SlSearchService', () => {
|
|||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('indexSources reports stats and supports lexical-only indexing', async () => {
|
||||
const repository = {
|
||||
upsertSources: vi.fn().mockResolvedValue(undefined),
|
||||
getExistingSearchTexts: vi.fn().mockResolvedValue(
|
||||
new Map([
|
||||
['old_source', { searchText: 'old source', hasEmbedding: true }],
|
||||
]),
|
||||
),
|
||||
deleteStale: vi.fn().mockResolvedValue(1),
|
||||
deleteByConnection: vi.fn().mockResolvedValue(0),
|
||||
deleteByConnectionAndName: vi.fn(),
|
||||
search: vi.fn(),
|
||||
};
|
||||
const service = new SlSearchService(null, repository);
|
||||
const source: SemanticLayerSource = {
|
||||
name: 'orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
};
|
||||
|
||||
await expect(service.indexSources('warehouse', [source])).resolves.toEqual({
|
||||
scanned: 1,
|
||||
updated: 1,
|
||||
deleted: 1,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
expect(repository.upsertSources).toHaveBeenCalledWith('warehouse', [
|
||||
expect.objectContaining({ sourceName: 'orders', embedding: null }),
|
||||
]);
|
||||
});
|
||||
|
||||
it('does not update unchanged lexical-only SL rows on repeated sync', async () => {
|
||||
const repository = {
|
||||
upsertSources: vi.fn().mockResolvedValue(undefined),
|
||||
getExistingSearchTexts: vi.fn().mockResolvedValue(
|
||||
new Map([
|
||||
['orders', { searchText: 'orders. table: public.orders. id (number)', hasEmbedding: false }],
|
||||
]),
|
||||
),
|
||||
deleteStale: vi.fn().mockResolvedValue(0),
|
||||
deleteByConnection: vi.fn().mockResolvedValue(0),
|
||||
deleteByConnectionAndName: vi.fn(),
|
||||
search: vi.fn(),
|
||||
};
|
||||
const service = new SlSearchService(null, repository);
|
||||
const source: SemanticLayerSource = {
|
||||
name: 'orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
};
|
||||
|
||||
await expect(service.indexSources('warehouse', [source])).resolves.toEqual({
|
||||
scanned: 1,
|
||||
updated: 0,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
expect(repository.upsertSources).toHaveBeenCalledWith('warehouse', []);
|
||||
expect(repository.deleteStale).toHaveBeenCalledWith('warehouse', ['orders']);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import type { ReindexWorkResult } from '../index-sync/types.js';
|
||||
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import type { SlSourcesIndexPort } from './ports.js';
|
||||
|
|
@ -94,73 +95,71 @@ export function buildSemanticLayerSourceSearchText(
|
|||
|
||||
export class SlSearchService {
|
||||
constructor(
|
||||
private readonly embeddingService: KtxEmbeddingPort,
|
||||
private readonly embeddingService: KtxEmbeddingPort | null,
|
||||
private readonly slSourcesRepository: SlSourcesIndexPort,
|
||||
private readonly logger: KtxLogger = noopLogger,
|
||||
) {}
|
||||
|
||||
async indexSources(connectionId: string, sources: SemanticLayerSource[]): Promise<void> {
|
||||
async indexSources(connectionId: string, sources: SemanticLayerSource[]): Promise<ReindexWorkResult> {
|
||||
const existing = await this.slSourcesRepository.getExistingSearchTexts(connectionId);
|
||||
if (sources.length === 0) {
|
||||
await this.slSourcesRepository.deleteByConnection(connectionId);
|
||||
return;
|
||||
const deleted = await this.slSourcesRepository.deleteByConnection(connectionId);
|
||||
return { scanned: 0, updated: 0, deleted, embeddingsRecomputed: 0, embeddingsFailed: 0 };
|
||||
}
|
||||
|
||||
// Detect which sources actually changed by comparing search_text
|
||||
const existing = await this.slSourcesRepository.getExistingSearchTexts(connectionId);
|
||||
const searchTexts = sources.map((s) => this.buildSearchText(s));
|
||||
|
||||
const embeddingService = this.embeddingService;
|
||||
const changedIndices: number[] = [];
|
||||
for (let i = 0; i < sources.length; i++) {
|
||||
const prev = existing.get(sources[i].name);
|
||||
if (!prev || prev.searchText !== searchTexts[i] || !prev.hasEmbedding) {
|
||||
for (let i = 0; i < sources.length; i += 1) {
|
||||
const previous = existing.get(sources[i]!.name);
|
||||
if (
|
||||
!previous ||
|
||||
previous.searchText !== searchTexts[i] ||
|
||||
(embeddingService !== null && !previous.hasEmbedding)
|
||||
) {
|
||||
changedIndices.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (changedIndices.length === 0) {
|
||||
// Still clean up stale sources even if nothing changed
|
||||
const keepNames = sources.map((s) => s.name);
|
||||
await this.slSourcesRepository.deleteStale(connectionId, keepNames);
|
||||
this.logger.log(`SL sources for connection ${connectionId}: all ${sources.length} up to date, 0 reindexed`);
|
||||
return;
|
||||
}
|
||||
let changedEmbeddings: (number[] | null)[] = changedIndices.map(() => null);
|
||||
let embeddingsRecomputed = 0;
|
||||
let embeddingsFailed = 0;
|
||||
|
||||
// Compute embeddings only for changed sources
|
||||
const changedTexts = changedIndices.map((i) => searchTexts[i]);
|
||||
let changedEmbeddings: (number[] | null)[];
|
||||
try {
|
||||
const batchSize = this.embeddingService.maxBatchSize;
|
||||
const allEmbeddings: number[][] = [];
|
||||
for (let i = 0; i < changedTexts.length; i += batchSize) {
|
||||
const batch = changedTexts.slice(i, i + batchSize);
|
||||
const batchEmbeddings = await this.embeddingService.computeEmbeddingsBulk(batch);
|
||||
allEmbeddings.push(...batchEmbeddings);
|
||||
if (embeddingService && changedIndices.length > 0) {
|
||||
try {
|
||||
const changedTexts = changedIndices.map((index) => searchTexts[index]!);
|
||||
const allEmbeddings: number[][] = [];
|
||||
for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) {
|
||||
const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize);
|
||||
allEmbeddings.push(...(await embeddingService.computeEmbeddingsBulk(batch)));
|
||||
}
|
||||
changedEmbeddings = allEmbeddings;
|
||||
embeddingsRecomputed = allEmbeddings.length;
|
||||
} catch (error) {
|
||||
this.logger.warn(
|
||||
`Failed to compute SL source embeddings: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
embeddingsFailed = changedIndices.length;
|
||||
}
|
||||
changedEmbeddings = allEmbeddings;
|
||||
} catch (error) {
|
||||
this.logger.warn(
|
||||
`Failed to compute SL source embeddings: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
changedEmbeddings = changedIndices.map(() => null);
|
||||
}
|
||||
|
||||
const rows = changedIndices.map((srcIdx, i) => {
|
||||
return {
|
||||
sourceName: sources[srcIdx].name,
|
||||
searchText: searchTexts[srcIdx],
|
||||
embedding: changedEmbeddings[i],
|
||||
};
|
||||
});
|
||||
|
||||
const rows = changedIndices.map((sourceIndex, embeddingIndex) => ({
|
||||
sourceName: sources[sourceIndex]!.name,
|
||||
searchText: searchTexts[sourceIndex]!,
|
||||
embedding: changedEmbeddings[embeddingIndex] ?? null,
|
||||
}));
|
||||
await this.slSourcesRepository.upsertSources(connectionId, rows);
|
||||
|
||||
// Remove sources that no longer exist in YAML
|
||||
const keepNames = sources.map((s) => s.name);
|
||||
await this.slSourcesRepository.deleteStale(connectionId, keepNames);
|
||||
|
||||
this.logger.log(
|
||||
`SL sources for connection ${connectionId}: ${changedIndices.length}/${sources.length} reindexed, ${sources.length - changedIndices.length} unchanged`,
|
||||
);
|
||||
const keepNames = sources.map((source) => source.name);
|
||||
const deleted = await this.slSourcesRepository.deleteStale(connectionId, keepNames);
|
||||
return {
|
||||
scanned: sources.length,
|
||||
updated: changedIndices.length,
|
||||
deleted,
|
||||
embeddingsRecomputed,
|
||||
embeddingsFailed,
|
||||
};
|
||||
}
|
||||
|
||||
async search(
|
||||
|
|
@ -170,12 +169,14 @@ export class SlSearchService {
|
|||
minRrfScore = 0,
|
||||
): Promise<Array<{ sourceName: string; score: number; snippet?: string }>> {
|
||||
let queryEmbedding: number[] | null = null;
|
||||
try {
|
||||
queryEmbedding = await this.embeddingService.computeEmbedding(query);
|
||||
} catch (error) {
|
||||
this.logger.warn(
|
||||
`Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
if (this.embeddingService) {
|
||||
try {
|
||||
queryEmbedding = await this.embeddingService.computeEmbedding(query);
|
||||
} catch (error) {
|
||||
this.logger.warn(
|
||||
`Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const results = await this.slSourcesRepository.search(connectionId, queryEmbedding, query, limit, minRrfScore);
|
||||
|
|
|
|||
|
|
@ -105,6 +105,33 @@ describe('SqliteSlSourcesIndex', () => {
|
|||
expect(await index.search('finance', null, 'revenue', 10)).toEqual([]);
|
||||
});
|
||||
|
||||
it('clear removes sources and dictionary rows for one connection only', async () => {
|
||||
const index = new SqliteSlSourcesIndex({ dbPath });
|
||||
await index.upsertSources('warehouse', [
|
||||
{ sourceName: 'orders', searchText: 'orders revenue paid', embedding: null },
|
||||
]);
|
||||
await index.upsertSources('finance', [
|
||||
{ sourceName: 'invoices', searchText: 'invoices revenue paid', embedding: null },
|
||||
]);
|
||||
await index.replaceDictionaryEntries('warehouse', [
|
||||
{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'paid', cardinality: 1 },
|
||||
]);
|
||||
await index.replaceDictionaryEntries('finance', [
|
||||
{ connectionId: 'finance', sourceName: 'invoices', columnName: 'status', value: 'paid', cardinality: 1 },
|
||||
]);
|
||||
|
||||
await expect(index.clear('warehouse')).resolves.toBe(1);
|
||||
|
||||
expect(await index.search('warehouse', null, 'revenue', 10)).toEqual([]);
|
||||
expect(await index.search('finance', null, 'revenue', 10)).toEqual([
|
||||
expect.objectContaining({ sourceName: 'invoices' }),
|
||||
]);
|
||||
await expect(index.searchDictionaryCandidates({ connectionIds: ['warehouse'], queryText: 'paid', limit: 10 }))
|
||||
.resolves.toEqual([]);
|
||||
await expect(index.searchDictionaryCandidates({ connectionIds: ['finance'], queryText: 'paid', limit: 10 }))
|
||||
.resolves.toEqual([expect.objectContaining({ connectionId: 'finance', sourceName: 'invoices' })]);
|
||||
});
|
||||
|
||||
it('returns lane candidates with stable connection-scoped IDs', async () => {
|
||||
const index = new SqliteSlSourcesIndex({ dbPath });
|
||||
|
||||
|
|
|
|||
|
|
@ -221,10 +221,9 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
);
|
||||
}
|
||||
|
||||
async deleteStale(connectionId: string, keepNames: string[]): Promise<void> {
|
||||
async deleteStale(connectionId: string, keepNames: string[]): Promise<number> {
|
||||
if (keepNames.length === 0) {
|
||||
await this.deleteByConnection(connectionId);
|
||||
return;
|
||||
return this.deleteByConnection(connectionId);
|
||||
}
|
||||
|
||||
const placeholders = keepNames.map(() => '?').join(', ');
|
||||
|
|
@ -257,18 +256,29 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
});
|
||||
|
||||
remove(stale.map((row) => row.source_name));
|
||||
return stale.length;
|
||||
}
|
||||
|
||||
async deleteByConnection(connectionId: string): Promise<void> {
|
||||
async deleteByConnection(connectionId: string): Promise<number> {
|
||||
return this.clear(connectionId);
|
||||
}
|
||||
|
||||
/**
 * Removes everything indexed for one connection: its source rows, its
 * dictionary values, and the matching full-text rows of both. All deletes
 * run inside a single transaction.
 *
 * @returns The number of source rows the connection had before the wipe.
 */
async clear(connectionId: string): Promise<number> {
  // Read the existing sources first so the count reflects what is about to go.
  const existing = this.db
    .prepare('SELECT source_name FROM local_sl_sources WHERE connection_id = ?')
    .all(connectionId) as Array<{ source_name: string }>;

  const purgeStatements = [
    'DELETE FROM local_sl_sources_fts WHERE connection_id = ?',
    'DELETE FROM local_sl_sources WHERE connection_id = ?',
    'DELETE FROM local_sl_dictionary_values_fts WHERE connection_id = ?',
    'DELETE FROM local_sl_dictionary_values WHERE connection_id = ?',
  ];
  const purge = this.db.transaction(() => {
    for (const sql of purgeStatements) {
      this.db.prepare(sql).run(connectionId);
    }
  });
  purge();

  return existing.length;
}
|
||||
|
||||
async deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<void> {
|
||||
this.deleteByConnectionAndNameSync(connectionId, sourceName);
|
||||
async deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<number> {
|
||||
return this.deleteByConnectionAndNameSync(connectionId, sourceName);
|
||||
}
|
||||
|
||||
async replaceDictionaryEntries(connectionId: string, entries: SlDictionaryEntry[]): Promise<void> {
|
||||
|
|
@ -537,7 +547,7 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
.filter((row) => row.rrfScore >= minRrfScore);
|
||||
}
|
||||
|
||||
private deleteByConnectionAndNameSync(connectionId: string, sourceName: string): void {
|
||||
private deleteByConnectionAndNameSync(connectionId: string, sourceName: string): number {
|
||||
const remove = this.db.transaction(() => {
|
||||
this.db
|
||||
.prepare(
|
||||
|
|
@ -548,7 +558,7 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
`,
|
||||
)
|
||||
.run(connectionId, sourceName);
|
||||
this.db
|
||||
const result = this.db
|
||||
.prepare(
|
||||
`
|
||||
DELETE FROM local_sl_sources
|
||||
|
|
@ -557,7 +567,8 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
`,
|
||||
)
|
||||
.run(connectionId, sourceName);
|
||||
return Number(result.changes);
|
||||
});
|
||||
remove();
|
||||
return remove();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ import { KnowledgeWikiService, type WikiFrontmatter } from './knowledge-wiki.ser
|
|||
function makeService() {
|
||||
const pagesRepository: Record<string, ReturnType<typeof vi.fn>> = {
|
||||
upsertPage: vi.fn().mockResolvedValue(undefined),
|
||||
deleteByKey: vi.fn().mockResolvedValue(undefined),
|
||||
deleteByScope: vi.fn().mockResolvedValue(undefined),
|
||||
deleteStale: vi.fn().mockResolvedValue(undefined),
|
||||
deleteByKey: vi.fn().mockResolvedValue(0),
|
||||
deleteByScope: vi.fn().mockResolvedValue(0),
|
||||
deleteStale: vi.fn().mockResolvedValue(0),
|
||||
getExistingSearchTexts: vi.fn().mockResolvedValue(new Map()),
|
||||
applyDiffTransactional: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
|
@ -50,6 +50,87 @@ function makeService() {
|
|||
|
||||
const fm: WikiFrontmatter = { summary: 'sum', usage_mode: 'auto' };
|
||||
|
||||
describe('KnowledgeWikiService.syncIndex result stats', () => {
  const REVENUE_FILE = 'wiki/global/revenue.md';
  const REVENUE_PAGE_WITHOUT_TAGS = '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n';

  // Builds a service whose embedding port (second constructor argument) is null.
  function makeLexicalOnlyService() {
    const { pagesRepository, configService, gitService, logger } = makeService();
    const service = new KnowledgeWikiService(
      configService as any,
      null,
      pagesRepository as any,
      gitService as any,
      logger as any,
    );
    return { service, pagesRepository, configService };
  }

  it('reports scanned, updated, deleted, and embedding counts', async () => {
    const { service, pagesRepository, embeddingService, configService } = makeService();
    configService.listFiles.mockResolvedValue({ files: [REVENUE_FILE] });
    configService.readFile.mockResolvedValue({
      content: '---\nsummary: Revenue\nusage_mode: auto\ntags:\n  - finance\n---\n\nPaid orders.\n',
    });
    // The index only knows a page that is no longer on disk.
    const indexedBefore = new Map([['old-page', { searchText: 'old', hasEmbedding: true }]]);
    pagesRepository.getExistingSearchTexts.mockResolvedValue(indexedBefore);
    embeddingService.computeEmbeddingsBulk.mockResolvedValue([[0.1, 0.2, 0.3]]);
    pagesRepository.deleteStale.mockResolvedValue(1);

    const stats = service.syncIndex('GLOBAL', null);

    await expect(stats).resolves.toEqual({
      scanned: 1,
      updated: 1,
      deleted: 1,
      embeddingsRecomputed: 1,
      embeddingsFailed: 0,
    });
  });

  it('indexes lexical rows when embeddings are not configured', async () => {
    const { service, pagesRepository, configService } = makeLexicalOnlyService();
    configService.listFiles.mockResolvedValue({ files: [REVENUE_FILE] });
    configService.readFile.mockResolvedValue({ content: REVENUE_PAGE_WITHOUT_TAGS });
    pagesRepository.getExistingSearchTexts.mockResolvedValue(new Map());
    pagesRepository.deleteStale.mockResolvedValue(0);

    const result = await service.syncIndex('GLOBAL', null);

    expect(result.embeddingsRecomputed).toBe(0);
    expect(result.embeddingsFailed).toBe(0);
    expect(pagesRepository.upsertPage).toHaveBeenCalledWith(
      expect.objectContaining({ pageKey: 'revenue', embedding: null }),
    );
  });

  it('does not update unchanged lexical-only wiki rows on repeated sync', async () => {
    const { service, pagesRepository, configService } = makeLexicalOnlyService();
    configService.listFiles.mockResolvedValue({ files: [REVENUE_FILE] });
    configService.readFile.mockResolvedValue({ content: REVENUE_PAGE_WITHOUT_TAGS });
    // Same search text already indexed, with no embedding stored.
    const indexedBefore = new Map([
      ['revenue', { searchText: 'revenue\nRevenue\nPaid orders.', hasEmbedding: false }],
    ]);
    pagesRepository.getExistingSearchTexts.mockResolvedValue(indexedBefore);
    pagesRepository.deleteStale.mockResolvedValue(0);

    await expect(service.syncIndex('GLOBAL', null)).resolves.toEqual({
      scanned: 1,
      updated: 0,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });
    expect(pagesRepository.upsertPage).not.toHaveBeenCalled();
    expect(pagesRepository.deleteStale).toHaveBeenCalledWith('GLOBAL', null, ['revenue']);
  });
});
|
||||
|
||||
describe('KnowledgeWikiService.forWorktree isolation', () => {
|
||||
it('syncSinglePage in worktree scope does not call pagesRepository.upsertPage', async () => {
|
||||
const { service, pagesRepository, embeddingService } = makeService();
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { createHash } from 'node:crypto';
|
|||
import YAML from 'yaml';
|
||||
import type { KtxEmbeddingPort, KtxFileStorePort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import type { ReindexWorkResult } from '../index-sync/types.js';
|
||||
import { assertFlatWikiKey, isFlatWikiKey } from './keys.js';
|
||||
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
|
||||
import type { KnowledgeGitDiffPort, KnowledgeIndexPort, UpsertPageParams } from './ports.js';
|
||||
|
|
@ -16,7 +17,7 @@ export class KnowledgeWikiService {
|
|||
|
||||
constructor(
|
||||
private readonly configService: KtxFileStorePort,
|
||||
private readonly embeddingService: KtxEmbeddingPort,
|
||||
private readonly embeddingService: KtxEmbeddingPort | null,
|
||||
private readonly pagesRepository: KnowledgeIndexPort,
|
||||
private readonly gitService: KnowledgeGitDiffPort,
|
||||
private readonly logger: KtxLogger = noopLogger,
|
||||
|
|
@ -246,10 +247,12 @@ export class KnowledgeWikiService {
|
|||
const searchText = buildKnowledgeSearchText(pageKey, frontmatter.summary, content, frontmatter.tags);
|
||||
|
||||
let embedding: number[] | null = null;
|
||||
try {
|
||||
embedding = await this.embeddingService.computeEmbedding(searchText);
|
||||
} catch (err) {
|
||||
this.logger.warn(`Embedding failed for page "${pageKey}": ${err instanceof Error ? err.message : String(err)}`);
|
||||
if (this.embeddingService) {
|
||||
try {
|
||||
embedding = await this.embeddingService.computeEmbedding(searchText);
|
||||
} catch (err) {
|
||||
this.logger.warn(`Embedding failed for page "${pageKey}": ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
await this.pagesRepository.upsertPage({
|
||||
|
|
@ -269,14 +272,21 @@ export class KnowledgeWikiService {
|
|||
* Full sync: load all pages from disk for a scope, reindex changed pages, clean stale entries.
|
||||
* Mirrors SlSearchService.indexSources() pattern.
|
||||
*/
|
||||
async syncIndex(scope: string, scopeId?: string | null): Promise<void> {
|
||||
async syncIndex(scope: string, scopeId?: string | null): Promise<ReindexWorkResult> {
|
||||
const pageKeys = await this.listPageKeys(scope, scopeId);
|
||||
const existing = await this.pagesRepository.getExistingSearchTexts(scope, scopeId ?? null);
|
||||
|
||||
if (pageKeys.length === 0) {
|
||||
await this.pagesRepository.deleteByScope(scope, scopeId ?? null);
|
||||
return;
|
||||
const deleted = await this.pagesRepository.deleteByScope(scope, scopeId ?? null);
|
||||
return {
|
||||
scanned: 0,
|
||||
updated: 0,
|
||||
deleted,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Load and parse all pages
|
||||
const pages: Array<{ pageKey: string; frontmatter: WikiFrontmatter; content: string; searchText: string }> = [];
|
||||
for (const key of pageKeys) {
|
||||
const page = await this.readPage(scope, scopeId, key);
|
||||
|
|
@ -286,58 +296,58 @@ export class KnowledgeWikiService {
|
|||
}
|
||||
}
|
||||
|
||||
// Detect changes
|
||||
const existing = await this.pagesRepository.getExistingSearchTexts(scope, scopeId ?? null);
|
||||
const changedPages = pages.filter((p) => {
|
||||
const ex = existing.get(p.pageKey);
|
||||
return !ex || ex.searchText !== p.searchText || !ex.hasEmbedding;
|
||||
const embeddingService = this.embeddingService;
|
||||
const changedPages = pages.filter((page) => {
|
||||
const previous = existing.get(page.pageKey);
|
||||
return (
|
||||
!previous ||
|
||||
previous.searchText !== page.searchText ||
|
||||
(embeddingService !== null && !previous.hasEmbedding)
|
||||
);
|
||||
});
|
||||
|
||||
if (changedPages.length === 0) {
|
||||
// Still clean up stale
|
||||
await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
|
||||
this.logger.log(`Wiki sync ${scope}: all ${pages.length} pages up to date`);
|
||||
return;
|
||||
}
|
||||
let embeddings: (number[] | null)[] = changedPages.map(() => null);
|
||||
let embeddingsRecomputed = 0;
|
||||
let embeddingsFailed = 0;
|
||||
|
||||
// Compute embeddings for changed pages (batched)
|
||||
const changedTexts = changedPages.map((p) => p.searchText);
|
||||
let embeddings: (number[] | null)[];
|
||||
try {
|
||||
const batchSize = this.embeddingService.maxBatchSize;
|
||||
const all: number[][] = [];
|
||||
for (let i = 0; i < changedTexts.length; i += batchSize) {
|
||||
const batch = changedTexts.slice(i, i + batchSize);
|
||||
const batchEmb = await this.embeddingService.computeEmbeddingsBulk(batch);
|
||||
all.push(...batchEmb);
|
||||
if (embeddingService && changedPages.length > 0) {
|
||||
try {
|
||||
const changedTexts = changedPages.map((page) => page.searchText);
|
||||
const all: number[][] = [];
|
||||
for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) {
|
||||
const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize);
|
||||
all.push(...(await embeddingService.computeEmbeddingsBulk(batch)));
|
||||
}
|
||||
embeddings = all;
|
||||
embeddingsRecomputed = all.length;
|
||||
} catch (err) {
|
||||
this.logger.warn(`Embedding batch failed during sync: ${err instanceof Error ? err.message : String(err)}`);
|
||||
embeddingsFailed = changedPages.length;
|
||||
}
|
||||
embeddings = all;
|
||||
} catch (err) {
|
||||
this.logger.warn(`Embedding batch failed during sync: ${err instanceof Error ? err.message : String(err)}`);
|
||||
embeddings = changedPages.map(() => null);
|
||||
}
|
||||
|
||||
// Upsert changed pages
|
||||
for (let i = 0; i < changedPages.length; i++) {
|
||||
const p = changedPages[i];
|
||||
for (let i = 0; i < changedPages.length; i += 1) {
|
||||
const page = changedPages[i]!;
|
||||
await this.pagesRepository.upsertPage({
|
||||
scope,
|
||||
scopeId: scopeId ?? null,
|
||||
pageKey: p.pageKey,
|
||||
summary: p.frontmatter.summary,
|
||||
usageMode: p.frontmatter.usage_mode,
|
||||
sortOrder: p.frontmatter.sort_order ?? 0,
|
||||
searchText: p.searchText,
|
||||
embedding: embeddings[i],
|
||||
pageKey: page.pageKey,
|
||||
summary: page.frontmatter.summary,
|
||||
usageMode: page.frontmatter.usage_mode,
|
||||
sortOrder: page.frontmatter.sort_order ?? 0,
|
||||
searchText: page.searchText,
|
||||
embedding: embeddings[i] ?? null,
|
||||
});
|
||||
}
|
||||
|
||||
// Clean stale entries
|
||||
await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
|
||||
|
||||
this.logger.log(
|
||||
`Wiki sync ${scope}: ${changedPages.length}/${pages.length} reindexed, ${pages.length - changedPages.length} unchanged`,
|
||||
);
|
||||
const deleted = await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
|
||||
return {
|
||||
scanned: pages.length,
|
||||
updated: changedPages.length,
|
||||
deleted,
|
||||
embeddingsRecomputed,
|
||||
embeddingsFailed,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -388,12 +398,14 @@ export class KnowledgeWikiService {
|
|||
parsed.frontmatter.tags,
|
||||
);
|
||||
let embedding: number[] | null = null;
|
||||
try {
|
||||
embedding = await this.embeddingService.computeEmbedding(searchText);
|
||||
} catch (err) {
|
||||
this.logger.warn(
|
||||
`[wiki.sync] embedding failed for ${parsedPath.pageKey}: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
if (this.embeddingService) {
|
||||
try {
|
||||
embedding = await this.embeddingService.computeEmbedding(searchText);
|
||||
} catch (err) {
|
||||
this.logger.warn(
|
||||
`[wiki.sync] embedding failed for ${parsedPath.pageKey}: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
const contentHash = createHash('sha256').update(content).digest('hex');
|
||||
upserts.push({
|
||||
|
|
|
|||
|
|
@ -33,9 +33,9 @@ export interface KnowledgeIndexPort {
|
|||
scope: string,
|
||||
scopeId: string | null,
|
||||
): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>>;
|
||||
deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<void>;
|
||||
deleteByScope(scope: string, scopeId: string | null): Promise<void>;
|
||||
deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<void>;
|
||||
deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<number>;
|
||||
deleteByScope(scope: string, scopeId: string | null): Promise<number>;
|
||||
deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<number>;
|
||||
findPageByKey(
|
||||
scope: string,
|
||||
scopeId: string | null,
|
||||
|
|
|
|||
|
|
@ -65,6 +65,35 @@ describe('SqliteKnowledgeIndex', () => {
|
|||
expect(index.search('churn', 10)).toEqual([]);
|
||||
});
|
||||
|
||||
it('clear removes one wiki scope and leaves other scopes intact', async () => {
  // One GLOBAL page plus the same key under two different USER scope ids.
  const globalRevenue = page({ path: 'wiki/global/revenue.md', key: 'revenue', scope: 'GLOBAL', scopeId: null });
  const localRevenue = page({
    path: 'wiki/user/local/revenue.md',
    key: 'revenue',
    scope: 'USER',
    scopeId: 'local',
    summary: 'Local revenue',
    content: 'Local revenue notes.',
  });
  const alexRevenue = page({
    path: 'wiki/user/alex/revenue.md',
    key: 'revenue',
    scope: 'USER',
    scopeId: 'alex',
    summary: 'Alex revenue',
    content: 'Alex revenue notes.',
  });
  const index = new SqliteKnowledgeIndex({ dbPath });
  index.sync([globalRevenue, localRevenue, alexRevenue]);

  const removed = index.clear('USER', 'local');

  expect(removed).toBe(1);
  expect(index.search('Local', 10)).toEqual([]);
  expect(index.search('Alex', 10)).toEqual([expect.objectContaining({ path: 'wiki/user/alex/revenue.md' })]);
  expect(index.search('definition', 10)).toEqual([expect.objectContaining({ path: 'wiki/global/revenue.md' })]);
});
|
||||
|
||||
it('exposes existing search text and embedding state for incremental refresh', () => {
|
||||
const index = new SqliteKnowledgeIndex({ dbPath });
|
||||
index.sync([page({ path: 'wiki/global/revenue.md', key: 'revenue', embedding: [1, 0] })]);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { dirname } from 'node:path';
|
|||
import Database from 'better-sqlite3';
|
||||
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
|
||||
import type { LocalKnowledgeScope } from './local-knowledge.js';
|
||||
import type { KnowledgeIndexPageListing, UpsertPageParams } from './ports.js';
|
||||
|
||||
export interface SqliteKnowledgeIndexOptions {
|
||||
dbPath: string;
|
||||
|
|
@ -12,6 +13,7 @@ export interface SqliteKnowledgeIndexPage {
|
|||
path: string;
|
||||
key: string;
|
||||
scope: LocalKnowledgeScope;
|
||||
scopeId?: string | null;
|
||||
summary: string;
|
||||
content: string;
|
||||
tags: string[];
|
||||
|
|
@ -106,6 +108,7 @@ export class SqliteKnowledgeIndex {
|
|||
path TEXT PRIMARY KEY,
|
||||
key TEXT NOT NULL,
|
||||
scope TEXT NOT NULL,
|
||||
scope_id TEXT,
|
||||
summary TEXT NOT NULL,
|
||||
content TEXT NOT NULL,
|
||||
tags TEXT NOT NULL,
|
||||
|
|
@ -129,6 +132,9 @@ export class SqliteKnowledgeIndex {
|
|||
if (!columnNames.has('embedding_json')) {
|
||||
this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN embedding_json TEXT');
|
||||
}
|
||||
if (!columnNames.has('scope_id')) {
|
||||
this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN scope_id TEXT');
|
||||
}
|
||||
}
|
||||
|
||||
sync(pages: SqliteKnowledgeIndexPage[]): void {
|
||||
|
|
@ -142,11 +148,12 @@ export class SqliteKnowledgeIndex {
|
|||
? this.db.prepare('DELETE FROM knowledge_pages_fts')
|
||||
: this.db.prepare(`DELETE FROM knowledge_pages_fts WHERE path NOT IN (${keepPaths.map(() => '?').join(', ')})`);
|
||||
const upsertPage = this.db.prepare(`
|
||||
INSERT INTO knowledge_pages (path, key, scope, summary, content, tags, search_text, embedding_json)
|
||||
VALUES (@path, @key, @scope, @summary, @content, @tags, @searchText, @embeddingJson)
|
||||
INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
|
||||
VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
|
||||
ON CONFLICT(path) DO UPDATE SET
|
||||
key = excluded.key,
|
||||
scope = excluded.scope,
|
||||
scope_id = excluded.scope_id,
|
||||
summary = excluded.summary,
|
||||
content = excluded.content,
|
||||
tags = excluded.tags,
|
||||
|
|
@ -168,6 +175,7 @@ export class SqliteKnowledgeIndex {
|
|||
path: page.path,
|
||||
key: page.key,
|
||||
scope: page.scope,
|
||||
scopeId: page.scopeId ?? null,
|
||||
summary: page.summary,
|
||||
content: searchText,
|
||||
tags: page.tags.join(' '),
|
||||
|
|
@ -275,4 +283,201 @@ export class SqliteKnowledgeIndex {
|
|||
score: scoreFromRank(row.rawScore),
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Derives the index path for a wiki page from its scope, scope id, and key.
 *
 * `GLOBAL` pages map to `wiki/global/<key>.md`; any other scope maps to
 * `wiki/user/<scopeId>/<key>.md`, using `local` when the scope id is null.
 */
private pathForPage(scope: string, scopeId: string | null, pageKey: string): string {
  if (scope === 'GLOBAL') {
    return `wiki/global/${pageKey}.md`;
  }
  const owner = scopeId ?? 'local';
  return `wiki/user/${owner}/${pageKey}.md`;
}
|
||||
|
||||
/**
 * Inserts or updates one wiki page row and rebuilds its full-text row.
 *
 * The row path is derived from scope, scope id, and page key. Both `content` and
 * `search_text` receive `params.searchText`, and `tags` is written as an empty string.
 * The embedding is stored as JSON only when it is present and non-empty; otherwise NULL.
 * The page upsert and the FTS delete + insert share one transaction.
 */
async upsertPage(params: UpsertPageParams): Promise<void> {
  const embeddingJson = params.embedding?.length ? JSON.stringify(params.embedding) : null;
  const record = {
    path: this.pathForPage(params.scope, params.scopeId, params.pageKey),
    key: params.pageKey,
    scope: params.scope,
    scopeId: params.scopeId,
    summary: params.summary,
    content: params.searchText,
    tags: '',
    searchText: params.searchText,
    embeddingJson,
  };

  const persist = this.db.transaction(() => {
    const upsertRow = this.db.prepare(
      `
        INSERT INTO knowledge_pages (path, key, scope, scope_id, summary, content, tags, search_text, embedding_json)
        VALUES (@path, @key, @scope, @scopeId, @summary, @content, @tags, @searchText, @embeddingJson)
        ON CONFLICT(path) DO UPDATE SET
          key = excluded.key,
          scope = excluded.scope,
          scope_id = excluded.scope_id,
          summary = excluded.summary,
          content = excluded.content,
          tags = excluded.tags,
          search_text = excluded.search_text,
          embedding_json = excluded.embedding_json
      `,
    );
    upsertRow.run(record);

    // The FTS row is replaced by delete-then-insert rather than updated in place.
    const dropFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = @path');
    dropFtsRow.run(record);

    const insertFtsRow = this.db.prepare(
      `
        INSERT INTO knowledge_pages_fts (path, key, summary, content, tags)
        VALUES (@path, @key, @summary, @content, @tags)
      `,
    );
    insertFtsRow.run(record);
  });
  persist();
}
|
||||
|
||||
/**
 * Loads the indexed search text and embedding presence for every page in a scope.
 *
 * @param scope - Scope value matched against the `scope` column.
 * @param scopeId - Scope id matched with `IS`, so null matches rows whose `scope_id` is NULL.
 * @returns Map from page key to its stored search text and whether `embedding_json` is non-null.
 */
async getExistingSearchTexts(
  scope: string,
  scopeId: string | null,
): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>> {
  const statement = this.db.prepare(
    `
      SELECT key, search_text, embedding_json
      FROM knowledge_pages
      WHERE scope = ?
        AND scope_id IS ?
      ORDER BY key ASC
    `,
  );
  const records = statement.all(scope, scopeId) as Array<{
    key: string;
    search_text: string;
    embedding_json: string | null;
  }>;

  const byKey = new Map<string, { searchText: string; hasEmbedding: boolean }>();
  for (const record of records) {
    byKey.set(record.key, {
      searchText: record.search_text,
      hasEmbedding: record.embedding_json !== null,
    });
  }
  return byKey;
}
|
||||
|
||||
/**
 * Deletes every indexed page in a scope whose key is not in `keepKeys`.
 *
 * Stale rows are removed by their stored `path` inside one transaction, so the page
 * table and the FTS table stay in step and the returned count reflects rows that were
 * actually deleted. Filtering happens in memory, which avoids building a
 * `NOT IN (?, ?, …)` list bounded by SQLite's bound-parameter limit.
 *
 * @param scope - Scope value matched against the `scope` column.
 * @param scopeId - Scope id matched with `IS`, so null matches rows whose `scope_id` is NULL.
 * @param keepKeys - Page keys that must remain indexed; an empty list clears the whole scope.
 * @returns Number of rows removed from `knowledge_pages`.
 */
async deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise<number> {
  if (keepKeys.length === 0) {
    return this.deleteByScope(scope, scopeId);
  }

  const keep = new Set(keepKeys);
  const indexed = this.db
    .prepare('SELECT path, key FROM knowledge_pages WHERE scope = ? AND scope_id IS ?')
    .all(scope, scopeId) as Array<{ path: string; key: string }>;
  const stalePaths = indexed.filter((row) => !keep.has(row.key)).map((row) => row.path);
  if (stalePaths.length === 0) {
    return 0;
  }

  // Prepared once and reused for every stale row.
  const deleteFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?');
  const deletePageRow = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?');
  const removeAll = this.db.transaction((paths: string[]) => {
    let removed = 0;
    for (const path of paths) {
      deleteFtsRow.run(path);
      removed += Number(deletePageRow.run(path).changes);
    }
    return removed;
  });
  return removeAll(stalePaths);
}
|
||||
|
||||
/**
 * Async wrapper around {@link clear}: removes every indexed page in the given scope.
 *
 * @returns The count reported by `clear` for that scope.
 */
async deleteByScope(scope: string, scopeId: string | null): Promise<number> {
  const removedCount = this.clear(scope, scopeId);
  return removedCount;
}
|
||||
|
||||
/**
 * Deletes one page, addressed by the path derived from scope, scope id, and key.
 *
 * The FTS row and the page row are removed in a single transaction.
 *
 * @returns Number of rows deleted from `knowledge_pages`.
 */
async deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise<number> {
  const targetPath = this.pathForPage(scope, scopeId, pageKey);
  const dropPage = this.db.transaction(() => {
    this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?').run(targetPath);
    const outcome = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?').run(targetPath);
    return Number(outcome.changes);
  });
  const deletedRows = dropPage();
  return deletedRows;
}
|
||||
|
||||
/**
 * Removes every indexed page (and its FTS row) belonging to one scope.
 *
 * Rows are selected by `scope` and `scope_id` (compared with `IS`, so null matches
 * NULL) and then deleted by their stored path inside a single transaction.
 *
 * @returns Number of `knowledge_pages` rows that matched the scope before deletion.
 */
clear(scope: string, scopeId: string | null): number {
  const rows = this.db
    .prepare('SELECT path FROM knowledge_pages WHERE scope = ? AND scope_id IS ?')
    .all(scope, scopeId) as Array<{ path: string }>;

  // Prepare once instead of re-preparing both statements for every path.
  const deleteFtsRow = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = ?');
  const deletePageRow = this.db.prepare('DELETE FROM knowledge_pages WHERE path = ?');
  const remove = this.db.transaction((paths: string[]) => {
    for (const path of paths) {
      deleteFtsRow.run(path);
      deletePageRow.run(path);
    }
  });
  remove(rows.map((row) => row.path));
  return rows.length;
}
|
||||
|
||||
/**
 * Applies a batch of page upserts followed by a batch of page deletes.
 *
 * `runId` is accepted but not used. Upserts are applied in order first, then deletes.
 *
 * NOTE(review): each item goes through `upsertPage` / `deleteByKey` individually; no
 * transaction spanning the whole batch is opened here, so a failure part-way through
 * leaves earlier items applied — confirm whether callers rely on atomicity.
 */
async applyDiffTransactional(params: {
  runId: string;
  upserts: UpsertPageParams[];
  deletes: Array<{ scope: string; scopeId: string | null; pageKey: string }>;
}): Promise<void> {
  void params.runId;
  const { upserts, deletes } = params;

  for (const upsert of upserts) {
    await this.upsertPage(upsert);
  }
  for (const { scope, scopeId, pageKey } of deletes) {
    await this.deleteByKey(scope, scopeId, pageKey);
  }
}
|
||||
|
||||
/**
 * Looks up a page by the path derived from scope, scope id, and key.
 *
 * @returns `{ id, page_key }` where `id` is the stored path, or null when no row has that path.
 */
async findPageByKey(
  scope: string,
  scopeId: string | null,
  pageKey: string,
): Promise<{ id?: string; page_key: string } | null> {
  const lookupPath = this.pathForPage(scope, scopeId, pageKey);
  const match = this.db.prepare('SELECT path, key FROM knowledge_pages WHERE path = ?').get(lookupPath) as
    | { path: string; key: string }
    | undefined;
  if (!match) {
    return null;
  }
  return { id: match.path, page_key: match.key };
}
|
||||
|
||||
/**
 * Lists the pages visible to a user: every `GLOBAL` page plus `USER` pages whose
 * `scope_id` equals `userId`, ordered by scope and then key.
 *
 * The stored `tags` string is split on whitespace into an array; the stored path is
 * returned as `id`.
 */
async listPagesForUser(userId: string): Promise<KnowledgeIndexPageListing[]> {
  const statement = this.db.prepare(
    `
      SELECT path, key, scope, scope_id, summary, tags
      FROM knowledge_pages
      WHERE scope = 'GLOBAL'
         OR (scope = 'USER' AND scope_id = ?)
      ORDER BY scope ASC, key ASC
    `,
  );
  const records = statement.all(userId) as Array<{
    path: string;
    key: string;
    scope: string;
    scope_id: string | null;
    summary: string;
    tags: string;
  }>;

  const listings: KnowledgeIndexPageListing[] = [];
  for (const record of records) {
    listings.push({
      id: record.path,
      page_key: record.key,
      summary: record.summary,
      scope: record.scope,
      scope_id: record.scope_id,
      tags: record.tags.split(/\s+/).filter(Boolean),
    });
  }
  return listings;
}
|
||||
|
||||
/**
 * Counts the indexed `USER`-scope pages whose `scope_id` equals `userId`.
 */
async getUserPageCount(userId: string): Promise<number> {
  const statement = this.db.prepare(
    "SELECT COUNT(*) AS count FROM knowledge_pages WHERE scope = 'USER' AND scope_id = ?",
  );
  const { count } = statement.get(userId) as { count: number };
  return count;
}
|
||||
|
||||
/** Intentional no-op: this index does not record page usage counts. */
async incrementUsageCount(): Promise<void> {}
|
||||
|
||||
/**
 * Text search restricted to the pages a user may see.
 *
 * Runs `search(queryText, limit)` and keeps only hits whose path belongs to the pages
 * returned by `listPagesForUser(userId)`. The embedding argument is ignored. Because
 * the limit is applied before the visibility filter, fewer than `limit` results may be
 * returned.
 *
 * @returns Visible hits in search order, with the search score reported as `rrfScore`.
 */
async searchRRF(
  userId: string,
  _embedding: number[] | null,
  queryText: string,
  limit: number,
): Promise<Array<{ pageKey: string; summary: string; rrfScore: number }>> {
  const visiblePages = await this.listPagesForUser(userId);
  const visibleById = new Map(visiblePages.map((listing) => [listing.id, listing]));

  const results: Array<{ pageKey: string; summary: string; rrfScore: number }> = [];
  for (const hit of this.search(queryText, limit)) {
    const listing = visibleById.get(hit.path);
    if (listing) {
      results.push({ pageKey: listing.page_key, summary: listing.summary, rrfScore: hit.score });
    }
  }
  return results;
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue