mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
fix: keep lexical-only reindex incremental
This commit is contained in:
parent
a4ae2213b4
commit
ba5d01046b
5 changed files with 123 additions and 10 deletions
|
|
@ -83,6 +83,44 @@ describe('reindexLocalIndexes', () => {
|
|||
expect(summary.embeddingsAvailable).toBe(true);
|
||||
});
|
||||
|
||||
it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await writeFile(
|
||||
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
|
||||
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
expect(first.totals).toMatchObject({
|
||||
scanned: 2,
|
||||
updated: 2,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
|
||||
const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
|
||||
expect(second.totals).toMatchObject({
|
||||
scanned: 2,
|
||||
updated: 0,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
expect(second.scopes.map((scope) => [scope.label, scope.updated])).toEqual([
|
||||
['global', 0],
|
||||
['warehouse', 0],
|
||||
]);
|
||||
});
|
||||
|
||||
it('force clears stale rows before rebuilding each discovered scope', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
|
||||
|
|
|
|||
|
|
@ -258,4 +258,38 @@ describe('SlSearchService', () => {
|
|||
expect.objectContaining({ sourceName: 'orders', embedding: null }),
|
||||
]);
|
||||
});
|
||||
|
||||
it('does not update unchanged lexical-only SL rows on repeated sync', async () => {
|
||||
const repository = {
|
||||
upsertSources: vi.fn().mockResolvedValue(undefined),
|
||||
getExistingSearchTexts: vi.fn().mockResolvedValue(
|
||||
new Map([
|
||||
['orders', { searchText: 'orders. table: public.orders. id (number)', hasEmbedding: false }],
|
||||
]),
|
||||
),
|
||||
deleteStale: vi.fn().mockResolvedValue(0),
|
||||
deleteByConnection: vi.fn().mockResolvedValue(0),
|
||||
deleteByConnectionAndName: vi.fn(),
|
||||
search: vi.fn(),
|
||||
};
|
||||
const service = new SlSearchService(null, repository);
|
||||
const source: SemanticLayerSource = {
|
||||
name: 'orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
};
|
||||
|
||||
await expect(service.indexSources('warehouse', [source])).resolves.toEqual({
|
||||
scanned: 1,
|
||||
updated: 0,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
expect(repository.upsertSources).toHaveBeenCalledWith('warehouse', []);
|
||||
expect(repository.deleteStale).toHaveBeenCalledWith('warehouse', ['orders']);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -109,10 +109,15 @@ export class SlSearchService {
|
|||
|
||||
const searchTexts = sources.map((s) => this.buildSearchText(s));
|
||||
|
||||
const embeddingService = this.embeddingService;
|
||||
const changedIndices: number[] = [];
|
||||
for (let i = 0; i < sources.length; i += 1) {
|
||||
const previous = existing.get(sources[i]!.name);
|
||||
if (!previous || previous.searchText !== searchTexts[i] || !previous.hasEmbedding) {
|
||||
if (
|
||||
!previous ||
|
||||
previous.searchText !== searchTexts[i] ||
|
||||
(embeddingService !== null && !previous.hasEmbedding)
|
||||
) {
|
||||
changedIndices.push(i);
|
||||
}
|
||||
}
|
||||
|
|
@ -121,13 +126,13 @@ export class SlSearchService {
|
|||
let embeddingsRecomputed = 0;
|
||||
let embeddingsFailed = 0;
|
||||
|
||||
if (this.embeddingService && changedIndices.length > 0) {
|
||||
if (embeddingService && changedIndices.length > 0) {
|
||||
try {
|
||||
const changedTexts = changedIndices.map((index) => searchTexts[index]!);
|
||||
const allEmbeddings: number[][] = [];
|
||||
for (let i = 0; i < changedTexts.length; i += this.embeddingService.maxBatchSize) {
|
||||
const batch = changedTexts.slice(i, i + this.embeddingService.maxBatchSize);
|
||||
allEmbeddings.push(...(await this.embeddingService.computeEmbeddingsBulk(batch)));
|
||||
for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) {
|
||||
const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize);
|
||||
allEmbeddings.push(...(await embeddingService.computeEmbeddingsBulk(batch)));
|
||||
}
|
||||
changedEmbeddings = allEmbeddings;
|
||||
embeddingsRecomputed = allEmbeddings.length;
|
||||
|
|
|
|||
|
|
@ -98,6 +98,37 @@ describe('KnowledgeWikiService.syncIndex result stats', () => {
|
|||
expect.objectContaining({ pageKey: 'revenue', embedding: null }),
|
||||
);
|
||||
});
|
||||
|
||||
it('does not update unchanged lexical-only wiki rows on repeated sync', async () => {
|
||||
const { pagesRepository, configService, gitService, logger } = makeService();
|
||||
const service = new KnowledgeWikiService(
|
||||
configService as any,
|
||||
null,
|
||||
pagesRepository as any,
|
||||
gitService as any,
|
||||
logger as any,
|
||||
);
|
||||
configService.listFiles.mockResolvedValue({ files: ['wiki/global/revenue.md'] });
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
});
|
||||
pagesRepository.getExistingSearchTexts.mockResolvedValue(
|
||||
new Map([
|
||||
['revenue', { searchText: 'revenue\nRevenue\nPaid orders.', hasEmbedding: false }],
|
||||
]),
|
||||
);
|
||||
pagesRepository.deleteStale.mockResolvedValue(0);
|
||||
|
||||
await expect(service.syncIndex('GLOBAL', null)).resolves.toEqual({
|
||||
scanned: 1,
|
||||
updated: 0,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
expect(pagesRepository.upsertPage).not.toHaveBeenCalled();
|
||||
expect(pagesRepository.deleteStale).toHaveBeenCalledWith('GLOBAL', null, ['revenue']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('KnowledgeWikiService.forWorktree isolation', () => {
|
||||
|
|
|
|||
|
|
@ -296,22 +296,27 @@ export class KnowledgeWikiService {
|
|||
}
|
||||
}
|
||||
|
||||
const embeddingService = this.embeddingService;
|
||||
const changedPages = pages.filter((page) => {
|
||||
const previous = existing.get(page.pageKey);
|
||||
return !previous || previous.searchText !== page.searchText || !previous.hasEmbedding;
|
||||
return (
|
||||
!previous ||
|
||||
previous.searchText !== page.searchText ||
|
||||
(embeddingService !== null && !previous.hasEmbedding)
|
||||
);
|
||||
});
|
||||
|
||||
let embeddings: (number[] | null)[] = changedPages.map(() => null);
|
||||
let embeddingsRecomputed = 0;
|
||||
let embeddingsFailed = 0;
|
||||
|
||||
if (this.embeddingService && changedPages.length > 0) {
|
||||
if (embeddingService && changedPages.length > 0) {
|
||||
try {
|
||||
const changedTexts = changedPages.map((page) => page.searchText);
|
||||
const all: number[][] = [];
|
||||
for (let i = 0; i < changedTexts.length; i += this.embeddingService.maxBatchSize) {
|
||||
const batch = changedTexts.slice(i, i + this.embeddingService.maxBatchSize);
|
||||
all.push(...(await this.embeddingService.computeEmbeddingsBulk(batch)));
|
||||
for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) {
|
||||
const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize);
|
||||
all.push(...(await embeddingService.computeEmbeddingsBulk(batch)));
|
||||
}
|
||||
embeddings = all;
|
||||
embeddingsRecomputed = all.length;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue