From ba5d01046bfe89434f6a576d46809dfafd79f455 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 20 May 2026 01:12:27 +0200 Subject: [PATCH] fix: keep lexical-only reindex incremental --- .../context/src/index-sync/reindex.test.ts | 38 +++++++++++++++++++ .../context/src/sl/sl-search.service.test.ts | 34 +++++++++++++++++ packages/context/src/sl/sl-search.service.ts | 15 +++++--- .../src/wiki/knowledge-wiki.service.test.ts | 31 +++++++++++++++ .../src/wiki/knowledge-wiki.service.ts | 15 +++++--- 5 files changed, 123 insertions(+), 10 deletions(-) diff --git a/packages/context/src/index-sync/reindex.test.ts b/packages/context/src/index-sync/reindex.test.ts index 70963661..beb62342 100644 --- a/packages/context/src/index-sync/reindex.test.ts +++ b/packages/context/src/index-sync/reindex.test.ts @@ -83,6 +83,44 @@ describe('reindexLocalIndexes', () => { expect(summary.embeddingsAvailable).toBe(true); }); + it('does not report unchanged lexical-only rows as updated on repeated runs', async () => { + const project = await createProject(tempDir); + await writeFile( + join(project.projectDir, 'wiki/global/revenue.md'), + '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n', + 'utf-8', + ); + await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'), + 'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n', + 'utf-8', + ); + + const first = await reindexLocalIndexes(project, { force: false, embeddingService: null }); + expect(first.totals).toMatchObject({ + scanned: 2, + updated: 2, + deleted: 0, + embeddingsRecomputed: 0, + embeddingsFailed: 0, + }); + + const second = await reindexLocalIndexes(project, { force: false, embeddingService: null }); + + expect(second.totals).toMatchObject({ + scanned: 2, + updated: 0, + deleted: 0, + embeddingsRecomputed: 0, + embeddingsFailed: 0, + }); + expect(second.scopes.map((scope) => [scope.label, scope.updated])).toEqual([ + ['global', 0], + ['warehouse', 0], + ]); + }); + it('force clears stale rows before rebuilding each discovered scope', async () => { const project = await createProject(tempDir); const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') }); diff --git a/packages/context/src/sl/sl-search.service.test.ts b/packages/context/src/sl/sl-search.service.test.ts index 53ed6d13..164c3954 100644 --- a/packages/context/src/sl/sl-search.service.test.ts +++ b/packages/context/src/sl/sl-search.service.test.ts @@ -258,4 +258,38 @@ describe('SlSearchService', () => { expect.objectContaining({ sourceName: 'orders', embedding: null }), ]); }); + + it('does not update unchanged lexical-only SL rows on repeated sync', async () => { + const repository = { + upsertSources: vi.fn().mockResolvedValue(undefined), + getExistingSearchTexts: vi.fn().mockResolvedValue( + new Map([ + ['orders', { searchText: 'orders. table: public.orders. id (number)', hasEmbedding: false }], + ]), + ), + deleteStale: vi.fn().mockResolvedValue(0), + deleteByConnection: vi.fn().mockResolvedValue(0), + deleteByConnectionAndName: vi.fn(), + search: vi.fn(), + }; + const service = new SlSearchService(null, repository); + const source: SemanticLayerSource = { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [], + measures: [], + }; + + await expect(service.indexSources('warehouse', [source])).resolves.toEqual({ + scanned: 1, + updated: 0, + deleted: 0, + embeddingsRecomputed: 0, + embeddingsFailed: 0, + }); + expect(repository.upsertSources).toHaveBeenCalledWith('warehouse', []); + expect(repository.deleteStale).toHaveBeenCalledWith('warehouse', ['orders']); + }); }); diff --git a/packages/context/src/sl/sl-search.service.ts b/packages/context/src/sl/sl-search.service.ts index 016301a0..0a7ecfb5 100644 --- a/packages/context/src/sl/sl-search.service.ts +++ b/packages/context/src/sl/sl-search.service.ts @@ -109,10 +109,15 @@ export class SlSearchService { const searchTexts = sources.map((s) => this.buildSearchText(s)); + const embeddingService = this.embeddingService; const changedIndices: number[] = []; for (let i = 0; i < sources.length; i += 1) { const previous = existing.get(sources[i]!.name); - if (!previous || previous.searchText !== searchTexts[i] || !previous.hasEmbedding) { + if ( + !previous || + previous.searchText !== searchTexts[i] || + (embeddingService !== null && !previous.hasEmbedding) + ) { changedIndices.push(i); } } @@ -121,13 +126,13 @@ export class SlSearchService { let embeddingsRecomputed = 0; let embeddingsFailed = 0; - if (this.embeddingService && changedIndices.length > 0) { + if (embeddingService && changedIndices.length > 0) { try { const changedTexts = changedIndices.map((index) => searchTexts[index]!); const allEmbeddings: number[][] = []; - for (let i = 0; i < changedTexts.length; i += this.embeddingService.maxBatchSize) { - const batch = changedTexts.slice(i, i + this.embeddingService.maxBatchSize); - allEmbeddings.push(...(await this.embeddingService.computeEmbeddingsBulk(batch))); + for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) { + const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize); + allEmbeddings.push(...(await embeddingService.computeEmbeddingsBulk(batch))); } changedEmbeddings = allEmbeddings; embeddingsRecomputed = allEmbeddings.length; diff --git a/packages/context/src/wiki/knowledge-wiki.service.test.ts b/packages/context/src/wiki/knowledge-wiki.service.test.ts index 9360b891..d9242f97 100644 --- a/packages/context/src/wiki/knowledge-wiki.service.test.ts +++ b/packages/context/src/wiki/knowledge-wiki.service.test.ts @@ -98,6 +98,37 @@ describe('KnowledgeWikiService.syncIndex result stats', () => { expect.objectContaining({ pageKey: 'revenue', embedding: null }), ); }); + + it('does not update unchanged lexical-only wiki rows on repeated sync', async () => { + const { pagesRepository, configService, gitService, logger } = makeService(); + const service = new KnowledgeWikiService( + configService as any, + null, + pagesRepository as any, + gitService as any, + logger as any, + ); + configService.listFiles.mockResolvedValue({ files: ['wiki/global/revenue.md'] }); + configService.readFile.mockResolvedValue({ + content: '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n', + }); + pagesRepository.getExistingSearchTexts.mockResolvedValue( + new Map([ + ['revenue', { searchText: 'revenue\nRevenue\nPaid orders.', hasEmbedding: false }], + ]), + ); + pagesRepository.deleteStale.mockResolvedValue(0); + + await expect(service.syncIndex('GLOBAL', null)).resolves.toEqual({ + scanned: 1, + updated: 0, + deleted: 0, + embeddingsRecomputed: 0, + embeddingsFailed: 0, + }); + expect(pagesRepository.upsertPage).not.toHaveBeenCalled(); + expect(pagesRepository.deleteStale).toHaveBeenCalledWith('GLOBAL', null, ['revenue']); + }); }); describe('KnowledgeWikiService.forWorktree isolation', () => { diff --git a/packages/context/src/wiki/knowledge-wiki.service.ts b/packages/context/src/wiki/knowledge-wiki.service.ts index a2a19489..88447c14 100644 --- a/packages/context/src/wiki/knowledge-wiki.service.ts +++ b/packages/context/src/wiki/knowledge-wiki.service.ts @@ -296,22 +296,27 @@ export class KnowledgeWikiService { } } + const embeddingService = this.embeddingService; const changedPages = pages.filter((page) => { const previous = existing.get(page.pageKey); - return !previous || previous.searchText !== page.searchText || !previous.hasEmbedding; + return ( + !previous || + previous.searchText !== page.searchText || + (embeddingService !== null && !previous.hasEmbedding) + ); }); let embeddings: (number[] | null)[] = changedPages.map(() => null); let embeddingsRecomputed = 0; let embeddingsFailed = 0; - if (this.embeddingService && changedPages.length > 0) { + if (embeddingService && changedPages.length > 0) { try { const changedTexts = changedPages.map((page) => page.searchText); const all: number[][] = []; - for (let i = 0; i < changedTexts.length; i += this.embeddingService.maxBatchSize) { - const batch = changedTexts.slice(i, i + this.embeddingService.maxBatchSize); - all.push(...(await this.embeddingService.computeEmbeddingsBulk(batch))); + for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) { + const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize); + all.push(...(await embeddingService.computeEmbeddingsBulk(batch))); } embeddings = all; embeddingsRecomputed = all.length;