# Patch summary (commentary placed before the first `diff --git` header; not part of any hunk).
#
# Purpose: stop empty embedding arrays from being stored or read back as semantic vectors.
#
# packages/context/src/wiki/sqlite-knowledge-index.ts
#   - parseEmbedding: in addition to "is an array of numbers", the parsed JSON must now have
#     length > 0; otherwise the function returns null.
#   - SqliteKnowledgeIndex row construction: `embeddingJson` is JSON.stringify(page.embedding)
#     only when page.embedding is present and non-empty; otherwise it is null.
#
# packages/context/src/wiki/sqlite-knowledge-index.test.ts
#   - New test: after index.sync() of a page with `embedding: []`, getExistingPages() reports a
#     null embedding for that path and searchSemanticCandidates() returns [].
#
# packages/context/src/ingest/local-bundle-ingest.test.ts
#   - New test in describe('canonical local ingest'): rewrites ktx.yaml with an ingest section
#     that lists the `fake` adapter and `embeddings` / `backend: none`, reloads the project, runs
#     runLocalIngest with FakeSourceAdapter, then opens .ktx/db.sqlite read-only and expects the
#     single knowledge_pages row (key `orders_context`) to have `embedding_json IS NOT NULL` = 0.
#
# NOTE(review): this copy of the patch has its line breaks and indentation collapsed to single
# spaces. The YAML fixture strings below therefore show one-space indentation (' warehouse:',
# ' driver: postgres', ' embeddings:', ' backend: none'); presumably the original nests
# `driver` under `warehouse` and `backend` under `embeddings` -- TODO confirm against the
# original patch before applying, since hunk context must match the target files exactly.
diff --git a/packages/context/src/ingest/local-bundle-ingest.test.ts b/packages/context/src/ingest/local-bundle-ingest.test.ts index ce6b3803..f631e6ed 100644 --- a/packages/context/src/ingest/local-bundle-ingest.test.ts +++ b/packages/context/src/ingest/local-bundle-ingest.test.ts @@ -403,6 +403,50 @@ describe('canonical local ingest', () => { } }); + it('does not persist noop embedding vectors when local embeddings are disabled', async () => { + await writeFile( + join(project.projectDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - fake', + ' embeddings:', + ' backend: none', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKtxProject({ projectDir: project.projectDir }); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const agentRunner = new WikiWritingAgentRunner(); + + const result = await runLocalIngest({ + project, + adapters: [new FakeSourceAdapter()], + adapter: 'fake', + connectionId: 'warehouse', + sourceDir, + jobId: 'wiki-local-no-embeddings-1', + agentRunner, + }); + + expect(result.result.failedWorkUnits).toEqual([]); + const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true }); + try { + expect(db.prepare('SELECT key, summary, embedding_json IS NOT NULL AS has_embedding FROM knowledge_pages ORDER BY key').all()).toEqual([ + { key: 'orders_context', summary: 'Orders source context', has_embedding: 0 }, + ]); + } finally { + db.close(); + } + }); + it('uses explicit action raw paths to avoid over-attributing work-unit provenance', async () => { const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); diff --git a/packages/context/src/wiki/sqlite-knowledge-index.test.ts 
b/packages/context/src/wiki/sqlite-knowledge-index.test.ts index 9bb0e6f0..620702a1 100644 --- a/packages/context/src/wiki/sqlite-knowledge-index.test.ts +++ b/packages/context/src/wiki/sqlite-knowledge-index.test.ts @@ -82,6 +82,14 @@ describe('SqliteKnowledgeIndex', () => { ); }); + it('does not treat empty embeddings as indexed semantic vectors', () => { + const index = new SqliteKnowledgeIndex({ dbPath }); + index.sync([page({ path: 'knowledge/global/revenue.md', key: 'revenue', embedding: [] })]); + + expect(index.getExistingPages().get('knowledge/global/revenue.md')?.embedding).toBeNull(); + expect(index.searchSemanticCandidates({ queryEmbedding: [1, 0], limit: 10 })).toEqual([]); + }); + it('returns semantic lane candidates from stored page embeddings', () => { const index = new SqliteKnowledgeIndex({ dbPath }); index.sync([ diff --git a/packages/context/src/wiki/sqlite-knowledge-index.ts b/packages/context/src/wiki/sqlite-knowledge-index.ts index acadc02e..7a5ae8fc 100644 --- a/packages/context/src/wiki/sqlite-knowledge-index.ts +++ b/packages/context/src/wiki/sqlite-knowledge-index.ts @@ -75,7 +75,9 @@ function parseEmbedding(raw: string | null): number[] | null { } try { const embedding = JSON.parse(raw) as unknown; - return Array.isArray(embedding) && embedding.every((value) => typeof value === 'number') ? embedding : null; + return Array.isArray(embedding) && embedding.length > 0 && embedding.every((value) => typeof value === 'number') + ? embedding + : null; } catch { return null; } @@ -170,7 +172,7 @@ export class SqliteKnowledgeIndex { content: searchText, tags: page.tags.join(' '), searchText, - embeddingJson: page.embedding ? JSON.stringify(page.embedding) : null, + embeddingJson: page.embedding && page.embedding.length > 0 ? JSON.stringify(page.embedding) : null, }; upsertPage.run(row); deleteFts.run(row);