mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
fix(wiki): ignore empty embedding vectors
This commit is contained in:
parent
7a86aa9ddc
commit
77dce6fdb3
3 changed files with 56 additions and 2 deletions
|
|
@@ -403,6 +403,50 @@ describe('canonical local ingest', () => {
|
|||
}
|
||||
});
|
||||
|
||||
it('does not persist noop embedding vectors when local embeddings are disabled', async () => {
|
||||
await writeFile(
|
||||
join(project.projectDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'connections:',
|
||||
' warehouse:',
|
||||
' driver: postgres',
|
||||
'ingest:',
|
||||
' adapters:',
|
||||
' - fake',
|
||||
' embeddings:',
|
||||
' backend: none',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
project = await loadKtxProject({ projectDir: project.projectDir });
|
||||
const sourceDir = join(tempDir, 'source');
|
||||
await mkdir(join(sourceDir, 'orders'), { recursive: true });
|
||||
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
|
||||
const agentRunner = new WikiWritingAgentRunner();
|
||||
|
||||
const result = await runLocalIngest({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
adapter: 'fake',
|
||||
connectionId: 'warehouse',
|
||||
sourceDir,
|
||||
jobId: 'wiki-local-no-embeddings-1',
|
||||
agentRunner,
|
||||
});
|
||||
|
||||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true });
|
||||
try {
|
||||
expect(db.prepare('SELECT key, summary, embedding_json IS NOT NULL AS has_embedding FROM knowledge_pages ORDER BY key').all()).toEqual([
|
||||
{ key: 'orders_context', summary: 'Orders source context', has_embedding: 0 },
|
||||
]);
|
||||
} finally {
|
||||
db.close();
|
||||
}
|
||||
});
|
||||
|
||||
it('uses explicit action raw paths to avoid over-attributing work-unit provenance', async () => {
|
||||
const sourceDir = join(tempDir, 'source');
|
||||
await mkdir(join(sourceDir, 'orders'), { recursive: true });
|
||||
|
|
|
|||
|
|
@@ -82,6 +82,14 @@ describe('SqliteKnowledgeIndex', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('does not treat empty embeddings as indexed semantic vectors', () => {
|
||||
const index = new SqliteKnowledgeIndex({ dbPath });
|
||||
index.sync([page({ path: 'knowledge/global/revenue.md', key: 'revenue', embedding: [] })]);
|
||||
|
||||
expect(index.getExistingPages().get('knowledge/global/revenue.md')?.embedding).toBeNull();
|
||||
expect(index.searchSemanticCandidates({ queryEmbedding: [1, 0], limit: 10 })).toEqual([]);
|
||||
});
|
||||
|
||||
it('returns semantic lane candidates from stored page embeddings', () => {
|
||||
const index = new SqliteKnowledgeIndex({ dbPath });
|
||||
index.sync([
|
||||
|
|
|
|||
|
|
@@ -75,7 +75,9 @@ function parseEmbedding(raw: string | null): number[] | null {
|
|||
}
|
||||
try {
|
||||
const embedding = JSON.parse(raw) as unknown;
|
||||
return Array.isArray(embedding) && embedding.every((value) => typeof value === 'number') ? embedding : null;
|
||||
return Array.isArray(embedding) && embedding.length > 0 && embedding.every((value) => typeof value === 'number')
|
||||
? embedding
|
||||
: null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
|
@@ -170,7 +172,7 @@ export class SqliteKnowledgeIndex {
|
|||
content: searchText,
|
||||
tags: page.tags.join(' '),
|
||||
searchText,
|
||||
embeddingJson: page.embedding ? JSON.stringify(page.embedding) : null,
|
||||
embeddingJson: page.embedding && page.embedding.length > 0 ? JSON.stringify(page.embedding) : null,
|
||||
};
|
||||
upsertPage.run(row);
|
||||
deleteFts.run(row);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue