mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-04 10:52:13 +02:00
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
This commit is contained in:
parent
924868841d
commit
56985b7e09
548 changed files with 5048 additions and 2228 deletions
|
|
@@ -1,473 +0,0 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, it } from 'vitest';
|
||||
import { SqliteContextEvidenceStore } from '../ingest/context-evidence/sqlite-context-evidence-store.js';
|
||||
import type { JsonValue } from '../ingest/ports.js';
|
||||
import { initKtxProject, type KtxLocalProject } from '../project/project.js';
|
||||
import { type LocalSlSourceSearchResult, searchLocalSlSources, writeLocalSlSource } from '../sl/local-sl.js';
|
||||
import type { ContextEvidenceSearchResult } from '../tools/context-evidence-tool-store.js';
|
||||
import {
|
||||
type LocalKnowledgeSearchResult,
|
||||
searchLocalKnowledgePages,
|
||||
writeLocalKnowledgePage,
|
||||
} from '../wiki/local-knowledge.js';
|
||||
import {
|
||||
assertSearchBackendCapabilities,
|
||||
assertSearchBackendConformanceCase,
|
||||
type SearchBackendConformanceResult,
|
||||
} from './backend-conformance.test-utils.js';
|
||||
import type { SearchBackendCapabilities } from './types.js';
|
||||
|
||||
/**
 * Capability flags the conformance suite asserts for the SQLite search backend.
 * `satisfies` validates the shape against SearchBackendCapabilities while
 * keeping the literal boolean types of each flag.
 */
const SQLITE_SEARCH_CAPABILITIES = {
  fts: true,
  vector: false,
  fuzzy: false,
  jsonSearch: true,
  arraySearch: false,
} satisfies SearchBackendCapabilities;
|
||||
|
||||
/**
 * Semantic-layer source fixture for `public.orders`.
 * Nested YAML entries use two-space steps so that `type`/`expr` stay inside
 * their list item; the trailing empty element makes the document end with a
 * newline.
 */
const ORDERS_YAML = [
  'name: orders',
  'table: public.orders',
  'grain:',
  '  - order_id',
  'columns:',
  '  - name: order_id',
  '    type: string',
  '  - name: revenue',
  '    type: number',
  'measures:',
  '  - name: total_revenue',
  '    expr: sum(revenue)',
  '',
].join('\n');
|
||||
|
||||
/**
 * Semantic-layer source fixture for `finance.orders`, also named `orders`.
 * Nested YAML entries use two-space steps so that `user` nests under
 * `descriptions` and `type` stays inside its column item; the trailing empty
 * element makes the document end with a newline.
 */
const FINANCE_ORDERS_YAML = [
  'name: orders',
  'descriptions:',
  '  user: Finance orders used for invoice reconciliation.',
  'table: finance.orders',
  'grain:',
  '  - order_id',
  'columns:',
  '  - name: order_id',
  '    type: string',
  '  - name: invoice_status',
  '    type: string',
  '',
].join('\n');
|
||||
|
||||
/**
 * Deterministic embedding stub for the semantic-lane cases.
 * Text containing "semantic revenue" (case-insensitive) maps to `[1, 0]`;
 * any other text maps to `[0, 1]`.
 */
class FakeEmbeddingPort {
  readonly maxBatchSize = 16;

  /** Returns the two-dimensional stub vector for a single text. */
  async computeEmbedding(text: string): Promise<number[]> {
    return text.toLowerCase().includes('semantic revenue') ? [1, 0] : [0, 1];
  }

  /** Embeds every text via {@link computeEmbedding}, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    return Promise.all(texts.map((text) => this.computeEmbedding(text)));
  }
}
|
||||
|
||||
/**
 * Maps a semantic-layer search hit onto the shared conformance result shape.
 * The id is `<connectionId>/<name>`; a missing score becomes 0 and missing
 * match reasons become an empty list.
 */
function toSlConformanceResult(result: LocalSlSourceSearchResult): SearchBackendConformanceResult {
  return {
    id: `${result.connectionId}/${result.name}`,
    score: result.score ?? 0,
    matchReasons: result.matchReasons ?? [],
    lanes: result.lanes,
    dictionaryMatches: result.dictionaryMatches,
  };
}
|
||||
|
||||
/**
 * Maps a wiki page search hit onto the shared conformance result shape,
 * using the page key as the id and passing score, match reasons, and lanes
 * through unchanged.
 */
function toWikiConformanceResult(result: LocalKnowledgeSearchResult): SearchBackendConformanceResult {
  return {
    id: result.key,
    score: result.score,
    matchReasons: result.matchReasons,
    lanes: result.lanes,
  };
}
|
||||
|
||||
/**
 * Maps a context-evidence chunk hit onto the shared conformance result shape.
 * The id is `<externalId>:<stableCitationKey>`; missing match reasons become
 * an empty list.
 */
function toContextConformanceResult(result: ContextEvidenceSearchResult): SearchBackendConformanceResult {
  return {
    id: `${result.externalId}:${result.stableCitationKey}`,
    score: result.score,
    matchReasons: result.matchReasons ?? [],
    lanes: result.lanes,
  };
}
|
||||
|
||||
/**
 * Seeds the project with two semantic-layer sources that are both named
 * `orders` (connections `warehouse` and `finance`) and writes a
 * relationship-profile JSON file for the `warehouse` connection whose
 * `orders.status` column lists the sample values `paid` and `refunded`.
 */
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
  await writeLocalSlSource(project, {
    connectionId: 'warehouse',
    sourceName: 'orders',
    yaml: ORDERS_YAML,
  });
  await writeLocalSlSource(project, {
    connectionId: 'finance',
    sourceName: 'orders',
    yaml: FINANCE_ORDERS_YAML,
  });
  // The profile is stored pretty-printed (2-space JSON) with a trailing newline.
  await project.fileStore.writeFile(
    'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
    `${JSON.stringify(
      {
        connectionId: 'warehouse',
        driver: 'postgres',
        sqlAvailable: true,
        queryCount: 2,
        tables: [],
        columns: {
          'orders.status': {
            table: { catalog: null, db: 'public', name: 'orders' },
            column: 'status',
            nativeType: 'text',
            normalizedType: 'string',
            rowCount: 10,
            nullCount: 0,
            distinctCount: 2,
            uniquenessRatio: 0.2,
            nullRate: 0,
            sampleValues: ['paid', 'refunded'],
            minTextLength: 4,
            maxTextLength: 8,
          },
        },
        warnings: [],
      },
      null,
      2,
    )}\n`,
    'ktx',
    'ktx@example.com',
    'Seed dictionary profile',
  );
}
|
||||
|
||||
/**
 * Seeds the project with two GLOBAL-scope knowledge pages: `metrics-revenue`
 * (tagged `finance`, with references to the warehouse orders source) and
 * `support-escalations` (tagged `operations`).
 */
async function seedWikiProject(project: KtxLocalProject): Promise<void> {
  await writeLocalKnowledgePage(project, {
    key: 'metrics-revenue',
    scope: 'GLOBAL',
    summary: 'Semantic revenue definition',
    content: 'Revenue is recognized when an order is paid.',
    tags: ['finance'],
    refs: ['semantic-layer/warehouse/orders.yaml'],
    slRefs: ['orders'],
  });
  await writeLocalKnowledgePage(project, {
    key: 'support-escalations',
    scope: 'GLOBAL',
    summary: 'Support escalation process',
    content: 'Escalations move urgent support tickets to the operations queue.',
    tags: ['operations'],
  });
}
|
||||
|
||||
/**
 * Seeds one `notion` document for connection `conn-1` together with a single
 * `intro` chunk, then reads it back to obtain the stored chunk id.
 *
 * All `input` fields are optional. Omitted fields fall back to `run-1`,
 * `sync-1`, `page-1`, `Revenue Policy`, a raw path derived from the external
 * id, publish state `published`, and empty metadata.
 *
 * An explicit `embedding: null` is passed through to the chunk as `null`;
 * only an omitted embedding receives the default `[1, 0, 0]` vector.
 *
 * @param subject store that receives the document and chunk
 * @param input overrides for the seeded document and chunk
 * @returns the upserted document id and the id of its first chunk as read back
 * @throws Error when the document cannot be read back or comes back without chunks
 */
async function seedContextDocument(
  subject: SqliteContextEvidenceStore,
  input: {
    runId?: string;
    syncId?: string;
    externalId?: string;
    title?: string;
    rawPath?: string;
    metadata?: JsonValue;
    publishState?: 'pending' | 'published';
    embedding?: number[] | null;
    content?: string;
    searchText?: string;
  } = {},
): Promise<{ documentId: string; chunkId: string }> {
  const runId = input.runId ?? 'run-1';
  const syncId = input.syncId ?? 'sync-1';
  const externalId = input.externalId ?? 'page-1';
  const title = input.title ?? 'Revenue Policy';
  const rawPath = input.rawPath ?? `pages/${externalId}/page.md`;
  // `??` would also replace an explicit null, so default on `undefined` only.
  const embedding = input.embedding === undefined ? [1, 0, 0] : input.embedding;

  const doc = await subject.upsertDocument({
    runId,
    connectionId: 'conn-1',
    sourceKey: 'notion',
    externalId,
    externalParentId: null,
    databaseId: null,
    dataSourceId: null,
    title,
    path: `Company Handbook / ${title}`,
    url: `https://notion.test/${externalId}`,
    objectType: 'page',
    lastEditedAt: new Date('2026-04-30T10:00:00.000Z'),
    lastEditedBy: 'user-1',
    rawPath,
    syncId,
    contentHash: `hash-${externalId}`,
    publishState: input.publishState ?? 'published',
    metadata: input.metadata ?? {},
  });
  await subject.replaceChunks(doc.id, [
    {
      chunkKey: 'intro',
      headingPath: ['Policy'],
      ordinal: 0,
      content: input.content ?? `${title} requires approval from the accountable owner.`,
      searchText: input.searchText ?? `${title} approval accountable owner`,
      embedding,
      tokenCount: 8,
      citation: {
        source: 'notion',
        pageId: externalId,
        title,
        syncId,
        rawPath,
      },
      stableCitationKey: `notion:${externalId}:intro`,
      syncId,
      contentHash: `chunk-${externalId}`,
    },
  ]);

  const read = await subject.readDocumentByExternalId('conn-1', 'notion', externalId, runId);
  if (!read) {
    throw new Error(`seeded document ${externalId} was not readable`);
  }

  // Fail with a descriptive message instead of a TypeError on `undefined.id`.
  const firstChunk = read.chunks[0];
  if (!firstChunk) {
    throw new Error(`seeded document ${externalId} has no chunks`);
  }

  return { documentId: doc.id, chunkId: firstChunk.id };
}
|
||||
|
||||
/**
 * Conformance suite for the SQLite search backend across three surfaces:
 * semantic-layer sources, wiki pages, and context-evidence chunks.
 * Each test gets a fresh temporary directory holding a newly initialised
 * project; the directory is removed again after the test.
 */
describe('SQLite hybrid search backend conformance', () => {
  let tempDir: string;
  let project: KtxLocalProject;
  let dbPath: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-search-conformance-'));
    project = await initKtxProject({ projectDir: join(tempDir, 'project') });
    dbPath = join(tempDir, '.ktx', 'db.sqlite');
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('documents SQLite search backend capabilities', () => {
    assertSearchBackendCapabilities({
      backendName: 'sqlite',
      capabilities: SQLITE_SEARCH_CAPABILITIES,
      expected: {
        fts: true,
        vector: false,
        fuzzy: false,
        jsonSearch: true,
        arraySearch: false,
      },
    });
  });

  it('keeps semantic-layer global ranking, dictionary evidence, and token fallback stable', async () => {
    await seedSemanticLayerProject(project);

    // No connectionId: both `orders` sources are expected, finance first.
    const global = await searchLocalSlSources(project, { query: 'orders', limit: 5 });
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'semantic-layer',
      caseName: 'global source ranking',
      results: global.map(toSlConformanceResult),
      expectedTopIds: ['finance/orders', 'warehouse/orders'],
      expectedReasonsById: {
        'finance/orders': ['lexical'],
        'warehouse/orders': ['lexical'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
    });

    // `refunded` only appears as a sample value in the seeded column profile.
    const dictionary = await searchLocalSlSources(project, {
      connectionId: 'warehouse',
      query: 'refunded',
      limit: 5,
    });
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'semantic-layer',
      caseName: 'dictionary source evidence',
      results: dictionary.map(toSlConformanceResult),
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['dictionary'],
      },
      expectedLanes: {
        dictionary: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
      expectedDictionaryMatchesById: {
        'warehouse/orders': [{ column: 'status', values: ['refunded'] }],
      },
    });

    const token = await searchLocalSlSources(project, {
      connectionId: 'warehouse',
      query: 'orders---',
      limit: 5,
    });
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'semantic-layer',
      caseName: 'token fallback reason',
      results: token.map(toSlConformanceResult),
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['token'],
      },
      expectedLanes: {
        token: { status: 'available' },
      },
    });
  });

  it('keeps wiki lexical, semantic, and token behavior stable', async () => {
    await seedWikiProject(project);

    const lexical = await searchLocalKnowledgePages(project, {
      query: 'paid order',
      userId: 'local',
      limit: 5,
    });
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'wiki',
      caseName: 'lexical page ranking',
      results: lexical.map(toWikiConformanceResult),
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['lexical'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
    });

    // Only this case supplies an embedding service.
    const semantic = await searchLocalKnowledgePages(project, {
      query: 'semantic revenue',
      userId: 'local',
      limit: 5,
      embeddingService: new FakeEmbeddingPort(),
    });
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'wiki',
      caseName: 'semantic page ranking',
      results: semantic.map(toWikiConformanceResult),
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['semantic'],
      },
      expectedLanes: {
        semantic: { status: 'available' },
      },
    });

    const token = await searchLocalKnowledgePages(project, {
      query: 'paid---',
      userId: 'local',
      limit: 5,
    });
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'wiki',
      caseName: 'token page fallback',
      results: token.map(toWikiConformanceResult),
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['token'],
      },
      expectedLanes: {
        token: { status: 'available' },
      },
    });
  });

  it('keeps context-evidence lane fusion and token fallback stable', async () => {
    const subject = new SqliteContextEvidenceStore({ dbPath });
    await seedContextDocument(subject, {
      externalId: 'page-discount',
      title: 'Enterprise Discount Policy',
      content: 'Enterprise discounts require finance approval before quote approval.',
      searchText: 'enterprise discount finance approval quote',
      embedding: [1, 0, 0],
    });
    await seedContextDocument(subject, {
      externalId: 'page-owner',
      title: 'Accountable Owner Policy',
      content: 'Every policy has an accountable owner and review date.',
      searchText: 'accountable owner review date',
      embedding: [0.95, 0.05, 0],
    });
    await seedContextDocument(subject, {
      externalId: 'page-expense',
      title: 'Expense Policy',
      content: 'Expense reimbursement requires receipt review.',
      searchText: 'expense reimbursement receipt review',
      embedding: [0, 1, 0],
    });

    const fused = await subject.searchRRF({
      connectionId: 'conn-1',
      sourceKey: 'notion',
      queryEmbedding: [1, 0, 0],
      queryText: 'enterprise discount approval',
      limit: 2,
      includeDeleted: false,
    });
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'context-evidence',
      caseName: 'chunk lane fusion',
      results: fused.map(toContextConformanceResult),
      expectedTopIds: ['page-discount:notion:page-discount:intro'],
      expectedReasonsById: {
        'page-discount:notion:page-discount:intro': ['lexical', 'semantic', 'token'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'available' },
        token: { status: 'available' },
      },
    });

    // A second database file keeps the token-only case separate from the fused one.
    const tokenSubject = new SqliteContextEvidenceStore({ dbPath: join(tempDir, 'token.sqlite') });
    await seedContextDocument(tokenSubject, {
      externalId: 'page-cpp',
      title: 'C++ Warehouse Notes',
      content: 'C++ parser notes for warehouse extraction.',
      searchText: 'C++ parser warehouse extraction',
      embedding: null,
    });

    const token = await tokenSubject.searchRRF({
      connectionId: 'conn-1',
      sourceKey: 'notion',
      queryEmbedding: null,
      queryText: '++',
      limit: 5,
      includeDeleted: false,
    });
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'context-evidence',
      caseName: 'fts-empty token fallback',
      results: token.map(toContextConformanceResult),
      expectedTopIds: ['page-cpp:notion:page-cpp:intro'],
      expectedReasonsById: {
        'page-cpp:notion:page-cpp:intro': ['token'],
      },
      expectedLanes: {
        lexical: { status: 'skipped', reason: 'fts_query_empty' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
        token: { status: 'available' },
      },
    });
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue