ktx/packages/cli/test/context/search/backend-conformance.test-utils.test.ts

474 lines
14 KiB
TypeScript
Raw Permalink Normal View History

2026-05-10 23:12:26 +02:00
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, it } from 'vitest';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { SqliteContextEvidenceStore } from '../../../src/context/ingest/context-evidence/sqlite-context-evidence-store.js';
import type { JsonValue } from '../../../src/context/ingest/ports.js';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { type LocalSlSourceSearchResult, searchLocalSlSources, writeLocalSlSource } from '../../../src/context/sl/local-sl.js';
import type { ContextEvidenceSearchResult } from '../../../src/context/tools/context-evidence-tool-store.js';
2026-05-10 23:12:26 +02:00
import {
type LocalKnowledgeSearchResult,
searchLocalKnowledgePages,
writeLocalKnowledgePage,
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
} from '../../../src/context/wiki/local-knowledge.js';
2026-05-10 23:12:26 +02:00
import {
assertSearchBackendCapabilities,
assertSearchBackendConformanceCase,
type SearchBackendConformanceResult,
chore(workspace): gate dead-code with knip production mode (#196) * refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. 
- Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. 
* refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. 
Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
} from './backend-conformance.test-utils.js';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import type { SearchBackendCapabilities } from '../../../src/context/search/types.js';
2026-05-10 23:12:26 +02:00
/**
 * Capability flags this suite declares for the SQLite search backend.
 * The capabilities test below compares these flags against its expected values.
 */
const SQLITE_SEARCH_CAPABILITIES = {
  fts: true,
  vector: false,
  fuzzy: false,
  jsonSearch: true,
  arraySearch: false,
} satisfies SearchBackendCapabilities;
/**
 * Semantic-layer source fixture for the `warehouse` connection.
 *
 * Nested keys are indented so that `type` / `expr` belong to the list item
 * opened by the preceding `- name:` line; with a flat one-space indent those
 * keys sit at the same column as the `-` and the document is not valid YAML.
 */
const ORDERS_YAML = [
  'name: orders',
  'table: public.orders',
  'grain:',
  '  - order_id',
  'columns:',
  '  - name: order_id',
  '    type: string',
  '  - name: revenue',
  '    type: number',
  'measures:',
  '  - name: total_revenue',
  '    expr: sum(revenue)',
  '',
].join('\n');
/**
 * Semantic-layer source fixture for the `finance` connection. It shares the
 * source name `orders` with the warehouse fixture and carries a user
 * description.
 *
 * Nested keys are indented so that each column's `type` belongs to the list
 * item opened by its `- name:` line; a flat one-space indent is not valid YAML.
 */
const FINANCE_ORDERS_YAML = [
  'name: orders',
  'descriptions:',
  '  user: Finance orders used for invoice reconciliation.',
  'table: finance.orders',
  'grain:',
  '  - order_id',
  'columns:',
  '  - name: order_id',
  '    type: string',
  '  - name: invoice_status',
  '    type: string',
  '',
].join('\n');
/**
 * Deterministic embedding stub for the semantic-lane tests.
 *
 * Text containing "semantic revenue" (case-insensitive) maps to `[1, 0]`;
 * every other text maps to `[0, 1]`.
 */
class FakeEmbeddingPort {
  readonly maxBatchSize = 16;

  /** Returns the two-dimensional stub vector for a single text. */
  async computeEmbedding(text: string): Promise<number[]> {
    const mentionsSemanticRevenue = text.toLowerCase().includes('semantic revenue');
    if (mentionsSemanticRevenue) {
      return [1, 0];
    }
    return [0, 1];
  }

  /** Embeds each text with `computeEmbedding`, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const pending = texts.map((entry) => this.computeEmbedding(entry));
    return Promise.all(pending);
  }
}
/**
 * Adapts a semantic-layer search hit to the conformance result shape.
 * The id is `<connectionId>/<name>`; a missing score becomes 0 and missing
 * match reasons become an empty list.
 */
function toSlConformanceResult(result: LocalSlSourceSearchResult): SearchBackendConformanceResult {
  const { connectionId, name, score, matchReasons, lanes, dictionaryMatches } = result;
  return {
    id: `${connectionId}/${name}`,
    score: score ?? 0,
    matchReasons: matchReasons ?? [],
    lanes,
    dictionaryMatches,
  };
}
/**
 * Adapts a wiki page search hit to the conformance result shape, using the
 * page key as the id and copying score, match reasons and lanes unchanged.
 */
function toWikiConformanceResult(result: LocalKnowledgeSearchResult): SearchBackendConformanceResult {
  const { key, score, matchReasons, lanes } = result;
  return { id: key, score, matchReasons, lanes };
}
/**
 * Adapts a context-evidence search hit to the conformance result shape.
 * The id is `<externalId>:<stableCitationKey>`; missing match reasons become
 * an empty list.
 */
function toContextConformanceResult(result: ContextEvidenceSearchResult): SearchBackendConformanceResult {
  const { externalId, stableCitationKey, score, matchReasons, lanes } = result;
  return {
    id: `${externalId}:${stableCitationKey}`,
    score,
    matchReasons: matchReasons ?? [],
    lanes,
  };
}
2026-05-10 23:51:24 +02:00
/**
 * Seeds the project with two semantic-layer sources named `orders` (one for
 * the `warehouse` connection, one for `finance`) and writes a relationship
 * profile for `warehouse` whose `orders.status` column lists the sample
 * values `paid` and `refunded`.
 */
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
  const sources = [
    { connectionId: 'warehouse', yaml: ORDERS_YAML },
    { connectionId: 'finance', yaml: FINANCE_ORDERS_YAML },
  ];
  // Written one after another, warehouse first.
  for (const { connectionId, yaml } of sources) {
    await writeLocalSlSource(project, { connectionId, sourceName: 'orders', yaml });
  }

  const statusColumnProfile = {
    table: { catalog: null, db: 'public', name: 'orders' },
    column: 'status',
    nativeType: 'text',
    normalizedType: 'string',
    rowCount: 10,
    nullCount: 0,
    distinctCount: 2,
    uniquenessRatio: 0.2,
    nullRate: 0,
    sampleValues: ['paid', 'refunded'],
    minTextLength: 4,
    maxTextLength: 8,
  };
  const relationshipProfile = {
    connectionId: 'warehouse',
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 2,
    tables: [],
    columns: { 'orders.status': statusColumnProfile },
    warnings: [],
  };
  // Pretty-printed JSON followed by a trailing newline.
  const profileJson = `${JSON.stringify(relationshipProfile, null, 2)}\n`;

  await project.fileStore.writeFile(
    'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
    profileJson,
    'ktx',
    'ktx@example.com',
    'Seed dictionary profile',
  );
}
2026-05-10 23:51:24 +02:00
/**
 * Seeds two GLOBAL-scope wiki pages: `metrics-revenue` (tagged `finance`,
 * with file and semantic-layer refs) and `support-escalations` (tagged
 * `operations`).
 */
async function seedWikiProject(project: KtxLocalProject): Promise<void> {
  const scope = 'GLOBAL';

  await writeLocalKnowledgePage(project, {
    key: 'metrics-revenue',
    scope,
    summary: 'Semantic revenue definition',
    content: 'Revenue is recognized when an order is paid.',
    tags: ['finance'],
    refs: ['semantic-layer/warehouse/orders.yaml'],
    slRefs: ['orders'],
  });

  await writeLocalKnowledgePage(project, {
    key: 'support-escalations',
    scope,
    summary: 'Support escalation process',
    content: 'Escalations move urgent support tickets to the operations queue.',
    tags: ['operations'],
  });
}
/**
 * Seeds one published-by-default Notion page with a single `intro` chunk into
 * the given evidence store and reads it back.
 *
 * All `input` fields are optional and fall back to fixed defaults
 * (`run-1`, `sync-1`, `page-1`, `Revenue Policy`, ...). An explicit
 * `embedding: null` is passed to the chunk as `null`; only an omitted
 * embedding falls back to `[1, 0, 0]`.
 *
 * @param subject store that receives the document and its chunk.
 * @param input overrides for the seeded document and chunk.
 * @returns the id of the upserted document and the id of the first chunk read back.
 * @throws Error when the document cannot be read back or is read back without chunks.
 */
async function seedContextDocument(
  subject: SqliteContextEvidenceStore,
  input: {
    runId?: string;
    syncId?: string;
    externalId?: string;
    title?: string;
    rawPath?: string;
    metadata?: JsonValue;
    publishState?: 'pending' | 'published';
    embedding?: number[] | null;
    content?: string;
    searchText?: string;
  } = {},
): Promise<{ documentId: string; chunkId: string }> {
  const runId = input.runId ?? 'run-1';
  const syncId = input.syncId ?? 'sync-1';
  const externalId = input.externalId ?? 'page-1';
  const title = input.title ?? 'Revenue Policy';
  const rawPath = input.rawPath ?? `pages/${externalId}/page.md`;
  // `??` would also replace an explicit null, so distinguish "omitted" from "null".
  const embedding = input.embedding === undefined ? [1, 0, 0] : input.embedding;

  const doc = await subject.upsertDocument({
    runId,
    connectionId: 'conn-1',
    sourceKey: 'notion',
    externalId,
    externalParentId: null,
    databaseId: null,
    dataSourceId: null,
    title,
    path: `Company Handbook / ${title}`,
    url: `https://notion.test/${externalId}`,
    objectType: 'page',
    lastEditedAt: new Date('2026-04-30T10:00:00.000Z'),
    lastEditedBy: 'user-1',
    rawPath,
    syncId,
    contentHash: `hash-${externalId}`,
    publishState: input.publishState ?? 'published',
    metadata: input.metadata ?? {},
  });

  await subject.replaceChunks(doc.id, [
    {
      chunkKey: 'intro',
      headingPath: ['Policy'],
      ordinal: 0,
      content: input.content ?? `${title} requires approval from the accountable owner.`,
      searchText: input.searchText ?? `${title} approval accountable owner`,
      embedding,
      tokenCount: 8,
      citation: {
        source: 'notion',
        pageId: externalId,
        title,
        syncId,
        rawPath,
      },
      stableCitationKey: `notion:${externalId}:intro`,
      syncId,
      contentHash: `chunk-${externalId}`,
    },
  ]);

  const read = await subject.readDocumentByExternalId('conn-1', 'notion', externalId, runId);
  if (!read) {
    throw new Error(`seeded document ${externalId} was not readable`);
  }
  // Fail with a descriptive message instead of a TypeError on `undefined.id`.
  const firstChunk = read.chunks[0];
  if (!firstChunk) {
    throw new Error(`seeded document ${externalId} was read back without chunks`);
  }
  return { documentId: doc.id, chunkId: firstChunk.id };
}
/**
 * Conformance suite for the SQLite search backend across three surfaces:
 * semantic-layer sources, wiki pages and context evidence. Each test gets a
 * fresh temporary directory and project, removed again afterwards.
 */
describe('SQLite hybrid search backend conformance', () => {
  const backendName = 'sqlite';
  let scratchDir: string;
  let ktxProject: KtxLocalProject;
  let evidenceDbPath: string;

  beforeEach(async () => {
    scratchDir = await mkdtemp(join(tmpdir(), 'ktx-search-conformance-'));
    ktxProject = await initKtxProject({ projectDir: join(scratchDir, 'project') });
    evidenceDbPath = join(scratchDir, '.ktx', 'db.sqlite');
  });

  afterEach(async () => {
    await rm(scratchDir, { recursive: true, force: true });
  });

  it('documents SQLite search backend capabilities', () => {
    assertSearchBackendCapabilities({
      backendName,
      capabilities: SQLITE_SEARCH_CAPABILITIES,
      expected: {
        fts: true,
        vector: false,
        fuzzy: false,
        jsonSearch: true,
        arraySearch: false,
      },
    });
  });

  it('keeps semantic-layer global ranking, dictionary evidence, and token fallback stable', async () => {
    await seedSemanticLayerProject(ktxProject);

    // Case 1: query without a connection filter; both `orders` sources are expected.
    const globalHits = await searchLocalSlSources(ktxProject, { query: 'orders', limit: 5 });
    assertSearchBackendConformanceCase({
      backendName,
      surface: 'semantic-layer',
      caseName: 'global source ranking',
      results: globalHits.map(toSlConformanceResult),
      expectedTopIds: ['finance/orders', 'warehouse/orders'],
      expectedReasonsById: {
        'finance/orders': ['lexical'],
        'warehouse/orders': ['lexical'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
    });

    // Case 2: `refunded` is one of the sample values in the seeded relationship profile.
    const dictionaryHits = await searchLocalSlSources(ktxProject, {
      connectionId: 'warehouse',
      query: 'refunded',
      limit: 5,
    });
    assertSearchBackendConformanceCase({
      backendName,
      surface: 'semantic-layer',
      caseName: 'dictionary source evidence',
      results: dictionaryHits.map(toSlConformanceResult),
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['dictionary'],
      },
      expectedLanes: {
        dictionary: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
      expectedDictionaryMatchesById: {
        'warehouse/orders': [{ column: 'status', values: ['refunded'] }],
      },
    });

    // Case 3: query with trailing punctuation; the hit is expected via the token lane.
    const tokenHits = await searchLocalSlSources(ktxProject, {
      connectionId: 'warehouse',
      query: 'orders---',
      limit: 5,
    });
    assertSearchBackendConformanceCase({
      backendName,
      surface: 'semantic-layer',
      caseName: 'token fallback reason',
      results: tokenHits.map(toSlConformanceResult),
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['token'],
      },
      expectedLanes: {
        token: { status: 'available' },
      },
    });
  });

  it('keeps wiki lexical, semantic, and token behavior stable', async () => {
    await seedWikiProject(ktxProject);

    // Case 1: no embedding service supplied, so the semantic lane is expected to be skipped.
    const lexicalHits = await searchLocalKnowledgePages(ktxProject, {
      query: 'paid order',
      userId: 'local',
      limit: 5,
    });
    assertSearchBackendConformanceCase({
      backendName,
      surface: 'wiki',
      caseName: 'lexical page ranking',
      results: lexicalHits.map(toWikiConformanceResult),
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['lexical'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
    });

    // Case 2: same project, now with the fake embedding service supplied.
    const semanticHits = await searchLocalKnowledgePages(ktxProject, {
      query: 'semantic revenue',
      userId: 'local',
      limit: 5,
      embeddingService: new FakeEmbeddingPort(),
    });
    assertSearchBackendConformanceCase({
      backendName,
      surface: 'wiki',
      caseName: 'semantic page ranking',
      results: semanticHits.map(toWikiConformanceResult),
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['semantic'],
      },
      expectedLanes: {
        semantic: { status: 'available' },
      },
    });

    // Case 3: query with trailing punctuation; the hit is expected via the token lane.
    const tokenHits = await searchLocalKnowledgePages(ktxProject, {
      query: 'paid---',
      userId: 'local',
      limit: 5,
    });
    assertSearchBackendConformanceCase({
      backendName,
      surface: 'wiki',
      caseName: 'token page fallback',
      results: tokenHits.map(toWikiConformanceResult),
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['token'],
      },
      expectedLanes: {
        token: { status: 'available' },
      },
    });
  });

  it('keeps context-evidence lane fusion and token fallback stable', async () => {
    const fusionStore = new SqliteContextEvidenceStore({ dbPath: evidenceDbPath });
    const fusionFixtures = [
      {
        externalId: 'page-discount',
        title: 'Enterprise Discount Policy',
        content: 'Enterprise discounts require finance approval before quote approval.',
        searchText: 'enterprise discount finance approval quote',
        embedding: [1, 0, 0],
      },
      {
        externalId: 'page-owner',
        title: 'Accountable Owner Policy',
        content: 'Every policy has an accountable owner and review date.',
        searchText: 'accountable owner review date',
        embedding: [0.95, 0.05, 0],
      },
      {
        externalId: 'page-expense',
        title: 'Expense Policy',
        content: 'Expense reimbursement requires receipt review.',
        searchText: 'expense reimbursement receipt review',
        embedding: [0, 1, 0],
      },
    ];
    // Seed sequentially, in the listed order.
    for (const fixture of fusionFixtures) {
      await seedContextDocument(fusionStore, fixture);
    }

    const fusedHits = await fusionStore.searchRRF({
      connectionId: 'conn-1',
      sourceKey: 'notion',
      queryEmbedding: [1, 0, 0],
      queryText: 'enterprise discount approval',
      limit: 2,
      includeDeleted: false,
    });
    assertSearchBackendConformanceCase({
      backendName,
      surface: 'context-evidence',
      caseName: 'chunk lane fusion',
      results: fusedHits.map(toContextConformanceResult),
      expectedTopIds: ['page-discount:notion:page-discount:intro'],
      expectedReasonsById: {
        'page-discount:notion:page-discount:intro': ['lexical', 'semantic', 'token'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'available' },
        token: { status: 'available' },
      },
    });

    // The token case uses its own database file (token.sqlite) in the same temp directory.
    const tokenStore = new SqliteContextEvidenceStore({ dbPath: join(scratchDir, 'token.sqlite') });
    await seedContextDocument(tokenStore, {
      externalId: 'page-cpp',
      title: 'C++ Warehouse Notes',
      content: 'C++ parser notes for warehouse extraction.',
      searchText: 'C++ parser warehouse extraction',
      embedding: null,
    });
    const tokenHits = await tokenStore.searchRRF({
      connectionId: 'conn-1',
      sourceKey: 'notion',
      queryEmbedding: null,
      queryText: '++',
      limit: 5,
      includeDeleted: false,
    });
    assertSearchBackendConformanceCase({
      backendName,
      surface: 'context-evidence',
      caseName: 'fts-empty token fallback',
      results: tokenHits.map(toContextConformanceResult),
      expectedTopIds: ['page-cpp:notion:page-cpp:intro'],
      expectedReasonsById: {
        'page-cpp:notion:page-cpp:intro': ['token'],
      },
      expectedLanes: {
        lexical: { status: 'skipped', reason: 'fts_query_empty' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
        token: { status: 'available' },
      },
    });
  });
});