2026-05-10 23:12:26 +02:00
|
|
|
import { mkdtemp, rm } from 'node:fs/promises';
|
|
|
|
|
import { tmpdir } from 'node:os';
|
|
|
|
|
import { join } from 'node:path';
|
|
|
|
|
import { afterEach, beforeEach, describe, it } from 'vitest';
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
import { SqliteContextEvidenceStore } from '../../../src/context/ingest/context-evidence/sqlite-context-evidence-store.js';
|
|
|
|
|
import type { JsonValue } from '../../../src/context/ingest/ports.js';
|
|
|
|
|
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
|
|
|
|
import { type LocalSlSourceSearchResult, searchLocalSlSources, writeLocalSlSource } from '../../../src/context/sl/local-sl.js';
|
|
|
|
|
import type { ContextEvidenceSearchResult } from '../../../src/context/tools/context-evidence-tool-store.js';
|
2026-05-10 23:12:26 +02:00
|
|
|
import {
|
|
|
|
|
type LocalKnowledgeSearchResult,
|
|
|
|
|
searchLocalKnowledgePages,
|
|
|
|
|
writeLocalKnowledgePage,
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
} from '../../../src/context/wiki/local-knowledge.js';
|
2026-05-10 23:12:26 +02:00
|
|
|
import {
|
|
|
|
|
assertSearchBackendCapabilities,
|
|
|
|
|
assertSearchBackendConformanceCase,
|
|
|
|
|
type SearchBackendConformanceResult,
|
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
|
|
|
} from './backend-conformance.test-utils.js';
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
import type { SearchBackendCapabilities } from '../../../src/context/search/types.js';
|
2026-05-10 23:12:26 +02:00
|
|
|
|
|
|
|
|
/**
 * Capability flags this suite declares for the SQLite search backend.
 *
 * The object is checked against `SearchBackendCapabilities` via `satisfies`
 * and is compared to an explicit expectation in the
 * "documents SQLite search backend capabilities" test case.
 */
const SQLITE_SEARCH_CAPABILITIES = {
  fts: true,
  vector: false,
  fuzzy: false,
  jsonSearch: true,
  arraySearch: false,
} satisfies SearchBackendCapabilities;
|
|
|
|
|
|
|
|
|
|
/**
 * YAML text of the `orders` semantic-layer source that
 * `seedSemanticLayerProject` writes under the `warehouse` connection.
 *
 * Every listed line is terminated with a newline, so the resulting document
 * ends with a trailing `\n`.
 */
const ORDERS_YAML = [
  'name: orders',
  'table: public.orders',
  'grain:',
  ' - order_id',
  'columns:',
  ' - name: order_id',
  ' type: string',
  ' - name: revenue',
  ' type: number',
  'measures:',
  ' - name: total_revenue',
  ' expr: sum(revenue)',
]
  .map((line) => `${line}\n`)
  .join('');
|
|
|
|
|
|
|
|
|
|
/**
 * YAML text of the `orders` semantic-layer source that
 * `seedSemanticLayerProject` writes under the `finance` connection.
 *
 * It shares the source name `orders` with `ORDERS_YAML` but points at
 * `finance.orders` and carries a user description. Every listed line is
 * terminated with a newline, so the document ends with a trailing `\n`.
 */
const FINANCE_ORDERS_YAML = [
  'name: orders',
  'descriptions:',
  ' user: Finance orders used for invoice reconciliation.',
  'table: finance.orders',
  'grain:',
  ' - order_id',
  'columns:',
  ' - name: order_id',
  ' type: string',
  ' - name: invoice_status',
  ' type: string',
]
  .map((line) => `${line}\n`)
  .join('');
|
|
|
|
|
|
|
|
|
|
/**
 * Deterministic stand-in for an embedding service, passed as
 * `embeddingService` in the wiki semantic-ranking case.
 *
 * Texts that contain "semantic revenue" (case-insensitively) map to `[1, 0]`;
 * every other text maps to `[0, 1]`.
 */
class FakeEmbeddingPort {
  readonly maxBatchSize = 16;

  /**
   * Embeds a single text.
   *
   * @param text - Text to embed.
   * @returns `[1, 0]` when the lower-cased text contains "semantic revenue",
   *   otherwise `[0, 1]`.
   */
  async computeEmbedding(text: string): Promise<number[]> {
    const mentionsSemanticRevenue = text.toLowerCase().includes('semantic revenue');
    if (mentionsSemanticRevenue) {
      return [1, 0];
    }
    return [0, 1];
  }

  /**
   * Embeds several texts by delegating to `computeEmbedding` for each one.
   *
   * @param texts - Texts to embed.
   * @returns One vector per input text, in input order.
   */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const pendingVectors = texts.map((text) => this.computeEmbedding(text));
    return Promise.all(pendingVectors);
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Adapts a semantic-layer search hit to the shape consumed by the
 * conformance assertions.
 *
 * @param result - Hit returned by `searchLocalSlSources`.
 * @returns A result whose id is `<connectionId>/<name>`; a missing score
 *   becomes `0` and missing match reasons become an empty list, while lanes
 *   and dictionary matches are passed through unchanged.
 */
function toSlConformanceResult(result: LocalSlSourceSearchResult): SearchBackendConformanceResult {
  const { connectionId, name, score, matchReasons, lanes, dictionaryMatches } = result;
  return {
    id: `${connectionId}/${name}`,
    score: score ?? 0,
    matchReasons: matchReasons ?? [],
    lanes,
    dictionaryMatches,
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Adapts a wiki page search hit to the shape consumed by the conformance
 * assertions.
 *
 * @param result - Hit returned by `searchLocalKnowledgePages`.
 * @returns A result that uses the page key as its id and passes score, match
 *   reasons, and lanes through unchanged.
 */
function toWikiConformanceResult(result: LocalKnowledgeSearchResult): SearchBackendConformanceResult {
  const { key, score, matchReasons, lanes } = result;
  return { id: key, score, matchReasons, lanes };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Adapts a context-evidence search hit to the shape consumed by the
 * conformance assertions.
 *
 * @param result - Hit returned by the evidence store's `searchRRF`.
 * @returns A result whose id is `<externalId>:<stableCitationKey>`; missing
 *   match reasons become an empty list, while score and lanes are passed
 *   through unchanged.
 */
function toContextConformanceResult(result: ContextEvidenceSearchResult): SearchBackendConformanceResult {
  const { externalId, stableCitationKey, score, matchReasons, lanes } = result;
  return {
    id: `${externalId}:${stableCitationKey}`,
    score,
    matchReasons: matchReasons ?? [],
    lanes,
  };
}
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
/**
 * Seeds the project with the semantic-layer fixtures used by the suite.
 *
 * Writes two sources that are both named `orders` (one under the `warehouse`
 * connection, one under `finance`) and then stores a relationship profile for
 * the `warehouse` connection through the project's file store. The profile
 * lists `paid` and `refunded` as sample values of `orders.status`; the suite's
 * dictionary case searches for `refunded`.
 *
 * @param project - Project to seed.
 */
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
  const writeOrdersSource = (connectionId: string, yaml: string) =>
    writeLocalSlSource(project, {
      connectionId,
      sourceName: 'orders',
      yaml,
    });

  // The writes stay sequential and in the original order: warehouse first.
  await writeOrdersSource('warehouse', ORDERS_YAML);
  await writeOrdersSource('finance', FINANCE_ORDERS_YAML);

  const statusColumnProfile = {
    table: { catalog: null, db: 'public', name: 'orders' },
    column: 'status',
    nativeType: 'text',
    normalizedType: 'string',
    rowCount: 10,
    nullCount: 0,
    distinctCount: 2,
    uniquenessRatio: 0.2,
    nullRate: 0,
    sampleValues: ['paid', 'refunded'],
    minTextLength: 4,
    maxTextLength: 8,
  };
  const relationshipProfile = {
    connectionId: 'warehouse',
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 2,
    tables: [],
    columns: {
      'orders.status': statusColumnProfile,
    },
    warnings: [],
  };

  // Pretty-printed JSON followed by a single trailing newline.
  const profileJson = `${JSON.stringify(relationshipProfile, null, 2)}\n`;
  await project.fileStore.writeFile(
    'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
    profileJson,
    'ktx',
    'ktx@example.com',
    'Seed dictionary profile',
  );
}
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
/**
 * Seeds the project with the two wiki pages used by the suite.
 *
 * `metrics-revenue` is the page every wiki case expects on top;
 * `support-escalations` is an unrelated second page. The pages are written
 * one after the other, in that order.
 *
 * @param project - Project to seed.
 */
async function seedWikiProject(project: KtxLocalProject): Promise<void> {
  const scope = 'GLOBAL';

  // Revenue page: carries refs and semantic-layer refs in addition to tags.
  await writeLocalKnowledgePage(project, {
    key: 'metrics-revenue',
    scope,
    summary: 'Semantic revenue definition',
    content: 'Revenue is recognized when an order is paid.',
    tags: ['finance'],
    refs: ['semantic-layer/warehouse/orders.yaml'],
    slRefs: ['orders'],
  });

  // Support page: tags only.
  await writeLocalKnowledgePage(project, {
    key: 'support-escalations',
    scope,
    summary: 'Support escalation process',
    content: 'Escalations move urgent support tickets to the operations queue.',
    tags: ['operations'],
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Seeds one published-by-default Notion page with a single `intro` chunk into
 * the given evidence store and returns the ids needed to refer to it.
 *
 * The document is upserted under connection `conn-1` / source `notion`, its
 * chunks are replaced with one chunk, and the document is then read back by
 * external id to obtain the stored chunk id.
 *
 * @param subject - Evidence store to seed.
 * @param input - Optional overrides. Omitted fields fall back to `run-1`,
 *   `sync-1`, `page-1`, `Revenue Policy`, a raw path derived from the external
 *   id, `published`, empty metadata, and content/search text derived from the
 *   title. `embedding` defaults to `[1, 0, 0]` only when it is omitted; an
 *   explicit `null` is forwarded as `null`.
 * @returns The id returned by `upsertDocument` and the id of the first chunk
 *   of the document as read back from the store.
 * @throws Error when the seeded document cannot be read back, or when it is
 *   read back without any chunks.
 */
async function seedContextDocument(
  subject: SqliteContextEvidenceStore,
  input: {
    runId?: string;
    syncId?: string;
    externalId?: string;
    title?: string;
    rawPath?: string;
    metadata?: JsonValue;
    publishState?: 'pending' | 'published';
    embedding?: number[] | null;
    content?: string;
    searchText?: string;
  } = {},
): Promise<{ documentId: string; chunkId: string }> {
  const runId = input.runId ?? 'run-1';
  const syncId = input.syncId ?? 'sync-1';
  const externalId = input.externalId ?? 'page-1';
  const title = input.title ?? 'Revenue Policy';
  const rawPath = input.rawPath ?? `pages/${externalId}/page.md`;
  // `null` is a value the declared type allows and callers pass on purpose,
  // so only a missing property picks up the default vector (`??` would also
  // replace the explicit `null`).
  const embedding = input.embedding === undefined ? [1, 0, 0] : input.embedding;

  const doc = await subject.upsertDocument({
    runId,
    connectionId: 'conn-1',
    sourceKey: 'notion',
    externalId,
    externalParentId: null,
    databaseId: null,
    dataSourceId: null,
    title,
    path: `Company Handbook / ${title}`,
    url: `https://notion.test/${externalId}`,
    objectType: 'page',
    lastEditedAt: new Date('2026-04-30T10:00:00.000Z'),
    lastEditedBy: 'user-1',
    rawPath,
    syncId,
    contentHash: `hash-${externalId}`,
    publishState: input.publishState ?? 'published',
    metadata: input.metadata ?? {},
  });

  await subject.replaceChunks(doc.id, [
    {
      chunkKey: 'intro',
      headingPath: ['Policy'],
      ordinal: 0,
      content: input.content ?? `${title} requires approval from the accountable owner.`,
      searchText: input.searchText ?? `${title} approval accountable owner`,
      embedding,
      tokenCount: 8,
      citation: {
        source: 'notion',
        pageId: externalId,
        title,
        syncId,
        rawPath,
      },
      stableCitationKey: `notion:${externalId}:intro`,
      syncId,
      contentHash: `chunk-${externalId}`,
    },
  ]);

  const read = await subject.readDocumentByExternalId('conn-1', 'notion', externalId, runId);
  if (!read) {
    throw new Error(`seeded document ${externalId} was not readable`);
  }

  // Fail with a seed-specific message instead of a TypeError on `undefined.id`
  // when the document comes back without chunks.
  const [firstChunk] = read.chunks;
  if (!firstChunk) {
    throw new Error(`seeded document ${externalId} has no readable chunks`);
  }

  return { documentId: doc.id, chunkId: firstChunk.id };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Conformance suite for the SQLite search backend across three surfaces:
 * semantic-layer sources, wiki pages, and context evidence. Each case runs a
 * search against freshly seeded fixtures and checks the top ids, match
 * reasons, and lane statuses through the shared conformance helpers.
 */
describe('SQLite hybrid search backend conformance', () => {
  let scratchDir: string;
  let ktxProject: KtxLocalProject;
  let evidenceDbPath: string;

  // Every test gets its own temp directory, a project initialised in its
  // `project` subdirectory, and a database path under `.ktx` in the temp root.
  beforeEach(async () => {
    scratchDir = await mkdtemp(join(tmpdir(), 'ktx-search-conformance-'));
    ktxProject = await initKtxProject({ projectDir: join(scratchDir, 'project') });
    evidenceDbPath = join(scratchDir, '.ktx', 'db.sqlite');
  });

  afterEach(async () => {
    await rm(scratchDir, { recursive: true, force: true });
  });

  it('documents SQLite search backend capabilities', () => {
    const expected = {
      fts: true,
      vector: false,
      fuzzy: false,
      jsonSearch: true,
      arraySearch: false,
    };
    assertSearchBackendCapabilities({
      backendName: 'sqlite',
      capabilities: SQLITE_SEARCH_CAPABILITIES,
      expected,
    });
  });

  it('keeps semantic-layer global ranking, dictionary evidence, and token fallback stable', async () => {
    await seedSemanticLayerProject(ktxProject);

    // Case 1: no connection filter, so both `orders` sources are candidates.
    const globalHits = await searchLocalSlSources(ktxProject, { query: 'orders', limit: 5 });
    const globalResults = globalHits.map(toSlConformanceResult);
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'semantic-layer',
      caseName: 'global source ranking',
      results: globalResults,
      expectedTopIds: ['finance/orders', 'warehouse/orders'],
      expectedReasonsById: {
        'finance/orders': ['lexical'],
        'warehouse/orders': ['lexical'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
    });

    // Case 2: `refunded` appears only as a sample value in the seeded profile.
    const dictionaryHits = await searchLocalSlSources(ktxProject, {
      connectionId: 'warehouse',
      query: 'refunded',
      limit: 5,
    });
    const dictionaryResults = dictionaryHits.map(toSlConformanceResult);
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'semantic-layer',
      caseName: 'dictionary source evidence',
      results: dictionaryResults,
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['dictionary'],
      },
      expectedLanes: {
        dictionary: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
      expectedDictionaryMatchesById: {
        'warehouse/orders': [{ column: 'status', values: ['refunded'] }],
      },
    });

    // Case 3: trailing punctuation in the query; the hit is expected via the token lane.
    const tokenHits = await searchLocalSlSources(ktxProject, {
      connectionId: 'warehouse',
      query: 'orders---',
      limit: 5,
    });
    const tokenResults = tokenHits.map(toSlConformanceResult);
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'semantic-layer',
      caseName: 'token fallback reason',
      results: tokenResults,
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['token'],
      },
      expectedLanes: {
        token: { status: 'available' },
      },
    });
  });

  it('keeps wiki lexical, semantic, and token behavior stable', async () => {
    await seedWikiProject(ktxProject);

    // Case 1: plain text query without an embedding service.
    const lexicalHits = await searchLocalKnowledgePages(ktxProject, {
      query: 'paid order',
      userId: 'local',
      limit: 5,
    });
    const lexicalResults = lexicalHits.map(toWikiConformanceResult);
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'wiki',
      caseName: 'lexical page ranking',
      results: lexicalResults,
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['lexical'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
    });

    // Case 2: same surface, now with the fake embedding service supplied.
    const embeddingService = new FakeEmbeddingPort();
    const semanticHits = await searchLocalKnowledgePages(ktxProject, {
      query: 'semantic revenue',
      userId: 'local',
      limit: 5,
      embeddingService,
    });
    const semanticResults = semanticHits.map(toWikiConformanceResult);
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'wiki',
      caseName: 'semantic page ranking',
      results: semanticResults,
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['semantic'],
      },
      expectedLanes: {
        semantic: { status: 'available' },
      },
    });

    // Case 3: trailing punctuation in the query; the hit is expected via the token lane.
    const tokenHits = await searchLocalKnowledgePages(ktxProject, {
      query: 'paid---',
      userId: 'local',
      limit: 5,
    });
    const tokenResults = tokenHits.map(toWikiConformanceResult);
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'wiki',
      caseName: 'token page fallback',
      results: tokenResults,
      expectedTopIds: ['metrics-revenue'],
      expectedReasonsById: {
        'metrics-revenue': ['token'],
      },
      expectedLanes: {
        token: { status: 'available' },
      },
    });
  });

  it('keeps context-evidence lane fusion and token fallback stable', async () => {
    // Three policies with distinct search text and embeddings for the fusion case.
    const fusionStore = new SqliteContextEvidenceStore({ dbPath: evidenceDbPath });
    await seedContextDocument(fusionStore, {
      externalId: 'page-discount',
      title: 'Enterprise Discount Policy',
      content: 'Enterprise discounts require finance approval before quote approval.',
      searchText: 'enterprise discount finance approval quote',
      embedding: [1, 0, 0],
    });
    await seedContextDocument(fusionStore, {
      externalId: 'page-owner',
      title: 'Accountable Owner Policy',
      content: 'Every policy has an accountable owner and review date.',
      searchText: 'accountable owner review date',
      embedding: [0.95, 0.05, 0],
    });
    await seedContextDocument(fusionStore, {
      externalId: 'page-expense',
      title: 'Expense Policy',
      content: 'Expense reimbursement requires receipt review.',
      searchText: 'expense reimbursement receipt review',
      embedding: [0, 1, 0],
    });

    const fusedHits = await fusionStore.searchRRF({
      connectionId: 'conn-1',
      sourceKey: 'notion',
      queryEmbedding: [1, 0, 0],
      queryText: 'enterprise discount approval',
      limit: 2,
      includeDeleted: false,
    });
    const fusedResults = fusedHits.map(toContextConformanceResult);
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'context-evidence',
      caseName: 'chunk lane fusion',
      results: fusedResults,
      expectedTopIds: ['page-discount:notion:page-discount:intro'],
      expectedReasonsById: {
        'page-discount:notion:page-discount:intro': ['lexical', 'semantic', 'token'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'available' },
        token: { status: 'available' },
      },
    });

    // A separate database file keeps the token-fallback case apart from the
    // fusion fixtures above.
    const tokenStore = new SqliteContextEvidenceStore({ dbPath: join(scratchDir, 'token.sqlite') });
    await seedContextDocument(tokenStore, {
      externalId: 'page-cpp',
      title: 'C++ Warehouse Notes',
      content: 'C++ parser notes for warehouse extraction.',
      searchText: 'C++ parser warehouse extraction',
      embedding: null,
    });

    const tokenHits = await tokenStore.searchRRF({
      connectionId: 'conn-1',
      sourceKey: 'notion',
      queryEmbedding: null,
      queryText: '++',
      limit: 5,
      includeDeleted: false,
    });
    const tokenResults = tokenHits.map(toContextConformanceResult);
    assertSearchBackendConformanceCase({
      backendName: 'sqlite',
      surface: 'context-evidence',
      caseName: 'fts-empty token fallback',
      results: tokenResults,
      expectedTopIds: ['page-cpp:notion:page-cpp:intro'],
      expectedReasonsById: {
        'page-cpp:notion:page-cpp:intro': ['token'],
      },
      expectedLanes: {
        lexical: { status: 'skipped', reason: 'fts_query_empty' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
        token: { status: 'available' },
      },
    });
  });
});
|