refactor(workspace): fold internal packages into cli

This commit is contained in:
Andrey Avtomonov 2026-05-21 03:27:33 +02:00
parent 8c2333cc15
commit ac3885b652
945 changed files with 517 additions and 2686 deletions

View file

@ -5,21 +5,21 @@ import { describe, it } from 'node:test';
const KTX_ROOT = new URL('../', import.meta.url);
const RELATIONSHIP_RUNTIME_SOURCES = Object.freeze([
'packages/context/src/scan/relationship-benchmarks.ts',
'packages/context/src/scan/relationship-budget.ts',
'packages/context/src/scan/relationship-candidates.ts',
'packages/context/src/scan/relationship-composite-candidates.ts',
'packages/context/src/scan/relationship-graph-resolver.ts',
'packages/context/src/scan/relationship-locality.ts',
'packages/context/src/scan/relationship-name-similarity.ts',
'packages/context/src/scan/relationship-discovery.ts',
'packages/context/src/scan/relationship-profiling.ts',
'packages/context/src/scan/relationship-scoring.ts',
'packages/context/src/scan/relationship-validation.ts',
'packages/cli/src/context/scan/relationship-benchmarks.ts',
'packages/cli/src/context/scan/relationship-budget.ts',
'packages/cli/src/context/scan/relationship-candidates.ts',
'packages/cli/src/context/scan/relationship-composite-candidates.ts',
'packages/cli/src/context/scan/relationship-graph-resolver.ts',
'packages/cli/src/context/scan/relationship-locality.ts',
'packages/cli/src/context/scan/relationship-name-similarity.ts',
'packages/cli/src/context/scan/relationship-discovery.ts',
'packages/cli/src/context/scan/relationship-profiling.ts',
'packages/cli/src/context/scan/relationship-scoring.ts',
'packages/cli/src/context/scan/relationship-validation.ts',
]);
async function checkedInFixtureIds() {
const fixtureRoot = new URL('packages/context/test/fixtures/relationship-benchmarks/', KTX_ROOT);
const fixtureRoot = new URL('packages/cli/src/test/fixtures/relationship-benchmarks/', KTX_ROOT);
const entries = await readdir(fixtureRoot, { withFileTypes: true });
return entries
.filter((entry) => entry.isDirectory())

View file

@ -7,7 +7,7 @@ import { expectedLinksFromSnapshot, normalizeSqliteType } from './build-benchmar
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(scriptDir, '..');
const require = createRequire(new URL('../packages/context/package.json', import.meta.url));
const require = createRequire(new URL('../packages/cli/package.json', import.meta.url));
const Database = require('better-sqlite3');
const { stringify: yamlStringify } = require('yaml');
@ -224,7 +224,7 @@ async function main() {
}
const source = JSON.parse(readFileSync(path.join(scriptDir, 'adventureworks-oltp-source.json'), 'utf8'));
const { KtxSqlServerScanConnector } = await import('../packages/connector-sqlserver/dist/index.js');
const { KtxSqlServerScanConnector } = await import('../packages/cli/dist/connectors/sqlserver/index.js');
const connector = new KtxSqlServerScanConnector({
connectionId: fixtureId,
connection: {

View file

@ -5,7 +5,7 @@ import path from 'node:path';
import { fileURLToPath } from 'node:url';
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(new URL('../packages/context/package.json', import.meta.url));
const require = createRequire(new URL('../packages/cli/package.json', import.meta.url));
const Database = require('better-sqlite3');
const { stringify: yamlStringify } = require('yaml');

View file

@ -4,7 +4,7 @@ import { createRequire } from 'node:module';
import { describe, it } from 'node:test';
import { buildBenchmarkSnapshot } from './build-benchmark-snapshot.mjs';
const require = createRequire(new URL('../packages/context/package.json', import.meta.url));
const require = createRequire(new URL('../packages/cli/package.json', import.meta.url));
const Database = require('better-sqlite3');
describe('buildBenchmarkSnapshot', () => {
@ -252,12 +252,12 @@ describe('buildBenchmarkSnapshot', () => {
]);
});
it('exposes relationship benchmarks as an explicit context package script', async () => {
const packageJson = JSON.parse(await readFile(new URL('../packages/context/package.json', import.meta.url), 'utf8'));
it('exposes relationship benchmarks as an explicit CLI package script', async () => {
const packageJson = JSON.parse(await readFile(new URL('../packages/cli/package.json', import.meta.url), 'utf8'));
assert.equal(
packageJson.scripts['relationships:benchmarks:test'],
'KTX_RUN_RELATIONSHIP_BENCHMARKS=1 vitest run src/scan/relationship-benchmarks.test.ts',
'KTX_RUN_RELATIONSHIP_BENCHMARKS=1 vitest run src/context/scan/relationship-benchmarks.test.ts',
);
});
});

View file

@ -9,7 +9,7 @@ import { buildBenchmarkSnapshot, writeFixtureFiles } from './build-benchmark-sna
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const ktxRoot = path.resolve(scriptDir, '..');
const fixtureRoot = path.join(ktxRoot, 'packages', 'context', 'test', 'fixtures', 'relationship-benchmarks');
const require = createRequire(new URL('../packages/context/package.json', import.meta.url));
const require = createRequire(new URL('../packages/cli/package.json', import.meta.url));
const Database = require('better-sqlite3');
const { stringify: yamlStringify } = require('yaml');

View file

@ -20,30 +20,6 @@ export function publicNpmPackageTarballName(version = PUBLIC_NPM_PACKAGE_VERSION
return `kaelio-ktx-${version}.tgz`;
}
export const PUBLIC_BUNDLED_WORKSPACE_PACKAGES = [
'@ktx/llm',
'@ktx/context',
'@ktx/connector-bigquery',
'@ktx/connector-clickhouse',
'@ktx/connector-mysql',
'@ktx/connector-postgres',
'@ktx/connector-snowflake',
'@ktx/connector-sqlite',
'@ktx/connector-sqlserver',
];
export const PUBLIC_BUNDLED_WORKSPACE_PACKAGE_ROOTS = {
'@ktx/llm': 'packages/llm',
'@ktx/context': 'packages/context',
'@ktx/connector-bigquery': 'packages/connector-bigquery',
'@ktx/connector-clickhouse': 'packages/connector-clickhouse',
'@ktx/connector-mysql': 'packages/connector-mysql',
'@ktx/connector-postgres': 'packages/connector-postgres',
'@ktx/connector-snowflake': 'packages/connector-snowflake',
'@ktx/connector-sqlite': 'packages/connector-sqlite',
'@ktx/connector-sqlserver': 'packages/connector-sqlserver',
};
/**
 * Resolves the directory one level above the directory that contains this
 * module file.
 *
 * @returns {string} Absolute path of the parent of this script's directory.
 */
function scriptRootDir() {
  const thisFile = fileURLToPath(import.meta.url);
  const thisDir = dirname(thisFile);
  return resolve(thisDir, '..');
}
@ -75,50 +51,10 @@ function isWorkspacePackageName(name) {
return name.startsWith('@ktx/');
}
/**
 * Parses a caret range written exactly as `^MAJOR.MINOR.PATCH`.
 *
 * @param {string} value - Dependency range string.
 * @returns {{ major: number, minor: number, patch: number } | null} The numeric
 *   parts, or `null` when `value` is not a plain three-part caret range.
 */
function parseCaretVersion(value) {
  const match = /^\^(\d+)\.(\d+)\.(\d+)$/.exec(value);
  if (!match) {
    return null;
  }
  const [, major, minor, patch] = match;
  return {
    major: Number(major),
    minor: Number(minor),
    patch: Number(patch),
  };
}

/**
 * Orders two parsed versions by major, then minor, then patch.
 *
 * @param {{ major: number, minor: number, patch: number }} left
 * @param {{ major: number, minor: number, patch: number }} right
 * @returns {number} Negative when `left` is lower, positive when it is higher,
 *   `0` when all three parts are equal.
 */
function compareParsedVersions(left, right) {
  return left.major - right.major || left.minor - right.minor || left.patch - right.patch;
}

/**
 * Computes the exclusive upper bound of the caret range that starts at `version`.
 * A caret range may not change the left-most non-zero part, so `^1.2.3` ends
 * before `2.0.0`, `^0.2.3` before `0.3.0`, and `^0.0.3` before `0.0.4`.
 *
 * @param {{ major: number, minor: number, patch: number }} version
 * @returns {{ major: number, minor: number, patch: number }}
 */
function caretExclusiveUpperBound(version) {
  if (version.major > 0) {
    return { major: version.major + 1, minor: 0, patch: 0 };
  }
  if (version.minor > 0) {
    return { major: 0, minor: version.minor + 1, patch: 0 };
  }
  return { major: 0, minor: 0, patch: version.patch + 1 };
}

/**
 * Picks a single range for a dependency that is declared twice.
 *
 * Identical strings are returned as-is. Two caret ranges are merged to the
 * higher one, but only when the higher version still falls inside the lower
 * range. Comparing majors alone is not enough: `^0.2.0` and `^0.3.0` share
 * major `0` yet do not overlap.
 *
 * @param {string} name - Dependency name, used only in the error message.
 * @param {string} previous - Range recorded so far.
 * @param {string} next - Newly encountered range.
 * @returns {string} The range satisfying both declarations.
 * @throws {Error} When the two ranges differ and cannot be merged.
 */
function mergeDependencyVersion(name, previous, next) {
  if (previous === next) {
    return previous;
  }

  const previousCaret = parseCaretVersion(previous);
  const nextCaret = parseCaretVersion(next);
  if (previousCaret && nextCaret) {
    // Ties keep `previous`, matching the earlier-declaration-wins behaviour.
    const previousWins = compareParsedVersions(previousCaret, nextCaret) >= 0;
    const higher = previousWins ? previousCaret : nextCaret;
    const lower = previousWins ? nextCaret : previousCaret;
    if (compareParsedVersions(higher, caretExclusiveUpperBound(lower)) < 0) {
      return previousWins ? previous : next;
    }
  }

  throw new Error(`Incompatible dependency versions for ${name}: ${previous} and ${next}`);
}
export function collectPublicDependencies(packageJsons) {
const dependencies = new Map();
for (const packageJson of packageJsons) {
for (const [name, version] of Object.entries(packageJson.dependencies ?? {})) {
if (isWorkspacePackageName(name)) {
continue;
}
const previous = dependencies.get(name);
dependencies.set(name, previous ? mergeDependencyVersion(name, previous, version) : version);
}
}
return sortedObject(dependencies);
export function collectPublicDependencies(cliPackageJson) {
return sortedObject(
Object.entries(cliPackageJson.dependencies ?? {}).filter(([name]) => !isWorkspacePackageName(name)),
);
}
export function publicNpmPackageJson(cliPackageJson, dependencies, version = PUBLIC_NPM_PACKAGE_VERSION) {
@ -142,7 +78,6 @@ export function publicNpmPackageJson(cliPackageJson, dependencies, version = PUB
},
files: ['dist', 'assets'],
dependencies,
bundledDependencies: PUBLIC_BUNDLED_WORKSPACE_PACKAGES,
license: cliPackageJson.license ?? 'Apache-2.0',
repository: {
type: 'git',
@ -155,20 +90,6 @@ export function publicNpmPackageJson(cliPackageJson, dependencies, version = PUB
};
}
/**
 * Builds the trimmed-down manifest written next to a bundled workspace package.
 *
 * Only the fields listed below are carried over; everything else on the source
 * manifest (for example `dependencies`) is left out, and the result is always
 * marked `private`.
 *
 * @param {object} packageJson - Parsed package.json of the workspace package.
 * @returns {object} Manifest with name, version, private, type, main, types,
 *   exports, files and license, with fallbacks for version, type and license.
 */
function bundledWorkspacePackageJson(packageJson) {
  const { name, version, type, main, types, exports, files, license } = packageJson;
  return {
    name,
    version: version ?? PUBLIC_NPM_PACKAGE_VERSION,
    private: true,
    type: type ?? 'module',
    main,
    types,
    exports,
    files,
    license: license ?? 'Apache-2.0',
  };
}
async function copyPackageFileEntries(sourceRoot, targetRoot, packageJson) {
for (const entry of packageJson.files ?? ['dist']) {
await cp(join(sourceRoot, entry), join(targetRoot, entry), {
@ -186,46 +107,18 @@ async function copyCliPackage(layout, cliPackageJson, dependencies) {
);
}
/**
 * Copies one bundled workspace package into the public package's
 * `node_modules` tree and writes its reduced manifest.
 *
 * @param {string} rootDir - Root that both the source package path and the
 *   `dist/public-npm-package` output are joined onto.
 * @param {string} packageName - Package name; split on `/` to form the
 *   directory segments under `node_modules`.
 * @param {object} packageJson - Parsed manifest of the workspace package.
 * @returns {Promise<void>}
 * @throws {Error} When no package root is registered for `packageName`.
 */
async function copyBundledWorkspacePackage(rootDir, packageName, packageJson) {
  const relativeRoot = PUBLIC_BUNDLED_WORKSPACE_PACKAGE_ROOTS[packageName];
  if (!relativeRoot) {
    throw new Error(`Missing bundled workspace package root for ${packageName}`);
  }

  const origin = join(rootDir, relativeRoot);
  const nameSegments = packageName.split('/');
  const destination = join(rootDir, 'dist', 'public-npm-package', 'node_modules', ...nameSegments);

  await mkdir(destination, { recursive: true });
  await copyPackageFileEntries(origin, destination, packageJson);

  const manifestPath = join(destination, 'package.json');
  await writeJson(manifestPath, bundledWorkspacePackageJson(packageJson));
}
export async function createPublicNpmPackageTree(layout = publicNpmPackageLayout()) {
const cliPackageJson = await readJson(join(layout.cliPackageRoot, 'package.json'));
const bundledPackageJsons = await Promise.all(
PUBLIC_BUNDLED_WORKSPACE_PACKAGES.map(async (packageName) => {
const packageRoot = PUBLIC_BUNDLED_WORKSPACE_PACKAGE_ROOTS[packageName];
const packageJson = await readJson(join(layout.rootDir, packageRoot, 'package.json'));
if (packageJson.name !== packageName) {
throw new Error(`Unexpected package name in ${packageRoot}/package.json: ${packageJson.name}`);
}
return packageJson;
}),
);
const dependencies = collectPublicDependencies([cliPackageJson, ...bundledPackageJsons]);
const dependencies = collectPublicDependencies(cliPackageJson);
await rm(layout.packRoot, { recursive: true, force: true });
await mkdir(layout.packRoot, { recursive: true });
await mkdir(layout.npmDir, { recursive: true });
await copyCliPackage(layout, cliPackageJson, dependencies);
for (const packageJson of bundledPackageJsons) {
await copyBundledWorkspacePackage(layout.rootDir, packageJson.name, packageJson);
}
return {
layout,
packageJson: publicNpmPackageJson(cliPackageJson, dependencies, layout.packageVersion),
bundledPackages: PUBLIC_BUNDLED_WORKSPACE_PACKAGES,
};
}

View file

@ -5,7 +5,6 @@ import { join } from 'node:path';
import { describe, it } from 'node:test';
import {
PUBLIC_BUNDLED_WORKSPACE_PACKAGES,
PUBLIC_NPM_PACKAGE_NAME,
PUBLIC_NPM_PACKAGE_VERSION,
collectPublicDependencies,
@ -56,8 +55,9 @@ async function writeWorkspaceFixture(root) {
files: ['dist', 'assets'],
dependencies: {
'@clack/prompts': '1.3.0',
'@ktx/context': 'workspace:*',
ai: '^6.0.168',
commander: '14.0.3',
yaml: '^2.8.2',
},
license: 'Apache-2.0',
repository: {
@ -74,68 +74,6 @@ async function writeWorkspaceFixture(root) {
},
);
await writePackage(
root,
'packages/context',
{
name: '@ktx/context',
version: '0.0.0-private',
type: 'module',
main: 'dist/index.js',
exports: { '.': './dist/index.js' },
files: ['dist', 'prompts', 'skills'],
dependencies: {
'@ktx/llm': 'workspace:*',
yaml: '^2.8.2',
},
},
{
'dist/index.js': 'export const context = true;\n',
'prompts/system.md': 'prompt\n',
'skills/sl/SKILL.md': 'skill\n',
},
);
await writePackage(
root,
'packages/llm',
{
name: '@ktx/llm',
version: '0.0.0-private',
type: 'module',
main: 'dist/index.js',
exports: { '.': './dist/index.js' },
files: ['dist'],
dependencies: {
ai: '^6.0.168',
},
},
{
'dist/index.js': 'export const llm = true;\n',
},
);
for (const packageName of PUBLIC_BUNDLED_WORKSPACE_PACKAGES.filter((name) => name.startsWith('@ktx/connector-'))) {
const directory = packageName.replace('@ktx/', '');
await writePackage(
root,
`packages/${directory}`,
{
name: packageName,
version: '0.0.0-private',
type: 'module',
main: 'dist/index.js',
exports: { '.': './dist/index.js' },
files: ['dist'],
dependencies: {
'@ktx/context': 'workspace:*',
},
},
{
'dist/index.js': `export const name = ${JSON.stringify(packageName)};\n`,
},
);
}
}
describe('publicNpmPackageLayout', () => {
@ -152,51 +90,25 @@ describe('publicNpmPackageLayout', () => {
});
describe('collectPublicDependencies', () => {
it('unions external runtime dependencies and omits workspace packages', () => {
it('returns CLI external runtime dependencies and omits workspace packages', () => {
assert.deepEqual(
collectPublicDependencies([
{
name: '@ktx/cli',
dependencies: {
'@ktx/context': 'workspace:*',
commander: '14.0.3',
zod: '^4.4.3',
},
collectPublicDependencies({
name: '@ktx/cli',
dependencies: {
'@ktx/internal-only': 'workspace:*',
commander: '14.0.3',
zod: '^4.4.3',
},
{
name: '@ktx/context',
dependencies: {
'@ktx/llm': 'workspace:*',
commander: '14.0.3',
yaml: '^2.8.2',
zod: '^4.1.13',
},
},
]),
}),
{
commander: '14.0.3',
yaml: '^2.8.2',
zod: '^4.4.3',
},
);
});
it('fails on incompatible external dependency ranges', () => {
assert.throws(
() =>
collectPublicDependencies([
{ name: '@ktx/cli', dependencies: { zod: '^4.4.3' } },
{ name: '@ktx/context', dependencies: { zod: '^3.25.0' } },
]),
/Incompatible dependency versions for zod/,
);
});
});
describe('publicNpmPackageJson', () => {
it('does not bundle the removed PostHog connector package', () => {
assert.equal(PUBLIC_BUNDLED_WORKSPACE_PACKAGES.includes('@ktx/connector-posthog'), false);
});
it('describes the public @kaelio/ktx binary package', () => {
const packageJson = publicNpmPackageJson(
@ -218,7 +130,6 @@ describe('publicNpmPackageJson', () => {
assert.equal(packageJson.private, false);
assert.deepEqual(packageJson.bin, { ktx: './dist/bin.js' });
assert.deepEqual(packageJson.dependencies, { commander: '14.0.3' });
assert.deepEqual(packageJson.bundledDependencies, PUBLIC_BUNDLED_WORKSPACE_PACKAGES);
assert.deepEqual(packageJson.files, ['dist', 'assets']);
assert.deepEqual(packageJson.repository, {
type: 'git',
@ -232,7 +143,7 @@ describe('publicNpmPackageJson', () => {
});
describe('createPublicNpmPackageTree', () => {
it('copies CLI files, assets, and bundled internal workspace packages', async () => {
it('copies CLI files and assets without bundled internal workspace packages', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-public-npm-test-'));
try {
await writeWorkspaceFixture(root);
@ -248,20 +159,10 @@ describe('createPublicNpmPackageTree', () => {
await readFile(join(layout.packRoot, 'assets', 'python', 'manifest.json'), 'utf8'),
'{"schemaVersion":1}\n',
);
assert.equal(
await readFile(join(layout.packRoot, 'node_modules', '@ktx', 'context', 'dist', 'index.js'), 'utf8'),
'export const context = true;\n',
await assert.rejects(
() => readFile(join(layout.packRoot, 'node_modules', '@ktx', 'context', 'package.json'), 'utf8'),
/ENOENT/,
);
assert.equal(
await readFile(join(layout.packRoot, 'node_modules', '@ktx', 'context', 'prompts', 'system.md'), 'utf8'),
'prompt\n',
);
const bundledContextJson = JSON.parse(
await readFile(join(layout.packRoot, 'node_modules', '@ktx', 'context', 'package.json'), 'utf8'),
);
assert.equal(bundledContextJson.private, true);
assert.equal(bundledContextJson.dependencies, undefined);
} finally {
await rm(root, { recursive: true, force: true });
}

View file

@ -5,7 +5,7 @@ import path from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
const codeExtensions = new Set(['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.py']);
const runtimeAssetPatterns = [/^packages\/[^/]+\/prompts\/.+\.md$/, /^packages\/[^/]+\/skills\/.+\.md$/];
const runtimeAssetPatterns = [/^packages\/cli\/src\/prompts\/.+\.md$/, /^packages\/cli\/src\/skills\/.+\.md$/];
const identifierSkipPrefixes = ['docs/', 'docs-site/', 'examples/', 'python/ktx-sl/plans/', 'python/ktx-sl/openspec/'];
const identifierAllowPatterns = [
/^packages\/cli\/src\/(?:index|managed-local-embeddings|managed-python-command|managed-python-daemon|managed-python-runtime|release-version|runtime)(?:\.test)?\.ts$/,
@ -85,7 +85,7 @@ function scansForAppImports(relativePath) {
}
function scansForLlmBoundaries(relativePath) {
return isCodeSource(relativePath) && relativePath.startsWith('packages/context/src/');
return isCodeSource(relativePath) && relativePath.startsWith('packages/cli/src/context/');
}
function isTestSource(relativePath) {
@ -133,7 +133,7 @@ export function scanFileContent(relativePath, content) {
violations.push({
file: normalizedPath,
kind: 'llm-boundary',
message: `Forbidden ${llmBoundaryPattern.label}; use @ktx/llm`,
message: `Forbidden ${llmBoundaryPattern.label}; use packages/cli/src/llm`,
});
}
}
@ -145,7 +145,7 @@ export function scanFileContent(relativePath, content) {
violations.push({
file: normalizedPath,
kind: 'llm-boundary',
message: `Forbidden ${llmBoundaryPattern.label}; use getModel(role) inside @ktx/context`,
message: `Forbidden ${llmBoundaryPattern.label}; use getModel(role) inside context modules`,
});
}
}

View file

@ -17,8 +17,8 @@ describe('scanFileContent', () => {
const pythonAppPath = `${['python', 'service'].join('-')}/app/api/endpoints/semantic_layer.py`;
const violations = [
...scanFileContent('packages/context/src/index.ts', `import { orpc } from '${serverAlias}';`),
...scanFileContent('packages/context/src/index.ts', `import "${pythonAppPath}";`),
...scanFileContent('packages/cli/src/context/index.ts', `import { orpc } from '${serverAlias}';`),
...scanFileContent('packages/cli/src/context/index.ts', `import "${pythonAppPath}";`),
];
assert.deepEqual(
@ -28,7 +28,7 @@ describe('scanFileContent', () => {
});
it('rejects forbidden product identifiers in code source files', () => {
const violations = scanFileContent('packages/context/src/index.ts', `export const owner = '${lowerProductName()}';`);
const violations = scanFileContent('packages/cli/src/context/index.ts', `export const owner = '${lowerProductName()}';`);
assert.equal(violations.length, 1);
assert.equal(violations[0]?.kind, 'identifier');
@ -36,24 +36,24 @@ describe('scanFileContent', () => {
it('rejects forbidden product identifiers in shipped runtime prompt assets', () => {
const violations = scanFileContent(
'packages/context/prompts/memory_agent_bundle_ingest_work_unit.md',
'packages/cli/src/prompts/memory_agent_bundle_ingest_work_unit.md',
`Write output for ${productName()}.`,
);
assert.equal(violations.length, 1);
assert.equal(violations[0]?.kind, 'identifier');
assert.equal(violations[0]?.file, 'packages/context/prompts/memory_agent_bundle_ingest_work_unit.md');
assert.equal(violations[0]?.file, 'packages/cli/src/prompts/memory_agent_bundle_ingest_work_unit.md');
});
it('rejects forbidden product identifiers in shipped runtime skill assets', () => {
const violations = scanFileContent(
'packages/context/skills/metabase_ingest/SKILL.md',
'packages/cli/src/skills/metabase_ingest/SKILL.md',
`Use ${productName()} project conventions.`,
);
assert.equal(violations.length, 1);
assert.equal(violations[0]?.kind, 'identifier');
assert.equal(violations[0]?.file, 'packages/context/skills/metabase_ingest/SKILL.md');
assert.equal(violations[0]?.file, 'packages/cli/src/skills/metabase_ingest/SKILL.md');
});
it('allows product identifiers in docs, examples, and transition metadata', () => {
@ -69,7 +69,7 @@ describe('scanFileContent', () => {
const name = lowerProductName();
assert.equal(scanFileContent('packages/cli/src/setup.test.ts', `project: ${name}-dev`).length, 0);
assert.equal(scanFileContent('packages/context/src/ingest/importer.test.ts', `email: system@${name}.dev`).length, 0);
assert.equal(scanFileContent('packages/cli/src/context/ingest/importer.test.ts', `email: system@${name}.dev`).length, 0);
assert.equal(scanFileContent('python/ktx-daemon/tests/test_package.py', `${name}-ktx`).length, 0);
});
@ -87,23 +87,23 @@ describe('scanFileContent', () => {
it('allows clean source files and clean runtime prompt assets', () => {
assert.deepEqual(
scanFileContent('packages/context/src/index.ts', "export const packageName = '@ktx/context';"),
scanFileContent('packages/cli/src/context/index.ts', "export const packageName = 'ktx';"),
[],
);
assert.deepEqual(
scanFileContent('packages/context/prompts/memory_agent_bundle_ingest_work_unit.md', 'Write output for KTX.'),
scanFileContent('packages/cli/src/prompts/memory_agent_bundle_ingest_work_unit.md', 'Write output for KTX.'),
[],
);
});
it('rejects context-owned LLM provider construction outside @ktx/llm', () => {
it('rejects context-owned LLM provider construction outside llm modules', () => {
const violations = [
...scanFileContent(
'packages/context/src/agent/local-llm-provider.ts',
'packages/cli/src/context/agent/local-llm-provider.ts',
"import { createAnthropic } from '@ai-sdk/anthropic';",
),
...scanFileContent('packages/context/src/scan/local-ai-gateway-enrichment.ts', "import { createGateway } from 'ai';"),
...scanFileContent('packages/context/src/core/local-embedding-provider.ts', "import { embedMany } from 'ai';"),
...scanFileContent('packages/cli/src/context/scan/local-ai-gateway-enrichment.ts', "import { createGateway } from 'ai';"),
...scanFileContent('packages/cli/src/context/core/local-embedding-provider.ts', "import { embedMany } from 'ai';"),
];
assert.deepEqual(
@ -114,9 +114,9 @@ describe('scanFileContent', () => {
it('rejects old KTX LLM port declarations in context', () => {
const violations = [
...scanFileContent('packages/context/src/agent/agent-runner.service.ts', 'export interface LlmProviderPort {}'),
...scanFileContent('packages/context/src/scan/types.ts', 'export interface KtxScanLlmPort {}'),
...scanFileContent('packages/context/src/agent/gateway-llm-provider.ts', 'export function createGatewayLlmProvider() {}'),
...scanFileContent('packages/cli/src/context/agent/agent-runner.service.ts', 'export interface LlmProviderPort {}'),
...scanFileContent('packages/cli/src/context/scan/types.ts', 'export interface KtxScanLlmPort {}'),
...scanFileContent('packages/cli/src/context/agent/gateway-llm-provider.ts', 'export function createGatewayLlmProvider() {}'),
];
assert.deepEqual(
@ -127,7 +127,7 @@ describe('scanFileContent', () => {
it('rejects getModelByName calls in context production source', () => {
const violations = scanFileContent(
'packages/context/src/ingest/page-triage/page-triage.service.ts',
'packages/cli/src/context/ingest/page-triage/page-triage.service.ts',
"const model = this.deps.llmProvider.getModelByName('claude-sonnet-4-6');",
);
@ -135,14 +135,14 @@ describe('scanFileContent', () => {
assert.equal(violations[0]?.kind, 'llm-boundary');
assert.equal(
violations[0]?.message,
'Forbidden context getModelByName call; use getModel(role) inside @ktx/context',
'Forbidden context getModelByName call; use getModel(role) inside context modules',
);
});
it('allows role-driven getModel calls, test calls, and provider shape declarations', () => {
assert.deepEqual(
scanFileContent(
'packages/context/src/ingest/page-triage/page-triage.service.ts',
'packages/cli/src/context/ingest/page-triage/page-triage.service.ts',
"const model = this.deps.llmProvider.getModel('triage');",
),
[],
@ -150,7 +150,7 @@ describe('scanFileContent', () => {
assert.deepEqual(
scanFileContent(
'packages/context/src/ingest/page-triage/page-triage.service.test.ts',
'packages/cli/src/context/ingest/page-triage/page-triage.service.test.ts',
"const model = this.deps.llmProvider.getModelByName('test-model');",
),
[],
@ -158,7 +158,7 @@ describe('scanFileContent', () => {
assert.deepEqual(
scanFileContent(
'packages/context/src/scan/local-enrichment.ts',
'packages/cli/src/context/scan/local-enrichment.ts',
'return { getModel() { return model; }, getModelByName() { return model; } };',
),
[],

View file

@ -144,18 +144,13 @@ describe('standalone example docs', () => {
assert.doesNotMatch(orbitConfig, legacyPublicAdapter);
});
it('lists every workspace package in the contributor docs', async () => {
it('lists the consolidated workspace layout in the contributor docs', async () => {
const contributing = await readText('docs-site/content/docs/community/contributing.mdx');
assert.match(contributing, /cli\/\s+# CLI entry point/);
assert.match(contributing, /context\/\s+# Core context engine/);
assert.match(contributing, /llm\/\s+# LLM client abstraction/);
assert.match(contributing, /connector-bigquery\/\s+# BigQuery connector/);
assert.match(contributing, /connector-mysql\/\s+# MySQL connector/);
assert.match(contributing, /connector-postgres\/\s+# PostgreSQL connector/);
assert.match(contributing, /connector-snowflake\/\s+# Snowflake connector/);
assert.match(contributing, /connector-sqlite\/\s+# SQLite connector/);
assert.match(contributing, /connector-sqlserver\/\s+# SQL Server connector/);
assert.match(contributing, /cli\/\s+# CLI package and published npm package source/);
assert.match(contributing, /src\/context\/\s+# Core context engine/);
assert.match(contributing, /src\/llm\/\s+# LLM client abstraction/);
assert.match(contributing, /src\/connectors\/\s+# Database connectors/);
assert.match(contributing, /ktx-sl\/\s+# Semantic layer/);
assert.match(contributing, /ktx-daemon\/\s+# Daemon/);
});

View file

@ -263,8 +263,7 @@ async function assertPathExists(path, label) {
}
async function prepareCleanInstall(layout, cleanInstallDir) {
await assertPathExists(layout.contextTarball, '@ktx/context tarball');
await assertPathExists(layout.cliTarball, '@ktx/cli tarball');
await assertPathExists(layout.cliTarball, 'CLI tarball');
await mkdir(cleanInstallDir, { recursive: true });
await writeFile(join(cleanInstallDir, 'package.json'), `${JSON.stringify(npmSmokePackageJson(layout), null, 2)}\n`);
await writeFile(join(cleanInstallDir, 'pnpm-workspace.yaml'), npmSmokePnpmWorkspaceYaml());

View file

@ -8,11 +8,11 @@ describe('normalizeLcovContent', () => {
const input = ['TN:', 'SF:src/index.ts', 'SF:src\\windows.ts', 'DA:1,1', 'end_of_record'].join('\n');
assert.equal(
normalizeLcovContent(input, 'packages/context'),
normalizeLcovContent(input, 'packages/cli'),
[
'TN:',
'SF:packages/context/src/index.ts',
'SF:packages/context/src/windows.ts',
'SF:packages/cli/src/index.ts',
'SF:packages/cli/src/windows.ts',
'DA:1,1',
'end_of_record',
].join('\n'),

View file

@ -25,15 +25,6 @@ export {
};
export const INTERNAL_NPM_WORKSPACE_PACKAGES = [
{ name: '@ktx/context', packageRoot: 'packages/context' },
{ name: '@ktx/llm', packageRoot: 'packages/llm' },
{ name: '@ktx/connector-bigquery', packageRoot: 'packages/connector-bigquery' },
{ name: '@ktx/connector-clickhouse', packageRoot: 'packages/connector-clickhouse' },
{ name: '@ktx/connector-mysql', packageRoot: 'packages/connector-mysql' },
{ name: '@ktx/connector-postgres', packageRoot: 'packages/connector-postgres' },
{ name: '@ktx/connector-snowflake', packageRoot: 'packages/connector-snowflake' },
{ name: '@ktx/connector-sqlite', packageRoot: 'packages/connector-sqlite' },
{ name: '@ktx/connector-sqlserver', packageRoot: 'packages/connector-sqlserver' },
{ name: '@ktx/cli', packageRoot: 'packages/cli' },
];
@ -81,14 +72,11 @@ export function packageArtifactLayout(rootDir = scriptRootDir(), version = publi
}
export function buildArtifactCommands(layout) {
// One recursive pnpm invocation; topology comes from workspace deps in
// each package.json, parallelism from --workspace-concurrency.
const npmBuildCommand = {
command: 'pnpm',
args: [
'--filter',
'./packages/*',
'--workspace-concurrency=10',
'@ktx/cli',
'run',
'build',
],

View file

@ -112,14 +112,14 @@ describe('packageArtifactLayout', () => {
});
describe('buildArtifactCommands', () => {
it('builds TypeScript packages in parallel topology, then the runtime wheel, then packs npm artifacts', () => {
it('builds the CLI package, then the runtime wheel, then packs npm artifacts', () => {
const layout = packageArtifactLayout('/repo/ktx', PUBLIC_NPM_PACKAGE_VERSION);
const commands = buildArtifactCommands(layout);
assert.deepEqual(
commands.map((command) => [command.command, command.args]),
[
['pnpm', ['--filter', './packages/*', '--workspace-concurrency=10', 'run', 'build']],
['pnpm', ['--filter', '@ktx/cli', 'run', 'build']],
[process.execPath, ['scripts/build-python-runtime-wheel.mjs']],
[process.execPath, ['scripts/build-public-npm-package.mjs']],
],

View file

@ -0,0 +1,353 @@
import { readdir, readFile, realpath, rm, stat, writeFile, mkdtemp } from 'node:fs/promises';
import { createRequire } from 'node:module';
import { tmpdir } from 'node:os';
import { dirname, join, relative, resolve } from 'node:path';
import { performance } from 'node:perf_hooks';
import { fileURLToPath } from 'node:url';
const require = createRequire(import.meta.url);
const scriptDir = dirname(fileURLToPath(import.meta.url));
const ktxRoot = resolve(scriptDir, '..');
const docsDir = join(ktxRoot, 'docs');
const reportPath = join(docsDir, 'hybrid-search-pglite-spike.md');
/**
 * Awaits `fn()` and reports how long the call took.
 *
 * @param {string} label - Returned unchanged so callers can tag the measurement.
 * @param {() => unknown} fn - Work to measure; its result is awaited.
 * @returns {Promise<{ label: string, durationMs: number, value: unknown }>}
 *   The label, the elapsed milliseconds rounded to two decimals, and the
 *   awaited result of `fn`.
 */
async function timed(label, fn) {
  const startedAt = performance.now();
  const value = await fn();
  const elapsed = performance.now() - startedAt;
  // toFixed returns a string; convert back so durationMs stays numeric.
  const durationMs = Number(elapsed.toFixed(2));
  return { label, durationMs, value };
}
/**
 * Sums the sizes of all regular files at or below `path`.
 *
 * A regular file yields its own size, a directory yields the recursive total
 * of its children, and any other kind of entry counts as zero.
 *
 * @param {string} path - File or directory to measure.
 * @returns {Promise<number>} Total size in bytes.
 */
async function directoryBytes(path) {
  const info = await stat(path);
  if (info.isDirectory()) {
    const names = await readdir(path);
    // Children are measured concurrently, then added up.
    const sizes = await Promise.all(names.map((name) => directoryBytes(join(path, name))));
    let total = 0;
    for (const size of sizes) {
      total += size;
    }
    return total;
  }
  return info.isFile() ? info.size : 0;
}
/**
 * Finds the package.json that belongs to an installed package.
 *
 * Starts at the directory of the package's resolved entry file and walks
 * upward, returning the first package.json whose `name` equals `packageName`.
 * Directories without a package.json, or with one carrying a different name,
 * are skipped. The walk stops before the filesystem root.
 *
 * @param {string} packageName - Name passed to `require.resolve`.
 * @returns {Promise<{ packageJsonPath: string, packageJson: object }>}
 * @throws {Error} When no matching package.json is found, or when reading or
 *   parsing a candidate fails for a reason other than the file being absent.
 */
async function resolvePackageJson(packageName) {
  let dir = dirname(require.resolve(packageName));
  for (;;) {
    const parent = dirname(dir);
    if (parent === dir) {
      break;
    }

    const packageJsonPath = join(dir, 'package.json');
    let raw = null;
    try {
      raw = await readFile(packageJsonPath, 'utf8');
    } catch (error) {
      // A missing manifest just means "keep climbing"; anything else is fatal.
      if (error?.code !== 'ENOENT') {
        throw error;
      }
    }

    if (raw !== null) {
      const packageJson = JSON.parse(raw);
      if (packageJson.name === packageName) {
        return { packageJsonPath, packageJson };
      }
    }

    dir = parent;
  }

  throw new Error(`Could not resolve package.json for ${packageName}`);
}
/**
 * Collects name, version, location and on-disk size for an installed package.
 *
 * @param {string} packageName - Package to look up via `resolvePackageJson`.
 * @returns {Promise<{ name: string, version: string, path: string, bytes: number }>}
 *   `path` is the package's real directory expressed relative to `ktxRoot`;
 *   `bytes` is the result of `directoryBytes` for that directory.
 */
async function packageInfo(packageName) {
  const resolved = await resolvePackageJson(packageName);
  // realpath so the reported location and size refer to the actual directory.
  const packageDir = await realpath(dirname(resolved.packageJsonPath));
  const bytes = await directoryBytes(packageDir);
  return {
    name: packageName,
    version: resolved.packageJson.version,
    path: relative(ktxRoot, packageDir),
    bytes,
  };
}
/**
 * Opens a PGlite database at `dataDir` with the `vector` and `pg_trgm`
 * extensions and ensures the spike schema exists.
 *
 * Every statement uses `IF NOT EXISTS`. The schema consists of:
 * - `spike_documents` with a 3-dimensional `embedding`, a GIN index over the
 *   English `to_tsvector` of `search_text`, and an ivfflat cosine index;
 * - `spike_dictionary_values` with a GIN trigram index on `value`.
 *
 * @param {{ create: Function }} PGlite - Object exposing `create(options)`.
 * @param {unknown} vector - Extension registered under the `vector` key.
 * @param {unknown} pg_trgm - Extension registered under the `pg_trgm` key.
 * @param {string} dataDir - Forwarded to `PGlite.create` as `dataDir`.
 * @returns {Promise<object>} The database handle returned by `PGlite.create`.
 */
async function createDb(PGlite, vector, pg_trgm, dataDir) {
  const extensions = { vector, pg_trgm };
  const db = await PGlite.create({ dataDir, extensions });

  const schemaSql = `
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE TABLE IF NOT EXISTS spike_documents (
id TEXT PRIMARY KEY,
search_text TEXT NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
embedding vector(3) NOT NULL
);
CREATE INDEX IF NOT EXISTS spike_documents_fts_idx
ON spike_documents
USING GIN (to_tsvector('english', search_text));
CREATE INDEX IF NOT EXISTS spike_documents_vector_idx
ON spike_documents
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 1);
CREATE TABLE IF NOT EXISTS spike_dictionary_values (
connection_id TEXT NOT NULL,
source_name TEXT NOT NULL,
column_name TEXT NOT NULL,
value TEXT NOT NULL,
PRIMARY KEY (connection_id, source_name, column_name, value)
);
CREATE INDEX IF NOT EXISTS spike_dictionary_values_trgm_idx
ON spike_dictionary_values
USING GIN (value gin_trgm_ops);
`;
  await db.exec(schemaSql);

  return db;
}
/**
 * Loads the hybrid-search fixture into the spike tables.
 *
 * Upserts three documents (id, search text, JSON metadata, 3-dimensional
 * embedding) and then inserts three dictionary values, ignoring rows that
 * already exist.
 *
 * @param {{ query: Function }} db - Database handle exposing `query(sql, params?)`.
 * @returns {Promise<void>}
 */
async function seed(db) {
  const documents = [
    {
      id: 'warehouse/orders',
      searchText: 'orders paid revenue refund status customer',
      metadata: { connectionId: 'warehouse', sourceName: 'orders' },
      embedding: [1, 0, 0],
    },
    {
      id: 'finance/orders',
      searchText: 'orders finance bookings gross margin',
      metadata: { connectionId: 'finance', sourceName: 'orders' },
      embedding: [0.72, 0.28, 0],
    },
    {
      id: 'warehouse/customers',
      searchText: 'customers accounts lifecycle region',
      metadata: { connectionId: 'warehouse', sourceName: 'customers' },
      embedding: [0, 1, 0],
    },
  ];
  // Four positional parameters per document, in the order the VALUES tuples expect.
  const documentParams = documents.flatMap((doc) => [
    doc.id,
    doc.searchText,
    JSON.stringify(doc.metadata),
    JSON.stringify(doc.embedding),
  ]);
  const upsertDocumentsSql = `
INSERT INTO spike_documents (id, search_text, metadata, embedding)
VALUES
($1, $2, $3::jsonb, $4::vector),
($5, $6, $7::jsonb, $8::vector),
($9, $10, $11::jsonb, $12::vector)
ON CONFLICT (id) DO UPDATE
SET search_text = EXCLUDED.search_text,
metadata = EXCLUDED.metadata,
embedding = EXCLUDED.embedding
`;
  await db.query(upsertDocumentsSql, documentParams);

  const insertDictionarySql = `
INSERT INTO spike_dictionary_values (connection_id, source_name, column_name, value)
VALUES
('warehouse', 'orders', 'status', 'refunded'),
('warehouse', 'orders', 'status', 'paid'),
('warehouse', 'customers', 'region', 'emea')
ON CONFLICT DO NOTHING
`;
  await db.query(insertDictionarySql);
}
/**
 * Closes a database handle when it exposes a `close` method; otherwise does nothing.
 *
 * @param {{ close?: Function }} db - Database handle.
 * @returns {Promise<void>}
 */
async function closeDb(db) {
  const canClose = typeof db.close === 'function';
  if (!canClose) {
    return;
  }
  await db.close();
}
/**
 * Runs the PGlite hybrid-search spike end to end and writes the Markdown report.
 *
 * Steps: import PGlite and its extensions, create a persistent database in a
 * temporary directory, seed it, run one FTS, one pgvector and one pg_trgm
 * query, issue parallel reads on the same instance, try to open the same data
 * directory a second time, close and reopen the database, measure the package
 * footprint, then write the report to `reportPath` and print the raw result
 * as JSON. The temporary directory is removed in all cases.
 */
async function main() {
// Imported dynamically inside timed() so the import cost shows up in the report.
const importTimer = await timed('dynamic import @electric-sql/pglite', async () => {
const [{ PGlite }, { vector }, { pg_trgm }] = await Promise.all([
import('@electric-sql/pglite'),
import('@electric-sql/pglite/vector'),
import('@electric-sql/pglite/contrib/pg_trgm'),
]);
return { PGlite, vector, pg_trgm };
});
const { PGlite, vector, pg_trgm } = importTimer.value;
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-pglite-report-'));
const dataDir = join(tempDir, 'pgdata');
// Declared outside the try block so the finally block can close whichever handle is still open.
let db;
let reopened;
try {
const createTimer = await timed('create persistent PGlite database and load extensions', async () => {
db = await createDb(PGlite, vector, pg_trgm, dataDir);
return true;
});
const seedTimer = await timed('seed hybrid search fixture', async () => seed(db));
// Probe 1: full-text search ranked with ts_rank_cd.
const ftsTimer = await timed('Postgres FTS query', () =>
db.query(
`
SELECT id
FROM spike_documents
WHERE to_tsvector('english', search_text) @@ websearch_to_tsquery('english', $1)
ORDER BY ts_rank_cd(to_tsvector('english', search_text), websearch_to_tsquery('english', $1)) DESC, id ASC
LIMIT 1
`,
['paid orders'],
),
);
// Probe 2: nearest neighbour by the pgvector <=> operator.
const vectorTimer = await timed('pgvector cosine query', () =>
db.query(
`
SELECT id, 1 - (embedding <=> $1::vector) AS similarity
FROM spike_documents
ORDER BY embedding <=> $1::vector, id ASC
LIMIT 1
`,
[JSON.stringify([1, 0, 0])],
),
);
// Probe 3: fuzzy dictionary lookup using pg_trgm similarity().
const trigramTimer = await timed('pg_trgm dictionary query', () =>
db.query(
`
SELECT connection_id || '/' || source_name AS id, value, similarity(value, $1) AS score
FROM spike_dictionary_values
WHERE similarity(value, $1) > 0
ORDER BY score DESC, id ASC, value ASC
LIMIT 1
`,
['refund'],
),
);
// Four reads issued at once against the single open instance.
const sameInstanceTimer = await timed('same instance parallel reads', () =>
Promise.all(Array.from({ length: 4 }, () => db.query('SELECT COUNT(*)::int AS count FROM spike_documents'))),
);
// Try to open the same data directory a second time while `db` is still open.
// The outcome ('opened' or 'blocked') is recorded in the report and selects the recommendation text.
let secondOpenStatus = 'opened';
let secondOpenMessage = 'Second direct opener executed SELECT 1.';
let second;
try {
second = await createDb(PGlite, vector, pg_trgm, dataDir);
await second.query('SELECT 1');
} catch (error) {
secondOpenStatus = 'blocked';
secondOpenMessage = error instanceof Error ? error.message : String(error);
} finally {
if (second) {
await closeDb(second);
}
}
// Close the first handle and clear the variable so the outer finally does not close it twice.
await closeDb(db);
db = undefined;
// Reopen the same data directory and count rows to confirm the data was persisted.
const reopenTimer = await timed('reopen persistent PGlite database', async () => {
reopened = await createDb(PGlite, vector, pg_trgm, dataDir);
return reopened.query('SELECT COUNT(*)::int AS count FROM spike_documents');
});
const packages = await Promise.all([
packageInfo('@electric-sql/pglite'),
packageInfo('@electric-sql/pglite-socket'),
]);
// Raw result object: printed as JSON at the end and used to fill the Markdown report.
const result = {
generatedAt: new Date().toISOString(),
node: process.version,
packages,
timingsMs: {
import: importTimer.durationMs,
createAndExtensions: createTimer.durationMs,
seed: seedTimer.durationMs,
ftsQuery: ftsTimer.durationMs,
vectorQuery: vectorTimer.durationMs,
trigramQuery: trigramTimer.durationMs,
sameInstanceParallelReads: sameInstanceTimer.durationMs,
reopen: reopenTimer.durationMs,
},
topResults: {
fts: ftsTimer.value.rows[0]?.id ?? null,
vector: vectorTimer.value.rows[0]?.id ?? null,
trigram: trigramTimer.value.rows[0]?.id ?? null,
persistedRowCount: reopenTimer.value.rows[0]?.count ?? null,
},
concurrency: {
sameInstanceReadCounts: sameInstanceTimer.value.map((queryResult) => queryResult.rows[0]?.count ?? null),
secondDirectOpenStatus: secondOpenStatus,
secondDirectOpenMessage: secondOpenMessage,
},
};
const totalPackageBytes = packages.reduce((sum, pkg) => sum + pkg.bytes, 0);
const recommendation =
secondOpenStatus === 'opened'
? 'Prototype a PGlite backend behind an explicit owner process or socket before exposing CLI plus MCP concurrent access.'
: 'Use a socket or owner-process architecture for any PGlite backend prototype because direct second opener access was blocked.';
// Everything between the backticks below is report content written verbatim to reportPath.
const markdown = `# Hybrid Search PGlite Spike
Generated: ${result.generatedAt}
## Summary
PGlite loaded in Node ${result.node}, enabled vector and pg_trgm extensions, executed Postgres FTS, pgvector cosine ranking, pg_trgm dictionary ranking, and reopened a persistent filesystem database.
Recommendation: ${recommendation}
## Package Footprint
| Package | Version | Approx bytes | Resolved path |
| --- | --- | ---: | --- |
${packages.map((pkg) => `| \`${pkg.name}\` | \`${pkg.version}\` | ${pkg.bytes} | \`${pkg.path}\` |`).join('\n')}
Total measured package bytes: ${totalPackageBytes}
## Timings
| Probe | Duration ms |
| --- | ---: |
${Object.entries(result.timingsMs)
.map(([name, ms]) => `| ${name} | ${ms} |`)
.join('\n')}
## Search Feature Results
| Probe | Top result |
| --- | --- |
| Postgres FTS | \`${result.topResults.fts}\` |
| pgvector cosine | \`${result.topResults.vector}\` |
| pg_trgm dictionary | \`${result.topResults.trigram}\` |
| Reopened persisted row count | \`${result.topResults.persistedRowCount}\` |
## Concurrency Observation
Same-instance parallel read counts: \`${result.concurrency.sameInstanceReadCounts.join(', ')}\`
Second direct opener status: \`${result.concurrency.secondDirectOpenStatus}\`
Second direct opener message:
\`\`\`text
${result.concurrency.secondDirectOpenMessage}
\`\`\`
## Decision
The SQLite backend remains the production default. The next PGlite step, if approved, is an owner-process or socket-backed prototype that reuses the existing \`SearchBackendCapabilities\` and backend conformance helpers without changing the public CLI surface.
`;
await writeFile(reportPath, markdown);
process.stdout.write(`Wrote ${relative(process.cwd(), reportPath)}\n`);
process.stdout.write(JSON.stringify(result, null, 2));
process.stdout.write('\n');
} finally {
// Close whichever handles are still open, then remove the temporary data directory.
if (db) {
await closeDb(db);
}
if (reopened) {
await closeDb(reopened);
}
await rm(tempDir, { recursive: true, force: true });
}
}
// Script entry point: log any failure and set a non-zero exit code
// without calling process.exit().
main().catch((error) => {
console.error(error);
process.exitCode = 1;
});

View file

@ -0,0 +1,316 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { createServer } from 'node:net';
import { tmpdir } from 'node:os';
import { dirname, join, resolve } from 'node:path';
import { performance } from 'node:perf_hooks';
import { fileURLToPath } from 'node:url';
import { PGlite } from '@electric-sql/pglite';
import { pg_trgm } from '@electric-sql/pglite/contrib/pg_trgm';
import { vector } from '@electric-sql/pglite/vector';
import { PGLiteSocketServer } from '@electric-sql/pglite-socket';
import { Client } from 'pg';
// Directory that contains this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
// Parent directory of the script directory.
const ktxRoot = resolve(scriptDir, '..');
// Markdown report written by main().
const reportPath = join(ktxRoot, 'docs', 'hybrid-search-pglite-owner-process.md');
/**
 * Awaits `fn()` and reports how long it took.
 *
 * @param {string} label - Name of the measured step, echoed back in the result.
 * @param {Function} fn - Work to measure; may return a value or a promise.
 * @returns {Promise<{ label: string, durationMs: number, value: * }>}
 *   The label, the elapsed wall-clock time in milliseconds rounded to two
 *   decimals, and the awaited result of `fn`.
 */
async function timed(label, fn) {
  const startedAt = performance.now();
  const value = await fn();
  const elapsedMs = performance.now() - startedAt;
  const durationMs = Number(elapsedMs.toFixed(2));
  return { label, durationMs, value };
}
/**
 * Asks the operating system for a free TCP port on 127.0.0.1.
 *
 * A throwaway server is bound to port 0, its assigned port is read, and the
 * server is closed again before the port number is returned. The port is only
 * known to have been free at that moment; another process may take it before
 * the caller binds to it.
 *
 * @returns {Promise<number>} The port number the probe server was bound to.
 * @throws {Error} When the probe server cannot listen, cannot be closed, or
 *   does not report a TCP address.
 */
async function allocatePort() {
  const probe = createServer();
  await new Promise((onListening, onError) => {
    // A failed bind is reported through the 'error' event, not the listen callback.
    probe.once('error', onError);
    probe.listen(0, '127.0.0.1', () => {
      probe.off('error', onError);
      onListening();
    });
  });
  const address = probe.address();
  // Close before validating so the probe never stays open, even on the error path.
  await new Promise((onClosed, onError) => {
    probe.close((error) => {
      if (error) {
        onError(error);
        return;
      }
      onClosed();
    });
  });
  if (typeof address !== 'object' || address === null) {
    throw new Error('Expected TCP server address while allocating a PGlite owner-process port.');
  }
  return address.port;
}
/**
 * Starts the owner: opens the PGlite database at `dataDir`, ensures the
 * prototype schema exists, and exposes the database through a
 * `PGLiteSocketServer` on 127.0.0.1.
 *
 * If schema setup or server start fails, the database is closed before the
 * error is rethrown, because the caller never receives a handle it could
 * clean up.
 *
 * @param {string} dataDir - Directory handed to PGlite as `dataDir`.
 * @param {number} port - TCP port the socket server listens on.
 * @returns {Promise<{ db: object, server: object, connectionConfig: object }>}
 *   The database, the started socket server, and the client configuration
 *   used to connect to it.
 * @throws Whatever `PGlite.create`, `db.exec` or `server.start` throws.
 */
async function createOwner(dataDir, port) {
  const db = await PGlite.create({
    dataDir,
    extensions: {
      vector,
      pg_trgm,
    },
  });
  try {
    await db.exec(`
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE TABLE IF NOT EXISTS prototype_documents (
id TEXT PRIMARY KEY,
search_text TEXT NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
embedding vector(3) NOT NULL
);
CREATE INDEX IF NOT EXISTS prototype_documents_fts_idx
ON prototype_documents
USING GIN (to_tsvector('english', search_text));
CREATE INDEX IF NOT EXISTS prototype_documents_vector_idx
ON prototype_documents
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 1);
CREATE TABLE IF NOT EXISTS prototype_dictionary_values (
connection_id TEXT NOT NULL,
source_name TEXT NOT NULL,
column_name TEXT NOT NULL,
value TEXT NOT NULL,
PRIMARY KEY (connection_id, source_name, column_name, value)
);
CREATE INDEX IF NOT EXISTS prototype_dictionary_values_trgm_idx
ON prototype_dictionary_values
USING GIN (value gin_trgm_ops);
`);
    const server = new PGLiteSocketServer({
      db,
      host: '127.0.0.1',
      port,
      maxConnections: 100,
    });
    await server.start();
    return {
      db,
      server,
      connectionConfig: {
        host: '127.0.0.1',
        port,
        user: 'postgres',
        database: 'postgres',
        application_name: 'ktx-pglite-owner-report',
        connectionTimeoutMillis: 5_000,
      },
    };
  } catch (error) {
    // Release the database: nothing else holds a reference to it on this path.
    try {
      await db.close();
    } catch {
      // Deliberately ignored: the setup error is the failure worth reporting.
    }
    throw error;
  }
}
/**
 * Runs `fn` with a connected `pg` client and always ends the client afterwards.
 *
 * @param {object} connectionConfig - Configuration passed to the `Client` constructor.
 * @param {Function} fn - Callback receiving the connected client.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
async function withClient(connectionConfig, fn) {
  const connection = new Client(connectionConfig);
  await connection.connect();
  let outcome;
  try {
    outcome = await fn(connection);
  } finally {
    // Runs on success and on failure so the connection is never left open.
    await connection.end();
  }
  return outcome;
}
/**
 * Loads the prototype fixture through one socket client.
 *
 * Upserts three documents (id, search text, JSON metadata, 3-dimensional
 * embedding) and then inserts three dictionary values, ignoring rows that
 * already exist.
 *
 * @param {object} connectionConfig - Client configuration for the owner's socket server.
 * @returns {Promise<void>}
 */
async function seed(connectionConfig) {
  // [connectionId, sourceName, searchText, embedding]; the document id is "<connectionId>/<sourceName>".
  const fixtureRows = [
    ['warehouse', 'orders', 'orders paid revenue refund status customer', [1, 0, 0]],
    ['finance', 'orders', 'orders finance bookings gross margin', [0.72, 0.28, 0]],
    ['warehouse', 'customers', 'customers accounts lifecycle region', [0, 1, 0]],
  ];
  const documentParams = fixtureRows.flatMap(([connectionId, sourceName, searchText, embedding]) => [
    `${connectionId}/${sourceName}`,
    searchText,
    JSON.stringify({ connectionId, sourceName }),
    JSON.stringify(embedding),
  ]);
  const upsertDocumentsSql = `
INSERT INTO prototype_documents (id, search_text, metadata, embedding)
VALUES
($1, $2, $3::jsonb, $4::vector),
($5, $6, $7::jsonb, $8::vector),
($9, $10, $11::jsonb, $12::vector)
ON CONFLICT (id) DO UPDATE
SET search_text = EXCLUDED.search_text,
metadata = EXCLUDED.metadata,
embedding = EXCLUDED.embedding
`;
  const insertDictionarySql = `
INSERT INTO prototype_dictionary_values (connection_id, source_name, column_name, value)
VALUES
('warehouse', 'orders', 'status', 'refunded'),
('warehouse', 'orders', 'status', 'paid'),
('warehouse', 'customers', 'region', 'emea')
ON CONFLICT DO NOTHING
`;
  await withClient(connectionConfig, async (client) => {
    await client.query(upsertDocumentsSql, documentParams);
    await client.query(insertDictionarySql);
  });
}
/**
 * Runs the three search probes (full-text, vector distance, trigram
 * similarity) over one socket client and returns the top id of each.
 *
 * @param {object} connectionConfig - Client configuration for the owner's socket server.
 * @returns {Promise<{ lexical: string, semantic: string, dictionary: string }>}
 *   Top id per probe, or the literal '<missing>' when a probe returned no row.
 */
async function queryTopResults(connectionConfig) {
  const lexicalSql = `
SELECT id
FROM prototype_documents
WHERE to_tsvector('english', search_text) @@ websearch_to_tsquery('english', $1)
ORDER BY ts_rank_cd(to_tsvector('english', search_text), websearch_to_tsquery('english', $1)) DESC, id ASC
LIMIT 1
`;
  const semanticSql = `
SELECT id
FROM prototype_documents
ORDER BY embedding <=> $1::vector, id ASC
LIMIT 1
`;
  const dictionarySql = `
SELECT connection_id || '/' || source_name AS id
FROM prototype_dictionary_values
WHERE similarity(value, $1) > 0
ORDER BY similarity(value, $1) DESC, id ASC, value ASC
LIMIT 1
`;
  const topId = (queryResult) => queryResult.rows[0]?.id ?? '<missing>';
  const runProbes = async (client) => {
    // Issued one after another on the same client.
    const lexical = await client.query(lexicalSql, ['paid orders']);
    const semantic = await client.query(semanticSql, [JSON.stringify([1, 0, 0])]);
    const dictionary = await client.query(dictionarySql, ['refund']);
    return {
      lexical: topId(lexical),
      semantic: topId(semantic),
      dictionary: topId(dictionary),
    };
  };
  return await withClient(connectionConfig, runProbes);
}
/**
 * Opens four socket clients, runs the same COUNT(*) query on all of them at
 * once, and returns the four counts.
 *
 * Every client is ended in all cases, including when one of the connection
 * attempts fails; the first connection failure is rethrown after cleanup.
 *
 * @param {object} connectionConfig - Client configuration for the owner's socket server.
 * @returns {Promise<Array<number|null>>} One count per client (`null` when a
 *   result has no row).
 * @throws The first connection or query error.
 */
async function concurrentReads(connectionConfig) {
  const clients = Array.from({ length: 4 }, () => new Client(connectionConfig));
  try {
    // Wait for every attempt to settle so cleanup never races a connect still in flight.
    const connections = await Promise.allSettled(clients.map((client) => client.connect()));
    const failedConnection = connections.find((outcome) => outcome.status === 'rejected');
    if (failedConnection) {
      throw failedConnection.reason;
    }
    const results = await Promise.all(
      clients.map((client) => client.query('SELECT COUNT(*)::int AS count FROM prototype_documents')),
    );
    return results.map((result) => result.rows[0]?.count ?? null);
  } finally {
    // Best-effort close: a failure to end one client must not hide the real error or skip the others.
    await Promise.all(clients.map((client) => client.end().catch(() => undefined)));
  }
}
/**
 * Shuts an owner down: stops the socket server, then closes the database.
 *
 * The database is closed even when stopping the server fails, so a failed
 * stop cannot leave the data directory open.
 *
 * @param {{ server: { stop: Function }, db: { close: Function } }} owner - Value returned by `createOwner`.
 * @returns {Promise<void>}
 * @throws Whatever `server.stop` or `db.close` throws.
 */
async function stopOwner(owner) {
  try {
    await owner.server.stop();
  } finally {
    await owner.db.close();
  }
}
/**
 * Runs the owner-process prototype end to end and writes the Markdown report.
 *
 * Starts an owner (PGlite plus socket server) on a temporary data directory,
 * seeds it and runs the search and concurrent-read probes through socket
 * clients, restarts the owner on the same data directory and port to check
 * that rows persisted, then writes the report to `reportPath` and logs a JSON
 * summary. The owner is stopped and the temporary directory removed in all cases.
 */
async function main() {
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-pglite-owner-report-'));
const dataDir = join(tempDir, 'pgdata');
const port = await allocatePort();
// Tracks the currently running owner so the finally block can stop it.
let owner;
try {
const startTimer = await timed('startOwner', async () => await createOwner(dataDir, port));
owner = startTimer.value;
const seedTimer = await timed('seed', async () => await seed(owner.connectionConfig));
const queryTimer = await timed('searchQueries', async () => await queryTopResults(owner.connectionConfig));
const concurrentTimer = await timed('concurrentReads', async () => await concurrentReads(owner.connectionConfig));
// Stop the first owner and clear the variable so the finally block does not stop it again.
await stopOwner(owner);
owner = undefined;
// Restart on the same data directory and port, then count rows to confirm persistence.
const restartTimer = await timed('restartOwner', async () => await createOwner(dataDir, port));
owner = restartTimer.value;
const persisted = await withClient(owner.connectionConfig, async (client) => {
const result = await client.query('SELECT COUNT(*)::int AS count FROM prototype_documents');
return result.rows[0]?.count ?? null;
});
// Everything between the backticks below is report content written verbatim to reportPath.
const markdown = `# Hybrid Search PGlite Owner Process Prototype
Generated: ${new Date().toISOString()}
## Summary
PGlite started behind one explicit owner process, enabled vector and pg_trgm extensions, served PostgreSQL clients through \`@electric-sql/pglite-socket\`, answered lexical, semantic, and dictionary probes, and preserved rows across owner restart.
Recommendation: Keep SQLite as the production default. The next PGlite implementation step should be a private adapter prototype behind an explicit configuration flag, still guarded by backend conformance tests, before any CLI or MCP default changes.
## Timings
| Probe | Duration ms |
| --- | ---: |
| startOwner | ${startTimer.durationMs} |
| seed | ${seedTimer.durationMs} |
| searchQueries | ${queryTimer.durationMs} |
| concurrentReads | ${concurrentTimer.durationMs} |
| restartOwner | ${restartTimer.durationMs} |
## Search Feature Results
| Probe | Top result |
| --- | --- |
| Postgres FTS through socket | \`${queryTimer.value.lexical}\` |
| pgvector cosine through socket | \`${queryTimer.value.semantic}\` |
| pg_trgm dictionary through socket | \`${queryTimer.value.dictionary}\` |
| Reopened persisted row count | \`${persisted}\` |
## Concurrency Observation
Concurrent socket read counts: \`${concurrentTimer.value.join(', ')}\`
## Decision
The owner-process shape is viable for a prototype because it gives CLI and MCP callers a PostgreSQL protocol boundary without opening the same PGlite data directory from independent runtimes. This report is not a production adapter acceptance record.
`;
await writeFile(reportPath, markdown);
console.log(`Wrote ${reportPath}`);
// Machine-readable summary of the same run.
console.log(
JSON.stringify(
{
port,
timings: {
startOwner: startTimer.durationMs,
seed: seedTimer.durationMs,
searchQueries: queryTimer.durationMs,
concurrentReads: concurrentTimer.durationMs,
restartOwner: restartTimer.durationMs,
},
topResults: queryTimer.value,
concurrentReads: concurrentTimer.value,
persisted,
},
null,
2,
),
);
} finally {
// Shutdown errors are ignored here so the temporary directory is still removed.
if (owner) {
await stopOwner(owner).catch(() => undefined);
}
await rm(tempDir, { recursive: true, force: true });
}
}
// Script entry point (top-level await): a rejection from main() is not caught here.
await main();

View file

@ -0,0 +1,262 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { createServer } from 'node:net';
import { tmpdir } from 'node:os';
import { dirname, join, resolve } from 'node:path';
import { performance } from 'node:perf_hooks';
import { fileURLToPath } from 'node:url';
import { PGlite } from '@electric-sql/pglite';
import { pg_trgm } from '@electric-sql/pglite/contrib/pg_trgm';
import { vector } from '@electric-sql/pglite/vector';
import { PGLiteSocketServer } from '@electric-sql/pglite-socket';
import { Client } from 'pg';
// Directory that contains this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
// Parent directory of the script directory.
const ktxRoot = resolve(scriptDir, '..');
// Markdown report written by main().
const reportPath = join(ktxRoot, 'docs', 'hybrid-search-pglite-sl-adapter-prototype.md');
/**
 * Measures how long `fn()` takes to settle.
 *
 * @param {string} label - Name of the measured step, returned unchanged.
 * @param {Function} fn - Work to run; its result is awaited.
 * @returns {Promise<{ label: string, durationMs: number, value: * }>}
 *   The label, elapsed milliseconds rounded to two decimals, and the awaited
 *   result of `fn`.
 */
async function timed(label, fn) {
  const t0 = performance.now();
  const value = await fn();
  const roundedElapsed = (performance.now() - t0).toFixed(2);
  return {
    label,
    durationMs: Number(roundedElapsed),
    value,
  };
}
/**
 * Asks the operating system for a free TCP port on 127.0.0.1.
 *
 * A throwaway server is bound to port 0, its assigned port is read, and the
 * server is closed again before the port number is returned. The port is only
 * known to have been free at that moment; another process may take it before
 * the caller binds to it.
 *
 * @returns {Promise<number>} The port number the probe server was bound to.
 * @throws {Error} When the probe server cannot listen, cannot be closed, or
 *   does not report a TCP address.
 */
async function allocatePort() {
  const probe = createServer();
  await new Promise((onListening, onError) => {
    // A failed bind is reported through the 'error' event, not the listen callback.
    probe.once('error', onError);
    probe.listen(0, '127.0.0.1', () => {
      probe.off('error', onError);
      onListening();
    });
  });
  const address = probe.address();
  // Close before validating so the probe never stays open, even on the error path.
  await new Promise((onClosed, onError) => {
    probe.close((error) => {
      if (error) {
        onError(error);
        return;
      }
      onClosed();
    });
  });
  if (typeof address !== 'object' || address === null) {
    throw new Error('Expected TCP server address while allocating a PGlite SL prototype port.');
  }
  return address.port;
}
/**
 * Starts the owner for the semantic-layer prototype: opens the PGlite
 * database at `dataDir`, creates the prototype tables and indexes, and
 * exposes the database through a `PGLiteSocketServer` on 127.0.0.1.
 *
 * The schema statements have no `IF NOT EXISTS`, so this is meant for a fresh
 * data directory. If schema setup or server start fails, the database is
 * closed before the error is rethrown, because the caller never receives a
 * handle it could clean up.
 *
 * @param {string} dataDir - Directory handed to PGlite as `dataDir`.
 * @param {number} port - TCP port the socket server listens on.
 * @returns {Promise<{ db: object, server: object, connectionConfig: object }>}
 *   The database, the started socket server, and the client configuration
 *   used to connect to it.
 * @throws Whatever `PGlite.create`, `db.exec` or `server.start` throws.
 */
async function createOwner(dataDir, port) {
  const db = await PGlite.create({
    dataDir,
    extensions: { vector, pg_trgm },
  });
  try {
    await db.exec(`
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE TABLE prototype_sl_sources (
connection_id TEXT NOT NULL,
source_name TEXT NOT NULL,
search_text TEXT NOT NULL,
embedding vector(3),
PRIMARY KEY (connection_id, source_name)
);
CREATE INDEX prototype_sl_sources_fts_idx
ON prototype_sl_sources
USING GIN (to_tsvector('english', search_text));
CREATE INDEX prototype_sl_sources_vector_idx
ON prototype_sl_sources
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 1);
CREATE TABLE prototype_sl_dictionary_values (
connection_id TEXT NOT NULL,
source_name TEXT NOT NULL,
column_name TEXT NOT NULL,
value TEXT NOT NULL,
value_lower TEXT NOT NULL,
PRIMARY KEY (connection_id, source_name, column_name, value)
);
CREATE INDEX prototype_sl_dictionary_values_trgm_idx
ON prototype_sl_dictionary_values
USING GIN (value gin_trgm_ops);
`);
    const server = new PGLiteSocketServer({ db, host: '127.0.0.1', port, maxConnections: 100 });
    await server.start();
    return {
      db,
      server,
      connectionConfig: {
        host: '127.0.0.1',
        port,
        user: 'postgres',
        database: 'postgres',
        application_name: 'ktx-pglite-sl-prototype-report',
        connectionTimeoutMillis: 5_000,
      },
    };
  } catch (error) {
    // Release the database: nothing else holds a reference to it on this path.
    try {
      await db.close();
    } catch {
      // Deliberately ignored: the setup error is the failure worth reporting.
    }
    throw error;
  }
}
/**
 * Connects a `pg` client, hands it to `fn`, and ends the client afterwards
 * whether `fn` succeeded or failed.
 *
 * @param {object} connectionConfig - Configuration passed to the `Client` constructor.
 * @param {Function} fn - Callback receiving the connected client.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
async function withClient(connectionConfig, fn) {
  const session = new Client(connectionConfig);
  await session.connect();
  try {
    const callbackResult = await fn(session);
    return callbackResult;
  } finally {
    await session.end();
  }
}
/**
 * Loads the semantic-layer prototype fixture through one socket client:
 * three source rows (connection id, source name, search text, 3-dimensional
 * embedding) followed by three dictionary values. Plain INSERTs are used, so
 * the tables are expected to be empty.
 *
 * @param {object} connectionConfig - Client configuration for the owner's socket server.
 * @returns {Promise<void>}
 */
async function seed(connectionConfig) {
  const sources = [
    { connectionId: 'warehouse', sourceName: 'orders', searchText: 'orders paid revenue refund status customer', embedding: [1, 0, 0] },
    { connectionId: 'finance', sourceName: 'orders', searchText: 'orders finance bookings gross margin', embedding: [0.72, 0.28, 0] },
    { connectionId: 'warehouse', sourceName: 'customers', searchText: 'customers accounts lifecycle region', embedding: [0, 1, 0] },
  ];
  // Four positional parameters per source, in the order the VALUES tuples expect.
  const sourceParams = sources.flatMap((source) => [
    source.connectionId,
    source.sourceName,
    source.searchText,
    JSON.stringify(source.embedding),
  ]);
  const insertSourcesSql = `
INSERT INTO prototype_sl_sources (connection_id, source_name, search_text, embedding)
VALUES
($1, $2, $3, $4::vector),
($5, $6, $7, $8::vector),
($9, $10, $11, $12::vector)
`;
  const insertDictionarySql = `
INSERT INTO prototype_sl_dictionary_values (connection_id, source_name, column_name, value, value_lower)
VALUES
('warehouse', 'orders', 'status', 'refunded', 'refunded'),
('warehouse', 'orders', 'status', 'paid', 'paid'),
('warehouse', 'customers', 'region', 'emea', 'emea')
`;
  await withClient(connectionConfig, async (client) => {
    await client.query(insertSourcesSql, sourceParams);
    await client.query(insertDictionarySql);
  });
}
/**
 * Runs the three semantic-layer search probes over one socket client and
 * returns the top "<connection_id>/<source_name>" id of each: full-text
 * search, vector distance, and a dictionary match that combines trigram
 * similarity with a case-insensitive substring check.
 *
 * @param {object} connectionConfig - Client configuration for the owner's socket server.
 * @returns {Promise<{ lexical: string, semantic: string, dictionary: string }>}
 *   Top id per probe, or the literal '<missing>' when a probe returned no row.
 */
async function queryTopResults(connectionConfig) {
  const firstIdOrMissing = ({ rows }) => rows[0]?.id ?? '<missing>';
  return withClient(connectionConfig, async (client) => {
    const lexicalRows = await client.query(
      `
SELECT connection_id || '/' || source_name AS id
FROM prototype_sl_sources
WHERE to_tsvector('english', search_text) @@ websearch_to_tsquery('english', $1)
ORDER BY ts_rank_cd(to_tsvector('english', search_text), websearch_to_tsquery('english', $1)) DESC, id ASC
LIMIT 1
`,
      ['paid revenue'],
    );
    const semanticRows = await client.query(
      `
SELECT connection_id || '/' || source_name AS id
FROM prototype_sl_sources
ORDER BY embedding <=> $1::vector, id ASC
LIMIT 1
`,
      [JSON.stringify([1, 0, 0])],
    );
    // A substring hit on value_lower is ranked as 0.75 when trigram similarity is lower.
    const dictionaryRows = await client.query(
      `
SELECT connection_id || '/' || source_name AS id
FROM prototype_sl_dictionary_values
WHERE similarity(value, $1) > 0 OR value_lower LIKE '%' || lower($1) || '%'
ORDER BY GREATEST(similarity(value, $1), CASE WHEN value_lower LIKE '%' || lower($1) || '%' THEN 0.75 ELSE 0 END) DESC,
id ASC,
value ASC
LIMIT 1
`,
      ['refund'],
    );
    return {
      lexical: firstIdOrMissing(lexicalRows),
      semantic: firstIdOrMissing(semanticRows),
      dictionary: firstIdOrMissing(dictionaryRows),
    };
  });
}
/**
 * Shuts an owner down: stops the socket server, then closes the database.
 *
 * The database is closed even when stopping the server fails, so a failed
 * stop cannot leave the data directory open.
 *
 * @param {{ server: { stop: Function }, db: { close: Function } }} owner - Value returned by `createOwner`.
 * @returns {Promise<void>}
 * @throws Whatever `server.stop` or `db.close` throws.
 */
async function stopOwner(owner) {
  try {
    await owner.server.stop();
  } finally {
    await owner.db.close();
  }
}
/**
 * Runs the semantic-layer adapter prototype end to end and writes the
 * Markdown report.
 *
 * Starts an owner (PGlite plus socket server) on a temporary data directory,
 * seeds the index and runs the search probes through socket clients, writes
 * the report to `reportPath`, and logs a JSON summary. The owner is stopped
 * and the temporary directory removed in all cases.
 */
async function main() {
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-pglite-sl-prototype-report-'));
const dataDir = join(tempDir, 'pgdata');
const port = await allocatePort();
// Tracks the running owner so the finally block can stop it.
let owner;
try {
const startTimer = await timed('startOwner', async () => createOwner(dataDir, port));
owner = startTimer.value;
const seedTimer = await timed('seedSemanticLayerIndex', async () => seed(owner.connectionConfig));
const searchTimer = await timed('searchQueries', async () => queryTopResults(owner.connectionConfig));
// Everything between the backticks below is report content written verbatim to reportPath.
const markdown = `# Hybrid Search PGlite Semantic-Layer Adapter Prototype
Generated: ${new Date().toISOString()}
## Summary
PGlite served a semantic-layer-style search index through one owner process and PostgreSQL clients. The probe returned lexical, semantic, and dictionary top results through Postgres FTS, pgvector ordering, and pg_trgm matching.
Recommendation: Keep SQLite as the production default. The PGlite semantic-layer adapter remains private and explicitly opt-in until a separate plan decides runtime dependencies, long-lived owner lifecycle, and CLI/MCP routing.
## Timings
| Probe | Duration ms |
| --- | ---: |
| startOwner | ${startTimer.durationMs} |
| seedSemanticLayerIndex | ${seedTimer.durationMs} |
| searchQueries | ${searchTimer.durationMs} |
## Search Feature Results
| Probe | Top result |
| --- | --- |
| Postgres FTS through socket | \`${searchTimer.value.lexical}\` |
| pgvector cosine through socket | \`${searchTimer.value.semantic}\` |
| pg_trgm dictionary through socket | \`${searchTimer.value.dictionary}\` |
## Decision
The private adapter shape is viable for semantic-layer search prototypes. It is not a production backend acceptance record and does not change the default SQLite search path.
`;
await writeFile(reportPath, markdown);
console.log(`Wrote ${reportPath}`);
// Machine-readable summary; the seed timing is keyed "seed" here although the report table labels it "seedSemanticLayerIndex".
console.log(
JSON.stringify(
{
port,
timings: {
startOwner: startTimer.durationMs,
seed: seedTimer.durationMs,
searchQueries: searchTimer.durationMs,
},
topResults: searchTimer.value,
},
null,
2,
),
);
} finally {
// Shutdown errors are ignored here so the temporary directory is still removed.
if (owner) {
await stopOwner(owner).catch(() => undefined);
}
await rm(tempDir, { recursive: true, force: true });
}
}
// Script entry point (top-level await): a rejection from main() is not caught here.
await main();

View file

@ -0,0 +1,52 @@
import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
KTX_RELATIONSHIP_BENCHMARK_MODES,
buildKtxRelationshipBenchmarkReport,
currentKtxRelationshipBenchmarkDetector,
formatKtxRelationshipBenchmarkReportMarkdown,
ktxRelationshipBenchmarkDetectorWithLlm,
loadKtxRelationshipBenchmarkFixtures,
runKtxRelationshipBenchmarkSuite,
} from '../packages/cli/dist/context/scan/index.js';
// Directory that contains this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
// Parent directory of the script directory.
const ktxRoot = resolve(scriptDir, '..');
// Directory passed to loadKtxRelationshipBenchmarkFixtures() below.
const fixtureRoot = join(ktxRoot, 'packages/cli/src/test/fixtures/relationship-benchmarks');
/**
 * Chooses the relationship detector for the benchmark run from environment
 * variables.
 *
 * - `KTX_BENCHMARK_LLM_BACKEND` unset, empty or `none`: the detector returned
 *   by `currentKtxRelationshipBenchmarkDetector()`.
 * - `vertex`: a detector backed by an LLM provider created from
 *   `KTX_BENCHMARK_VERTEX_PROJECT`, `KTX_BENCHMARK_VERTEX_LOCATION` and
 *   `KTX_BENCHMARK_LLM_MODEL` (default `claude-sonnet-4-6`).
 *
 * @returns {Promise<*>} The detector handed to the benchmark suite.
 * @throws {Error} For any other backend value, or when the vertex project or
 *   location is missing.
 */
async function buildDetector() {
  const {
    KTX_BENCHMARK_LLM_BACKEND: backend,
    KTX_BENCHMARK_VERTEX_PROJECT: project,
    KTX_BENCHMARK_VERTEX_LOCATION: location,
  } = process.env;

  const llmDisabled = !backend || backend === 'none';
  if (llmDisabled) {
    return currentKtxRelationshipBenchmarkDetector();
  }
  if (backend !== 'vertex') {
    throw new Error(`Unsupported KTX_BENCHMARK_LLM_BACKEND: ${backend}`);
  }

  const model = process.env.KTX_BENCHMARK_LLM_MODEL ?? 'claude-sonnet-4-6';
  if (!project || !location) {
    throw new Error('KTX_BENCHMARK_VERTEX_PROJECT and KTX_BENCHMARK_VERTEX_LOCATION are required for vertex backend');
  }

  // Loaded only on the LLM path, so runs without an LLM never import the provider module.
  const llmModule = await import('../packages/cli/dist/llm/index.js');
  const provider = llmModule.createKtxLlmProvider({
    backend: 'vertex',
    vertex: { project, location },
    modelSlots: { default: model },
  });
  return ktxRelationshipBenchmarkDetectorWithLlm(provider);
}
// Script body: load the fixtures, pick the detector, run the benchmark suite,
// build the report for all benchmark modes, and print it as Markdown to stdout.
const fixtures = await loadKtxRelationshipBenchmarkFixtures(fixtureRoot);
const detector = await buildDetector();
const suite = await runKtxRelationshipBenchmarkSuite({
fixtures,
detector,
});
const report = buildKtxRelationshipBenchmarkReport({
fixtures,
suite,
modes: KTX_RELATIONSHIP_BENCHMARK_MODES,
});
process.stdout.write(formatKtxRelationshipBenchmarkReportMarkdown(report));

View file

@ -550,7 +550,7 @@ describe('release readiness policy', () => {
await writeReadyFixture(root, {
policy: releasePolicy({
npm: {
packages: ['@kaelio/ktx', '@ktx/context'],
packages: ['@kaelio/ktx', '@ktx/internal-only'],
},
}),
});

View file

@ -24,10 +24,8 @@ describe('standalone KTX CI workflow', () => {
'name: Pre-commit checks',
'typescript-checks:',
'name: TypeScript checks',
'slow-context-tests:',
'name: Slow context tests',
'slow-cli-tests:',
'name: Slow CLI tests',
'name: Slow TypeScript tests',
'cli-smoke-tests:',
'name: CLI smoke tests',
'python-checks:',
@ -42,7 +40,6 @@ describe('standalone KTX CI workflow', () => {
'pnpm install --frozen-lockfile',
'pnpm run check',
'pnpm run build',
'pnpm --filter @ktx/context run test:slow',
'pnpm --filter @ktx/cli run test:slow',
'pnpm run smoke',
'actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405',

View file

@ -33,41 +33,36 @@ describe('test tiering', () => {
];
const contextSlowTests = [
'src/scan/local-scan.test.ts',
'src/mcp/local-project-ports.test.ts',
'src/ingest/local-stage-ingest.test.ts',
'src/sl/pglite-sl-search-prototype.test.ts',
'src/core/git.service.test.ts',
'src/ingest/local-adapters.test.ts',
'src/ingest/local-bundle-ingest.test.ts',
'src/ingest/local-metabase-ingest.test.ts',
'src/sl/local-sl.test.ts',
'src/search/pglite-owner-process.test.ts',
'src/scan/local-enrichment-artifacts.test.ts',
'src/search/pglite-spike.test.ts',
'src/wiki/local-knowledge.test.ts',
'src/sl/local-query.test.ts',
'src/scan/relationship-review-decisions.test.ts',
'src/scan/relationship-profiling.test.ts',
'src/context/scan/local-scan.test.ts',
'src/context/mcp/local-project-ports.test.ts',
'src/context/ingest/local-stage-ingest.test.ts',
'src/context/sl/pglite-sl-search-prototype.test.ts',
'src/context/core/git.service.test.ts',
'src/context/ingest/local-adapters.test.ts',
'src/context/ingest/local-bundle-ingest.test.ts',
'src/context/ingest/local-metabase-ingest.test.ts',
'src/context/sl/local-sl.test.ts',
'src/context/search/pglite-owner-process.test.ts',
'src/context/scan/local-enrichment-artifacts.test.ts',
'src/context/search/pglite-spike.test.ts',
'src/context/wiki/local-knowledge.test.ts',
'src/context/sl/local-query.test.ts',
'src/context/scan/relationship-review-decisions.test.ts',
'src/context/scan/relationship-profiling.test.ts',
];
it('keeps slow package tests out of default local package test scripts', async () => {
const cliPackage = await readJson('../packages/cli/package.json');
const contextPackage = await readJson('../packages/context/package.json');
assertScriptContainsAll(cliPackage.scripts.test, cliSlowTests.map((file) => `--exclude ${file}`));
assertScriptContainsAll(contextPackage.scripts.test, contextSlowTests.map((file) => `--exclude ${file}`));
assert.match(contextPackage.scripts.test, /--exclude src\/scan\/relationship-benchmarks\.test\.ts/);
assertScriptContainsAll(cliPackage.scripts.test, contextSlowTests.map((file) => `--exclude ${file}`));
});
it('provides explicit slow package test scripts for CI', async () => {
const rootPackage = await readJson('../package.json');
const cliPackage = await readJson('../packages/cli/package.json');
const contextPackage = await readJson('../packages/context/package.json');
assert.equal(rootPackage.scripts['test:slow'], 'pnpm --filter @ktx/context run test:slow && pnpm --filter @ktx/cli run test:slow');
assert.equal(rootPackage.scripts['test:slow'], 'pnpm --filter @ktx/cli run test:slow');
assertScriptContainsAll(cliPackage.scripts['test:slow'], cliSlowTests);
assertScriptContainsAll(contextPackage.scripts['test:slow'], contextSlowTests);
assert.doesNotMatch(contextPackage.scripts['test:slow'], /relationship-benchmarks\.test\.ts/);
assertScriptContainsAll(cliPackage.scripts['test:slow'], contextSlowTests);
assert.doesNotMatch(cliPackage.scripts['test:slow'], /relationship-benchmarks\.test\.ts/);
});
});