[codex] Add Conductor workspace scripts (#2)

* Add Conductor workspace scripts

* Fix conductor boundary check fixture

* Remove stale frontend conductor guard

* Remove stale app service references

* Optimize relationship discovery benchmarks

* test: move slow suites to ci tier
This commit is contained in:
Andrey Avtomonov 2026-05-11 09:55:42 +02:00 committed by GitHub
parent ae1d95a6ce
commit 76fde89798
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 2085 additions and 1654 deletions

View file

@ -53,6 +53,12 @@ const CHECKED_IN_FIXTURE_ORIGINS = {
semantic_embedding_aliases_no_declared_constraints: 'synthetic',
} as const;
/**
 * Tells whether the opt-in relationship benchmarks should run.
 *
 * @returns `true` only when the `KTX_RUN_RELATIONSHIP_BENCHMARKS` environment
 *   variable is exactly the string `'1'`; any other value (or an unset
 *   variable) yields `false`.
 */
function runAdHocRelationshipBenchmarks(): boolean {
  const { KTX_RUN_RELATIONSHIP_BENCHMARKS: optInFlag } = process.env;
  return optInFlag === '1';
}
const adHocRelationshipBenchmarkIt = runAdHocRelationshipBenchmarks() ? it : it.skip;
function snapshot(): KtxSchemaSnapshot {
return {
connectionId: 'warehouse',
@ -644,7 +650,7 @@ describe('relationship benchmarks', () => {
expect(fixture.expected.expectedLinks).toHaveLength(1900);
});
it('runs the scale stress fixture inside the benchmark validation budget', async () => {
adHocRelationshipBenchmarkIt('runs the scale stress fixture inside the benchmark validation budget', async () => {
const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url);
const fixture = await loadKtxRelationshipBenchmarkFixture(
join(fixtureRoot.pathname, 'scale_stress_no_declared_constraints'),

View file

@ -7,6 +7,7 @@ import type {
} from './enrichment-types.js';
import { localCandidateTables } from './relationship-locality.js';
import {
type KtxRelationshipNormalizedName,
normalizeKtxRelationshipName,
pluralizeKtxRelationshipToken,
singularizeKtxRelationshipToken,
@ -97,9 +98,22 @@ const REFERENCE_SUFFIXES: Array<{ suffix: string; reason: string }> = [
{ suffix: '_uuid', reason: 'foreign_key_uuid_suffix' },
];
const RELATIONSHIP_KEY_TARGET_SUFFIXES = ['_id', '_key', '_code', '_uuid'] as const;
// Per-object memo tables for name-derived lookups. Keys are the table/column
// objects themselves (identity, not name), and a WeakMap entry does not keep its
// key object alive. Cached values are computed on first lookup and are not
// refreshed afterwards, so they go stale if a cached object's name is changed later.
const tableAliasesCache = new WeakMap<KtxEnrichedTable, Set<string>>();
const parentTableNameAliasesCache = new WeakMap<KtxEnrichedTable, Set<string>>();
const normalizedColumnNameCache = new WeakMap<KtxEnrichedColumn, KtxRelationshipNormalizedName>();
/**
 * Memoized wrapper around `normalizeKtxRelationshipName(column.name)`.
 *
 * The result is stored in `normalizedColumnNameCache` under the column object
 * itself, so repeated calls with the same column object reuse the first result
 * instead of normalizing the name again.
 *
 * @param column Column whose `name` is normalized.
 * @returns The normalized-name record for the column (the same instance on
 *   every call for a given column object).
 */
function normalizedColumnName(column: KtxEnrichedColumn): KtxRelationshipNormalizedName {
  let normalizedName = normalizedColumnNameCache.get(column);
  if (!normalizedName) {
    // First time this column object is seen: compute once and remember it.
    normalizedName = normalizeKtxRelationshipName(column.name);
    normalizedColumnNameCache.set(column, normalizedName);
  }
  return normalizedName;
}
function isRelationshipKeyShapedTarget(column: KtxEnrichedColumn): boolean {
const normalized = normalizeKtxRelationshipName(column.name);
const normalized = normalizedColumnName(column);
return (
normalized.tokens.length >= 2 &&
RELATIONSHIP_KEY_TARGET_SUFFIXES.some((suffix) => normalized.normalized.endsWith(suffix))
@ -107,8 +121,8 @@ function isRelationshipKeyShapedTarget(column: KtxEnrichedColumn): boolean {
}
/**
 * Reports whether the source column's normalized name ends with `_` followed by
 * the target column's normalized name.
 *
 * Returns `false` when both normalized names are equal or when the target's
 * normalized name is empty.
 */
function columnSuffixMatchesTarget(input: { fromColumn: KtxEnrichedColumn; toColumn: KtxEnrichedColumn }): boolean {
const source = normalizeKtxRelationshipName(input.fromColumn.name).normalized;
const target = normalizeKtxRelationshipName(input.toColumn.name).normalized;
const source = normalizedColumnName(input.fromColumn).normalized;
const target = normalizedColumnName(input.toColumn).normalized;
return source !== target && target.length > 0 && source.endsWith(`_${target}`);
}
@ -160,7 +174,7 @@ function hasUsableEmbedding(column: KtxEnrichedColumn): boolean {
}
function sourceColumnReference(column: KtxEnrichedColumn): KtxRelationshipSourceColumnReference | null {
const normalized = normalizeKtxRelationshipName(column.name);
const normalized = normalizedColumnName(column);
if (SELF_REFERENCE_NAMES.has(normalized.normalized)) {
return { base: normalized.normalized.replace(/_id$/u, ''), reason: 'foreign_key_suffix' };
}
@ -192,6 +206,11 @@ function addNormalizedTableAlias(aliases: Set<string>, name: string): void {
}
function tableAliases(table: KtxEnrichedTable): Set<string> {
const cached = tableAliasesCache.get(table);
if (cached) {
return cached;
}
const normalized = normalizeKtxRelationshipName(table.ref.name);
const aliases = new Set([normalized.normalized, normalized.singular, normalized.plural]);
if (normalized.tokens.length > 1) {
@ -203,6 +222,7 @@ function tableAliases(table: KtxEnrichedTable): Set<string> {
aliases.add(pluralizeKtxRelationshipToken(singularLastToken));
}
}
tableAliasesCache.set(table, aliases);
return aliases;
}
@ -212,13 +232,19 @@ function finalTableNamePart(table: KtxEnrichedTable): string {
}
/**
 * Returns the alias set used for parent-table-name matching: the aliases from
 * `tableAliases(table)` extended via `addNormalizedTableAlias` with
 * `finalTableNamePart(table)`.
 *
 * The result is memoized per table object in `parentTableNameAliasesCache`, so
 * the same Set instance is returned on later calls for that table.
 */
function parentTableNameAliases(table: KtxEnrichedTable): Set<string> {
const aliases = tableAliases(table);
const cached = parentTableNameAliasesCache.get(table);
if (cached) {
return cached;
}
// Copy before extending: tableAliases() returns its cached Set instance, and
// passing that instance to addNormalizedTableAlias would alter what later
// tableAliases(table) calls return.
const aliases = new Set(tableAliases(table));
addNormalizedTableAlias(aliases, finalTableNamePart(table));
parentTableNameAliasesCache.set(table, aliases);
return aliases;
}
function targetKeyScore(table: KtxEnrichedTable, column: KtxEnrichedColumn): number {
const columnName = normalizeKtxRelationshipName(column.name).normalized;
const columnName = normalizedColumnName(column).normalized;
const tableKeyBases = parentTableNameAliases(table);
if (column.primaryKey) {
return 1;
@ -338,7 +364,7 @@ function candidateParentTables(input: {
maxParentTables,
}).map((item) => item.table);
const normalizedColumn = normalizeKtxRelationshipName(input.fromColumn.name).normalized;
const normalizedColumn = normalizedColumnName(input.fromColumn).normalized;
if (!SELF_REFERENCE_NAMES.has(normalizedColumn) || ranked.some((table) => table.id === input.fromTable.id)) {
return ranked;
}
@ -364,7 +390,7 @@ function targetKeyEvidence(
return { score: 0, reasons: [] };
}
const columnName = normalizeKtxRelationshipName(column.name).normalized;
const columnName = normalizedColumnName(column).normalized;
if (columnName === 'code' || columnName.endsWith('_code') || columnName === 'key' || columnName.endsWith('_key')) {
return { score: 0.86, reasons: ['profile_unique_target'] };
}
@ -500,7 +526,7 @@ function createCandidate(input: {
evidence: {
sourceColumnBase: input.sourceBase,
targetTableBase: input.targetBase,
targetColumnBase: normalizeKtxRelationshipName(input.toColumn.name).normalized,
targetColumnBase: normalizedColumnName(input.toColumn).normalized,
targetKeyScore: input.targetKeyScore,
nameScore: input.nameScore,
reasons: input.reasons,
@ -553,7 +579,7 @@ function generateKtxEmbeddingRelationshipCandidates(
continue;
}
const sourceBase = normalizeKtxRelationshipName(fromColumn.name).normalized;
const sourceBase = normalizedColumnName(fromColumn).normalized;
const targetBase = normalizeKtxRelationshipName(toTable.ref.name).singular;
const reasons = ['embedding_similarity', ...keyEvidence.reasons];
const candidate = createCandidate({
@ -620,7 +646,7 @@ export function generateKtxRelationshipDiscoveryCandidates(
const sameTable = fromTable.id === toTable.id;
const nameMatchesTarget = strictAliases.has(sourceBase);
const parentTableNameMatcher = !sameTable && !nameMatchesTarget && parentAliases.has(sourceBase);
const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizeKtxRelationshipName(fromColumn.name).normalized);
const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizedColumnName(fromColumn).normalized);
const strictTableMatcher = (!sameTable && nameMatchesTarget) || selfReference;
for (const toColumn of toTable.columns) {
@ -675,7 +701,7 @@ export function generateKtxRelationshipDiscoveryCandidates(
if (
!suffixMatcher &&
!parentTableNameMatcher &&
normalizeKtxRelationshipName(fromColumn.name).normalized === normalizeKtxRelationshipName(toColumn.name).normalized
normalizedColumnName(fromColumn).normalized === normalizedColumnName(toColumn).normalized
) {
reasons.push('exact_column_name');
nameScore = Math.max(nameScore, 0.9);

View file

@ -18,20 +18,28 @@ export interface LocalKtxRelationshipCandidateTablesInput {
const DEFAULT_MAX_PARENT_TABLES = 20;
const RELATIONSHIP_SUFFIX_TOKENS = new Set(['id', 'ids', 'key', 'keys', 'code', 'codes', 'uuid', 'uuids']);
const normalizedTokenVariantsCache = new Map<string, string[]>();
/**
 * Clamps a score into the closed range [0, 1] and rounds it to three decimal
 * places.
 *
 * @param value Raw score; values below 0 become 0 and values above 1 become 1.
 * @returns The clamped score rounded to three decimals (`NaN` stays `NaN`).
 */
function roundedScore(value: number): number {
  const clamped = Math.min(1, Math.max(0, value));
  const fixedText = clamped.toFixed(3);
  return Number(fixedText);
}
/**
 * Returns the de-duplicated, non-empty tokens for a name: the tokens of its
 * normalized form plus the tokens of its singular and plural forms.
 *
 * Results are memoized in `normalizedTokenVariantsCache`, keyed by the raw
 * `name` string. The same array instance is returned on later calls for that
 * name, so callers must treat it as read-only.
 * NOTE(review): nothing visible here evicts cache entries — confirm the set of
 * distinct names stays bounded.
 */
function normalizedTokenVariants(name: string): string[] {
const cached = normalizedTokenVariantsCache.get(name);
if (cached) {
return cached;
}
const normalized = normalizeKtxRelationshipName(name);
return Array.from(
const variants = Array.from(
new Set([
...normalized.tokens,
...tokenizeKtxRelationshipName(normalized.singular),
...tokenizeKtxRelationshipName(normalized.plural),
]),
).filter(Boolean);
normalizedTokenVariantsCache.set(name, variants);
return variants;
}
function childColumnLocalityTokens(column: KtxEnrichedColumn): string[] {
@ -91,24 +99,29 @@ function parentEmbeddingScore(childColumn: KtxEnrichedColumn, parentTable: KtxEn
}
/**
 * Scores a candidate parent table by comparing its name tokens with the child
 * side's tokens using `jaccard` (defined elsewhere; presumably a set-overlap
 * ratio — confirm).
 *
 * - Returns 0 when the parent table name yields no tokens.
 * - When the parent is the child table itself (ids match), only the child
 *   column tokens are compared.
 * - Otherwise returns the larger of the column-only score and the combined
 *   column+table score scaled by 0.6.
 */
function tableTokenScore(input: {
childTable: KtxEnrichedTable;
childColumn: KtxEnrichedColumn;
childTableId: string;
childTableTokens: readonly string[];
childColumnTokens: readonly string[];
parentTable: KtxEnrichedTable;
}): number {
const childTableTokens = normalizedTokenVariants(input.childTable.ref.name);
const childColumnTokens = childColumnLocalityTokens(input.childColumn);
const parentTokens = normalizedTokenVariants(input.parentTable.ref.name);
const columnOnlyScore = jaccard(childColumnTokens, parentTokens);
if (input.parentTable.id === input.childTable.id) {
const columnOnlyScore = jaccard(input.childColumnTokens, parentTokens);
if (parentTokens.length === 0) {
return 0;
}
if (input.parentTable.id === input.childTableId) {
return columnOnlyScore;
}
const columnAndTableScore = jaccard(uniqueTokens([...childTableTokens, ...childColumnTokens]), parentTokens);
const columnAndTableScore = jaccard(uniqueTokens([...input.childTableTokens, ...input.childColumnTokens]), parentTokens);
return Math.max(columnOnlyScore, columnAndTableScore * 0.6);
}
function localityScore(input: {
childTable: KtxEnrichedTable;
childTableId: string;
childTableTokens: readonly string[];
childColumn: KtxEnrichedColumn;
childColumnTokens: readonly string[];
parentTable: KtxEnrichedTable;
}): Omit<KtxRelationshipLocalityCandidateTable, 'table'> {
const tokenScore = roundedScore(tableTokenScore(input));
@ -143,12 +156,18 @@ export function localCandidateTables(
return [];
}
const childTableTokens = normalizedTokenVariants(input.childTable.ref.name);
const childColumnTokens = childColumnLocalityTokens(input.childColumn);
return input.parentTables
.map((table) => ({
table,
...localityScore({
childTable: input.childTable,
childTableId: input.childTable.id,
childTableTokens,
childColumn: input.childColumn,
childColumnTokens,
parentTable: table,
}),
}))

View file

@ -1,7 +1,7 @@
import { z } from 'zod';
// Literal vocabularies — kept in lockstep with the Python Pydantic model at
// python-service/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole /
// python/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole /
// ColumnVisibility / JoinDeclaration). If these diverge, YAMLs can pass
// TypeScript validation at ingest time but fail Python loading at query time.
const columnTypeValues = ['string', 'number', 'time', 'boolean'] as const;

View file

@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest';
import { createHttpSqlAnalysisPort } from './http-sql-analysis-port.js';
describe('createHttpSqlAnalysisPort', () => {
it('calls the python-service fingerprint endpoint and maps snake_case response fields', async () => {
it('calls the SQL-analysis fingerprint endpoint and maps snake_case response fields', async () => {
const requestJson = vi.fn(async () => ({
fingerprint: 'fingerprint-template',
normalized_sql: 'SELECT * FROM analytics.orders WHERE status = ?',
@ -26,7 +26,7 @@ describe('createHttpSqlAnalysisPort', () => {
});
});
it('preserves python-service parse errors in the mapped result', async () => {
it('preserves SQL-analysis parse errors in the mapped result', async () => {
const requestJson = vi.fn(async () => ({
fingerprint: '',
normalized_sql: '',

View file

@ -151,7 +151,7 @@ export abstract class BaseTool<TInput extends ZodType = ZodType> {
}
}
},
// Send only markdown to LLM - frontend still receives full { markdown, structured } via stream
// Send only markdown to the LLM; tool callers still receive the structured output.
toModelOutput: ({ output }) => {
if (output && typeof output === 'object' && 'markdown' in output) {
return { type: 'content', value: [{ type: 'text', text: output.markdown as string }] };