refactor: remove legacy compatibility paths

This commit is contained in:
Andrey Avtomonov 2026-05-13 14:37:05 +02:00
parent c22248dabf
commit a517c834fe
83 changed files with 239 additions and 534 deletions

View file

@ -1,6 +1,7 @@
name: orders
table: public.orders
description: Orders placed through the storefront.
descriptions:
user: Orders placed through the storefront.
grain:
- id
columns:

View file

@ -57,4 +57,4 @@ Always join through `customer.id`. Do not join on `email`.
- **Join key:** Always use `customer.id`, never `email`.
- **Timezone:** `created_at` and `last_seen_at` are UTC. Confirm whether a question expects UTC or a local business day before filtering.
- **Paying vs. all:** `free` customers must be excluded from paying-customer follow-ups. Use `paying_customer_count`, not `customer_count`.
- **plan_tier values:** `free`, `pro`, `enterprise`. Note: `pro_plus` is a legacy alias for `growth` in the account/contract layer (see `orbit-plan-segment-normalization`), but `plan_tier` on this table uses `pro` not `pro_plus`.
- **plan_tier values:** `free`, `pro`, `enterprise`. Note: when naming plans, use the canonical plan names from the account/contract layer (see `orbit-plan-segment-normalization`). Be aware that `plan_tier` on this table stores `pro`, not `growth`.

View file

@ -27,7 +27,7 @@ Sales Ops must complete the handoff **before the first implementation call**. Cu
| Field | Notes |
|---|---|
| Current plan | Starter / Growth / Enterprise — use canonical plan name, not legacy aliases |
| Current plan | Starter / Growth / Enterprise — use canonical plan name |
| Account segment | self_serve / commercial / enterprise (see `orbit-plan-segment-normalization`) |
| Contract shape | Term, ARR, any discounts or custom terms |
| Renewal contact | Named person on the customer side responsible for renewal |

View file

@ -132,11 +132,11 @@ function uniqueSorted(values: number[]): number[] {
}
function resolveMetabaseUrl(connection: KtxProjectConnectionConfig | undefined): string | undefined {
return stringField(connection?.api_url) ?? stringField(connection?.apiUrl) ?? stringField(connection?.url);
return stringField(connection?.api_url);
}
function resolveLiteralMetabaseApiKey(connection: KtxProjectConnectionConfig | undefined): string | undefined {
return stringField(connection?.api_key) ?? stringField(connection?.apiKey);
return stringField(connection?.api_key);
}
function listMetabaseConnectionIds(project: KtxLocalProject): string[] {

View file

@ -110,7 +110,6 @@ function shouldShowSetupEntryMenu(
disableHistoricSql?: boolean;
historicSqlWindowDays?: number;
historicSqlMinExecutions?: number;
historicSqlMinCalls?: number;
historicSqlServiceAccountPattern?: string[];
historicSqlRedactionPattern?: string[];
skipDatabases?: boolean;
@ -180,7 +179,6 @@ function shouldShowSetupEntryMenu(
'disableHistoricSql',
'historicSqlWindowDays',
'historicSqlMinExecutions',
'historicSqlMinCalls',
'skipDatabases',
'source',
'sourceConnectionId',
@ -266,11 +264,6 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
.option('--disable-historic-sql', 'Disable Historic SQL for the selected database', false)
.option('--historic-sql-window-days <number>', 'Historic SQL query-history window', positiveInteger)
.option('--historic-sql-min-executions <number>', 'Minimum Historic SQL executions for a template', positiveInteger)
.option(
'--historic-sql-min-calls <number>',
'Alias for --historic-sql-min-executions',
positiveInteger,
)
.option(
'--historic-sql-service-account-pattern <pattern>',
'Historic SQL service-account regex; repeatable',
@ -352,7 +345,6 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
const mode = options.new ? 'new' : options.existing ? 'existing' : 'auto';
const resolvedAgentScope = options.global ? 'global' : options.agentScope;
const historicSqlMinExecutions = options.historicSqlMinExecutions ?? options.historicSqlMinCalls;
await runSetupArgs(context, {
command: 'run',
projectDir: resolveCommandProjectDir(command),
@ -380,7 +372,9 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
...(options.enableHistoricSql ? { enableHistoricSql: true } : {}),
...(options.disableHistoricSql ? { disableHistoricSql: true } : {}),
...(options.historicSqlWindowDays !== undefined ? { historicSqlWindowDays: options.historicSqlWindowDays } : {}),
...(historicSqlMinExecutions !== undefined ? { historicSqlMinExecutions } : {}),
...(options.historicSqlMinExecutions !== undefined
? { historicSqlMinExecutions: options.historicSqlMinExecutions }
: {}),
...(options.historicSqlServiceAccountPattern.length > 0
? { historicSqlServiceAccountPatterns: options.historicSqlServiceAccountPattern }
: {}),

View file

@ -375,7 +375,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [
collection_id: 12,
archived: false,
result_metadata: [],
dataset_query: { type: 'native', database: 1, native: { query: 'select 101 as id' } },
dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 'select 101 as id' }] },
parameters: [],
dashboard_count: 0,
},
@ -389,7 +389,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [
collection_id: 12,
archived: false,
result_metadata: [],
dataset_query: { type: 'native', database: 1, native: { query: 'select 102 as id' } },
dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 'select 102 as id' }] },
parameters: [],
dashboard_count: 0,
},
@ -403,7 +403,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [
collection_id: 13,
archived: false,
result_metadata: [],
dataset_query: { type: 'native', database: 1, native: { query: 'select 103 as id' } },
dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 'select 103 as id' }] },
parameters: [],
dashboard_count: 0,
},
@ -453,11 +453,11 @@ function createSyncModeMetabaseClient(): MetabaseRuntimeClient {
},
getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary),
convertMbqlToNative: async () => ({ query: 'select 1' }),
getNativeSql: (card) => card.dataset_query?.native?.query ?? null,
getNativeSql: (card) => card.dataset_query?.stages?.[0]?.native ?? null,
getTemplateTags: () => ({}),
getCardSql: async (card) => card.dataset_query?.native?.query ?? null,
getCardSql: async (card) => card.dataset_query?.stages?.[0]?.native ?? null,
getResolvedSql: async (card) => ({
resolvedSql: card.dataset_query?.native?.query ?? `select ${card.id} as id`,
resolvedSql: card.dataset_query?.stages?.[0]?.native ?? `select ${card.id} as id`,
templateTags: [],
resolutionStatus: 'resolved',
}),

View file

@ -716,7 +716,6 @@ describe('runKtxIngest', () => {
patternPagesWritten: 30,
stalePatternPagesMarked: 2,
archivedPatternPages: 3,
legacyPagesDeleted: 4,
},
errors: [],
warnings: [],
@ -750,7 +749,7 @@ describe('runKtxIngest', () => {
expect(io.stderr()).toBe('');
expect(io.stdout()).toContain('Adapter: historic-sql\n');
expect(io.stdout()).toContain('Saved memory: 39 wiki, 57 SL\n');
expect(io.stdout()).toContain('Saved memory: 35 wiki, 57 SL\n');
});
it('returns a non-zero code when local ingest reports failed work units', async () => {

View file

@ -1,4 +1,3 @@
import { join } from 'node:path';
import {
createBigQueryLiveDatabaseIntrospection,
isKtxBigQueryConnectionConfig,
@ -298,7 +297,6 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
const base = {
sqlAnalysis: ktxCliHistoricSqlAnalysis(options),
postgresBaselineRootDir: join(project.projectDir, '.ktx/cache/historic-sql'),
};
if (dialect === 'postgres') {

View file

@ -62,10 +62,7 @@ describe('createKtxCliScanConnector', () => {
expect(connector.driver).toBe('sqlite');
});
it.each([
['maxBytesBilled', ' maxBytesBilled: 123456789', 123456789],
['max_bytes_billed', ' max_bytes_billed: "987654321"', '987654321'],
])('passes BigQuery %s from standalone config', async (_label, byteCapLine, expectedMaxBytesBilled) => {
it('passes BigQuery max_bytes_billed from standalone config', async () => {
await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' });
await writeFile(
join(tempDir, 'ktx.yaml'),
@ -76,7 +73,7 @@ describe('createKtxCliScanConnector', () => {
' driver: bigquery',
' dataset_id: analytics',
' readonly: true',
byteCapLine,
' max_bytes_billed: "987654321"',
'',
].join('\n'),
'utf-8',
@ -90,7 +87,7 @@ describe('createKtxCliScanConnector', () => {
expect(bigQueryMock.constructorInputs).toEqual([
expect.objectContaining({
connectionId: 'warehouse',
maxBytesBilled: expectedMaxBytesBilled,
maxBytesBilled: '987654321',
}),
]);
});

View file

@ -6,7 +6,7 @@ const SUPPORTED_DRIVERS = 'sqlite, postgres, mysql, clickhouse, sqlserver, bigqu
function bigQueryMaxBytesBilled(
connection: KtxLocalProject['config']['connections'][string],
): number | string | undefined {
const raw = connection.maxBytesBilled ?? connection.max_bytes_billed;
const raw = connection.max_bytes_billed;
if (typeof raw === 'number') {
return Number.isFinite(raw) && raw > 0 ? raw : undefined;
}

View file

@ -6,8 +6,6 @@ import {
formatSetupNextStepLines,
} from './next-steps.js';
const command = (...parts: string[]) => parts.join(' ');
describe('KTX demo next steps', () => {
it('uses supported context-build commands before agent usage', () => {
expect(KTX_CONTEXT_BUILD_COMMANDS).toEqual([
@ -57,29 +55,6 @@ describe('KTX demo next steps', () => {
expect(rendered).not.toContain('Optional MCP:');
});
it('does not advertise removed Commander migration commands', () => {
const rendered = formatNextStepLines().join('\n');
expect(rendered).toContain('ktx status --json');
expect(rendered).not.toContain('ktx agent');
expect(rendered).toContain('ktx sl list');
expect(rendered).toContain('ktx wiki list');
for (const removed of [
command('ktx', 'ask'),
command('ktx', 'mcp'),
command('ktx', 'connect'),
command('ktx', 'knowledge'),
command('dev', 'model'),
command('dev', 'knowledge'),
command('ktx', 'ingest', 'run'),
command('ktx', 'ingest', 'replay'),
command('ktx', 'serve', '--mcp', 'stdio', '--user-id', 'local'),
]) {
expect(rendered).not.toContain(removed);
}
});
it('keeps setup next steps focused on building context when the build is not ready', () => {
const rendered = formatSetupNextStepLines({
setupReady: true,

View file

@ -92,7 +92,7 @@ function normalizedDriver(connection: KtxProjectConnectionConfig): string {
}
function sourceDirForConnection(connection: KtxProjectConnectionConfig): string | undefined {
const value = connection.source_dir ?? connection.sourceDir;
const value = connection.source_dir;
return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined;
}

View file

@ -64,8 +64,6 @@ function textInputPrompt(message: string): string {
return `${title}\n│\n│ ${bodyLines.join('\n│ ')}\n│ Press Escape to go back.\n│`;
}
const legacyHistoricSqlServiceAccountPatternsKey = ['serviceAccount', 'UserPatterns'].join('');
describe('setup databases step', () => {
let tempDir: string;
@ -1288,7 +1286,6 @@ describe('setup databases step', () => {
redactionPatterns: ['(?i)secret'],
},
});
expect(config.connections.snowflake.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey);
expect(config.ingest.adapters).toContain('historic-sql');
});
@ -1336,10 +1333,8 @@ describe('setup databases step', () => {
},
},
});
expect(config.connections.warehouse.historicSql).not.toHaveProperty('minCalls');
expect(config.connections.warehouse.historicSql).not.toHaveProperty('windowDays');
expect(config.connections.warehouse.historicSql).not.toHaveProperty('redactionPatterns');
expect(config.connections.warehouse.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey);
expect(config.ingest.adapters).toContain('historic-sql');
expect(config.ingest.workUnits.maxConcurrency).toBe(6);
expect(io.stdout()).toContain('Historic SQL probe...');
@ -1393,7 +1388,6 @@ describe('setup databases step', () => {
redactionPatterns: [],
},
});
expect(config.connections.analytics.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey);
expect(config.ingest.adapters).toContain('historic-sql');
});
@ -1443,7 +1437,6 @@ describe('setup databases step', () => {
},
},
});
expect(config.connections.warehouse.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey);
});
it('prints a non-blocking Postgres Historic SQL probe failure after connection test succeeds', async () => {

View file

@ -40,7 +40,6 @@ export interface KtxSetupDatabasesArgs {
disableHistoricSql?: boolean;
historicSqlWindowDays?: number;
historicSqlMinExecutions?: number;
historicSqlMinCalls?: number;
historicSqlServiceAccountPatterns?: string[];
historicSqlRedactionPatterns?: string[];
skipDatabases: boolean;
@ -857,14 +856,13 @@ async function maybeApplyHistoricSqlConfig(input: {
dialect,
filters: historicSqlFiltersForSetup(input.args.historicSqlServiceAccountPatterns),
};
delete common[['serviceAccount', 'UserPatterns'].join('')];
if (dialect === 'postgres') {
return {
...input.connection,
historicSql: {
...common,
minExecutions: input.args.historicSqlMinExecutions ?? input.args.historicSqlMinCalls ?? 5,
minExecutions: input.args.historicSqlMinExecutions ?? 5,
},
};
}

View file

@ -544,8 +544,8 @@ function sourcePathFromFileRepoUrl(repoUrl: string, subpath?: string): string {
}
function repoAuthToken(connection: KtxProjectConnectionConfig | Record<string, unknown>): string | null {
const ref = stringField(connection.auth_token_ref) ?? stringField(connection.authTokenRef);
const literal = stringField(connection.authToken) ?? stringField(connection.auth_token);
const ref = stringField(connection.auth_token_ref);
const literal = stringField(connection.auth_token);
return literal ?? resolveKtxConfigReference(ref, process.env) ?? null;
}
@ -563,8 +563,8 @@ async function collectYamlFilesRecursive(sourceRoot: string): Promise<Array<{ co
}
async function defaultValidateDbt(connection: KtxProjectConnectionConfig): Promise<SourceValidationResult> {
let sourceDir = stringField(connection.source_dir) ?? stringField(connection.sourceDir);
const repoUrl = stringField(connection.repo_url) ?? stringField(connection.repoUrl);
let sourceDir = stringField(connection.source_dir);
const repoUrl = stringField(connection.repo_url);
if (!sourceDir && repoUrl?.startsWith('file:')) {
sourceDir = sourcePathFromFileRepoUrl(repoUrl, stringField(connection.path));
}
@ -624,7 +624,7 @@ async function defaultValidateLooker(projectDir: string, connectionId: string):
}
async function defaultValidateLookml(connection: KtxProjectConnectionConfig): Promise<SourceValidationResult> {
const repoUrl = stringField(connection.repoUrl) ?? stringField(connection.repo_url);
const repoUrl = stringField(connection.repoUrl);
if (!repoUrl) {
return { ok: false, message: 'LookML setup requires repoUrl.' };
}

View file

@ -83,7 +83,6 @@ export type KtxSetupArgs =
disableHistoricSql?: boolean;
historicSqlWindowDays?: number;
historicSqlMinExecutions?: number;
historicSqlMinCalls?: number;
historicSqlServiceAccountPatterns?: string[];
historicSqlRedactionPatterns?: string[];
skipDatabases: boolean;
@ -626,7 +625,6 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
...(args.historicSqlMinExecutions !== undefined
? { historicSqlMinExecutions: args.historicSqlMinExecutions }
: {}),
...(args.historicSqlMinCalls !== undefined ? { historicSqlMinCalls: args.historicSqlMinCalls } : {}),
...(args.historicSqlServiceAccountPatterns
? { historicSqlServiceAccountPatterns: args.historicSqlServiceAccountPatterns }
: {}),

View file

@ -1,5 +1,5 @@
<role>
You are the reconciliation agent for a multi-file ingest bundle. Stage 3 WorkUnits have already run against this job's session worktree; your input is the deterministic Stage Index listing every write each WU made, plus an Eviction Set listing raw files present in the prior sync but absent in this one. Your job is to (a) decide what happens to each evicted artifact (remove vs retain with a deprecation marker), (b) sweep the Stage Index for any cross-WU conflicts the individual WUs missed, and (c) emit conflict + eviction records that the runner will fold into the final IngestReport.
You are the reconciliation agent for a multi-file ingest bundle. Stage 3 WorkUnits have already run against this job's session worktree; your input is the deterministic Stage Index listing every write each WU made, plus an Eviction Set listing raw files present in the prior sync but absent in this one. Your job is to (a) remove artifacts produced by deleted raw files, (b) sweep the Stage Index for any cross-WU conflicts the individual WUs missed, and (c) emit conflict + eviction records that the runner will fold into the final IngestReport.
</role>
<stance>
@ -12,7 +12,7 @@ Parsimonious. Stage 3 WUs already loaded `ingest_triage` and handled conflicts t
3. If the system prompt includes `<canonical_pins>`, apply those pins before flagging a same-name or near-duplicate conflict. A pinned `canonicalArtifactKey` keeps the contested name when it is present in the Stage Index; competing variants keep or receive disambiguated names.
4. Sweep both exact-key conflicts and near-duplicate writes. Compare WUs that wrote overlapping SL source names, overlapping wiki keys, the same `tables:` or `sl_refs:` action details, or obviously equivalent topic titles under different wiki keys. Call `stage_diff` to see the actual difference, and use `wiki_read`/`sl_read_source` when two different keys appear to describe the same table, metric, or source-of-truth mapping. If they're the same content, leave one canonical artifact and record the duplicate as subsumed. If they differ per `ingest_triage` rules, apply the correct resolution (rename + capture; election of canonical; silent replace for expression-only re-ingest change; or pinned canonical), then call `emit_conflict_resolution` with the artifact key and decision.
5. For any `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call you make during reconciliation, include `rawPaths` with only the raw paths that directly caused that reconciliation action.
6. Call `eviction_list()` for deleted raw paths. For each eviction: if inbound refs are empty, remove the artifact (`sl_delete`, `wiki_remove`) and include that evicted raw path in `rawPaths`; if inbound refs exist, retain with a deprecation marker and include that evicted raw path in `rawPaths`. Then call `emit_eviction_decision` for every removed or retained artifact.
6. Call `eviction_list()` for deleted raw paths. For each listed artifact, remove it (`sl_delete` for SL sources, `wiki_remove` for wiki pages) and include the evicted raw path in `rawPaths`. Then call `emit_eviction_decision` with `action: "removed"` for every removed artifact.
7. If the Stage 4 sweep discovers a raw file whose only honest outcome is standalone SQL, wiki-only capture, or a human flag, call `emit_unmapped_fallback` with the raw path, reason, and fallback kind.
8. Use `read_raw_span` to zoom into specific raw files when you need to resolve what two contested measures or wiki pages actually describe.
9. Exit when you've processed every item.

View file

@ -32,8 +32,8 @@ Apply the rules below before every write that could collide with an existing art
| Definitional contradiction | Same name, substantively different formulas (different aggregation, different filters, different columns) | **Rename + capture**: disambiguate ALL variants with suffix derived from the domain (`churn_risk_engagement_based`, `churn_risk_billing_based`) and write a unified wiki page listing every variant with provenance. The contested name does NOT land in the SL. **Always flag.** |
5. **Eviction (Stage 4 only)**: for each entry in `eviction_list()`:
- `inbound_refs: []` → remove the artifact (`sl_delete` for SL sources, `wiki_remove` for wiki pages).
- `inbound_refs: [...]` → retain the artifact, set `deprecated: true` on SL sources (via `sl_edit_source`), write a wiki note "origin file removed in <syncId>; preserved because referenced by: …". Flag in the IngestReport so the user can plan migration.
- Remove the artifact (`sl_delete` for SL sources, `wiki_remove` for wiki pages).
- Record the removal with `emit_eviction_decision` and `action: "removed"`.
## Why same-ingest vs re-ingest differs

View file

@ -98,7 +98,7 @@ measures:
expr: "<expression>"
```
Overlay shape: `name:` plus any of `measures:`, `segments:`, `description:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: ["<on-clause>"]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule.
Overlay shape: `name:` plus any of `measures:`, `segments:`, `descriptions:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: ["<on-clause>"]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule.
**Join discovery:** When your card's SQL references warehouse tables (e.g. in `FROM` or `JOIN` clauses), call `sl_discover({ query: '<table>' })` before writing. The matching manifest entry's `name` is the value you use in `joins: [- to: <name>]` only when the card output exposes a local key that matches the target source grain (for example `account_id = mart_account_segments.account_id`). Do not declare a KTX join just because the card SQL joins that table internally. If the output only exposes display fields such as `account_name`, keep the SQL source self-contained or project the key before adding the join. Use `many_to_one` for FK-to-dimension joins, `one_to_many` for the reverse.

View file

@ -177,7 +177,8 @@ semantic_models:
# KTX overlay at <connId>/orders.yaml:
# <!-- from: raw-sources/.../models/orders.yml#L1-10 -->
name: orders
description: Order fact table.
descriptions:
user: Order fact table.
measures:
- {name: order_count, expr: "count(order_id)"}
- {name: gross_amount, expr: "sum(amount)"}
@ -221,7 +222,8 @@ metrics:
# <!-- from: raw-sources/.../models/orders_ext.yml#L1-8 -->
# <!-- from: raw-sources/.../metrics/orders_final.yml#L1-10 -->
name: orders_ext
description: Extended order fact including refund handling; `revenue` = gross - refund.
descriptions:
user: Extended order fact including refund handling; `revenue` = gross - refund.
measures:
- {name: order_count, expr: "count(order_id)"}
- {name: gross_amount, expr: "sum(amount)"}

View file

@ -29,7 +29,8 @@ Enrich a manifest-backed table with measures, computed columns, joins, and segme
```yaml
name: fct_orders # must match an existing manifest table
description: "Overlay adding business measures to the orders fact table."
descriptions:
user: "Overlay adding business measures to the orders fact table."
measures:
- name: total_revenue
expr: sum(amount)

View file

@ -100,13 +100,13 @@ measures:
**Extract repeated filter bundles into named segments.** If the same predicate appears on multiple measures of the same source, lift it to a `segments[]` entry and have each measure reference it. One edit updates every measure that depends on it.
**Never write a standalone file on a manifest-backed name.** If `sl_discover({ tableName })` finds an existing schema for that name, you MUST write an overlay (`name:` + `measures:`/`segments:`/`description:` only — no `sql:`, `table:`, `grain:`, `columns:`, `joins:`). A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name.
**Never write a standalone file on a manifest-backed name.** If `sl_discover({ tableName })` finds an existing schema for that name, you MUST write an overlay (`name:` + `measures:`/`segments:`/`descriptions:` only — no `sql:`, `table:`, `grain:`, `columns:`, `joins:`). A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name.
**Prefer overlay decomposition over standalone SQL sources.** Before reaching for `source_type: sql`, check whether the metric decomposes into measures on existing overlays (including cross-source derived measures). Use `source_type: sql` only when:
- The metric requires per-user/per-entity derivation that cannot be expressed as a single `expr` (e.g., `EXISTS` over a time-windowed subset), OR
- The metric requires multi-step CTEs whose intermediate grain is not a column in any existing source.
When an `sql` source is unavoidable, note in its `description` which SL gap forced the choice so it can be retired once the primitive ships. It must target a name NOT in the manifest — pick a distinct one (e.g. `mrr_waterfall_rollup`, not `fct_orders`).
When an `sql` source is unavoidable, note in its `descriptions` map which SL gap forced the choice so it can be retired once the primitive ships. It must target a name NOT in the manifest — pick a distinct one (e.g. `mrr_waterfall_rollup`, not `fct_orders`).
## Slim standalone sources via `inherits_columns_from`
@ -116,7 +116,8 @@ Discover the manifest key with `sl_discover` — pass the bare name (`CONSIGNMEN
```yaml
name: aav_consignments
description: AAV consignments — filtered view of MARTS.CONSIGNMENTS for the auto-auction-vaulting channel.
descriptions:
user: AAV consignments — filtered view of MARTS.CONSIGNMENTS for the auto-auction-vaulting channel.
source_type: sql
sql: |
SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT, ALT_VALUE_COMBINED, my_derived_flag
@ -127,10 +128,10 @@ sql: |
inherits_columns_from: CONSIGNMENTS
grain: [CONSIGNED_ITEM_ID]
columns:
- { name: CONSIGNED_ITEM_ID } # type/description inherited from manifest
- { name: CONSIGNED_ITEM_ID } # type/descriptions inherited from manifest
- { name: CASH_ADV_AMOUNT }
- { name: ALT_VALUE_COMBINED }
- { name: my_derived_flag, type: boolean, expr: "CASH_ADV_AMOUNT > 0", description: "Computed locally — has any cash advance." }
- { name: my_derived_flag, type: boolean, expr: "CASH_ADV_AMOUNT > 0", descriptions: { user: "Computed locally — has any cash advance." } }
measures:
- name: total_cash_advance
expr: sum(CASH_ADV_AMOUNT)

View file

@ -13,7 +13,7 @@ async function tempDir(): Promise<string> {
const sqlAnalysis: SqlAnalysisPort = {
async analyzeForFingerprint() {
throw new Error('legacy analyzeForFingerprint must not be used');
throw new Error('analyzeForFingerprint must not be used');
},
async analyzeBatch() {
return new Map();
@ -66,7 +66,7 @@ describe('HistoricSqlSourceAdapter', () => {
};
const batchSqlAnalysis: SqlAnalysisPort = {
async analyzeForFingerprint() {
throw new Error('legacy analyzeForFingerprint must not be used');
throw new Error('analyzeForFingerprint must not be used');
},
async analyzeBatch() {
return new Map([

View file

@ -1,5 +1,3 @@
import { rm } from 'node:fs/promises';
import { join } from 'node:path';
import type { ChunkResult, DiffSet, FetchContext, ScopeDescriptor, SourceAdapter } from '../../types.js';
import { chunkHistoricSqlUnifiedStagedDir, describeHistoricSqlUnifiedScope } from './chunk-unified.js';
import { detectHistoricSqlStagedDir } from './detect.js';
@ -28,11 +26,6 @@ export class HistoricSqlSourceAdapter implements SourceAdapter {
pullConfig,
now: this.deps.now?.(),
});
if (this.deps.legacyPostgresBaselineRootDir) {
await rm(join(this.deps.legacyPostgresBaselineRootDir, ctx.connectionId, ['pgss', 'baseline.json'].join('-')), {
force: true,
});
}
}
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {

View file

@ -284,7 +284,7 @@ describe('projectHistoricSqlEvidence', () => {
);
});
it('marks missing table usage stale and deletes legacy historic SQL query pages', async () => {
it('marks missing table usage stale without deleting old query pages', async () => {
const workdir = await tempWorkdir();
await writeText(
workdir,
@ -322,22 +322,22 @@ describe('projectHistoricSqlEvidence', () => {
});
await writeText(
workdir,
'knowledge/global/historic-sql-legacy-template.md',
'knowledge/global/historic-sql-old-template.md',
[
'---',
YAML.stringify({
summary: 'Legacy template page',
summary: 'Old template page',
tags: ['historic-sql', 'query-pattern'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders'],
fingerprints: ['legacy:1'],
fingerprints: ['old:1'],
}).trimEnd(),
'---',
'',
'Legacy body',
'Old body',
'',
].join('\n'),
);
@ -345,7 +345,6 @@ describe('projectHistoricSqlEvidence', () => {
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.staleTablesMarked).toBe(1);
expect(result.legacyPagesDeleted).toBe(1);
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
expect(shard.tables.orders.usage).toEqual({
@ -357,8 +356,8 @@ describe('projectHistoricSqlEvidence', () => {
commonJoins: [],
staleSince: '2026-05-11T00:00:00.000Z',
});
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-legacy-template.md'), 'utf-8')).rejects.toMatchObject({
code: 'ENOENT',
});
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-old-template.md'), 'utf-8')).resolves.toContain(
'Old body',
);
});
});

View file

@ -1,4 +1,4 @@
import { access, mkdir, readdir, readFile, rename, rm, writeFile } from 'node:fs/promises';
import { access, mkdir, readdir, readFile, rename, writeFile } from 'node:fs/promises';
import { dirname, join, relative } from 'node:path';
import YAML from 'yaml';
import { rawSourcesDirForSync } from '../../raw-sources-paths.js';
@ -20,7 +20,6 @@ export interface HistoricSqlProjectionResult {
patternPagesWritten: number;
stalePatternPagesMarked: number;
archivedPatternPages: number;
legacyPagesDeleted: number;
touchedSources: Array<{ connectionId: string; sourceName: string }>;
warnings: string[];
}
@ -152,11 +151,6 @@ function isHistoricPatternPage(page: HistoricSqlPatternPage): boolean {
);
}
function isLegacyQueryPage(page: HistoricSqlPatternPage): boolean {
const tags = Array.isArray(page.frontmatter.tags) ? page.frontmatter.tags : [];
return page.frontmatter.source === 'historic-sql' && tags.includes('query-pattern') && !tags.includes('pattern');
}
function isArchivedPatternPage(page: HistoricSqlPatternPage): boolean {
const tags = Array.isArray(page.frontmatter.tags) ? page.frontmatter.tags : [];
return tags.includes('archived');
@ -228,7 +222,6 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
patternPagesWritten: 0,
stalePatternPagesMarked: 0,
archivedPatternPages: 0,
legacyPagesDeleted: 0,
touchedSources: [],
warnings: [],
};
@ -333,10 +326,5 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
result.stalePatternPagesMarked += 1;
}
for (const page of allPages.filter(isLegacyQueryPage)) {
await rm(page.path, { force: true });
result.legacyPagesDeleted += 1;
}
return result;
}

View file

@ -8,7 +8,7 @@ import {
} from './types.js';
describe('historic-sql unified contracts', () => {
it('parses minExecutions and accepts minCalls as a one-release alias', () => {
it('parses minExecutions and service-account filters', () => {
expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minExecutions: 9 })).toMatchObject({
dialect: 'postgres',
minExecutions: 9,
@ -18,7 +18,15 @@ describe('historic-sql unified contracts', () => {
staleArchiveAfterDays: 90,
});
expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minCalls: 7 }).minExecutions).toBe(7);
const parsed = historicSqlUnifiedPullConfigSchema.parse({
dialect: 'postgres',
minExecutions: 7,
filters: {
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
},
});
expect(parsed.minExecutions).toBe(7);
expect(parsed.filters.serviceAccounts).toEqual({ patterns: ['^svc_'], mode: 'exclude' });
});
it('validates aggregate templates from warehouse readers', () => {

View file

@ -8,26 +8,7 @@ export type HistoricSqlDialect = z.infer<typeof historicSqlDialectSchema>;
const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']);
/**
 * Type guard for plain keyed objects: accepts any non-null value whose
 * `typeof` is `'object'`, except arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
export const historicSqlUnifiedPullConfigSchema = z.preprocess((value) => {
if (!isRecord(value)) {
return value;
}
const next: Record<string, unknown> = { ...value };
if (next.minExecutions === undefined && typeof next.minCalls === 'number') {
next.minExecutions = next.minCalls;
}
if (!next.filters && Array.isArray(next.serviceAccountUserPatterns)) {
next.filters = {
serviceAccounts: { patterns: next.serviceAccountUserPatterns, mode: 'exclude' },
dropTrivialProbes: true,
};
}
return next;
}, z.object({
export const historicSqlUnifiedPullConfigSchema = z.object({
dialect: historicSqlDialectSchema,
windowDays: z.number().int().positive().default(90),
minExecutions: z.number().int().nonnegative().default(5),
@ -48,7 +29,7 @@ export const historicSqlUnifiedPullConfigSchema = z.preprocess((value) => {
}).default({ dropTrivialProbes: true }),
redactionPatterns: z.array(z.string()).default([]),
staleArchiveAfterDays: z.number().int().positive().default(90),
}));
});
export type HistoricSqlUnifiedPullConfig = z.infer<typeof historicSqlUnifiedPullConfigSchema>;
@ -157,6 +138,5 @@ export interface HistoricSqlSourceAdapterDeps {
sqlAnalysis: SqlAnalysisPort;
reader: HistoricSqlReader;
queryClient: unknown;
legacyPostgresBaselineRootDir?: string;
now?: () => Date;
}

View file

@ -26,13 +26,11 @@ export function lookerCredentialsFromLocalConnection(
if (!connection || String(connection.driver).toLowerCase() !== 'looker') {
throw new Error(`Connection "${connectionId}" is not a Looker connection`);
}
const baseUrl = stringField(connection.base_url) ?? stringField(connection.baseUrl) ?? stringField(connection.url);
const clientId = stringField(connection.client_id) ?? stringField(connection.clientId);
const baseUrl = stringField(connection.base_url);
const clientId = stringField(connection.client_id);
const clientSecret =
stringField(connection.client_secret) ??
stringField(connection.clientSecret) ??
(stringField(connection.client_secret_ref) ? resolveEnvReference(String(connection.client_secret_ref), env) : null) ??
(stringField(connection.clientSecretRef) ? resolveEnvReference(String(connection.clientSecretRef), env) : null);
(stringField(connection.client_secret_ref) ? resolveEnvReference(String(connection.client_secret_ref), env) : null);
if (!baseUrl) {
throw new Error(`Connection "${connectionId}" is missing Looker base_url`);

View file

@ -87,10 +87,13 @@ it('allows the concrete client result shapes used by the relocated Metabase clie
const datasetQuery: MetabaseDatasetQuery = {
type: 'native',
database: 42,
native: {
query: 'SELECT * FROM orders WHERE created_at > {{ created_at }}',
'template-tags': { created_at: templateTag },
},
stages: [
{
'lib/type': 'mbql.stage/native',
native: 'SELECT * FROM orders WHERE created_at > {{ created_at }}',
'template-tags': { created_at: templateTag },
},
],
};
const card: MetabaseCard = {
id: 1,

View file

@ -116,17 +116,11 @@ interface MetabaseNativeStage {
'template-tags'?: Record<string, MetabaseTemplateTag>;
}
/**
 * Shape of the `native` field on a dataset query: the SQL text under `query`
 * plus an optional map of template tags keyed by string.
 */
interface MetabaseLegacyNativeQuery {
// Native SQL text.
query: string;
// Template tag definitions, keyed by string; may be absent.
'template-tags'?: Record<string, MetabaseTemplateTag>;
}
/**
 * Dataset query attached to a Metabase card. Every field is optional, so
 * consumers must tolerate any of them being absent.
 */
export interface MetabaseDatasetQuery {
'lib/type'?: 'mbql/query';
// Numeric database id.
database?: number;
type?: 'native' | 'query';
// Staged layout: each stage may carry native SQL and its template tags.
stages?: MetabaseNativeStage[];
// Non-staged layout: native SQL under `native.query`.
native?: MetabaseLegacyNativeQuery;
}
export interface MetabaseNativeQueryResult {

View file

@ -32,10 +32,7 @@ function nativeCard(query: string, templateTags: Record<string, MetabaseTemplate
dataset_query: {
type: 'native',
database: 6,
native: {
query,
'template-tags': templateTags,
},
stages: [{ 'lib/type': 'mbql.stage/native', native: query, 'template-tags': templateTags }],
},
};
}
@ -318,7 +315,7 @@ describe('MetabaseClient.getResolvedSql', () => {
dataset_query: {
type: 'native',
database: 6,
native: { query: 'SELECT a, b FROM base' },
stages: [{ 'lib/type': 'mbql.stage/native', native: 'SELECT a, b FROM base' }],
},
});
const client = makeClient((client) => {

View file

@ -150,9 +150,6 @@ function injectNativeSql(datasetQuery: MetabaseDatasetQuery, sql: string): Metab
stages[0] = { ...stages[0], native: sql };
return { ...datasetQuery, stages };
}
if (datasetQuery?.native) {
return { ...datasetQuery, native: { ...datasetQuery.native, query: sql } };
}
return datasetQuery;
}
@ -370,36 +367,12 @@ export class MetabaseClient implements MetabaseRuntimeClient {
});
}
/**
* Extract native SQL from card, handling both pMBQL (v57+) and legacy formats.
* - pMBQL format: dataset_query.stages[0].native
* - Legacy format: dataset_query.native.query
*/
getNativeSql(card: MetabaseCard): string | null {
// pMBQL format (v57+): stages[0].native
const pMbqlSql = card.dataset_query?.stages?.[0]?.native;
if (pMbqlSql) {
return pMbqlSql;
}
// Legacy format: native.query
return card.dataset_query?.native?.query ?? null;
return card.dataset_query?.stages?.[0]?.native ?? null;
}
/**
* Extract template tags from card, handling both pMBQL and legacy formats.
* - pMBQL format: dataset_query.stages[0]['template-tags']
* - Legacy format: dataset_query.native['template-tags']
*/
getTemplateTags(card: MetabaseCard): Record<string, MetabaseTemplateTag> {
// pMBQL format: stages[0]['template-tags']
const pMbqlTags = card.dataset_query?.stages?.[0]?.['template-tags'];
if (pMbqlTags) {
return pMbqlTags;
}
// Legacy format: native['template-tags']
return card.dataset_query?.native?.['template-tags'] ?? {};
return card.dataset_query?.stages?.[0]?.['template-tags'] ?? {};
}
async getCardSql(card: MetabaseCard): Promise<string | null> {

View file

@ -48,19 +48,6 @@ describe('metabaseRuntimeConfigFromLocalConnection', () => {
});
});
it('accepts url as the local api URL alias', () => {
const connection: KtxProjectConnectionConfig = {
driver: 'metabase',
url: 'https://metabase.example.com',
api_key: 'literal-test-key', // pragma: allowlist secret
};
expect(metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toEqual({
apiUrl: 'https://metabase.example.com',
apiKey: 'literal-test-key', // pragma: allowlist secret
});
});
it('rejects proxy-bearing local Metabase connections', () => {
const connection: KtxProjectConnectionConfig = {
driver: 'metabase',

View file

@ -37,9 +37,9 @@ export function metabaseRuntimeConfigFromLocalConnection(
);
}
const apiUrl = stringField(connection.api_url) ?? stringField(connection.apiUrl) ?? stringField(connection.url);
const literalApiKey = stringField(connection.api_key) ?? stringField(connection.apiKey);
const apiKeyRef = stringField(connection.api_key_ref) ?? stringField(connection.apiKeyRef);
const apiUrl = stringField(connection.api_url);
const literalApiKey = stringField(connection.api_key);
const apiKeyRef = stringField(connection.api_key_ref);
const apiKey = literalApiKey ?? (apiKeyRef ? resolveKtxConfigReference(apiKeyRef, env) : null);
if (!apiUrl) {

View file

@ -14,7 +14,6 @@ import {
getMetricflowAvailableColumnNames,
mapCrossModelMetricToSource,
resolveMetricflowSemanticModelSourceName,
toKebabCaseMetricflowName,
type MetricflowHostTable,
type MetricflowSemanticModelImportContext,
} from './semantic-models.js';
@ -129,16 +128,6 @@ export async function importMetricflowSemanticModels(
{ skipValidation: true },
);
const legacyWarning = await legacyKebabSourceWarning(
semanticLayerService,
input.connectionId,
context.model.modelRef,
context.sourceName,
);
if (legacyWarning) {
warnings.push(legacyWarning);
}
if (existing) {
sourcesUpdated++;
} else {
@ -234,26 +223,6 @@ async function resolveManifestSource(
return null;
}
/**
 * Builds a warning when a source stored under the kebab-case form of
 * `modelRef` still exists next to the source named `sourceName`.
 *
 * @param semanticLayerService - Service used to look up the kebab-case source.
 * @param connectionId - Connection whose sources are inspected.
 * @param modelRef - Model reference that is converted to kebab case.
 * @param sourceName - Name of the source currently being written.
 * @returns The warning text, or `null` when the kebab-case name equals
 *   `sourceName` or no source is found under the kebab-case name.
 */
async function legacyKebabSourceWarning(
  semanticLayerService: MetricflowSemanticLayerWriter,
  connectionId: string,
  modelRef: string,
  sourceName: string,
): Promise<string | null> {
  const kebabName = toKebabCaseMetricflowName(modelRef);
  // Only hit the service when the two names actually differ.
  const legacySource = kebabName !== sourceName ? await semanticLayerService.loadSource(connectionId, kebabName) : null;
  if (legacySource) {
    return (
      `MetricFlow sync: legacy kebab-case source '${kebabName}' still exists alongside the new source ` +
      `'${sourceName}' (modelRef '${modelRef}'). Migrate persisted references before deleting the old file.`
    );
  }
  return null;
}
async function repairSourcesAfterPartialImportFailures(input: {
semanticLayerService: MetricflowSemanticLayerWriter;
connectionId: string;

View file

@ -1518,7 +1518,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
patternPagesWritten: 3,
stalePatternPagesMarked: 1,
archivedPatternPages: 1,
legacyPagesDeleted: 1,
},
warnings: [],
errors: [],
@ -1551,7 +1550,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
expect(memoryFlow.snapshot().events).toContainEqual(
expect.objectContaining({
type: 'saved',
wikiCount: 6,
wikiCount: 5,
slCount: 3,
}),
);

View file

@ -105,7 +105,6 @@ describe('local ingest adapters', () => {
return { headers: [], rows: [] };
},
},
postgresBaselineRootDir: join(project.projectDir, '.ktx/cache/historic-sql'),
},
});
@ -181,9 +180,12 @@ describe('local ingest adapters', () => {
historicSql: {
enabled: true,
dialect: 'postgres',
minCalls: 7,
minExecutions: 7,
maxTemplatesPerRun: 123,
serviceAccountUserPatterns: ['^svc_'],
filters: {
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
dropTrivialProbes: true,
},
},
},
});
@ -385,7 +387,7 @@ describe('local ingest adapters', () => {
connections: {
'prod-lookml': {
driver: 'lookml',
repo_url: 'https://github.com/acme/looker.git',
repoUrl: 'https://github.com/acme/looker.git',
branch: 'main',
path: 'models',
auth_token_ref: 'env:GITHUB_TOKEN',
@ -410,7 +412,7 @@ describe('local ingest adapters', () => {
});
});
it('rejects local LookML scheduled pulls when repo_url is missing', async () => {
it('rejects local LookML scheduled pulls when repoUrl is missing', async () => {
const lookmlProject = {
projectDir: tempDir,
config: { connections: { 'prod-lookml': { driver: 'lookml' } } },

View file

@ -50,7 +50,6 @@ export interface DefaultLocalIngestAdaptersOptions {
reader?: HistoricSqlReader;
queryClient?: unknown;
postgresQueryClient?: KtxPostgresQueryClient;
postgresBaselineRootDir?: string;
now?: () => Date;
};
looker?: {
@ -129,7 +128,6 @@ export function createDefaultLocalIngestAdapters(
sqlAnalysis: options.historicSql.sqlAnalysis,
reader: options.historicSql.reader ?? new PostgresPgssReader(),
queryClient,
legacyPostgresBaselineRootDir: options.historicSql.postgresBaselineRootDir,
now: options.historicSql.now,
}),
);
@ -163,11 +161,11 @@ function stringField(value: unknown): string | null {
function localLookmlPullConfigFromConnection(connection: Record<string, unknown> | undefined, env: NodeJS.ProcessEnv) {
const mappings = isRecord(connection?.mappings) ? connection.mappings : {};
const authTokenRef = stringField(connection?.auth_token_ref) ?? stringField(connection?.authTokenRef);
const literalAuthToken = stringField(connection?.authToken) ?? stringField(connection?.auth_token);
const authTokenRef = stringField(connection?.auth_token_ref);
const literalAuthToken = stringField(connection?.auth_token);
return pullConfigFromIntegrationConfig({
repoUrl: stringField(connection?.repoUrl) ?? stringField(connection?.repo_url) ?? null,
repoUrl: stringField(connection?.repoUrl) ?? null,
branch: stringField(connection?.branch),
path: stringField(connection?.path),
authToken: literalAuthToken ?? resolveKtxConfigReference(authTokenRef ?? undefined, env) ?? null,
@ -176,27 +174,21 @@ function localLookmlPullConfigFromConnection(connection: Record<string, unknown>
}
function localDbtPullConfigFromConnection(connection: Record<string, unknown> | undefined, env: NodeJS.ProcessEnv) {
const sourceDir = stringField(connection?.source_dir) ?? stringField(connection?.sourceDir);
const repoUrl = stringField(connection?.repo_url) ?? stringField(connection?.repoUrl);
const sourceDir = stringField(connection?.source_dir);
const repoUrl = stringField(connection?.repo_url);
if (sourceDir) {
return {
sourceDir,
...(stringField(connection?.profiles_path) ? { profilesPath: stringField(connection?.profiles_path) } : {}),
...(stringField(connection?.profilesPath) ? { profilesPath: stringField(connection?.profilesPath) } : {}),
...(stringField(connection?.target) ? { target: stringField(connection?.target) } : {}),
...(stringField(connection?.project_name) ? { projectName: stringField(connection?.project_name) } : {}),
...(stringField(connection?.projectName) ? { projectName: stringField(connection?.projectName) } : {}),
};
}
if (!repoUrl) {
return undefined;
}
const authToken =
stringField(connection?.authToken) ??
resolveKtxConfigReference(
stringField(connection?.auth_token_ref) ?? stringField(connection?.authTokenRef) ?? undefined,
env,
);
stringField(connection?.auth_token) ?? resolveKtxConfigReference(stringField(connection?.auth_token_ref) ?? undefined, env);
return {
repoUrl,
...(stringField(connection?.branch) ? { branch: stringField(connection?.branch) } : {}),
@ -280,8 +272,8 @@ export async function localPullConfigForAdapter(
? (metricflow as Record<string, unknown>)
: null;
const authToken =
typeof metricflowConfig?.authToken === 'string'
? metricflowConfig.authToken
typeof metricflowConfig?.auth_token === 'string'
? metricflowConfig.auth_token
: resolveKtxConfigReference(
typeof metricflowConfig?.auth_token_ref === 'string' ? metricflowConfig.auth_token_ref : undefined,
options.looker?.env ?? process.env,

View file

@ -22,7 +22,7 @@ function validReportSnapshot() {
{ target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' },
{ target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' },
],
touchedSlSources: ['warehouse.orders'],
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
},
],
failedWorkUnits: [],
@ -106,7 +106,7 @@ describe('parseIngestReportSnapshot', () => {
expect(snapshot.body.toolTranscripts).toHaveLength(1);
});
it('parses target-aware actions and normalizes legacy touched source strings', () => {
it('parses target-aware actions and touched source objects', () => {
const report = validReportSnapshot();
report.body.workUnits[0] = {
...report.body.workUnits[0],
@ -119,8 +119,7 @@ describe('parseIngestReportSnapshot', () => {
targetConnectionId: 'warehouse-1',
},
],
// Legacy report shape: bare strings are normalized to the report connection ID.
touchedSlSources: ['looker__b2b__sales_pipeline'],
touchedSlSources: [{ connectionId: 'warehouse-1', sourceName: 'looker__b2b__sales_pipeline' }],
} as never;
const snapshot = parseIngestReportSnapshot(report);
@ -135,7 +134,7 @@ describe('parseIngestReportSnapshot', () => {
},
]);
expect(snapshot.body.workUnits[0]?.touchedSlSources).toEqual([
{ connectionId: 'warehouse', sourceName: 'looker__b2b__sales_pipeline' },
{ connectionId: 'warehouse-1', sourceName: 'looker__b2b__sales_pipeline' },
]);
});

View file

@ -1,5 +1,4 @@
import * as z from 'zod';
import type { TouchedSlSource } from '../tools/index.js';
import { memoryFlowReplayInputSchema } from './memory-flow/schema.js';
import type { IngestReportSnapshot } from './reports.js';
@ -24,8 +23,6 @@ const touchedSlSourceSchema = z.object({
sourceName: z.string().min(1),
});
const touchedSlSourceInputSchema = z.union([z.string(), touchedSlSourceSchema]);
const conflictResolvedSchema = z
.object({
unitKey: z.string().optional(),
@ -42,7 +39,7 @@ const evictionAppliedSchema = z
rawPath: z.string(),
artifactKind: z.enum(['sl', 'wiki']),
artifactKey: z.string(),
action: z.enum(['removed', 'retained_deprecated']),
action: z.literal('removed'),
reason: z.string(),
})
.passthrough();
@ -147,7 +144,7 @@ export const ingestReportSnapshotSchema = z
status: z.enum(['success', 'failed']),
reason: z.string().optional(),
actions: z.array(ingestActionSchema),
touchedSlSources: z.array(touchedSlSourceInputSchema),
touchedSlSources: z.array(touchedSlSourceSchema),
slDisallowed: z.boolean().optional(),
slDisallowedReason: z.enum(['lookml_connection_mismatch']).optional(),
}),
@ -171,26 +168,10 @@ export const ingestReportSnapshotSchema = z
})
.passthrough();
/**
 * Converts a mixed list of touched-source entries into uniform
 * `{ connectionId, sourceName }` objects.
 *
 * @param connectionId - Connection id assigned to entries given as bare strings.
 * @param value - Entries that are either a source name string or an object.
 * @returns A new array; string entries take `connectionId`, object entries
 *   keep their own `connectionId` and `sourceName` (other properties are dropped).
 */
function normalizeTouchedSlSources(connectionId: string, value: Array<string | TouchedSlSource>): TouchedSlSource[] {
  const toSourceObject = (item: string | TouchedSlSource): TouchedSlSource => {
    if (typeof item === 'string') {
      return { connectionId, sourceName: item };
    }
    return { connectionId: item.connectionId, sourceName: item.sourceName };
  };
  return value.map((item) => toSourceObject(item));
}
export function parseIngestReportSnapshot(value: unknown): IngestReportSnapshot {
const result = ingestReportSnapshotSchema.safeParse(value);
if (!result.success) {
throw new Error(`Invalid ingest report snapshot: ${z.prettifyError(result.error)}`);
}
const snapshot = result.data as IngestReportSnapshot;
snapshot.body.workUnits = snapshot.body.workUnits.map((workUnit) => ({
...workUnit,
touchedSlSources: normalizeTouchedSlSources(
snapshot.connectionId,
workUnit.touchedSlSources as Array<string | TouchedSlSource>,
),
}));
return snapshot;
return result.data as IngestReportSnapshot;
}

View file

@ -111,8 +111,7 @@ export function postProcessorSavedMemoryCounts(
wikiCount:
numericResultField(record, 'patternPagesWritten') +
numericResultField(record, 'stalePatternPagesMarked') +
numericResultField(record, 'archivedPatternPages') +
numericResultField(record, 'legacyPagesDeleted'),
numericResultField(record, 'archivedPatternPages'),
slCount: numericResultField(record, 'tableUsageMerged') + numericResultField(record, 'staleTablesMarked'),
};
}

View file

@ -25,7 +25,7 @@ export interface EvictionAppliedRecord {
rawPath: string;
artifactKind: 'sl' | 'wiki';
artifactKey: string;
action: 'removed' | 'retained_deprecated';
action: 'removed';
reason: string;
}

View file

@ -22,7 +22,7 @@ export function createEmitEvictionDecisionTool(deps: EmitEvictionDecisionDeps) {
rawPath: z.string().min(1),
artifactKind: z.enum(['sl', 'wiki']),
artifactKey: z.string().min(1),
action: z.enum(['removed', 'retained_deprecated']),
action: z.literal('removed'),
reason: z.string().min(1),
}),
execute: async (input): Promise<string> => {

View file

@ -88,14 +88,14 @@ describe('reconciliation emit tools', () => {
await executeTool(tool, {
rawPath: 'views/old_orders.view.lkml',
artifactKind: 'wiki',
artifactKey: 'orders/legacy',
action: 'retained_deprecated',
artifactKey: 'orders/old',
action: 'removed',
reason: 'first pass',
});
await executeTool(tool, {
rawPath: 'views/old_orders.view.lkml',
artifactKind: 'wiki',
artifactKey: 'orders/legacy',
artifactKey: 'orders/old',
action: 'removed',
reason: 'second pass after checking references',
});
@ -104,7 +104,7 @@ describe('reconciliation emit tools', () => {
{
rawPath: 'views/old_orders.view.lkml',
artifactKind: 'wiki',
artifactKey: 'orders/legacy',
artifactKey: 'orders/old',
action: 'removed',
reason: 'second pass after checking references',
},

View file

@ -12,7 +12,7 @@ export interface EvictionListDeps {
export function createEvictionListTool(deps: EvictionListDeps) {
return tool({
description:
'List every artifact that the most recent completed sync produced from a now-deleted raw file. Use this to decide whether to remove (no inbound refs) or retain with deprecation (has inbound refs). Inbound refs are NOT currently computed — treat every retained entry as a candidate and ask the user via the IngestReport. After deciding, record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.',
'List every artifact that the most recent completed sync produced from a now-deleted raw file. Remove each listed artifact and record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.',
inputSchema: z.object({}),
execute: async () => {
if (deps.deletedRawPaths.length === 0) {

View file

@ -8,7 +8,7 @@ const verificationLedgerInputSchema = z.object({
notes: z.string().max(2000).optional(),
});
export interface VerificationLedgerEntry {
interface VerificationLedgerEntry {
summary: string;
verifiedIdentifiers: string[];
unverifiedIdentifiers: string[];

View file

@ -6,12 +6,6 @@ import { EntityDetailsTool } from './entity-details.tool.js';
import { SqlExecutionTool } from './sql-execution.tool.js';
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
export { DiscoverDataTool } from './discover-data.tool.js';
export { EntityDetailsTool } from './entity-details.tool.js';
export { SqlExecutionTool } from './sql-execution.tool.js';
export { WarehouseCatalogService } from './warehouse-catalog.service.js';
export type { RawSchemaHit, TableDetail, WarehouseColumnDetail } from './warehouse-catalog.service.js';
export function createWarehouseVerificationTools(deps: {
connections: SlConnectionCatalogPort;
fallbackFileStore: KtxFileStorePort;

View file

@ -14,7 +14,7 @@ export interface WarehouseCatalogServiceDeps {
fileStore: KtxFileStorePort;
}
export interface WarehouseColumnDetail extends KtxSchemaColumn {
interface WarehouseColumnDetail extends KtxSchemaColumn {
descriptions: Record<string, string>;
rowCount: number | null;
nullCount: number | null;
@ -88,9 +88,8 @@ interface ConnectionCatalog {
}
type TableWithDescriptions = KtxSchemaTable & {
description?: string | null;
descriptions?: Record<string, string>;
columns: Array<KtxSchemaColumn & { description?: string | null; descriptions?: Record<string, string> }>;
columns: Array<KtxSchemaColumn & { descriptions?: Record<string, string> }>;
};
function normalize(value: string | null | undefined): string {
@ -220,14 +219,14 @@ function matchedOnTable(table: TableWithDescriptions, query: string): RawSchemaH
if (normalize(table.comment).includes(q)) {
return 'comment';
}
if (normalize(firstDescription(table.descriptions) ?? table.description).includes(q)) {
if (normalize(firstDescription(table.descriptions)).includes(q)) {
return 'description';
}
return null;
}
function matchedOnColumn(
column: KtxSchemaColumn & { description?: string | null; descriptions?: Record<string, string> },
column: KtxSchemaColumn & { descriptions?: Record<string, string> },
query: string,
): 'name' | 'comment' | 'description' | null {
const q = normalize(query);
@ -240,7 +239,7 @@ function matchedOnColumn(
if (normalize(column.comment).includes(q)) {
return 'comment';
}
if (normalize(firstDescription(column.descriptions) ?? column.description).includes(q)) {
if (normalize(firstDescription(column.descriptions)).includes(q)) {
return 'description';
}
return null;
@ -285,13 +284,10 @@ export class WarehouseCatalogService {
display: formatDisplay(catalog.driver, table),
kind: table.kind,
comment: table.comment,
description: table.description ?? firstDescription(table.descriptions),
description: firstDescription(table.descriptions),
rowCount: profileTable?.rowCount ?? table.estimatedRows ?? null,
columns: table.columns.map((rawColumn) => {
const column = rawColumn as KtxSchemaColumn & {
description?: string | null;
descriptions?: Record<string, string>;
};
const column = rawColumn as KtxSchemaColumn & { descriptions?: Record<string, string> };
const profileColumn =
profileColumns[columnKey(table, column.name)] ??
Object.entries(profileColumns).find(

View file

@ -40,7 +40,7 @@ describe('repairWikiSlRefs', () => {
};
const configService = {
listFiles: vi.fn(async () => ({
files: ['global/accounts-at-risk.md', 'global/historic-sql/nested-legacy.md'],
files: ['global/accounts-at-risk.md', 'global/historic-sql/nested-old.md'],
})),
};
const semanticLayerService = {

View file

@ -114,7 +114,6 @@ export function stepBudgetFor(sourceType: MemoryAgentSourceType): number {
case 'external_ingest':
return 30;
case 'backfill':
case 'sql-review-migration':
return 25;
}
}
@ -122,7 +121,7 @@ export function stepBudgetFor(sourceType: MemoryAgentSourceType): number {
export function promptNameFor(sourceType: MemoryAgentSourceType): string {
return sourceType === 'external_ingest'
? 'memory_agent_external_ingest'
: sourceType === 'backfill' || sourceType === 'sql-review-migration'
: sourceType === 'backfill'
? 'memory_agent_backfill'
: 'memory_agent_research';
}

View file

@ -16,7 +16,7 @@ import type {
import type { ToolContext, ToolSession, TouchedSlSourceSet } from '../tools/index.js';
import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js';
export type MemoryAgentSourceType = 'research' | 'external_ingest' | 'backfill' | 'sql-review-migration';
export type MemoryAgentSourceType = 'research' | 'external_ingest' | 'backfill';
export interface MemoryAgentInput {
userId: string;

View file

@ -220,15 +220,15 @@ project: demo
scan:
relationships:
enabled: false
llm_proposals: false
validation_required_for_manifest: true
accept_threshold: 0.91
review_threshold: 0.61
max_llm_tables_per_batch: 12
max_candidates_per_column: 7
profile_sample_rows: 500
validation_concurrency: 2
validation_budget: 0
llmProposals: false
validationRequiredForManifest: true
acceptThreshold: 0.91
reviewThreshold: 0.61
maxLlmTablesPerBatch: 12
maxCandidatesPerColumn: 7
profileSampleRows: 500
validationConcurrency: 2
validationBudget: 0
`);
expect(config.scan.relationships).toEqual({
@ -260,7 +260,7 @@ scan:
project: demo
scan:
relationships:
validation_budget: all
validationBudget: all
`);
expect(config.scan.relationships.validationBudget).toBe('all');
@ -272,13 +272,13 @@ scan:
project: demo
scan:
relationships:
accept_threshold: 2
review_threshold: -1
max_llm_tables_per_batch: 0
max_candidates_per_column: -4
profile_sample_rows: 0
validation_concurrency: 0
validation_budget: 1.5
acceptThreshold: 2
reviewThreshold: -1
maxLlmTablesPerBatch: 0
maxCandidatesPerColumn: -4
profileSampleRows: 0
validationConcurrency: 0
validationBudget: 1.5
`);
expect(config.scan.relationships).toMatchObject({
@ -297,13 +297,13 @@ scan:
project: demo
scan:
relationships:
validation_budget: infinite
validationBudget: infinite
`);
expect(config.scan.relationships).not.toHaveProperty('validationBudget');
});
it('rejects legacy local LLM and embedding fields', () => {
it('rejects unsupported local LLM and embedding fields', () => {
expect(() =>
parseKtxProjectConfig(`
project: demo

View file

@ -212,7 +212,7 @@ function scanEnrichmentMode(value: unknown, fallback: KtxScanEnrichmentMode): Kt
throw new Error(`Unsupported scan.enrichment.mode: ${String(value)}`);
}
function rejectLegacyProvider(section: string, value: unknown): void {
function rejectUnsupportedProvider(section: string, value: unknown): void {
if (value !== undefined) {
throw new Error(`Unsupported ${section}.provider: use ${section}.backend`);
}
@ -277,7 +277,7 @@ function parseProjectLlmProviderConfig(
defaults: KtxProjectLlmProviderConfig,
section: string,
): KtxProjectLlmProviderConfig {
rejectLegacyProvider(section, raw.provider);
rejectUnsupportedProvider(section, raw.provider);
const vertex = isRecord(raw.vertex)
? {
@ -310,7 +310,7 @@ function parseProjectEmbeddingConfig(
defaults: KtxProjectEmbeddingConfig,
section: string,
): KtxProjectEmbeddingConfig {
rejectLegacyProvider(section, raw.provider);
rejectUnsupportedProvider(section, raw.provider);
const openai = optionalProviderConfig(raw.openai);
const sentenceTransformers = isRecord(raw.sentenceTransformers)
@ -340,36 +340,21 @@ function parseScanRelationshipConfig(
raw: Record<string, unknown>,
defaults: KtxScanRelationshipConfig,
): KtxScanRelationshipConfig {
const validationBudget = validationBudgetConfigValue(
raw.validation_budget ?? raw.validationBudget,
defaults.validationBudget,
);
const validationBudget = validationBudgetConfigValue(raw.validationBudget, defaults.validationBudget);
return {
enabled: booleanValue(raw.enabled, defaults.enabled),
llmProposals: booleanValue(raw.llm_proposals ?? raw.llmProposals, defaults.llmProposals),
llmProposals: booleanValue(raw.llmProposals, defaults.llmProposals),
validationRequiredForManifest: booleanValue(
raw.validation_required_for_manifest ?? raw.validationRequiredForManifest,
raw.validationRequiredForManifest,
defaults.validationRequiredForManifest,
),
acceptThreshold: ratioConfigValue(raw.accept_threshold ?? raw.acceptThreshold, defaults.acceptThreshold),
reviewThreshold: ratioConfigValue(raw.review_threshold ?? raw.reviewThreshold, defaults.reviewThreshold),
maxLlmTablesPerBatch: positiveIntegerConfigValue(
raw.max_llm_tables_per_batch ?? raw.maxLlmTablesPerBatch,
defaults.maxLlmTablesPerBatch,
),
maxCandidatesPerColumn: positiveIntegerConfigValue(
raw.max_candidates_per_column ?? raw.maxCandidatesPerColumn,
defaults.maxCandidatesPerColumn,
),
profileSampleRows: positiveIntegerConfigValue(
raw.profile_sample_rows ?? raw.profileSampleRows,
defaults.profileSampleRows,
),
validationConcurrency: positiveIntegerConfigValue(
raw.validation_concurrency ?? raw.validationConcurrency,
defaults.validationConcurrency,
),
acceptThreshold: ratioConfigValue(raw.acceptThreshold, defaults.acceptThreshold),
reviewThreshold: ratioConfigValue(raw.reviewThreshold, defaults.reviewThreshold),
maxLlmTablesPerBatch: positiveIntegerConfigValue(raw.maxLlmTablesPerBatch, defaults.maxLlmTablesPerBatch),
maxCandidatesPerColumn: positiveIntegerConfigValue(raw.maxCandidatesPerColumn, defaults.maxCandidatesPerColumn),
profileSampleRows: positiveIntegerConfigValue(raw.profileSampleRows, defaults.profileSampleRows),
validationConcurrency: positiveIntegerConfigValue(raw.validationConcurrency, defaults.validationConcurrency),
...(validationBudget !== undefined ? { validationBudget } : {}),
};
}

View file

@ -62,7 +62,7 @@ describe('KTX setup config helpers', () => {
});
});
it('combines legacy config setup steps with local state for reads', () => {
it('combines config setup steps with local state for reads', () => {
const config = {
...buildDefaultKtxProjectConfig('warehouse'),
setup: {

View file

@ -33,7 +33,7 @@ const EXPECTED_LINKS: KtxRelationshipBenchmarkExpectedLinks = {
};
const CHECKED_IN_FIXTURE_ORIGINS = {
abbreviated_legacy_no_declared_constraints: 'synthetic',
abbreviated_old_no_declared_constraints: 'synthetic',
adventureworks_oltp_with_declared_metadata: 'public',
adventureworkslt_with_declared_metadata: 'public',
analytical_warehouse_no_naming_convention: 'synthetic',
@ -606,7 +606,7 @@ describe('relationship benchmarks', () => {
const byId = new Map(fixtures.map((fixture) => [fixture.id, fixture]));
const adversarialIds = [
'non_english_naming_no_declared_constraints',
'abbreviated_legacy_no_declared_constraints',
'abbreviated_old_no_declared_constraints',
'analytical_warehouse_no_naming_convention',
'mixed_case_within_schema_no_declared_constraints',
'polymorphic_partial_overlap_no_declared_constraints',

View file

@ -141,7 +141,7 @@ describe('relationship diagnostics artifacts', () => {
);
});
it('adapts legacy relationship updates into the richer artifact shape', () => {
it('adapts relationship updates into the artifact shape', () => {
const artifacts = buildKtxRelationshipArtifacts({
connectionId: 'warehouse',
relationshipUpdate: {

View file

@ -45,7 +45,8 @@ const ORDERS_YAML = [
const FINANCE_ORDERS_YAML = [
'name: orders',
'description: Finance orders used for invoice reconciliation.',
'descriptions:',
' user: Finance orders used for invoice reconciliation.',
'table: finance.orders',
'grain:',
' - order_id',

View file

@ -28,16 +28,11 @@ function hasDescriptions(descriptions: DescriptionMap): boolean {
function withDescriptionMap(record: Record<string, unknown>, fallback: string | null): Record<string, unknown> {
const descriptions = cleanDescriptionMap(record.descriptions);
const flatDescription = cleanText(record.description);
if (flatDescription && !descriptions.user) {
descriptions.user = flatDescription;
}
if (!hasDescriptions(descriptions) && fallback) {
descriptions.ktx = fallback;
}
const next = { ...record };
delete next.description;
if (hasDescriptions(descriptions)) {
next.descriptions = descriptions;
} else {

View file

@ -29,7 +29,8 @@ const ORDERS_YAML = [
const SUPPORT_YAML = [
'name: tickets',
'description: Support tickets grouped by priority.',
'descriptions:',
' user: Support tickets grouped by priority.',
'table: public.tickets',
'grain:',
' - ticket_id',
@ -278,7 +279,8 @@ describe('local semantic-layer helpers', () => {
sourceName: 'orders',
yaml: [
'name: orders',
'description: Finance orders used for invoice reconciliation.',
'descriptions:',
' user: Finance orders used for invoice reconciliation.',
'table: finance.orders',
'grain:',
' - order_id',

View file

@ -10,7 +10,8 @@ import { searchLocalSlSourcesWithPglitePrototype } from './pglite-sl-search-prot
const ORDERS_YAML = [
'name: orders',
'description: Orders with paid revenue and refund status.',
'descriptions:',
' user: Orders with paid revenue and refund status.',
'table: public.orders',
'grain:',
' - order_id',
@ -29,7 +30,8 @@ const ORDERS_YAML = [
const FINANCE_ORDERS_YAML = [
'name: orders',
'description: Finance orders used for invoice reconciliation.',
'descriptions:',
' user: Finance orders used for invoice reconciliation.',
'table: finance.orders',
'grain:',
' - order_id',
@ -43,7 +45,8 @@ const FINANCE_ORDERS_YAML = [
const CUSTOMERS_YAML = [
'name: customers',
'description: Customer lifecycle accounts by region.',
'descriptions:',
' user: Customer lifecycle accounts by region.',
'table: public.customers',
'grain:',
' - customer_id',

View file

@ -80,14 +80,13 @@ const joinDeclarationSchema = z.object({
const sourceColumnSchema = z.object({
name: unqualifiedNameSchema,
// type/description optional on standalone sources: compose-time enrichment fills them
// type/descriptions optional on standalone sources: compose-time enrichment fills them
// from the manifest entry named in `inherits_columns_from`. If the agent does not set
// `inherits_columns_from`, or the column is not in the manifest, type must be present
// — surfaced by sl_validate.
type: z.enum(columnTypeValues).optional(),
role: z.enum(columnRoleValues).optional(),
visibility: z.enum(columnVisibilityValues).optional(),
description: z.string().optional(),
descriptions: descriptionsSchema.optional(),
expr: z.string().optional(),
constraints: sourceKeyedColumnConstraintsSchema.optional(),
@ -102,7 +101,6 @@ const overlayColumnSchema = z
type: z.enum(columnTypeValues).optional(),
role: z.enum(columnRoleValues).optional(),
visibility: z.enum(columnVisibilityValues).optional(),
description: z.string().optional(),
descriptions: descriptionsSchema.optional(),
expr: z.string().optional(),
})
@ -114,7 +112,6 @@ const overlayColumnSchema = z
export const sourceDefinitionSchema = z
.object({
name: z.string().min(1),
description: z.string().optional(),
descriptions: descriptionsSchema.optional(),
// Accepted for documentation parity with the Python spec; behavior is driven
// by the `table` / `sql` fields, not by this discriminator.
@ -150,7 +147,6 @@ export const sourceDefinitionSchema = z
export const sourceOverlaySchema = z
.object({
name: z.string().min(1),
description: z.string().optional(),
descriptions: z.record(z.string(), z.string()).optional(),
grain: z.array(unqualifiedNameSchema).optional(),
columns: z.array(overlayColumnSchema).optional(),

View file

@ -98,7 +98,7 @@ describe('composeOverlay', () => {
...baseTable,
segments: [{ name: 'pre_existing', expr: 'is_paid = true' }],
};
const overlay = { name: 'fct_labs', description: 'no segments here' };
const overlay = { name: 'fct_labs', descriptions: { user: 'no segments here' } };
const composed = composeOverlay(baseWithSegments, overlay);
expect(composed.segments).toEqual([{ name: 'pre_existing', expr: 'is_paid = true' }]);
});
@ -128,7 +128,7 @@ describe('composeOverlay', () => {
it('still handles existing known keys without regression', () => {
const overlay = {
name: 'fct_labs',
description: 'patient lab orders',
descriptions: { user: 'patient lab orders' },
exclude_columns: ['admin_user_id'],
columns: [{ name: 'is_byol', type: 'boolean', expr: "lab_type = 'byol'" }],
measures: [{ name: 'count_all', expr: 'count(*)' }],
@ -675,19 +675,21 @@ describe('loadAllSources — standalone enrichment via inherits_columns_from', (
expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]);
});
it('normalizes legacy flat source and column descriptions when loading standalone files', async () => {
it('loads standalone source and column description maps', async () => {
const standalonePath = 'semantic-layer/conn-1/orders.yaml';
configService.listFiles.mockResolvedValue({ files: [standalonePath] });
configService.readFile.mockResolvedValue({
content: [
'name: orders',
'description: Finance orders used for invoice reconciliation.',
'descriptions:',
' user: Finance orders used for invoice reconciliation.',
'table: public.orders',
'grain: [id]',
'columns:',
' - name: id',
' type: string',
' description: Stable order identifier.',
' descriptions:',
' user: Stable order identifier.',
].join('\n'),
});

View file

@ -113,7 +113,7 @@ export class SemanticLayerService {
`standalone source '${source.name}' shadows an existing manifest entry and ` +
`will drop the manifest's columns and joins. Rewrite as an overlay: remove ` +
`"sql:", "table:", "grain:", "columns:", "joins:"; keep only "name:" plus ` +
`"measures:"/"segments:"/"description:"`;
`"measures:"/"segments:"/"descriptions:"`;
warnings.push(msg);
this.logger.warn(`[writeSource] ${msg}. Saving anyway.`);
}
@ -935,16 +935,12 @@ export class SemanticLayerService {
string,
{
descriptions?: Record<string, string>;
description?: string;
db_description?: string;
columns?: Array<{
name: string;
type: string;
pk?: boolean;
nullable?: boolean;
descriptions?: Record<string, string>;
description?: string;
db_description?: string;
}>;
}
>;
@ -952,12 +948,12 @@ export class SemanticLayerService {
if (shard?.tables) {
for (const [tableName, entry] of Object.entries(shard.tables)) {
tables.set(tableName, {
descriptions: migrateDescriptions(entry.descriptions, entry.description, entry.db_description) ?? {},
descriptions: entry.descriptions ?? {},
});
for (const col of entry.columns ?? []) {
columns.set(`${tableName}.${col.name}`, {
type: col.type,
descriptions: migrateDescriptions(col.descriptions, col.description, col.db_description) ?? {},
descriptions: col.descriptions ?? {},
nullable: col.nullable,
pk: col.pk,
});
@ -1055,11 +1051,7 @@ interface ManifestColumnEntry {
type: string;
pk?: boolean;
nullable?: boolean;
// New format: descriptions map
descriptions?: Record<string, string>;
// Legacy format: flat fields (read-only backwards compat)
description?: string;
db_description?: string;
constraints?: { dbt?: { not_null?: boolean; unique?: boolean } };
enum_values?: { dbt?: string[] };
tests?: {
@ -1077,11 +1069,7 @@ interface ManifestJoinEntry {
export interface ManifestTableEntry {
table: string;
// New format: descriptions map
descriptions?: Record<string, string>;
// Legacy format: flat fields (read-only backwards compat)
description?: string;
db_description?: string;
columns: ManifestColumnEntry[];
joins?: ManifestJoinEntry[];
tags?: { dbt?: string[] };
@ -1089,31 +1077,12 @@ export interface ManifestTableEntry {
usage?: TableUsageOutput;
}
/**
 * Converts the flat `description` / `db_description` fields into a keyed descriptions map.
 *
 * A non-empty `descriptions` map always wins and is handed back untouched (same reference).
 * Otherwise a non-empty `description` is stored under the `ai` key and a non-empty
 * `dbDescription` under the `db` key.
 *
 * @param descriptions - Already-keyed descriptions, if present.
 * @param description - Flat description text; mapped to `ai`.
 * @param dbDescription - Flat database description text; mapped to `db`.
 * @returns The keyed map, or `undefined` when no non-empty description is available.
 */
function migrateDescriptions(
  descriptions?: Record<string, string>,
  description?: string,
  dbDescription?: string,
): Record<string, string> | undefined {
  const keyedCount = descriptions ? Object.keys(descriptions).length : 0;
  if (keyedCount > 0) {
    return descriptions;
  }

  // Insertion order matters for key order of the resulting map: `ai` first, then `db`.
  const flatFields: Array<[string, string | undefined]> = [
    ['ai', description],
    ['db', dbDescription],
  ];
  const migrated: Record<string, string> = {};
  for (const [sourceKey, text] of flatFields) {
    if (text) {
      migrated[sourceKey] = text;
    }
  }

  return Object.keys(migrated).length === 0 ? undefined : migrated;
}
export function projectManifestEntry(name: string, entry: ManifestTableEntry): SemanticLayerSource {
const columns = entry.columns.map((c) => ({
name: c.name,
type: c.type,
role: c.type === 'time' ? 'time' : undefined,
descriptions: migrateDescriptions(c.descriptions, c.description, c.db_description),
descriptions: c.descriptions,
constraints: c.constraints,
enum_values: c.enum_values,
tests: c.tests,
@ -1126,7 +1095,7 @@ export function projectManifestEntry(name: string, entry: ManifestTableEntry): S
return {
name,
table: entry.table,
descriptions: migrateDescriptions(entry.descriptions, entry.description, entry.db_description),
descriptions: entry.descriptions,
grain,
columns,
joins: (entry.joins ?? []).map((j) => ({ to: j.to, on: j.on, relationship: j.relationship, source: j.source })),
@ -1359,7 +1328,6 @@ export function findDanglingSegmentRefs(source: Record<string, unknown>): string
const COMPOSE_KNOWN_KEYS = new Set([
'name',
'description',
'descriptions',
'grain',
'columns',

View file

@ -127,7 +127,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
` - name: <measure_name>`,
` expr: "<expression>"`,
` description: "<what it measures>"`,
`Overlay shape: "name:" plus any of "measures:", "segments:", "description:". Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`,
`Overlay shape: "name:" plus any of "measures:", "segments:", "descriptions:". Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`,
].join('\n'),
],
sourceName,

View file

@ -89,7 +89,7 @@ export async function validateSingleSource(
`${sourceName}.yaml: standalone source shadows an existing manifest entry — ` +
`writing it as-is drops the manifest's columns and joins. ` +
`Remove "sql:", "table:", "grain:", "columns:", and "joins:" and keep only ` +
`"name:" plus "measures:"/"segments:"/"description:" to write an overlay ` +
`"name:" plus "measures:"/"segments:"/"descriptions:" to write an overlay ` +
`that inherits the manifest schema. Call sl_read_source to inspect the existing source first.`,
);
return { errors, warnings };

View file

@ -176,7 +176,7 @@ describe('SlWriteSourceTool — session gating', () => {
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
});
it('normalizes flat source and column descriptions before writing', async () => {
it('writes source and column description maps', async () => {
const { tool, semanticLayerService } = makeTool();
const result = await tool.call(
{
@ -184,10 +184,10 @@ describe('SlWriteSourceTool — session gating', () => {
sourceName: 'orders',
source: {
name: 'orders',
description: 'Finance orders used for invoice reconciliation.',
descriptions: { user: 'Finance orders used for invoice reconciliation.' },
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'string', description: 'Stable order identifier.' }],
columns: [{ name: 'id', type: 'string', descriptions: { user: 'Stable order identifier.' } }],
measures: [],
joins: [],
} as any,

View file

@ -318,7 +318,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
` Writing standalone would drop the manifest's columns and joins, leaving only what you list here.`,
`To add measures/segments on top of the manifest, rewrite this YAML as an overlay:`,
` - Remove "sql:", "table:", "grain:", "columns:", and "joins:".`,
` - Keep only "name:", plus "measures:", "segments:", and/or "description:".`,
` - Keep only "name:", plus "measures:", "segments:", and/or "descriptions:".`,
` - The manifest's schema is inherited automatically.`,
`If you really need a different base table, use a different source name.`,
].join('\n');

View file

@ -23,7 +23,7 @@ interface EvictionDecisionRecord {
rawPath: string;
artifactKind: 'wiki' | 'sl';
artifactKey: string;
action: 'removed' | 'retained_deprecated' | 'retained_supported';
action: 'removed';
reason: string;
}

View file

@ -245,29 +245,4 @@ describe('local knowledge helpers', () => {
).rejects.toThrow('Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".');
});
it('ignores nested historic-SQL legacy paths when listing local knowledge pages', async () => {
await writeLocalKnowledgePage(project, {
key: 'historic-sql-paid-orders',
scope: 'GLOBAL',
summary: 'Flat historic SQL page',
content: 'Flat page body.',
tags: ['historic-sql'],
});
await project.fileStore.writeFile(
'knowledge/global/historic-sql/paid-orders.md',
'---\nsummary: Nested historic SQL page\nusage_mode: auto\n---\n\nNested body\n',
'Test',
'test@example.com',
'Write nested legacy page',
);
await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([
{
key: 'historic-sql-paid-orders',
path: 'knowledge/global/historic-sql-paid-orders.md',
scope: 'GLOBAL',
summary: 'Flat historic SQL page',
},
]);
});
});

View file

@ -1,5 +1,5 @@
id: abbreviated_legacy_no_declared_constraints
name: Abbreviated legacy naming fixture with no declared constraints
id: abbreviated_old_no_declared_constraints
name: Abbreviated old naming fixture with no declared constraints
tier: row_bearing
origin: synthetic
thresholdEligible: false

View file

@ -1,5 +1,5 @@
{
"connectionId": "abbreviated_legacy_no_declared_constraints",
"connectionId": "abbreviated_old_no_declared_constraints",
"driver": "sqlite",
"extractedAt": "2026-05-07T00:00:00.000Z",
"scope": {},

View file

@ -59,9 +59,7 @@ class SourceLoader:
f"Duplicate source name '{name}' in manifest shard {path}"
)
sources[name] = project_manifest_entry(name, entry)
description_sources[name] = _description_sources(
entry.descriptions, entry.description, entry.db_description
)
description_sources[name] = _description_sources(entry.descriptions)
# 2. Load files outside _schema/
for path in sorted(self.sources_dir.rglob("*.yaml")):
@ -138,11 +136,6 @@ class SourceLoader:
source = deepcopy(base)
description_sources = dict(base_description_sources or {})
# Overlay description semantics match the server: `description` writes the
# `user` source key, and `descriptions` merges keyed sources before a single
# visible description is resolved from the full map.
if overlay.get("description"):
description_sources["user"] = overlay["description"]
if overlay.get("descriptions"):
description_sources.update(
{
@ -151,7 +144,7 @@ class SourceLoader:
if text
}
)
if overlay.get("description") or overlay.get("descriptions"):
if overlay.get("descriptions"):
source.description = _resolve_description(
description_sources or None,
)

View file

@ -76,31 +76,17 @@ def map_column_type(db_type: str) -> str:
_DEFAULT_PRIORITY = ["user", "ai", "dbt", "db"]
def _description_sources(
descriptions: dict[str, str] | None,
description: str | None = None,
db_description: str | None = None,
) -> dict[str, str] | None:
def _description_sources(descriptions: dict[str, str] | None) -> dict[str, str] | None:
"""Normalize multi-source descriptions to a keyed map."""
if descriptions:
result = {source: text for source, text in descriptions.items() if text}
if result:
return result
result: dict[str, str] = {}
if description:
result["ai"] = description
if db_description:
result["db"] = db_description
return result or None
return None
def _resolve_description(
descriptions: dict[str, str] | None,
description: str | None = None,
db_description: str | None = None,
) -> str | None:
"""Resolve a single description from a multi-source map or legacy flat fields."""
def _resolve_description(descriptions: dict[str, str] | None) -> str | None:
"""Resolve a single description from a multi-source map."""
if descriptions:
for source in _DEFAULT_PRIORITY:
if text := descriptions.get(source):
@ -109,11 +95,6 @@ def _resolve_description(
for text in descriptions.values():
if text:
return text
# Legacy flat fields
if description:
return description
if db_description:
return db_description
return None
@ -123,18 +104,13 @@ class ManifestColumn(BaseModel):
pk: bool = False
nullable: bool = True
descriptions: dict[str, str] | None = None
# Legacy flat fields (backwards-compatible YAML parsing)
description: str | None = None
db_description: str | None = None
constraints: dict | None = None
enum_values: dict[str, list[str]] | None = None
tests: SourceColumnTests | None = None
@property
def resolved_description(self) -> str | None:
return _resolve_description(
self.descriptions, self.description, self.db_description
)
return _resolve_description(self.descriptions)
class ManifestJoin(BaseModel):
@ -147,9 +123,6 @@ class ManifestJoin(BaseModel):
class ManifestEntry(BaseModel):
table: str
descriptions: dict[str, str] | None = None
# Legacy flat fields (backwards-compatible YAML parsing)
description: str | None = None
db_description: str | None = None
columns: list[ManifestColumn]
joins: list[ManifestJoin] = []
default_time_dimension: DefaultTimeDimensionDbt | None = None
@ -158,9 +131,7 @@ class ManifestEntry(BaseModel):
@property
def resolved_description(self) -> str | None:
return _resolve_description(
self.descriptions, self.description, self.db_description
)
return _resolve_description(self.descriptions)
class Manifest(BaseModel):
@ -178,6 +149,8 @@ def validate_overlay(data: dict) -> list[str]:
Returns a list of error messages (empty if valid).
"""
errors: list[str] = []
if "description" in data:
errors.append("Overlay must use 'descriptions' for source descriptions")
if "table" in data:
errors.append("Overlay must not contain 'table' (owned by manifest)")
if "sql" in data:
@ -185,6 +158,10 @@ def validate_overlay(data: dict) -> list[str]:
"Overlay must not contain 'sql' (that makes it a standalone source)"
)
for col in data.get("columns", []):
if "description" in col:
errors.append(
f"Overlay column '{col.get('name', '?')}' must use 'descriptions'"
)
if "type" in col and "expr" not in col:
errors.append(
f"Overlay column '{col.get('name', '?')}' specifies 'type' without 'expr' "

View file

@ -1,10 +1,11 @@
name: churn_risk
description: |
Per-account churn risk scoring for B2B SaaS customers. Combines signals from
subscriptions (cancellation history), support tickets (severity, SLA breaches),
product usage (adoption decline), contracts (renewal proximity), CSM activities
(engagement recency), and invoices (payment issues) into a weighted composite
risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account.
descriptions:
user: |
Per-account churn risk scoring for B2B SaaS customers. Combines signals from
subscriptions (cancellation history), support tickets (severity, SLA breaches),
product usage (adoption decline), contracts (renewal proximity), CSM activities
(engagement recency), and invoices (payment issues) into a weighted composite
risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account.
sql: |
WITH sub_signals AS (
SELECT

View file

@ -1,7 +1,8 @@
name: churn_risk
description: |
Customer churn risk score combining tenure,
usage trends, and support burden.
descriptions:
user: |
Customer churn risk score combining tenure,
usage trends, and support burden.
sql: |
SELECT
c.id AS customer_id,

View file

@ -95,7 +95,7 @@ class TestProjectManifestEntry:
def orders_entry(self) -> ManifestEntry:
return ManifestEntry(
table="public.orders",
description="Customer orders",
descriptions={"user": "Customer orders"},
columns=[
ManifestColumn(name="id", type="integer", pk=True),
ManifestColumn(name="customer_id", type="integer"),
@ -202,7 +202,7 @@ class TestValidateOverlay:
def test_validate_overlay_valid(self):
data = {
"name": "orders",
"description": "Revenue-bearing orders",
"descriptions": {"user": "Revenue-bearing orders"},
"grain": ["id"],
"measures": [{"name": "revenue", "expr": "sum(total)"}],
"columns": [
@ -259,7 +259,7 @@ def _manifest_tables() -> dict:
"tables": {
"orders": {
"table": "public.orders",
"description": "Customer orders",
"descriptions": {"user": "Customer orders"},
"columns": [
{"name": "id", "type": "integer", "pk": True},
{"name": "customer_id", "type": "integer"},
@ -278,7 +278,7 @@ def _manifest_tables() -> dict:
},
"customers": {
"table": "public.customers",
"description": "Customer accounts",
"descriptions": {"user": "Customer accounts"},
"columns": [
{"name": "id", "type": "integer", "pk": True},
{"name": "name", "type": "varchar"},
@ -329,12 +329,12 @@ class TestTwoTierLoading:
assert sources["regions"].table == "public.regions"
assert sources["regions"].is_table_source
def test_overlay_descriptions_do_not_promote_base_description_to_user_source(
def test_overlay_descriptions_do_not_promote_base_map_to_user_source(
self, tmp_path: Path
):
standalone = {
"name": "regions",
"description": "Standalone description",
"descriptions": {"ai": "Standalone description"},
"table": "public.regions",
"grain": ["id"],
"columns": [
@ -376,7 +376,7 @@ class TestTwoTierLoading:
overlay = {
"name": "orders",
"description": "Revenue-bearing orders",
"descriptions": {"user": "Revenue-bearing orders"},
"grain": ["id"],
"measures": [{"name": "revenue", "expr": "sum(total)"}],
}
@ -394,11 +394,11 @@ class TestTwoTierLoading:
assert len(orders.measures) == 1
assert orders.measures[0].name == "revenue"
def test_overlay_description_override(self, tmp_path: Path):
def test_overlay_description_map_override(self, tmp_path: Path):
schema_dir = tmp_path / "_schema"
_write_yaml(schema_dir / "public.yaml", _manifest_tables())
overlay = {"name": "orders", "description": "Overridden description"}
overlay = {"name": "orders", "descriptions": {"user": "Overridden description"}}
_write_yaml(tmp_path / "orders.yaml", overlay)
_write_yaml(tmp_path / "customers.yaml", {"name": "customers"})
@ -426,7 +426,7 @@ class TestTwoTierLoading:
sources = loader.load_all()
assert sources["orders"].description == "Customer orders"
def test_overlay_descriptions_map_overrides_lower_priority_db_description(
def test_overlay_descriptions_map_overrides_lower_priority_db_source(
self, tmp_path: Path
):
schema_dir = tmp_path / "_schema"

View file

@ -129,10 +129,10 @@ function nonEnglishFixture() {
};
}
function abbreviatedLegacyFixture() {
function abbreviatedOldNamingFixture() {
return {
id: 'abbreviated_legacy_no_declared_constraints',
name: 'Abbreviated legacy naming fixture with no declared constraints',
id: 'abbreviated_old_no_declared_constraints',
name: 'Abbreviated old naming fixture with no declared constraints',
tier: 'row_bearing',
sql: [
'CREATE TABLE cust (cust_id TEXT NOT NULL, nm TEXT NOT NULL, stat_cd TEXT NOT NULL);',
@ -480,7 +480,7 @@ function scaleFixture() {
const fixtures = [
nonEnglishFixture(),
abbreviatedLegacyFixture(),
abbreviatedOldNamingFixture(),
analyticalWarehouseFixture(),
mixedCaseFixture(),
polymorphicFixture(),

View file

@ -46,15 +46,15 @@ const llmBoundaryPatterns = [
pattern: /\bembedMany\b/,
},
{
label: 'legacy context LLM provider port',
label: 'context-owned LLM provider port',
pattern: /\bLlmProviderPort\b/,
},
{
label: 'legacy scan LLM provider port',
label: 'scan-owned LLM provider port',
pattern: /\bKtxScanLlmPort\b/,
},
{
label: 'legacy gateway LLM provider helper',
label: 'context-owned gateway LLM provider helper',
pattern: /\bcreateGatewayLlmProvider\b/,
},
];

View file

@ -92,7 +92,7 @@ describe('scanFileContent', () => {
);
});
it('rejects context-owned LLM provider construction after @ktx/llm migration', () => {
it('rejects context-owned LLM provider construction outside @ktx/llm', () => {
const violations = [
...scanFileContent(
'packages/context/src/agent/local-llm-provider.ts',

View file

@ -128,7 +128,6 @@ describe('standalone example docs', () => {
.join('|'),
),
);
assert.doesNotMatch(readme, /--historic-sql-min-calls/);
});
it('lists every workspace package in the contributor docs', async () => {