From a517c834fedb55aecc340c35cf93fa6f9713bf77 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 14:37:05 +0200 Subject: [PATCH] refactor: remove legacy compatibility paths --- .../semantic-layer/warehouse/orders.yaml | 3 +- .../global/orbit-customers-source.md | 2 +- .../global/sales-ops-cs-handoff-process.md | 2 +- .../src/commands/connection-metabase-setup.ts | 4 +- packages/cli/src/commands/setup-commands.ts | 12 ++--- packages/cli/src/ingest.test-utils.ts | 12 ++--- packages/cli/src/ingest.test.ts | 3 +- packages/cli/src/local-adapters.ts | 2 - .../cli/src/local-scan-connectors.test.ts | 9 ++-- packages/cli/src/local-scan-connectors.ts | 2 +- packages/cli/src/next-steps.test.ts | 25 ---------- packages/cli/src/public-ingest.ts | 2 +- packages/cli/src/setup-databases.test.ts | 7 --- packages/cli/src/setup-databases.ts | 4 +- packages/cli/src/setup-sources.ts | 10 ++-- packages/cli/src/setup.ts | 2 - .../memory_agent_bundle_ingest_reconcile.md | 4 +- .../context/skills/ingest_triage/SKILL.md | 4 +- .../context/skills/metabase_ingest/SKILL.md | 2 +- .../context/skills/metricflow_ingest/SKILL.md | 6 ++- packages/context/skills/sl/SKILL.md | 3 +- packages/context/skills/sl_capture/SKILL.md | 11 ++-- .../historic-sql/historic-sql.adapter.test.ts | 4 +- .../historic-sql/historic-sql.adapter.ts | 7 --- .../adapters/historic-sql/projection.test.ts | 17 +++---- .../adapters/historic-sql/projection.ts | 14 +----- .../adapters/historic-sql/types.test.ts | 12 ++++- .../src/ingest/adapters/historic-sql/types.ts | 24 +-------- .../adapters/looker/local-looker.adapter.ts | 8 ++- .../adapters/metabase/client-port.test.ts | 11 ++-- .../ingest/adapters/metabase/client-port.ts | 6 --- .../ingest/adapters/metabase/client.test.ts | 7 +-- .../src/ingest/adapters/metabase/client.ts | 31 +----------- .../metabase/local-metabase.adapter.test.ts | 13 ----- .../metabase/local-metabase.adapter.ts | 6 +-- .../metricflow/import-semantic-models.ts | 31 ------------ .../src/ingest/ingest-bundle.runner.test.ts | 3 +- .../context/src/ingest/local-adapters.test.ts | 12 +++-- packages/context/src/ingest/local-adapters.ts | 24 +++------ .../src/ingest/report-snapshot.test.ts | 9 ++-- .../context/src/ingest/report-snapshot.ts | 25 ++-------- packages/context/src/ingest/reports.ts | 3 +- .../src/ingest/stages/stage-index.types.ts | 2 +- .../tools/emit-eviction-decision.tool.ts | 2 +- .../emit-reconciliation-records.tool.test.ts | 8 +-- .../src/ingest/tools/eviction-list.tool.ts | 2 +- .../ingest/tools/verification-ledger.tool.ts | 2 +- .../tools/warehouse-verification/index.ts | 6 --- .../warehouse-catalog.service.ts | 18 +++---- .../src/ingest/wiki-sl-ref-repair.test.ts | 2 +- .../context/src/memory/capture-signals.ts | 3 +- packages/context/src/memory/types.ts | 2 +- packages/context/src/project/config.test.ts | 38 +++++++------- packages/context/src/project/config.ts | 39 +++++---------- .../context/src/project/setup-config.test.ts | 2 +- .../src/scan/relationship-benchmarks.test.ts | 4 +- .../src/scan/relationship-diagnostics.test.ts | 2 +- .../src/search/backend-conformance.test.ts | 3 +- .../src/sl/description-normalization.ts | 5 -- packages/context/src/sl/local-sl.test.ts | 6 ++- .../src/sl/pglite-sl-search-prototype.test.ts | 9 ++-- packages/context/src/sl/schemas.ts | 6 +-- .../src/sl/semantic-layer.service.test.ts | 12 +++-- .../context/src/sl/semantic-layer.service.ts | 42 ++-------------- .../src/sl/tools/sl-edit-source.tool.ts | 2 +- .../src/sl/tools/sl-warehouse-validation.ts | 2 +- .../src/sl/tools/sl-write-source.tool.test.ts | 6 +-- .../src/sl/tools/sl-write-source.tool.ts | 2 +- packages/context/src/tools/tool-session.ts | 2 +- .../context/src/wiki/local-knowledge.test.ts | 25 ---------- .../data.sqlite | Bin .../expected-links.yaml | 0 .../fixture.yaml | 4 +- .../snapshot.json | 2 +- python/ktx-sl/semantic_layer/loader.py | 11 +--- python/ktx-sl/semantic_layer/manifest.py | 47 +++++------------- .../ktx-sl/sources/b2b_saas/churn_risk.yaml | 13 ++--- .../ktx-sl/sources/ecommerce/churn_risk.yaml | 7 +-- python/ktx-sl/tests/test_manifest.py | 20 ++++---- ...d-evidence-fusion-adversarial-fixtures.mjs | 8 +-- scripts/check-boundaries.mjs | 6 +-- scripts/check-boundaries.test.mjs | 2 +- scripts/examples-docs.test.mjs | 1 - 83 files changed, 239 insertions(+), 534 deletions(-) rename packages/context/test/fixtures/relationship-benchmarks/{abbreviated_legacy_no_declared_constraints => abbreviated_old_no_declared_constraints}/data.sqlite (100%) rename packages/context/test/fixtures/relationship-benchmarks/{abbreviated_legacy_no_declared_constraints => abbreviated_old_no_declared_constraints}/expected-links.yaml (100%) rename packages/context/test/fixtures/relationship-benchmarks/{abbreviated_legacy_no_declared_constraints => abbreviated_old_no_declared_constraints}/fixture.yaml (50%) rename packages/context/test/fixtures/relationship-benchmarks/{abbreviated_legacy_no_declared_constraints => abbreviated_old_no_declared_constraints}/snapshot.json (98%) diff --git a/examples/local-warehouse/semantic-layer/warehouse/orders.yaml b/examples/local-warehouse/semantic-layer/warehouse/orders.yaml index ffcca12b..8ffbe973 100644 --- a/examples/local-warehouse/semantic-layer/warehouse/orders.yaml +++ b/examples/local-warehouse/semantic-layer/warehouse/orders.yaml @@ -1,6 +1,7 @@ name: orders table: public.orders -description: Orders placed through the storefront. +descriptions: + user: Orders placed through the storefront. grain: - id columns: diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md index e98c1663..2c9f2c65 100644 --- a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md @@ -57,4 +57,4 @@ Always join through `customer.id`. Do not join on `email`. - **Join key:** Always use `customer.id`, never `email`. - **Timezone:** `created_at` and `last_seen_at` are UTC. Confirm whether a question expects UTC or a local business day before filtering. - **Paying vs. all:** `free` customers must be excluded from paying-customer follow-ups. Use `paying_customer_count`, not `customer_count`. -- **plan_tier values:** `free`, `pro`, `enterprise`. Note: `pro_plus` is a legacy alias for `growth` in the account/contract layer (see `orbit-plan-segment-normalization`), but `plan_tier` on this table uses `pro` not `pro_plus`. +- **plan_tier values:** `free`, `pro`, `enterprise`. Note: use the canonical plan names from the account/contract layer (see `orbit-plan-segment-normalization`); `plan_tier` on this table uses `pro` rather than `growth`. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md b/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md index d547d026..65693ee6 100644 --- a/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md +++ b/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md @@ -27,7 +27,7 @@ Sales Ops must complete the handoff **before the first implementation call**. Cu | Field | Notes | |---|---| -| Current plan | Starter / Growth / Enterprise — use canonical plan name, not legacy aliases | +| Current plan | Starter / Growth / Enterprise — use canonical plan name | | Account segment | self_serve / commercial / enterprise (see `orbit-plan-segment-normalization`) | | Contract shape | Term, ARR, any discounts or custom terms | | Renewal contact | Named person on the customer side responsible for renewal | diff --git a/packages/cli/src/commands/connection-metabase-setup.ts b/packages/cli/src/commands/connection-metabase-setup.ts index 2321ea3d..122ebea8 100644 --- a/packages/cli/src/commands/connection-metabase-setup.ts +++ b/packages/cli/src/commands/connection-metabase-setup.ts @@ -132,11 +132,11 @@ function uniqueSorted(values: number[]): number[] { } function resolveMetabaseUrl(connection: KtxProjectConnectionConfig | undefined): string | undefined { - return stringField(connection?.api_url) ?? stringField(connection?.apiUrl) ?? stringField(connection?.url); + return stringField(connection?.api_url); } function resolveLiteralMetabaseApiKey(connection: KtxProjectConnectionConfig | undefined): string | undefined { - return stringField(connection?.api_key) ?? stringField(connection?.apiKey); + return stringField(connection?.api_key); } function listMetabaseConnectionIds(project: KtxLocalProject): string[] { diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts index 90251ae1..7ecbda22 100644 --- a/packages/cli/src/commands/setup-commands.ts +++ b/packages/cli/src/commands/setup-commands.ts @@ -110,7 +110,6 @@ function shouldShowSetupEntryMenu( disableHistoricSql?: boolean; historicSqlWindowDays?: number; historicSqlMinExecutions?: number; - historicSqlMinCalls?: number; historicSqlServiceAccountPattern?: string[]; historicSqlRedactionPattern?: string[]; skipDatabases?: boolean; @@ -180,7 +179,6 @@ function shouldShowSetupEntryMenu( 'disableHistoricSql', 'historicSqlWindowDays', 'historicSqlMinExecutions', - 'historicSqlMinCalls', 'skipDatabases', 'source', 'sourceConnectionId', @@ -266,11 +264,6 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo .option('--disable-historic-sql', 'Disable Historic SQL for the selected database', false) .option('--historic-sql-window-days ', 'Historic SQL query-history window', positiveInteger) .option('--historic-sql-min-executions ', 'Minimum Historic SQL executions for a template', positiveInteger) - .option( - '--historic-sql-min-calls ', - 'Alias for --historic-sql-min-executions', - positiveInteger, - ) .option( '--historic-sql-service-account-pattern ', 'Historic SQL service-account regex; repeatable', @@ -352,7 +345,6 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo const mode = options.new ? 'new' : options.existing ? 'existing' : 'auto'; const resolvedAgentScope = options.global ? 'global' : options.agentScope; - const historicSqlMinExecutions = options.historicSqlMinExecutions ?? options.historicSqlMinCalls; await runSetupArgs(context, { command: 'run', projectDir: resolveCommandProjectDir(command), @@ -380,7 +372,9 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo ...(options.enableHistoricSql ? { enableHistoricSql: true } : {}), ...(options.disableHistoricSql ? { disableHistoricSql: true } : {}), ...(options.historicSqlWindowDays !== undefined ? { historicSqlWindowDays: options.historicSqlWindowDays } : {}), - ...(historicSqlMinExecutions !== undefined ? { historicSqlMinExecutions } : {}), + ...(options.historicSqlMinExecutions !== undefined + ? { historicSqlMinExecutions: options.historicSqlMinExecutions } + : {}), ...(options.historicSqlServiceAccountPattern.length > 0 ? { historicSqlServiceAccountPatterns: options.historicSqlServiceAccountPattern } : {}), diff --git a/packages/cli/src/ingest.test-utils.ts b/packages/cli/src/ingest.test-utils.ts index 9241fa34..c6cd987b 100644 --- a/packages/cli/src/ingest.test-utils.ts +++ b/packages/cli/src/ingest.test-utils.ts @@ -375,7 +375,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ collection_id: 12, archived: false, result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 101 as id' } }, + dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 'select 101 as id' }] }, parameters: [], dashboard_count: 0, }, @@ -389,7 +389,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ collection_id: 12, archived: false, result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 102 as id' } }, + dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 'select 102 as id' }] }, parameters: [], dashboard_count: 0, }, @@ -403,7 +403,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ collection_id: 13, archived: false, result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 103 as id' } }, + dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 'select 103 as id' }] }, parameters: [], dashboard_count: 0, }, @@ -453,11 +453,11 @@ function createSyncModeMetabaseClient(): MetabaseRuntimeClient { }, getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary), convertMbqlToNative: async () => ({ query: 'select 1' }), - getNativeSql: (card) => card.dataset_query?.native?.query ?? null, + getNativeSql: (card) => card.dataset_query?.stages?.[0]?.native ?? null, getTemplateTags: () => ({}), - getCardSql: async (card) => card.dataset_query?.native?.query ?? null, + getCardSql: async (card) => card.dataset_query?.stages?.[0]?.native ?? null, getResolvedSql: async (card) => ({ - resolvedSql: card.dataset_query?.native?.query ?? `select ${card.id} as id`, + resolvedSql: card.dataset_query?.stages?.[0]?.native ?? `select ${card.id} as id`, templateTags: [], resolutionStatus: 'resolved', }), diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index de226bc4..53bb02c0 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -716,7 +716,6 @@ describe('runKtxIngest', () => { patternPagesWritten: 30, stalePatternPagesMarked: 2, archivedPatternPages: 3, - legacyPagesDeleted: 4, }, errors: [], warnings: [], @@ -750,7 +749,7 @@ describe('runKtxIngest', () => { expect(io.stderr()).toBe(''); expect(io.stdout()).toContain('Adapter: historic-sql\n'); - expect(io.stdout()).toContain('Saved memory: 39 wiki, 57 SL\n'); + expect(io.stdout()).toContain('Saved memory: 35 wiki, 57 SL\n'); }); it('returns a non-zero code when local ingest reports failed work units', async () => { diff --git a/packages/cli/src/local-adapters.ts b/packages/cli/src/local-adapters.ts index 8557674c..9a6915c2 100644 --- a/packages/cli/src/local-adapters.ts +++ b/packages/cli/src/local-adapters.ts @@ -1,4 +1,3 @@ -import { join } from 'node:path'; import { createBigQueryLiveDatabaseIntrospection, isKtxBigQueryConnectionConfig, @@ -298,7 +297,6 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli const base = { sqlAnalysis: ktxCliHistoricSqlAnalysis(options), - postgresBaselineRootDir: join(project.projectDir, '.ktx/cache/historic-sql'), }; if (dialect === 'postgres') { diff --git a/packages/cli/src/local-scan-connectors.test.ts b/packages/cli/src/local-scan-connectors.test.ts index 0fe57518..087e978d 100644 --- a/packages/cli/src/local-scan-connectors.test.ts +++ b/packages/cli/src/local-scan-connectors.test.ts @@ -62,10 +62,7 @@ describe('createKtxCliScanConnector', () => { expect(connector.driver).toBe('sqlite'); }); - it.each([ - ['maxBytesBilled', ' maxBytesBilled: 123456789', 123456789], - ['max_bytes_billed', ' max_bytes_billed: "987654321"', '987654321'], - ])('passes BigQuery %s from standalone config', async (_label, byteCapLine, expectedMaxBytesBilled) => { + it('passes BigQuery max_bytes_billed from standalone config', async () => { await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); await writeFile( join(tempDir, 'ktx.yaml'), @@ -76,7 +73,7 @@ describe('createKtxCliScanConnector', () => { ' driver: bigquery', ' dataset_id: analytics', ' readonly: true', - byteCapLine, + ' max_bytes_billed: "987654321"', '', ].join('\n'), 'utf-8', @@ -90,7 +87,7 @@ describe('createKtxCliScanConnector', () => { expect(bigQueryMock.constructorInputs).toEqual([ expect.objectContaining({ connectionId: 'warehouse', - maxBytesBilled: expectedMaxBytesBilled, + maxBytesBilled: '987654321', }), ]); }); diff --git a/packages/cli/src/local-scan-connectors.ts b/packages/cli/src/local-scan-connectors.ts index d3377b0c..3058b96e 100644 --- a/packages/cli/src/local-scan-connectors.ts +++ b/packages/cli/src/local-scan-connectors.ts @@ -6,7 +6,7 @@ const SUPPORTED_DRIVERS = 'sqlite, postgres, mysql, clickhouse, sqlserver, bigqu function bigQueryMaxBytesBilled( connection: KtxLocalProject['config']['connections'][string], ): number | string | undefined { - const raw = connection.maxBytesBilled ?? connection.max_bytes_billed; + const raw = connection.max_bytes_billed; if (typeof raw === 'number') { return Number.isFinite(raw) && raw > 0 ? raw : undefined; } diff --git a/packages/cli/src/next-steps.test.ts b/packages/cli/src/next-steps.test.ts index b4706d72..facb4eb8 100644 --- a/packages/cli/src/next-steps.test.ts +++ b/packages/cli/src/next-steps.test.ts @@ -6,8 +6,6 @@ import { formatSetupNextStepLines, } from './next-steps.js'; -const command = (...parts: string[]) => parts.join(' '); - describe('KTX demo next steps', () => { it('uses supported context-build commands before agent usage', () => { expect(KTX_CONTEXT_BUILD_COMMANDS).toEqual([ @@ -57,29 +55,6 @@ describe('KTX demo next steps', () => { expect(rendered).not.toContain('Optional MCP:'); }); - it('does not advertise removed Commander migration commands', () => { - const rendered = formatNextStepLines().join('\n'); - - expect(rendered).toContain('ktx status --json'); - expect(rendered).not.toContain('ktx agent'); - expect(rendered).toContain('ktx sl list'); - expect(rendered).toContain('ktx wiki list'); - - for (const removed of [ - command('ktx', 'ask'), - command('ktx', 'mcp'), - command('ktx', 'connect'), - command('ktx', 'knowledge'), - command('dev', 'model'), - command('dev', 'knowledge'), - command('ktx', 'ingest', 'run'), - command('ktx', 'ingest', 'replay'), - command('ktx', 'serve', '--mcp', 'stdio', '--user-id', 'local'), - ]) { - expect(rendered).not.toContain(removed); - } - }); - it('keeps setup next steps focused on building context when the build is not ready', () => { const rendered = formatSetupNextStepLines({ setupReady: true, diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index f8296177..71d93e4a 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -92,7 +92,7 @@ function normalizedDriver(connection: KtxProjectConnectionConfig): string { } function sourceDirForConnection(connection: KtxProjectConnectionConfig): string | undefined { - const value = connection.source_dir ?? connection.sourceDir; + const value = connection.source_dir; return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined; } diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 46506ae7..02edfec9 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -64,8 +64,6 @@ function textInputPrompt(message: string): string { return `${title}\n│\n│ ${bodyLines.join('\n│ ')}\n│ Press Escape to go back.\n│`; } -const legacyHistoricSqlServiceAccountPatternsKey = ['serviceAccount', 'UserPatterns'].join(''); - describe('setup databases step', () => { let tempDir: string; @@ -1288,7 +1286,6 @@ describe('setup databases step', () => { redactionPatterns: ['(?i)secret'], }, }); - expect(config.connections.snowflake.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); expect(config.ingest.adapters).toContain('historic-sql'); }); @@ -1336,10 +1333,8 @@ describe('setup databases step', () => { }, }, }); - expect(config.connections.warehouse.historicSql).not.toHaveProperty('minCalls'); expect(config.connections.warehouse.historicSql).not.toHaveProperty('windowDays'); expect(config.connections.warehouse.historicSql).not.toHaveProperty('redactionPatterns'); - expect(config.connections.warehouse.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); expect(config.ingest.adapters).toContain('historic-sql'); expect(config.ingest.workUnits.maxConcurrency).toBe(6); expect(io.stdout()).toContain('Historic SQL probe...'); @@ -1393,7 +1388,6 @@ describe('setup databases step', () => { redactionPatterns: [], }, }); - expect(config.connections.analytics.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); expect(config.ingest.adapters).toContain('historic-sql'); }); @@ -1443,7 +1437,6 @@ describe('setup databases step', () => { }, }, }); - expect(config.connections.warehouse.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); }); it('prints a non-blocking Postgres Historic SQL probe failure after connection test succeeds', async () => { diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index f770c5c4..fc834476 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -40,7 +40,6 @@ export interface KtxSetupDatabasesArgs { disableHistoricSql?: boolean; historicSqlWindowDays?: number; historicSqlMinExecutions?: number; - historicSqlMinCalls?: number; historicSqlServiceAccountPatterns?: string[]; historicSqlRedactionPatterns?: string[]; skipDatabases: boolean; @@ -857,14 +856,13 @@ async function maybeApplyHistoricSqlConfig(input: { dialect, filters: historicSqlFiltersForSetup(input.args.historicSqlServiceAccountPatterns), }; - delete common[['serviceAccount', 'UserPatterns'].join('')]; if (dialect === 'postgres') { return { ...input.connection, historicSql: { ...common, - minExecutions: input.args.historicSqlMinExecutions ?? input.args.historicSqlMinCalls ?? 5, + minExecutions: input.args.historicSqlMinExecutions ?? 5, }, }; } diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 6674ef75..72bee456 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -544,8 +544,8 @@ function sourcePathFromFileRepoUrl(repoUrl: string, subpath?: string): string { } function repoAuthToken(connection: KtxProjectConnectionConfig | Record): string | null { - const ref = stringField(connection.auth_token_ref) ?? stringField(connection.authTokenRef); - const literal = stringField(connection.authToken) ?? stringField(connection.auth_token); + const ref = stringField(connection.auth_token_ref); + const literal = stringField(connection.auth_token); return literal ?? resolveKtxConfigReference(ref, process.env) ?? null; } @@ -563,8 +563,8 @@ async function collectYamlFilesRecursive(sourceRoot: string): Promise { - let sourceDir = stringField(connection.source_dir) ?? stringField(connection.sourceDir); - const repoUrl = stringField(connection.repo_url) ?? stringField(connection.repoUrl); + let sourceDir = stringField(connection.source_dir); + const repoUrl = stringField(connection.repo_url); if (!sourceDir && repoUrl?.startsWith('file:')) { sourceDir = sourcePathFromFileRepoUrl(repoUrl, stringField(connection.path)); } @@ -624,7 +624,7 @@ async function defaultValidateLooker(projectDir: string, connectionId: string): } async function defaultValidateLookml(connection: KtxProjectConnectionConfig): Promise { - const repoUrl = stringField(connection.repoUrl) ?? stringField(connection.repo_url); + const repoUrl = stringField(connection.repoUrl); if (!repoUrl) { return { ok: false, message: 'LookML setup requires repoUrl.' }; } diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index dec0f4d7..0e928832 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -83,7 +83,6 @@ export type KtxSetupArgs = disableHistoricSql?: boolean; historicSqlWindowDays?: number; historicSqlMinExecutions?: number; - historicSqlMinCalls?: number; historicSqlServiceAccountPatterns?: string[]; historicSqlRedactionPatterns?: string[]; skipDatabases: boolean; @@ -626,7 +625,6 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup ...(args.historicSqlMinExecutions !== undefined ? { historicSqlMinExecutions: args.historicSqlMinExecutions } : {}), - ...(args.historicSqlMinCalls !== undefined ? { historicSqlMinCalls: args.historicSqlMinCalls } : {}), ...(args.historicSqlServiceAccountPatterns ? { historicSqlServiceAccountPatterns: args.historicSqlServiceAccountPatterns } : {}), diff --git a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md index 5d2316fd..30b52537 100644 --- a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md +++ b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md @@ -1,5 +1,5 @@ -You are the reconciliation agent for a multi-file ingest bundle. Stage 3 WorkUnits have already run against this job's session worktree; your input is the deterministic Stage Index listing every write each WU made, plus an Eviction Set listing raw files present in the prior sync but absent in this one. Your job is to (a) decide what happens to each evicted artifact (remove vs retain with a deprecation marker), (b) sweep the Stage Index for any cross-WU conflicts the individual WUs missed, and (c) emit conflict + eviction records that the runner will fold into the final IngestReport. +You are the reconciliation agent for a multi-file ingest bundle. Stage 3 WorkUnits have already run against this job's session worktree; your input is the deterministic Stage Index listing every write each WU made, plus an Eviction Set listing raw files present in the prior sync but absent in this one. Your job is to (a) remove artifacts produced by deleted raw files, (b) sweep the Stage Index for any cross-WU conflicts the individual WUs missed, and (c) emit conflict + eviction records that the runner will fold into the final IngestReport. @@ -12,7 +12,7 @@ Parsimonious. Stage 3 WUs already loaded `ingest_triage` and handled conflicts t 3. If the system prompt includes ``, apply those pins before flagging a same-name or near-duplicate conflict. A pinned `canonicalArtifactKey` keeps the contested name when it is present in the Stage Index; competing variants keep or receive disambiguated names. 4. Sweep both exact-key conflicts and near-duplicate writes. Compare WUs that wrote overlapping SL source names, overlapping wiki keys, the same `tables:` or `sl_refs:` action details, or obviously equivalent topic titles under different wiki keys. Call `stage_diff` to see the actual difference, and use `wiki_read`/`sl_read_source` when two different keys appear to describe the same table, metric, or source-of-truth mapping. If they're the same content, leave one canonical artifact and record the duplicate as subsumed. If they differ per `ingest_triage` rules, apply the correct resolution (rename + capture; election of canonical; silent replace for expression-only re-ingest change; or pinned canonical), then call `emit_conflict_resolution` with the artifact key and decision. 5. For any `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call you make during reconciliation, include `rawPaths` with only the raw paths that directly caused that reconciliation action. -6. Call `eviction_list()` for deleted raw paths. For each eviction: if inbound refs are empty, remove the artifact (`sl_delete`, `wiki_remove`) and include that evicted raw path in `rawPaths`; if inbound refs exist, retain with a deprecation marker and include that evicted raw path in `rawPaths`. Then call `emit_eviction_decision` for every removed or retained artifact. +6. Call `eviction_list()` for deleted raw paths. For each listed artifact, remove it (`sl_delete`, `wiki_remove`) and include the evicted raw path in `rawPaths`. Then call `emit_eviction_decision` with `action: "removed"` for every removed artifact. 7. If the Stage 4 sweep discovers a raw file whose only honest outcome is standalone SQL, wiki-only capture, or a human flag, call `emit_unmapped_fallback` with the raw path, reason, and fallback kind. 8. Use `read_raw_span` to zoom into specific raw files when you need to resolve what two contested measures or wiki pages actually describe. 9. Exit when you've processed every item. diff --git a/packages/context/skills/ingest_triage/SKILL.md b/packages/context/skills/ingest_triage/SKILL.md index 1ac3d108..df13ed83 100644 --- a/packages/context/skills/ingest_triage/SKILL.md +++ b/packages/context/skills/ingest_triage/SKILL.md @@ -32,8 +32,8 @@ Apply the rules below before every write that could collide with an existing art | Definitional contradiction | Same name, substantively different formulas (different aggregation, different filters, different columns) | **Rename + capture**: disambiguate ALL variants with suffix derived from the domain (`churn_risk_engagement_based`, `churn_risk_billing_based`) and write a unified wiki page listing every variant with provenance. The contested name does NOT land in the SL. **Always flag.** | 5. **Eviction (Stage 4 only)**: for each entry in `eviction_list()`: - - `inbound_refs: []` → remove the artifact (`sl_delete` for SL sources, `wiki_remove` for wiki pages). - - `inbound_refs: [...]` → retain the artifact, set `deprecated: true` on SL sources (via `sl_edit_source`), write a wiki note "origin file removed in ; preserved because referenced by: …". Flag in the IngestReport so the user can plan migration. + - Remove the artifact (`sl_delete` for SL sources, `wiki_remove` for wiki pages). + - Record the removal with `emit_eviction_decision` and `action: "removed"`. ## Why same-ingest vs re-ingest differs diff --git a/packages/context/skills/metabase_ingest/SKILL.md b/packages/context/skills/metabase_ingest/SKILL.md index f5aa00e2..d35166dc 100644 --- a/packages/context/skills/metabase_ingest/SKILL.md +++ b/packages/context/skills/metabase_ingest/SKILL.md @@ -98,7 +98,7 @@ measures: expr: "" ``` -Overlay shape: `name:` plus any of `measures:`, `segments:`, `description:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: [""]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule. +Overlay shape: `name:` plus any of `measures:`, `segments:`, `descriptions:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: [""]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule. **Join discovery:** When your card's SQL references warehouse tables (e.g. in `FROM` or `JOIN` clauses), call `sl_discover({ query: '' })` before writing. The matching manifest entry's `name` is the value you use in `joins: [- to: ]` only when the card output exposes a local key that matches the target source grain (for example `account_id = mart_account_segments.account_id`). Do not declare a KTX join just because the card SQL joins that table internally. If the output only exposes display fields such as `account_name`, keep the SQL source self-contained or project the key before adding the join. Use `many_to_one` for FK-to-dimension joins, `one_to_many` for the reverse. diff --git a/packages/context/skills/metricflow_ingest/SKILL.md b/packages/context/skills/metricflow_ingest/SKILL.md index 47187ffb..6ed4b916 100644 --- a/packages/context/skills/metricflow_ingest/SKILL.md +++ b/packages/context/skills/metricflow_ingest/SKILL.md @@ -177,7 +177,8 @@ semantic_models: # KTX overlay at /orders.yaml: # name: orders -description: Order fact table. +descriptions: + user: Order fact table. measures: - {name: order_count, expr: "count(order_id)"} - {name: gross_amount, expr: "sum(amount)"} @@ -221,7 +222,8 @@ metrics: # # name: orders_ext -description: Extended order fact including refund handling; `revenue` = gross - refund. +descriptions: + user: Extended order fact including refund handling; `revenue` = gross - refund. measures: - {name: order_count, expr: "count(order_id)"} - {name: gross_amount, expr: "sum(amount)"} diff --git a/packages/context/skills/sl/SKILL.md b/packages/context/skills/sl/SKILL.md index 9cdb8b34..f7077c33 100644 --- a/packages/context/skills/sl/SKILL.md +++ b/packages/context/skills/sl/SKILL.md @@ -29,7 +29,8 @@ Enrich a manifest-backed table with measures, computed columns, joins, and segme ```yaml name: fct_orders # must match an existing manifest table -description: "Overlay adding business measures to the orders fact table." +descriptions: + user: "Overlay adding business measures to the orders fact table." measures: - name: total_revenue expr: sum(amount) diff --git a/packages/context/skills/sl_capture/SKILL.md b/packages/context/skills/sl_capture/SKILL.md index 4bc383eb..a40111ea 100644 --- a/packages/context/skills/sl_capture/SKILL.md +++ b/packages/context/skills/sl_capture/SKILL.md @@ -100,13 +100,13 @@ measures: **Extract repeated filter bundles into named segments.** If the same predicate appears on multiple measures of the same source, lift it to a `segments[]` entry and have each measure reference it. One edit updates every measure that depends on it. -**Never write a standalone file on a manifest-backed name.** If `sl_discover({ tableName })` finds an existing schema for that name, you MUST write an overlay (`name:` + `measures:`/`segments:`/`description:` only — no `sql:`, `table:`, `grain:`, `columns:`, `joins:`). A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name. +**Never write a standalone file on a manifest-backed name.** If `sl_discover({ tableName })` finds an existing schema for that name, you MUST write an overlay (`name:` + `measures:`/`segments:`/`descriptions:` only — no `sql:`, `table:`, `grain:`, `columns:`, `joins:`). A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name. **Prefer overlay decomposition over standalone SQL sources.** Before reaching for `source_type: sql`, check whether the metric decomposes into measures on existing overlays (including cross-source derived measures). Use `source_type: sql` only when: - The metric requires per-user/per-entity derivation that cannot be expressed as a single `expr` (e.g., `EXISTS` over a time-windowed subset), OR - The metric requires multi-step CTEs whose intermediate grain is not a column in any existing source. -When an `sql` source is unavoidable, note in its `description` which SL gap forced the choice so it can be retired once the primitive ships. It must target a name NOT in the manifest — pick a distinct one (e.g. `mrr_waterfall_rollup`, not `fct_orders`). +When an `sql` source is unavoidable, note in its `descriptions` map which SL gap forced the choice so it can be retired once the primitive ships. It must target a name NOT in the manifest — pick a distinct one (e.g. `mrr_waterfall_rollup`, not `fct_orders`). ## Slim standalone sources via `inherits_columns_from` @@ -116,7 +116,8 @@ Discover the manifest key with `sl_discover` — pass the bare name (`CONSIGNMEN ```yaml name: aav_consignments -description: AAV consignments — filtered view of MARTS.CONSIGNMENTS for the auto-auction-vaulting channel. +descriptions: + user: AAV consignments — filtered view of MARTS.CONSIGNMENTS for the auto-auction-vaulting channel. source_type: sql sql: | SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT, ALT_VALUE_COMBINED, my_derived_flag @@ -127,10 +128,10 @@ sql: | inherits_columns_from: CONSIGNMENTS grain: [CONSIGNED_ITEM_ID] columns: - - { name: CONSIGNED_ITEM_ID } # type/description inherited from manifest + - { name: CONSIGNED_ITEM_ID } # type/descriptions inherited from manifest - { name: CASH_ADV_AMOUNT } - { name: ALT_VALUE_COMBINED } - - { name: my_derived_flag, type: boolean, expr: "CASH_ADV_AMOUNT > 0", description: "Computed locally — has any cash advance." } + - { name: my_derived_flag, type: boolean, expr: "CASH_ADV_AMOUNT > 0", descriptions: { user: "Computed locally — has any cash advance." } } measures: - name: total_cash_advance expr: sum(CASH_ADV_AMOUNT) diff --git a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts index c2c679e5..36461bb2 100644 --- a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts @@ -13,7 +13,7 @@ async function tempDir(): Promise { const sqlAnalysis: SqlAnalysisPort = { async analyzeForFingerprint() { - throw new Error('legacy analyzeForFingerprint must not be used'); + throw new Error('analyzeForFingerprint must not be used'); }, async analyzeBatch() { return new Map(); @@ -66,7 +66,7 @@ describe('HistoricSqlSourceAdapter', () => { }; const batchSqlAnalysis: SqlAnalysisPort = { async analyzeForFingerprint() { - throw new Error('legacy analyzeForFingerprint must not be used'); + throw new Error('analyzeForFingerprint must not be used'); }, async analyzeBatch() { return new Map([ diff --git a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts index aee051e7..be2fc9f0 100644 --- a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts +++ b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts @@ -1,5 +1,3 @@ -import { rm } from 'node:fs/promises'; -import { join } from 'node:path'; import type { ChunkResult, DiffSet, FetchContext, ScopeDescriptor, SourceAdapter } from '../../types.js'; import { chunkHistoricSqlUnifiedStagedDir, describeHistoricSqlUnifiedScope } from './chunk-unified.js'; import { detectHistoricSqlStagedDir } from './detect.js'; @@ -28,11 +26,6 @@ export class HistoricSqlSourceAdapter implements SourceAdapter { pullConfig, now: this.deps.now?.(), }); - if (this.deps.legacyPostgresBaselineRootDir) { - await rm(join(this.deps.legacyPostgresBaselineRootDir, ctx.connectionId, ['pgss', 'baseline.json'].join('-')), { - force: true, - }); - } } chunk(stagedDir: string, diffSet?: DiffSet): Promise { diff --git a/packages/context/src/ingest/adapters/historic-sql/projection.test.ts b/packages/context/src/ingest/adapters/historic-sql/projection.test.ts index f2a5b068..95adf13f 100644 --- a/packages/context/src/ingest/adapters/historic-sql/projection.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/projection.test.ts @@ -284,7 +284,7 @@ describe('projectHistoricSqlEvidence', () => { ); }); - it('marks missing table usage stale and deletes legacy historic SQL query pages', async () => { + it('marks missing table usage stale without deleting old query pages', async () => { const workdir = await tempWorkdir(); await writeText( workdir, @@ -322,22 +322,22 @@ describe('projectHistoricSqlEvidence', () => { }); await writeText( workdir, - 'knowledge/global/historic-sql-legacy-template.md', + 'knowledge/global/historic-sql-old-template.md', [ '---', YAML.stringify({ - summary: 'Legacy template page', + summary: 'Old template page', tags: ['historic-sql', 'query-pattern'], refs: [], sl_refs: ['orders'], usage_mode: 'auto', source: 'historic-sql', tables: ['public.orders'], - fingerprints: ['legacy:1'], + fingerprints: ['old:1'], }).trimEnd(), '---', '', - 'Legacy body', + 'Old body', '', ].join('\n'), ); @@ -345,7 +345,6 @@ describe('projectHistoricSqlEvidence', () => { const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' }); expect(result.staleTablesMarked).toBe(1); - expect(result.legacyPagesDeleted).toBe(1); expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]); const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')); expect(shard.tables.orders.usage).toEqual({ @@ -357,8 +356,8 @@ describe('projectHistoricSqlEvidence', () => { commonJoins: [], staleSince: '2026-05-11T00:00:00.000Z', }); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql-legacy-template.md'), 'utf-8')).rejects.toMatchObject({ - code: 'ENOENT', - }); + await expect(readFile(join(workdir, 'knowledge/global/historic-sql-old-template.md'), 'utf-8')).resolves.toContain( + 'Old body', + ); }); }); diff --git a/packages/context/src/ingest/adapters/historic-sql/projection.ts b/packages/context/src/ingest/adapters/historic-sql/projection.ts index 25a317f3..7d4da94f 100644 --- a/packages/context/src/ingest/adapters/historic-sql/projection.ts +++ b/packages/context/src/ingest/adapters/historic-sql/projection.ts @@ -1,4 +1,4 @@ -import { access, mkdir, readdir, readFile, rename, rm, writeFile } from 'node:fs/promises'; +import { access, mkdir, readdir, readFile, rename, writeFile } from 'node:fs/promises'; import { dirname, join, relative } from 'node:path'; import YAML from 'yaml'; import { rawSourcesDirForSync } from '../../raw-sources-paths.js'; @@ -20,7 +20,6 @@ export interface HistoricSqlProjectionResult { patternPagesWritten: number; stalePatternPagesMarked: number; archivedPatternPages: number; - legacyPagesDeleted: number; touchedSources: Array<{ connectionId: string; sourceName: string }>; warnings: string[]; } @@ -152,11 +151,6 @@ function isHistoricPatternPage(page: HistoricSqlPatternPage): boolean { ); } -function isLegacyQueryPage(page: HistoricSqlPatternPage): boolean { - const tags = Array.isArray(page.frontmatter.tags) ? page.frontmatter.tags : []; - return page.frontmatter.source === 'historic-sql' && tags.includes('query-pattern') && !tags.includes('pattern'); -} - function isArchivedPatternPage(page: HistoricSqlPatternPage): boolean { const tags = Array.isArray(page.frontmatter.tags) ? page.frontmatter.tags : []; return tags.includes('archived'); @@ -228,7 +222,6 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp patternPagesWritten: 0, stalePatternPagesMarked: 0, archivedPatternPages: 0, - legacyPagesDeleted: 0, touchedSources: [], warnings: [], }; @@ -333,10 +326,5 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp result.stalePatternPagesMarked += 1; } - for (const page of allPages.filter(isLegacyQueryPage)) { - await rm(page.path, { force: true }); - result.legacyPagesDeleted += 1; - } - return result; } diff --git a/packages/context/src/ingest/adapters/historic-sql/types.test.ts b/packages/context/src/ingest/adapters/historic-sql/types.test.ts index 076e5d8e..f5a6f853 100644 --- a/packages/context/src/ingest/adapters/historic-sql/types.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/types.test.ts @@ -8,7 +8,7 @@ import { } from './types.js'; describe('historic-sql unified contracts', () => { - it('parses minExecutions and accepts minCalls as a one-release alias', () => { + it('parses minExecutions and service-account filters', () => { expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minExecutions: 9 })).toMatchObject({ dialect: 'postgres', minExecutions: 9, @@ -18,7 +18,15 @@ describe('historic-sql unified contracts', () => { staleArchiveAfterDays: 90, }); - expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minCalls: 7 }).minExecutions).toBe(7); + const parsed = historicSqlUnifiedPullConfigSchema.parse({ + dialect: 'postgres', + minExecutions: 7, + filters: { + serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, + }, + }); + expect(parsed.minExecutions).toBe(7); + expect(parsed.filters.serviceAccounts).toEqual({ patterns: ['^svc_'], mode: 'exclude' }); }); it('validates aggregate templates from warehouse readers', () => { diff --git a/packages/context/src/ingest/adapters/historic-sql/types.ts b/packages/context/src/ingest/adapters/historic-sql/types.ts index a827e8ae..07711d52 100644 --- a/packages/context/src/ingest/adapters/historic-sql/types.ts +++ b/packages/context/src/ingest/adapters/historic-sql/types.ts @@ -8,26 +8,7 @@ export type HistoricSqlDialect = z.infer; const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']); -function isRecord(value: unknown): value is Record { - return typeof value === 'object' && value !== null && !Array.isArray(value); -} - -export const historicSqlUnifiedPullConfigSchema = z.preprocess((value) => { - if (!isRecord(value)) { - return value; - } - const next: Record = { ...value }; - if (next.minExecutions === undefined && typeof next.minCalls === 'number') { - next.minExecutions = next.minCalls; - } - if (!next.filters && Array.isArray(next.serviceAccountUserPatterns)) { - next.filters = { - serviceAccounts: { patterns: next.serviceAccountUserPatterns, mode: 'exclude' }, - dropTrivialProbes: true, - }; - } - return next; -}, z.object({ +export const historicSqlUnifiedPullConfigSchema = z.object({ dialect: historicSqlDialectSchema, windowDays: z.number().int().positive().default(90), minExecutions: z.number().int().nonnegative().default(5), @@ -48,7 +29,7 @@ export const historicSqlUnifiedPullConfigSchema = z.preprocess((value) => { }).default({ dropTrivialProbes: true }), redactionPatterns: z.array(z.string()).default([]), staleArchiveAfterDays: z.number().int().positive().default(90), -})); +}); export type HistoricSqlUnifiedPullConfig = z.infer; @@ -157,6 +138,5 @@ export interface HistoricSqlSourceAdapterDeps { sqlAnalysis: SqlAnalysisPort; reader: HistoricSqlReader; queryClient: unknown; - legacyPostgresBaselineRootDir?: string; now?: () => Date; } diff --git a/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts b/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts index a29fecd1..47299373 100644 --- a/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts +++ b/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts @@ -26,13 +26,11 @@ export function lookerCredentialsFromLocalConnection( if (!connection || String(connection.driver).toLowerCase() !== 'looker') { throw new Error(`Connection "${connectionId}" is not a Looker connection`); } - const baseUrl = stringField(connection.base_url) ?? stringField(connection.baseUrl) ?? stringField(connection.url); - const clientId = stringField(connection.client_id) ?? stringField(connection.clientId); + const baseUrl = stringField(connection.base_url); + const clientId = stringField(connection.client_id); const clientSecret = stringField(connection.client_secret) ?? - stringField(connection.clientSecret) ?? - (stringField(connection.client_secret_ref) ? resolveEnvReference(String(connection.client_secret_ref), env) : null) ?? - (stringField(connection.clientSecretRef) ? resolveEnvReference(String(connection.clientSecretRef), env) : null); + (stringField(connection.client_secret_ref) ? resolveEnvReference(String(connection.client_secret_ref), env) : null); if (!baseUrl) { throw new Error(`Connection "${connectionId}" is missing Looker base_url`); diff --git a/packages/context/src/ingest/adapters/metabase/client-port.test.ts b/packages/context/src/ingest/adapters/metabase/client-port.test.ts index 9686e552..8f775b56 100644 --- a/packages/context/src/ingest/adapters/metabase/client-port.test.ts +++ b/packages/context/src/ingest/adapters/metabase/client-port.test.ts @@ -87,10 +87,13 @@ it('allows the concrete client result shapes used by the relocated Metabase clie const datasetQuery: MetabaseDatasetQuery = { type: 'native', database: 42, - native: { - query: 'SELECT * FROM orders WHERE created_at > {{ created_at }}', - 'template-tags': { created_at: templateTag }, - }, + stages: [ + { + 'lib/type': 'mbql.stage/native', + native: 'SELECT * FROM orders WHERE created_at > {{ created_at }}', + 'template-tags': { created_at: templateTag }, + }, + ], }; const card: MetabaseCard = { id: 1, diff --git a/packages/context/src/ingest/adapters/metabase/client-port.ts b/packages/context/src/ingest/adapters/metabase/client-port.ts index 7aa1f3ed..2eec804e 100644 --- a/packages/context/src/ingest/adapters/metabase/client-port.ts +++ b/packages/context/src/ingest/adapters/metabase/client-port.ts @@ -116,17 +116,11 @@ interface MetabaseNativeStage { 'template-tags'?: Record; } -interface MetabaseLegacyNativeQuery { - query: string; - 'template-tags'?: Record; -} - export interface MetabaseDatasetQuery { 'lib/type'?: 'mbql/query'; database?: number; type?: 'native' | 'query'; stages?: MetabaseNativeStage[]; - native?: MetabaseLegacyNativeQuery; } export interface MetabaseNativeQueryResult { diff --git a/packages/context/src/ingest/adapters/metabase/client.test.ts b/packages/context/src/ingest/adapters/metabase/client.test.ts index 1ee3fe93..431efe76 100644 --- a/packages/context/src/ingest/adapters/metabase/client.test.ts +++ b/packages/context/src/ingest/adapters/metabase/client.test.ts @@ -32,10 +32,7 @@ function nativeCard(query: string, templateTags: Record { dataset_query: { type: 'native', database: 6, - native: { query: 'SELECT a, b FROM base' }, + stages: [{ 'lib/type': 'mbql.stage/native', native: 'SELECT a, b FROM base' }], }, }); const client = makeClient((client) => { diff --git a/packages/context/src/ingest/adapters/metabase/client.ts b/packages/context/src/ingest/adapters/metabase/client.ts index 2b70bc79..2ab8b81a 100644 --- a/packages/context/src/ingest/adapters/metabase/client.ts +++ b/packages/context/src/ingest/adapters/metabase/client.ts @@ -150,9 +150,6 @@ function injectNativeSql(datasetQuery: MetabaseDatasetQuery, sql: string): Metab stages[0] = { ...stages[0], native: sql }; return { ...datasetQuery, stages }; } - if (datasetQuery?.native) { - return { ...datasetQuery, native: { ...datasetQuery.native, query: sql } }; - } return datasetQuery; } @@ -370,36 +367,12 @@ export class MetabaseClient implements MetabaseRuntimeClient { }); } - /** - * Extract native SQL from card, handling both pMBQL (v57+) and legacy formats. - * - pMBQL format: dataset_query.stages[0].native - * - Legacy format: dataset_query.native.query - */ getNativeSql(card: MetabaseCard): string | null { - // pMBQL format (v57+): stages[0].native - const pMbqlSql = card.dataset_query?.stages?.[0]?.native; - if (pMbqlSql) { - return pMbqlSql; - } - - // Legacy format: native.query - return card.dataset_query?.native?.query ?? null; + return card.dataset_query?.stages?.[0]?.native ?? null; } - /** - * Extract template tags from card, handling both pMBQL and legacy formats. - * - pMBQL format: dataset_query.stages[0]['template-tags'] - * - Legacy format: dataset_query.native['template-tags'] - */ getTemplateTags(card: MetabaseCard): Record { - // pMBQL format: stages[0]['template-tags'] - const pMbqlTags = card.dataset_query?.stages?.[0]?.['template-tags']; - if (pMbqlTags) { - return pMbqlTags; - } - - // Legacy format: native['template-tags'] - return card.dataset_query?.native?.['template-tags'] ?? {}; + return card.dataset_query?.stages?.[0]?.['template-tags'] ?? {}; } async getCardSql(card: MetabaseCard): Promise { diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts index 0c854f6d..7cbe913b 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts @@ -48,19 +48,6 @@ describe('metabaseRuntimeConfigFromLocalConnection', () => { }); }); - it('accepts url as the local api URL alias', () => { - const connection: KtxProjectConnectionConfig = { - driver: 'metabase', - url: 'https://metabase.example.com', - api_key: 'literal-test-key', // pragma: allowlist secret - }; - - expect(metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toEqual({ - apiUrl: 'https://metabase.example.com', - apiKey: 'literal-test-key', // pragma: allowlist secret - }); - }); - it('rejects proxy-bearing local Metabase connections', () => { const connection: KtxProjectConnectionConfig = { driver: 'metabase', diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts index ec5e163e..029e667e 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts @@ -37,9 +37,9 @@ export function metabaseRuntimeConfigFromLocalConnection( ); } - const apiUrl = stringField(connection.api_url) ?? stringField(connection.apiUrl) ?? stringField(connection.url); - const literalApiKey = stringField(connection.api_key) ?? stringField(connection.apiKey); - const apiKeyRef = stringField(connection.api_key_ref) ?? stringField(connection.apiKeyRef); + const apiUrl = stringField(connection.api_url); + const literalApiKey = stringField(connection.api_key); + const apiKeyRef = stringField(connection.api_key_ref); const apiKey = literalApiKey ?? (apiKeyRef ? resolveKtxConfigReference(apiKeyRef, env) : null); if (!apiUrl) { diff --git a/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts b/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts index bfdd824f..13127a3d 100644 --- a/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts +++ b/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts @@ -14,7 +14,6 @@ import { getMetricflowAvailableColumnNames, mapCrossModelMetricToSource, resolveMetricflowSemanticModelSourceName, - toKebabCaseMetricflowName, type MetricflowHostTable, type MetricflowSemanticModelImportContext, } from './semantic-models.js'; @@ -129,16 +128,6 @@ export async function importMetricflowSemanticModels( { skipValidation: true }, ); - const legacyWarning = await legacyKebabSourceWarning( - semanticLayerService, - input.connectionId, - context.model.modelRef, - context.sourceName, - ); - if (legacyWarning) { - warnings.push(legacyWarning); - } - if (existing) { sourcesUpdated++; } else { @@ -234,26 +223,6 @@ async function resolveManifestSource( return null; } -async function legacyKebabSourceWarning( - semanticLayerService: MetricflowSemanticLayerWriter, - connectionId: string, - modelRef: string, - sourceName: string, -): Promise { - const kebabName = toKebabCaseMetricflowName(modelRef); - if (kebabName === sourceName) { - return null; - } - const legacy = await semanticLayerService.loadSource(connectionId, kebabName); - if (!legacy) { - return null; - } - return ( - `MetricFlow sync: legacy kebab-case source '${kebabName}' still exists alongside the new source ` + - `'${sourceName}' (modelRef '${modelRef}'). Migrate persisted references before deleting the old file.` - ); -} - async function repairSourcesAfterPartialImportFailures(input: { semanticLayerService: MetricflowSemanticLayerWriter; connectionId: string; diff --git a/packages/context/src/ingest/ingest-bundle.runner.test.ts b/packages/context/src/ingest/ingest-bundle.runner.test.ts index 6134fbe7..b337a3f0 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.test.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts @@ -1518,7 +1518,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { patternPagesWritten: 3, stalePatternPagesMarked: 1, archivedPatternPages: 1, - legacyPagesDeleted: 1, }, warnings: [], errors: [], @@ -1551,7 +1550,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { expect(memoryFlow.snapshot().events).toContainEqual( expect.objectContaining({ type: 'saved', - wikiCount: 6, + wikiCount: 5, slCount: 3, }), ); diff --git a/packages/context/src/ingest/local-adapters.test.ts b/packages/context/src/ingest/local-adapters.test.ts index 7161743a..cfe0058a 100644 --- a/packages/context/src/ingest/local-adapters.test.ts +++ b/packages/context/src/ingest/local-adapters.test.ts @@ -105,7 +105,6 @@ describe('local ingest adapters', () => { return { headers: [], rows: [] }; }, }, - postgresBaselineRootDir: join(project.projectDir, '.ktx/cache/historic-sql'), }, }); @@ -181,9 +180,12 @@ describe('local ingest adapters', () => { historicSql: { enabled: true, dialect: 'postgres', - minCalls: 7, + minExecutions: 7, maxTemplatesPerRun: 123, - serviceAccountUserPatterns: ['^svc_'], + filters: { + serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, + dropTrivialProbes: true, + }, }, }, }); @@ -385,7 +387,7 @@ describe('local ingest adapters', () => { connections: { 'prod-lookml': { driver: 'lookml', - repo_url: 'https://github.com/acme/looker.git', + repoUrl: 'https://github.com/acme/looker.git', branch: 'main', path: 'models', auth_token_ref: 'env:GITHUB_TOKEN', @@ -410,7 +412,7 @@ describe('local ingest adapters', () => { }); }); - it('rejects local LookML scheduled pulls when repo_url is missing', async () => { + it('rejects local LookML scheduled pulls when repoUrl is missing', async () => { const lookmlProject = { projectDir: tempDir, config: { connections: { 'prod-lookml': { driver: 'lookml' } } }, diff --git a/packages/context/src/ingest/local-adapters.ts b/packages/context/src/ingest/local-adapters.ts index 533bd526..0bf5fd42 100644 --- a/packages/context/src/ingest/local-adapters.ts +++ b/packages/context/src/ingest/local-adapters.ts @@ -50,7 +50,6 @@ export interface DefaultLocalIngestAdaptersOptions { reader?: HistoricSqlReader; queryClient?: unknown; postgresQueryClient?: KtxPostgresQueryClient; - postgresBaselineRootDir?: string; now?: () => Date; }; looker?: { @@ -129,7 +128,6 @@ export function createDefaultLocalIngestAdapters( sqlAnalysis: options.historicSql.sqlAnalysis, reader: options.historicSql.reader ?? new PostgresPgssReader(), queryClient, - legacyPostgresBaselineRootDir: options.historicSql.postgresBaselineRootDir, now: options.historicSql.now, }), ); @@ -163,11 +161,11 @@ function stringField(value: unknown): string | null { function localLookmlPullConfigFromConnection(connection: Record | undefined, env: NodeJS.ProcessEnv) { const mappings = isRecord(connection?.mappings) ? connection.mappings : {}; - const authTokenRef = stringField(connection?.auth_token_ref) ?? stringField(connection?.authTokenRef); - const literalAuthToken = stringField(connection?.authToken) ?? stringField(connection?.auth_token); + const authTokenRef = stringField(connection?.auth_token_ref); + const literalAuthToken = stringField(connection?.auth_token); return pullConfigFromIntegrationConfig({ - repoUrl: stringField(connection?.repoUrl) ?? stringField(connection?.repo_url) ?? null, + repoUrl: stringField(connection?.repoUrl) ?? null, branch: stringField(connection?.branch), path: stringField(connection?.path), authToken: literalAuthToken ?? resolveKtxConfigReference(authTokenRef ?? undefined, env) ?? null, @@ -176,27 +174,21 @@ function localLookmlPullConfigFromConnection(connection: Record } function localDbtPullConfigFromConnection(connection: Record | undefined, env: NodeJS.ProcessEnv) { - const sourceDir = stringField(connection?.source_dir) ?? stringField(connection?.sourceDir); - const repoUrl = stringField(connection?.repo_url) ?? stringField(connection?.repoUrl); + const sourceDir = stringField(connection?.source_dir); + const repoUrl = stringField(connection?.repo_url); if (sourceDir) { return { sourceDir, ...(stringField(connection?.profiles_path) ? { profilesPath: stringField(connection?.profiles_path) } : {}), - ...(stringField(connection?.profilesPath) ? { profilesPath: stringField(connection?.profilesPath) } : {}), ...(stringField(connection?.target) ? { target: stringField(connection?.target) } : {}), ...(stringField(connection?.project_name) ? { projectName: stringField(connection?.project_name) } : {}), - ...(stringField(connection?.projectName) ? { projectName: stringField(connection?.projectName) } : {}), }; } if (!repoUrl) { return undefined; } const authToken = - stringField(connection?.authToken) ?? - resolveKtxConfigReference( - stringField(connection?.auth_token_ref) ?? stringField(connection?.authTokenRef) ?? undefined, - env, - ); + stringField(connection?.auth_token) ?? resolveKtxConfigReference(stringField(connection?.auth_token_ref) ?? undefined, env); return { repoUrl, ...(stringField(connection?.branch) ? { branch: stringField(connection?.branch) } : {}), @@ -280,8 +272,8 @@ export async function localPullConfigForAdapter( ? (metricflow as Record) : null; const authToken = - typeof metricflowConfig?.authToken === 'string' - ? metricflowConfig.authToken + typeof metricflowConfig?.auth_token === 'string' + ? metricflowConfig.auth_token : resolveKtxConfigReference( typeof metricflowConfig?.auth_token_ref === 'string' ? metricflowConfig.auth_token_ref : undefined, options.looker?.env ?? process.env, diff --git a/packages/context/src/ingest/report-snapshot.test.ts b/packages/context/src/ingest/report-snapshot.test.ts index 13d3eff7..c949a3cc 100644 --- a/packages/context/src/ingest/report-snapshot.test.ts +++ b/packages/context/src/ingest/report-snapshot.test.ts @@ -22,7 +22,7 @@ function validReportSnapshot() { { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, ], - touchedSlSources: ['warehouse.orders'], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'orders' }], }, ], failedWorkUnits: [], @@ -106,7 +106,7 @@ describe('parseIngestReportSnapshot', () => { expect(snapshot.body.toolTranscripts).toHaveLength(1); }); - it('parses target-aware actions and normalizes legacy touched source strings', () => { + it('parses target-aware actions and touched source objects', () => { const report = validReportSnapshot(); report.body.workUnits[0] = { ...report.body.workUnits[0], @@ -119,8 +119,7 @@ describe('parseIngestReportSnapshot', () => { targetConnectionId: 'warehouse-1', }, ], - // Legacy report shape: bare strings are normalized to the report connection ID. - touchedSlSources: ['looker__b2b__sales_pipeline'], + touchedSlSources: [{ connectionId: 'warehouse-1', sourceName: 'looker__b2b__sales_pipeline' }], } as never; const snapshot = parseIngestReportSnapshot(report); @@ -135,7 +134,7 @@ describe('parseIngestReportSnapshot', () => { }, ]); expect(snapshot.body.workUnits[0]?.touchedSlSources).toEqual([ - { connectionId: 'warehouse', sourceName: 'looker__b2b__sales_pipeline' }, + { connectionId: 'warehouse-1', sourceName: 'looker__b2b__sales_pipeline' }, ]); }); diff --git a/packages/context/src/ingest/report-snapshot.ts b/packages/context/src/ingest/report-snapshot.ts index 76565ad9..de377dd5 100644 --- a/packages/context/src/ingest/report-snapshot.ts +++ b/packages/context/src/ingest/report-snapshot.ts @@ -1,5 +1,4 @@ import * as z from 'zod'; -import type { TouchedSlSource } from '../tools/index.js'; import { memoryFlowReplayInputSchema } from './memory-flow/schema.js'; import type { IngestReportSnapshot } from './reports.js'; @@ -24,8 +23,6 @@ const touchedSlSourceSchema = z.object({ sourceName: z.string().min(1), }); -const touchedSlSourceInputSchema = z.union([z.string(), touchedSlSourceSchema]); - const conflictResolvedSchema = z .object({ unitKey: z.string().optional(), @@ -42,7 +39,7 @@ const evictionAppliedSchema = z rawPath: z.string(), artifactKind: z.enum(['sl', 'wiki']), artifactKey: z.string(), - action: z.enum(['removed', 'retained_deprecated']), + action: z.literal('removed'), reason: z.string(), }) .passthrough(); @@ -147,7 +144,7 @@ export const ingestReportSnapshotSchema = z status: z.enum(['success', 'failed']), reason: z.string().optional(), actions: z.array(ingestActionSchema), - touchedSlSources: z.array(touchedSlSourceInputSchema), + touchedSlSources: z.array(touchedSlSourceSchema), slDisallowed: z.boolean().optional(), slDisallowedReason: z.enum(['lookml_connection_mismatch']).optional(), }), @@ -171,26 +168,10 @@ export const ingestReportSnapshotSchema = z }) .passthrough(); -function normalizeTouchedSlSources(connectionId: string, value: Array): TouchedSlSource[] { - return value.map((entry) => - typeof entry === 'string' - ? { connectionId, sourceName: entry } - : { connectionId: entry.connectionId, sourceName: entry.sourceName }, - ); -} - export function parseIngestReportSnapshot(value: unknown): IngestReportSnapshot { const result = ingestReportSnapshotSchema.safeParse(value); if (!result.success) { throw new Error(`Invalid ingest report snapshot: ${z.prettifyError(result.error)}`); } - const snapshot = result.data as IngestReportSnapshot; - snapshot.body.workUnits = snapshot.body.workUnits.map((workUnit) => ({ - ...workUnit, - touchedSlSources: normalizeTouchedSlSources( - snapshot.connectionId, - workUnit.touchedSlSources as Array, - ), - })); - return snapshot; + return result.data as IngestReportSnapshot; } diff --git a/packages/context/src/ingest/reports.ts b/packages/context/src/ingest/reports.ts index cda4d7c1..672c5bfb 100644 --- a/packages/context/src/ingest/reports.ts +++ b/packages/context/src/ingest/reports.ts @@ -111,8 +111,7 @@ export function postProcessorSavedMemoryCounts( wikiCount: numericResultField(record, 'patternPagesWritten') + numericResultField(record, 'stalePatternPagesMarked') + - numericResultField(record, 'archivedPatternPages') + - numericResultField(record, 'legacyPagesDeleted'), + numericResultField(record, 'archivedPatternPages'), slCount: numericResultField(record, 'tableUsageMerged') + numericResultField(record, 'staleTablesMarked'), }; } diff --git a/packages/context/src/ingest/stages/stage-index.types.ts b/packages/context/src/ingest/stages/stage-index.types.ts index c8d7e4b3..7de26bc8 100644 --- a/packages/context/src/ingest/stages/stage-index.types.ts +++ b/packages/context/src/ingest/stages/stage-index.types.ts @@ -25,7 +25,7 @@ export interface EvictionAppliedRecord { rawPath: string; artifactKind: 'sl' | 'wiki'; artifactKey: string; - action: 'removed' | 'retained_deprecated'; + action: 'removed'; reason: string; } diff --git a/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts b/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts index f44214ea..28a32a5b 100644 --- a/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts +++ b/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts @@ -22,7 +22,7 @@ export function createEmitEvictionDecisionTool(deps: EmitEvictionDecisionDeps) { rawPath: z.string().min(1), artifactKind: z.enum(['sl', 'wiki']), artifactKey: z.string().min(1), - action: z.enum(['removed', 'retained_deprecated']), + action: z.literal('removed'), reason: z.string().min(1), }), execute: async (input): Promise => { diff --git a/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts b/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts index a3e7b34f..9178c989 100644 --- a/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts +++ b/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts @@ -88,14 +88,14 @@ describe('reconciliation emit tools', () => { await executeTool(tool, { rawPath: 'views/old_orders.view.lkml', artifactKind: 'wiki', - artifactKey: 'orders/legacy', - action: 'retained_deprecated', + artifactKey: 'orders/old', + action: 'removed', reason: 'first pass', }); await executeTool(tool, { rawPath: 'views/old_orders.view.lkml', artifactKind: 'wiki', - artifactKey: 'orders/legacy', + artifactKey: 'orders/old', action: 'removed', reason: 'second pass after checking references', }); @@ -104,7 +104,7 @@ describe('reconciliation emit tools', () => { { rawPath: 'views/old_orders.view.lkml', artifactKind: 'wiki', - artifactKey: 'orders/legacy', + artifactKey: 'orders/old', action: 'removed', reason: 'second pass after checking references', }, diff --git a/packages/context/src/ingest/tools/eviction-list.tool.ts b/packages/context/src/ingest/tools/eviction-list.tool.ts index 1e2ca3a0..4ed08d63 100644 --- a/packages/context/src/ingest/tools/eviction-list.tool.ts +++ b/packages/context/src/ingest/tools/eviction-list.tool.ts @@ -12,7 +12,7 @@ export interface EvictionListDeps { export function createEvictionListTool(deps: EvictionListDeps) { return tool({ description: - 'List every artifact that the most recent completed sync produced from a now-deleted raw file. Use this to decide whether to remove (no inbound refs) or retain with deprecation (has inbound refs). Inbound refs are NOT currently computed — treat every retained entry as a candidate and ask the user via the IngestReport. After deciding, record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.', + 'List every artifact that the most recent completed sync produced from a now-deleted raw file. Remove each listed artifact and record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.', inputSchema: z.object({}), execute: async () => { if (deps.deletedRawPaths.length === 0) { diff --git a/packages/context/src/ingest/tools/verification-ledger.tool.ts b/packages/context/src/ingest/tools/verification-ledger.tool.ts index f99e79be..ac880607 100644 --- a/packages/context/src/ingest/tools/verification-ledger.tool.ts +++ b/packages/context/src/ingest/tools/verification-ledger.tool.ts @@ -8,7 +8,7 @@ const verificationLedgerInputSchema = z.object({ notes: z.string().max(2000).optional(), }); -export interface VerificationLedgerEntry { +interface VerificationLedgerEntry { summary: string; verifiedIdentifiers: string[]; unverifiedIdentifiers: string[]; diff --git a/packages/context/src/ingest/tools/warehouse-verification/index.ts b/packages/context/src/ingest/tools/warehouse-verification/index.ts index 0901eace..e6ac2c1c 100644 --- a/packages/context/src/ingest/tools/warehouse-verification/index.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/index.ts @@ -6,12 +6,6 @@ import { EntityDetailsTool } from './entity-details.tool.js'; import { SqlExecutionTool } from './sql-execution.tool.js'; import { WarehouseCatalogService } from './warehouse-catalog.service.js'; -export { DiscoverDataTool } from './discover-data.tool.js'; -export { EntityDetailsTool } from './entity-details.tool.js'; -export { SqlExecutionTool } from './sql-execution.tool.js'; -export { WarehouseCatalogService } from './warehouse-catalog.service.js'; -export type { RawSchemaHit, TableDetail, WarehouseColumnDetail } from './warehouse-catalog.service.js'; - export function createWarehouseVerificationTools(deps: { connections: SlConnectionCatalogPort; fallbackFileStore: KtxFileStorePort; diff --git a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts index 691f88e9..b916107c 100644 --- a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts @@ -14,7 +14,7 @@ export interface WarehouseCatalogServiceDeps { fileStore: KtxFileStorePort; } -export interface WarehouseColumnDetail extends KtxSchemaColumn { +interface WarehouseColumnDetail extends KtxSchemaColumn { descriptions: Record; rowCount: number | null; nullCount: number | null; @@ -88,9 +88,8 @@ interface ConnectionCatalog { } type TableWithDescriptions = KtxSchemaTable & { - description?: string | null; descriptions?: Record; - columns: Array }>; + columns: Array }>; }; function normalize(value: string | null | undefined): string { @@ -220,14 +219,14 @@ function matchedOnTable(table: TableWithDescriptions, query: string): RawSchemaH if (normalize(table.comment).includes(q)) { return 'comment'; } - if (normalize(firstDescription(table.descriptions) ?? table.description).includes(q)) { + if (normalize(firstDescription(table.descriptions)).includes(q)) { return 'description'; } return null; } function matchedOnColumn( - column: KtxSchemaColumn & { description?: string | null; descriptions?: Record }, + column: KtxSchemaColumn & { descriptions?: Record }, query: string, ): 'name' | 'comment' | 'description' | null { const q = normalize(query); @@ -240,7 +239,7 @@ function matchedOnColumn( if (normalize(column.comment).includes(q)) { return 'comment'; } - if (normalize(firstDescription(column.descriptions) ?? column.description).includes(q)) { + if (normalize(firstDescription(column.descriptions)).includes(q)) { return 'description'; } return null; @@ -285,13 +284,10 @@ export class WarehouseCatalogService { display: formatDisplay(catalog.driver, table), kind: table.kind, comment: table.comment, - description: table.description ?? firstDescription(table.descriptions), + description: firstDescription(table.descriptions), rowCount: profileTable?.rowCount ?? table.estimatedRows ?? null, columns: table.columns.map((rawColumn) => { - const column = rawColumn as KtxSchemaColumn & { - description?: string | null; - descriptions?: Record; - }; + const column = rawColumn as KtxSchemaColumn & { descriptions?: Record }; const profileColumn = profileColumns[columnKey(table, column.name)] ?? Object.entries(profileColumns).find( diff --git a/packages/context/src/ingest/wiki-sl-ref-repair.test.ts b/packages/context/src/ingest/wiki-sl-ref-repair.test.ts index 958386c7..68f2b349 100644 --- a/packages/context/src/ingest/wiki-sl-ref-repair.test.ts +++ b/packages/context/src/ingest/wiki-sl-ref-repair.test.ts @@ -40,7 +40,7 @@ describe('repairWikiSlRefs', () => { }; const configService = { listFiles: vi.fn(async () => ({ - files: ['global/accounts-at-risk.md', 'global/historic-sql/nested-legacy.md'], + files: ['global/accounts-at-risk.md', 'global/historic-sql/nested-old.md'], })), }; const semanticLayerService = { diff --git a/packages/context/src/memory/capture-signals.ts b/packages/context/src/memory/capture-signals.ts index 8860474a..856df30b 100644 --- a/packages/context/src/memory/capture-signals.ts +++ b/packages/context/src/memory/capture-signals.ts @@ -114,7 +114,6 @@ export function stepBudgetFor(sourceType: MemoryAgentSourceType): number { case 'external_ingest': return 30; case 'backfill': - case 'sql-review-migration': return 25; } } @@ -122,7 +121,7 @@ export function stepBudgetFor(sourceType: MemoryAgentSourceType): number { export function promptNameFor(sourceType: MemoryAgentSourceType): string { return sourceType === 'external_ingest' ? 'memory_agent_external_ingest' - : sourceType === 'backfill' || sourceType === 'sql-review-migration' + : sourceType === 'backfill' ? 'memory_agent_backfill' : 'memory_agent_research'; } diff --git a/packages/context/src/memory/types.ts b/packages/context/src/memory/types.ts index aa50cd8c..207eb238 100644 --- a/packages/context/src/memory/types.ts +++ b/packages/context/src/memory/types.ts @@ -16,7 +16,7 @@ import type { import type { ToolContext, ToolSession, TouchedSlSourceSet } from '../tools/index.js'; import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js'; -export type MemoryAgentSourceType = 'research' | 'external_ingest' | 'backfill' | 'sql-review-migration'; +export type MemoryAgentSourceType = 'research' | 'external_ingest' | 'backfill'; export interface MemoryAgentInput { userId: string; diff --git a/packages/context/src/project/config.test.ts b/packages/context/src/project/config.test.ts index 1be70322..01af9779 100644 --- a/packages/context/src/project/config.test.ts +++ b/packages/context/src/project/config.test.ts @@ -220,15 +220,15 @@ project: demo scan: relationships: enabled: false - llm_proposals: false - validation_required_for_manifest: true - accept_threshold: 0.91 - review_threshold: 0.61 - max_llm_tables_per_batch: 12 - max_candidates_per_column: 7 - profile_sample_rows: 500 - validation_concurrency: 2 - validation_budget: 0 + llmProposals: false + validationRequiredForManifest: true + acceptThreshold: 0.91 + reviewThreshold: 0.61 + maxLlmTablesPerBatch: 12 + maxCandidatesPerColumn: 7 + profileSampleRows: 500 + validationConcurrency: 2 + validationBudget: 0 `); expect(config.scan.relationships).toEqual({ @@ -260,7 +260,7 @@ scan: project: demo scan: relationships: - validation_budget: all + validationBudget: all `); expect(config.scan.relationships.validationBudget).toBe('all'); @@ -272,13 +272,13 @@ scan: project: demo scan: relationships: - accept_threshold: 2 - review_threshold: -1 - max_llm_tables_per_batch: 0 - max_candidates_per_column: -4 - profile_sample_rows: 0 - validation_concurrency: 0 - validation_budget: 1.5 + acceptThreshold: 2 + reviewThreshold: -1 + maxLlmTablesPerBatch: 0 + maxCandidatesPerColumn: -4 + profileSampleRows: 0 + validationConcurrency: 0 + validationBudget: 1.5 `); expect(config.scan.relationships).toMatchObject({ @@ -297,13 +297,13 @@ scan: project: demo scan: relationships: - validation_budget: infinite + validationBudget: infinite `); expect(config.scan.relationships).not.toHaveProperty('validationBudget'); }); - it('rejects legacy local LLM and embedding fields', () => { + it('rejects unsupported local LLM and embedding fields', () => { expect(() => parseKtxProjectConfig(` project: demo diff --git a/packages/context/src/project/config.ts b/packages/context/src/project/config.ts index f1aa9d71..21e5c59b 100644 --- a/packages/context/src/project/config.ts +++ b/packages/context/src/project/config.ts @@ -212,7 +212,7 @@ function scanEnrichmentMode(value: unknown, fallback: KtxScanEnrichmentMode): Kt throw new Error(`Unsupported scan.enrichment.mode: ${String(value)}`); } -function rejectLegacyProvider(section: string, value: unknown): void { +function rejectUnsupportedProvider(section: string, value: unknown): void { if (value !== undefined) { throw new Error(`Unsupported ${section}.provider: use ${section}.backend`); } @@ -277,7 +277,7 @@ function parseProjectLlmProviderConfig( defaults: KtxProjectLlmProviderConfig, section: string, ): KtxProjectLlmProviderConfig { - rejectLegacyProvider(section, raw.provider); + rejectUnsupportedProvider(section, raw.provider); const vertex = isRecord(raw.vertex) ? { @@ -310,7 +310,7 @@ function parseProjectEmbeddingConfig( defaults: KtxProjectEmbeddingConfig, section: string, ): KtxProjectEmbeddingConfig { - rejectLegacyProvider(section, raw.provider); + rejectUnsupportedProvider(section, raw.provider); const openai = optionalProviderConfig(raw.openai); const sentenceTransformers = isRecord(raw.sentenceTransformers) @@ -340,36 +340,21 @@ function parseScanRelationshipConfig( raw: Record, defaults: KtxScanRelationshipConfig, ): KtxScanRelationshipConfig { - const validationBudget = validationBudgetConfigValue( - raw.validation_budget ?? raw.validationBudget, - defaults.validationBudget, - ); + const validationBudget = validationBudgetConfigValue(raw.validationBudget, defaults.validationBudget); return { enabled: booleanValue(raw.enabled, defaults.enabled), - llmProposals: booleanValue(raw.llm_proposals ?? raw.llmProposals, defaults.llmProposals), + llmProposals: booleanValue(raw.llmProposals, defaults.llmProposals), validationRequiredForManifest: booleanValue( - raw.validation_required_for_manifest ?? raw.validationRequiredForManifest, + raw.validationRequiredForManifest, defaults.validationRequiredForManifest, ), - acceptThreshold: ratioConfigValue(raw.accept_threshold ?? raw.acceptThreshold, defaults.acceptThreshold), - reviewThreshold: ratioConfigValue(raw.review_threshold ?? raw.reviewThreshold, defaults.reviewThreshold), - maxLlmTablesPerBatch: positiveIntegerConfigValue( - raw.max_llm_tables_per_batch ?? raw.maxLlmTablesPerBatch, - defaults.maxLlmTablesPerBatch, - ), - maxCandidatesPerColumn: positiveIntegerConfigValue( - raw.max_candidates_per_column ?? raw.maxCandidatesPerColumn, - defaults.maxCandidatesPerColumn, - ), - profileSampleRows: positiveIntegerConfigValue( - raw.profile_sample_rows ?? raw.profileSampleRows, - defaults.profileSampleRows, - ), - validationConcurrency: positiveIntegerConfigValue( - raw.validation_concurrency ?? raw.validationConcurrency, - defaults.validationConcurrency, - ), + acceptThreshold: ratioConfigValue(raw.acceptThreshold, defaults.acceptThreshold), + reviewThreshold: ratioConfigValue(raw.reviewThreshold, defaults.reviewThreshold), + maxLlmTablesPerBatch: positiveIntegerConfigValue(raw.maxLlmTablesPerBatch, defaults.maxLlmTablesPerBatch), + maxCandidatesPerColumn: positiveIntegerConfigValue(raw.maxCandidatesPerColumn, defaults.maxCandidatesPerColumn), + profileSampleRows: positiveIntegerConfigValue(raw.profileSampleRows, defaults.profileSampleRows), + validationConcurrency: positiveIntegerConfigValue(raw.validationConcurrency, defaults.validationConcurrency), ...(validationBudget !== undefined ? { validationBudget } : {}), }; } diff --git a/packages/context/src/project/setup-config.test.ts b/packages/context/src/project/setup-config.test.ts index 46912d43..40ec5308 100644 --- a/packages/context/src/project/setup-config.test.ts +++ b/packages/context/src/project/setup-config.test.ts @@ -62,7 +62,7 @@ describe('KTX setup config helpers', () => { }); }); - it('combines legacy config setup steps with local state for reads', () => { + it('combines config setup steps with local state for reads', () => { const config = { ...buildDefaultKtxProjectConfig('warehouse'), setup: { diff --git a/packages/context/src/scan/relationship-benchmarks.test.ts b/packages/context/src/scan/relationship-benchmarks.test.ts index b4e5c782..aff025aa 100644 --- a/packages/context/src/scan/relationship-benchmarks.test.ts +++ b/packages/context/src/scan/relationship-benchmarks.test.ts @@ -33,7 +33,7 @@ const EXPECTED_LINKS: KtxRelationshipBenchmarkExpectedLinks = { }; const CHECKED_IN_FIXTURE_ORIGINS = { - abbreviated_legacy_no_declared_constraints: 'synthetic', + abbreviated_old_no_declared_constraints: 'synthetic', adventureworks_oltp_with_declared_metadata: 'public', adventureworkslt_with_declared_metadata: 'public', analytical_warehouse_no_naming_convention: 'synthetic', @@ -606,7 +606,7 @@ describe('relationship benchmarks', () => { const byId = new Map(fixtures.map((fixture) => [fixture.id, fixture])); const adversarialIds = [ 'non_english_naming_no_declared_constraints', - 'abbreviated_legacy_no_declared_constraints', + 'abbreviated_old_no_declared_constraints', 'analytical_warehouse_no_naming_convention', 'mixed_case_within_schema_no_declared_constraints', 'polymorphic_partial_overlap_no_declared_constraints', diff --git a/packages/context/src/scan/relationship-diagnostics.test.ts b/packages/context/src/scan/relationship-diagnostics.test.ts index 3f3bad1b..7c1dbb76 100644 --- a/packages/context/src/scan/relationship-diagnostics.test.ts +++ b/packages/context/src/scan/relationship-diagnostics.test.ts @@ -141,7 +141,7 @@ describe('relationship diagnostics artifacts', () => { ); }); - it('adapts legacy relationship updates into the richer artifact shape', () => { + it('adapts relationship updates into the artifact shape', () => { const artifacts = buildKtxRelationshipArtifacts({ connectionId: 'warehouse', relationshipUpdate: { diff --git a/packages/context/src/search/backend-conformance.test.ts b/packages/context/src/search/backend-conformance.test.ts index d2d8e3bf..95858486 100644 --- a/packages/context/src/search/backend-conformance.test.ts +++ b/packages/context/src/search/backend-conformance.test.ts @@ -45,7 +45,8 @@ const ORDERS_YAML = [ const FINANCE_ORDERS_YAML = [ 'name: orders', - 'description: Finance orders used for invoice reconciliation.', + 'descriptions:', + ' user: Finance orders used for invoice reconciliation.', 'table: finance.orders', 'grain:', ' - order_id', diff --git a/packages/context/src/sl/description-normalization.ts b/packages/context/src/sl/description-normalization.ts index 5a1b5ab6..ef657fdd 100644 --- a/packages/context/src/sl/description-normalization.ts +++ b/packages/context/src/sl/description-normalization.ts @@ -28,16 +28,11 @@ function hasDescriptions(descriptions: DescriptionMap): boolean { function withDescriptionMap(record: Record, fallback: string | null): Record { const descriptions = cleanDescriptionMap(record.descriptions); - const flatDescription = cleanText(record.description); - if (flatDescription && !descriptions.user) { - descriptions.user = flatDescription; - } if (!hasDescriptions(descriptions) && fallback) { descriptions.ktx = fallback; } const next = { ...record }; - delete next.description; if (hasDescriptions(descriptions)) { next.descriptions = descriptions; } else { diff --git a/packages/context/src/sl/local-sl.test.ts b/packages/context/src/sl/local-sl.test.ts index aa48546b..b7d56e22 100644 --- a/packages/context/src/sl/local-sl.test.ts +++ b/packages/context/src/sl/local-sl.test.ts @@ -29,7 +29,8 @@ const ORDERS_YAML = [ const SUPPORT_YAML = [ 'name: tickets', - 'description: Support tickets grouped by priority.', + 'descriptions:', + ' user: Support tickets grouped by priority.', 'table: public.tickets', 'grain:', ' - ticket_id', @@ -278,7 +279,8 @@ describe('local semantic-layer helpers', () => { sourceName: 'orders', yaml: [ 'name: orders', - 'description: Finance orders used for invoice reconciliation.', + 'descriptions:', + ' user: Finance orders used for invoice reconciliation.', 'table: finance.orders', 'grain:', ' - order_id', diff --git a/packages/context/src/sl/pglite-sl-search-prototype.test.ts b/packages/context/src/sl/pglite-sl-search-prototype.test.ts index 1d0ece25..0c599dca 100644 --- a/packages/context/src/sl/pglite-sl-search-prototype.test.ts +++ b/packages/context/src/sl/pglite-sl-search-prototype.test.ts @@ -10,7 +10,8 @@ import { searchLocalSlSourcesWithPglitePrototype } from './pglite-sl-search-prot const ORDERS_YAML = [ 'name: orders', - 'description: Orders with paid revenue and refund status.', + 'descriptions:', + ' user: Orders with paid revenue and refund status.', 'table: public.orders', 'grain:', ' - order_id', @@ -29,7 +30,8 @@ const ORDERS_YAML = [ const FINANCE_ORDERS_YAML = [ 'name: orders', - 'description: Finance orders used for invoice reconciliation.', + 'descriptions:', + ' user: Finance orders used for invoice reconciliation.', 'table: finance.orders', 'grain:', ' - order_id', @@ -43,7 +45,8 @@ const FINANCE_ORDERS_YAML = [ const CUSTOMERS_YAML = [ 'name: customers', - 'description: Customer lifecycle accounts by region.', + 'descriptions:', + ' user: Customer lifecycle accounts by region.', 'table: public.customers', 'grain:', ' - customer_id', diff --git a/packages/context/src/sl/schemas.ts b/packages/context/src/sl/schemas.ts index a42ecc87..a57359d4 100644 --- a/packages/context/src/sl/schemas.ts +++ b/packages/context/src/sl/schemas.ts @@ -80,14 +80,13 @@ const joinDeclarationSchema = z.object({ const sourceColumnSchema = z.object({ name: unqualifiedNameSchema, - // type/description optional on standalone sources: compose-time enrichment fills them + // type/descriptions optional on standalone sources: compose-time enrichment fills them // from the manifest entry named in `inherits_columns_from`. If the agent does not set // `inherits_columns_from`, or the column is not in the manifest, type must be present // — surfaced by sl_validate. type: z.enum(columnTypeValues).optional(), role: z.enum(columnRoleValues).optional(), visibility: z.enum(columnVisibilityValues).optional(), - description: z.string().optional(), descriptions: descriptionsSchema.optional(), expr: z.string().optional(), constraints: sourceKeyedColumnConstraintsSchema.optional(), @@ -102,7 +101,6 @@ const overlayColumnSchema = z type: z.enum(columnTypeValues).optional(), role: z.enum(columnRoleValues).optional(), visibility: z.enum(columnVisibilityValues).optional(), - description: z.string().optional(), descriptions: descriptionsSchema.optional(), expr: z.string().optional(), }) @@ -114,7 +112,6 @@ const overlayColumnSchema = z export const sourceDefinitionSchema = z .object({ name: z.string().min(1), - description: z.string().optional(), descriptions: descriptionsSchema.optional(), // Accepted for documentation parity with the Python spec; behavior is driven // by the `table` / `sql` fields, not by this discriminator. @@ -150,7 +147,6 @@ export const sourceDefinitionSchema = z export const sourceOverlaySchema = z .object({ name: z.string().min(1), - description: z.string().optional(), descriptions: z.record(z.string(), z.string()).optional(), grain: z.array(unqualifiedNameSchema).optional(), columns: z.array(overlayColumnSchema).optional(), diff --git a/packages/context/src/sl/semantic-layer.service.test.ts b/packages/context/src/sl/semantic-layer.service.test.ts index 308cc5aa..179904d5 100644 --- a/packages/context/src/sl/semantic-layer.service.test.ts +++ b/packages/context/src/sl/semantic-layer.service.test.ts @@ -98,7 +98,7 @@ describe('composeOverlay', () => { ...baseTable, segments: [{ name: 'pre_existing', expr: 'is_paid = true' }], }; - const overlay = { name: 'fct_labs', description: 'no segments here' }; + const overlay = { name: 'fct_labs', descriptions: { user: 'no segments here' } }; const composed = composeOverlay(baseWithSegments, overlay); expect(composed.segments).toEqual([{ name: 'pre_existing', expr: 'is_paid = true' }]); }); @@ -128,7 +128,7 @@ describe('composeOverlay', () => { it('still handles existing known keys without regression', () => { const overlay = { name: 'fct_labs', - description: 'patient lab orders', + descriptions: { user: 'patient lab orders' }, exclude_columns: ['admin_user_id'], columns: [{ name: 'is_byol', type: 'boolean', expr: "lab_type = 'byol'" }], measures: [{ name: 'count_all', expr: 'count(*)' }], @@ -675,19 +675,21 @@ describe('loadAllSources — standalone enrichment via inherits_columns_from', ( expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]); }); - it('normalizes legacy flat source and column descriptions when loading standalone files', async () => { + it('loads standalone source and column description maps', async () => { const standalonePath = 'semantic-layer/conn-1/orders.yaml'; configService.listFiles.mockResolvedValue({ files: [standalonePath] }); configService.readFile.mockResolvedValue({ content: [ 'name: orders', - 'description: Finance orders used for invoice reconciliation.', + 'descriptions:', + ' user: Finance orders used for invoice reconciliation.', 'table: public.orders', 'grain: [id]', 'columns:', ' - name: id', ' type: string', - ' description: Stable order identifier.', + ' descriptions:', + ' user: Stable order identifier.', ].join('\n'), }); diff --git a/packages/context/src/sl/semantic-layer.service.ts b/packages/context/src/sl/semantic-layer.service.ts index 0616851d..7d13d10a 100644 --- a/packages/context/src/sl/semantic-layer.service.ts +++ b/packages/context/src/sl/semantic-layer.service.ts @@ -113,7 +113,7 @@ export class SemanticLayerService { `standalone source '${source.name}' shadows an existing manifest entry and ` + `will drop the manifest's columns and joins. Rewrite as an overlay: remove ` + `"sql:", "table:", "grain:", "columns:", "joins:"; keep only "name:" plus ` + - `"measures:"/"segments:"/"description:"`; + `"measures:"/"segments:"/"descriptions:"`; warnings.push(msg); this.logger.warn(`[writeSource] ${msg}. Saving anyway.`); } @@ -935,16 +935,12 @@ export class SemanticLayerService { string, { descriptions?: Record; - description?: string; - db_description?: string; columns?: Array<{ name: string; type: string; pk?: boolean; nullable?: boolean; descriptions?: Record; - description?: string; - db_description?: string; }>; } >; @@ -952,12 +948,12 @@ export class SemanticLayerService { if (shard?.tables) { for (const [tableName, entry] of Object.entries(shard.tables)) { tables.set(tableName, { - descriptions: migrateDescriptions(entry.descriptions, entry.description, entry.db_description) ?? {}, + descriptions: entry.descriptions ?? {}, }); for (const col of entry.columns ?? []) { columns.set(`${tableName}.${col.name}`, { type: col.type, - descriptions: migrateDescriptions(col.descriptions, col.description, col.db_description) ?? {}, + descriptions: col.descriptions ?? {}, nullable: col.nullable, pk: col.pk, }); @@ -1055,11 +1051,7 @@ interface ManifestColumnEntry { type: string; pk?: boolean; nullable?: boolean; - // New format: descriptions map descriptions?: Record; - // Legacy format: flat fields (read-only backwards compat) - description?: string; - db_description?: string; constraints?: { dbt?: { not_null?: boolean; unique?: boolean } }; enum_values?: { dbt?: string[] }; tests?: { @@ -1077,11 +1069,7 @@ interface ManifestJoinEntry { export interface ManifestTableEntry { table: string; - // New format: descriptions map descriptions?: Record; - // Legacy format: flat fields (read-only backwards compat) - description?: string; - db_description?: string; columns: ManifestColumnEntry[]; joins?: ManifestJoinEntry[]; tags?: { dbt?: string[] }; @@ -1089,31 +1077,12 @@ export interface ManifestTableEntry { usage?: TableUsageOutput; } -/** Migrate legacy flat description/db_description fields to a descriptions map. */ -function migrateDescriptions( - descriptions?: Record, - description?: string, - dbDescription?: string, -): Record | undefined { - if (descriptions && Object.keys(descriptions).length > 0) { - return descriptions; - } - const result: Record = {}; - if (description) { - result.ai = description; - } - if (dbDescription) { - result.db = dbDescription; - } - return Object.keys(result).length > 0 ? result : undefined; -} - export function projectManifestEntry(name: string, entry: ManifestTableEntry): SemanticLayerSource { const columns = entry.columns.map((c) => ({ name: c.name, type: c.type, role: c.type === 'time' ? 'time' : undefined, - descriptions: migrateDescriptions(c.descriptions, c.description, c.db_description), + descriptions: c.descriptions, constraints: c.constraints, enum_values: c.enum_values, tests: c.tests, @@ -1126,7 +1095,7 @@ export function projectManifestEntry(name: string, entry: ManifestTableEntry): S return { name, table: entry.table, - descriptions: migrateDescriptions(entry.descriptions, entry.description, entry.db_description), + descriptions: entry.descriptions, grain, columns, joins: (entry.joins ?? []).map((j) => ({ to: j.to, on: j.on, relationship: j.relationship, source: j.source })), @@ -1359,7 +1328,6 @@ export function findDanglingSegmentRefs(source: Record): string const COMPOSE_KNOWN_KEYS = new Set([ 'name', - 'description', 'descriptions', 'grain', 'columns', diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.ts b/packages/context/src/sl/tools/sl-edit-source.tool.ts index 27b582d5..30972707 100644 --- a/packages/context/src/sl/tools/sl-edit-source.tool.ts +++ b/packages/context/src/sl/tools/sl-edit-source.tool.ts @@ -127,7 +127,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t ` - name: `, ` expr: ""`, ` description: ""`, - `Overlay shape: "name:" plus any of "measures:", "segments:", "description:". Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`, + `Overlay shape: "name:" plus any of "measures:", "segments:", "descriptions:". Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`, ].join('\n'), ], sourceName, diff --git a/packages/context/src/sl/tools/sl-warehouse-validation.ts b/packages/context/src/sl/tools/sl-warehouse-validation.ts index a200dad9..e0d48721 100644 --- a/packages/context/src/sl/tools/sl-warehouse-validation.ts +++ b/packages/context/src/sl/tools/sl-warehouse-validation.ts @@ -89,7 +89,7 @@ export async function validateSingleSource( `${sourceName}.yaml: standalone source shadows an existing manifest entry — ` + `writing it as-is drops the manifest's columns and joins. ` + `Remove "sql:", "table:", "grain:", "columns:", and "joins:" and keep only ` + - `"name:" plus "measures:"/"segments:"/"description:" to write an overlay ` + + `"name:" plus "measures:"/"segments:"/"descriptions:" to write an overlay ` + `that inherits the manifest schema. Call sl_read_source to inspect the existing source first.`, ); return { errors, warnings }; diff --git a/packages/context/src/sl/tools/sl-write-source.tool.test.ts b/packages/context/src/sl/tools/sl-write-source.tool.test.ts index 1502c177..d9c58225 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.test.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.test.ts @@ -176,7 +176,7 @@ describe('SlWriteSourceTool — session gating', () => { expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled(); }); - it('normalizes flat source and column descriptions before writing', async () => { + it('writes source and column description maps', async () => { const { tool, semanticLayerService } = makeTool(); const result = await tool.call( { @@ -184,10 +184,10 @@ describe('SlWriteSourceTool — session gating', () => { sourceName: 'orders', source: { name: 'orders', - description: 'Finance orders used for invoice reconciliation.', + descriptions: { user: 'Finance orders used for invoice reconciliation.' }, table: 'public.orders', grain: ['id'], - columns: [{ name: 'id', type: 'string', description: 'Stable order identifier.' }], + columns: [{ name: 'id', type: 'string', descriptions: { user: 'Stable order identifier.' } }], measures: [], joins: [], } as any, diff --git a/packages/context/src/sl/tools/sl-write-source.tool.ts b/packages/context/src/sl/tools/sl-write-source.tool.ts index 34b6f8c4..e7efb357 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.ts @@ -318,7 +318,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co ` Writing standalone would drop the manifest's columns and joins, leaving only what you list here.`, `To add measures/segments on top of the manifest, rewrite this YAML as an overlay:`, ` - Remove "sql:", "table:", "grain:", "columns:", and "joins:".`, - ` - Keep only "name:", plus "measures:", "segments:", and/or "description:".`, + ` - Keep only "name:", plus "measures:", "segments:", and/or "descriptions:".`, ` - The manifest's schema is inherited automatically.`, `If you really need a different base table, use a different source name.`, ].join('\n'); diff --git a/packages/context/src/tools/tool-session.ts b/packages/context/src/tools/tool-session.ts index 023a8c8e..05da85d9 100644 --- a/packages/context/src/tools/tool-session.ts +++ b/packages/context/src/tools/tool-session.ts @@ -23,7 +23,7 @@ interface EvictionDecisionRecord { rawPath: string; artifactKind: 'wiki' | 'sl'; artifactKey: string; - action: 'removed' | 'retained_deprecated' | 'retained_supported'; + action: 'removed'; reason: string; } diff --git a/packages/context/src/wiki/local-knowledge.test.ts b/packages/context/src/wiki/local-knowledge.test.ts index 5ad66eb1..54bd3771 100644 --- a/packages/context/src/wiki/local-knowledge.test.ts +++ b/packages/context/src/wiki/local-knowledge.test.ts @@ -245,29 +245,4 @@ describe('local knowledge helpers', () => { ).rejects.toThrow('Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".'); }); - it('ignores nested historic-SQL legacy paths when listing local knowledge pages', async () => { - await writeLocalKnowledgePage(project, { - key: 'historic-sql-paid-orders', - scope: 'GLOBAL', - summary: 'Flat historic SQL page', - content: 'Flat page body.', - tags: ['historic-sql'], - }); - await project.fileStore.writeFile( - 'knowledge/global/historic-sql/paid-orders.md', - '---\nsummary: Nested historic SQL page\nusage_mode: auto\n---\n\nNested body\n', - 'Test', - 'test@example.com', - 'Write nested legacy page', - ); - - await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([ - { - key: 'historic-sql-paid-orders', - path: 'knowledge/global/historic-sql-paid-orders.md', - scope: 'GLOBAL', - summary: 'Flat historic SQL page', - }, - ]); - }); }); diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/data.sqlite similarity index 100% rename from packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/data.sqlite rename to packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/data.sqlite diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/expected-links.yaml similarity index 100% rename from packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/expected-links.yaml rename to packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/expected-links.yaml diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/fixture.yaml similarity index 50% rename from packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml rename to packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/fixture.yaml index 275a1008..6a9b3810 100644 --- a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml +++ b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/fixture.yaml @@ -1,5 +1,5 @@ -id: abbreviated_legacy_no_declared_constraints -name: Abbreviated legacy naming fixture with no declared constraints +id: abbreviated_old_no_declared_constraints +name: Abbreviated old naming fixture with no declared constraints tier: row_bearing origin: synthetic thresholdEligible: false diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/snapshot.json similarity index 98% rename from packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json rename to packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/snapshot.json index ac3840e2..b4cb7a92 100644 --- a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json +++ b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/snapshot.json @@ -1,5 +1,5 @@ { - "connectionId": "abbreviated_legacy_no_declared_constraints", + "connectionId": "abbreviated_old_no_declared_constraints", "driver": "sqlite", "extractedAt": "2026-05-07T00:00:00.000Z", "scope": {}, diff --git a/python/ktx-sl/semantic_layer/loader.py b/python/ktx-sl/semantic_layer/loader.py index c6956c19..02b5b2c4 100644 --- a/python/ktx-sl/semantic_layer/loader.py +++ b/python/ktx-sl/semantic_layer/loader.py @@ -59,9 +59,7 @@ class SourceLoader: f"Duplicate source name '{name}' in manifest shard {path}" ) sources[name] = project_manifest_entry(name, entry) - description_sources[name] = _description_sources( - entry.descriptions, entry.description, entry.db_description - ) + description_sources[name] = _description_sources(entry.descriptions) # 2. Load files outside _schema/ for path in sorted(self.sources_dir.rglob("*.yaml")): @@ -138,11 +136,6 @@ class SourceLoader: source = deepcopy(base) description_sources = dict(base_description_sources or {}) - # Overlay description semantics match the server: `description` writes the - # `user` source key, and `descriptions` merges keyed sources before a single - # visible description is resolved from the full map. - if overlay.get("description"): - description_sources["user"] = overlay["description"] if overlay.get("descriptions"): description_sources.update( { @@ -151,7 +144,7 @@ class SourceLoader: if text } ) - if overlay.get("description") or overlay.get("descriptions"): + if overlay.get("descriptions"): source.description = _resolve_description( description_sources or None, ) diff --git a/python/ktx-sl/semantic_layer/manifest.py b/python/ktx-sl/semantic_layer/manifest.py index 3023cf58..432019e8 100644 --- a/python/ktx-sl/semantic_layer/manifest.py +++ b/python/ktx-sl/semantic_layer/manifest.py @@ -76,31 +76,17 @@ def map_column_type(db_type: str) -> str: _DEFAULT_PRIORITY = ["user", "ai", "dbt", "db"] -def _description_sources( - descriptions: dict[str, str] | None, - description: str | None = None, - db_description: str | None = None, -) -> dict[str, str] | None: +def _description_sources(descriptions: dict[str, str] | None) -> dict[str, str] | None: """Normalize multi-source descriptions to a keyed map.""" if descriptions: result = {source: text for source, text in descriptions.items() if text} if result: return result - - result: dict[str, str] = {} - if description: - result["ai"] = description - if db_description: - result["db"] = db_description - return result or None + return None -def _resolve_description( - descriptions: dict[str, str] | None, - description: str | None = None, - db_description: str | None = None, -) -> str | None: - """Resolve a single description from a multi-source map or legacy flat fields.""" +def _resolve_description(descriptions: dict[str, str] | None) -> str | None: + """Resolve a single description from a multi-source map.""" if descriptions: for source in _DEFAULT_PRIORITY: if text := descriptions.get(source): @@ -109,11 +95,6 @@ def _resolve_description( for text in descriptions.values(): if text: return text - # Legacy flat fields - if description: - return description - if db_description: - return db_description return None @@ -123,18 +104,13 @@ class ManifestColumn(BaseModel): pk: bool = False nullable: bool = True descriptions: dict[str, str] | None = None - # Legacy flat fields (backwards-compatible YAML parsing) - description: str | None = None - db_description: str | None = None constraints: dict | None = None enum_values: dict[str, list[str]] | None = None tests: SourceColumnTests | None = None @property def resolved_description(self) -> str | None: - return _resolve_description( - self.descriptions, self.description, self.db_description - ) + return _resolve_description(self.descriptions) class ManifestJoin(BaseModel): @@ -147,9 +123,6 @@ class ManifestJoin(BaseModel): class ManifestEntry(BaseModel): table: str descriptions: dict[str, str] | None = None - # Legacy flat fields (backwards-compatible YAML parsing) - description: str | None = None - db_description: str | None = None columns: list[ManifestColumn] joins: list[ManifestJoin] = [] default_time_dimension: DefaultTimeDimensionDbt | None = None @@ -158,9 +131,7 @@ class ManifestEntry(BaseModel): @property def resolved_description(self) -> str | None: - return _resolve_description( - self.descriptions, self.description, self.db_description - ) + return _resolve_description(self.descriptions) class Manifest(BaseModel): @@ -178,6 +149,8 @@ def validate_overlay(data: dict) -> list[str]: Returns a list of error messages (empty if valid). """ errors: list[str] = [] + if "description" in data: + errors.append("Overlay must use 'descriptions' for source descriptions") if "table" in data: errors.append("Overlay must not contain 'table' (owned by manifest)") if "sql" in data: @@ -185,6 +158,10 @@ def validate_overlay(data: dict) -> list[str]: "Overlay must not contain 'sql' (that makes it a standalone source)" ) for col in data.get("columns", []): + if "description" in col: + errors.append( + f"Overlay column '{col.get('name', '?')}' must use 'descriptions'" + ) if "type" in col and "expr" not in col: errors.append( f"Overlay column '{col.get('name', '?')}' specifies 'type' without 'expr' " diff --git a/python/ktx-sl/sources/b2b_saas/churn_risk.yaml b/python/ktx-sl/sources/b2b_saas/churn_risk.yaml index 2fae793e..602d263a 100644 --- a/python/ktx-sl/sources/b2b_saas/churn_risk.yaml +++ b/python/ktx-sl/sources/b2b_saas/churn_risk.yaml @@ -1,10 +1,11 @@ name: churn_risk -description: | - Per-account churn risk scoring for B2B SaaS customers. Combines signals from - subscriptions (cancellation history), support tickets (severity, SLA breaches), - product usage (adoption decline), contracts (renewal proximity), CSM activities - (engagement recency), and invoices (payment issues) into a weighted composite - risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account. +descriptions: + user: | + Per-account churn risk scoring for B2B SaaS customers. Combines signals from + subscriptions (cancellation history), support tickets (severity, SLA breaches), + product usage (adoption decline), contracts (renewal proximity), CSM activities + (engagement recency), and invoices (payment issues) into a weighted composite + risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account. sql: | WITH sub_signals AS ( SELECT diff --git a/python/ktx-sl/sources/ecommerce/churn_risk.yaml b/python/ktx-sl/sources/ecommerce/churn_risk.yaml index 32e919ed..7a009a59 100644 --- a/python/ktx-sl/sources/ecommerce/churn_risk.yaml +++ b/python/ktx-sl/sources/ecommerce/churn_risk.yaml @@ -1,7 +1,8 @@ name: churn_risk -description: | - Customer churn risk score combining tenure, - usage trends, and support burden. +descriptions: + user: | + Customer churn risk score combining tenure, + usage trends, and support burden. sql: | SELECT c.id AS customer_id, diff --git a/python/ktx-sl/tests/test_manifest.py b/python/ktx-sl/tests/test_manifest.py index e025c3da..1007fc89 100644 --- a/python/ktx-sl/tests/test_manifest.py +++ b/python/ktx-sl/tests/test_manifest.py @@ -95,7 +95,7 @@ class TestProjectManifestEntry: def orders_entry(self) -> ManifestEntry: return ManifestEntry( table="public.orders", - description="Customer orders", + descriptions={"user": "Customer orders"}, columns=[ ManifestColumn(name="id", type="integer", pk=True), ManifestColumn(name="customer_id", type="integer"), @@ -202,7 +202,7 @@ class TestValidateOverlay: def test_validate_overlay_valid(self): data = { "name": "orders", - "description": "Revenue-bearing orders", + "descriptions": {"user": "Revenue-bearing orders"}, "grain": ["id"], "measures": [{"name": "revenue", "expr": "sum(total)"}], "columns": [ @@ -259,7 +259,7 @@ def _manifest_tables() -> dict: "tables": { "orders": { "table": "public.orders", - "description": "Customer orders", + "descriptions": {"user": "Customer orders"}, "columns": [ {"name": "id", "type": "integer", "pk": True}, {"name": "customer_id", "type": "integer"}, @@ -278,7 +278,7 @@ def _manifest_tables() -> dict: }, "customers": { "table": "public.customers", - "description": "Customer accounts", + "descriptions": {"user": "Customer accounts"}, "columns": [ {"name": "id", "type": "integer", "pk": True}, {"name": "name", "type": "varchar"}, @@ -329,12 +329,12 @@ class TestTwoTierLoading: assert sources["regions"].table == "public.regions" assert sources["regions"].is_table_source - def test_overlay_descriptions_do_not_promote_base_description_to_user_source( + def test_overlay_descriptions_do_not_promote_base_map_to_user_source( self, tmp_path: Path ): standalone = { "name": "regions", - "description": "Standalone description", + "descriptions": {"ai": "Standalone description"}, "table": "public.regions", "grain": ["id"], "columns": [ @@ -376,7 +376,7 @@ class TestTwoTierLoading: overlay = { "name": "orders", - "description": "Revenue-bearing orders", + "descriptions": {"user": "Revenue-bearing orders"}, "grain": ["id"], "measures": [{"name": "revenue", "expr": "sum(total)"}], } @@ -394,11 +394,11 @@ class TestTwoTierLoading: assert len(orders.measures) == 1 assert orders.measures[0].name == "revenue" - def test_overlay_description_override(self, tmp_path: Path): + def test_overlay_description_map_override(self, tmp_path: Path): schema_dir = tmp_path / "_schema" _write_yaml(schema_dir / "public.yaml", _manifest_tables()) - overlay = {"name": "orders", "description": "Overridden description"} + overlay = {"name": "orders", "descriptions": {"user": "Overridden description"}} _write_yaml(tmp_path / "orders.yaml", overlay) _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) @@ -426,7 +426,7 @@ class TestTwoTierLoading: sources = loader.load_all() assert sources["orders"].description == "Customer orders" - def test_overlay_descriptions_map_overrides_lower_priority_db_description( + def test_overlay_descriptions_map_overrides_lower_priority_db_source( self, tmp_path: Path ): schema_dir = tmp_path / "_schema" diff --git a/scripts/build-evidence-fusion-adversarial-fixtures.mjs b/scripts/build-evidence-fusion-adversarial-fixtures.mjs index 6994d5c2..282a6477 100644 --- a/scripts/build-evidence-fusion-adversarial-fixtures.mjs +++ b/scripts/build-evidence-fusion-adversarial-fixtures.mjs @@ -129,10 +129,10 @@ function nonEnglishFixture() { }; } -function abbreviatedLegacyFixture() { +function abbreviatedOldNamingFixture() { return { - id: 'abbreviated_legacy_no_declared_constraints', - name: 'Abbreviated legacy naming fixture with no declared constraints', + id: 'abbreviated_old_no_declared_constraints', + name: 'Abbreviated old naming fixture with no declared constraints', tier: 'row_bearing', sql: [ 'CREATE TABLE cust (cust_id TEXT NOT NULL, nm TEXT NOT NULL, stat_cd TEXT NOT NULL);', @@ -480,7 +480,7 @@ function scaleFixture() { const fixtures = [ nonEnglishFixture(), - abbreviatedLegacyFixture(), + abbreviatedOldNamingFixture(), analyticalWarehouseFixture(), mixedCaseFixture(), polymorphicFixture(), diff --git a/scripts/check-boundaries.mjs b/scripts/check-boundaries.mjs index 53455abd..9f2953e7 100644 --- a/scripts/check-boundaries.mjs +++ b/scripts/check-boundaries.mjs @@ -46,15 +46,15 @@ const llmBoundaryPatterns = [ pattern: /\bembedMany\b/, }, { - label: 'legacy context LLM provider port', + label: 'context-owned LLM provider port', pattern: /\bLlmProviderPort\b/, }, { - label: 'legacy scan LLM provider port', + label: 'scan-owned LLM provider port', pattern: /\bKtxScanLlmPort\b/, }, { - label: 'legacy gateway LLM provider helper', + label: 'context-owned gateway LLM provider helper', pattern: /\bcreateGatewayLlmProvider\b/, }, ]; diff --git a/scripts/check-boundaries.test.mjs b/scripts/check-boundaries.test.mjs index db8afafe..9d5bf6f9 100644 --- a/scripts/check-boundaries.test.mjs +++ b/scripts/check-boundaries.test.mjs @@ -92,7 +92,7 @@ describe('scanFileContent', () => { ); }); - it('rejects context-owned LLM provider construction after @ktx/llm migration', () => { + it('rejects context-owned LLM provider construction outside @ktx/llm', () => { const violations = [ ...scanFileContent( 'packages/context/src/agent/local-llm-provider.ts', diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 793566ed..87e6f2bc 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -128,7 +128,6 @@ describe('standalone example docs', () => { .join('|'), ), ); - assert.doesNotMatch(readme, /--historic-sql-min-calls/); }); it('lists every workspace package in the contributor docs', async () => {