From 5c8d78b903f3a05290ab65389f4b4d27e7f7be84 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 23:49:38 +0200 Subject: [PATCH] fix(context): verify warehouse column display targets --- .../entity-details.tool.test.ts | 25 ++++++++ .../entity-details.tool.ts | 33 +++++++++- .../warehouse-catalog.service.test.ts | 37 +++++++++-- .../warehouse-catalog.service.ts | 64 +++++++++++++++++++ 4 files changed, 151 insertions(+), 8 deletions(-) diff --git a/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts index 6511580d..4d58f58c 100644 --- a/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts @@ -118,6 +118,31 @@ describe('EntityDetailsTool', () => { expect(result.structured.resolved).toHaveLength(1); }); + it('resolves display targets that include a column name', async () => { + const result = await tool.call( + { connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] }, + context, + ); + + expect(result.markdown).toContain('### public.orders'); + expect(result.markdown).toContain('- status (text, nullable=false)'); + expect(result.markdown).not.toContain('- id (integer'); + expect(result.structured.resolved).toHaveLength(1); + expect(result.structured.resolved[0]?.columns.map((column) => column.name)).toEqual(['status']); + }); + + it('reports missing explicit columns instead of returning an empty column list', async () => { + const result = await tool.call( + { connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] }, + context, + ); + + expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier'); + expect(result.markdown).toContain('Available columns: id, status'); + expect(result.structured.resolved).toHaveLength(0); + 
expect(result.structured.missing).toHaveLength(1); + }); + it('returns a no-scan state distinct from not found', async () => { const result = await tool.call( { connectionName: 'empty', targets: [{ display: 'public.orders' }] }, diff --git a/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts index 58f5fb35..7337a884 100644 --- a/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts @@ -49,6 +49,11 @@ function appendTableMarkdown(parts: string[], detail: TableDetail, columnName?: parts.push(''); } +function findColumn(detail: TableDetail, columnName: string): TableDetail['columns'][number] | null { + const normalized = columnName.toLowerCase(); + return detail.columns.find((column) => column.name.toLowerCase() === normalized) ?? null; +} + export class EntityDetailsTool extends BaseTool { readonly name = 'entity_details'; @@ -89,8 +94,12 @@ export class EntityDetailsTool extends BaseTool for (const target of input.targets) { const resolution = 'display' in target - ? await catalog.resolveDisplay(input.connectionName, target.display) - : { resolved: { catalog: target.catalog, db: target.db, name: target.name }, candidates: [], dialect: '' }; + ? await catalog.resolveDisplayTarget(input.connectionName, target.display) + : { + resolved: { catalog: target.catalog, db: target.db, name: target.name, column: target.column }, + candidates: [], + dialect: '', + }; if (!resolution.resolved) { missing.push({ target, candidates: resolution.candidates }); parts.push(`Not found in scan: ${'display' in target ? 
target.display : target.name}`); @@ -104,8 +113,26 @@ export class EntityDetailsTool extends BaseTool missing.push({ target, candidates: resolution.candidates }); continue; } + const requestedColumn = resolution.resolved.column; + if (requestedColumn) { + const column = findColumn(detail, requestedColumn); + if (!column) { + missing.push({ + target, + candidates: [{ catalog: detail.catalog, db: detail.db, name: detail.name }], + }); + parts.push(`Column not found in scan: ${detail.display}.${requestedColumn}`); + parts.push(`Available columns: ${detail.columns.map((candidate) => candidate.name).join(', ')}`); + continue; + } + const scopedDetail = { ...detail, columns: [column] }; + resolved.push(scopedDetail); + appendTableMarkdown(parts, scopedDetail, column.name); + continue; + } + resolved.push(detail); - appendTableMarkdown(parts, detail, 'column' in target ? target.column : undefined); + appendTableMarkdown(parts, detail); } return { diff --git a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts index f2c9697f..c2ab1f36 100644 --- a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts @@ -20,6 +20,11 @@ describe('WarehouseCatalogService', () => { async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-2', driver = 'postgres') { const root = `raw-sources/${connectionName}/live-database/${syncId}`; + const tableRef = { + catalog: driver === 'bigquery' ? 'analytics' : null, + db: driver === 'sqlite' ? 
null : 'public', + name: 'orders', + }; await project.fileStore.writeFile( `${root}/connection.json`, JSON.stringify({ connectionId: connectionName, driver, extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2), @@ -31,9 +36,9 @@ describe('WarehouseCatalogService', () => { `${root}/tables/orders.json`, JSON.stringify( { - catalog: null, - db: driver === 'sqlite' ? null : 'public', - name: 'orders', + catalog: tableRef.catalog, + db: tableRef.db, + name: tableRef.name, kind: 'table', comment: 'Customer orders', estimatedRows: 12, @@ -74,10 +79,10 @@ describe('WarehouseCatalogService', () => { driver, sqlAvailable: true, queryCount: 3, - tables: [{ table: { catalog: null, db: driver === 'sqlite' ? null : 'public', name: 'orders' }, rowCount: 12 }], + tables: [{ table: { catalog: tableRef.catalog, db: tableRef.db, name: tableRef.name }, rowCount: 12 }], columns: { 'orders.status': { - table: { catalog: null, db: driver === 'sqlite' ? null : 'public', name: 'orders' }, + table: { catalog: tableRef.catalog, db: tableRef.db, name: tableRef.name }, column: 'status', nativeType: 'text', normalizedType: 'text', @@ -152,6 +157,28 @@ describe('WarehouseCatalogService', () => { }); }); + it('resolves postgres column display strings without treating the column as a table', async () => { + await seedLiveDatabaseScan(); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplayTarget('warehouse', 'public.orders.status')).resolves.toMatchObject({ + resolved: { catalog: null, db: 'public', name: 'orders', column: 'status' }, + candidates: [], + dialect: 'postgres', + }); + }); + + it('resolves BigQuery column display strings with four parts', async () => { + await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery'); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplayTarget('warehouse', 'analytics.public.orders.status')).resolves.toMatchObject({ 
+ resolved: { catalog: 'analytics', db: 'public', name: 'orders', column: 'status' }, + candidates: [], + dialect: 'bigquery', + }); + }); + it('searches table names, column names, comments, and descriptions', async () => { await seedLiveDatabaseScan(); const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); diff --git a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts index f935f0ca..9989ff38 100644 --- a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts @@ -41,6 +41,12 @@ export type RawSchemaHit = | { kind: 'table'; ref: KtxTableRef; display: string; matchedOn: 'name' | 'db' | 'comment' | 'description' } | { kind: 'column'; ref: KtxTableRef & { column: string }; display: string; matchedOn: 'name' | 'comment' | 'description' }; +export interface DisplayTargetResolution { + resolved: (KtxTableRef & { column?: string }) | null; + candidates: KtxTableRef[]; + dialect: string; +} + interface ConnectionArtifact { driver?: CatalogDriver; } @@ -138,6 +144,30 @@ function parseDisplay(driver: CatalogDriver, display: string): KtxTableRef | nul return parts.length === 1 ? { catalog: null, db: null, name: parts[0]! 
} : null; } +function expectedDisplayPartCount(driver: CatalogDriver): number { + if (driver === 'sqlite' || driver === 'sqlite3') { + return 1; + } + if (driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver') { + return 3; + } + return 2; +} + +function parseColumnDisplay(driver: CatalogDriver, display: string): (KtxTableRef & { column: string }) | null { + const parts = splitDisplay(display); + const tablePartCount = expectedDisplayPartCount(driver); + if (parts.length !== tablePartCount + 1) { + return null; + } + const column = parts.at(-1); + if (!column) { + return null; + } + const table = parseDisplay(driver, parts.slice(0, -1).join('.')); + return table ? { ...table, column } : null; +} + function bestCandidates(tables: KtxSchemaTable[], display: string, limit = 5): KtxTableRef[] { const needle = normalize(splitDisplay(display).at(-1) ?? display); return tables @@ -295,6 +325,40 @@ export class WarehouseCatalogService { return { resolved: { catalog: table.catalog, db: table.db, name: table.name }, candidates: [], dialect }; } + async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> { + const catalog = await this.loadCatalog(connectionName); + if (!catalog) { + return { resolved: null, candidates: [], dialect: 'unknown' }; + } + + const dialect = getDialectForDriver(catalog.driver).type; + const tableResolution = await this.resolveDisplay(connectionName, display); + if (tableResolution.resolved) { + return tableResolution; + } + + const parsedColumn = parseColumnDisplay(catalog.driver, display); + if (!parsedColumn) { + return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect }; + } + + const table = catalog.tables.find((candidate) => refsEqual(candidate, parsedColumn)); + if (!table) { + return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect }; + } + + return { + resolved: { + catalog: table.catalog, + db: table.db, + name: table.name, + column: 
parsedColumn.column, + }, + candidates: [], + dialect, + }; + } + async searchByName(connectionName: string, query: string, limit: number): Promise { const catalog = await this.loadCatalog(connectionName); if (!catalog) {