fix(context): verify warehouse column display targets

This commit is contained in:
Andrey Avtomonov 2026-05-12 23:49:38 +02:00
parent 8a758ce80b
commit 5c8d78b903
4 changed files with 151 additions and 8 deletions

View file

@ -118,6 +118,31 @@ describe('EntityDetailsTool', () => {
expect(result.structured.resolved).toHaveLength(1);
});
it('resolves display targets that include a column name', async () => {
const result = await tool.call(
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] },
context,
);
expect(result.markdown).toContain('### public.orders');
expect(result.markdown).toContain('- status (text, nullable=false)');
expect(result.markdown).not.toContain('- id (integer');
expect(result.structured.resolved).toHaveLength(1);
expect(result.structured.resolved[0]?.columns.map((column) => column.name)).toEqual(['status']);
});
it('reports missing explicit columns instead of returning an empty column list', async () => {
const result = await tool.call(
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
context,
);
expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier');
expect(result.markdown).toContain('Available columns: id, status');
expect(result.structured.resolved).toHaveLength(0);
expect(result.structured.missing).toHaveLength(1);
});
it('returns a no-scan state distinct from not found', async () => {
const result = await tool.call(
{ connectionName: 'empty', targets: [{ display: 'public.orders' }] },

View file

@ -49,6 +49,11 @@ function appendTableMarkdown(parts: string[], detail: TableDetail, columnName?:
parts.push('');
}
function findColumn(detail: TableDetail, columnName: string): TableDetail['columns'][number] | null {
const normalized = columnName.toLowerCase();
return detail.columns.find((column) => column.name.toLowerCase() === normalized) ?? null;
}
export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema> {
readonly name = 'entity_details';
@ -89,8 +94,12 @@ export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema>
for (const target of input.targets) {
const resolution =
'display' in target
? await catalog.resolveDisplay(input.connectionName, target.display)
: { resolved: { catalog: target.catalog, db: target.db, name: target.name }, candidates: [], dialect: '' };
? await catalog.resolveDisplayTarget(input.connectionName, target.display)
: {
resolved: { catalog: target.catalog, db: target.db, name: target.name, column: target.column },
candidates: [],
dialect: '',
};
if (!resolution.resolved) {
missing.push({ target, candidates: resolution.candidates });
parts.push(`Not found in scan: ${'display' in target ? target.display : target.name}`);
@ -104,8 +113,26 @@ export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema>
missing.push({ target, candidates: resolution.candidates });
continue;
}
const requestedColumn = resolution.resolved.column;
if (requestedColumn) {
const column = findColumn(detail, requestedColumn);
if (!column) {
missing.push({
target,
candidates: [{ catalog: detail.catalog, db: detail.db, name: detail.name }],
});
parts.push(`Column not found in scan: ${detail.display}.${requestedColumn}`);
parts.push(`Available columns: ${detail.columns.map((candidate) => candidate.name).join(', ')}`);
continue;
}
const scopedDetail = { ...detail, columns: [column] };
resolved.push(scopedDetail);
appendTableMarkdown(parts, scopedDetail, column.name);
continue;
}
resolved.push(detail);
appendTableMarkdown(parts, detail, 'column' in target ? target.column : undefined);
appendTableMarkdown(parts, detail);
}
return {

View file

@ -20,6 +20,11 @@ describe('WarehouseCatalogService', () => {
async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-2', driver = 'postgres') {
const root = `raw-sources/${connectionName}/live-database/${syncId}`;
const tableRef = {
catalog: driver === 'bigquery' ? 'analytics' : null,
db: driver === 'sqlite' ? null : 'public',
name: 'orders',
};
await project.fileStore.writeFile(
`${root}/connection.json`,
JSON.stringify({ connectionId: connectionName, driver, extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
@ -31,9 +36,9 @@ describe('WarehouseCatalogService', () => {
`${root}/tables/orders.json`,
JSON.stringify(
{
catalog: null,
db: driver === 'sqlite' ? null : 'public',
name: 'orders',
catalog: tableRef.catalog,
db: tableRef.db,
name: tableRef.name,
kind: 'table',
comment: 'Customer orders',
estimatedRows: 12,
@ -74,10 +79,10 @@ describe('WarehouseCatalogService', () => {
driver,
sqlAvailable: true,
queryCount: 3,
tables: [{ table: { catalog: null, db: driver === 'sqlite' ? null : 'public', name: 'orders' }, rowCount: 12 }],
tables: [{ table: { catalog: tableRef.catalog, db: tableRef.db, name: tableRef.name }, rowCount: 12 }],
columns: {
'orders.status': {
table: { catalog: null, db: driver === 'sqlite' ? null : 'public', name: 'orders' },
table: { catalog: tableRef.catalog, db: tableRef.db, name: tableRef.name },
column: 'status',
nativeType: 'text',
normalizedType: 'text',
@ -152,6 +157,28 @@ describe('WarehouseCatalogService', () => {
});
});
it('resolves postgres column display strings without treating the column as a table', async () => {
await seedLiveDatabaseScan();
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
await expect(catalog.resolveDisplayTarget('warehouse', 'public.orders.status')).resolves.toMatchObject({
resolved: { catalog: null, db: 'public', name: 'orders', column: 'status' },
candidates: [],
dialect: 'postgres',
});
});
it('resolves BigQuery column display strings with four parts', async () => {
await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery');
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
await expect(catalog.resolveDisplayTarget('warehouse', 'analytics.public.orders.status')).resolves.toMatchObject({
resolved: { catalog: 'analytics', db: 'public', name: 'orders', column: 'status' },
candidates: [],
dialect: 'bigquery',
});
});
it('searches table names, column names, comments, and descriptions', async () => {
await seedLiveDatabaseScan();
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });

View file

@ -41,6 +41,12 @@ export type RawSchemaHit =
| { kind: 'table'; ref: KtxTableRef; display: string; matchedOn: 'name' | 'db' | 'comment' | 'description' }
| { kind: 'column'; ref: KtxTableRef & { column: string }; display: string; matchedOn: 'name' | 'comment' | 'description' };
export interface DisplayTargetResolution {
resolved: (KtxTableRef & { column?: string }) | null;
candidates: KtxTableRef[];
dialect: string;
}
interface ConnectionArtifact {
driver?: CatalogDriver;
}
@ -138,6 +144,30 @@ function parseDisplay(driver: CatalogDriver, display: string): KtxTableRef | nul
return parts.length === 1 ? { catalog: null, db: null, name: parts[0]! } : null;
}
function expectedDisplayPartCount(driver: CatalogDriver): number {
if (driver === 'sqlite' || driver === 'sqlite3') {
return 1;
}
if (driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver') {
return 3;
}
return 2;
}
function parseColumnDisplay(driver: CatalogDriver, display: string): (KtxTableRef & { column: string }) | null {
const parts = splitDisplay(display);
const tablePartCount = expectedDisplayPartCount(driver);
if (parts.length !== tablePartCount + 1) {
return null;
}
const column = parts.at(-1);
if (!column) {
return null;
}
const table = parseDisplay(driver, parts.slice(0, -1).join('.'));
return table ? { ...table, column } : null;
}
function bestCandidates(tables: KtxSchemaTable[], display: string, limit = 5): KtxTableRef[] {
const needle = normalize(splitDisplay(display).at(-1) ?? display);
return tables
@ -295,6 +325,40 @@ export class WarehouseCatalogService {
return { resolved: { catalog: table.catalog, db: table.db, name: table.name }, candidates: [], dialect };
}
async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> {
const catalog = await this.loadCatalog(connectionName);
if (!catalog) {
return { resolved: null, candidates: [], dialect: 'unknown' };
}
const dialect = getDialectForDriver(catalog.driver).type;
const tableResolution = await this.resolveDisplay(connectionName, display);
if (tableResolution.resolved) {
return tableResolution;
}
const parsedColumn = parseColumnDisplay(catalog.driver, display);
if (!parsedColumn) {
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
}
const table = catalog.tables.find((candidate) => refsEqual(candidate, parsedColumn));
if (!table) {
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
}
return {
resolved: {
catalog: table.catalog,
db: table.db,
name: table.name,
column: parsedColumn.column,
},
candidates: [],
dialect,
};
}
async searchByName(connectionName: string, query: string, limit: number): Promise<RawSchemaHit[]> {
const catalog = await this.loadCatalog(connectionName);
if (!catalog) {