mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
fix(context): normalize manifest column identifiers
This commit is contained in:
parent
cf93ee53bd
commit
9938469b2d
7 changed files with 477 additions and 29 deletions
|
|
@ -11,6 +11,78 @@ function shardObject(shards: Map<string, LiveDatabaseManifestShard>): Record<str
|
|||
}
|
||||
|
||||
describe('buildLiveDatabaseManifestShards', () => {
|
||||
it('normalizes unsafe physical column names while preserving quoted expressions', () => {
|
||||
const existingDescriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>([
|
||||
[
|
||||
'npi',
|
||||
{
|
||||
columns: new Map([
|
||||
['provider_business_mailing_address_country_code_if_outside_u_s', { user: 'Pinned normalized description' }],
|
||||
['provider_business_practice_location_address_country_code_(if_outside_u.s.)', { user: 'Pinned raw description' }],
|
||||
]),
|
||||
},
|
||||
],
|
||||
]);
|
||||
|
||||
const result = buildLiveDatabaseManifestShards({
|
||||
connectionType: 'DUCKDB',
|
||||
mapColumnType: (nativeType) => nativeType.toLowerCase(),
|
||||
existingDescriptions,
|
||||
tables: [
|
||||
{
|
||||
name: 'npi',
|
||||
catalog: 'provider',
|
||||
db: 'main',
|
||||
columns: [
|
||||
{ name: 'npi', type: 'INTEGER', pk: true },
|
||||
{ name: 'provider_business_mailing_address_country_code_(if_outside_u.s.)', type: 'VARCHAR' },
|
||||
{ name: 'provider_business_practice_location_address_country_code_(if_outside_u.s.)', type: 'VARCHAR' },
|
||||
{ name: 'Display Name', type: 'VARCHAR' },
|
||||
{ name: 'display_name', type: 'VARCHAR' },
|
||||
{ name: '123 code', type: 'VARCHAR' },
|
||||
],
|
||||
},
|
||||
],
|
||||
joins: [],
|
||||
});
|
||||
|
||||
expect(shardObject(result.shards)).toEqual({
|
||||
main: {
|
||||
tables: {
|
||||
npi: {
|
||||
table: 'provider.main.npi',
|
||||
columns: [
|
||||
{ name: 'npi', type: 'integer', pk: true },
|
||||
{
|
||||
name: 'provider_business_mailing_address_country_code_if_outside_u_s',
|
||||
type: 'varchar',
|
||||
expr: 'npi."provider_business_mailing_address_country_code_(if_outside_u.s.)"',
|
||||
descriptions: { user: 'Pinned normalized description' },
|
||||
},
|
||||
{
|
||||
name: 'provider_business_practice_location_address_country_code_if_outside_u_s',
|
||||
type: 'varchar',
|
||||
expr: 'npi."provider_business_practice_location_address_country_code_(if_outside_u.s.)"',
|
||||
descriptions: { user: 'Pinned raw description' },
|
||||
},
|
||||
{
|
||||
name: 'display_name_2',
|
||||
type: 'varchar',
|
||||
expr: 'npi."Display Name"',
|
||||
},
|
||||
{ name: 'display_name', type: 'varchar' },
|
||||
{
|
||||
name: 'column_123_code',
|
||||
type: 'varchar',
|
||||
expr: 'npi."123 code"',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('builds shard objects with generated joins and preserved external descriptions', () => {
|
||||
const existingDescriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>([
|
||||
[
|
||||
|
|
|
|||
|
|
@ -22,9 +22,74 @@ const HISTORIC_SQL_MANAGED_USAGE_KEYS = new Set([
|
|||
'staleSince',
|
||||
]);
|
||||
|
||||
const SAFE_SEMANTIC_COLUMN_NAME = /^[A-Za-z_][A-Za-z0-9_]*$/;
|
||||
const SQL_RESERVED_WORDS = new Set([
|
||||
'all',
|
||||
'and',
|
||||
'as',
|
||||
'between',
|
||||
'by',
|
||||
'case',
|
||||
'column',
|
||||
'constraint',
|
||||
'create',
|
||||
'default',
|
||||
'delete',
|
||||
'distinct',
|
||||
'drop',
|
||||
'else',
|
||||
'end',
|
||||
'except',
|
||||
'exists',
|
||||
'false',
|
||||
'fetch',
|
||||
'for',
|
||||
'from',
|
||||
'full',
|
||||
'grant',
|
||||
'group',
|
||||
'having',
|
||||
'in',
|
||||
'index',
|
||||
'inner',
|
||||
'insert',
|
||||
'intersect',
|
||||
'is',
|
||||
'join',
|
||||
'key',
|
||||
'left',
|
||||
'like',
|
||||
'limit',
|
||||
'natural',
|
||||
'not',
|
||||
'null',
|
||||
'on',
|
||||
'or',
|
||||
'order',
|
||||
'outer',
|
||||
'primary',
|
||||
'references',
|
||||
'revoke',
|
||||
'right',
|
||||
'select',
|
||||
'set',
|
||||
'table',
|
||||
'then',
|
||||
'true',
|
||||
'union',
|
||||
'update',
|
||||
'using',
|
||||
'values',
|
||||
'view',
|
||||
'when',
|
||||
'where',
|
||||
'with',
|
||||
]);
|
||||
|
||||
export interface LiveDatabaseManifestColumn {
|
||||
name: string;
|
||||
type: string;
|
||||
expr?: string;
|
||||
pk?: boolean;
|
||||
nullable?: boolean;
|
||||
descriptions?: Record<string, string>;
|
||||
|
|
@ -93,6 +158,64 @@ export interface BuildLiveDatabaseManifestShardsResult {
|
|||
tablesProcessed: number;
|
||||
}
|
||||
|
||||
function isSafeSemanticColumnName(name: string): boolean {
|
||||
return SAFE_SEMANTIC_COLUMN_NAME.test(name) && !SQL_RESERVED_WORDS.has(name.toLowerCase());
|
||||
}
|
||||
|
||||
export function normalizeManifestColumnName(rawName: string): string {
|
||||
const trimmed = rawName.trim();
|
||||
if (isSafeSemanticColumnName(trimmed)) {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
const normalized = trimmed
|
||||
.replace(/[^A-Za-z0-9]+/g, '_')
|
||||
.replace(/_+/g, '_')
|
||||
.replace(/^_+|_+$/g, '')
|
||||
.toLowerCase();
|
||||
const withFallback = normalized.length > 0 ? normalized : 'column';
|
||||
const withSafePrefix = /^[0-9]/.test(withFallback) ? `column_${withFallback}` : withFallback;
|
||||
return SQL_RESERVED_WORDS.has(withSafePrefix.toLowerCase()) ? `${withSafePrefix}_column` : withSafePrefix;
|
||||
}
|
||||
|
||||
export function normalizeManifestColumnNames(rawNames: readonly string[]): string[] {
|
||||
const safeNameCounts = new Map<string, number>();
|
||||
for (const rawName of rawNames) {
|
||||
const trimmed = rawName.trim();
|
||||
if (isSafeSemanticColumnName(trimmed)) {
|
||||
safeNameCounts.set(trimmed, (safeNameCounts.get(trimmed) ?? 0) + 1);
|
||||
}
|
||||
}
|
||||
const reservedSafeNames = new Set([...safeNameCounts.entries()].filter(([, count]) => count === 1).map(([name]) => name));
|
||||
const used = new Set<string>();
|
||||
|
||||
return rawNames.map((rawName) => {
|
||||
const trimmed = rawName.trim();
|
||||
if (isSafeSemanticColumnName(trimmed) && !used.has(trimmed)) {
|
||||
used.add(trimmed);
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
const base = normalizeManifestColumnName(rawName);
|
||||
let candidate = base;
|
||||
let suffix = 2;
|
||||
while (used.has(candidate) || reservedSafeNames.has(candidate)) {
|
||||
candidate = `${base}_${suffix}`;
|
||||
suffix += 1;
|
||||
}
|
||||
used.add(candidate);
|
||||
return candidate;
|
||||
});
|
||||
}
|
||||
|
||||
function quoteManifestIdentifier(identifier: string): string {
|
||||
return `"${identifier.replace(/"/g, '""')}"`;
|
||||
}
|
||||
|
||||
export function manifestColumnPhysicalExpression(sourceName: string, physicalColumnName: string): string {
|
||||
return `${sourceName}.${quoteManifestIdentifier(physicalColumnName)}`;
|
||||
}
|
||||
|
||||
function mergeDescriptionsPreservingExternal(
|
||||
existing: Record<string, string> | undefined,
|
||||
incoming: Record<string, string> | undefined,
|
||||
|
|
@ -189,6 +312,7 @@ function joinCondition(
|
|||
leftColumns: readonly string[],
|
||||
rightTable: string,
|
||||
rightColumns: readonly string[],
|
||||
columnNameMaps: Map<string, Map<string, string>>,
|
||||
): string {
|
||||
if (leftColumns.length === 0 || leftColumns.length !== rightColumns.length) {
|
||||
throw new Error(`Invalid relationship join from ${leftTable} to ${rightTable}: column tuple widths differ`);
|
||||
|
|
@ -199,7 +323,9 @@ function joinCondition(
|
|||
if (!rightColumn) {
|
||||
throw new Error(`Invalid relationship join from ${leftTable} to ${rightTable}: missing target column`);
|
||||
}
|
||||
return `${leftTable}.${leftColumn} = ${rightTable}.${rightColumn}`;
|
||||
const normalizedLeftColumn = columnNameMaps.get(leftTable)?.get(leftColumn) ?? normalizeManifestColumnName(leftColumn);
|
||||
const normalizedRightColumn = columnNameMaps.get(rightTable)?.get(rightColumn) ?? normalizeManifestColumnName(rightColumn);
|
||||
return `${leftTable}.${normalizedLeftColumn} = ${rightTable}.${normalizedRightColumn}`;
|
||||
})
|
||||
.join(' AND ');
|
||||
}
|
||||
|
|
@ -208,6 +334,7 @@ function buildJoinsByTable(
|
|||
tableNames: Set<string>,
|
||||
joins: LiveDatabaseManifestJoinData[],
|
||||
preservedJoins: Map<string, LiveDatabaseManifestJoinEntry[]>,
|
||||
columnNameMaps: Map<string, Map<string, string>>,
|
||||
): Map<string, LiveDatabaseManifestJoinEntry[]> {
|
||||
const joinsByTable = new Map<string, LiveDatabaseManifestJoinEntry[]>();
|
||||
|
||||
|
|
@ -218,7 +345,7 @@ function buildJoinsByTable(
|
|||
const relationship = RELATIONSHIP_MAP[join.relationship] ?? join.relationship;
|
||||
addJoinOnce(joinsByTable, join.fromTable, {
|
||||
to: join.toTable,
|
||||
on: joinCondition(join.fromTable, join.fromColumns, join.toTable, join.toColumns),
|
||||
on: joinCondition(join.fromTable, join.fromColumns, join.toTable, join.toColumns, columnNameMaps),
|
||||
relationship,
|
||||
source: join.source,
|
||||
});
|
||||
|
|
@ -226,7 +353,7 @@ function buildJoinsByTable(
|
|||
const reverseRelationship = RELATIONSHIP_INVERSE[relationship] ?? 'one_to_many';
|
||||
addJoinOnce(joinsByTable, join.toTable, {
|
||||
to: join.fromTable,
|
||||
on: joinCondition(join.toTable, join.toColumns, join.fromTable, join.fromColumns),
|
||||
on: joinCondition(join.toTable, join.toColumns, join.fromTable, join.fromColumns, columnNameMaps),
|
||||
relationship: reverseRelationship,
|
||||
source: join.source,
|
||||
});
|
||||
|
|
@ -250,19 +377,30 @@ export function buildLiveDatabaseManifestShards(
|
|||
input: BuildLiveDatabaseManifestShardsInput,
|
||||
): BuildLiveDatabaseManifestShardsResult {
|
||||
const tableNames = new Set(input.tables.map((table) => table.name));
|
||||
const joinsByTable = buildJoinsByTable(tableNames, input.joins, input.existingPreservedJoins ?? new Map());
|
||||
const columnNameMaps = new Map(
|
||||
input.tables.map((table) => {
|
||||
const normalizedNames = normalizeManifestColumnNames(table.columns.map((column) => column.name));
|
||||
return [table.name, new Map(table.columns.map((column, index) => [column.name, normalizedNames[index] ?? column.name]))] as const;
|
||||
}),
|
||||
);
|
||||
const joinsByTable = buildJoinsByTable(tableNames, input.joins, input.existingPreservedJoins ?? new Map(), columnNameMaps);
|
||||
const shards = new Map<string, LiveDatabaseManifestShard>();
|
||||
|
||||
for (const table of input.tables) {
|
||||
const shardKey = getShardKey(input.connectionType, table.catalog, table.db);
|
||||
const shard = shards.get(shardKey) ?? { tables: {} };
|
||||
const existingDescriptions = input.existingDescriptions?.get(table.name);
|
||||
const normalizedNames = normalizeManifestColumnNames(table.columns.map((column) => column.name));
|
||||
|
||||
const columns: LiveDatabaseManifestColumn[] = table.columns.map((column) => {
|
||||
const columns: LiveDatabaseManifestColumn[] = table.columns.map((column, index) => {
|
||||
const name = normalizedNames[index] ?? normalizeManifestColumnName(column.name);
|
||||
const manifestColumn: LiveDatabaseManifestColumn = {
|
||||
name: column.name,
|
||||
name,
|
||||
type: input.mapColumnType(column.type),
|
||||
};
|
||||
if (name !== column.name) {
|
||||
manifestColumn.expr = manifestColumnPhysicalExpression(table.name, column.name);
|
||||
}
|
||||
if (column.pk) {
|
||||
manifestColumn.pk = true;
|
||||
}
|
||||
|
|
@ -270,7 +408,7 @@ export function buildLiveDatabaseManifestShards(
|
|||
manifestColumn.nullable = false;
|
||||
}
|
||||
const descriptions = mergeDescriptionsPreservingExternal(
|
||||
existingDescriptions?.columns.get(column.name),
|
||||
existingDescriptions?.columns.get(name) ?? existingDescriptions?.columns.get(column.name),
|
||||
column.descriptions,
|
||||
);
|
||||
if (descriptions) {
|
||||
|
|
|
|||
|
|
@ -182,6 +182,71 @@ grain: []
|
|||
});
|
||||
});
|
||||
|
||||
it('sends normalized legacy manifest columns with physical expressions to compute', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/main.yaml',
|
||||
`tables:
|
||||
npi:
|
||||
table: provider.main.npi
|
||||
columns:
|
||||
- name: npi
|
||||
type: number
|
||||
- name: provider_business_mailing_address_country_code_(if_outside_u.s.)
|
||||
type: string
|
||||
- name: Display Name
|
||||
type: string
|
||||
- name: display_name
|
||||
type: string
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add legacy manifest shard',
|
||||
);
|
||||
|
||||
await compileLocalSlQuery(project, {
|
||||
connectionId: 'warehouse',
|
||||
query: {
|
||||
measures: [],
|
||||
dimensions: ['npi.provider_business_mailing_address_country_code_if_outside_u_s'],
|
||||
},
|
||||
compute,
|
||||
});
|
||||
|
||||
expect(compute.query).toHaveBeenLastCalledWith({
|
||||
sources: expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
name: 'npi',
|
||||
table: 'provider.main.npi',
|
||||
grain: [
|
||||
'npi',
|
||||
'provider_business_mailing_address_country_code_if_outside_u_s',
|
||||
'display_name_2',
|
||||
'display_name',
|
||||
],
|
||||
columns: [
|
||||
expect.objectContaining({ name: 'npi', type: 'number' }),
|
||||
expect.objectContaining({
|
||||
name: 'provider_business_mailing_address_country_code_if_outside_u_s',
|
||||
type: 'string',
|
||||
expr: 'npi."provider_business_mailing_address_country_code_(if_outside_u.s.)"',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
name: 'display_name_2',
|
||||
type: 'string',
|
||||
expr: 'npi."Display Name"',
|
||||
}),
|
||||
expect.objectContaining({ name: 'display_name', type: 'string' }),
|
||||
],
|
||||
}),
|
||||
]),
|
||||
dialect: 'postgres',
|
||||
query: {
|
||||
measures: [],
|
||||
dimensions: ['npi.provider_business_mailing_address_country_code_if_outside_u_s'],
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('strips authoring-only fields (usage, inherits_columns_from) before sending sources to the daemon', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/public.yaml',
|
||||
|
|
|
|||
|
|
@ -165,6 +165,45 @@ describe('local semantic-layer helpers', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('normalizes unsafe column names from existing manifest-backed scan sources', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/main.yaml',
|
||||
`tables:
|
||||
npi:
|
||||
table: provider.main.npi
|
||||
columns:
|
||||
- name: npi
|
||||
type: number
|
||||
- name: provider_business_mailing_address_country_code_(if_outside_u.s.)
|
||||
type: string
|
||||
- name: Display Name
|
||||
type: string
|
||||
- name: display_name
|
||||
type: string
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add legacy manifest shard',
|
||||
);
|
||||
|
||||
await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
|
||||
{
|
||||
columnCount: 4,
|
||||
connectionId: 'warehouse',
|
||||
joinCount: 0,
|
||||
measureCount: 0,
|
||||
name: 'npi',
|
||||
path: 'semantic-layer/warehouse/_schema/main.yaml#npi',
|
||||
},
|
||||
]);
|
||||
|
||||
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'npi' })).resolves.toEqual(
|
||||
expect.objectContaining({
|
||||
yaml: expect.stringContaining('provider_business_mailing_address_country_code_if_outside_u_s'),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('expands manifest-backed scan sources when listing all connections', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/public.yaml',
|
||||
|
|
|
|||
|
|
@ -1127,6 +1127,56 @@ describe('validateWithProposedSource', () => {
|
|||
expect(result.errors).toEqual([]);
|
||||
});
|
||||
|
||||
it('allows generated manifest columns to reference quoted physical identifiers', async () => {
|
||||
const schemaPath = 'semantic-layer/warehouse/_schema/main.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/warehouse') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/warehouse/_schema') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'tables:',
|
||||
' npi:',
|
||||
' table: provider.main.npi',
|
||||
' columns:',
|
||||
' - name: npi',
|
||||
' type: number',
|
||||
' - name: provider_business_mailing_address_country_code_if_outside_u_s',
|
||||
' type: string',
|
||||
' expr: npi."provider_business_mailing_address_country_code_(if_outside_u.s.)"',
|
||||
].join('\n'),
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('warehouse', {
|
||||
name: 'npi',
|
||||
table: 'provider.main.npi',
|
||||
grain: ['npi'],
|
||||
columns: [
|
||||
{ name: 'npi', type: 'number' },
|
||||
{
|
||||
name: 'provider_business_mailing_address_country_code_if_outside_u_s',
|
||||
type: 'string',
|
||||
expr: 'npi."provider_business_mailing_address_country_code_(if_outside_u.s.)"',
|
||||
},
|
||||
],
|
||||
joins: [],
|
||||
measures: [],
|
||||
});
|
||||
|
||||
expect(result.errors).toEqual([]);
|
||||
});
|
||||
|
||||
it('rejects join keys that are absent from matched physical sources', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,11 @@ import YAML from 'yaml';
|
|||
import type { KtxFileStorePort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import type { TableUsageOutput } from '../ingest/adapters/historic-sql/skill-schemas.js';
|
||||
import {
|
||||
manifestColumnPhysicalExpression,
|
||||
normalizeManifestColumnName,
|
||||
normalizeManifestColumnNames,
|
||||
} from '../ingest/adapters/live-database/manifest.js';
|
||||
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { isOverlaySource, resolvedSourceSchema, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
|
|
@ -495,6 +500,17 @@ export class SemanticLayerService {
|
|||
|
||||
const manifestSource = manifestMatch.source;
|
||||
const manifestColumns = new Map(manifestSource.columns.map((c) => [c.name.toLowerCase(), c.name]));
|
||||
const manifestPhysicalColumns = new Set<string>();
|
||||
for (const manifestColumn of manifestSource.columns) {
|
||||
if (!manifestColumn.expr) {
|
||||
continue;
|
||||
}
|
||||
for (const ref of extractSqlIdentifierRefs(manifestColumn.expr)) {
|
||||
if (refBelongsToSource(ref, manifestSource.name, sourceNames)) {
|
||||
manifestPhysicalColumns.add(ref.name.toLowerCase());
|
||||
}
|
||||
}
|
||||
}
|
||||
const declaredColumns = source.columns ?? [];
|
||||
const declaredByLower = new Map(declaredColumns.map((c) => [c.name.toLowerCase(), c]));
|
||||
const validOutputColumns = new Set(
|
||||
|
|
@ -537,7 +553,7 @@ export class SemanticLayerService {
|
|||
expr: column.expr,
|
||||
sourceName: source.name,
|
||||
sourceNames,
|
||||
validColumns: new Set([...manifestColumns.keys(), ...validOutputColumns]),
|
||||
validColumns: new Set([...manifestColumns.keys(), ...manifestPhysicalColumns, ...validOutputColumns]),
|
||||
validMeasures: new Set(),
|
||||
});
|
||||
if (missing.length > 0) {
|
||||
|
|
@ -1137,6 +1153,7 @@ export class SemanticLayerService {
|
|||
interface ManifestColumnEntry {
|
||||
name: string;
|
||||
type: string;
|
||||
expr?: string;
|
||||
pk?: boolean;
|
||||
nullable?: boolean;
|
||||
descriptions?: Record<string, string>;
|
||||
|
|
@ -1166,18 +1183,26 @@ export interface ManifestTableEntry {
|
|||
}
|
||||
|
||||
export function projectManifestEntry(name: string, entry: ManifestTableEntry): SemanticLayerSource {
|
||||
const columns = entry.columns.map((c) => ({
|
||||
name: c.name,
|
||||
type: c.type,
|
||||
role: c.type === 'time' ? 'time' : undefined,
|
||||
descriptions: c.descriptions,
|
||||
constraints: c.constraints,
|
||||
enum_values: c.enum_values,
|
||||
tests: c.tests,
|
||||
}));
|
||||
const columnNames = normalizeManifestColumnNames(entry.columns.map((c) => c.name));
|
||||
const columns = entry.columns.map((c, index) => {
|
||||
const columnName = columnNames[index] ?? normalizeManifestColumnName(c.name);
|
||||
const expr = c.expr ?? (columnName !== c.name ? manifestColumnPhysicalExpression(name, c.name) : undefined);
|
||||
return {
|
||||
name: columnName,
|
||||
type: c.type,
|
||||
role: c.type === 'time' ? 'time' : undefined,
|
||||
descriptions: c.descriptions,
|
||||
...(expr ? { expr } : {}),
|
||||
constraints: c.constraints,
|
||||
enum_values: c.enum_values,
|
||||
tests: c.tests,
|
||||
};
|
||||
});
|
||||
|
||||
const pkColumns = entry.columns.filter((c) => c.pk).map((c) => c.name);
|
||||
const grain = pkColumns.length > 0 ? pkColumns : entry.columns.map((c) => c.name);
|
||||
const pkColumns = entry.columns.flatMap((c, index) =>
|
||||
c.pk ? [columnNames[index] ?? normalizeManifestColumnName(c.name)] : [],
|
||||
);
|
||||
const grain = pkColumns.length > 0 ? pkColumns : columnNames;
|
||||
|
||||
// Table-level dbt config from manifest shards is surfaced on the source for search / tools.
|
||||
const source: SemanticLayerSource = {
|
||||
|
|
@ -1276,9 +1301,7 @@ const SQL_KEYWORDS = new Set([
|
|||
]);
|
||||
|
||||
function extractColumnReferences(expr: string): string[] {
|
||||
const cleaned = expr.replace(/'[^']*'/g, '').replace(/\b\d+(\.\d+)?\b/g, '');
|
||||
const tokens = cleaned.match(/\b[a-zA-Z_]\w*\b/g) ?? [];
|
||||
return [...new Set(tokens.filter((t) => !SQL_KEYWORDS.has(t.toLowerCase())))];
|
||||
return [...new Set(extractSqlIdentifierRefs(expr).map((ref) => ref.name))];
|
||||
}
|
||||
|
||||
function manifestEntryMatchesRef(source: SemanticLayerSource, ref: string): boolean {
|
||||
|
|
@ -1295,19 +1318,23 @@ function normalizeSqlExpressionForIdentifierScan(expr: string): string {
|
|||
.replace(/--.*$/gm, ' ')
|
||||
.replace(/\/\*[\s\S]*?\*\//g, ' ')
|
||||
.replace(/'([^']|'')*'/g, ' ')
|
||||
.replace(/"([^"]+)"/g, '$1')
|
||||
.replace(/`([^`]+)`/g, '$1')
|
||||
.replace(/\[([^\]]+)\]/g, '$1')
|
||||
.replace(/::\s*[A-Za-z_][\w$]*(?:\s*\([^)]*\))?/g, ' ');
|
||||
}
|
||||
|
||||
function extractSqlIdentifierRefs(expr: string): Array<{ qualifier?: string; name: string }> {
|
||||
const normalized = normalizeSqlExpressionForIdentifierScan(expr);
|
||||
const refs = new Map<string, { qualifier?: string; name: string }>();
|
||||
const re = /(?:\b([A-Za-z_][\w$]*)\s*\.\s*)?(\b[A-Za-z_][\w$]*)\b/g;
|
||||
|
||||
const bareIdentifier = String.raw`[A-Za-z_][\w$]*`;
|
||||
const quotedIdentifier = String.raw`"(?:""|[^"])*"|` + String.raw`\`(?:\`\`|[^\`])*\`|\[(?:[^\]])+\]`;
|
||||
const identifier = String.raw`(?:${quotedIdentifier}|${bareIdentifier})`;
|
||||
const re = new RegExp(String.raw`(${identifier})\s*\.\s*(${identifier})|(${identifier})`, 'g');
|
||||
|
||||
for (const match of normalized.matchAll(re)) {
|
||||
const qualifier = match[1];
|
||||
const name = match[2];
|
||||
const rawQualifier = match[1];
|
||||
const rawName = match[2] ?? match[3] ?? '';
|
||||
const qualifier = rawQualifier ? unquoteSqlIdentifier(rawQualifier) : undefined;
|
||||
const name = unquoteSqlIdentifier(rawName);
|
||||
if (!name) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1317,7 +1344,10 @@ function extractSqlIdentifierRefs(expr: string): Array<{ qualifier?: string; nam
|
|||
if (!qualifier && after.startsWith('(')) {
|
||||
continue;
|
||||
}
|
||||
if (SQL_KEYWORDS.has(nameLower) || (qualifierLower && SQL_KEYWORDS.has(qualifierLower))) {
|
||||
if (!isQuotedSqlIdentifier(rawName) && SQL_KEYWORDS.has(nameLower)) {
|
||||
continue;
|
||||
}
|
||||
if (rawQualifier && !isQuotedSqlIdentifier(rawQualifier) && qualifierLower && SQL_KEYWORDS.has(qualifierLower)) {
|
||||
continue;
|
||||
}
|
||||
refs.set(`${qualifierLower ?? ''}.${nameLower}`, qualifier ? { qualifier, name } : { name });
|
||||
|
|
@ -1325,6 +1355,27 @@ function extractSqlIdentifierRefs(expr: string): Array<{ qualifier?: string; nam
|
|||
return [...refs.values()];
|
||||
}
|
||||
|
||||
function isQuotedSqlIdentifier(identifier: string): boolean {
|
||||
return (
|
||||
(identifier.startsWith('"') && identifier.endsWith('"')) ||
|
||||
(identifier.startsWith('`') && identifier.endsWith('`')) ||
|
||||
(identifier.startsWith('[') && identifier.endsWith(']'))
|
||||
);
|
||||
}
|
||||
|
||||
function unquoteSqlIdentifier(identifier: string): string {
|
||||
if (identifier.startsWith('"') && identifier.endsWith('"')) {
|
||||
return identifier.slice(1, -1).replace(/""/g, '"');
|
||||
}
|
||||
if (identifier.startsWith('`') && identifier.endsWith('`')) {
|
||||
return identifier.slice(1, -1).replace(/``/g, '`');
|
||||
}
|
||||
if (identifier.startsWith('[') && identifier.endsWith(']')) {
|
||||
return identifier.slice(1, -1);
|
||||
}
|
||||
return identifier;
|
||||
}
|
||||
|
||||
function refBelongsToSource(
|
||||
ref: { qualifier?: string; name: string },
|
||||
sourceName: string,
|
||||
|
|
|
|||
|
|
@ -62,6 +62,39 @@ class TestSimpleSingleSource:
|
|||
assert "GROUP BY" in sql.upper()
|
||||
assert "public.orders" in sql
|
||||
|
||||
def test_computed_column_can_reference_quoted_physical_name(self):
|
||||
source = SourceDefinition(
|
||||
name="npi",
|
||||
table="provider.main.npi",
|
||||
grain=["provider_business_mailing_address_country_code_if_outside_u_s"],
|
||||
columns=[
|
||||
SourceColumn(
|
||||
name="provider_business_mailing_address_country_code_if_outside_u_s",
|
||||
type="string",
|
||||
expr='npi."provider_business_mailing_address_country_code_(if_outside_u.s.)"',
|
||||
)
|
||||
],
|
||||
)
|
||||
sources = {"npi": source}
|
||||
engine = SemanticEngine.from_sources(sources, dialect="duckdb")
|
||||
|
||||
sql = engine.query(
|
||||
{
|
||||
"measures": [],
|
||||
"dimensions": [
|
||||
"npi.provider_business_mailing_address_country_code_if_outside_u_s"
|
||||
],
|
||||
"limit": 5,
|
||||
}
|
||||
).sql
|
||||
|
||||
assert_valid_sql(sql)
|
||||
assert (
|
||||
'npi."provider_business_mailing_address_country_code_(if_outside_u.s.)"'
|
||||
in sql
|
||||
)
|
||||
assert "AS provider_business_mailing_address_country_code_if_outside_u_s" in sql
|
||||
|
||||
|
||||
class TestCrossSourceM2O:
|
||||
"""Test 2: Cross-source, all m2o (the LATAM query)."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue