mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-25 08:48:08 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
42
packages/context/src/ingest/action-identity.test.ts
Normal file
42
packages/context/src/ingest/action-identity.test.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { actionTargetConnectionId, memoryActionIdentity } from './action-identity.js';
|
||||
|
||||
// Identity strings have the shape `<target>:<connection id>:<key>`. These cases pin
// which connection id is picked for semantic-layer ('sl') actions versus wiki actions.
describe('memory action target identity', () => {
  const RUN_CONNECTION = 'looker-run';

  it('keys SL actions by target connection and wiki actions by run connection', () => {
    // An SL action carrying its own target connection is keyed by that connection.
    const slWithTarget = memoryActionIdentity(
      { target: 'sl', type: 'created', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
      RUN_CONNECTION,
    );
    expect(slWithTarget).toBe('sl:warehouse-b:orders');

    // Without a target connection the run connection is used instead.
    const slWithoutTarget = memoryActionIdentity(
      { target: 'sl', type: 'created', key: 'orders', detail: '' },
      'warehouse-a',
    );
    expect(slWithoutTarget).toBe('sl:warehouse-a:orders');

    // Wiki actions ignore any targetConnectionId they carry.
    const wiki = memoryActionIdentity(
      {
        target: 'wiki',
        type: 'created',
        key: 'knowledge/global/orders.md',
        detail: '',
        targetConnectionId: 'ignored',
      },
      RUN_CONNECTION,
    );
    expect(wiki).toBe('wiki:looker-run:knowledge/global/orders.md');
  });

  it('resolves action target connection only for SL actions', () => {
    const slConnection = actionTargetConnectionId(
      { target: 'sl', type: 'updated', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
      RUN_CONNECTION,
    );
    expect(slConnection).toBe('warehouse-b');

    const wikiConnection = actionTargetConnectionId(
      { target: 'wiki', type: 'updated', key: 'orders', detail: '' },
      RUN_CONNECTION,
    );
    expect(wikiConnection).toBe('looker-run');
  });
});
|
||||
9
packages/context/src/ingest/action-identity.ts
Normal file
9
packages/context/src/ingest/action-identity.ts
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
import type { MemoryAction } from '../memory/index.js';
|
||||
|
||||
/**
 * Picks the connection id a memory action is attributed to.
 *
 * Only actions whose `target` is `'sl'` may override the connection: they use their
 * own `targetConnectionId` when it is neither null nor undefined. Every other action
 * is attributed to `runConnectionId`, even if it carries a `targetConnectionId`.
 *
 * @param action - The memory action being resolved.
 * @param runConnectionId - Connection id of the run; used as the fallback.
 * @returns The connection id to attribute the action to.
 */
export function actionTargetConnectionId(action: MemoryAction, runConnectionId: string): string {
  if (action.target !== 'sl') {
    return runConnectionId;
  }
  return action.targetConnectionId ?? runConnectionId;
}
|
||||
|
||||
/**
 * Builds the identity string for a memory action: `<target>:<connection id>:<key>`.
 *
 * The connection id segment comes from `actionTargetConnectionId`, so it is the
 * action's own target connection for `'sl'` actions that have one and the run
 * connection otherwise.
 *
 * @param action - The memory action to identify.
 * @param runConnectionId - Connection id of the run.
 * @returns The colon-separated identity string.
 */
export function memoryActionIdentity(action: MemoryAction, runConnectionId: string): string {
  const connectionId = actionTargetConnectionId(action, runConnectionId);
  return `${action.target}:${connectionId}:${action.key}`;
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { DbtParsedTable } from './parse-schema.js';
|
||||
import { findMatchingKloTable, matchDbtTables, type DbtHostTableLite } from './match-tables.js';
|
||||
|
||||
// Host-side fixtures: two `orders` tables that differ only by schema (`db`), plus a
// `customers` table that has neither catalog nor schema.
const hostTables: DbtHostTableLite[] = [
  { id: '1', name: 'orders', catalog: 'warehouse', db: 'analytics', columns: [{ id: 'c1', name: 'id' }] },
  { id: '2', name: 'orders', catalog: 'warehouse', db: 'staging', columns: [{ id: 'c2', name: 'id' }] },
  { id: '3', name: 'customers', catalog: null, db: null, columns: [{ id: 'c3', name: 'id' }] },
];

/** Builds a dbt table fixture: an undescribed, column-less `orders` model unless overridden. */
function table(input: Partial<DbtParsedTable>): DbtParsedTable {
  const defaults: DbtParsedTable = {
    name: 'orders',
    description: null,
    database: null,
    schema: null,
    columns: [],
    resourceType: 'model',
  };
  return { ...defaults, ...input };
}

describe('dbt descriptions table matching', () => {
  it('uses schema plus name first and checks catalog when dbt database is present', () => {
    const qualified = table({ database: 'warehouse', schema: 'analytics' });
    const match = findMatchingKloTable(qualified, hostTables, null);
    expect(match?.id).toBe('1');
  });

  it('does not fall back to name-only for source tables', () => {
    const source = table({ resourceType: 'source' });
    expect(findMatchingKloTable(source, hostTables, null)).toBeUndefined();
  });

  it('uses targetSchema for models and name-only only when unique', () => {
    // targetSchema disambiguates the two `orders` host tables.
    const viaTargetSchema = findMatchingKloTable(table({ resourceType: 'model' }), hostTables, 'staging');
    expect(viaTargetSchema?.id).toBe('2');

    // `customers` exists once on the host, so a name-only match is accepted.
    const uniqueByName = findMatchingKloTable(
      table({ name: 'customers', resourceType: 'model' }),
      hostTables,
      null,
    );
    expect(uniqueByName?.id).toBe('3');

    // `orders` exists twice, so a name-only match is rejected.
    const ambiguous = findMatchingKloTable(table({ resourceType: 'model' }), hostTables, null);
    expect(ambiguous).toBeUndefined();
  });

  it('summarizes matched columns and descriptions', () => {
    const customers = table({
      name: 'customers',
      description: 'Customers',
      columns: [
        { name: 'id', description: 'Primary key', dataType: null },
        { name: 'missing', description: 'Missing', dataType: null },
      ],
    });

    const matches = matchDbtTables([customers], hostTables, null);

    expect(matches).toEqual([
      {
        dbtTable: 'customers',
        dbtSchema: null,
        dbtDatabase: null,
        hostTableId: '3',
        hostTableName: 'customers',
        matched: true,
        tableDescriptionAction: 'import',
        tableDescriptionFound: true,
        columnsToImport: 1,
        columnsMatched: 1,
        columnsTotal: 2,
        columnDescriptionsFound: 1,
      },
    ]);
  });
});
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
import type { DbtParsedTable } from './parse-schema.js';
|
||||
|
||||
/**
 * Minimal view of a host-side table used when matching dbt tables against it.
 */
export interface DbtHostTableLite {
  /** Host identifier of the table; copied into `DbtTableMatch.hostTableId` on a match. */
  id: string;
  /** Table name; compared case-insensitively with the dbt table name. */
  name: string;
  /** Compared case-insensitively with the dbt `database` when both are present. */
  catalog: string | null;
  /** Compared case-insensitively with the dbt schema (or the target schema). */
  db: string | null;
  /** Host columns; their names are compared case-insensitively with dbt column names. */
  columns: { id: string; name: string }[];
}
|
||||
|
||||
/**
 * Per-table summary produced by `matchDbtTables`: whether a dbt table found a host
 * table and how many of its descriptions could be imported.
 */
export interface DbtTableMatch {
  /** Name of the dbt table. */
  dbtTable: string;
  /** Schema declared on the dbt table, if any. */
  dbtSchema: string | null;
  /** Database declared on the dbt table, if any. */
  dbtDatabase: string | null;
  /** Id of the matched host table, or null when nothing matched. */
  hostTableId: string | null;
  /** Name of the matched host table, or null when nothing matched. */
  hostTableName: string | null;
  /** True when a host table was found. */
  matched: boolean;
  /** `'import'` only when matched and the dbt table has a description; otherwise `'skip'`. */
  tableDescriptionAction: 'skip' | 'import';
  /** True when the dbt table has a non-empty description, matched or not. */
  tableDescriptionFound: boolean;
  /** Number of matched columns that carry a description (0 when unmatched). */
  columnsToImport: number;
  /** Number of dbt columns that exist on the host table (0 when unmatched). */
  columnsMatched: number;
  /** Total number of columns declared on the dbt table. */
  columnsTotal: number;
  /**
   * Number of columns with a description. For matched tables only matched columns
   * are counted; for unmatched tables every dbt column is counted.
   */
  columnDescriptionsFound: number;
}
|
||||
|
||||
/**
 * Matches every dbt table against the host tables and summarizes the outcome.
 *
 * The result has one entry per input table, in input order. Unmatched tables get
 * null host fields, a `'skip'` description action and zero import/match counts, but
 * still report how many of their columns carry descriptions.
 *
 * @param dbtTables - Tables parsed from dbt schema files.
 * @param hostTables - Candidate host tables.
 * @param targetSchema - Schema to assume for dbt tables that declare none.
 * @returns One `DbtTableMatch` per dbt table.
 */
export function matchDbtTables(
  dbtTables: DbtParsedTable[],
  hostTables: DbtHostTableLite[],
  targetSchema?: string | null,
): DbtTableMatch[] {
  return dbtTables.map((parsed): DbtTableMatch => {
    const host = findMatchingKloTable(parsed, hostTables, targetSchema);

    // Fields that are filled in the same way whether or not a host table was found.
    const identity = {
      dbtTable: parsed.name,
      dbtSchema: parsed.schema,
      dbtDatabase: parsed.database,
    };

    if (host) {
      const columnSummary = analyzeColumns(parsed, host);
      return {
        ...identity,
        hostTableId: host.id,
        hostTableName: host.name,
        matched: true,
        tableDescriptionAction: parsed.description ? 'import' : 'skip',
        tableDescriptionFound: Boolean(parsed.description),
        ...columnSummary,
      };
    }

    return {
      ...identity,
      hostTableId: null,
      hostTableName: null,
      matched: false,
      tableDescriptionAction: 'skip',
      tableDescriptionFound: Boolean(parsed.description),
      columnsToImport: 0,
      columnsMatched: 0,
      columnsTotal: parsed.columns.length,
      columnDescriptionsFound: parsed.columns.filter((column) => Boolean(column.description)).length,
    };
  });
}
|
||||
|
||||
/**
 * Finds the host table that corresponds to a dbt table.
 *
 * Matching proceeds in two stages, all comparisons being case-insensitive:
 * 1. If a schema is known (the dbt table's own schema, else `targetSchema`), return
 *    the first host table whose name and `db` both match. When the dbt table has a
 *    database and the host table has a catalog, those must match as well.
 * 2. Otherwise, or when stage 1 found nothing, fall back to a name-only match, which
 *    is accepted only if exactly one host table has that name. Tables whose
 *    `resourceType` is `'source'` never use this fallback.
 *
 * @param dbtTable - The dbt table to locate.
 * @param hostTables - Candidate host tables.
 * @param targetSchema - Schema to assume when the dbt table declares none.
 * @returns The matching host table, or undefined when there is no acceptable match.
 */
export function findMatchingKloTable(
  dbtTable: DbtParsedTable,
  hostTables: DbtHostTableLite[],
  targetSchema?: string | null,
): DbtHostTableLite | undefined {
  const wantedName = dbtTable.name.toLowerCase();
  const schema = dbtTable.schema ?? targetSchema ?? null;

  if (schema) {
    for (const candidate of hostTables) {
      const sameName = candidate.name.toLowerCase() === wantedName;
      const sameSchema = candidate.db?.toLowerCase() === schema.toLowerCase();
      if (!(sameName && sameSchema)) {
        continue;
      }
      // The catalog is only checked when both sides actually specify one.
      const catalogComparable = Boolean(dbtTable.database && candidate.catalog);
      if (!catalogComparable || candidate.catalog!.toLowerCase() === dbtTable.database!.toLowerCase()) {
        return candidate;
      }
    }
  }

  if (dbtTable.resourceType === 'source') {
    return undefined;
  }

  const sameNameTables = hostTables.filter((candidate) => candidate.name.toLowerCase() === wantedName);
  if (sameNameTables.length !== 1) {
    return undefined;
  }
  return sameNameTables[0];
}
|
||||
|
||||
/**
 * Counts how the columns of a dbt table line up with a matched host table.
 *
 * A dbt column is "matched" when the host table has a column with the same name,
 * ignoring case. Only matched columns with a description are counted as importable,
 * so `columnsToImport` and `columnDescriptionsFound` are always equal here.
 *
 * @param dbtTable - The dbt table whose columns are analyzed.
 * @param hostTable - The host table it was matched with.
 * @returns The four column counters of a `DbtTableMatch`.
 */
function analyzeColumns(
  dbtTable: DbtParsedTable,
  hostTable: DbtHostTableLite,
): Pick<DbtTableMatch, 'columnsToImport' | 'columnsMatched' | 'columnsTotal' | 'columnDescriptionsFound'> {
  const matchedColumns = dbtTable.columns.filter((dbtColumn) =>
    hostTable.columns.some((hostColumn) => hostColumn.name.toLowerCase() === dbtColumn.name.toLowerCase()),
  );
  const describedCount = matchedColumns.filter((dbtColumn) => Boolean(dbtColumn.description)).length;

  return {
    columnsToImport: describedCount,
    columnsMatched: matchedColumns.length,
    columnsTotal: dbtTable.columns.length,
    columnDescriptionsFound: describedCount,
  };
}
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { ParsedSemanticModel } from '../metricflow/deep-parse.js';
|
||||
import { mergeSemanticModelTables } from './merge-semantic-model-tables.js';
|
||||
import type { DbtSchemaParseResult } from './parse-schema.js';
|
||||
|
||||
// MetricFlow semantic model fixture that refers to the dbt model `fct_orders` and has
// one described categorical dimension plus one undescribed time dimension.
const semanticModel: ParsedSemanticModel = {
  name: 'orders_semantic',
  description: 'Order facts',
  modelRef: 'fct_orders',
  dimensions: [
    { name: 'status', column: 'status', type: 'categorical', description: 'Order status' },
    { name: 'ordered_at', column: 'ordered_at', type: 'time' },
  ],
  measures: [],
  entities: [],
  defaultTimeDimension: null,
};

describe('mergeSemanticModelTables', () => {
  /** A parse result with no tables and no relationships. */
  const emptyParseResult = (): DbtSchemaParseResult => ({
    projectName: null,
    dbtVersion: null,
    tables: [],
    relationships: [],
  });

  it('adds missing MetricFlow model refs as dbt model tables', () => {
    const input: DbtSchemaParseResult = emptyParseResult();

    const merged = mergeSemanticModelTables(input, [semanticModel]);

    expect(merged).toEqual({
      projectName: null,
      dbtVersion: null,
      relationships: [],
      tables: [
        {
          name: 'fct_orders',
          description: 'Order facts',
          database: null,
          schema: null,
          resourceType: 'model',
          columns: [
            { name: 'status', description: 'Order status', dataType: null },
            { name: 'ordered_at', description: null, dataType: 'TIMESTAMP' },
          ],
        },
      ],
    });
  });

  it('does not add a duplicate table when schema parsing already found the model ref', () => {
    // Same model name as the fixture's modelRef, differing only in letter case.
    const input: DbtSchemaParseResult = {
      ...emptyParseResult(),
      tables: [
        {
          name: 'FCT_ORDERS',
          description: 'Existing',
          database: null,
          schema: null,
          resourceType: 'model',
          columns: [],
        },
      ],
    };

    const merged = mergeSemanticModelTables(input, [semanticModel]);

    expect(merged.tables).toHaveLength(1);
  });
});
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import type { ParsedSemanticModel } from '../metricflow/deep-parse.js';
|
||||
import type { DbtSchemaParseResult } from './parse-schema.js';
|
||||
|
||||
/**
 * Adds a synthetic dbt model table for every semantic model whose `modelRef` is not
 * already among the parsed tables.
 *
 * Table names are compared case-insensitively, and a `modelRef` that occurs more than
 * once is only added the first time. Each synthetic table has no database or schema,
 * takes the semantic model's description, and gets one column per dimension; `time`
 * dimensions get the data type `'TIMESTAMP'`, all others `null`.
 *
 * The input is not mutated: the returned object holds fresh `tables` and
 * `relationships` arrays (the entries themselves are shared with the input).
 *
 * @param parseResult - Result of parsing the dbt schema files.
 * @param semanticModels - Parsed MetricFlow semantic models.
 * @returns A copy of `parseResult` with the missing tables appended.
 */
export function mergeSemanticModelTables(
  parseResult: DbtSchemaParseResult,
  semanticModels: ParsedSemanticModel[],
): DbtSchemaParseResult {
  const tables = [...parseResult.tables];
  const relationships = [...parseResult.relationships];

  // Lower-cased names of every table known so far, including ones added below.
  const knownNames = new Set<string>();
  for (const existing of tables) {
    knownNames.add(existing.name.toLowerCase());
  }

  for (const model of semanticModels) {
    const lookupKey = model.modelRef.toLowerCase();
    if (knownNames.has(lookupKey)) {
      continue;
    }

    const columns = model.dimensions.map((dimension) => ({
      name: dimension.column,
      description: dimension.description ?? null,
      dataType: dimension.type === 'time' ? 'TIMESTAMP' : null,
    }));

    tables.push({
      name: model.modelRef,
      description: model.description,
      database: null,
      schema: null,
      columns,
      resourceType: 'model',
    });
    knownNames.add(lookupKey);
  }

  return { ...parseResult, tables, relationships };
}
|
||||
|
|
@ -0,0 +1,214 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { parseDbtSchemaFile, parseDbtSchemaFiles } from './parse-schema.js';
|
||||
|
||||
describe('dbt descriptions schema parser', () => {
|
||||
it('resolves shared dbt vars and defaults before parsing schema YAML', () => {
|
||||
const result = parseDbtSchemaFile(
|
||||
`
|
||||
version: 2
|
||||
sources:
|
||||
- name: raw
|
||||
database: "{{ var('database') }}"
|
||||
schema: "{{ var('schema', 'fallback_schema') }}"
|
||||
tables:
|
||||
- name: orders
|
||||
identifier: fct_orders
|
||||
description: "Orders from {{ var('database') }}"
|
||||
columns:
|
||||
- name: customer_id
|
||||
description: "Customer id"
|
||||
tests:
|
||||
- relationships:
|
||||
to: ref('customers')
|
||||
field: id
|
||||
models:
|
||||
- name: "{{ var('model_name', 'orders_model') }}"
|
||||
schema: "{{ var('model_schema') }}"
|
||||
columns:
|
||||
- name: id
|
||||
description: "Order id"
|
||||
`,
|
||||
{ path: 'models/schema.yml', variables: new Map([['database', 'analytics'], ['model_schema', 'mart']]) },
|
||||
);
|
||||
|
||||
expect(result.tables).toEqual([
|
||||
{
|
||||
name: 'fct_orders',
|
||||
description: 'Orders from analytics',
|
||||
database: 'analytics',
|
||||
schema: 'fallback_schema',
|
||||
columns: [
|
||||
{
|
||||
name: 'customer_id',
|
||||
description: 'Customer id',
|
||||
dataType: null,
|
||||
dataTests: [{ name: 'relationships', package: 'dbt', kwargs: { to: "ref('customers')", field: 'id' } }],
|
||||
},
|
||||
],
|
||||
resourceType: 'source',
|
||||
},
|
||||
{
|
||||
name: 'orders_model',
|
||||
description: null,
|
||||
database: null,
|
||||
schema: 'mart',
|
||||
columns: [{ name: 'id', description: 'Order id', dataType: null }],
|
||||
resourceType: 'model',
|
||||
},
|
||||
]);
|
||||
expect(result.relationships).toEqual([
|
||||
{
|
||||
fromTable: 'fct_orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
fromSchema: 'fallback_schema',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('deduplicates tables by database schema and name while merging columns', () => {
|
||||
const result = parseDbtSchemaFiles([
|
||||
{
|
||||
path: 'models/a.yml',
|
||||
content: `
|
||||
version: 2
|
||||
models:
|
||||
- name: orders
|
||||
description: Orders
|
||||
columns:
|
||||
- name: id
|
||||
description: Primary key
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: 'models/b.yml',
|
||||
content: `
|
||||
version: 2
|
||||
models:
|
||||
- name: orders
|
||||
columns:
|
||||
- name: status
|
||||
description: Status
|
||||
- name: id
|
||||
data_type: integer
|
||||
`,
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result.tables).toEqual([
|
||||
{
|
||||
name: 'orders',
|
||||
description: 'Orders',
|
||||
database: null,
|
||||
schema: null,
|
||||
resourceType: 'model',
|
||||
columns: [
|
||||
{ name: 'id', description: 'Primary key', dataType: 'integer' },
|
||||
{ name: 'status', description: 'Status', dataType: null },
|
||||
],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('returns an empty result for malformed YAML and preserves unresolved Jinja text', () => {
|
||||
expect(parseDbtSchemaFile('{{{{ invalid yaml', { path: 'broken.yml' })).toEqual({
|
||||
projectName: null,
|
||||
dbtVersion: null,
|
||||
tables: [],
|
||||
relationships: [],
|
||||
});
|
||||
|
||||
const unresolved = parseDbtSchemaFile(
|
||||
`
|
||||
version: 2
|
||||
models:
|
||||
- name: "{{ var('missing_model') }}"
|
||||
`,
|
||||
{ variables: new Map() },
|
||||
);
|
||||
expect(unresolved.tables[0]?.name).toBe("{{ var('missing_model') }}");
|
||||
});
|
||||
|
||||
it('extracts data tests, constraints, enum values, tags, and freshness', () => {
|
||||
const result = parseDbtSchemaFile(`
|
||||
version: 2
|
||||
sources:
|
||||
- name: raw
|
||||
schema: jaffle
|
||||
tags: ["raw"]
|
||||
tables:
|
||||
- name: customers
|
||||
tags: ["core"]
|
||||
loaded_at_field: updated_at
|
||||
freshness:
|
||||
warn_after: { count: 12, period: hour }
|
||||
columns:
|
||||
- name: id
|
||||
tests:
|
||||
- not_null
|
||||
- unique
|
||||
- name: status
|
||||
data_tests:
|
||||
- accepted_values:
|
||||
values: ['active', 'inactive']
|
||||
models:
|
||||
- name: orders
|
||||
tags: ["finance"]
|
||||
loaded_at_field: run_at
|
||||
columns:
|
||||
- name: status
|
||||
data_tests:
|
||||
- dbt_utils.expression_is_true:
|
||||
expression: "status is not null"
|
||||
- accepted_values: ['placed', 'shipped']
|
||||
`);
|
||||
|
||||
const customers = result.tables.find((table) => table.name === 'customers');
|
||||
expect(customers?.tagsDbt).toEqual(['raw', 'core']);
|
||||
expect(customers?.freshnessDbt?.loadedAtField).toBe('updated_at');
|
||||
expect(customers?.freshnessDbt?.raw).toBeDefined();
|
||||
const id = customers?.columns.find((column) => column.name === 'id');
|
||||
expect(id?.constraints?.dbt).toEqual({ not_null: true, unique: true });
|
||||
const status = customers?.columns.find((column) => column.name === 'status');
|
||||
expect(status?.enumValuesDbt).toEqual(['active', 'inactive']);
|
||||
|
||||
const orders = result.tables.find((table) => table.name === 'orders');
|
||||
expect(orders?.tagsDbt).toEqual(['finance']);
|
||||
expect(orders?.freshnessDbt?.loadedAtField).toBe('run_at');
|
||||
const ordersStatus = orders?.columns.find((column) => column.name === 'status');
|
||||
expect(ordersStatus?.enumValuesDbt).toEqual(['placed', 'shipped']);
|
||||
expect(ordersStatus?.dataTests).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({ package: 'dbt_utils', name: 'expression_is_true' }),
|
||||
expect.objectContaining({ package: 'dbt', name: 'accepted_values' }),
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it('parses relationships from model column data tests', () => {
|
||||
const result = parseDbtSchemaFile(`
|
||||
version: 2
|
||||
models:
|
||||
- name: orders
|
||||
schema: public
|
||||
columns:
|
||||
- name: customer_id
|
||||
data_tests:
|
||||
- relationships:
|
||||
arguments:
|
||||
to: "ref('customers')"
|
||||
field: id
|
||||
`);
|
||||
|
||||
expect(result.relationships).toEqual([
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
fromSchema: 'public',
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,655 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { parse as parseYaml } from 'yaml';
|
||||
import { type KloLogger, noopLogger } from '../../../core/index.js';
|
||||
import { resolveJinjaVariables } from '../../dbt-shared/project-vars.js';
|
||||
|
||||
/** A column parsed from a dbt schema file. */
export interface DbtParsedColumn {
  name: string;
  description: string | null;
  /** Value of the column's `data_type` key, or null when absent. */
  dataType: string | null;
  /** Tests declared on the column; only set when at least one test was parsed. */
  dataTests?: DbtDataTestRef[];
  /** Only set when a `not_null` or `unique` test was found on the column. */
  constraints?: DbtColumnConstraints;
  /** Stringified values collected from `accepted_values` tests; only set when non-empty. */
  enumValuesDbt?: string[];
}

/** Reference to a data test attached to a column. */
export interface DbtDataTestRef {
  /** Test name without the package prefix. */
  name: string;
  /** Text before the first `.` of a test name, or `'dbt'` when the name has no dot. */
  package: string;
  /** Arguments given to the test, when they were written as a mapping. */
  kwargs?: Record<string, unknown>;
}

/** Column constraints implied by dbt's `not_null` and `unique` tests. */
export interface DbtColumnConstraints {
  dbt: {
    not_null?: boolean;
    unique?: boolean;
  };
}

/** A column-to-column relationship derived from a dbt `relationships` test. */
export interface DbtParsedRelationship {
  /** Table that declares the test. */
  fromTable: string;
  /** Column that declares the test. */
  fromColumn: string;
  toTable: string;
  toColumn: string;
  fromSchema?: string;
  toSchema?: string;
  description?: string;
}

/** A source table or model parsed from a dbt schema file. */
export interface DbtParsedTable {
  /** For source tables this is the `identifier` when one is given, else the table name. */
  name: string;
  description: string | null;
  database: string | null;
  /** For source tables: the source's `schema`, falling back to the source name. */
  schema: string | null;
  columns: DbtParsedColumn[];
  /** Whether the table came from a `sources:` or a `models:` entry. */
  resourceType?: 'source' | 'model';
  /** Tags of the entry; for source tables combined with the tags of the enclosing source. */
  tagsDbt?: string[];
  /** Freshness information taken from the entry's `freshness` and `loaded_at_field` keys. */
  freshnessDbt?: {
    raw?: unknown;
    loadedAtField?: string | null;
  };
}

/** Combined outcome of parsing one or more dbt schema files. */
export interface DbtSchemaParseResult {
  projectName: string | null;
  dbtVersion: string | null;
  tables: DbtParsedTable[];
  relationships: DbtParsedRelationship[];
}

/** A schema file handed to the parser: its raw text and the path it came from. */
export interface DbtSchemaFile {
  content: string;
  path: string;
}
|
||||
|
||||
/** Options accepted when parsing a single schema file. */
interface ParseDbtSchemaOptions {
  /** File path, used only in log messages. */
  path?: string;
  /** dbt variables substituted into the YAML text before it is parsed. */
  variables?: Map<string, string>;
  /** Copied into the result's `projectName`. */
  projectName?: string | null;
  /** Destination for debug and warning messages. */
  logger?: KloLogger;
}

// The interfaces below describe the raw YAML document. The parsed YAML is cast to
// them without validation, so they state the expected shape, not a guarantee.

/** Top level of a dbt schema YAML file. */
interface DbtSchemaYaml {
  version?: number;
  sources?: DbtSchemaSource[];
  models?: DbtSchemaModel[];
}

/** One entry of the `sources:` list. */
interface DbtSchemaSource {
  name: string;
  description?: string;
  database?: string;
  /** When absent, the source `name` is used as the schema of its tables. */
  schema?: string;
  tags?: string[];
  tables?: DbtSchemaTable[];
}

/** One entry of a source's `tables:` list. */
interface DbtSchemaTable {
  name: string;
  description?: string;
  /** When present, used as the parsed table name instead of `name`. */
  identifier?: string;
  tags?: string[];
  loaded_at_field?: string;
  freshness?: unknown;
  columns?: DbtSchemaColumn[];
}

/** One entry of the `models:` list. */
interface DbtSchemaModel {
  name: string;
  description?: string;
  database?: string;
  schema?: string;
  tags?: string[];
  loaded_at_field?: string;
  freshness?: unknown;
  columns?: DbtSchemaColumn[];
}

/** One entry of a `columns:` list. */
interface DbtSchemaColumn {
  name: string;
  description?: string;
  data_type?: string;
  /** Read in preference to `tests` when both keys are present. */
  data_tests?: DbtSchemaDataTest[];
  tests?: DbtSchemaDataTest[];
}

/**
 * One entry of a column's test list: either a bare test name or a mapping keyed by
 * test name.
 */
type DbtSchemaDataTest =
  | string
  | {
      /** Target and field may be given directly or nested under `arguments`. */
      relationships?: {
        to?: string;
        field?: string;
        arguments?: { to?: string; field?: string };
      };
      not_null?: unknown;
      unique?: unknown;
      /** Either a bare list of values or a mapping with a `values` list. */
      accepted_values?: { values?: unknown } | unknown;
      [key: string]: unknown;
    };
|
||||
|
||||
/**
 * Parses the text of a single dbt schema YAML file.
 *
 * @param content - Raw YAML text.
 * @param options - Optional path, variables, project name and logger. Without a
 *   logger, `noopLogger` is used.
 * @returns The tables and relationships found in the file.
 */
export function parseDbtSchemaFile(content: string, options: ParseDbtSchemaOptions = {}): DbtSchemaParseResult {
  const parser = new DbtSchemaParser(options.logger ?? noopLogger);
  return parser.parseFile(content, options);
}
|
||||
|
||||
/**
 * Parses several dbt schema files and combines them into one result.
 *
 * @param files - Schema files to parse.
 * @param variables - dbt variables substituted into each file before parsing.
 * @param options - Optional project name (defaults to null) and logger (defaults to
 *   `noopLogger`).
 * @returns The combined tables and relationships.
 */
export function parseDbtSchemaFiles(
  files: DbtSchemaFile[],
  variables?: Map<string, string>,
  options: { projectName?: string | null; logger?: KloLogger } = {},
): DbtSchemaParseResult {
  const logger = options.logger ?? noopLogger;
  const projectName = options.projectName ?? null;
  const parser = new DbtSchemaParser(logger);
  return parser.parseFiles(files, variables, projectName);
}
|
||||
|
||||
/**
 * Computes a short fingerprint of a set of schema files.
 *
 * The files are ordered by path on a copy (the input array is left untouched), each
 * is rendered as `<path>:<content>`, the pieces are joined with newlines, and the
 * first 16 hex characters of the SHA-256 digest are returned. The result therefore
 * does not depend on the order in which the files are passed in.
 *
 * NOTE(review): ordering relies on `localeCompare`, which is locale-sensitive, so the
 * hash may differ between environments with different default locales — confirm
 * whether a locale-independent comparison is wanted before changing it, as that
 * would alter existing hashes.
 *
 * @param files - Schema files to fingerprint.
 * @returns A 16-character lowercase hex string.
 */
export function computeDbtSchemaHash(files: DbtSchemaFile[]): string {
  const ordered = Array.from(files).sort((left, right) => left.path.localeCompare(right.path));
  const payload = ordered.map((file) => `${file.path}:${file.content}`).join('\n');

  const hasher = createHash('sha256');
  hasher.update(payload);
  return hasher.digest('hex').substring(0, 16);
}
|
||||
|
||||
class DbtSchemaParser {
|
||||
constructor(private readonly logger: KloLogger) {}
|
||||
|
||||
/**
 * Parses one schema file into tables and relationships.
 *
 * When `options.variables` is given, Jinja variable references are substituted first
 * and any that stay unresolved are reported with a warning. YAML that fails to
 * parse, or that does not parse to an object, yields an empty result instead of
 * throwing.
 *
 * @param yamlContent - Raw YAML text of the schema file.
 * @param options - Path (for log messages), variables and project name.
 * @returns Source tables followed by models, plus their relationships.
 */
parseFile(yamlContent: string, options: ParseDbtSchemaOptions = {}): DbtSchemaParseResult {
  const projectName = options.projectName ?? null;
  this.logger.debug(`Parsing schema file: ${options.path ?? 'unknown'}`);

  let content = yamlContent;
  if (options.variables) {
    const substitution = resolveJinjaVariables(yamlContent, options.variables);
    content = substitution.content;
    if (substitution.unresolvedVars.length > 0) {
      this.logger.warn(
        `Unresolved dbt variables in ${options.path ?? 'schema file'}: ${substitution.unresolvedVars.join(', ')}`,
      );
    }
  }

  let document: DbtSchemaYaml;
  try {
    document = parseYaml(content) as DbtSchemaYaml;
  } catch (error) {
    this.logger.warn(`Failed to parse YAML${options.path ? ` at ${options.path}` : ''}: ${error}`);
    return this.emptyResult(projectName);
  }

  // Empty files and scalar documents carry nothing to extract.
  if (!document || typeof document !== 'object') {
    return this.emptyResult(projectName);
  }

  const sourceTables = this.parseSources(document.sources);
  const modelTables = this.parseModels(document.models);
  const sourceRelationships = this.parseSourceRelationships(document.sources);
  const modelRelationships = this.parseModelRelationships(document.models);

  return {
    projectName,
    dbtVersion: null,
    tables: [...sourceTables, ...modelTables],
    relationships: [...sourceRelationships, ...modelRelationships],
  };
}
|
||||
|
||||
/**
 * Parses every file with the same variables and project name, concatenates the
 * results in file order, then deduplicates tables and relationships.
 *
 * @param files - Schema files to parse.
 * @param variables - dbt variables substituted into each file before parsing.
 * @param projectName - Copied into the result; defaults to null.
 * @returns The combined, deduplicated parse result.
 */
parseFiles(
  files: DbtSchemaFile[],
  variables?: Map<string, string>,
  projectName: string | null = null,
): DbtSchemaParseResult {
  const perFile = files.map((file) => this.parseFile(file.content, { path: file.path, variables, projectName }));

  const collectedTables = perFile.flatMap((result) => result.tables);
  const collectedRelationships = perFile.flatMap((result) => result.relationships);

  return {
    projectName,
    dbtVersion: null,
    tables: this.deduplicateTables(collectedTables),
    relationships: this.deduplicateRelationships(collectedRelationships),
  };
}
|
||||
|
||||
/**
 * Flattens the `sources:` section into one parsed table per source table.
 *
 * Each table takes its database and schema from the enclosing source (the schema
 * falls back to the source name), uses `identifier` as its name when one is given,
 * and combines the source's tags with its own. Sources without a `tables` list
 * contribute nothing.
 *
 * NOTE(review): unlike `parseModels`, entries without a name are not skipped here —
 * confirm whether that difference is intended.
 *
 * @param sources - Raw `sources` value from the YAML document.
 * @returns The parsed source tables, in document order.
 */
private parseSources(sources: DbtSchemaSource[] | undefined): DbtParsedTable[] {
  if (!Array.isArray(sources)) {
    return [];
  }

  return sources.flatMap((source) => {
    const inheritedSchema = source.schema ?? source.name;
    const inheritedDatabase = source.database ?? null;
    const inheritedTags = this.normalizeTagList(source.tags);

    if (!Array.isArray(source.tables)) {
      return [];
    }

    return source.tables.map((entry): DbtParsedTable => {
      const tagsDbt = this.mergeTagsDbt(inheritedTags, this.normalizeTagList(entry.tags));
      const freshnessDbt = this.buildFreshnessDbt(entry.freshness, entry.loaded_at_field);

      return {
        name: entry.identifier ?? entry.name,
        description: this.normalizeDescription(entry.description),
        database: inheritedDatabase,
        schema: inheritedSchema,
        columns: this.parseColumns(entry.columns),
        resourceType: 'source',
        // Optional keys are left off entirely when there is nothing to report.
        ...(tagsDbt ? { tagsDbt } : {}),
        ...(freshnessDbt ? { freshnessDbt } : {}),
      };
    });
  });
}
|
||||
|
||||
/**
 * Converts the `models:` section into parsed tables.
 *
 * Models without a name are skipped. Database and schema are taken from the model
 * entry itself and default to null.
 *
 * @param models - Raw `models` value from the YAML document.
 * @returns The parsed model tables, in document order.
 */
private parseModels(models: DbtSchemaModel[] | undefined): DbtParsedTable[] {
  if (!Array.isArray(models)) {
    return [];
  }

  return models.flatMap((entry): DbtParsedTable[] => {
    if (!entry.name) {
      return [];
    }

    const tagsDbt = this.mergeTagsDbt(this.normalizeTagList(entry.tags));
    const freshnessDbt = this.buildFreshnessDbt(entry.freshness, entry.loaded_at_field);

    const parsed: DbtParsedTable = {
      name: entry.name,
      description: this.normalizeDescription(entry.description),
      database: entry.database ?? null,
      schema: entry.schema ?? null,
      columns: this.parseColumns(entry.columns),
      resourceType: 'model',
      // Optional keys are left off entirely when there is nothing to report.
      ...(tagsDbt ? { tagsDbt } : {}),
      ...(freshnessDbt ? { freshnessDbt } : {}),
    };
    return [parsed];
  });
}
|
||||
|
||||
/**
 * Converts a raw `columns:` list into parsed columns.
 *
 * Tests are read from `data_tests`, falling back to `tests`. The optional keys
 * `dataTests`, `constraints` and `enumValuesDbt` are only added when the tests
 * produced something for them.
 *
 * @param columns - Raw `columns` value of a table or model.
 * @returns The parsed columns, in document order; empty when `columns` is not a list.
 */
private parseColumns(columns: DbtSchemaColumn[] | undefined): DbtParsedColumn[] {
  if (!Array.isArray(columns)) {
    return [];
  }

  const parsedColumns: DbtParsedColumn[] = [];
  for (const raw of columns) {
    const tests = this.parseDataTests(raw.data_tests ?? raw.tests);

    const parsed: DbtParsedColumn = {
      name: raw.name,
      description: this.normalizeDescription(raw.description),
      dataType: raw.data_type ?? null,
    };
    if (tests.refs.length > 0) {
      parsed.dataTests = tests.refs;
    }
    if (tests.constraints) {
      parsed.constraints = tests.constraints;
    }
    if (tests.enumValues.length > 0) {
      parsed.enumValuesDbt = tests.enumValues;
    }
    parsedColumns.push(parsed);
  }
  return parsedColumns;
}
|
||||
|
||||
/**
 * Interprets a column's test list.
 *
 * Every test becomes a `DbtDataTestRef`. In addition:
 * - `not_null` and `unique` (bare names or mapping keys) set the matching constraint;
 * - `accepted_values` contributes its values, stringified, to `enumValues`. The values
 *   may be given as a bare list, under `values`, or under `arguments.values` — the
 *   same `arguments` nesting the relationship parser in this class accepts.
 *
 * The YAML is not validated before it reaches this method, so malformed input is
 * tolerated rather than trusted: a `tests` value that is not a list yields no tests
 * (previously a scalar string was iterated character by character), and list entries
 * that are neither a string nor a mapping — e.g. an empty `-` item, which parses to
 * null — are skipped instead of throwing.
 *
 * @param tests - Raw `data_tests` / `tests` value of a column.
 * @returns The test references, the constraints (undefined when neither `not_null`
 *   nor `unique` was found) and the collected enum values.
 */
private parseDataTests(tests: DbtSchemaDataTest[] | undefined): {
  refs: DbtDataTestRef[];
  constraints: DbtColumnConstraints | undefined;
  enumValues: string[];
} {
  const refs: DbtDataTestRef[] = [];
  const dbt: { not_null?: boolean; unique?: boolean } = {};
  const enumValues: string[] = [];

  if (!Array.isArray(tests) || tests.length === 0) {
    return { refs, constraints: undefined, enumValues };
  }

  // Returns the value as a plain mapping, or undefined for null, arrays and scalars.
  const asRecord = (candidate: unknown): Record<string, unknown> | undefined =>
    candidate !== null && typeof candidate === 'object' && !Array.isArray(candidate)
      ? (candidate as Record<string, unknown>)
      : undefined;

  for (const test of tests as unknown[]) {
    if (typeof test === 'string') {
      const parsed = this.parseTestNameString(test);
      refs.push(parsed);
      if (parsed.package === 'dbt' && parsed.name === 'not_null') {
        dbt.not_null = true;
      }
      if (parsed.package === 'dbt' && parsed.name === 'unique') {
        dbt.unique = true;
      }
      continue;
    }

    // Empty list items and stray scalars carry no test information.
    if (test === null || typeof test !== 'object') {
      continue;
    }

    for (const [key, value] of Object.entries(test) as Array<[string, unknown]>) {
      const kwargs = asRecord(value);

      if (key === 'relationships') {
        refs.push({ name: 'relationships', package: 'dbt', ...(kwargs ? { kwargs } : {}) });
        continue;
      }
      if (key === 'not_null') {
        refs.push({ name: 'not_null', package: 'dbt' });
        dbt.not_null = true;
        continue;
      }
      if (key === 'unique') {
        refs.push({ name: 'unique', package: 'dbt' });
        dbt.unique = true;
        continue;
      }
      if (key === 'accepted_values') {
        if (Array.isArray(value)) {
          enumValues.push(...value.map((item) => String(item)));
          refs.push({ name: 'accepted_values', package: 'dbt', kwargs: { values: value } });
          continue;
        }
        if (kwargs) {
          // Prefer a direct `values` list; otherwise look under `arguments`.
          const direct = kwargs.values;
          const values = Array.isArray(direct) ? direct : asRecord(kwargs.arguments)?.values;
          if (Array.isArray(values)) {
            enumValues.push(...values.map((item) => String(item)));
          }
          refs.push({ name: 'accepted_values', package: 'dbt', kwargs });
          continue;
        }
        // Any other shape falls through and is recorded as a generic test below.
      }

      refs.push({
        ...this.parseTestNameString(key),
        ...(kwargs ? { kwargs } : {}),
      });
    }
  }

  const constraints = dbt.not_null || dbt.unique ? { dbt } : undefined;
  return { refs, constraints, enumValues };
}
|
||||
|
||||
/**
 * Splits a test name into package and name at the first `.`.
 *
 * `dbt_utils.expression_is_true` becomes package `dbt_utils`, name
 * `expression_is_true`; any further dots stay in the name. A name without a dot
 * gets the package `'dbt'`.
 *
 * @param value - Test name as written in the schema file.
 * @returns The package and the unqualified test name.
 */
private parseTestNameString(value: string): { name: string; package: string } {
  const firstDot = value.indexOf('.');
  if (firstDot === -1) {
    return { package: 'dbt', name: value };
  }
  return { package: value.slice(0, firstDot), name: value.slice(firstDot + 1) };
}
|
||||
|
||||
/**
 * Collects relationships declared by `relationships` tests on source table columns.
 *
 * The originating table is named by its `identifier` when one is given, and the
 * originating schema is the source's `schema`, falling back to the source name.
 * Tests are read from `data_tests`, falling back to `tests`.
 *
 * @param sources - Raw `sources` value from the YAML document.
 * @returns Every relationship that could be parsed, in document order.
 */
private parseSourceRelationships(sources: DbtSchemaSource[] | undefined): DbtParsedRelationship[] {
  if (!Array.isArray(sources)) {
    return [];
  }

  const found: DbtParsedRelationship[] = [];

  for (const source of sources) {
    const fromSchema = source.schema ?? source.name;
    const sourceTables = Array.isArray(source.tables) ? source.tables : [];

    for (const entry of sourceTables) {
      const fromTable = entry.identifier ?? entry.name;
      const tableColumns = Array.isArray(entry.columns) ? entry.columns : [];

      for (const column of tableColumns) {
        for (const test of column.data_tests ?? column.tests ?? []) {
          const parsed = this.parseRelationshipTest(test, fromTable, column.name, fromSchema);
          if (parsed) {
            found.push(parsed);
          }
        }
      }
    }
  }

  return found;
}
|
||||
|
||||
/**
 * Collects relationship tests declared on the columns of dbt models.
 *
 * Models without a name or without a `columns` array are skipped. The model's
 * `schema`, when set, is passed along as the originating schema.
 *
 * @param models - The `models` section of a schema file, if any.
 * @returns Every relationship that `parseRelationshipTest` could resolve.
 */
private parseModelRelationships(models: DbtSchemaModel[] | undefined): DbtParsedRelationship[] {
  const found: DbtParsedRelationship[] = [];
  if (!Array.isArray(models)) {
    return found;
  }

  for (const model of models) {
    if (!model.name || !Array.isArray(model.columns)) {
      continue;
    }
    const schemaName = model.schema ?? undefined;

    for (const column of model.columns) {
      // `data_tests` takes precedence over `tests` when both are present.
      for (const test of column.data_tests ?? column.tests ?? []) {
        const parsed = this.parseRelationshipTest(test, model.name, column.name, schemaName);
        if (parsed !== null) {
          found.push(parsed);
        }
      }
    }
  }

  return found;
}
|
||||
|
||||
/**
 * Converts one column test entry into a parsed relationship, if it is a
 * `relationships` test with a resolvable target.
 *
 * The target reference and column are read from `to` / `field` directly on
 * the test, falling back to the same keys under `arguments`.
 *
 * @param test - Test entry; plain string tests are never relationships.
 * @param fromTable - Table that owns the tested column.
 * @param fromColumn - Column the test is declared on.
 * @param fromSchema - Schema of the owning table; only included when truthy.
 * @returns The relationship, or `null` when the entry is not a relationship
 *   test, is missing its target, or its target reference cannot be parsed.
 */
private parseRelationshipTest(
  test: DbtSchemaDataTest,
  fromTable: string,
  fromColumn: string,
  fromSchema?: string,
): DbtParsedRelationship | null {
  if (typeof test === 'string') {
    return null;
  }
  const spec = test.relationships;
  if (!spec) {
    return null;
  }

  const targetRef = spec.to ?? spec.arguments?.to;
  const targetColumn = spec.field ?? spec.arguments?.field;
  if (!(targetRef && targetColumn)) {
    this.logger.debug(`Skipping incomplete relationship test for ${fromTable}.${fromColumn}`);
    return null;
  }

  const targetTable = this.parseRef(targetRef);
  if (!targetTable) {
    this.logger.debug(`Could not parse ref: ${targetRef}`);
    return null;
  }

  const base = { fromTable, fromColumn, toTable: targetTable, toColumn: targetColumn };
  return fromSchema ? { ...base, fromSchema } : base;
}
|
||||
|
||||
/**
 * Extracts the target table name from a dbt `ref(...)` or `source(...)`
 * expression.
 *
 * Supported forms:
 * - `ref('model')`
 * - `ref('package', 'model')` (the model is the second argument)
 * - `ref('model', v=2)` / `ref('package', 'model', version=2)`
 * - `source('source_name', 'table')` (the table is the second argument)
 *
 * @param refString - Raw `to:` value of a relationships test.
 * @returns The referenced model/table name, or `null` when the string matches
 *   none of the supported forms.
 */
private parseRef(refString: string): string | null {
  // Optional leading quoted package argument, then the quoted model name,
  // then optional trailing keyword arguments such as `v=2`.
  const refMatch = refString.match(
    /ref\s*\(\s*(?:['"][^'"]+['"]\s*,\s*)?['"]([^'"]+)['"]\s*(?:,[^)]*)?\)/,
  );
  if (refMatch) {
    return refMatch[1] ?? null;
  }

  const sourceMatch = refString.match(/source\s*\(\s*['"][^'"]+['"]\s*,\s*['"]([^'"]+)['"]\s*\)/);
  if (sourceMatch) {
    return sourceMatch[1] ?? null;
  }

  return null;
}
|
||||
|
||||
/**
 * Trims a description and collapses missing or blank text to `null`.
 *
 * @param description - Raw description, possibly absent.
 * @returns The trimmed text, or `null` when nothing is left after trimming.
 */
private normalizeDescription(description: string | undefined): string | null {
  const trimmed = description ? description.trim() : '';
  return trimmed === '' ? null : trimmed;
}
|
||||
|
||||
/**
 * Coerces a tag list to an array of strings.
 *
 * @param tags - Raw tag list; anything that is not an array yields `[]`.
 * @returns A new array with every entry passed through `String(...)`.
 */
private normalizeTagList(tags: string[] | undefined): string[] {
  return Array.isArray(tags) ? tags.map((tag) => String(tag)) : [];
}
|
||||
|
||||
/**
 * Concatenates several optional string lists, dropping duplicates while
 * keeping the position of each value's first occurrence.
 *
 * @param lists - Lists to merge in order; `undefined` entries are ignored.
 * @returns The merged list, or `undefined` when it would be empty.
 */
private mergeTagsDbt(...lists: Array<string[] | undefined>): string[] | undefined {
  // A Set iterates in insertion order, so first occurrences keep their place.
  const unique = new Set<string>();
  for (const list of lists) {
    for (const item of list ?? []) {
      unique.add(item);
    }
  }
  return unique.size > 0 ? [...unique] : undefined;
}
|
||||
|
||||
/**
 * Builds the freshness record for a table from its raw `freshness` value and
 * `loaded_at_field`.
 *
 * - Neither present: returns `undefined`.
 * - Freshness present: returns `{ raw, loadedAtField }`, where `loadedAtField`
 *   is the trimmed field name or `null` when it is missing or blank.
 * - Only a non-blank `loaded_at_field`: returns `{ loadedAtField }`.
 *
 * A blank or whitespace-only `loaded_at_field` is treated as absent in every
 * branch, so it is never emitted as an empty string.
 *
 * @param freshness - Raw freshness value; `null`/`undefined` mean "not set".
 * @param loadedAtField - Raw `loaded_at_field`, possibly absent.
 * @returns The freshness record, or `undefined` when there is nothing to record.
 */
private buildFreshnessDbt(freshness: unknown, loadedAtField: string | undefined): DbtParsedTable['freshnessDbt'] {
  // `||` (not `??`) so that an empty string after trimming counts as absent.
  const loadedAt = loadedAtField?.trim() || undefined;
  const hasFreshness = freshness !== undefined && freshness !== null;

  if (hasFreshness) {
    return { raw: freshness, loadedAtField: loadedAt ?? null };
  }
  return loadedAt ? { loadedAtField: loadedAt } : undefined;
}
|
||||
|
||||
/**
 * Collapses tables that share the same `database.schema.name`
 * (compared case-insensitively) into a single entry.
 *
 * The first occurrence wins for every field except: `description` (filled
 * from the later table when the first has none), `columns`, `tagsDbt` and
 * `freshnessDbt`, which are merged through the dedicated merge helpers.
 *
 * @param tables - Parsed tables, possibly containing duplicates.
 * @returns One table per distinct key, in order of first appearance.
 */
private deduplicateTables(tables: DbtParsedTable[]): DbtParsedTable[] {
  const byQualifiedName = new Map<string, DbtParsedTable>();

  for (const candidate of tables) {
    const qualifiedName = `${candidate.database ?? ''}.${candidate.schema ?? ''}.${candidate.name}`.toLowerCase();
    const kept = byQualifiedName.get(qualifiedName);

    if (kept) {
      const merged: DbtParsedTable = {
        ...kept,
        description: kept.description ?? candidate.description,
        columns: this.mergeColumns(kept.columns, candidate.columns),
        tagsDbt: this.mergeTagsDbt(kept.tagsDbt, candidate.tagsDbt),
        freshnessDbt: this.mergeFreshnessDbt(kept.freshnessDbt, candidate.freshnessDbt),
      };
      byQualifiedName.set(qualifiedName, merged);
    } else {
      byQualifiedName.set(qualifiedName, candidate);
    }
  }

  return [...byQualifiedName.values()];
}
|
||||
|
||||
/**
 * Merges two column lists by case-insensitive column name.
 *
 * Columns only present in one list are kept as-is. For columns present in
 * both, the existing column wins; its `description` and `dataType` are filled
 * from the incoming column when missing, and `dataTests`, `constraints` and
 * `enumValuesDbt` are merged through the dedicated merge helpers.
 *
 * @param existing - Columns already recorded for the table.
 * @param incoming - Columns from a later definition of the same table.
 * @returns Merged columns: existing ones first, then new incoming ones.
 */
private mergeColumns(existing: DbtParsedColumn[], incoming: DbtParsedColumn[]): DbtParsedColumn[] {
  const byName = new Map<string, DbtParsedColumn>(
    existing.map((column): [string, DbtParsedColumn] => [column.name.toLowerCase(), column]),
  );

  for (const next of incoming) {
    const nameKey = next.name.toLowerCase();
    const current = byName.get(nameKey);

    if (current) {
      const merged: DbtParsedColumn = {
        ...current,
        description: current.description ?? next.description,
        dataType: current.dataType ?? next.dataType,
        dataTests: this.mergeDbtDataTests(current.dataTests, next.dataTests),
        constraints: this.mergeDbtConstraints(current.constraints, next.constraints),
        enumValuesDbt: this.mergeStringList(current.enumValuesDbt, next.enumValuesDbt),
      };
      byName.set(nameKey, merged);
    } else {
      byName.set(nameKey, next);
    }
  }

  return [...byName.values()];
}
|
||||
|
||||
/**
 * Removes repeated relationships, keeping the first occurrence of each.
 *
 * Two relationships are considered the same when their
 * `fromTable.fromColumn->toTable.toColumn` strings match case-insensitively;
 * schema information is not part of the key.
 *
 * @param relationships - Parsed relationships, possibly with repeats.
 * @returns The relationships with later repeats dropped, in original order.
 */
private deduplicateRelationships(relationships: DbtParsedRelationship[]): DbtParsedRelationship[] {
  const seenKeys = new Set<string>();

  return relationships.filter((rel) => {
    const key = `${rel.fromTable}.${rel.fromColumn}->${rel.toTable}.${rel.toColumn}`.toLowerCase();
    if (seenKeys.has(key)) {
      return false;
    }
    seenKeys.add(key);
    return true;
  });
}
|
||||
|
||||
/**
 * Merges two freshness records, preferring values from `existing`.
 *
 * `raw` comes from `existing` unless it is `undefined` there; `loadedAtField`
 * comes from `existing` unless it is `null` or `undefined` there. Keys whose
 * merged value is `undefined` are left out of the result.
 *
 * @param existing - Freshness already recorded for the table.
 * @param incoming - Freshness from a later definition of the same table.
 * @returns The merged record, or `undefined` when both inputs are missing.
 */
private mergeFreshnessDbt(
  existing?: DbtParsedTable['freshnessDbt'],
  incoming?: DbtParsedTable['freshnessDbt'],
): DbtParsedTable['freshnessDbt'] {
  if (!(existing || incoming)) {
    return undefined;
  }

  // Strict `undefined` check: an explicit `null` raw on `existing` is kept.
  const raw = existing?.raw === undefined ? incoming?.raw : existing.raw;
  const loadedAtField = existing?.loadedAtField ?? incoming?.loadedAtField;

  const rawPart = raw !== undefined ? { raw } : {};
  const loadedAtPart = loadedAtField !== undefined ? { loadedAtField } : {};
  return { ...rawPart, ...loadedAtPart };
}
|
||||
|
||||
/**
 * Combines the `not_null` / `unique` flags of two constraint records.
 *
 * A flag is set in the result when it is truthy on either input; only flags
 * that are set appear in the result.
 *
 * @param existing - Constraints already recorded for the column.
 * @param incoming - Constraints from a later definition of the same column.
 * @returns The combined constraints, or `undefined` when neither flag is set.
 */
private mergeDbtConstraints(
  existing?: DbtColumnConstraints,
  incoming?: DbtColumnConstraints,
): DbtColumnConstraints | undefined {
  const candidates = [existing, incoming];
  const notNull = candidates.some((entry) => Boolean(entry?.dbt.not_null));
  const unique = candidates.some((entry) => Boolean(entry?.dbt.unique));

  if (!(notNull || unique)) {
    return undefined;
  }

  return {
    dbt: {
      ...(notNull ? { not_null: true } : {}),
      ...(unique ? { unique: true } : {}),
    },
  };
}
|
||||
|
||||
/**
 * Merges two optional string lists, dropping duplicates while keeping the
 * position of each value's first occurrence.
 *
 * @param existing - Values already recorded.
 * @param incoming - Values from a later definition.
 * @returns The merged list, or `undefined` when it would be empty.
 */
private mergeStringList(existing?: string[], incoming?: string[]): string[] | undefined {
  const combined = [...new Set<string>([...(existing ?? []), ...(incoming ?? [])])];
  return combined.length > 0 ? combined : undefined;
}
|
||||
|
||||
/**
 * Merges two lists of data-test references.
 *
 * When only one side has entries, a shallow copy of that side is returned
 * unchanged. When both have entries, tests are keyed by
 * `package:name` plus a short hash of their JSON-serialized kwargs (when
 * kwargs are non-empty); for equal keys the later entry replaces the earlier
 * one while keeping the earlier position.
 *
 * @param existing - Tests already recorded for the column.
 * @param incoming - Tests from a later definition of the same column.
 * @returns The merged tests, or `undefined` when both sides are empty.
 */
private mergeDbtDataTests(existing?: DbtDataTestRef[], incoming?: DbtDataTestRef[]): DbtDataTestRef[] | undefined {
  const left = existing ?? [];
  const right = incoming ?? [];

  if (left.length === 0 || right.length === 0) {
    const onlySide = left.length > 0 ? left : right;
    return onlySide.length > 0 ? [...onlySide] : undefined;
  }

  const identityOf = (test: DbtDataTestRef): string => {
    const hasKwargs = test.kwargs && Object.keys(test.kwargs).length > 0;
    const kwargsSuffix = hasKwargs
      ? `:${createHash('sha256').update(JSON.stringify(test.kwargs)).digest('hex').slice(0, 16)}`
      : '';
    return `${test.package}:${test.name}${kwargsSuffix}`;
  };

  const byIdentity = new Map<string, DbtDataTestRef>();
  for (const test of [...left, ...right]) {
    byIdentity.set(identityOf(test), test);
  }
  return [...byIdentity.values()];
}
|
||||
|
||||
/**
 * Builds a parse result that carries only the project name.
 *
 * @param projectName - Project name to report, or `null` when unknown.
 * @returns A result with no dbt version, no tables and no relationships.
 */
private emptyResult(projectName: string | null): DbtSchemaParseResult {
  const empty: DbtSchemaParseResult = { projectName, dbtVersion: null, tables: [], relationships: [] };
  return empty;
}
|
||||
}
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { DbtSchemaParseResult } from './parse-schema.js';
|
||||
import { toDescriptionUpdates } from './to-description-updates.js';
|
||||
import type { DbtHostTableLite } from './match-tables.js';
|
||||
|
||||
// Host-side catalog used by every case below: a single `orders` table in
// warehouse.analytics with the columns `id` and `amount`.
const hostTables: DbtHostTableLite[] = [
  {
    id: '1',
    name: 'orders',
    catalog: 'warehouse',
    db: 'analytics',
    columns: [
      { id: 'c1', name: 'id' },
      { id: 'c2', name: 'amount' },
    ],
  },
];
|
||||
|
||||
/**
 * Builds a parse result containing one `orders` model in warehouse.analytics.
 *
 * The model has two columns: `id`, whose description is `columnDescription`,
 * and `missing`, which always carries the description 'not imported' (the
 * host fixture has no column of that name).
 *
 * @param description - Table description to put on the model.
 * @param columnDescription - Description to put on the `id` column.
 */
function parseResult(description: string | null, columnDescription: string | null): DbtSchemaParseResult {
  const columns: DbtSchemaParseResult['tables'][number]['columns'] = [
    { name: 'id', description: columnDescription, dataType: null },
    { name: 'missing', description: 'not imported', dataType: null },
  ];

  return {
    projectName: null,
    dbtVersion: null,
    relationships: [],
    tables: [
      { name: 'orders', description, database: 'warehouse', schema: 'analytics', resourceType: 'model', columns },
    ],
  };
}
|
||||
|
||||
describe('dbt descriptions update payloads', () => {
  // Reference to the single host table and the AI entry expected for it.
  const ordersRef = { catalog: 'warehouse', db: 'analytics', name: 'orders' };
  const ordersAiInvalidation = { connectionId: 'conn-1', table: ordersRef, source: 'ai' };

  // Runs the function under test with the shared connection and host fixture.
  const run = (parsed: DbtSchemaParseResult) =>
    toDescriptionUpdates({ connectionId: 'conn-1', parseResult: parsed, hostTables, targetSchema: null });

  it('emits dbt writes and matching ai invalidations when descriptions exist', () => {
    const updates = run(parseResult('Orders table', 'Primary key'));

    expect(updates).toEqual({
      dbt: [
        {
          connectionId: 'conn-1',
          table: ordersRef,
          source: 'dbt',
          tableDescription: 'Orders table',
          // The `missing` column has no host counterpart, so only `id` is written.
          columnDescriptions: { id: 'Primary key' },
        },
      ],
      aiInvalidations: [ordersAiInvalidation],
    });
  });

  it('does not emit spurious dbt writes or ai invalidations when no descriptions exist', () => {
    const updates = run(parseResult(null, null));

    expect(updates).toEqual({ dbt: [], aiInvalidations: [] });
  });

  it('emits ai invalidation without a dbt description write when only structural metadata exists', () => {
    const parsed = parseResult(null, null);
    parsed.tables[0]!.tagsDbt = ['finance'];

    expect(run(parsed)).toEqual({ dbt: [], aiInvalidations: [ordersAiInvalidation] });
  });
});
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
import type { KloDescriptionUpdate } from '../../../scan/enrichment-types.js';
|
||||
import { findMatchingKloTable, type DbtHostTableLite } from './match-tables.js';
|
||||
import type { DbtSchemaParseResult } from './parse-schema.js';
|
||||
|
||||
/** Result of {@link toDescriptionUpdates}. */
export interface DbtDescriptionUpdates {
  /** Updates with source `'dbt'` carrying table and/or column descriptions. */
  dbt: KloDescriptionUpdate[];
  /**
   * Updates with source `'ai'` and no description fields, one per matched
   * table that has a description or dbt metadata change.
   */
  aiInvalidations: KloDescriptionUpdate[];
}
|
||||
|
||||
/**
 * Turns parsed dbt tables into description updates for the matching host
 * tables.
 *
 * For each dbt table that `findMatchingKloTable` resolves to a host table:
 * - column descriptions are collected for dbt columns that have a description
 *   and whose name matches a host column case-insensitively (keyed by the
 *   host column's name);
 * - a `'dbt'` update is emitted when there is a table description or at least
 *   one column description;
 * - an `'ai'` entry is emitted when there is such a description change or the
 *   table carries dbt metadata (tags, freshness, or any column with
 *   constraints, enum values or data tests).
 *
 * Tables with neither kind of change produce nothing.
 *
 * @param input.connectionId - Connection id copied onto every update.
 * @param input.parseResult - Parsed dbt schema whose tables are processed.
 * @param input.hostTables - Host tables to match against.
 * @param input.targetSchema - Passed through to `findMatchingKloTable`.
 * @returns The `'dbt'` description writes and the `'ai'` entries.
 */
export function toDescriptionUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
  targetSchema: string | null;
}): DbtDescriptionUpdates {
  const { connectionId, parseResult, hostTables, targetSchema } = input;
  const result: DbtDescriptionUpdates = { dbt: [], aiInvalidations: [] };

  for (const dbtTable of parseResult.tables) {
    const hostTable = findMatchingKloTable(dbtTable, hostTables, targetSchema);
    if (!hostTable) {
      continue;
    }

    const columnDescriptions: Record<string, string | null> = {};
    for (const dbtColumn of dbtTable.columns) {
      const text = dbtColumn.description;
      if (!text) {
        continue;
      }
      const hostColumn = hostTable.columns.find(
        (candidate) => candidate.name.toLowerCase() === dbtColumn.name.toLowerCase(),
      );
      if (hostColumn) {
        columnDescriptions[hostColumn.name] = text;
      }
    }

    const tableDescription = dbtTable.description ?? undefined;
    const hasColumnDescriptions = Object.keys(columnDescriptions).length > 0;
    const hasDescriptionChange = hasColumnDescriptions || tableDescription !== undefined;

    const anyColumnHasMetadata = dbtTable.columns.some(
      (column) => column.constraints !== undefined || !!column.enumValuesDbt?.length || !!column.dataTests?.length,
    );
    const hasMetadataChange =
      Boolean(dbtTable.tagsDbt?.length) || dbtTable.freshnessDbt !== undefined || anyColumnHasMetadata;

    if (!(hasDescriptionChange || hasMetadataChange)) {
      continue;
    }

    const table = { catalog: hostTable.catalog, db: hostTable.db, name: hostTable.name };

    if (hasDescriptionChange) {
      result.dbt.push({
        connectionId,
        table,
        source: 'dbt',
        ...(tableDescription !== undefined ? { tableDescription } : {}),
        ...(hasColumnDescriptions ? { columnDescriptions } : {}),
      });
    }

    // Emitted for metadata-only changes too, not just description writes.
    result.aiInvalidations.push({ connectionId, table, source: 'ai' });
  }

  return result;
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { toMetadataUpdates } from './to-metadata-updates.js';
|
||||
|
||||
describe('toMetadataUpdates', () => {
  it('emits source-keyed dbt metadata updates for matched tables and columns', () => {
    // The accepted_values test is expected to pass through unchanged.
    const acceptedValuesTest = {
      name: 'accepted_values',
      package: 'dbt',
      kwargs: { values: ['placed', 'shipped'] },
    };

    const hostOrders = {
      id: 'orders-id',
      name: 'orders',
      catalog: 'warehouse',
      db: 'analytics',
      columns: [
        { id: 'status-id', name: 'status' },
        { id: 'created-id', name: 'created_at' },
      ],
    };

    const updates = toMetadataUpdates({
      connectionId: 'conn_1',
      targetSchema: 'analytics',
      hostTables: [hostOrders],
      parseResult: {
        projectName: null,
        dbtVersion: null,
        relationships: [],
        tables: [
          {
            name: 'orders',
            description: null,
            database: 'warehouse',
            schema: 'analytics',
            resourceType: 'model',
            tagsDbt: ['finance'],
            freshnessDbt: { loadedAtField: 'created_at' },
            columns: [
              {
                name: 'status',
                description: null,
                dataType: null,
                enumValuesDbt: ['placed', 'shipped'],
                constraints: { dbt: { not_null: true } },
                dataTests: [{ ...acceptedValuesTest }],
              },
            ],
          },
        ],
      },
    });

    expect(updates).toEqual([
      {
        connectionId: 'conn_1',
        table: { catalog: 'warehouse', db: 'analytics', name: 'orders' },
        source: 'dbt',
        tableFields: {
          tags: ['finance'],
          freshness: { loaded_at_field: 'created_at' },
        },
        columnFields: {
          status: {
            constraints: { not_null: true },
            enum_values: ['placed', 'shipped'],
            tests: [acceptedValuesTest],
          },
        },
      },
    ]);
  });
});
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
import type { KloMetadataUpdate } from '../../../scan/enrichment-types.js';
|
||||
import { findMatchingKloTable, type DbtHostTableLite } from './match-tables.js';
|
||||
import type { DbtSchemaParseResult } from './parse-schema.js';
|
||||
|
||||
/**
 * Turns parsed dbt tables into metadata updates for the matching host tables.
 *
 * For each dbt table that `findMatchingKloTable` resolves to a host table:
 * - table fields: `tags` (non-empty `tagsDbt`) and `freshness` (with `raw`
 *   and/or `loaded_at_field` when those are defined on `freshnessDbt`);
 * - column fields, keyed by host column name, for dbt columns whose name
 *   matches a host column case-insensitively: `constraints` (the `dbt` part
 *   of the column constraints), `enum_values` and `tests`.
 *
 * Tables that yield neither table nor column fields produce no update.
 *
 * @param input.connectionId - Connection id copied onto every update.
 * @param input.parseResult - Parsed dbt schema whose tables are processed.
 * @param input.hostTables - Host tables to match against.
 * @param input.targetSchema - Passed through to `findMatchingKloTable`.
 * @returns One `'dbt'`-sourced update per table that has metadata to record.
 */
export function toMetadataUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
  targetSchema: string | null;
}): KloMetadataUpdate[] {
  const { connectionId, parseResult, hostTables, targetSchema } = input;
  const updates: KloMetadataUpdate[] = [];

  for (const dbtTable of parseResult.tables) {
    const hostTable = findMatchingKloTable(dbtTable, hostTables, targetSchema);
    if (!hostTable) {
      continue;
    }

    const { tagsDbt, freshnessDbt } = dbtTable;
    const tableFields: Record<string, unknown> = {};
    if (tagsDbt?.length) {
      tableFields.tags = tagsDbt;
    }
    if (freshnessDbt) {
      const freshness: Record<string, unknown> = {};
      if (freshnessDbt.raw !== undefined) {
        freshness.raw = freshnessDbt.raw;
      }
      if (freshnessDbt.loadedAtField !== undefined) {
        freshness.loaded_at_field = freshnessDbt.loadedAtField;
      }
      tableFields.freshness = freshness;
    }

    const columnFields: Record<string, Record<string, unknown>> = {};
    for (const dbtColumn of dbtTable.columns) {
      const hostColumn = hostTable.columns.find(
        (candidate) => candidate.name.toLowerCase() === dbtColumn.name.toLowerCase(),
      );
      if (!hostColumn) {
        continue;
      }

      const fields: Record<string, unknown> = {};
      if (dbtColumn.constraints) {
        fields.constraints = dbtColumn.constraints.dbt;
      }
      if (dbtColumn.enumValuesDbt?.length) {
        fields.enum_values = dbtColumn.enumValuesDbt;
      }
      if (dbtColumn.dataTests?.length) {
        // Copy only name/package/kwargs; `kwargs` is omitted when falsy.
        fields.tests = dbtColumn.dataTests.map(({ name, package: pkg, kwargs }) => ({
          name,
          package: pkg,
          ...(kwargs ? { kwargs } : {}),
        }));
      }

      if (Object.keys(fields).length > 0) {
        columnFields[hostColumn.name] = fields;
      }
    }

    const hasTableFields = Object.keys(tableFields).length > 0;
    const hasColumnFields = Object.keys(columnFields).length > 0;
    if (!hasTableFields && !hasColumnFields) {
      continue;
    }

    updates.push({
      connectionId,
      table: { catalog: hostTable.catalog, db: hostTable.db, name: hostTable.name },
      source: 'dbt',
      ...(hasTableFields ? { tableFields } : {}),
      ...(hasColumnFields ? { columnFields } : {}),
    });
  }

  return updates;
}
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { DbtHostTableLite } from './match-tables.js';
|
||||
import type { DbtSchemaParseResult } from './parse-schema.js';
|
||||
import { toRelationshipUpdates } from './to-relationship-updates.js';
|
||||
|
||||
// Author email expected on dbt-derived joins. Built by joining two fragments;
// presumably so the full address never appears as a single literal — TODO confirm.
const DBT_SYSTEM_EMAIL = ['system@kae', 'lio.dev'].join('');

// Two host tables in different schemas: `orders` (analytics) and `customers` (staging).
const hostTables: DbtHostTableLite[] = [
  { id: '1', name: 'orders', catalog: 'warehouse', db: 'analytics', columns: [{ id: 'c1', name: 'customer_id' }] },
  { id: '2', name: 'customers', catalog: 'warehouse', db: 'staging', columns: [{ id: 'c2', name: 'id' }] },
];

// Three relationships: one resolvable, one with an unknown source column and
// one with an unknown target table.
const parseResult: DbtSchemaParseResult = {
  projectName: null,
  dbtVersion: null,
  tables: [],
  relationships: [
    {
      fromTable: 'orders',
      fromColumn: 'customer_id',
      toTable: 'customers',
      toColumn: 'id',
      fromSchema: 'analytics',
      toSchema: 'analytics',
      description: 'schema intentionally differs from the host customers table',
    },
    { fromTable: 'orders', fromColumn: 'missing', toTable: 'customers', toColumn: 'id' },
    { fromTable: 'orders', fromColumn: 'customer_id', toTable: 'missing_table', toColumn: 'id' },
  ],
};
|
||||
|
||||
describe('dbt relationship update payloads', () => {
  it('validates relationships using the current name-only matching behavior and dbt provenance', () => {
    const updates = toRelationshipUpdates({ connectionId: 'conn-1', parseResult, hostTables });

    // Only the first fixture relationship resolves; the other two are counted as skipped.
    const expectedJoin = {
      connectionId: 'conn-1',
      fromTable: 'orders',
      fromColumns: ['customer_id'],
      toTable: 'customers',
      toColumns: ['id'],
      relationship: 'many_to_one',
      author: 'dbt',
      authorEmail: DBT_SYSTEM_EMAIL,
    };

    expect(updates).toEqual({ joins: [expectedJoin], skippedNoMatch: 2 });
  });
});
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
import type { KloJoinUpdate } from '../../../scan/enrichment-types.js';
|
||||
import type { DbtHostTableLite } from './match-tables.js';
|
||||
import type { DbtSchemaParseResult } from './parse-schema.js';
|
||||
|
||||
/** Result of {@link toRelationshipUpdates}. */
export interface DbtRelationshipUpdates {
  /** One join per relationship whose tables and columns were all found. */
  joins: KloJoinUpdate[];
  /** Number of relationships dropped because a table or column was not found. */
  skippedNoMatch: number;
}
|
||||
|
||||
// Author email stamped on every dbt-derived join. Built by joining two
// fragments; presumably so the full address never appears as a single
// literal — TODO confirm.
const DBT_SYSTEM_EMAIL = ['system@kae', 'lio.dev'].join('');

/**
 * Turns parsed dbt relationships into join updates against the host tables.
 *
 * Tables are looked up by lower-cased name only (schemas are not consulted;
 * when several host tables share a name, the last one in `hostTables` is
 * used). Columns are matched case-insensitively within the resolved tables.
 * Every resolved relationship becomes a `many_to_one` join authored by `dbt`;
 * relationships with a missing table or column are counted in
 * `skippedNoMatch`.
 *
 * @param input.connectionId - Connection id copied onto every join.
 * @param input.parseResult - Parsed dbt schema whose relationships are processed.
 * @param input.hostTables - Host tables to resolve names against.
 * @returns The joins plus the number of relationships that were skipped.
 */
export function toRelationshipUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
}): DbtRelationshipUpdates {
  const hostByName = new Map<string, DbtHostTableLite>(
    input.hostTables.map((table): [string, DbtHostTableLite] => [table.name.toLowerCase(), table]),
  );

  const result: DbtRelationshipUpdates = { joins: [], skippedNoMatch: 0 };

  for (const rel of input.parseResult.relationships) {
    const source = hostByName.get(rel.fromTable.toLowerCase());
    const target = hostByName.get(rel.toTable.toLowerCase());
    if (!(source && target)) {
      result.skippedNoMatch += 1;
      continue;
    }

    const sourceColumn = source.columns.find(
      (candidate) => candidate.name.toLowerCase() === rel.fromColumn.toLowerCase(),
    );
    const targetColumn = target.columns.find(
      (candidate) => candidate.name.toLowerCase() === rel.toColumn.toLowerCase(),
    );
    if (!(sourceColumn && targetColumn)) {
      result.skippedNoMatch += 1;
      continue;
    }

    // Names are taken from the host side, so host casing is what gets emitted.
    result.joins.push({
      connectionId: input.connectionId,
      fromTable: source.name,
      fromColumns: [sourceColumn.name],
      toTable: target.name,
      toColumns: [targetColumn.name],
      relationship: 'many_to_one',
      author: 'dbt',
      authorEmail: DBT_SYSTEM_EMAIL,
    });
  }

  return result;
}
|
||||
|
|
@ -0,0 +1,410 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { type DbtHostTableLite, matchDbtTables } from './dbt-descriptions/match-tables.js';
|
||||
import { mergeSemanticModelTables } from './dbt-descriptions/merge-semantic-model-tables.js';
|
||||
import { parseDbtSchemaFiles } from './dbt-descriptions/parse-schema.js';
|
||||
import { toDescriptionUpdates } from './dbt-descriptions/to-description-updates.js';
|
||||
import { toRelationshipUpdates } from './dbt-descriptions/to-relationship-updates.js';
|
||||
import { parseMetricflowFiles } from './metricflow/deep-parse.js';
|
||||
import { mapCrossModelMetricToSource, mapSemanticModelToSource } from './metricflow/semantic-models.js';
|
||||
|
||||
// System author email, assembled by joining two string fragments.
// NOTE(review): presumably split so the full address never appears as a single literal — confirm.
const DBT_SYSTEM_EMAIL = ['system@kae', 'lio.dev'].join('');
|
||||
|
||||
const metricflowYaml = `
|
||||
semantic_models:
|
||||
- name: orders_semantic
|
||||
description: MetricFlow order facts
|
||||
model: ref('fct_orders')
|
||||
defaults:
|
||||
agg_time_dimension: ordered_at
|
||||
entities:
|
||||
- name: customer
|
||||
type: foreign
|
||||
expr: customer_id
|
||||
description: Customer relationship
|
||||
dimensions:
|
||||
- name: status
|
||||
type: categorical
|
||||
expr: status
|
||||
description: Order status
|
||||
- name: ordered_at
|
||||
type: time
|
||||
expr: ordered_at
|
||||
measures:
|
||||
- name: total_revenue
|
||||
agg: sum
|
||||
expr: amount
|
||||
description: Revenue
|
||||
- name: customers_semantic
|
||||
description: Customer dimension
|
||||
model: ref('dim_customers')
|
||||
entities:
|
||||
- name: customer
|
||||
type: primary
|
||||
expr: id
|
||||
dimensions:
|
||||
- name: country
|
||||
type: categorical
|
||||
expr: country
|
||||
description: Customer country
|
||||
measures:
|
||||
- name: customer_count
|
||||
agg: count
|
||||
expr: id
|
||||
description: Customer count
|
||||
metrics:
|
||||
- name: total_revenue
|
||||
type: simple
|
||||
type_params:
|
||||
measure: total_revenue
|
||||
- name: customer_count
|
||||
type: simple
|
||||
type_params:
|
||||
measure: customer_count
|
||||
- name: revenue_per_customer
|
||||
description: Revenue per customer
|
||||
type: derived
|
||||
type_params:
|
||||
expr: total_revenue / NULLIF(customer_count, 0)
|
||||
metrics:
|
||||
- name: total_revenue
|
||||
alias: total_revenue
|
||||
- name: customer_count
|
||||
alias: customer_count
|
||||
`;
|
||||
|
||||
const schemaYaml = `
|
||||
version: 2
|
||||
sources:
|
||||
- name: raw
|
||||
database: warehouse
|
||||
schema: landing
|
||||
tables:
|
||||
- name: customers
|
||||
identifier: dim_customers
|
||||
description: Raw customer dimension
|
||||
columns:
|
||||
- name: id
|
||||
description: Customer primary key
|
||||
- name: country
|
||||
description: Country name
|
||||
models:
|
||||
- name: "{{ var('orders_model', 'fct_orders') }}"
|
||||
schema: "{{ var('mart_schema', 'analytics') }}"
|
||||
description: Modeled orders
|
||||
columns:
|
||||
- name: customer_id
|
||||
description: Linked customer id
|
||||
tests:
|
||||
- relationships:
|
||||
to: ref('dim_customers')
|
||||
field: id
|
||||
- name: status
|
||||
description: Order status
|
||||
- name: amount
|
||||
description: Gross amount
|
||||
`;
|
||||
|
||||
// Host-side catalog for the golden fixture: `fct_orders` in warehouse.analytics
// and `dim_customers` in warehouse.landing.
const ordersHostTable: DbtHostTableLite = {
  id: 'orders-table',
  name: 'fct_orders',
  catalog: 'warehouse',
  db: 'analytics',
  columns: [
    { id: 'orders-customer-id', name: 'customer_id' },
    { id: 'orders-status', name: 'status' },
    { id: 'orders-amount', name: 'amount' },
    { id: 'orders-ordered-at', name: 'ordered_at' },
  ],
};

const customersHostTable: DbtHostTableLite = {
  id: 'customers-table',
  name: 'dim_customers',
  catalog: 'warehouse',
  db: 'landing',
  columns: [
    { id: 'customers-id', name: 'id' },
    { id: 'customers-country', name: 'country' },
  ],
};

const hostTables: DbtHostTableLite[] = [ordersHostTable, customersHostTable];
|
||||
|
||||
describe('dbt extraction golden parity fixture', () => {
|
||||
it('freezes the relocated MetricFlow and dbt-description contract together', () => {
|
||||
const metricflow = parseMetricflowFiles([{ path: 'semantic_models/orders.yml', content: metricflowYaml }]);
|
||||
|
||||
expect(metricflow).toEqual({
|
||||
semanticModels: [
|
||||
{
|
||||
name: 'orders_semantic',
|
||||
description: 'MetricFlow order facts',
|
||||
modelRef: 'fct_orders',
|
||||
dimensions: [
|
||||
{
|
||||
name: 'status',
|
||||
column: 'status',
|
||||
type: 'string',
|
||||
label: 'Status',
|
||||
description: 'Order status',
|
||||
},
|
||||
{
|
||||
name: 'ordered_at',
|
||||
column: 'ordered_at',
|
||||
type: 'time',
|
||||
label: 'Ordered At',
|
||||
description: undefined,
|
||||
},
|
||||
],
|
||||
measures: [
|
||||
{
|
||||
type: 'simple',
|
||||
name: 'total_revenue',
|
||||
column: 'amount',
|
||||
aggregation: 'sum',
|
||||
label: 'Total Revenue',
|
||||
description: 'Revenue',
|
||||
},
|
||||
],
|
||||
entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id', description: 'Customer relationship' }],
|
||||
defaultTimeDimension: 'ordered_at',
|
||||
},
|
||||
{
|
||||
name: 'customers_semantic',
|
||||
description: 'Customer dimension',
|
||||
modelRef: 'dim_customers',
|
||||
dimensions: [
|
||||
{
|
||||
name: 'country',
|
||||
column: 'country',
|
||||
type: 'string',
|
||||
label: 'Country',
|
||||
description: 'Customer country',
|
||||
},
|
||||
],
|
||||
measures: [
|
||||
{
|
||||
type: 'simple',
|
||||
name: 'customer_count',
|
||||
column: 'id',
|
||||
aggregation: 'count',
|
||||
label: 'Customer Count',
|
||||
description: 'Customer count',
|
||||
},
|
||||
],
|
||||
entities: [{ name: 'customer', type: 'primary', expr: 'id' }],
|
||||
defaultTimeDimension: null,
|
||||
},
|
||||
],
|
||||
crossModelMetrics: [
|
||||
{
|
||||
name: 'revenue_per_customer',
|
||||
label: null,
|
||||
description: 'Revenue per customer',
|
||||
type: 'derived',
|
||||
expr: 'total_revenue / NULLIF(customer_count, 0)',
|
||||
dependsOn: [
|
||||
{ metricName: 'orders_semantic', alias: 'total_revenue' },
|
||||
{ metricName: 'customers_semantic', alias: 'customer_count' },
|
||||
],
|
||||
filter: null,
|
||||
},
|
||||
],
|
||||
relationships: [
|
||||
{
|
||||
fromTable: 'fct_orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'dim_customers',
|
||||
toColumn: 'id',
|
||||
description: 'Customer relationship',
|
||||
},
|
||||
],
|
||||
warnings: [],
|
||||
});
|
||||
|
||||
expect(mapSemanticModelToSource(metricflow.semanticModels[0], 'analytics.fct_orders')).toEqual({
|
||||
name: 'fct-orders',
|
||||
table: 'analytics.fct_orders',
|
||||
grain: ['status', 'ordered_at'],
|
||||
columns: [
|
||||
{ name: 'status', type: 'string', description: 'Order status' },
|
||||
{ name: 'ordered_at', type: 'time' },
|
||||
],
|
||||
measures: [
|
||||
{
|
||||
name: 'total_revenue',
|
||||
expr: 'sum(amount)',
|
||||
description: 'Revenue',
|
||||
},
|
||||
],
|
||||
joins: [],
|
||||
descriptions: { dbt: 'MetricFlow order facts' },
|
||||
});
|
||||
|
||||
expect(mapCrossModelMetricToSource(metricflow.crossModelMetrics[0])).toEqual({
|
||||
name: 'revenue-per-customer',
|
||||
sql: 'total_revenue / NULLIF(customer_count, 0)',
|
||||
descriptions: { dbt: 'Revenue per customer' },
|
||||
grain: [],
|
||||
columns: [],
|
||||
measures: [
|
||||
{
|
||||
name: 'revenue_per_customer',
|
||||
expr: 'total_revenue / NULLIF(customer_count, 0)',
|
||||
description: 'Revenue per customer',
|
||||
},
|
||||
],
|
||||
joins: [],
|
||||
});
|
||||
|
||||
const schema = parseDbtSchemaFiles(
|
||||
[{ path: 'models/schema.yml', content: schemaYaml }],
|
||||
new Map([
|
||||
['orders_model', 'fct_orders'],
|
||||
['mart_schema', 'analytics'],
|
||||
]),
|
||||
);
|
||||
const merged = mergeSemanticModelTables(schema, metricflow.semanticModels);
|
||||
|
||||
expect(merged).toEqual({
|
||||
projectName: null,
|
||||
dbtVersion: null,
|
||||
tables: [
|
||||
{
|
||||
name: 'dim_customers',
|
||||
description: 'Raw customer dimension',
|
||||
database: 'warehouse',
|
||||
schema: 'landing',
|
||||
columns: [
|
||||
{ name: 'id', description: 'Customer primary key', dataType: null },
|
||||
{ name: 'country', description: 'Country name', dataType: null },
|
||||
],
|
||||
resourceType: 'source',
|
||||
},
|
||||
{
|
||||
name: 'fct_orders',
|
||||
description: 'Modeled orders',
|
||||
database: null,
|
||||
schema: 'analytics',
|
||||
columns: [
|
||||
{
|
||||
name: 'customer_id',
|
||||
description: 'Linked customer id',
|
||||
dataType: null,
|
||||
dataTests: [
|
||||
{
|
||||
name: 'relationships',
|
||||
package: 'dbt',
|
||||
kwargs: { to: "ref('dim_customers')", field: 'id' },
|
||||
},
|
||||
],
|
||||
},
|
||||
{ name: 'status', description: 'Order status', dataType: null },
|
||||
{ name: 'amount', description: 'Gross amount', dataType: null },
|
||||
],
|
||||
resourceType: 'model',
|
||||
},
|
||||
],
|
||||
relationships: [
|
||||
{
|
||||
fromTable: 'fct_orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'dim_customers',
|
||||
toColumn: 'id',
|
||||
fromSchema: 'analytics',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(matchDbtTables(merged.tables, hostTables, 'analytics')).toEqual([
|
||||
{
|
||||
dbtTable: 'dim_customers',
|
||||
dbtSchema: 'landing',
|
||||
dbtDatabase: 'warehouse',
|
||||
hostTableId: 'customers-table',
|
||||
hostTableName: 'dim_customers',
|
||||
matched: true,
|
||||
tableDescriptionAction: 'import',
|
||||
tableDescriptionFound: true,
|
||||
columnsToImport: 2,
|
||||
columnsMatched: 2,
|
||||
columnsTotal: 2,
|
||||
columnDescriptionsFound: 2,
|
||||
},
|
||||
{
|
||||
dbtTable: 'fct_orders',
|
||||
dbtSchema: 'analytics',
|
||||
dbtDatabase: null,
|
||||
hostTableId: 'orders-table',
|
||||
hostTableName: 'fct_orders',
|
||||
matched: true,
|
||||
tableDescriptionAction: 'import',
|
||||
tableDescriptionFound: true,
|
||||
columnsToImport: 3,
|
||||
columnsMatched: 3,
|
||||
columnsTotal: 3,
|
||||
columnDescriptionsFound: 3,
|
||||
},
|
||||
]);
|
||||
|
||||
expect(
|
||||
toDescriptionUpdates({
|
||||
connectionId: 'warehouse-1',
|
||||
parseResult: merged,
|
||||
hostTables,
|
||||
targetSchema: 'analytics',
|
||||
}),
|
||||
).toEqual({
|
||||
dbt: [
|
||||
{
|
||||
connectionId: 'warehouse-1',
|
||||
table: { catalog: 'warehouse', db: 'landing', name: 'dim_customers' },
|
||||
source: 'dbt',
|
||||
tableDescription: 'Raw customer dimension',
|
||||
columnDescriptions: {
|
||||
id: 'Customer primary key',
|
||||
country: 'Country name',
|
||||
},
|
||||
},
|
||||
{
|
||||
connectionId: 'warehouse-1',
|
||||
table: { catalog: 'warehouse', db: 'analytics', name: 'fct_orders' },
|
||||
source: 'dbt',
|
||||
tableDescription: 'Modeled orders',
|
||||
columnDescriptions: {
|
||||
customer_id: 'Linked customer id',
|
||||
status: 'Order status',
|
||||
amount: 'Gross amount',
|
||||
},
|
||||
},
|
||||
],
|
||||
aiInvalidations: [
|
||||
{
|
||||
connectionId: 'warehouse-1',
|
||||
table: { catalog: 'warehouse', db: 'landing', name: 'dim_customers' },
|
||||
source: 'ai',
|
||||
},
|
||||
{
|
||||
connectionId: 'warehouse-1',
|
||||
table: { catalog: 'warehouse', db: 'analytics', name: 'fct_orders' },
|
||||
source: 'ai',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(toRelationshipUpdates({ connectionId: 'warehouse-1', parseResult: merged, hostTables })).toEqual({
|
||||
joins: [
|
||||
{
|
||||
connectionId: 'warehouse-1',
|
||||
fromTable: 'fct_orders',
|
||||
fromColumns: ['customer_id'],
|
||||
toTable: 'dim_customers',
|
||||
toColumns: ['id'],
|
||||
relationship: 'many_to_one',
|
||||
author: 'dbt',
|
||||
authorEmail: DBT_SYSTEM_EMAIL,
|
||||
},
|
||||
],
|
||||
skippedNoMatch: 0,
|
||||
});
|
||||
});
|
||||
});
|
||||
36
packages/context/src/ingest/adapters/dbt/chunk.test.ts
Normal file
36
packages/context/src/ingest/adapters/dbt/chunk.test.ts
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { chunkDbtProject } from './chunk.js';
|
||||
|
||||
describe('chunkDbtProject', () => {
  /** Builds a diff set in which only `modified` is populated. */
  const diffSet = (modified: string[]) => ({ added: [], modified, deleted: [], unchanged: [] });

  /** Sorted listing of `dbt_project.yml`, any extra paths, and `count` generated model YAML files. */
  const projectPaths = (count: number, extra: string[] = []): string[] => {
    const models = Array.from({ length: count }, (_, i) => `models/m${i}.yml`);
    return ['dbt_project.yml', ...extra, ...models].sort();
  };

  it('caps peerFileIndex when the project has very many yaml files', () => {
    const result = chunkDbtProject({ allPaths: projectPaths(201) });
    const first = result.workUnits[0];

    expect(first).toBeDefined();
    expect(first?.peerFileIndex).toHaveLength(200);
    expect(first?.notes).toMatch(/capped at 200/);
  });

  it('keeps large-project model work units when dbt_project.yml changes', () => {
    const changed = diffSet(['dbt_project.yml']);
    const { workUnits } = chunkDbtProject({ allPaths: projectPaths(30) }, { diffSet: changed });
    const head = workUnits[0];

    expect(workUnits).toHaveLength(30);
    expect(head?.rawFiles).toEqual(['models/m0.yml']);
    expect(head?.dependencyPaths).toContain('dbt_project.yml');
  });

  it('keeps large-project model work units when non-model yaml peers change', () => {
    const seedFile = 'seeds/seed_properties.yml';
    const { workUnits } = chunkDbtProject(
      { allPaths: projectPaths(30, [seedFile]) },
      { diffSet: diffSet([seedFile]) },
    );
    const head = workUnits[0];

    expect(workUnits).toHaveLength(30);
    expect(head?.rawFiles).toEqual(['models/m0.yml']);
    expect(head?.dependencyPaths).toContain('seeds/seed_properties.yml');
  });
});
|
||||
130
packages/context/src/ingest/adapters/dbt/chunk.ts
Normal file
130
packages/context/src/ingest/adapters/dbt/chunk.ts
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
|
||||
import type { ParsedDbtProject } from './parse.js';
|
||||
|
||||
/**
 * Path prefix identifying model YAML files.
 *
 * When a project has more than 25 YAML files, the per-model work units only name `rawFiles`
 * under `models/**`. Other `.yml` files (e.g. some `seeds/` or custom layouts) still appear in
 * `peerFileIndex`, or in the small-project / no-models fallbacks — v1 does not emit one work
 * unit per non-models file.
 */
const MODELS_PREFIX = 'models/';

/**
 * Upper bound on the number of entries in a work unit's `peerFileIndex`.
 * The index is a hint only (agents may not read those paths); capping it limits prompt size.
 */
const MAX_PEER_FILE_INDEX = 200;

/** Optional inputs accepted by the dbt chunker. */
interface ChunkOptions {
  /** Diff against a previous run; when omitted the chunker emits first-run work units. */
  diffSet?: DiffSet;
}
|
||||
|
||||
/**
 * Finds the dbt project file among the given paths.
 *
 * @param allPaths Relative paths to search (exact string match).
 * @returns `'dbt_project.yml'` when listed, otherwise `'dbt_project.yaml'` when listed,
 *          otherwise `undefined`.
 */
function projectYamlPath(allPaths: string[]): string | undefined {
  // Order matters: the `.yml` spelling wins when both are present.
  const candidates = ['dbt_project.yml', 'dbt_project.yaml'];
  return candidates.find((candidate) => allPaths.includes(candidate));
}
|
||||
|
||||
/**
 * Selects the paths that live under the models directory.
 *
 * Backslashes are treated as separators for the prefix check only; the returned
 * entries are the original, unmodified strings, sorted with the default string order.
 *
 * @param allPaths Relative paths to filter.
 * @returns A new sorted array of the paths starting with `MODELS_PREFIX`.
 */
function modelRelativePaths(allPaths: string[]): string[] {
  const modelPaths: string[] = [];
  for (const candidate of allPaths) {
    const posixPath = candidate.split('\\').join('/');
    if (posixPath.startsWith(MODELS_PREFIX)) {
      modelPaths.push(candidate);
    }
  }
  modelPaths.sort();
  return modelPaths;
}
|
||||
|
||||
/**
 * Derives a work-unit key from a model file path.
 *
 * The trailing `.yml` / `.yaml` extension (any case) is dropped, every run of characters
 * outside `[a-zA-Z0-9]` becomes a single `-`, leading/trailing dashes are removed, and the
 * result is lower-cased and prefixed with `dbt-`.
 *
 * @param mf Model file path (either separator style).
 * @returns The key, e.g. `models/marts/Orders.yml` → `dbt-models-marts-orders`.
 */
function unitKeyForModelFile(mf: string): string {
  const withoutExtension = mf.replace(/\.(ya?ml)$/i, '');
  // Splitting on non-alphanumeric runs and dropping empty pieces is the same as
  // "replace runs with '-' then trim dashes"; path separators are non-alphanumeric too.
  const segments = withoutExtension.split(/[^a-zA-Z0-9]+/).filter((segment) => segment.length > 0);
  const slug = segments.join('-').toLowerCase();
  return `dbt-${slug}`;
}
|
||||
|
||||
/**
 * Builds the work units for a run with no diff information.
 *
 * - No paths: no work units.
 * - At most 25 paths: one `dbt-all` unit holding every path.
 * - More than 25 paths but none under `models/`: one `dbt-all` unit holding every path, with the
 *   project file (when given) as its dependency.
 * - Otherwise: one unit per model file; every other path is listed in `peerFileIndex`
 *   (truncated to `MAX_PEER_FILE_INDEX` entries) and the project file becomes a dependency.
 *
 * @param allPaths All YAML paths of the project.
 * @param dbtDep Path of the dbt project file, if any.
 * @returns The first-run work units.
 */
function emitFirstRunWorkUnits(allPaths: string[], dbtDep: string | undefined): WorkUnit[] {
  if (allPaths.length === 0) {
    return [];
  }

  // Shared shape of the two "everything in one unit" fallbacks.
  const wholeProjectUnit = (displayLabel: string, dependencyPaths: string[], notes: string): WorkUnit => ({
    unitKey: 'dbt-all',
    displayLabel,
    rawFiles: [...allPaths],
    peerFileIndex: [],
    dependencyPaths,
    notes,
  });

  if (allPaths.length <= 25) {
    return [wholeProjectUnit('dbt project (all yaml)', [], 'dbt project — all YAML in one WorkUnit (≤25 files)')];
  }

  const modelFiles = modelRelativePaths(allPaths);
  if (modelFiles.length === 0) {
    return [
      wholeProjectUnit(
        'dbt project (all yaml, no models/**)',
        dbtDep ? [dbtDep] : [],
        'dbt: no models/**/*.yml — single slice with dbt_project as dependency if present',
      ),
    ];
  }

  const units: WorkUnit[] = [];
  for (const modelFile of modelFiles) {
    const peers = allPaths.filter((candidate) => candidate !== modelFile).sort();
    const overCap = peers.length > MAX_PEER_FILE_INDEX;

    const dependencyPaths: string[] = [];
    if (dbtDep && allPaths.includes(dbtDep) && modelFile !== dbtDep) {
      dependencyPaths.push(dbtDep);
    }

    let notes = 'dbt model schema slice';
    if (overCap) {
      notes = `dbt model schema slice (peer index capped at ${MAX_PEER_FILE_INDEX} of ${peers.length} paths)`;
    }

    units.push({
      unitKey: unitKeyForModelFile(modelFile),
      displayLabel: `dbt ${modelFile}`,
      rawFiles: [modelFile],
      peerFileIndex: overCap ? peers.slice(0, MAX_PEER_FILE_INDEX) : peers,
      dependencyPaths,
      notes,
    });
  }
  return units;
}
|
||||
|
||||
/**
 * Narrows first-run work units to the ones affected by a diff.
 *
 * A unit is kept when any of its raw files, dependency paths, or peer-index paths was added or
 * modified. For a kept unit:
 * - `rawFiles` becomes the touched raw files, or all raw files when none of them was touched
 *   (the unit was kept because of a dependency or peer change);
 * - `dependencyPaths` becomes the union of the original dependencies, the raw files left out of
 *   `rawFiles`, and the touched peer files.
 * Both arrays are returned sorted. The input work units are never mutated.
 *
 * @param firstRunUnits Work units as emitted for a run without a diff.
 * @param diffSet Added / modified / deleted paths since the previous run.
 * @returns The kept work units, plus `eviction.deletedRawPaths` (sorted) when anything was deleted.
 */
function applyDiffSet(firstRunUnits: WorkUnit[], diffSet: DiffSet): ChunkResult {
  const touched = new Set([...diffSet.added, ...diffSet.modified]);
  const isTouched = (path: string): boolean => touched.has(path);
  const kept: WorkUnit[] = [];

  for (const wu of firstRunUnits) {
    const touchedRawFiles = wu.rawFiles.filter(isTouched);
    const touchedPeerFiles = wu.peerFileIndex.filter(isTouched);
    const dependencyTouched = wu.dependencyPaths.some(isTouched);
    if (touchedRawFiles.length === 0 && touchedPeerFiles.length === 0 && !dependencyTouched) {
      continue;
    }

    const anyRawTouched = touchedRawFiles.length > 0;
    // Copy before sorting: `Array.prototype.sort` works in place and `wu.rawFiles` belongs to the caller.
    const rawFiles = anyRawTouched ? touchedRawFiles : [...wu.rawFiles];
    // Raw files that are not re-processed are demoted to dependencies.
    const unchangedRaw = anyRawTouched ? wu.rawFiles.filter((path) => !touched.has(path)) : [];
    const combinedDeps = new Set<string>([...wu.dependencyPaths, ...unchangedRaw, ...touchedPeerFiles]);

    kept.push({
      ...wu,
      rawFiles: rawFiles.sort(),
      dependencyPaths: [...combinedDeps].sort(),
    });
  }

  const eviction = diffSet.deleted.length > 0 ? { deletedRawPaths: [...diffSet.deleted].sort() } : undefined;
  return { workUnits: kept, eviction };
}
|
||||
|
||||
/**
 * Splits a parsed dbt project into work units.
 *
 * @param project Parsed project; only `allPaths` is read.
 * @param opts Optional settings. Without `diffSet` the first-run work units are returned as-is;
 *             with it they are narrowed through `applyDiffSet`.
 * @returns The work units, plus eviction info when a diff set with deletions was supplied.
 */
export function chunkDbtProject(project: ParsedDbtProject, opts: ChunkOptions = {}): ChunkResult {
  const { allPaths } = project;
  const firstRunUnits = emitFirstRunWorkUnits(allPaths, projectYamlPath(allPaths));
  return opts.diffSet ? applyDiffSet(firstRunUnits, opts.diffSet) : { workUnits: firstRunUnits };
}
|
||||
51
packages/context/src/ingest/adapters/dbt/dbt.adapter.test.ts
Normal file
51
packages/context/src/ingest/adapters/dbt/dbt.adapter.test.ts
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { SourceAdapter } from '../../types.js';
|
||||
import { DbtSourceAdapter } from './dbt.adapter.js';
|
||||
|
||||
describe('DbtSourceAdapter', () => {
  let stagedDir: string;
  let adapter: SourceAdapter;

  // Every test gets a fresh temp directory and a fresh adapter instance.
  beforeEach(async () => {
    const prefix = join(tmpdir(), 'dbt-adapter-');
    stagedDir = await mkdtemp(prefix);
    adapter = new DbtSourceAdapter();
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('declares the expected source key and skill list', () => {
    const { source, skillNames } = adapter;
    expect(source).toBe('dbt');
    expect(skillNames).toEqual(['dbt_ingest']);
  });

  it('detects a staged dbt project root (dbt_project.yml)', async () => {
    const projectFile = join(stagedDir, 'dbt_project.yml');
    await writeFile(projectFile, "name: 'jaffle'\nversion: '1.0.0'\n", 'utf-8');

    const detected = await adapter.detect(stagedDir);
    expect(detected).toBe(true);
  });

  it('chunk: dbt_project.yml + models/a.yml yields one WU (≤25 files)', async () => {
    const modelsDir = join(stagedDir, 'models');
    const schemaYaml = 'version: 2\nmodels:\n - name: orders\n description: Orders\n';

    await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\n", 'utf-8');
    await mkdir(modelsDir, { recursive: true });
    await writeFile(join(stagedDir, 'models/a.yml'), schemaYaml, 'utf-8');

    const result = await adapter.chunk(stagedDir);

    expect(result.workUnits).toHaveLength(1);
    expect(result.workUnits[0].unitKey).toBe('dbt-all');
    expect(result.parseArtifacts).toMatchObject({
      projectName: 'jaffle',
      tables: [{ name: 'orders', description: 'Orders' }],
    });
  });

  it('implements fetch() for git-backed dbt source setup', () => {
    expect(adapter.fetch).toBeTypeOf('function');
  });
});
|
||||
48
packages/context/src/ingest/adapters/dbt/dbt.adapter.ts
Normal file
48
packages/context/src/ingest/adapters/dbt/dbt.adapter.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import { join } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, SourceAdapter } from '../../types.js';
|
||||
import type { FetchContext } from '../../types.js';
|
||||
import { loadProjectInfo } from '../../dbt-shared/project-vars.js';
|
||||
import { loadDbtSchemaFiles } from '../../dbt-shared/schema-files.js';
|
||||
import { parseDbtSchemaFiles } from '../dbt-descriptions/parse-schema.js';
|
||||
import { chunkDbtProject } from './chunk.js';
|
||||
import { detectDbtStagedDir } from './detect.js';
|
||||
import { fetchDbtRepo, type DbtPullConfig } from './fetch.js';
|
||||
import { parseDbtStagedDir } from './parse.js';
|
||||
|
||||
/** Construction options for {@link DbtSourceAdapter}. */
interface DbtSourceAdapterOptions {
  /** Root under which `dbt/<connectionId>` repo caches are placed; falls back to `.klo/cache`. */
  homeDir?: string;
}

/** Source adapter for dbt projects: detection, git-backed fetch, and chunking into work units. */
export class DbtSourceAdapter implements SourceAdapter {
  readonly source = 'dbt' as const;

  /** Runner merges: ingest_triage, sl_capture, knowledge_capture (see ingest-bundle.runner.ts) */
  readonly skillNames: string[] = ['dbt_ingest'];

  constructor(private readonly options: DbtSourceAdapterOptions = {}) {}

  /** Reports whether `stagedDir` looks like a dbt project root (delegates to `detectDbtStagedDir`). */
  detect(stagedDir: string): Promise<boolean> {
    return detectDbtStagedDir(stagedDir);
  }

  /**
   * Fetches the configured dbt repository into `stagedDir`.
   *
   * @param pullConfig Treated as a `DbtPullConfig`; must carry a non-empty `repoUrl`.
   * @param stagedDir Destination directory handed to `fetchDbtRepo`.
   * @param ctx Fetch context; its `connectionId` selects the cache sub-directory.
   * @throws Error `'dbt fetch requires repoUrl'` when `repoUrl` is missing or empty.
   */
  async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const config = pullConfig as DbtPullConfig | undefined;
    const hasRepoUrl = Boolean(config?.repoUrl);
    if (!config || !hasRepoUrl) {
      throw new Error('dbt fetch requires repoUrl');
    }

    const cacheRoot = this.options.homeDir ?? '.klo/cache';
    const cacheDir = join(cacheRoot, 'dbt', ctx.connectionId);
    await fetchDbtRepo({ config, cacheDir, stagedDir });
  }

  /**
   * Chunks the staged dbt project and attaches the parsed schema artifacts.
   *
   * @param stagedDir Directory containing the staged project files.
   * @param diffSet Optional diff forwarded to `chunkDbtProject`.
   * @returns The chunk result extended with `parseArtifacts`.
   */
  async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    const project = await parseDbtStagedDir(stagedDir);
    const { projectName, variables } = await loadProjectInfo(stagedDir);
    const schemaFiles = await loadDbtSchemaFiles(stagedDir);

    const parseArtifacts = parseDbtSchemaFiles(schemaFiles, variables, { projectName });
    const chunked = chunkDbtProject(project, { diffSet });
    return { ...chunked, parseArtifacts };
  }
}
|
||||
12
packages/context/src/ingest/adapters/dbt/detect.ts
Normal file
12
packages/context/src/ingest/adapters/dbt/detect.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import { access } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
|
||||
/**
 * Checks whether a staged directory contains a dbt project file.
 *
 * `dbt_project.yml` is probed first, then `dbt_project.yaml`; any access failure for a
 * candidate simply counts as "not present".
 *
 * @param stagedDir Directory to probe.
 * @returns `true` when either project file is accessible, otherwise `false`.
 */
export async function detectDbtStagedDir(stagedDir: string): Promise<boolean> {
  const isAccessible = async (fileName: string): Promise<boolean> => {
    try {
      await access(join(stagedDir, fileName));
      return true;
    } catch {
      return false;
    }
  };

  // `||` short-circuits, so the `.yaml` probe only runs when the `.yml` one fails.
  return (await isAccessible('dbt_project.yml')) || (await isAccessible('dbt_project.yaml'));
}
|
||||
38
packages/context/src/ingest/adapters/dbt/fetch.test.ts
Normal file
38
packages/context/src/ingest/adapters/dbt/fetch.test.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { fetchDbtRepo } from './fetch.js';
|
||||
|
||||
describe('fetchDbtRepo', () => {
  let tempDir: string;

  // Each test works inside its own throwaway directory.
  beforeEach(async () => {
    const prefix = join(tmpdir(), 'klo-dbt-fetch-');
    tempDir = await mkdtemp(prefix);
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('copies dbt yaml files from a fetched repo subpath into staged dir', async () => {
    const cacheDir = join(tempDir, 'cache');
    const stagedDir = join(tempDir, 'staged');
    const repoSubdir = join(cacheDir, 'analytics');

    // Pre-populate the cache so the stubbed clone has something to "have fetched".
    await mkdir(join(repoSubdir, 'models'), { recursive: true });
    await writeFile(join(repoSubdir, 'dbt_project.yml'), 'name: analytics\n', 'utf-8');
    await writeFile(join(repoSubdir, 'models', 'orders.yml'), 'models: []\n', 'utf-8');
    const cloneOrPull = vi.fn(async () => ({ commitHash: 'abc123' }));

    const outcome = fetchDbtRepo({
      config: { repoUrl: 'https://github.com/acme/dbt.git', path: 'analytics' },
      cacheDir,
      stagedDir,
      deps: { cloneOrPull },
    });
    await expect(outcome).resolves.toEqual({ commitHash: 'abc123', filesCopied: 2 });

    const stagedProject = readFile(join(stagedDir, 'dbt_project.yml'), 'utf-8');
    await expect(stagedProject).resolves.toContain('analytics');
    const stagedModel = readFile(join(stagedDir, 'models', 'orders.yml'), 'utf-8');
    await expect(stagedModel).resolves.toContain('models');
  });
});
|
||||
60
packages/context/src/ingest/adapters/dbt/fetch.ts
Normal file
60
packages/context/src/ingest/adapters/dbt/fetch.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import { access, copyFile, mkdir, readdir } from 'node:fs/promises';
|
||||
import { dirname, join, relative } from 'node:path';
|
||||
import { cloneOrPull, sanitizeRepoError } from '../../repo-fetch.js';
|
||||
|
||||
/** Pull configuration for a git-hosted dbt project. */
export interface DbtPullConfig {
  /** Repository URL handed to `cloneOrPull`. */
  repoUrl: string;
  /** Branch to fetch; `'main'` is used when omitted. */
  branch?: string;
  /** Sub-directory of the repository to copy from; the repository root when omitted or empty. */
  path?: string;
  /** Token handed to `cloneOrPull` and to `sanitizeRepoError` when reporting failures. */
  authToken?: string | null;
}

/** Arguments of {@link fetchDbtRepo}. */
export interface FetchDbtRepoParams {
  config: DbtPullConfig;
  /** Directory the repository is cloned/pulled into. */
  cacheDir: string;
  /** Directory receiving the copied YAML files. */
  stagedDir: string;
  /** Overridable collaborators (used by tests to stub the clone). */
  deps?: {
    cloneOrPull?: typeof cloneOrPull;
  };
}

/**
 * Clones or updates the dbt repository in `cacheDir`, then copies its YAML files into `stagedDir`.
 *
 * @param params Repository config, cache/staging directories, and optional stubbed dependencies.
 * @returns The fetched commit hash and the number of YAML files copied.
 * @throws Error whose message is `sanitizeRepoError(error, authToken)` for any failure during
 *         the clone or the copy.
 */
export async function fetchDbtRepo(params: FetchDbtRepoParams): Promise<{ commitHash: string; filesCopied: number }> {
  try {
    const runCloneOrPull = params.deps?.cloneOrPull ?? cloneOrPull;
    const cloneRequest = {
      repoUrl: params.config.repoUrl,
      authToken: params.config.authToken,
      cacheDir: params.cacheDir,
      branch: params.config.branch ?? 'main',
    };
    const { commitHash } = await runCloneOrPull(cloneRequest);

    // Copy from the configured sub-path when one is set, otherwise from the cache root.
    let sourceRoot = params.cacheDir;
    if (params.config.path) {
      sourceRoot = join(params.cacheDir, params.config.path);
    }

    const filesCopied = await copyYamlFilesRecursive(sourceRoot, params.stagedDir);
    return { commitHash, filesCopied };
  } catch (error) {
    const message = sanitizeRepoError(error, params.config.authToken);
    throw new Error(message);
  }
}
|
||||
|
||||
/**
 * Copies every `.yml` / `.yaml` file (extension matched case-insensitively) found anywhere under
 * `sourceRoot` into `destRoot`, keeping each file's path relative to `sourceRoot`.
 *
 * @param sourceRoot Directory to scan; when it is not accessible nothing is created or copied.
 * @param destRoot Destination directory; created (with parents) when `sourceRoot` is accessible.
 * @returns The number of files copied (`0` when `sourceRoot` is not accessible).
 */
async function copyYamlFilesRecursive(sourceRoot: string, destRoot: string): Promise<number> {
  let sourceReachable = true;
  try {
    await access(sourceRoot);
  } catch {
    sourceReachable = false;
  }
  if (!sourceReachable) {
    return 0;
  }

  await mkdir(destRoot, { recursive: true });
  const dirents = await readdir(sourceRoot, { withFileTypes: true, recursive: true });
  const yamlFiles = dirents.filter((dirent) => dirent.isFile() && /\.ya?ml$/i.test(dirent.name));

  for (const file of yamlFiles) {
    const from = join(file.parentPath, file.name);
    const to = join(destRoot, relative(sourceRoot, from));
    // Nested directories are created on demand, right before the file that needs them.
    await mkdir(dirname(to), { recursive: true });
    await copyFile(from, to);
  }
  return yamlFiles.length;
}
|
||||
8
packages/context/src/ingest/adapters/dbt/parse.test.ts
Normal file
8
packages/context/src/ingest/adapters/dbt/parse.test.ts
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { normalizeDbtPath } from './parse.js';
|
||||
|
||||
describe('normalizeDbtPath', () => {
  it('normalizes Windows separators to POSIX separators', () => {
    const windowsStylePath = 'models\\marts\\orders.yml';
    const normalized = normalizeDbtPath(windowsStylePath);
    expect(normalized).toBe('models/marts/orders.yml');
  });
});
|
||||
32
packages/context/src/ingest/adapters/dbt/parse.ts
Normal file
32
packages/context/src/ingest/adapters/dbt/parse.ts
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
import { readdir } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
|
||||
const YAML_EXT_RE = /\.(ya?ml)$/i;
|
||||
|
||||
/**
 * Converts every backslash in a path to a forward slash.
 *
 * @param path Path using either separator style.
 * @returns The same path with `/` as the only separator.
 */
export function normalizeDbtPath(path: string): string {
  return path.split('\\').join('/');
}
|
||||
|
||||
/**
 * Lists every YAML file found anywhere under `stagedDir`.
 *
 * @param stagedDir Directory to scan recursively.
 * @returns Paths relative to `stagedDir`, with forward-slash separators, sorted.
 */
async function collectYamlFiles(stagedDir: string): Promise<string[]> {
  const dirents = await readdir(stagedDir, { withFileTypes: true, recursive: true });
  return dirents
    .filter((dirent) => dirent.isFile() && YAML_EXT_RE.test(dirent.name))
    .map((dirent) => normalizeDbtPath(relative(stagedDir, join(dirent.parentPath, dirent.name))))
    .sort();
}
|
||||
|
||||
/** Result of scanning a staged dbt project directory. */
export interface ParsedDbtProject {
  /** All `.yml` / `.yaml` paths under stagedDir, relative + sorted. */
  allPaths: string[];
}

/**
 * Scans a staged directory for YAML files.
 *
 * @param stagedDir Directory holding the staged dbt project.
 * @returns The project description carrying the collected YAML paths.
 */
export async function parseDbtStagedDir(stagedDir: string): Promise<ParsedDbtProject> {
  return { allPaths: await collectYamlFiles(stagedDir) };
}
|
||||
48
packages/context/src/ingest/adapters/fake/fake.adapter.ts
Normal file
48
packages/context/src/ingest/adapters/fake/fake.adapter.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import { readdir } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, SourceAdapter, WorkUnit } from '../../types.js';
|
||||
|
||||
/**
 * Minimal adapter that turns each top-level sub-directory of the staged directory into one
 * work unit. It declares no skills and always reports a positive detection.
 */
export class FakeSourceAdapter implements SourceAdapter {
  readonly source = 'fake';
  readonly skillNames: string[] = [];

  /** Always resolves to `true`. */
  detect(): Promise<boolean> {
    return Promise.resolve(true);
  }

  /**
   * Emits one work unit (`fake-<subDir>`) per top-level sub-directory that contains files.
   *
   * @param stagedDir Directory whose immediate sub-directories are scanned recursively.
   * @param diffSet Optional diff; when given, only sub-directories holding at least one added or
   *                modified file produce a work unit, and deleted paths are reported for eviction.
   * @returns Work units ordered by sub-directory name, with `rawFiles` relative to `stagedDir`
   *          and sorted; `eviction` is set only when the diff lists deleted paths.
   */
  async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    // The touched set does not depend on the sub-directory, so build it once up front.
    const touched = diffSet ? new Set([...diffSet.added, ...diffSet.modified]) : undefined;

    const topLevel = await readdir(stagedDir, { withFileTypes: true });
    const subDirs = topLevel
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name)
      .sort();

    const workUnits: WorkUnit[] = [];
    for (const subDir of subDirs) {
      const entries = await readdir(join(stagedDir, subDir), { withFileTypes: true, recursive: true });
      const rawFiles = entries
        .filter((entry) => entry.isFile())
        .map((entry) => relative(stagedDir, join(entry.parentPath, entry.name)))
        .sort();

      if (rawFiles.length === 0) {
        continue;
      }
      // With a diff, skip sub-directories in which nothing was added or modified.
      if (touched && !rawFiles.some((path) => touched.has(path))) {
        continue;
      }

      workUnits.push({
        unitKey: `fake-${subDir}`,
        displayLabel: subDir,
        rawFiles,
        peerFileIndex: [],
        dependencyPaths: [],
      });
    }

    const eviction = diffSet && diffSet.deleted.length > 0 ? { deletedRawPaths: [...diffSet.deleted] } : undefined;
    return { workUnits, eviction };
  }
}
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
{
|
||||
"name": "eviction-churn",
|
||||
"now": "2026-05-08T12:00:00.000Z",
|
||||
"connectionId": "warehouse",
|
||||
"probe": {
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"warnings": [
|
||||
"pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn"
|
||||
]
|
||||
},
|
||||
"snapshot": {
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"deallocCount": 3,
|
||||
"rows": [
|
||||
{
|
||||
"queryid": "501",
|
||||
"userid": "11",
|
||||
"username": "analyst",
|
||||
"dbid": "5",
|
||||
"database": "analytics",
|
||||
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"calls": 20,
|
||||
"totalExecTime": 500,
|
||||
"meanExecTime": 25,
|
||||
"totalRows": 40
|
||||
}
|
||||
]
|
||||
},
|
||||
"pullConfig": {
|
||||
"dialect": "postgres",
|
||||
"windowDays": 90,
|
||||
"lastSuccessfulCursor": null,
|
||||
"serviceAccountUserPatterns": [],
|
||||
"redactionPatterns": [],
|
||||
"maxTemplatesPerRun": 5000,
|
||||
"minCalls": 5
|
||||
},
|
||||
"analysisBySql": {
|
||||
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"tablesTouched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literalSlots": []
|
||||
}
|
||||
},
|
||||
"baseline": null,
|
||||
"expectedBaseline": {
|
||||
"version": 1,
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"templates": {
|
||||
"db5_q501": {
|
||||
"firstObservedAt": "2026-05-08T12:00:00.000Z",
|
||||
"perUser": {
|
||||
"11": {
|
||||
"calls": 20,
|
||||
"totalExecTime": 500,
|
||||
"totalRows": 40
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expectedFiles": {
|
||||
"manifest.json": {
|
||||
"json": {
|
||||
"source": "historic-sql",
|
||||
"connectionId": "warehouse",
|
||||
"dialect": "postgres",
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"windowStart": "2026-05-08T08:00:00.000Z",
|
||||
"windowEnd": "2026-05-08T12:00:00.000Z",
|
||||
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
|
||||
"templateCount": 1,
|
||||
"capped": false,
|
||||
"warnings": [
|
||||
"pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn",
|
||||
"pgss_dealloc_count:3; pg_stat_statements.max may be too low, causing template eviction churn",
|
||||
"baseline_first_run:no_previous_pgss_baseline"
|
||||
],
|
||||
"degraded": true,
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"baselineFirstRun": true,
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"deallocCount": 3,
|
||||
"templates": [
|
||||
{
|
||||
"id": "db5_q501",
|
||||
"fingerprint": "fp_orders_status",
|
||||
"subClusterId": null,
|
||||
"path": "templates/db5_q501/page.md"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"templates/db5_q501/metadata.json": {
|
||||
"json": {
|
||||
"id": "db5_q501",
|
||||
"title": "postgres · analytics.orders [db5_q501]",
|
||||
"path": "templates/db5_q501/page.md",
|
||||
"objectType": "historic_sql_template",
|
||||
"lastEditedAt": null,
|
||||
"properties": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"sub_cluster_id": null,
|
||||
"dialect": "postgres",
|
||||
"tables_touched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literal_slots": [],
|
||||
"triage_signals": {
|
||||
"executions_bucket": "mid",
|
||||
"distinct_users_bucket": "solo",
|
||||
"error_rate_bucket": "ok",
|
||||
"recency_bucket": "active",
|
||||
"service_account_only": "false",
|
||||
"runtime_bucket": "fast"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"templates/db5_q501/page.md": {
|
||||
"text": "# db5_q501\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
|
||||
},
|
||||
"templates/db5_q501/usage.json": {
|
||||
"json": {
|
||||
"stats": {
|
||||
"executions": 20,
|
||||
"distinct_users": 1,
|
||||
"first_seen": "2026-05-08T12:00:00.000Z",
|
||||
"last_seen": "2026-05-08T12:00:00.000Z",
|
||||
"p50_runtime_ms": null,
|
||||
"p95_runtime_ms": null,
|
||||
"mean_runtime_ms": 25,
|
||||
"error_rate": 0,
|
||||
"rows_produced": 40
|
||||
},
|
||||
"literal_slots": [],
|
||||
"samples": []
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
{
|
||||
"name": "first-run",
|
||||
"now": "2026-05-08T12:00:00.000Z",
|
||||
"connectionId": "warehouse",
|
||||
"probe": {
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"warnings": []
|
||||
},
|
||||
"snapshot": {
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"deallocCount": 0,
|
||||
"rows": [
|
||||
{
|
||||
"queryid": "101",
|
||||
"userid": "11",
|
||||
"username": "analyst",
|
||||
"dbid": "5",
|
||||
"database": "analytics",
|
||||
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"calls": 10,
|
||||
"totalExecTime": 250,
|
||||
"meanExecTime": 25,
|
||||
"totalRows": 20
|
||||
}
|
||||
]
|
||||
},
|
||||
"pullConfig": {
|
||||
"dialect": "postgres",
|
||||
"windowDays": 90,
|
||||
"lastSuccessfulCursor": null,
|
||||
"serviceAccountUserPatterns": [
|
||||
"^svc_"
|
||||
],
|
||||
"redactionPatterns": [],
|
||||
"maxTemplatesPerRun": 5000,
|
||||
"minCalls": 5
|
||||
},
|
||||
"analysisBySql": {
|
||||
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"tablesTouched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literalSlots": []
|
||||
}
|
||||
},
|
||||
"baseline": null,
|
||||
"expectedBaseline": {
|
||||
"version": 1,
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"templates": {
|
||||
"db5_q101": {
|
||||
"firstObservedAt": "2026-05-08T12:00:00.000Z",
|
||||
"perUser": {
|
||||
"11": {
|
||||
"calls": 10,
|
||||
"totalExecTime": 250,
|
||||
"totalRows": 20
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expectedFiles": {
|
||||
"manifest.json": {
|
||||
"json": {
|
||||
"source": "historic-sql",
|
||||
"connectionId": "warehouse",
|
||||
"dialect": "postgres",
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"windowStart": "2026-05-08T08:00:00.000Z",
|
||||
"windowEnd": "2026-05-08T12:00:00.000Z",
|
||||
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
|
||||
"templateCount": 1,
|
||||
"capped": false,
|
||||
"warnings": [
|
||||
"baseline_first_run:no_previous_pgss_baseline"
|
||||
],
|
||||
"degraded": true,
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"baselineFirstRun": true,
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"deallocCount": 0,
|
||||
"templates": [
|
||||
{
|
||||
"id": "db5_q101",
|
||||
"fingerprint": "fp_orders_status",
|
||||
"subClusterId": null,
|
||||
"path": "templates/db5_q101/page.md"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"templates/db5_q101/metadata.json": {
|
||||
"json": {
|
||||
"id": "db5_q101",
|
||||
"title": "postgres · analytics.orders [db5_q101]",
|
||||
"path": "templates/db5_q101/page.md",
|
||||
"objectType": "historic_sql_template",
|
||||
"lastEditedAt": null,
|
||||
"properties": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"sub_cluster_id": null,
|
||||
"dialect": "postgres",
|
||||
"tables_touched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literal_slots": [],
|
||||
"triage_signals": {
|
||||
"executions_bucket": "mid",
|
||||
"distinct_users_bucket": "solo",
|
||||
"error_rate_bucket": "ok",
|
||||
"recency_bucket": "active",
|
||||
"service_account_only": "false",
|
||||
"runtime_bucket": "fast"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"templates/db5_q101/page.md": {
|
||||
"text": "# db5_q101\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
|
||||
},
|
||||
"templates/db5_q101/usage.json": {
|
||||
"json": {
|
||||
"stats": {
|
||||
"executions": 10,
|
||||
"distinct_users": 1,
|
||||
"first_seen": "2026-05-08T12:00:00.000Z",
|
||||
"last_seen": "2026-05-08T12:00:00.000Z",
|
||||
"p50_runtime_ms": null,
|
||||
"p95_runtime_ms": null,
|
||||
"mean_runtime_ms": 25,
|
||||
"error_rate": 0,
|
||||
"rows_produced": 20
|
||||
},
|
||||
"literal_slots": [],
|
||||
"samples": []
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
{
|
||||
"name": "normal-delta",
|
||||
"now": "2026-05-08T12:00:00.000Z",
|
||||
"connectionId": "warehouse",
|
||||
"probe": {
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"warnings": []
|
||||
},
|
||||
"snapshot": {
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"deallocCount": 0,
|
||||
"rows": [
|
||||
{
|
||||
"queryid": "201",
|
||||
"userid": "11",
|
||||
"username": "analyst",
|
||||
"dbid": "5",
|
||||
"database": "analytics",
|
||||
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"calls": 12,
|
||||
"totalExecTime": 160,
|
||||
"meanExecTime": 13.333333333333334,
|
||||
"totalRows": 58
|
||||
},
|
||||
{
|
||||
"queryid": "201",
|
||||
"userid": "12",
|
||||
"username": "svc_loader",
|
||||
"dbid": "5",
|
||||
"database": "analytics",
|
||||
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"calls": 5,
|
||||
"totalExecTime": 50,
|
||||
"meanExecTime": 10,
|
||||
"totalRows": 25
|
||||
}
|
||||
]
|
||||
},
|
||||
"pullConfig": {
|
||||
"dialect": "postgres",
|
||||
"windowDays": 90,
|
||||
"lastSuccessfulCursor": null,
|
||||
"serviceAccountUserPatterns": [
|
||||
"^svc_"
|
||||
],
|
||||
"redactionPatterns": [],
|
||||
"maxTemplatesPerRun": 5000,
|
||||
"minCalls": 5
|
||||
},
|
||||
"analysisBySql": {
|
||||
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"tablesTouched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literalSlots": []
|
||||
}
|
||||
},
|
||||
"baseline": {
|
||||
"version": 1,
|
||||
"fetchedAt": "2026-05-08T10:00:00.000Z",
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"templates": {
|
||||
"db5_q201": {
|
||||
"firstObservedAt": "2026-05-08T09:00:00.000Z",
|
||||
"perUser": {
|
||||
"11": {
|
||||
"calls": 10,
|
||||
"totalExecTime": 100,
|
||||
"totalRows": 50
|
||||
},
|
||||
"12": {
|
||||
"calls": 5,
|
||||
"totalExecTime": 50,
|
||||
"totalRows": 25
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expectedBaseline": {
|
||||
"version": 1,
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"templates": {
|
||||
"db5_q201": {
|
||||
"firstObservedAt": "2026-05-08T09:00:00.000Z",
|
||||
"perUser": {
|
||||
"11": {
|
||||
"calls": 12,
|
||||
"totalExecTime": 160,
|
||||
"totalRows": 58
|
||||
},
|
||||
"12": {
|
||||
"calls": 5,
|
||||
"totalExecTime": 50,
|
||||
"totalRows": 25
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expectedFiles": {
|
||||
"manifest.json": {
|
||||
"json": {
|
||||
"source": "historic-sql",
|
||||
"connectionId": "warehouse",
|
||||
"dialect": "postgres",
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"windowStart": "2026-05-08T10:00:00.000Z",
|
||||
"windowEnd": "2026-05-08T12:00:00.000Z",
|
||||
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
|
||||
"templateCount": 1,
|
||||
"capped": false,
|
||||
"warnings": [],
|
||||
"degraded": true,
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"baselineFirstRun": false,
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"deallocCount": 0,
|
||||
"templates": [
|
||||
{
|
||||
"id": "db5_q201",
|
||||
"fingerprint": "fp_orders_status",
|
||||
"subClusterId": null,
|
||||
"path": "templates/db5_q201/page.md"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"templates/db5_q201/metadata.json": {
|
||||
"json": {
|
||||
"id": "db5_q201",
|
||||
"title": "postgres · analytics.orders [db5_q201]",
|
||||
"path": "templates/db5_q201/page.md",
|
||||
"objectType": "historic_sql_template",
|
||||
"lastEditedAt": null,
|
||||
"properties": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"sub_cluster_id": null,
|
||||
"dialect": "postgres",
|
||||
"tables_touched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literal_slots": [],
|
||||
"triage_signals": {
|
||||
"executions_bucket": "low",
|
||||
"distinct_users_bucket": "solo",
|
||||
"error_rate_bucket": "ok",
|
||||
"recency_bucket": "active",
|
||||
"service_account_only": "false",
|
||||
"runtime_bucket": "fast"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"templates/db5_q201/page.md": {
|
||||
"text": "# db5_q201\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
|
||||
},
|
||||
"templates/db5_q201/usage.json": {
|
||||
"json": {
|
||||
"stats": {
|
||||
"executions": 2,
|
||||
"distinct_users": 1,
|
||||
"first_seen": "2026-05-08T09:00:00.000Z",
|
||||
"last_seen": "2026-05-08T12:00:00.000Z",
|
||||
"p50_runtime_ms": null,
|
||||
"p95_runtime_ms": null,
|
||||
"mean_runtime_ms": 30,
|
||||
"error_rate": 0,
|
||||
"rows_produced": 8
|
||||
},
|
||||
"literal_slots": [],
|
||||
"samples": []
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
{
|
||||
"name": "reset-detected",
|
||||
"now": "2026-05-08T12:00:00.000Z",
|
||||
"connectionId": "warehouse",
|
||||
"probe": {
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"warnings": []
|
||||
},
|
||||
"snapshot": {
|
||||
"statsResetAt": "2026-05-08T11:00:00.000Z",
|
||||
"deallocCount": 0,
|
||||
"rows": [
|
||||
{
|
||||
"queryid": "301",
|
||||
"userid": "11",
|
||||
"username": "analyst",
|
||||
"dbid": "5",
|
||||
"database": "analytics",
|
||||
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"calls": 3,
|
||||
"totalExecTime": 90,
|
||||
"meanExecTime": 30,
|
||||
"totalRows": 9
|
||||
}
|
||||
]
|
||||
},
|
||||
"pullConfig": {
|
||||
"dialect": "postgres",
|
||||
"windowDays": 90,
|
||||
"lastSuccessfulCursor": null,
|
||||
"serviceAccountUserPatterns": [],
|
||||
"redactionPatterns": [],
|
||||
"maxTemplatesPerRun": 5000,
|
||||
"minCalls": 5
|
||||
},
|
||||
"analysisBySql": {
|
||||
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"tablesTouched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literalSlots": []
|
||||
}
|
||||
},
|
||||
"baseline": {
|
||||
"version": 1,
|
||||
"fetchedAt": "2026-05-08T10:00:00.000Z",
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"templates": {
|
||||
"db5_q301": {
|
||||
"firstObservedAt": "2026-05-08T09:00:00.000Z",
|
||||
"perUser": {
|
||||
"11": {
|
||||
"calls": 100,
|
||||
"totalExecTime": 1000,
|
||||
"totalRows": 500
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expectedBaseline": {
|
||||
"version": 1,
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"statsResetAt": "2026-05-08T11:00:00.000Z",
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"templates": {
|
||||
"db5_q301": {
|
||||
"firstObservedAt": "2026-05-08T12:00:00.000Z",
|
||||
"perUser": {
|
||||
"11": {
|
||||
"calls": 3,
|
||||
"totalExecTime": 90,
|
||||
"totalRows": 9
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expectedFiles": {
|
||||
"manifest.json": {
|
||||
"json": {
|
||||
"source": "historic-sql",
|
||||
"connectionId": "warehouse",
|
||||
"dialect": "postgres",
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"windowStart": "2026-05-08T10:00:00.000Z",
|
||||
"windowEnd": "2026-05-08T12:00:00.000Z",
|
||||
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
|
||||
"templateCount": 1,
|
||||
"capped": false,
|
||||
"warnings": [
|
||||
"baseline_reset:stats_reset advanced from 2026-05-08T08:00:00.000Z to 2026-05-08T11:00:00.000Z"
|
||||
],
|
||||
"degraded": true,
|
||||
"statsResetAt": "2026-05-08T11:00:00.000Z",
|
||||
"baselineFirstRun": true,
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"deallocCount": 0,
|
||||
"templates": [
|
||||
{
|
||||
"id": "db5_q301",
|
||||
"fingerprint": "fp_orders_status",
|
||||
"subClusterId": null,
|
||||
"path": "templates/db5_q301/page.md"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"templates/db5_q301/metadata.json": {
|
||||
"json": {
|
||||
"id": "db5_q301",
|
||||
"title": "postgres · analytics.orders [db5_q301]",
|
||||
"path": "templates/db5_q301/page.md",
|
||||
"objectType": "historic_sql_template",
|
||||
"lastEditedAt": null,
|
||||
"properties": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"sub_cluster_id": null,
|
||||
"dialect": "postgres",
|
||||
"tables_touched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literal_slots": [],
|
||||
"triage_signals": {
|
||||
"executions_bucket": "mid",
|
||||
"distinct_users_bucket": "solo",
|
||||
"error_rate_bucket": "ok",
|
||||
"recency_bucket": "active",
|
||||
"service_account_only": "false",
|
||||
"runtime_bucket": "fast"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"templates/db5_q301/page.md": {
|
||||
"text": "# db5_q301\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
|
||||
},
|
||||
"templates/db5_q301/usage.json": {
|
||||
"json": {
|
||||
"stats": {
|
||||
"executions": 3,
|
||||
"distinct_users": 1,
|
||||
"first_seen": "2026-05-08T12:00:00.000Z",
|
||||
"last_seen": "2026-05-08T12:00:00.000Z",
|
||||
"p50_runtime_ms": null,
|
||||
"p95_runtime_ms": null,
|
||||
"mean_runtime_ms": 30,
|
||||
"error_rate": 0,
|
||||
"rows_produced": 9
|
||||
},
|
||||
"literal_slots": [],
|
||||
"samples": []
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
{
|
||||
"name": "version-change",
|
||||
"now": "2026-05-08T12:00:00.000Z",
|
||||
"connectionId": "warehouse",
|
||||
"probe": {
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"warnings": []
|
||||
},
|
||||
"snapshot": {
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"deallocCount": 0,
|
||||
"rows": [
|
||||
{
|
||||
"queryid": "401",
|
||||
"userid": "11",
|
||||
"username": "analyst",
|
||||
"dbid": "5",
|
||||
"database": "analytics",
|
||||
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"calls": 4,
|
||||
"totalExecTime": 80,
|
||||
"meanExecTime": 20,
|
||||
"totalRows": 8
|
||||
}
|
||||
]
|
||||
},
|
||||
"pullConfig": {
|
||||
"dialect": "postgres",
|
||||
"windowDays": 90,
|
||||
"lastSuccessfulCursor": null,
|
||||
"serviceAccountUserPatterns": [],
|
||||
"redactionPatterns": [],
|
||||
"maxTemplatesPerRun": 5000,
|
||||
"minCalls": 5
|
||||
},
|
||||
"analysisBySql": {
|
||||
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
|
||||
"tablesTouched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literalSlots": []
|
||||
}
|
||||
},
|
||||
"baseline": {
|
||||
"version": 1,
|
||||
"fetchedAt": "2026-05-08T10:00:00.000Z",
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"pgServerVersion": "PostgreSQL 15.7",
|
||||
"templates": {
|
||||
"db5_q401": {
|
||||
"firstObservedAt": "2026-05-08T09:00:00.000Z",
|
||||
"perUser": {
|
||||
"11": {
|
||||
"calls": 100,
|
||||
"totalExecTime": 1000,
|
||||
"totalRows": 500
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expectedBaseline": {
|
||||
"version": 1,
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"templates": {
|
||||
"db5_q401": {
|
||||
"firstObservedAt": "2026-05-08T12:00:00.000Z",
|
||||
"perUser": {
|
||||
"11": {
|
||||
"calls": 4,
|
||||
"totalExecTime": 80,
|
||||
"totalRows": 8
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expectedFiles": {
|
||||
"manifest.json": {
|
||||
"json": {
|
||||
"source": "historic-sql",
|
||||
"connectionId": "warehouse",
|
||||
"dialect": "postgres",
|
||||
"fetchedAt": "2026-05-08T12:00:00.000Z",
|
||||
"windowStart": "2026-05-08T10:00:00.000Z",
|
||||
"windowEnd": "2026-05-08T12:00:00.000Z",
|
||||
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
|
||||
"templateCount": 1,
|
||||
"capped": false,
|
||||
"warnings": [
|
||||
"baseline_reset:pg_server_major changed from 15 to 16"
|
||||
],
|
||||
"degraded": true,
|
||||
"statsResetAt": "2026-05-08T08:00:00.000Z",
|
||||
"baselineFirstRun": true,
|
||||
"pgServerVersion": "PostgreSQL 16.4",
|
||||
"deallocCount": 0,
|
||||
"templates": [
|
||||
{
|
||||
"id": "db5_q401",
|
||||
"fingerprint": "fp_orders_status",
|
||||
"subClusterId": null,
|
||||
"path": "templates/db5_q401/page.md"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"templates/db5_q401/metadata.json": {
|
||||
"json": {
|
||||
"id": "db5_q401",
|
||||
"title": "postgres · analytics.orders [db5_q401]",
|
||||
"path": "templates/db5_q401/page.md",
|
||||
"objectType": "historic_sql_template",
|
||||
"lastEditedAt": null,
|
||||
"properties": {
|
||||
"fingerprint": "fp_orders_status",
|
||||
"sub_cluster_id": null,
|
||||
"dialect": "postgres",
|
||||
"tables_touched": [
|
||||
"analytics.orders"
|
||||
],
|
||||
"literal_slots": [],
|
||||
"triage_signals": {
|
||||
"executions_bucket": "mid",
|
||||
"distinct_users_bucket": "solo",
|
||||
"error_rate_bucket": "ok",
|
||||
"recency_bucket": "active",
|
||||
"service_account_only": "false",
|
||||
"runtime_bucket": "fast"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"templates/db5_q401/page.md": {
|
||||
"text": "# db5_q401\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
|
||||
},
|
||||
"templates/db5_q401/usage.json": {
|
||||
"json": {
|
||||
"stats": {
|
||||
"executions": 4,
|
||||
"distinct_users": 1,
|
||||
"first_seen": "2026-05-08T12:00:00.000Z",
|
||||
"last_seen": "2026-05-08T12:00:00.000Z",
|
||||
"p50_runtime_ms": null,
|
||||
"p95_runtime_ms": null,
|
||||
"mean_runtime_ms": 20,
|
||||
"error_rate": 0,
|
||||
"rows_produced": 8
|
||||
},
|
||||
"literal_slots": [],
|
||||
"samples": []
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,200 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { BigQueryHistoricSqlQueryHistoryReader } from './bigquery-query-history-reader.js';
|
||||
import { HistoricSqlGrantsMissingError } from './errors.js';
|
||||
|
||||
/** Canned result handed back by the fake query client used in these tests. */
interface FakeQueryResult {
  /** Column names; each row is indexed positionally against this list. */
  headers: string[];
  rows: unknown[][];
  totalRows: number;
  /** Result-level error text; the tests use it to simulate a failed query. */
  error?: string;
}
|
||||
|
||||
/**
 * Builds a fake query client whose mocked `executeQuery` returns the given
 * results one per call, in order.
 *
 * The `results` array is consumed (mutated via `shift`) as calls are made; once
 * it is empty any further call throws `unexpected query`.
 */
function queryClient(results: FakeQueryResult[]) {
  const executeQuery = vi.fn(async (_query: string) => {
    const next = results.shift();
    if (!next) {
      throw new Error('unexpected query');
    }
    return next;
  });
  return { executeQuery };
}
|
||||
|
||||
/**
 * Returns the SQL text passed to the first `executeQuery` call on the fake client.
 *
 * @throws Error when the client was never called.
 */
function firstQuery(client: ReturnType<typeof queryClient>): string {
  const call = client.executeQuery.mock.calls[0];
  if (!call) {
    throw new Error('expected query client to be called');
  }
  return call[0];
}
|
||||
|
||||
// Behavioural tests for the BigQuery query-history reader: probe error mapping,
// SQL generation for fetch(), row mapping, and identifier validation.
describe('BigQueryHistoricSqlQueryHistoryReader', () => {
  it('probes region-qualified INFORMATION_SCHEMA.JOBS_BY_PROJECT', async () => {
    const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]);
    const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });

    await expect(reader.probe(client)).resolves.toBeUndefined();

    expect(client.executeQuery).toHaveBeenCalledWith(
      'SELECT 1 FROM `project-1.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT` LIMIT 1',
    );
  });

  it('turns probe result errors into HistoricSqlGrantsMissingError', async () => {
    const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Access Denied: jobs.listAll' }]);
    const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'us-central1' });

    await expect(reader.probe(client)).rejects.toMatchObject({
      name: 'HistoricSqlGrantsMissingError',
      dialect: 'bigquery',
      remediation:
        'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.',
    });
  });

  it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => {
    const client = {
      executeQuery: vi.fn(async () => {
        throw new Error('permission denied');
      }),
    };
    const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });

    await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
  });

  it('fetches BigQuery jobs with cursor and maps them into RawQueryRow shape without rowsProduced', async () => {
    const client = queryClient([
      {
        headers: [
          'job_id',
          'query',
          'user_email',
          'creation_time',
          'end_time',
          'runtime_ms',
          'total_slot_ms',
          'total_bytes_processed',
          'state',
          'error_reason',
          'error_message',
          'statement_type',
        ],
        rows: [
          [
            'bquxjob_1',
            "SELECT COUNT(*) FROM `project-1.analytics.orders` WHERE status = 'paid'",
            'analyst-a@example.test',
            '2026-05-04T10:00:00.000Z',
            '2026-05-04T10:00:01.250Z',
            1250,
            3106,
            161164718,
            'DONE',
            null,
            null,
            'SELECT',
          ],
          [
            'bquxjob_2',
            'SELECT * FROM `project-1.analytics.missing_table`',
            'analyst-b@example.test',
            new Date('2026-05-04T10:05:00.000Z'),
            null,
            null,
            0,
            0,
            'DONE',
            'notFound',
            'Not found: Table project-1.analytics.missing_table',
            'SELECT',
          ],
        ],
        totalRows: 2,
      },
    ]);
    const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });

    const rows = [];
    for await (const row of reader.fetch(
      client,
      {
        start: new Date('2026-05-01T00:00:00.000Z'),
        end: new Date('2026-05-04T12:00:00.000Z'),
      },
      '2026-05-03T00:00:00.000Z',
    )) {
      rows.push(row);
    }

    expect(client.executeQuery).toHaveBeenCalledTimes(1);
    const sql = firstQuery(client);
    expect(sql).toContain('FROM `project-1.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT`');
    // The cursor, not the window start, is the lower bound when a cursor is supplied.
    expect(sql).toContain("creation_time >= TIMESTAMP('2026-05-03T00:00:00.000Z')");
    expect(sql).toContain("creation_time < TIMESTAMP('2026-05-04T12:00:00.000Z')");
    expect(sql).toContain("job_type = 'QUERY'");
    expect(sql).toContain("(statement_type IS NULL OR statement_type != 'SCRIPT')");
    expect(sql).toContain('ORDER BY creation_time ASC, job_id ASC');
    expect(sql).toContain('total_slot_ms');
    expect(sql).toContain('total_bytes_processed');
    expect(sql).not.toMatch(/total_rows/i);

    expect(rows).toEqual([
      {
        id: 'bquxjob_1',
        sql: "SELECT COUNT(*) FROM `project-1.analytics.orders` WHERE status = 'paid'",
        user: 'analyst-a@example.test',
        startedAt: '2026-05-04T10:00:00.000Z',
        endedAt: '2026-05-04T10:00:01.250Z',
        runtimeMs: 1250,
        success: true,
        errorMessage: null,
      },
      {
        id: 'bquxjob_2',
        sql: 'SELECT * FROM `project-1.analytics.missing_table`',
        user: 'analyst-b@example.test',
        startedAt: '2026-05-04T10:05:00.000Z',
        endedAt: null,
        runtimeMs: null,
        success: false,
        errorMessage: 'notFound: Not found: Table project-1.analytics.missing_table',
      },
    ]);
  });

  it('uses the window start when no cursor is available', async () => {
    const client = queryClient([{ headers: ['job_id'], rows: [], totalRows: 0 }]);
    const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'EU' });

    for await (const _row of reader.fetch(client, {
      start: new Date('2026-02-03T12:00:00.000Z'),
      end: new Date('2026-05-04T12:00:00.000Z'),
    })) {
      throw new Error('empty result should not yield rows');
    }

    const sql = firstQuery(client);
    expect(sql).toContain('FROM `project-1.region-eu.INFORMATION_SCHEMA.JOBS_BY_PROJECT`');
    expect(sql).toContain("creation_time >= TIMESTAMP('2026-02-03T12:00:00.000Z')");
  });

  it('throws a clear error when the query client cannot execute SQL', async () => {
    const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });

    await expect(async () => {
      for await (const _row of reader.fetch({}, { start: new Date(), end: new Date() })) {
        throw new Error('unreachable');
      }
    }).rejects.toThrow('Historic SQL BigQuery reader requires a query client with executeQuery(query)');
  });

  it('rejects unsafe project and region identifiers before building SQL', () => {
    expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project`1', region: 'US' })).toThrow(
      'Invalid BigQuery project id for historic-SQL ingest: project`1',
    );
    expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US;DROP' })).toThrow(
      'Invalid BigQuery region for historic-SQL ingest: US;DROP',
    );
  });
});
|
||||
|
|
@ -0,0 +1,219 @@
|
|||
import { HistoricSqlGrantsMissingError } from './errors.js';
|
||||
import type { HistoricSqlQueryHistoryReader, HistoricSqlRawQueryRow, HistoricSqlTimeWindow } from './types.js';
|
||||
|
||||
/** Minimal structural view of a query result that this reader consumes. */
interface QueryResultLike {
  /** Column names; looked up case-insensitively to find each column's position in a row. */
  headers: string[];
  rows: unknown[][];
  totalRows: number;
  /** Result-level error text; when truthy the reader raises instead of mapping rows. */
  error?: string;
}
|
||||
|
||||
/** Minimal structural view of a client that can run a SQL string and return a tabular result. */
interface QueryClientLike {
  executeQuery(query: string): Promise<QueryResultLike>;
}
|
||||
|
||||
/** Construction options for {@link BigQueryHistoricSqlQueryHistoryReader}. */
export interface BigQueryHistoricSqlQueryHistoryReaderOptions {
  /** BigQuery project id; only letters, digits, `_` and `-` are accepted. */
  projectId: string;
  /** BigQuery region such as `US` or `us-central1`; an optional `region-` prefix is tolerated. */
  region: string;
}
|
||||
|
||||
/** Remediation text attached to every grants error raised by this reader. */
const BIGQUERY_GRANTS_REMEDIATION =
  'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.';
|
||||
|
||||
/**
 * Narrows an opaque client to {@link QueryClientLike}.
 *
 * @param client Any value; accepted when it is an object with a function-valued `executeQuery`.
 * @returns The same object, typed as a query client.
 * @throws Error when the value does not expose `executeQuery`.
 */
function queryClient(client: unknown): QueryClientLike {
  if (
    client &&
    typeof client === 'object' &&
    'executeQuery' in client &&
    typeof (client as { executeQuery?: unknown }).executeQuery === 'function'
  ) {
    return client as QueryClientLike;
  }
  throw new Error('Historic SQL BigQuery reader requires a query client with executeQuery(query)');
}
|
||||
|
||||
/**
 * Wraps a failure in a `HistoricSqlGrantsMissingError` for the BigQuery dialect.
 *
 * The detail text is taken from `cause.message` for `Error` instances, from the
 * value itself for strings, and otherwise falls back to a generic sentence.
 * The original `cause` is passed through unchanged.
 */
function grantsError(cause: unknown): HistoricSqlGrantsMissingError {
  const message =
    cause instanceof Error
      ? cause.message
      : typeof cause === 'string'
        ? cause
        : 'BigQuery principal cannot query INFORMATION_SCHEMA.JOBS_BY_PROJECT.';
  return new HistoricSqlGrantsMissingError({
    dialect: 'bigquery',
    message: `Missing BigQuery audit grants for historic-SQL ingest: ${message}`,
    remediation: BIGQUERY_GRANTS_REMEDIATION,
    cause,
  });
}
|
||||
|
||||
/**
 * Validates a project id before it is interpolated into a backtick-quoted SQL path.
 *
 * @returns The id unchanged when it contains only letters, digits, `_` and `-`.
 * @throws Error for empty input or any other character (e.g. a backtick).
 */
function normalizeProjectId(value: string): string {
  if (!/^[A-Za-z0-9_-]+$/.test(value)) {
    throw new Error(`Invalid BigQuery project id for historic-SQL ingest: ${value}`);
  }
  return value;
}
|
||||
|
||||
/**
 * Canonicalizes a region name for use in a `region-<name>` SQL qualifier.
 *
 * The input is trimmed, lower-cased, and stripped of one leading `region-`
 * prefix; the remainder must consist of lowercase letters, digits and `-`.
 *
 * @returns The bare region (e.g. `us`, `us-central1`).
 * @throws Error when the remainder is empty or contains other characters; the
 *   message echoes the original, un-normalized input.
 */
function normalizeRegion(value: string): string {
  const region = value.trim().toLowerCase().replace(/^region-/, '');
  if (!/^[a-z0-9-]+$/.test(region)) {
    throw new Error(`Invalid BigQuery region for historic-SQL ingest: ${value}`);
  }
  return region;
}
|
||||
|
||||
/**
 * Renders a point in time as a BigQuery `TIMESTAMP('<ISO-8601>')` literal.
 *
 * @param value A `Date`, or a string parseable by the `Date` constructor.
 * @throws Error when the value does not denote a valid date.
 */
function timestampExpression(value: Date | string): string {
  const date = value instanceof Date ? value : new Date(value);
  if (Number.isNaN(date.getTime())) {
    throw new Error(`Invalid BigQuery query-history timestamp: ${String(value)}`);
  }
  // toISOString() only ever emits digits, '-', ':', '.', 'T' and 'Z' (plus a sign
  // for extended years), so the literal needs no quote escaping.
  return `TIMESTAMP('${date.toISOString()}')`;
}
|
||||
|
||||
/**
 * Builds a lookup from upper-cased header name to column position.
 * If two headers differ only by case, the later position wins.
 */
function indexByHeader(headers: string[]): Map<string, number> {
  const out = new Map<string, number>();
  headers.forEach((header, index) => {
    out.set(header.toUpperCase(), index);
  });
  return out;
}
|
||||
|
||||
/**
 * Reads one cell from a row by column name (case-insensitive).
 *
 * @param indexes Lookup keyed by upper-cased header name.
 * @returns The cell, or `null` when the column is not present in `indexes`.
 */
function value(row: unknown[], indexes: Map<string, number>, name: string): unknown {
  const index = indexes.get(name.toUpperCase());
  return index === undefined ? null : row[index];
}
|
||||
|
||||
/**
 * Converts a cell to text via `String(...)`, mapping `null`, `undefined` and
 * the empty string to `null`.
 */
function nullableString(raw: unknown): string | null {
  if (raw === null || raw === undefined) {
    return null;
  }
  const text = String(raw);
  return text.length > 0 ? text : null;
}
|
||||
|
||||
/**
 * Converts a cell to non-empty text.
 *
 * @param field Column name used in the error message.
 * @throws Error when the cell is `null`, `undefined` or the empty string.
 */
function requiredString(raw: unknown, field: string): string {
  const text = nullableString(raw);
  if (!text) {
    throw new Error(`BigQuery JOBS_BY_PROJECT row is missing ${field}`);
  }
  return text;
}
|
||||
|
||||
/**
 * Converts a cell to a non-negative finite number.
 *
 * `null`, `undefined`, the empty string, and anything that does not convert to
 * a finite number yield `null`. Negative values are clamped to `0`.
 */
function nullableNumber(raw: unknown): number | null {
  if (raw === null || raw === undefined || raw === '') {
    return null;
  }
  const number = typeof raw === 'number' ? raw : Number(raw);
  if (!Number.isFinite(number)) {
    return null;
  }
  return Math.max(0, number);
}
|
||||
|
||||
/**
 * Normalizes a timestamp cell to an ISO-8601 string.
 *
 * @param raw A `Date`, or a value whose string form the `Date` constructor can parse.
 * @param field Column name used in error messages.
 * @throws Error when the cell is missing/empty, or does not denote a valid date.
 */
function isoTimestamp(raw: unknown, field: string): string {
  const date = raw instanceof Date ? raw : new Date(requiredString(raw, field));
  // Validate Date objects as well: toISOString() on an invalid Date would
  // otherwise throw a bare RangeError without naming the offending column.
  if (Number.isNaN(date.getTime())) {
    throw new Error(`BigQuery JOBS_BY_PROJECT row has invalid ${field}: ${String(raw)}`);
  }
  return date.toISOString();
}
|
||||
|
||||
/**
 * Like `isoTimestamp`, but maps `null`, `undefined` and the empty string to `null`.
 * Errors are reported against the `end_time` column, the only column it is used for here.
 */
function nullableIsoTimestamp(raw: unknown): string | null {
  if (raw === null || raw === undefined || raw === '') {
    return null;
  }
  return isoTimestamp(raw, 'end_time');
}
|
||||
|
||||
/**
 * Decides whether a job row counts as a successful execution.
 *
 * Any error reason or message means failure. Otherwise the job succeeded when
 * its state is absent or equals `DONE` (case-insensitive).
 */
function executionSucceeded(state: string | null, errorReason: string | null, errorMessage: string | null): boolean {
  if (errorReason || errorMessage) {
    return false;
  }
  return state === null || state.toUpperCase() === 'DONE';
}
|
||||
|
||||
/**
 * Merges the error reason and message into one string: `"<reason>: <message>"`
 * when both are present, whichever one exists otherwise, or `null` when neither does.
 */
function combinedErrorMessage(errorReason: string | null, errorMessage: string | null): string | null {
  if (errorReason && errorMessage) {
    return `${errorReason}: ${errorMessage}`;
  }
  return errorMessage ?? errorReason;
}
|
||||
|
||||
/**
 * Maps one positional result row to a `HistoricSqlRawQueryRow`.
 *
 * `job_id`, `query` and `creation_time` are required; `user_email`, `end_time`
 * and `runtime_ms` may be absent and then map to `null`.
 *
 * @param indexes Lookup from upper-cased header name to column position.
 * @throws Error when a required column is missing/empty or a timestamp is invalid.
 */
function mapRow(row: unknown[], indexes: Map<string, number>): HistoricSqlRawQueryRow {
  const errorReason = nullableString(value(row, indexes, 'error_reason'));
  const errorMessage = nullableString(value(row, indexes, 'error_message'));
  return {
    id: requiredString(value(row, indexes, 'job_id'), 'job_id'),
    sql: requiredString(value(row, indexes, 'query'), 'query'),
    user: nullableString(value(row, indexes, 'user_email')),
    startedAt: isoTimestamp(value(row, indexes, 'creation_time'), 'creation_time'),
    endedAt: nullableIsoTimestamp(value(row, indexes, 'end_time')),
    runtimeMs: nullableNumber(value(row, indexes, 'runtime_ms')),
    success: executionSucceeded(nullableString(value(row, indexes, 'state')), errorReason, errorMessage),
    errorMessage: combinedErrorMessage(errorReason, errorMessage),
  };
}
|
||||
|
||||
/**
 * Reads query history from BigQuery's region-qualified
 * `INFORMATION_SCHEMA.JOBS_BY_PROJECT` view through a caller-supplied client
 * exposing `executeQuery(query)`.
 */
export class BigQueryHistoricSqlQueryHistoryReader implements HistoricSqlQueryHistoryReader {
  /** Backtick-quoted path of the jobs view, built once from validated identifiers. */
  private readonly viewPath: string;

  /**
   * @throws Error when `projectId` or `region` contains characters that are not
   *   allowed in the generated SQL path.
   */
  constructor(options: BigQueryHistoricSqlQueryHistoryReaderOptions) {
    const projectId = normalizeProjectId(options.projectId);
    const region = normalizeRegion(options.region);
    this.viewPath = `\`${projectId}.region-${region}.INFORMATION_SCHEMA.JOBS_BY_PROJECT\``;
  }

  /**
   * Checks that the client can read the jobs view by selecting a single row.
   *
   * @throws HistoricSqlGrantsMissingError when the query throws, the client is
   *   unusable, or the result carries an error.
   */
  async probe(client: unknown): Promise<void> {
    let result: QueryResultLike;
    try {
      result = await queryClient(client).executeQuery(`SELECT 1 FROM ${this.viewPath} LIMIT 1`);
    } catch (error) {
      throw grantsError(error);
    }
    if (result.error) {
      throw grantsError(result.error);
    }
  }

  /**
   * Yields query jobs created in `[cursor ?? window.start, window.end)`, ordered
   * by creation time and then job id. Script parent jobs and rows without query
   * text are excluded by the generated SQL.
   *
   * @param cursor Optional lower bound that replaces `window.start` when provided.
   * @throws Error when the client lacks `executeQuery` or a timestamp is invalid.
   * @throws HistoricSqlGrantsMissingError when the result carries an error.
   */
  async *fetch(
    client: unknown,
    window: HistoricSqlTimeWindow,
    cursor?: string | null,
  ): AsyncIterable<HistoricSqlRawQueryRow> {
    const start = timestampExpression(cursor ?? window.start);
    const end = timestampExpression(window.end);
    const sql = `
SELECT
  job_id,
  query,
  user_email,
  creation_time,
  end_time,
  TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND) AS runtime_ms,
  total_slot_ms,
  total_bytes_processed,
  state,
  error_result.reason AS error_reason,
  error_result.message AS error_message,
  statement_type
FROM ${this.viewPath}
WHERE creation_time >= ${start}
  AND creation_time < ${end}
  AND job_type = 'QUERY'
  AND query IS NOT NULL
  AND (statement_type IS NULL OR statement_type != 'SCRIPT')
ORDER BY creation_time ASC, job_id ASC`.trim();
    const result = await queryClient(client).executeQuery(sql);
    if (result.error) {
      // NOTE(review): every result-level error is reported as a missing-grants
      // error, even if its cause is unrelated to permissions — confirm intended.
      throw grantsError(result.error);
    }
    const indexes = indexByHeader(result.headers);
    for (const row of result.rows) {
      yield mapRow(row, indexes);
    }
  }
}
|
||||
251
packages/context/src/ingest/adapters/historic-sql/chunk.test.ts
Normal file
251
packages/context/src/ingest/adapters/historic-sql/chunk.test.ts
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { chunkHistoricSqlStagedDir, describeHistoricSqlScope } from './chunk.js';
|
||||
|
||||
/** Creates a fresh temporary directory (prefix `historic-sql-chunk-`) and resolves to its path. */
async function tempDir(): Promise<string> {
  return mkdtemp(join(tmpdir(), 'historic-sql-chunk-'));
}
|
||||
|
||||
/**
 * Writes `value` as pretty-printed JSON (2-space indent, trailing newline) to
 * `relPath` under `root`, creating parent directories as needed.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const target = join(root, relPath);
  // join(target, '..') resolves to the directory containing the target file.
  await mkdir(join(target, '..'), { recursive: true });
  await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
|
||||
|
||||
/**
 * Stages a minimal historic-SQL directory under `root` containing a single
 * template `fp_1`: the manifest plus the template's metadata, page and usage files.
 */
async function writeTemplate(root: string): Promise<void> {
  await writeJson(root, 'manifest.json', {
    source: 'historic-sql',
    connectionId: 'conn_1',
    dialect: 'snowflake',
    fetchedAt: '2026-05-04T12:00:00.000Z',
    windowStart: '2026-02-03T12:00:00.000Z',
    windowEnd: '2026-05-04T12:00:00.000Z',
    nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
    templateCount: 1,
    capped: false,
    warnings: ['source warning'],
    templates: [{ id: 'fp_1', fingerprint: 'fp_1', subClusterId: null, path: 'templates/fp_1/page.md' }],
  });
  // Writing metadata.json first also creates templates/fp_1/, which the plain
  // writeFile for page.md below relies on.
  await writeJson(root, 'templates/fp_1/metadata.json', {
    id: 'fp_1',
    title: 'snowflake · analytics.orders [fp_1]',
    path: 'templates/fp_1/page.md',
    objectType: 'historic_sql_template',
    lastEditedAt: null,
    properties: {
      fingerprint: 'fp_1',
      sub_cluster_id: null,
      dialect: 'snowflake',
      tables_touched: ['analytics.orders'],
      literal_slots: [{ position: 1, type: 'string', classification: 'constant' }],
      triage_signals: {
        executions_bucket: 'high',
        distinct_users_bucket: 'team',
        error_rate_bucket: 'ok',
        recency_bucket: 'active',
        service_account_only: 'false',
        slot_summary: '1 constant, 0 runtime',
      },
    },
  });
  await writeFile(join(root, 'templates/fp_1/page.md'), '# fp_1\n', 'utf-8');
  await writeJson(root, 'templates/fp_1/usage.json', {
    stats: {
      executions: 20,
      distinct_users: 3,
      first_seen: '2026-05-01T00:00:00.000Z',
      last_seen: '2026-05-04T11:55:00.000Z',
      p50_runtime_ms: 100,
      p95_runtime_ms: 200,
      error_rate: 0,
      rows_produced: 20,
    },
    literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 20]] }],
    samples: [],
  });
}
|
||||
|
||||
/**
 * Stages a fixture with two categorical sub-clusters of the same fingerprint
 * (`fp_order_status`) under `root`.
 *
 * Writes one `manifest.json` listing both templates, then `metadata.json`,
 * `page.md` and `usage.json` for each sub-cluster template, in that order.
 */
async function writeSubclusterTemplates(root: string): Promise<void> {
  const variants = [
    { id: 'fp_order_status__cat_2b2ff2318877', subClusterId: 'cat_2b2ff2318877' },
    { id: 'fp_order_status__cat_34f037ddcbfa', subClusterId: 'cat_34f037ddcbfa' },
  ];

  await writeJson(root, 'manifest.json', {
    source: 'historic-sql',
    connectionId: 'conn_1',
    dialect: 'snowflake',
    fetchedAt: '2026-05-04T12:00:00.000Z',
    windowStart: '2026-02-03T12:00:00.000Z',
    windowEnd: '2026-05-04T12:00:00.000Z',
    nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
    templateCount: 2,
    capped: false,
    warnings: [],
    // Same key order as the hand-written entries: id, fingerprint, subClusterId, path.
    templates: variants.map(({ id, subClusterId }) => ({
      id,
      fingerprint: 'fp_order_status',
      subClusterId,
      path: `templates/${id}/page.md`,
    })),
  });

  for (const { id, subClusterId } of variants) {
    const metadataPayload: unknown = {
      id,
      title: `snowflake · analytics.orders [fp_ord:${subClusterId.slice(-6)}]`,
      path: `templates/${id}/page.md`,
      objectType: 'historic_sql_template',
      lastEditedAt: null,
      properties: {
        fingerprint: 'fp_order_status',
        sub_cluster_id: subClusterId,
        dialect: 'snowflake',
        tables_touched: ['analytics.orders'],
        literal_slots: [{ position: 1, type: 'string', classification: 'categorical' }],
        triage_signals: {
          executions_bucket: 'mid',
          distinct_users_bucket: 'team',
          error_rate_bucket: 'ok',
          recency_bucket: 'active',
          service_account_only: 'false',
          slot_summary: '0 constant, 0 runtime',
        },
      },
    };

    const usagePayload: unknown = {
      stats: {
        executions: 3,
        distinct_users: 3,
        first_seen: '2026-05-04T10:00:00.000Z',
        last_seen: '2026-05-04T10:05:00.000Z',
        p50_runtime_ms: 120,
        p95_runtime_ms: 150,
        error_rate: 0,
        rows_produced: 36,
      },
      literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 3]] }],
      samples: [],
    };

    await writeJson(root, `templates/${id}/metadata.json`, metadataPayload);
    await writeFile(join(root, `templates/${id}/page.md`), `# ${id}\n`, 'utf-8');
    await writeJson(root, `templates/${id}/usage.json`, usagePayload);
  }
}
|
||||
|
||||
// Behavioural suite for the historic-SQL chunker and its scope descriptor.
describe('chunkHistoricSqlStagedDir', () => {
  it('emits one WorkUnit per changed template and keeps usage as dependency', async () => {
    const root = await tempDir();
    await writeTemplate(root);

    // Only metadata.json is reported as changed; page.md and usage.json are unchanged.
    const { workUnits, contextReport } = await chunkHistoricSqlStagedDir(root, {
      added: ['templates/fp_1/metadata.json'],
      modified: [],
      deleted: [],
      unchanged: ['templates/fp_1/page.md', 'templates/fp_1/usage.json', 'manifest.json'],
    });

    expect(workUnits).toEqual([
      {
        unitKey: 'historic-sql-fp-1',
        displayLabel: 'snowflake · analytics.orders [fp_1]',
        rawFiles: ['templates/fp_1/metadata.json'],
        dependencyPaths: ['manifest.json', 'templates/fp_1/usage.json'],
        peerFileIndex: ['templates/fp_1/page.md'],
        notes:
          'Infer canonical query intent for this single historic-SQL template only. Read metadata.json, page.md, and usage.json for this template; do not group sibling templates in this WorkUnit.',
      },
    ]);
    expect(contextReport).toEqual({ capped: false, warnings: ['source warning'] });
  });

  it('emits one WorkUnit per changed categorical sub-cluster', async () => {
    const root = await tempDir();
    await writeSubclusterTemplates(root);

    const chunked = await chunkHistoricSqlStagedDir(root, {
      added: [
        'templates/fp_order_status__cat_2b2ff2318877/metadata.json',
        'templates/fp_order_status__cat_34f037ddcbfa/metadata.json',
      ],
      modified: [],
      deleted: [],
      unchanged: [
        'manifest.json',
        'templates/fp_order_status__cat_2b2ff2318877/page.md',
        'templates/fp_order_status__cat_2b2ff2318877/usage.json',
        'templates/fp_order_status__cat_34f037ddcbfa/page.md',
        'templates/fp_order_status__cat_34f037ddcbfa/usage.json',
      ],
    });

    // Compare only the identifying fields; peerFileIndex and notes are not asserted here.
    const summaries = chunked.workUnits.map(({ unitKey, displayLabel, rawFiles, dependencyPaths }) => ({
      unitKey,
      displayLabel,
      rawFiles,
      dependencyPaths,
    }));

    expect(summaries).toEqual([
      {
        unitKey: 'historic-sql-fp-order-status-cat-2b2ff2318877',
        displayLabel: 'snowflake · analytics.orders [fp_ord:318877]',
        rawFiles: ['templates/fp_order_status__cat_2b2ff2318877/metadata.json'],
        dependencyPaths: ['manifest.json', 'templates/fp_order_status__cat_2b2ff2318877/usage.json'],
      },
      {
        unitKey: 'historic-sql-fp-order-status-cat-34f037ddcbfa',
        displayLabel: 'snowflake · analytics.orders [fp_ord:ddcbfa]',
        rawFiles: ['templates/fp_order_status__cat_34f037ddcbfa/metadata.json'],
        dependencyPaths: ['manifest.json', 'templates/fp_order_status__cat_34f037ddcbfa/usage.json'],
      },
    ]);
  });

  it('emits zero WorkUnits for usage-only diffs', async () => {
    const root = await tempDir();
    await writeTemplate(root);

    const chunked = await chunkHistoricSqlStagedDir(root, {
      added: [],
      modified: ['templates/fp_1/usage.json'],
      deleted: [],
      unchanged: ['templates/fp_1/metadata.json', 'templates/fp_1/page.md', 'manifest.json'],
    });

    expect(chunked.workUnits).toEqual([]);
    expect(chunked.eviction).toBeUndefined();
  });

  it('emits eviction only for deleted metadata or page files', async () => {
    const root = await tempDir();
    await writeTemplate(root);

    // A deleted usage.json must not appear in the eviction list; a deleted page.md must.
    const { eviction } = await chunkHistoricSqlStagedDir(root, {
      added: [],
      modified: [],
      deleted: ['templates/fp_1/usage.json', 'templates/fp_2/page.md'],
      unchanged: [],
    });

    expect(eviction).toEqual({ deletedRawPaths: ['templates/fp_2/page.md'] });
  });

  it('describes historic-sql scope without including unrelated paths', async () => {
    const root = await tempDir();
    await writeTemplate(root);

    const { fingerprint, isPathInScope } = await describeHistoricSqlScope(root);

    expect(fingerprint).toHaveLength(64);
    expect(isPathInScope('manifest.json')).toBe(true);
    expect(isPathInScope('templates/fp_1/usage.json')).toBe(true);
    expect(isPathInScope('pages/notion/page.md')).toBe(false);
  });
});
|
||||
86
packages/context/src/ingest/adapters/historic-sql/chunk.ts
Normal file
86
packages/context/src/ingest/adapters/historic-sql/chunk.ts
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { readFile, readdir } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, ScopeDescriptor, WorkUnit } from '../../types.js';
|
||||
import { historicSqlManifestSchema, historicSqlMetadataSchema } from './types.js';
|
||||
|
||||
/**
 * Lists every regular file below `root`, recursively.
 *
 * @param root Directory to scan.
 * @returns Paths relative to `root`, using `/` separators, in default sort order.
 */
async function walk(root: string): Promise<string[]> {
  const relativePaths: string[] = [];
  for (const entry of await readdir(root, { withFileTypes: true, recursive: true })) {
    if (!entry.isFile()) {
      continue;
    }
    const absolutePath = join(entry.parentPath, entry.name);
    // Normalise Windows separators so callers can match on forward slashes.
    relativePaths.push(relative(root, absolutePath).replace(/\\/g, '/'));
  }
  return relativePaths.sort();
}
|
||||
|
||||
/**
 * Builds a WorkUnit key from a template id.
 *
 * Every run of characters outside `[a-zA-Z0-9]` collapses to a single `-`,
 * leading and trailing separators are dropped, and the result is prefixed
 * with `historic-sql-`.
 */
function safeUnitKey(id: string): string {
  const alphanumericRuns = id.split(/[^a-zA-Z0-9]+/).filter((run) => run.length > 0);
  return 'historic-sql-' + alphanumericRuns.join('-');
}
|
||||
|
||||
/**
 * Loads `manifest.json` from `stagedDir` and validates it against
 * `historicSqlManifestSchema`.
 *
 * @throws Error prefixed with `Invalid historic-SQL manifest:` when the file
 *   cannot be read, is not valid JSON, or fails schema validation.
 */
async function readManifest(stagedDir: string) {
  try {
    const rawManifest = await readFile(join(stagedDir, 'manifest.json'), 'utf-8');
    const parsedJson: unknown = JSON.parse(rawManifest);
    return historicSqlManifestSchema.parse(parsedJson);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid historic-SQL manifest: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Reads and validates one template's `metadata.json`.
 *
 * @throws Error naming the offending `metadataPath` when the file is missing,
 *   is not valid JSON, or fails `historicSqlMetadataSchema` validation.
 */
async function readTemplateMetadata(stagedDir: string, metadataPath: string) {
  try {
    return historicSqlMetadataSchema.parse(JSON.parse(await readFile(join(stagedDir, metadataPath), 'utf-8')));
  } catch (error) {
    // Mirror readManifest's wrapping so a bad template can be located among many.
    throw new Error(
      `Invalid historic-SQL template metadata (${metadataPath}): ${error instanceof Error ? error.message : String(error)}`,
    );
  }
}

/**
 * Splits a staged historic-SQL directory into one WorkUnit per template.
 *
 * A template is any `templates/<id>/page.md` found under `stagedDir`. Its
 * "primary" files are `metadata.json` and `page.md`; `usage.json` and
 * `manifest.json` are only ever dependencies.
 *
 * @param stagedDir Directory containing `manifest.json` and `templates/`.
 * @param diffSet Optional change set. When given, only templates with an added
 *   or modified primary file produce a WorkUnit, and `rawFiles` is limited to
 *   the touched primary files. When omitted, every template produces a
 *   WorkUnit containing all of its existing primary files.
 * @returns The work units, an eviction entry for deleted primary files (if
 *   any), a reconcile note with the manifest's template count, and the
 *   manifest's `capped` / `warnings` as the context report.
 * @throws Error when the manifest or a selected template's metadata is invalid.
 */
export async function chunkHistoricSqlStagedDir(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
  // Hoisted so the patterns are not rebuilt for every path that is tested.
  const templatePagePattern = /^templates\/[^/]+\/page\.md$/;
  const templatePrimaryPattern = /^templates\/[^/]+\/(metadata\.json|page\.md)$/;

  const files = await walk(stagedDir);
  // Set lookups keep the skip path linear; array scans made it quadratic on large staged dirs.
  const fileSet = new Set(files);
  const manifest = await readManifest(stagedDir);
  const touched = diffSet ? new Set([...diffSet.added, ...diffSet.modified]) : null;
  const workUnits: WorkUnit[] = [];

  for (const pagePath of files.filter((path) => templatePagePattern.test(path))) {
    const metadataPath = pagePath.replace(/\/page\.md$/, '/metadata.json');
    const usagePath = pagePath.replace(/\/page\.md$/, '/usage.json');
    const primary = [metadataPath, pagePath].filter((path) => fileSet.has(path));
    if (touched && !primary.some((path) => touched.has(path))) {
      // Usage-only (or no) changes do not warrant re-processing the template.
      continue;
    }

    const metadata = await readTemplateMetadata(stagedDir, metadataPath);
    const rawFiles = touched ? primary.filter((path) => touched.has(path)).sort() : primary.sort();
    const dependencyPaths = ['manifest.json', fileSet.has(usagePath) ? usagePath : null]
      .filter((path): path is string => typeof path === 'string' && !rawFiles.includes(path))
      .sort();
    // Everything staged that is neither raw input nor a dependency is listed as a peer.
    const excluded = new Set([...rawFiles, ...dependencyPaths]);
    const peerFileIndex = files.filter((path) => !excluded.has(path)).sort();

    workUnits.push({
      unitKey: safeUnitKey(metadata.id),
      displayLabel: metadata.title,
      rawFiles,
      dependencyPaths,
      peerFileIndex,
      notes:
        'Infer canonical query intent for this single historic-SQL template only. Read metadata.json, page.md, and usage.json for this template; do not group sibling templates in this WorkUnit.',
    });
  }

  // Only deleted metadata.json / page.md files trigger eviction; a deleted usage.json does not.
  const deletedPrimary = diffSet?.deleted.filter((path) => templatePrimaryPattern.test(path));

  return {
    workUnits,
    eviction: deletedPrimary && deletedPrimary.length > 0 ? { deletedRawPaths: deletedPrimary.sort() } : undefined,
    reconcileNotes: [`Historic-SQL staged templates=${manifest.templateCount}`],
    contextReport: {
      capped: manifest.capped,
      warnings: manifest.warnings,
    },
  };
}
|
||||
|
||||
/**
 * Describes the scope of a staged historic-SQL pull.
 *
 * The fingerprint is the hex SHA-256 of a JSON object holding the manifest's
 * `connectionId`, `dialect`, `windowStart` and `windowEnd`, in that key order.
 * A path is in scope when it is `manifest.json` or starts with `templates/`.
 *
 * @throws Error when the manifest cannot be read or validated.
 */
export async function describeHistoricSqlScope(stagedDir: string): Promise<ScopeDescriptor> {
  const { connectionId, dialect, windowStart, windowEnd } = await readManifest(stagedDir);

  const hasher = createHash('sha256');
  hasher.update(JSON.stringify({ connectionId, dialect, windowStart, windowEnd }));

  return {
    fingerprint: hasher.digest('hex'),
    isPathInScope: (rawPath) => {
      if (rawPath === 'manifest.json') {
        return true;
      }
      return rawPath.startsWith('templates/');
    },
  };
}
|
||||
197
packages/context/src/ingest/adapters/historic-sql/detect.test.ts
Normal file
197
packages/context/src/ingest/adapters/historic-sql/detect.test.ts
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { detectHistoricSqlStagedDir } from './detect.js';
|
||||
import {
|
||||
HISTORIC_SQL_SOURCE_KEY,
|
||||
historicSqlManifestSchema,
|
||||
historicSqlMetadataSchema,
|
||||
historicSqlPullConfigSchema,
|
||||
historicSqlUsageSchema,
|
||||
} from './types.js';
|
||||
|
||||
/** Creates a fresh temporary directory for one detection test and returns its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-detect-');
  const created = await mkdtemp(prefix);
  return created;
}
|
||||
|
||||
/**
 * Writes `value` as pretty-printed JSON (2-space indent, trailing newline) to
 * `relPath` under `root`, creating the parent directory first.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const destination = join(root, relPath);
  const parentDir = join(destination, '..');
  await mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2) + '\n';
  await writeFile(destination, serialized, 'utf-8');
}
|
||||
|
||||
// Covers both detection routes: the manifest `source` field and the templates/ layout.
describe('historic-sql staged dir detection', () => {
  it('detects manifest source', async () => {
    const root = await tempDir();
    const manifestPayload: unknown = {
      source: HISTORIC_SQL_SOURCE_KEY,
      connectionId: 'conn_1',
      dialect: 'snowflake',
      fetchedAt: '2026-05-04T12:00:00.000Z',
      windowStart: '2026-02-03T12:00:00.000Z',
      windowEnd: '2026-05-04T12:00:00.000Z',
      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
      templateCount: 0,
      capped: false,
      warnings: [],
      templates: [],
    };
    await writeJson(root, 'manifest.json', manifestPayload);

    await expect(detectHistoricSqlStagedDir(root)).resolves.toBe(true);
  });

  it('detects document-shaped template structure without manifest', async () => {
    const root = await tempDir();
    const templateDir = join(root, 'templates', 'fp_1');

    // An unrelated file at the root must not affect detection.
    await writeFile(join(root, 'not-a-match.txt'), 'x', 'utf-8');
    await mkdir(templateDir, { recursive: true });
    await writeFile(join(templateDir, 'metadata.json'), '{}', 'utf-8');
    await writeFile(join(templateDir, 'page.md'), '# fp_1\n', 'utf-8');

    await expect(detectHistoricSqlStagedDir(root)).resolves.toBe(true);
  });

  it('does not detect unrelated directories', async () => {
    const root = await tempDir();
    await writeJson(root, 'manifest.json', { source: 'notion' });

    await expect(detectHistoricSqlStagedDir(root)).resolves.toBe(false);
  });
});
|
||||
|
||||
// Pins the parsing behaviour of the historic-SQL schemas: defaults and accepted shapes.
describe('historic-sql schemas', () => {
  it('defaults disabled optional pull-config fields through the parser', () => {
    const parsed = historicSqlPullConfigSchema.parse({ dialect: 'bigquery' });

    expect(parsed).toEqual({
      dialect: 'bigquery',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      maxTemplatesPerRun: 5000,
      minCalls: 5,
    });
  });

  it('accepts postgres pull config with a minCalls floor', () => {
    const parsed = historicSqlPullConfigSchema.parse({ dialect: 'postgres', minCalls: 12 });

    expect(parsed).toEqual({
      dialect: 'postgres',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      maxTemplatesPerRun: 5000,
      minCalls: 12,
    });
  });

  it('accepts postgres manifest fields with defaults for older dialects', () => {
    // Manifest carrying every postgres-specific field explicitly.
    const postgresManifest = historicSqlManifestSchema.parse({
      source: HISTORIC_SQL_SOURCE_KEY,
      connectionId: 'conn_pg',
      dialect: 'postgres',
      fetchedAt: '2026-05-08T12:00:00.000Z',
      windowStart: '2026-05-08T11:00:00.000Z',
      windowEnd: '2026-05-08T12:00:00.000Z',
      nextSuccessfulCursor: '2026-05-08T12:00:00.000Z',
      templateCount: 0,
      capped: false,
      warnings: [],
      templates: [],
      degraded: true,
      statsResetAt: '2026-05-01T00:00:00.000Z',
      baselineFirstRun: true,
      pgServerVersion: 'PostgreSQL 16.4',
      deallocCount: 3,
    });

    expect(postgresManifest).toMatchObject({
      dialect: 'postgres',
      degraded: true,
      statsResetAt: '2026-05-01T00:00:00.000Z',
      baselineFirstRun: true,
      pgServerVersion: 'PostgreSQL 16.4',
      deallocCount: 3,
    });

    // Manifest that omits those fields entirely; the parser must fill in defaults.
    const snowflakeManifest = historicSqlManifestSchema.parse({
      source: HISTORIC_SQL_SOURCE_KEY,
      connectionId: 'conn_sf',
      dialect: 'snowflake',
      fetchedAt: '2026-05-08T12:00:00.000Z',
      windowStart: '2026-05-01T12:00:00.000Z',
      windowEnd: '2026-05-08T12:00:00.000Z',
      nextSuccessfulCursor: null,
      templateCount: 0,
      capped: false,
      warnings: [],
      templates: [],
    });

    expect(snowflakeManifest).toMatchObject({
      degraded: false,
      statsResetAt: null,
      baselineFirstRun: false,
      pgServerVersion: null,
      deallocCount: null,
    });
  });

  it('accepts postgres usage stats with mean_runtime_ms and empty samples', () => {
    const usage = historicSqlUsageSchema.parse({
      stats: {
        executions: 25,
        distinct_users: 2,
        first_seen: '2026-05-08T10:00:00.000Z',
        last_seen: '2026-05-08T12:00:00.000Z',
        p50_runtime_ms: null,
        p95_runtime_ms: null,
        mean_runtime_ms: 32.5,
        error_rate: 0,
        rows_produced: 1042,
      },
      literal_slots: [],
      samples: [],
    });

    expect(usage.stats.mean_runtime_ms).toBe(32.5);
    expect(usage.samples).toEqual([]);
  });

  it('pins the Notion-compatible metadata envelope', () => {
    const metadata = historicSqlMetadataSchema.parse({
      id: 'fp_1',
      title: 'snowflake · analytics.orders [fp_1]',
      path: 'templates/fp_1/page.md',
      objectType: 'historic_sql_template',
      lastEditedAt: null,
      properties: {
        fingerprint: 'fp_1',
        sub_cluster_id: null,
        dialect: 'snowflake',
        tables_touched: ['analytics.orders'],
        literal_slots: [{ position: 1, type: 'string', classification: 'constant' }],
        triage_signals: {
          executions_bucket: 'high',
          distinct_users_bucket: 'team',
          error_rate_bucket: 'ok',
          recency_bucket: 'active',
          service_account_only: 'false',
          slot_summary: '1 constant, 0 runtime',
        },
      },
    });

    expect(metadata.objectType).toBe('historic_sql_template');
    expect(metadata.lastEditedAt).toBeNull();
    // service_account_only is carried as the string 'false', not a boolean.
    expect(metadata.properties.triage_signals.service_account_only).toBe('false');
  });
});
|
||||
37
packages/context/src/ingest/adapters/historic-sql/detect.ts
Normal file
37
packages/context/src/ingest/adapters/historic-sql/detect.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import { readFile, readdir } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { HISTORIC_SQL_SOURCE_KEY } from './types.js';
|
||||
|
||||
/**
 * Decides whether `stagedDir` holds a historic-SQL pull.
 *
 * 1. If `manifest.json` parses and declares a `source`, the answer is whether
 *    that source equals `HISTORIC_SQL_SOURCE_KEY`.
 * 2. Otherwise (no manifest, unreadable manifest, or no `source` field) the
 *    directory is detected structurally: some directory under `templates/`
 *    must contain both `metadata.json` and `page.md`.
 *
 * Never throws; filesystem and parse failures resolve to the fallback or `false`.
 */
export async function detectHistoricSqlStagedDir(stagedDir: string): Promise<boolean> {
  try {
    const rawManifest = await readFile(join(stagedDir, 'manifest.json'), 'utf-8');
    const { source } = JSON.parse(rawManifest) as { source?: unknown };
    if (source !== undefined) {
      return source === HISTORIC_SQL_SOURCE_KEY;
    }
  } catch {
    // Fall through to structural detection for stage-only fixtures.
  }

  try {
    // Directory path -> which of the two marker files were seen directly inside it.
    const markersByDir = new Map<string, Set<string>>();
    const entries = await readdir(join(stagedDir, 'templates'), { withFileTypes: true, recursive: true });
    for (const entry of entries) {
      const isMarker = entry.name === 'metadata.json' || entry.name === 'page.md';
      if (!entry.isFile() || !isMarker) {
        continue;
      }
      const seen = markersByDir.get(entry.parentPath) ?? new Set<string>();
      seen.add(entry.name);
      markersByDir.set(entry.parentPath, seen);
    }
    // A directory qualifies only when it holds both marker files.
    return [...markersByDir.values()].some((seen) => seen.size === 2);
  } catch {
    return false;
  }
}
|
||||
61
packages/context/src/ingest/adapters/historic-sql/errors.ts
Normal file
61
packages/context/src/ingest/adapters/historic-sql/errors.ts
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
import type { HistoricSqlDialect } from './types.js';
|
||||
|
||||
/** Constructor options for {@link HistoricSqlGrantsMissingError}. */
interface HistoricSqlGrantsMissingErrorOptions {
  /** Dialect the failure was observed on. */
  dialect: HistoricSqlDialect;
  /** Text used as the error message. */
  message: string;
  /** Remediation text exposed on the error. */
  remediation: string;
  /** Underlying error, forwarded as the standard `cause` when provided. */
  cause?: unknown;
}

/**
 * Error carrying the affected dialect and a remediation string for a
 * missing-grants failure.
 */
export class HistoricSqlGrantsMissingError extends Error {
  readonly dialect: HistoricSqlDialect;
  readonly remediation: string;

  constructor({ dialect, message, remediation, cause }: HistoricSqlGrantsMissingErrorOptions) {
    // Only pass an options bag when there is a cause, so no `cause` property is created otherwise.
    super(message, cause === undefined ? undefined : { cause });
    this.name = 'HistoricSqlGrantsMissingError';
    this.dialect = dialect;
    this.remediation = remediation;
  }
}
|
||||
|
||||
/** Constructor options for {@link HistoricSqlExtensionMissingError}. */
interface HistoricSqlExtensionMissingErrorOptions {
  /** Dialect the failure was observed on. */
  dialect: HistoricSqlDialect;
  /** Text used as the error message. */
  message: string;
  /** Remediation text exposed on the error. */
  remediation: string;
  /** Underlying error, forwarded as the standard `cause` when provided. */
  cause?: unknown;
}

/**
 * Error carrying the affected dialect and a remediation string for a
 * missing-extension failure.
 */
export class HistoricSqlExtensionMissingError extends Error {
  readonly dialect: HistoricSqlDialect;
  readonly remediation: string;

  constructor({ dialect, message, remediation, cause }: HistoricSqlExtensionMissingErrorOptions) {
    // Only pass an options bag when there is a cause, so no `cause` property is created otherwise.
    super(message, cause === undefined ? undefined : { cause });
    this.name = 'HistoricSqlExtensionMissingError';
    this.dialect = dialect;
    this.remediation = remediation;
  }
}
|
||||
|
||||
/** Constructor options for {@link HistoricSqlVersionUnsupportedError}. */
interface HistoricSqlVersionUnsupportedErrorOptions {
  /** Dialect whose version was checked. */
  dialect: HistoricSqlDialect;
  /** Version string that was detected. */
  detectedVersion: string;
  /** Version string reported as the minimum requirement. */
  minimumVersion: string;
}

/**
 * Error for a detected version below the stated minimum. The message is
 * generated from the dialect and the two version strings, which are also
 * exposed as fields.
 */
export class HistoricSqlVersionUnsupportedError extends Error {
  readonly dialect: HistoricSqlDialect;
  readonly detectedVersion: string;
  readonly minimumVersion: string;

  constructor({ dialect, detectedVersion, minimumVersion }: HistoricSqlVersionUnsupportedErrorOptions) {
    super(
      `Unsupported ${dialect} version for historic-SQL ingest: detected ${detectedVersion}; requires ${minimumVersion} or newer.`,
    );
    this.name = 'HistoricSqlVersionUnsupportedError';
    this.dialect = dialect;
    this.detectedVersion = detectedVersion;
    this.minimumVersion = minimumVersion;
  }
}
|
||||
|
|
@ -0,0 +1,304 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
|
||||
import { HistoricSqlSourceAdapter } from './historic-sql.adapter.js';
|
||||
import { pgssBaselinePath } from './stage-pgss.js';
|
||||
import type { HistoricSqlQueryHistoryReader, PostgresPgssReader } from './types.js';
|
||||
|
||||
/** Creates a fresh temporary directory for one adapter test and returns its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-adapter-');
  const created = await mkdtemp(prefix);
  return created;
}
|
||||
|
||||
/**
 * Writes `value` as pretty-printed JSON (2-space indent, trailing newline) to
 * `relPath` under `root`, creating the parent directory first.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const destination = join(root, relPath);
  const parentDir = join(destination, '..');
  await mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2) + '\n';
  await writeFile(destination, serialized, 'utf-8');
}
|
||||
|
||||
/**
 * Stub SQL-analysis port: every call resolves to a freshly built analysis for
 * fingerprint `fp_1`, regardless of input.
 */
const sqlAnalysis: SqlAnalysisPort = {
  analyzeForFingerprint: async () => ({
    fingerprint: 'fp_1',
    normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?',
    tablesTouched: ['analytics.orders'],
    literalSlots: [{ position: 1, type: 'string', exampleValue: 'paid' }],
  }),
};
|
||||
|
||||
/**
 * Stub query-history reader: `probe` resolves without doing anything and
 * `fetch` yields exactly one successful query row (`q1`).
 */
const reader: HistoricSqlQueryHistoryReader = {
  probe: async () => {},
  fetch: async function* () {
    yield {
      id: 'q1',
      sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'",
      user: 'analyst',
      startedAt: '2026-05-04T11:00:00.000Z',
      endedAt: null,
      runtimeMs: 10,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    };
  },
};
|
||||
|
||||
// End-to-end style checks of the adapter against stub readers and temp directories.
describe('HistoricSqlSourceAdapter', () => {
  it('declares canonical adapter metadata', () => {
    const subject = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });

    expect(subject.source).toBe('historic-sql');
    expect(subject.skillNames).toEqual(['historic_sql_ingest']);
    expect(subject.reconcileSkillNames).toEqual(['historic_sql_curator']);
    expect(subject.evidenceIndexing).toBe('documents');
    expect(subject.triageSupported).toBe(true);
  });

  it('fetches staged templates through injected reader and SqlAnalysisPort', async () => {
    const root = await tempDir();
    const subject = new HistoricSqlSourceAdapter({
      sqlAnalysis,
      reader,
      queryClient: {},
      now: () => new Date('2026-05-04T12:00:00.000Z'),
    });

    await subject.fetch(
      {
        dialect: 'snowflake',
        windowDays: 90,
        lastSuccessfulCursor: null,
        serviceAccountUserPatterns: [],
        redactionPatterns: [],
        maxTemplatesPerRun: 5000,
      },
      root,
      { connectionId: 'conn_1', sourceKey: 'historic-sql' },
    );

    // Whatever fetch staged must be recognised by the adapter's own detector.
    await expect(subject.detect(root)).resolves.toBe(true);
  });

  it('reads triage signals from usage.json and metadata properties', async () => {
    const root = await tempDir();

    const manifestPayload: unknown = {
      source: 'historic-sql',
      connectionId: 'conn_1',
      dialect: 'snowflake',
      fetchedAt: '2026-05-04T12:00:00.000Z',
      windowStart: '2026-02-03T12:00:00.000Z',
      windowEnd: '2026-05-04T12:00:00.000Z',
      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
      templateCount: 1,
      capped: false,
      warnings: [],
      templates: [{ id: 'fp_1', fingerprint: 'fp_1', subClusterId: null, path: 'templates/fp_1/page.md' }],
    };
    const metadataPayload: unknown = {
      id: 'fp_1',
      title: 'snowflake · analytics.orders [fp_1]',
      path: 'templates/fp_1/page.md',
      objectType: 'historic_sql_template',
      lastEditedAt: null,
      properties: {
        fingerprint: 'fp_1',
        sub_cluster_id: null,
        dialect: 'snowflake',
        tables_touched: ['analytics.orders'],
        literal_slots: [{ position: 1, type: 'string', classification: 'constant' }],
        triage_signals: {
          executions_bucket: 'high',
          distinct_users_bucket: 'team',
          error_rate_bucket: 'ok',
          recency_bucket: 'active',
          service_account_only: 'false',
          slot_summary: '1 constant, 0 runtime',
        },
      },
    };
    const usagePayload: unknown = {
      stats: {
        executions: 20,
        distinct_users: 3,
        first_seen: '2026-05-01T00:00:00.000Z',
        last_seen: '2026-05-04T11:55:00.000Z',
        p50_runtime_ms: 100,
        p95_runtime_ms: 200,
        error_rate: 0,
      },
      literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 20]] }],
      samples: [],
    };

    await writeJson(root, 'manifest.json', manifestPayload);
    await writeJson(root, 'templates/fp_1/metadata.json', metadataPayload);
    await writeFile(join(root, 'templates/fp_1/page.md'), '# fp_1\n', 'utf-8');
    await writeJson(root, 'templates/fp_1/usage.json', usagePayload);

    const subject = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });

    // metadata.lastEditedAt is null in the fixture; the expected value equals usage.stats.last_seen.
    await expect(subject.getTriageSignals(root, 'fp_1')).resolves.toEqual({
      objectType: 'historic_sql_template',
      lastEditedAt: '2026-05-04T11:55:00.000Z',
      propertyHints: {
        executions_bucket: 'high',
        distinct_users_bucket: 'team',
        error_rate_bucket: 'ok',
        recency_bucket: 'active',
        service_account_only: 'false',
        slot_summary: '1 constant, 0 runtime',
      },
    });
  });

  it('dispatches postgres fetches through PGSS staging and writes the baseline only after pull success', async () => {
    const root = await tempDir();
    const baselineRootDir = await tempDir();
    const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');

    // Any use of the per-execution reader on the postgres path fails the test.
    const unusedPerExecutionReader: HistoricSqlQueryHistoryReader = {
      async probe() {
        throw new Error('per-execution reader must not be used for postgres');
      },
      async *fetch() {
        throw new Error('per-execution reader must not be used for postgres');
      },
    };

    const postgresReader: PostgresPgssReader = {
      async probe() {
        return { pgServerVersion: 'PostgreSQL 16.4', warnings: [] };
      },
      async readSnapshot() {
        return {
          statsResetAt: '2026-05-08T08:00:00.000Z',
          deallocCount: 0,
          rows: [
            {
              queryid: '901',
              userid: '11',
              username: 'analyst',
              dbid: '5',
              database: 'warehouse',
              query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
              calls: 9,
              totalExecTime: 90,
              meanExecTime: 10,
              totalRows: 18,
            },
          ],
        };
      },
    };

    const subject = new HistoricSqlSourceAdapter({
      sqlAnalysis,
      reader: unusedPerExecutionReader,
      queryClient: {},
      postgresReader,
      postgresQueryClient: {
        async executeQuery() {
          return { headers: [], rows: [] };
        },
      },
      postgresBaselineRootDir: baselineRootDir,
      now: () => new Date('2026-05-08T12:00:00.000Z'),
    });

    await subject.fetch(
      {
        dialect: 'postgres',
        windowDays: 90,
        lastSuccessfulCursor: null,
        serviceAccountUserPatterns: [],
        redactionPatterns: [],
        maxTemplatesPerRun: 5000,
        minCalls: 5,
      },
      root,
      { connectionId: 'conn_pg', sourceKey: 'historic-sql' },
    );

    const stagedManifest = JSON.parse(await readFile(join(root, 'manifest.json'), 'utf-8')) as {
      dialect: string;
      baselineFirstRun: boolean;
      templates: Array<{ id: string }>;
    };
    expect(stagedManifest.dialect).toBe('postgres');
    expect(stagedManifest.baselineFirstRun).toBe(true);
    expect(stagedManifest.templates).toEqual([
      { id: 'db5_q901', fingerprint: 'fp_1', subClusterId: null, path: 'templates/db5_q901/page.md' },
    ]);
    // Fetch alone must not have written the baseline file.
    await expect(readFile(baselinePath, 'utf-8')).rejects.toMatchObject({ code: 'ENOENT' });

    await subject.onPullSucceeded({
      connectionId: 'conn_pg',
      sourceKey: 'historic-sql',
      syncId: 'sync_pg',
      trigger: 'scheduled_pull',
      completedAt: new Date('2026-05-08T12:01:00.000Z'),
      stagedDir: root,
    });

    const writtenBaseline = JSON.parse(await readFile(baselinePath, 'utf-8')) as {
      fetchedAt: string;
      templates: Record<string, { perUser: Record<string, { calls: number }> }>;
    };
    expect(writtenBaseline.fetchedAt).toBe('2026-05-08T12:00:00.000Z');
    expect(writtenBaseline.templates.db5_q901.perUser['11'].calls).toBe(9);
  });

  it('fails postgres fetches clearly when no PGSS reader is configured', async () => {
    const subject = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });

    await expect(
      subject.fetch(
        {
          dialect: 'postgres',
          windowDays: 90,
          lastSuccessfulCursor: null,
          serviceAccountUserPatterns: [],
          redactionPatterns: [],
          maxTemplatesPerRun: 5000,
          minCalls: 5,
        },
        await tempDir(),
        { connectionId: 'conn_pg', sourceKey: 'historic-sql' },
      ),
    ).rejects.toThrow('Historic SQL Postgres fetch requires deps.postgresReader');
  });

  it('forwards manifest cursor through onPullSucceeded without changing the SourceAdapter signature', async () => {
    const root = await tempDir();
    const manifestPayload: unknown = {
      source: 'historic-sql',
      connectionId: 'conn_1',
      dialect: 'snowflake',
      fetchedAt: '2026-05-04T12:00:00.000Z',
      windowStart: '2026-02-03T12:00:00.000Z',
      windowEnd: '2026-05-04T12:00:00.000Z',
      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
      templateCount: 0,
      capped: false,
      warnings: [],
      templates: [],
    };
    await writeJson(root, 'manifest.json', manifestPayload);

    const pullSucceededSpy = vi.fn(async () => {});
    const subject = new HistoricSqlSourceAdapter({
      sqlAnalysis,
      reader,
      queryClient: {},
      onPullSucceeded: pullSucceededSpy,
    });
    const completedAt = new Date('2026-05-04T12:01:00.000Z');

    await subject.onPullSucceeded({
      connectionId: 'conn_1',
      sourceKey: 'historic-sql',
      syncId: 'sync_1',
      trigger: 'scheduled_pull',
      completedAt,
      stagedDir: root,
    });

    // The injected hook receives the original event plus the cursor read from manifest.json.
    expect(pullSucceededSpy).toHaveBeenCalledWith({
      connectionId: 'conn_1',
      sourceKey: 'historic-sql',
      syncId: 'sync_1',
      trigger: 'scheduled_pull',
      completedAt,
      stagedDir: root,
      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
    });
  });
});
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import type {
|
||||
ChunkResult,
|
||||
DiffSet,
|
||||
FetchContext,
|
||||
IngestTrigger,
|
||||
ScopeDescriptor,
|
||||
SourceAdapter,
|
||||
TriageSignals,
|
||||
} from '../../types.js';
|
||||
import { chunkHistoricSqlStagedDir, describeHistoricSqlScope } from './chunk.js';
|
||||
import { detectHistoricSqlStagedDir } from './detect.js';
|
||||
import { stageHistoricSqlTemplates } from './stage.js';
|
||||
import {
|
||||
pgssBaselinePath,
|
||||
stagePgStatStatementsTemplates,
|
||||
writePgssBaselineAtomic,
|
||||
type StagePgStatStatementsTemplatesResult,
|
||||
} from './stage-pgss.js';
|
||||
import {
|
||||
historicSqlManifestSchema,
|
||||
historicSqlMetadataSchema,
|
||||
historicSqlPullConfigSchema,
|
||||
historicSqlUsageSchema,
|
||||
type HistoricSqlSourceAdapterDeps,
|
||||
} from './types.js';
|
||||
|
||||
/**
 * `SourceAdapter` implementation for the `historic-sql` source.
 *
 * Detection, chunking and scope description are delegated to the sibling modules.
 * Fetching is dispatched on the pull config's dialect: `postgres` goes through the
 * pg_stat_statements staging path, every other dialect through the generic staging path.
 */
export class HistoricSqlSourceAdapter implements SourceAdapter {
  readonly source = 'historic-sql';
  readonly skillNames = ['historic_sql_ingest'];
  readonly reconcileSkillNames = ['historic_sql_curator'];
  readonly evidenceIndexing = 'documents' as const;
  readonly triageSupported = true;

  /**
   * Postgres staging results keyed by staged directory. An entry is recorded by `fetch`
   * and consumed by `onPullSucceeded`, which is when the baseline is actually written.
   */
  private readonly pendingPgssBaselines = new Map<string, StagePgStatStatementsTemplatesResult>();

  constructor(private readonly deps: HistoricSqlSourceAdapterDeps) {}

  /** Reports whether `stagedDir` is recognised as a historic-SQL staged directory. */
  detect(stagedDir: string): Promise<boolean> {
    return detectHistoricSqlStagedDir(stagedDir);
  }

  /**
   * Validates `pullConfig` and stages query templates into `stagedDir`.
   *
   * @throws Error when the dialect is `postgres` and `deps.postgresReader` is missing, or
   *   when neither `deps.postgresQueryClient` nor `deps.queryClient` exposes `executeQuery`.
   */
  async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const config = historicSqlPullConfigSchema.parse(pullConfig);

    if (config.dialect !== 'postgres') {
      await stageHistoricSqlTemplates({
        stagedDir,
        connectionId: ctx.connectionId,
        queryClient: this.deps.queryClient,
        reader: this.deps.reader,
        sqlAnalysis: this.deps.sqlAnalysis,
        pullConfig: config,
        now: this.deps.now?.(),
      });
      return;
    }

    const pgssReader = this.deps.postgresReader;
    if (!pgssReader) {
      throw new Error('Historic SQL Postgres fetch requires deps.postgresReader');
    }

    // Prefer the dedicated Postgres client; fall back to the generic query client.
    const candidate = this.deps.postgresQueryClient ?? this.deps.queryClient;
    const canExecute =
      typeof candidate === 'object' &&
      candidate !== null &&
      'executeQuery' in candidate &&
      typeof (candidate as { executeQuery?: unknown }).executeQuery === 'function';
    if (!canExecute) {
      throw new Error('Historic SQL Postgres fetch requires deps.postgresQueryClient with executeQuery(sql, params?)');
    }

    const staged = await stagePgStatStatementsTemplates({
      stagedDir,
      connectionId: ctx.connectionId,
      queryClient: candidate as NonNullable<HistoricSqlSourceAdapterDeps['postgresQueryClient']>,
      reader: pgssReader,
      sqlAnalysis: this.deps.sqlAnalysis,
      pullConfig: config,
      baselinePath: pgssBaselinePath(this.deps.postgresBaselineRootDir, ctx.connectionId),
      now: this.deps.now?.(),
    });
    // The baseline is not written here; it is held until the pull is reported successful.
    this.pendingPgssBaselines.set(stagedDir, staged);
  }

  /** Splits the staged directory into chunks, optionally restricted by `diffSet`. */
  chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    return chunkHistoricSqlStagedDir(stagedDir, diffSet);
  }

  /** Describes the scope covered by the staged directory. */
  describeScope(stagedDir: string): Promise<ScopeDescriptor> {
    return describeHistoricSqlScope(stagedDir);
  }

  /**
   * Builds triage signals for the template whose manifest id equals `externalId`.
   *
   * @returns An empty object when the manifest has no such template; otherwise the
   *   object type, last-seen timestamp and triage hints read from the template's
   *   `metadata.json` and `usage.json`.
   */
  async getTriageSignals(stagedDir: string, externalId: string): Promise<TriageSignals> {
    const manifest = await this.loadManifest(stagedDir);
    const entry = manifest.templates.find((candidate) => candidate.id === externalId);
    if (!entry) {
      return {};
    }

    // Template files live next to page.md, so strip the page file name to get the directory.
    const templateDir = entry.path.replace(/\/page\.md$/, '');
    const metadata = historicSqlMetadataSchema.parse(
      await HistoricSqlSourceAdapter.loadJson(stagedDir, templateDir, 'metadata.json'),
    );
    const usage = historicSqlUsageSchema.parse(
      await HistoricSqlSourceAdapter.loadJson(stagedDir, templateDir, 'usage.json'),
    );

    return {
      objectType: metadata.objectType,
      lastEditedAt: usage.stats.last_seen,
      propertyHints: metadata.properties.triage_signals,
    };
  }

  /**
   * Post-pull hook. For a `postgres` manifest with a pending staging result, writes the
   * baseline and drops the pending entry; then calls `deps.onPullSucceeded` (if set) with
   * the context extended by the manifest's `nextSuccessfulCursor`.
   */
  async onPullSucceeded(ctx: {
    connectionId: string;
    sourceKey: string;
    syncId: string;
    trigger: IngestTrigger;
    completedAt: Date;
    stagedDir: string;
  }): Promise<void> {
    const manifest = await this.loadManifest(ctx.stagedDir);

    const pending = manifest.dialect === 'postgres' ? this.pendingPgssBaselines.get(ctx.stagedDir) : undefined;
    if (pending) {
      await writePgssBaselineAtomic(pending.baselinePath, pending.baseline);
      this.pendingPgssBaselines.delete(ctx.stagedDir);
    }

    await this.deps.onPullSucceeded?.({ ...ctx, nextSuccessfulCursor: manifest.nextSuccessfulCursor });
  }

  /** Reads and schema-validates `manifest.json` from the staged directory. */
  private async loadManifest(stagedDir: string) {
    return historicSqlManifestSchema.parse(await HistoricSqlSourceAdapter.loadJson(stagedDir, 'manifest.json'));
  }

  /** Reads the UTF-8 file at the joined path segments and parses it as JSON. */
  private static async loadJson(...segments: string[]): Promise<unknown> {
    return JSON.parse(await readFile(join(...segments), 'utf-8'));
  }
}
|
||||
|
|
@ -0,0 +1,281 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
HistoricSqlExtensionMissingError,
|
||||
HistoricSqlGrantsMissingError,
|
||||
HistoricSqlVersionUnsupportedError,
|
||||
} from './errors.js';
|
||||
import { PostgresPgssQueryHistoryReader } from './postgres-pgss-query-history-reader.js';
|
||||
|
||||
/** Canned result returned by the fake query client used in these tests. */
interface FakeQueryResult {
  /** Column names, positionally aligned with each entry of `rows`. */
  headers: string[];
  /** Result rows as positional cell arrays. */
  rows: unknown[][];
  totalRows?: number;
  /** When set to a non-empty string, the reader is expected to treat the query as failed. */
  error?: string;
}
|
||||
|
||||
/**
 * Builds a fake query client that replays `results` in order, one per `executeQuery` call.
 * A queued `Error` is thrown instead of returned; running past the end of the queue throws
 * `unexpected query`. The `results` array is consumed (mutated) as calls are made.
 */
function queryClient(results: Array<FakeQueryResult | Error>) {
  return {
    executeQuery: vi.fn(async (_query: string, _params?: unknown[]) => {
      const queued = results.shift();
      if (!queued) {
        throw new Error('unexpected query');
      }
      if (queued instanceof Error) {
        throw queued;
      }
      return queued;
    }),
  };
}
|
||||
|
||||
/**
 * Returns the SQL text passed to the `index`-th `executeQuery` call on the fake client.
 *
 * @throws Error when the client was not called that many times.
 */
function executedSql(client: ReturnType<typeof queryClient>, index: number): string {
  const invocation = client.executeQuery.mock.calls[index];
  if (!invocation) {
    throw new Error(`expected query client call ${index}`);
  }
  const [sql] = invocation;
  return sql;
}
|
||||
|
||||
describe('PostgresPgssQueryHistoryReader', () => {
  // Fixture builders. Each call returns a fresh object because the fake client consumes its queue.
  const versionProbe = (serverVersion: string, serverVersionNum = 160004): FakeQueryResult => ({
    headers: ['server_version_num', 'server_version'],
    rows: [[serverVersionNum, serverVersion]],
  });
  const scalar = (header: string, cell: unknown): FakeQueryResult => ({ headers: [header], rows: [[cell]] });
  const statsInfo = (): FakeQueryResult => ({
    headers: ['stats_reset', 'dealloc'],
    rows: [[new Date('2026-05-01T00:00:00.000Z'), 7]],
  });

  it('probes version, extension presence, grants, and tracking state', async () => {
    const client = queryClient([
      versionProbe('PostgreSQL 16.4 on x86_64-apple-darwin'),
      scalar('?column?', 1),
      scalar('has_role', true),
      scalar('track', 'top'),
      scalar('max', '5000'),
    ]);
    const reader = new PostgresPgssQueryHistoryReader();

    await expect(reader.probe(client)).resolves.toEqual({
      pgServerVersion: 'PostgreSQL 16.4 on x86_64-apple-darwin',
      warnings: [],
    });

    expect(executedSql(client, 0)).toContain("current_setting('server_version_num')::int");
    expect(executedSql(client, 1)).toBe('SELECT 1 FROM pg_stat_statements LIMIT 1');
    expect(executedSql(client, 2)).toBe(
      "SELECT pg_has_role(current_user, 'pg_read_all_stats', 'USAGE') AS has_role",
    );
    expect(executedSql(client, 3)).toBe("SELECT current_setting('pg_stat_statements.track') AS track");
    expect(executedSql(client, 4)).toBe("SELECT current_setting('pg_stat_statements.max') AS max");
  });

  it('rejects PostgreSQL versions older than 14 without probing the extension', async () => {
    // The second queued result must never be consumed: the probe stops after the version check.
    const client = queryClient([versionProbe('PostgreSQL 13.12', 130012), statsInfo()]);
    const reader = new PostgresPgssQueryHistoryReader();

    const probing = reader.probe(client);
    await expect(probing).rejects.toMatchObject({
      name: 'HistoricSqlVersionUnsupportedError',
      dialect: 'postgres',
      detectedVersion: 'PostgreSQL 13.12',
      minimumVersion: 'PostgreSQL 14',
    });
    await expect(probing).rejects.toBeInstanceOf(HistoricSqlVersionUnsupportedError);
    expect(client.executeQuery).toHaveBeenCalledTimes(1);
  });

  it('maps a missing pg_stat_statements relation to HistoricSqlExtensionMissingError', async () => {
    const client = queryClient([
      versionProbe('PostgreSQL 16.4'),
      new Error('relation "pg_stat_statements" does not exist'),
    ]);
    const reader = new PostgresPgssQueryHistoryReader();

    const probing = reader.probe(client);
    await expect(probing).rejects.toMatchObject({
      name: 'HistoricSqlExtensionMissingError',
      dialect: 'postgres',
    });
    await expect(probing).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError);
  });

  it('maps pg_stat_statements preload failures to HistoricSqlExtensionMissingError with preload remediation', async () => {
    const client = queryClient([
      versionProbe('PostgreSQL 16.4'),
      new Error('pg_stat_statements must be loaded via shared_preload_libraries'),
    ]);
    const reader = new PostgresPgssQueryHistoryReader();

    const probing = reader.probe(client);
    await expect(probing).rejects.toMatchObject({
      name: 'HistoricSqlExtensionMissingError',
      dialect: 'postgres',
      message: 'pg_stat_statements is installed but not loaded via shared_preload_libraries.',
      remediation: expect.stringContaining("shared_preload_libraries includes 'pg_stat_statements'"),
    });
    await expect(probing).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError);
  });

  it('maps missing pg_read_all_stats membership to HistoricSqlGrantsMissingError', async () => {
    const client = queryClient([versionProbe('PostgreSQL 16.4'), scalar('?column?', 1), scalar('has_role', false)]);
    const reader = new PostgresPgssQueryHistoryReader();

    const probing = reader.probe(client);
    await expect(probing).rejects.toMatchObject({
      name: 'HistoricSqlGrantsMissingError',
      dialect: 'postgres',
      remediation: 'GRANT pg_read_all_stats TO <connection role>;',
    });
    await expect(probing).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
  });

  it('returns a warning instead of failing when pg_stat_statements.track is none', async () => {
    const client = queryClient([
      versionProbe('PostgreSQL 16.4'),
      scalar('?column?', 1),
      scalar('has_role', true),
      scalar('track', 'none'),
      scalar('max', '5000'),
    ]);
    const reader = new PostgresPgssQueryHistoryReader();

    await expect(reader.probe(client)).resolves.toEqual({
      pgServerVersion: 'PostgreSQL 16.4',
      warnings: [
        "pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config",
      ],
    });
  });

  it('warns when pg_stat_statements.max is below the recommended floor', async () => {
    const client = queryClient([
      versionProbe('PostgreSQL 16.4'),
      scalar('?column?', 1),
      scalar('has_role', true),
      scalar('track', 'top'),
      scalar('max', '1000'),
    ]);
    const reader = new PostgresPgssQueryHistoryReader();

    await expect(reader.probe(client)).resolves.toEqual({
      pgServerVersion: 'PostgreSQL 16.4',
      warnings: [
        'pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn',
      ],
    });
  });

  it('reads a parameterized pg_stat_statements snapshot and stats info', async () => {
    const snapshotResult: FakeQueryResult = {
      headers: [
        'queryid',
        'userid',
        'username',
        'dbid',
        'database',
        'query',
        'calls',
        'total_exec_time',
        'mean_exec_time',
        'total_rows',
      ],
      rows: [
        // First row: every cell arrives as text, so numeric fields must be parsed.
        [
          '922337203685477580',
          '16384',
          'analyst',
          '16385',
          'warehouse',
          'SELECT count(*) FROM public.orders WHERE status = $1',
          '42',
          '2100.5',
          '50.0119',
          '9001',
        ],
        // Second row: numeric cells arrive as numbers.
        [
          '922337203685477581',
          '16386',
          'unknown',
          '16385',
          'warehouse',
          'SELECT * FROM public.customers WHERE id = $1',
          5,
          30,
          6,
          5,
        ],
      ],
    };
    const client = queryClient([snapshotResult, statsInfo()]);
    const reader = new PostgresPgssQueryHistoryReader();

    await expect(reader.readSnapshot(client, { minCalls: 5, maxTemplates: 500 })).resolves.toEqual({
      statsResetAt: '2026-05-01T00:00:00.000Z',
      deallocCount: 7,
      rows: [
        {
          queryid: '922337203685477580',
          userid: '16384',
          username: 'analyst',
          dbid: '16385',
          database: 'warehouse',
          query: 'SELECT count(*) FROM public.orders WHERE status = $1',
          calls: 42,
          totalExecTime: 2100.5,
          meanExecTime: 50.0119,
          totalRows: 9001,
        },
        {
          queryid: '922337203685477581',
          userid: '16386',
          username: 'unknown',
          dbid: '16385',
          database: 'warehouse',
          query: 'SELECT * FROM public.customers WHERE id = $1',
          calls: 5,
          totalExecTime: 30,
          meanExecTime: 6,
          totalRows: 5,
        },
      ],
    });

    const snapshotSql = executedSql(client, 0);
    const expectedFragments = [
      'FROM pg_stat_statements s',
      'LEFT JOIN pg_roles',
      'LEFT JOIN pg_database',
      'WHERE s.toplevel = true',
      'AND s.calls >= $1',
      'ORDER BY s.total_exec_time DESC',
      'LIMIT $2',
    ];
    for (const fragment of expectedFragments) {
      expect(snapshotSql).toContain(fragment);
    }
    expect(client.executeQuery.mock.calls[0]?.[1]).toEqual([5, 500]);
    expect(executedSql(client, 1)).toBe('SELECT stats_reset, dealloc FROM pg_stat_statements_info');
  });
});
|
||||
|
|
@ -0,0 +1,262 @@
|
|||
import {
|
||||
HistoricSqlExtensionMissingError,
|
||||
HistoricSqlGrantsMissingError,
|
||||
HistoricSqlVersionUnsupportedError,
|
||||
} from './errors.js';
|
||||
import type {
|
||||
KloPostgresQueryClient,
|
||||
PostgresPgssProbeResult,
|
||||
PostgresPgssReader,
|
||||
PostgresPgssRow,
|
||||
PostgresPgssSnapshot,
|
||||
} from './types.js';
|
||||
|
||||
/** Minimal shape of a query result as consumed by this reader. */
interface QueryResultLike {
  /** Column names, positionally aligned with each entry of `rows`. */
  headers: string[];
  /** Result rows as positional cell arrays. */
  rows: unknown[][];
  totalRows?: number;
  /** A non-empty string here is treated as a failed query by `execute`. */
  error?: string;
}
|
||||
|
||||
const VERSION_SQL = `
|
||||
SELECT current_setting('server_version_num')::int AS server_version_num,
|
||||
version() AS server_version
|
||||
`.trim();
|
||||
|
||||
// Probe statements issued by `probe`, in execution order after the version check.

/** Touches the pg_stat_statements view to confirm it is present and readable. */
const EXTENSION_PROBE_SQL = 'SELECT 1 FROM pg_stat_statements LIMIT 1';

/** Asks whether the current role holds pg_read_all_stats. */
const GRANTS_PROBE_SQL = "SELECT pg_has_role(current_user, 'pg_read_all_stats', 'USAGE') AS has_role";

/** Reads the pg_stat_statements.track setting. */
const TRACKING_PROBE_SQL = "SELECT current_setting('pg_stat_statements.track') AS track";

/** Reads the pg_stat_statements.max setting. */
const MAX_SETTING_PROBE_SQL = "SELECT current_setting('pg_stat_statements.max') AS max";

/** A pg_stat_statements.max below this value produces a probe warning. */
const RECOMMENDED_PGSS_MAX = 5000;

/** Reads the reset timestamp and deallocation counter used by `readSnapshot`. */
const STATS_INFO_SQL = 'SELECT stats_reset, dealloc FROM pg_stat_statements_info';
|
||||
|
||||
const SNAPSHOT_SQL = `
|
||||
SELECT
|
||||
s.queryid::text AS queryid,
|
||||
s.userid::text AS userid,
|
||||
COALESCE(r.rolname, 'unknown') AS username,
|
||||
s.dbid::text AS dbid,
|
||||
d.datname AS database,
|
||||
s.query,
|
||||
s.calls,
|
||||
s.total_exec_time,
|
||||
s.mean_exec_time,
|
||||
s.rows AS total_rows
|
||||
FROM pg_stat_statements s
|
||||
LEFT JOIN pg_roles r ON s.userid = r.oid
|
||||
LEFT JOIN pg_database d ON s.dbid = d.oid
|
||||
WHERE s.toplevel = true
|
||||
AND s.calls >= $1
|
||||
ORDER BY s.total_exec_time DESC
|
||||
LIMIT $2
|
||||
`.trim();
|
||||
|
||||
/** Remediation text attached to extension-missing errors: two sentences joined by a space. */
const POSTGRES_EXTENSION_REMEDIATION =
  'Run CREATE EXTENSION pg_stat_statements; against the connection database.' +
  ' ' +
  "Ensure shared_preload_libraries includes 'pg_stat_statements' in the Postgres parameter group or config.";

/** Remediation text attached to grants-missing errors. */
const POSTGRES_GRANTS_REMEDIATION = 'GRANT pg_read_all_stats TO <connection role>;';
|
||||
|
||||
/**
 * Narrows an untyped client to `KloPostgresQueryClient`.
 *
 * @param client Any value; accepted only if it is an object with a callable `executeQuery`.
 * @returns The same reference, typed as a Postgres query client.
 * @throws Error when `client` has no callable `executeQuery`.
 */
function queryClient(client: unknown): KloPostgresQueryClient {
  const candidate =
    typeof client === 'object' && client !== null ? (client as { executeQuery?: unknown }) : undefined;
  if (typeof candidate?.executeQuery !== 'function') {
    throw new Error('Historic SQL Postgres PGSS reader requires a query client with executeQuery(sql, params?)');
  }
  return client as KloPostgresQueryClient;
}
|
||||
|
||||
/**
 * Runs `sql` through the client and converts an in-band error into an exception.
 *
 * @returns The client's result when it carries no non-empty string `error`.
 * @throws Error with the result's `error` text when that field is a non-empty string;
 *   anything the client itself throws propagates unchanged.
 */
async function execute(client: KloPostgresQueryClient, sql: string, params?: unknown[]): Promise<QueryResultLike> {
  const outcome = await client.executeQuery(sql, params);
  const failure = 'error' in outcome ? outcome.error : undefined;
  if (typeof failure === 'string' && failure !== '') {
    throw new Error(failure);
  }
  return outcome;
}
|
||||
|
||||
/**
 * Maps each lower-cased header name to its column position.
 * When two headers differ only by case, the later position wins.
 */
function indexes(headers: string[]): Map<string, number> {
  return new Map(headers.map((header, position) => [header.toLowerCase(), position] as const));
}
|
||||
|
||||
/**
 * Looks up the cell for `header` (case-insensitive) in a positional row.
 *
 * @returns `null` when the header is not in `headerIndexes`; otherwise whatever the row
 *   holds at that position, which is `undefined` if the row is shorter than expected.
 */
function value(row: unknown[], headerIndexes: Map<string, number>, header: string): unknown {
  const position = headerIndexes.get(header.toLowerCase());
  if (position === undefined) {
    return null;
  }
  return row[position];
}
|
||||
|
||||
/**
 * Stringifies a cell, collapsing "no value" to `null`.
 *
 * @returns `null` for `null`, `undefined`, or anything whose string form is empty;
 *   otherwise `String(raw)`.
 */
function nullableString(raw: unknown): string | null {
  if (raw == null) {
    return null;
  }
  // An empty string is falsy, so `||` turns it into null and passes everything else through.
  return String(raw) || null;
}
|
||||
|
||||
/**
 * Stringifies a cell that must be present.
 *
 * @param field Column name used in the error message.
 * @throws Error when the cell is null, undefined, or stringifies to an empty string.
 */
function requiredString(raw: unknown, field: string): string {
  const text = nullableString(raw);
  // nullableString never returns an empty string, so null is the only "missing" case.
  if (text === null) {
    throw new Error(`Postgres pg_stat_statements row is missing ${field}`);
  }
  return text;
}
|
||||
|
||||
/**
 * Parses a cell that must hold a finite number.
 *
 * Numbers are used as-is and other values go through `Number(...)`, so numeric text such
 * as `'42'` or `'2100.5'` is accepted. Missing or blank cells (`null`, `undefined`, `''`,
 * whitespace-only text) and booleans are rejected explicitly: `Number()` would otherwise
 * coerce them to 0 or 1 and hide an absent column behind a plausible value.
 *
 * @param field Column name used in the error message.
 * @throws Error when the cell is missing, blank, boolean, non-numeric, NaN, or infinite.
 */
function requiredFiniteNumber(raw: unknown, field: string): number {
  const isMissing =
    raw === null ||
    raw === undefined ||
    typeof raw === 'boolean' ||
    (typeof raw === 'string' && raw.trim().length === 0);
  const parsed = isMissing ? Number.NaN : typeof raw === 'number' ? raw : Number(raw);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Postgres pg_stat_statements row has invalid ${field}: ${String(raw)}`);
  }
  return parsed;
}
|
||||
|
||||
/**
 * Parses an optional integer cell.
 *
 * @returns `null` for `null`, `undefined`, `''`, or anything that does not convert to a
 *   finite number; otherwise the value truncated toward zero.
 */
function nullableInteger(raw: unknown): number | null {
  if (raw == null || raw === '') {
    return null;
  }
  const parsed = typeof raw === 'number' ? raw : Number(raw);
  if (!Number.isFinite(parsed)) {
    return null;
  }
  return Math.trunc(parsed);
}
|
||||
|
||||
/**
 * Normalises an optional timestamp cell to an ISO-8601 string.
 *
 * `Date` instances are used directly; any other value is parsed from its string form.
 * Both paths share one validity check, so an invalid `Date` instance yields `null`
 * (calling `toISOString()` on it would throw `RangeError`), matching unparseable text.
 *
 * @returns `null` for `null`, `undefined`, `''`, or any value that is not a valid date;
 *   otherwise the timestamp as produced by `Date.prototype.toISOString`.
 */
function nullableIsoTimestamp(raw: unknown): string | null {
  if (raw === null || raw === undefined || raw === '') {
    return null;
  }
  const date = raw instanceof Date ? raw : new Date(String(raw));
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}
|
||||
|
||||
/**
 * Returns the first row of a result together with its header lookup.
 *
 * @param context Short label for the query, used in the error message.
 * @throws Error when the result has no first row.
 */
function firstRow(result: QueryResultLike, context: string): { row: unknown[]; headers: Map<string, number> } {
  const head = result.rows[0];
  if (!head) {
    throw new Error(`Postgres historic-SQL ${context} query returned no rows`);
  }
  const headers = indexes(result.headers);
  return { row: head, headers };
}
|
||||
|
||||
/**
 * Tells whether an error says the pg_stat_statements relation does not exist.
 * The text checked is `error.message` for `Error` instances, otherwise `String(error)`.
 */
function isMissingPgssRelation(error: unknown): boolean {
  const text = error instanceof Error ? error.message : String(error);
  return text.search(/relation ["']?pg_stat_statements["']? does not exist/i) !== -1;
}
|
||||
|
||||
/**
 * Tells whether an error mentions pg_stat_statements followed by shared_preload_libraries
 * on the same line. The text checked is `error.message` for `Error` instances, otherwise
 * `String(error)`.
 */
function isPgssPreloadRequired(error: unknown): boolean {
  const text = error instanceof Error ? error.message : String(error);
  return text.search(/pg_stat_statements.*shared_preload_libraries/i) !== -1;
}
|
||||
|
||||
/**
 * Builds the Postgres "extension missing" error with the standard remediation text.
 *
 * @param cause The underlying failure, attached as the error's cause.
 * @param message Optional override; defaults to the "not installed" wording.
 */
function extensionMissingError(cause: unknown, message?: string): HistoricSqlExtensionMissingError {
  const resolvedMessage = message ?? 'pg_stat_statements extension is not installed in the connection database.';
  return new HistoricSqlExtensionMissingError({
    dialect: 'postgres',
    message: resolvedMessage,
    remediation: POSTGRES_EXTENSION_REMEDIATION,
    cause,
  });
}
|
||||
|
||||
/** Builds the Postgres "grants missing" error with the standard remediation text. */
function grantsMissingError(): HistoricSqlGrantsMissingError {
  const details = {
    dialect: 'postgres' as const,
    message: 'Postgres connection role lacks pg_read_all_stats for historic-SQL ingest.',
    remediation: POSTGRES_GRANTS_REMEDIATION,
  };
  return new HistoricSqlGrantsMissingError(details);
}
|
||||
|
||||
/**
 * Converts one positional pg_stat_statements result row into a `PostgresPgssRow`.
 *
 * Identifier and query columns are required strings, `username` and `database` are
 * optional, and the counters are required finite numbers (`calls` and `total_rows` are
 * truncated to integers).
 *
 * @throws Error from the field parsers when a required column is missing or invalid.
 */
function mapSnapshotRow(row: unknown[], headerIndexes: Map<string, number>): PostgresPgssRow {
  const cell = (header: string): unknown => value(row, headerIndexes, header);
  const text = (header: string): string => requiredString(cell(header), header);
  const numeric = (header: string): number => requiredFiniteNumber(cell(header), header);

  // Fields are evaluated in declaration order, so the first bad column is the one reported.
  return {
    queryid: text('queryid'),
    userid: text('userid'),
    username: nullableString(cell('username')),
    dbid: text('dbid'),
    database: nullableString(cell('database')),
    query: text('query'),
    calls: Math.trunc(numeric('calls')),
    totalExecTime: numeric('total_exec_time'),
    meanExecTime: numeric('mean_exec_time'),
    totalRows: Math.trunc(numeric('total_rows')),
  };
}
|
||||
|
||||
/**
 * Interprets a boolean cell that may arrive either typed or in text form.
 * Accepts `true`, `1`, and the strings `t` / `true` (any case, surrounding whitespace ignored).
 */
function isPostgresTrue(raw: unknown): boolean {
  if (typeof raw === 'string') {
    const text = raw.trim().toLowerCase();
    return text === 't' || text === 'true';
  }
  return raw === true || raw === 1;
}

/** Reads query statistics from Postgres' pg_stat_statements through a generic query client. */
export class PostgresPgssQueryHistoryReader implements PostgresPgssReader {
  /**
   * Verifies that the connection can serve historic-SQL ingest.
   *
   * Runs, in order: the server version check, a read of pg_stat_statements, the
   * pg_read_all_stats role check, and reads of the `track` and `max` settings.
   *
   * @param client Any object exposing `executeQuery(sql, params?)`.
   * @returns The server version string plus non-fatal configuration warnings.
   * @throws HistoricSqlVersionUnsupportedError when `server_version_num` is below 140000.
   * @throws HistoricSqlExtensionMissingError when the view is missing or the library is not preloaded.
   * @throws HistoricSqlGrantsMissingError when the role check does not come back true.
   */
  async probe(client: unknown): Promise<PostgresPgssProbeResult> {
    const pgClient = queryClient(client);

    const version = firstRow(await execute(pgClient, VERSION_SQL), 'version probe');
    const serverVersionNum = requiredFiniteNumber(
      value(version.row, version.headers, 'server_version_num'),
      'server_version_num',
    );
    const pgServerVersion = requiredString(value(version.row, version.headers, 'server_version'), 'server_version');
    if (serverVersionNum < 140000) {
      throw new HistoricSqlVersionUnsupportedError({
        dialect: 'postgres',
        detectedVersion: pgServerVersion,
        minimumVersion: 'PostgreSQL 14',
      });
    }

    try {
      await execute(pgClient, EXTENSION_PROBE_SQL);
    } catch (error) {
      if (isMissingPgssRelation(error)) {
        throw extensionMissingError(error);
      }
      if (isPgssPreloadRequired(error)) {
        throw extensionMissingError(
          error,
          'pg_stat_statements is installed but not loaded via shared_preload_libraries.',
        );
      }
      // Anything else is not a known extension problem; surface it unchanged.
      throw error;
    }

    const grants = firstRow(await execute(pgClient, GRANTS_PROBE_SQL), 'grant probe');
    // The client may hand booleans back as text, like the numeric cells this reader already parses.
    if (!isPostgresTrue(value(grants.row, grants.headers, 'has_role'))) {
      throw grantsMissingError();
    }

    const tracking = firstRow(await execute(pgClient, TRACKING_PROBE_SQL), 'tracking probe');
    const track = nullableString(value(tracking.row, tracking.headers, 'track'));

    const maxSetting = firstRow(await execute(pgClient, MAX_SETTING_PROBE_SQL), 'max-setting probe');
    const pgssMax = nullableInteger(value(maxSetting.row, maxSetting.headers, 'max'));

    const warnings: string[] = [];
    if (track === 'none') {
      warnings.push('pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config');
    }
    if (pgssMax !== null && pgssMax < RECOMMENDED_PGSS_MAX) {
      warnings.push(
        `pg_stat_statements.max is ${pgssMax}; set it to at least ${RECOMMENDED_PGSS_MAX} to reduce query-template eviction churn`,
      );
    }

    return { pgServerVersion, warnings };
  }

  /**
   * Reads the current pg_stat_statements rows plus the reset/dealloc counters.
   *
   * @param client Any object exposing `executeQuery(sql, params?)`.
   * @param options `minCalls` and `maxTemplates` are bound to `$1` and `$2` of the snapshot query.
   * @returns The mapped rows with `statsResetAt` (ISO string or null) and `deallocCount`.
   * @throws Error when the stats-info query returns no rows or a snapshot row is malformed.
   */
  async readSnapshot(
    client: unknown,
    options: { minCalls: number; maxTemplates: number },
  ): Promise<PostgresPgssSnapshot> {
    const pgClient = queryClient(client);

    const snapshotResult = await execute(pgClient, SNAPSHOT_SQL, [options.minCalls, options.maxTemplates]);
    const snapshotHeaders = indexes(snapshotResult.headers);

    const stats = firstRow(await execute(pgClient, STATS_INFO_SQL), 'stats-info');

    return {
      statsResetAt: nullableIsoTimestamp(value(stats.row, stats.headers, 'stats_reset')),
      deallocCount: nullableInteger(value(stats.row, stats.headers, 'dealloc')),
      rows: snapshotResult.rows.map((row) => mapSnapshotRow(row, snapshotHeaders)),
    };
  }
}
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { HistoricSqlGrantsMissingError } from './errors.js';
|
||||
import { SnowflakeHistoricSqlQueryHistoryReader } from './snowflake-query-history-reader.js';
|
||||
|
||||
/** Canned result returned by the fake Snowflake query client used in these tests. */
interface FakeQueryResult {
  /** Column names, positionally aligned with each entry of `rows`. */
  headers: string[];
  /** Result rows as positional cell arrays. */
  rows: unknown[][];
  totalRows: number;
  /** Optional in-band error text reported by the client. */
  error?: string;
}
|
||||
|
||||
/**
 * Builds a fake query client that replays `results` in order, one per `executeQuery` call.
 * Running past the end of the queue throws `unexpected query`. The `results` array is
 * consumed (mutated) as calls are made.
 */
function queryClient(results: FakeQueryResult[]) {
  return {
    executeQuery: vi.fn(async (_query: string) => {
      const queued = results.shift();
      if (!queued) {
        throw new Error('unexpected query');
      }
      return queued;
    }),
  };
}
|
||||
|
||||
/**
 * Returns the SQL text of the first `executeQuery` call made on the fake client.
 *
 * @throws Error when the client was never called.
 */
function firstQuery(client: ReturnType<typeof queryClient>): string {
  const invocation = client.executeQuery.mock.calls[0];
  if (!invocation) {
    throw new Error('expected query client to be called');
  }
  const [sql] = invocation;
  return sql;
}
|
||||
|
||||
describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
||||
it('probes SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', async () => {
|
||||
const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]);
|
||||
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
|
||||
|
||||
await expect(reader.probe(client)).resolves.toBeUndefined();
|
||||
|
||||
expect(client.executeQuery).toHaveBeenCalledWith(
|
||||
'SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY LIMIT 1',
|
||||
);
|
||||
});
|
||||
|
||||
it('turns probe result errors into HistoricSqlGrantsMissingError', async () => {
|
||||
const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Object does not exist or not authorized' }]);
|
||||
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
|
||||
|
||||
await expect(reader.probe(client)).rejects.toMatchObject({
|
||||
name: 'HistoricSqlGrantsMissingError',
|
||||
dialect: 'snowflake',
|
||||
remediation: 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;',
|
||||
});
|
||||
});
|
||||
|
||||
it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => {
|
||||
const client = {
|
||||
executeQuery: vi.fn(async () => {
|
||||
throw new Error('permission denied');
|
||||
}),
|
||||
};
|
||||
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
|
||||
|
||||
await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
|
||||
});
|
||||
|
||||
it('fetches query-history rows with cursor and maps them into RawQueryRow shape', async () => {
|
||||
const client = queryClient([
|
||||
{
|
||||
headers: [
|
||||
'QUERY_ID',
|
||||
'QUERY_TEXT',
|
||||
'USER_NAME',
|
||||
'ROLE_NAME',
|
||||
'WAREHOUSE_NAME',
|
||||
'DATABASE_NAME',
|
||||
'SCHEMA_NAME',
|
||||
'START_TIME',
|
||||
'END_TIME',
|
||||
'TOTAL_ELAPSED_TIME',
|
||||
'ROWS_PRODUCED',
|
||||
'EXECUTION_STATUS',
|
||||
'ERROR_CODE',
|
||||
'ERROR_MESSAGE',
|
||||
],
|
||||
rows: [
|
||||
[
|
||||
'01a',
|
||||
"SELECT count(*) FROM ANALYTICS.ORDERS WHERE STATUS = 'paid'",
|
||||
'ANALYST_A',
|
||||
'ANALYST_ROLE',
|
||||
'WH_XS',
|
||||
'ANALYTICS',
|
||||
'PUBLIC',
|
||||
'2026-05-04T10:00:00.000Z',
|
||||
'2026-05-04T10:00:01.250Z',
|
||||
1250,
|
||||
12,
|
||||
'SUCCESS',
|
||||
null,
|
||||
null,
|
||||
],
|
||||
[
|
||||
'01b',
|
||||
'SELECT * FROM MISSING_TABLE',
|
||||
'ANALYST_B',
|
||||
'ANALYST_ROLE',
|
||||
'WH_XS',
|
||||
'ANALYTICS',
|
||||
'PUBLIC',
|
||||
new Date('2026-05-04T10:05:00.000Z'),
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
'FAILED_WITH_ERROR',
|
||||
'002003',
|
||||
'SQL compilation error',
|
||||
],
|
||||
],
|
||||
totalRows: 2,
|
||||
},
|
||||
]);
|
||||
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
|
||||
|
||||
const rows = [];
|
||||
for await (const row of reader.fetch(
|
||||
client,
|
||||
{
|
||||
start: new Date('2026-05-01T00:00:00.000Z'),
|
||||
end: new Date('2026-05-04T12:00:00.000Z'),
|
||||
},
|
||||
'2026-05-03T00:00:00.000Z',
|
||||
)) {
|
||||
rows.push(row);
|
||||
}
|
||||
|
||||
expect(client.executeQuery).toHaveBeenCalledTimes(1);
|
||||
const sql = firstQuery(client);
|
||||
expect(sql).toContain('FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
|
||||
expect(sql).toContain("START_TIME >= '2026-05-03T00:00:00.000Z'::TIMESTAMP_TZ");
|
||||
expect(sql).toContain("START_TIME < '2026-05-04T12:00:00.000Z'::TIMESTAMP_TZ");
|
||||
expect(sql).toContain('ORDER BY START_TIME ASC, QUERY_ID ASC');
|
||||
expect(sql).toContain('ROWS_PRODUCED');
|
||||
|
||||
expect(rows).toEqual([
|
||||
{
|
||||
id: '01a',
|
||||
sql: "SELECT count(*) FROM ANALYTICS.ORDERS WHERE STATUS = 'paid'",
|
||||
user: 'ANALYST_A',
|
||||
startedAt: '2026-05-04T10:00:00.000Z',
|
||||
endedAt: '2026-05-04T10:00:01.250Z',
|
||||
runtimeMs: 1250,
|
||||
rowsProduced: 12,
|
||||
success: true,
|
||||
errorMessage: null,
|
||||
},
|
||||
{
|
||||
id: '01b',
|
||||
sql: 'SELECT * FROM MISSING_TABLE',
|
||||
user: 'ANALYST_B',
|
||||
startedAt: '2026-05-04T10:05:00.000Z',
|
||||
endedAt: null,
|
||||
runtimeMs: null,
|
||||
rowsProduced: null,
|
||||
success: false,
|
||||
errorMessage: '002003: SQL compilation error',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('uses the window start when no cursor is available', async () => {
|
||||
const client = queryClient([{ headers: ['QUERY_ID'], rows: [], totalRows: 0 }]);
|
||||
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
|
||||
|
||||
for await (const _row of reader.fetch(client, {
|
||||
start: new Date('2026-02-03T12:00:00.000Z'),
|
||||
end: new Date('2026-05-04T12:00:00.000Z'),
|
||||
})) {
|
||||
throw new Error('empty result should not yield rows');
|
||||
}
|
||||
|
||||
const sql = firstQuery(client);
|
||||
expect(sql).toContain("START_TIME >= '2026-02-03T12:00:00.000Z'::TIMESTAMP_TZ");
|
||||
});
|
||||
|
||||
it('throws a clear error when the query client cannot execute SQL', async () => {
|
||||
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
|
||||
|
||||
await expect(async () => {
|
||||
for await (const _row of reader.fetch({}, { start: new Date(), end: new Date() })) {
|
||||
throw new Error('unreachable');
|
||||
}
|
||||
}).rejects.toThrow('Historic SQL Snowflake reader requires a query client with executeQuery(query)');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,203 @@
|
|||
import { HistoricSqlGrantsMissingError } from './errors.js';
|
||||
import type { HistoricSqlQueryHistoryReader, HistoricSqlRawQueryRow, HistoricSqlTimeWindow } from './types.js';
|
||||
|
||||
/**
 * Minimal tabular result shape this reader consumes from a query client.
 */
interface QueryResultLike {
  /** Column names; position `i` names the value at `row[i]` of every row. */
  headers: string[];
  /** Result rows, each an array of raw cell values. */
  rows: unknown[][];
  /** Row count reported alongside the rows. */
  totalRows: number;
  /** Error text returned by the client; when set the reader treats the query as failed. */
  error?: string;
}
|
||||
|
||||
/**
 * The only capability the reader needs from a connection: run one SQL string
 * and resolve with a tabular result.
 */
interface QueryClientLike {
  /** Executes `query` and resolves with its headers, rows and optional error text. */
  executeQuery(query: string): Promise<QueryResultLike>;
}
|
||||
|
||||
/** Single-row read against the query-history view, used by `probe` to check access. */
const PROBE_SQL = 'SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY LIMIT 1';

/** Remediation hint attached to every grants error raised by this reader. */
const SNOWFLAKE_GRANTS_REMEDIATION = 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;';
|
||||
|
||||
/**
 * Narrows an opaque client to `QueryClientLike`.
 *
 * @param client - Any value; accepted only when it is a non-null object
 *   exposing a callable `executeQuery` member.
 * @returns The same reference, typed as a query client.
 * @throws Error when the value does not expose `executeQuery` as a function.
 */
function queryClient(client: unknown): QueryClientLike {
  const isObject = Boolean(client) && typeof client === 'object';
  if (
    !isObject ||
    !('executeQuery' in (client as object)) ||
    typeof (client as { executeQuery?: unknown }).executeQuery !== 'function'
  ) {
    throw new Error('Historic SQL Snowflake reader requires a query client with executeQuery(query)');
  }
  return client as QueryClientLike;
}
|
||||
|
||||
/**
 * Wraps a failure in a `HistoricSqlGrantsMissingError` for the Snowflake dialect.
 *
 * @param cause - The thrown value or error text. Its message (for `Error`) or
 *   the string itself is appended to the error message; any other value falls
 *   back to a generic explanation.
 * @returns The error to throw, carrying the remediation hint and original cause.
 */
function grantsError(cause: unknown): HistoricSqlGrantsMissingError {
  let detail = 'Snowflake role cannot query SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY.';
  if (cause instanceof Error) {
    detail = cause.message;
  } else if (typeof cause === 'string') {
    detail = cause;
  }

  return new HistoricSqlGrantsMissingError({
    dialect: 'snowflake',
    message: `Missing Snowflake audit grants for historic-SQL ingest: ${detail}`,
    remediation: SNOWFLAKE_GRANTS_REMEDIATION,
    cause,
  });
}
|
||||
|
||||
/**
 * Renders a timestamp as a quoted Snowflake `TIMESTAMP_TZ` literal.
 *
 * @param value - A `Date`, or a string that `new Date(...)` can parse.
 * @returns `'<ISO-8601 UTC>'::TIMESTAMP_TZ`, with single quotes doubled.
 * @throws Error when the value does not resolve to a valid point in time.
 */
function timestampLiteral(value: Date | string): string {
  const parsed = value instanceof Date ? value : new Date(value);
  const isValid = !Number.isNaN(parsed.getTime());
  if (!isValid) {
    throw new Error(`Invalid Snowflake query-history timestamp: ${String(value)}`);
  }
  const escaped = parsed.toISOString().replace(/'/g, "''");
  return "'" + escaped + "'::TIMESTAMP_TZ";
}
|
||||
|
||||
/**
 * Builds the SELECT that pages through `SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY`.
 *
 * @param window - Time window; `end` is the exclusive upper bound.
 * @param cursor - Optional resume point. When present (not null/undefined) it
 *   replaces `window.start` as the inclusive lower bound.
 * @returns SQL text ordered by `START_TIME`, then `QUERY_ID`.
 * @throws Error when a bound cannot be rendered as a timestamp literal.
 */
function queryHistorySql(window: HistoricSqlTimeWindow, cursor?: string | null): string {
  const lowerBound = timestampLiteral(cursor ?? window.start);
  const upperBound = timestampLiteral(window.end);

  // One array entry per line of the statement; joined with newlines below.
  const statement = [
    'SELECT',
    'QUERY_ID,',
    'QUERY_TEXT,',
    'USER_NAME,',
    'ROLE_NAME,',
    'WAREHOUSE_NAME,',
    'DATABASE_NAME,',
    'SCHEMA_NAME,',
    'START_TIME,',
    'END_TIME,',
    'TOTAL_ELAPSED_TIME,',
    'ROWS_PRODUCED,',
    'EXECUTION_STATUS,',
    'ERROR_CODE,',
    'ERROR_MESSAGE',
    'FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY',
    `WHERE START_TIME >= ${lowerBound}`,
    `AND START_TIME < ${upperBound}`,
    'AND QUERY_TEXT IS NOT NULL',
    'ORDER BY START_TIME ASC, QUERY_ID ASC',
  ];
  return statement.join('\n').trim();
}
|
||||
|
||||
/**
 * Maps each upper-cased header name to its column position.
 *
 * @param headers - Column names in result order.
 * @returns Lookup from upper-cased name to index; if a name repeats, the last
 *   position wins.
 */
function indexByHeader(headers: string[]): Map<string, number> {
  const positions = new Map<string, number>();
  for (const [position, header] of headers.entries()) {
    positions.set(header.toUpperCase(), position);
  }
  return positions;
}
|
||||
|
||||
/**
 * Reads one cell from a row by column name.
 *
 * @param row - Raw row values.
 * @param indexes - Column-name to position lookup.
 * @param name - Column name, looked up exactly as given.
 * @returns The cell at the column's position, or `null` when the column is
 *   not in the lookup.
 */
function value(row: unknown[], indexes: Map<string, number>, name: string): unknown {
  const position = indexes.get(name);
  if (position === undefined) {
    return null;
  }
  return row[position];
}
|
||||
|
||||
/**
 * Converts a raw cell to text, collapsing "no value" to `null`.
 *
 * @param raw - Any cell value.
 * @returns `String(raw)`, or `null` for `null`, `undefined` and values whose
 *   string form is empty.
 */
function nullableString(raw: unknown): string | null {
  if (raw == null) {
    return null;
  }
  const text = String(raw);
  return text === '' ? null : text;
}
|
||||
|
||||
/**
 * Converts a raw cell to non-empty text or fails.
 *
 * @param raw - Any cell value.
 * @param field - Column name used in the error message.
 * @returns The cell's string form.
 * @throws Error when the cell is null, undefined or stringifies to ''.
 */
function requiredString(raw: unknown, field: string): string {
  const text = nullableString(raw);
  // nullableString never returns '', so null is the only "missing" outcome.
  if (text === null) {
    throw new Error(`Snowflake QUERY_HISTORY row is missing ${field}`);
  }
  return text;
}
|
||||
|
||||
/**
 * Converts a raw cell to a finite number.
 *
 * @param raw - Any cell value; numbers pass through, everything else goes
 *   through `Number(...)`.
 * @returns The finite number, or `null` for null/undefined, blank strings and
 *   anything that does not convert to a finite number.
 */
function nullableNumber(raw: unknown): number | null {
  if (raw === null || raw === undefined) {
    return null;
  }
  // Number('') and Number('   ') are both 0; a blank cell is "no value", not zero.
  if (typeof raw === 'string' && raw.trim() === '') {
    return null;
  }
  const parsed = typeof raw === 'number' ? raw : Number(raw);
  return Number.isFinite(parsed) ? parsed : null;
}
|
||||
|
||||
/**
 * Converts a raw cell to an integer by dropping any fractional part.
 *
 * @param raw - Any cell value.
 * @returns The truncated number, or `null` when `nullableNumber` yields `null`.
 */
function nullableInteger(raw: unknown): number | null {
  const parsed = nullableNumber(raw);
  if (parsed === null) {
    return null;
  }
  return Math.trunc(parsed);
}
|
||||
|
||||
/**
 * Normalizes a required timestamp cell to an ISO-8601 UTC string.
 *
 * @param raw - A `Date`, or a value whose string form `new Date(...)` can parse.
 * @param field - Column name used in error messages.
 * @returns The timestamp as produced by `Date.prototype.toISOString`.
 * @throws Error when the cell is missing, or when it does not resolve to a
 *   valid point in time (including an invalid `Date` instance).
 */
function isoTimestamp(raw: unknown, field: string): string {
  const date = raw instanceof Date ? raw : new Date(requiredString(raw, field));
  // Validate Date instances too: toISOString() on an invalid Date throws a
  // bare RangeError that does not say which column was bad.
  if (Number.isNaN(date.getTime())) {
    throw new Error(`Snowflake QUERY_HISTORY row has invalid ${field}: ${String(raw)}`);
  }
  return date.toISOString();
}
|
||||
|
||||
/**
 * Normalizes an optional timestamp cell to an ISO-8601 UTC string.
 *
 * @param raw - Any cell value.
 * @returns `null` for null, undefined or '', otherwise the normalized timestamp.
 * @throws Error (labelled `END_TIME`) when a present value is not a valid timestamp.
 */
function nullableIsoTimestamp(raw: unknown): string | null {
  const absent = raw === null || raw === undefined || raw === '';
  return absent ? null : isoTimestamp(raw, 'END_TIME');
}
|
||||
|
||||
/**
 * Decides whether a query-history row represents a successful execution.
 *
 * @param status - Execution status text, or `null` when absent.
 * @param errorCode - Error code, if any.
 * @param errorMessage - Error message, if any.
 * @returns `false` when either error field is non-empty; otherwise `true` when
 *   the status is absent or starts with `SUCCESS` (case-insensitive).
 */
function executionSucceeded(status: string | null, errorCode: string | null, errorMessage: string | null): boolean {
  const hasError = Boolean(errorCode) || Boolean(errorMessage);
  if (hasError) {
    return false;
  }
  if (status === null) {
    return true;
  }
  return status.toUpperCase().startsWith('SUCCESS');
}
|
||||
|
||||
/**
 * Merges an error code and message into a single display string.
 *
 * @param errorCode - Error code, if any.
 * @param errorMessage - Error message, if any.
 * @returns `"<code>: <message>"` when both are non-empty; otherwise the
 *   message if it is not null, else the code (which may itself be `null`).
 */
function combinedErrorMessage(errorCode: string | null, errorMessage: string | null): string | null {
  if (!errorCode || !errorMessage) {
    return errorMessage ?? errorCode;
  }
  return [errorCode, errorMessage].join(': ');
}
|
||||
|
||||
/**
 * Converts one QUERY_HISTORY result row into a `HistoricSqlRawQueryRow`.
 *
 * @param row - Raw cell values for the row.
 * @param indexes - Upper-cased column name to position lookup.
 * @returns The mapped row; optional columns become `null` when absent.
 * @throws Error when QUERY_ID, QUERY_TEXT or START_TIME is missing, or a
 *   timestamp column holds an invalid value.
 */
function mapRow(row: unknown[], indexes: Map<string, number>): HistoricSqlRawQueryRow {
  const cell = (column: string): unknown => value(row, indexes, column);

  const errorCode = nullableString(cell('ERROR_CODE'));
  const errorMessage = nullableString(cell('ERROR_MESSAGE'));
  const rowsProduced = nullableInteger(cell('ROWS_PRODUCED'));

  // Required columns are read in this order so the first missing one is reported.
  const id = requiredString(cell('QUERY_ID'), 'QUERY_ID');
  const sql = requiredString(cell('QUERY_TEXT'), 'QUERY_TEXT');
  const user = nullableString(cell('USER_NAME'));
  const startedAt = isoTimestamp(cell('START_TIME'), 'START_TIME');
  const endedAt = nullableIsoTimestamp(cell('END_TIME'));
  const runtimeMs = nullableNumber(cell('TOTAL_ELAPSED_TIME'));
  const status = nullableString(cell('EXECUTION_STATUS'));

  return {
    id,
    sql,
    user,
    startedAt,
    endedAt,
    runtimeMs,
    rowsProduced,
    success: executionSucceeded(status, errorCode, errorMessage),
    errorMessage: combinedErrorMessage(errorCode, errorMessage),
  };
}
|
||||
|
||||
/**
 * Reads historic SQL from `SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY` through any
 * client that exposes `executeQuery(query)`.
 */
export class SnowflakeHistoricSqlQueryHistoryReader implements HistoricSqlQueryHistoryReader {
  /**
   * Checks that the connection can read the query-history view.
   *
   * @param client - Opaque query client.
   * @throws HistoricSqlGrantsMissingError when resolving the client or running
   *   the probe throws, or when the probe result carries an error.
   */
  async probe(client: unknown): Promise<void> {
    const outcome = await this.runProbe(client);
    if (outcome.error) {
      throw grantsError(outcome.error);
    }
  }

  /**
   * Runs the history query and yields one mapped row per result row.
   *
   * @param client - Opaque query client; must expose `executeQuery(query)`.
   * @param window - Time window to read.
   * @param cursor - Optional resume point that replaces the window start.
   * @throws Error when the client cannot execute SQL or a row is malformed.
   * @throws HistoricSqlGrantsMissingError when the result carries an error.
   */
  async *fetch(
    client: unknown,
    window: HistoricSqlTimeWindow,
    cursor?: string | null,
  ): AsyncIterable<HistoricSqlRawQueryRow> {
    // Validate the client before building SQL so a bad client is reported first.
    const runner = queryClient(client);
    const sql = queryHistorySql(window, cursor);
    const outcome = await runner.executeQuery(sql);
    if (outcome.error) {
      throw grantsError(outcome.error);
    }

    const columns = indexByHeader(outcome.headers);
    // Rows are mapped lazily: a malformed row fails only when it is reached.
    for (const rawRow of outcome.rows) {
      yield mapRow(rawRow, columns);
    }
  }

  /** Executes the probe statement, converting any thrown failure into a grants error. */
  private async runProbe(client: unknown): Promise<QueryResultLike> {
    try {
      return await queryClient(client).executeQuery(PROBE_SQL);
    } catch (error) {
      throw grantsError(error);
    }
  }
}
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
import { mkdir, mkdtemp, readdir, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { dirname, join, relative, sep } from 'node:path';
import { describe, expect, it } from 'vitest';
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
import { stagePgStatStatementsTemplates, writePgssBaselineAtomic, type PgssBaseline } from './stage-pgss.js';
import type { HistoricSqlPullConfig, KloPostgresQueryClient, PostgresPgssReader, PostgresPgssRow } from './types.js';
|
||||
|
||||
/** Directory holding one sub-directory per golden fixture, next to this test file. */
const FIXTURE_ROOT = join(__dirname, '__fixtures__', 'postgres');
|
||||
|
||||
/**
 * Shape of a golden fixture's `input.json`: the inputs fed to the staging run
 * plus the outputs it is expected to produce.
 */
interface GoldenFixture {
  /** Label stored in the fixture file. */
  name: string;
  /** Timestamp string turned into the `now` passed to the staging run. */
  now: string;
  /** Connection id passed to the staging run and used in the baseline path. */
  connectionId: string;
  /** Value the fixture reader returns from `probe()`. */
  probe: {
    pgServerVersion: string;
    warnings: string[];
  };
  /** Snapshot the fixture reader returns; rows are capped at `maxTemplates`. */
  snapshot: {
    statsResetAt: string | null;
    deallocCount: number | null;
    rows: PostgresPgssRow[];
  };
  /** Pull configuration passed to the staging run. */
  pullConfig: HistoricSqlPullConfig & { dialect: 'postgres' };
  /** Canned analysis results, keyed by the exact SQL text being analyzed. */
  analysisBySql: Record<
    string,
    {
      fingerprint: string;
      normalizedSql: string;
      tablesTouched: string[];
      literalSlots: [];
      error?: string;
    }
  >;
  /** Baseline written to disk before the run; `null` means none is written. */
  baseline: PgssBaseline | null;
  /** Baseline the run is expected to return. */
  expectedBaseline: PgssBaseline;
  /** Expected staged files by relative path, compared as parsed JSON or as raw text. */
  expectedFiles: Record<string, { json?: unknown; text?: string }>;
}
|
||||
|
||||
/**
 * Loads `<FIXTURE_ROOT>/<name>/input.json`.
 *
 * @param name - Fixture directory name.
 * @returns The parsed file, cast (not validated) to `GoldenFixture`.
 */
async function readFixture(name: string): Promise<GoldenFixture> {
  const inputPath = join(FIXTURE_ROOT, name, 'input.json');
  const raw = await readFile(inputPath, 'utf-8');
  return JSON.parse(raw) as GoldenFixture;
}
|
||||
|
||||
/**
 * Creates a fresh directory under the OS temp directory.
 *
 * @param prefix - Leading part of the directory name; a unique suffix is appended.
 * @returns Path of the created directory.
 */
async function tempDir(prefix: string): Promise<string> {
  const template = join(tmpdir(), prefix);
  return mkdtemp(template);
}
|
||||
|
||||
/**
 * Stub query client for tests that supply their own reader.
 *
 * @returns A client whose `executeQuery` always resolves with an empty result.
 */
function fakePgClient(): KloPostgresQueryClient {
  return {
    executeQuery: async () => ({ headers: [], rows: [] }),
  };
}
|
||||
|
||||
/**
 * Builds a PGSS reader that replays a fixture instead of querying Postgres.
 *
 * @param fixture - Fixture providing the probe result and snapshot.
 * @returns A reader whose `probe` returns `fixture.probe` and whose
 *   `readSnapshot` returns the fixture snapshot with rows cut to
 *   `options.maxTemplates`.
 */
function fixtureReader(fixture: GoldenFixture): PostgresPgssReader {
  return {
    probe: async () => fixture.probe,
    readSnapshot: async (_client, options) => {
      const { statsResetAt, deallocCount, rows } = fixture.snapshot;
      return {
        statsResetAt,
        deallocCount,
        rows: rows.slice(0, options.maxTemplates),
      };
    },
  };
}
|
||||
|
||||
/**
 * Builds a SQL-analysis port backed by the fixture's canned results.
 *
 * @param fixture - Fixture whose `analysisBySql` is keyed by exact SQL text.
 * @returns A port that returns the canned result for known SQL, and an empty
 *   analysis with a `missing fixture analysis` error for anything else.
 */
function fixtureSqlAnalysis(fixture: GoldenFixture): SqlAnalysisPort {
  return {
    analyzeForFingerprint: async (sql) =>
      fixture.analysisBySql[sql] ?? {
        fingerprint: '',
        normalizedSql: '',
        tablesTouched: [],
        literalSlots: [],
        error: `missing fixture analysis for ${sql}`,
      },
  };
}
|
||||
|
||||
/**
 * Seeds the baseline file for a fixture run.
 *
 * @param path - Destination of the baseline file.
 * @param baseline - Baseline to write; nothing is written when it is `null`.
 */
async function writeFixtureBaseline(path: string, baseline: PgssBaseline | null): Promise<void> {
  if (baseline) {
    await writePgssBaselineAtomic(path, baseline);
  }
}
|
||||
|
||||
/**
 * Recursively lists every non-directory entry under a directory.
 *
 * @param root - Directory the returned paths are relative to.
 * @param current - Directory being scanned; defaults to `root` and is advanced
 *   by the recursion.
 * @returns Paths relative to `root`, always `/`-separated so they compare
 *   equal to the golden keys on every platform. Order follows `readdir`.
 */
async function listFiles(root: string, current = root): Promise<string[]> {
  const entries = await readdir(current, { withFileTypes: true });
  const files: string[] = [];
  for (const entry of entries) {
    const fullPath = join(current, entry.name);
    if (entry.isDirectory()) {
      files.push(...(await listFiles(root, fullPath)));
    } else {
      // relative() uses the platform separator; golden keys always use '/'.
      files.push(relative(root, fullPath).split(sep).join('/'));
    }
  }
  return files;
}
|
||||
|
||||
/**
 * Asserts that a staged directory matches the fixture's expected files.
 *
 * The set of files must match exactly. Entries with a `json` key are compared
 * as parsed JSON; all others are compared as raw text.
 *
 * @param stagedDir - Directory produced by the staging run.
 * @param expectedFiles - Expected content keyed by path relative to `stagedDir`.
 */
async function expectGoldenFiles(stagedDir: string, expectedFiles: GoldenFixture['expectedFiles']): Promise<void> {
  const foundPaths = await listFiles(stagedDir);
  const wantedPaths = Object.keys(expectedFiles).sort();
  foundPaths.sort();
  expect(foundPaths).toEqual(wantedPaths);

  for (const relPath of wantedPaths) {
    const golden = expectedFiles[relPath];
    const content = await readFile(join(stagedDir, relPath), 'utf-8');
    if (!('json' in golden)) {
      expect(content).toBe(golden.text);
      continue;
    }
    expect(JSON.parse(content)).toEqual(golden.json);
  }
}
|
||||
|
||||
describe('stagePgStatStatementsTemplates golden fixtures', () => {
  // One committed fixture directory per scenario.
  const fixtureNames = ['first-run', 'normal-delta', 'reset-detected', 'version-change', 'eviction-churn'] as const;

  it.each(fixtureNames)('matches the committed %s golden output', async (fixtureName) => {
    const fixture = await readFixture(fixtureName);

    // Each run gets its own temp tree for staged output and the baseline cache.
    const workDir = await tempDir(`pgss-golden-${fixtureName}-`);
    const stagedDir = join(workDir, 'staged');
    const baselinePath = join(workDir, 'cache', fixture.connectionId, 'pgss-baseline.json');
    await mkdir(dirname(baselinePath), { recursive: true });
    await writeFixtureBaseline(baselinePath, fixture.baseline);

    const outcome = await stagePgStatStatementsTemplates({
      stagedDir,
      connectionId: fixture.connectionId,
      queryClient: fakePgClient(),
      reader: fixtureReader(fixture),
      sqlAnalysis: fixtureSqlAnalysis(fixture),
      pullConfig: fixture.pullConfig,
      baselinePath,
      now: new Date(fixture.now),
    });

    await expectGoldenFiles(stagedDir, fixture.expectedFiles);
    expect(outcome.baseline).toEqual(fixture.expectedBaseline);
  });
});
|
||||
|
|
@ -0,0 +1,652 @@
|
|||
import { mkdtemp, readFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
|
||||
import {
|
||||
pgssBaselinePath,
|
||||
readPgssBaseline,
|
||||
stagePgStatStatementsTemplates,
|
||||
writePgssBaselineAtomic,
|
||||
type PgssBaseline,
|
||||
} from './stage-pgss.js';
|
||||
import { historicSqlManifestSchema, historicSqlMetadataSchema, historicSqlUsageSchema } from './types.js';
|
||||
import type { KloPostgresQueryClient, PostgresPgssReader, PostgresPgssRow } from './types.js';
|
||||
|
||||
/**
 * Creates a fresh directory under the OS temp directory.
 *
 * @param prefix - Leading part of the directory name; a unique suffix is appended.
 * @returns Path of the created directory.
 */
async function tempDir(prefix: string): Promise<string> {
  const template = join(tmpdir(), prefix);
  return mkdtemp(template);
}
|
||||
|
||||
/**
 * Reads and parses a UTF-8 JSON file.
 *
 * @param root - Base directory.
 * @param relPath - File path relative to `root`.
 * @returns The parsed content, cast (not validated) to `T`.
 */
async function readJson<T>(root: string, relPath: string): Promise<T> {
  const filePath = join(root, relPath);
  const raw = await readFile(filePath, 'utf-8');
  return JSON.parse(raw) as T;
}
|
||||
|
||||
/**
 * Stub query client for tests that supply their own reader.
 *
 * @returns A client whose `executeQuery` always resolves with an empty result.
 */
function fakePgClient(): KloPostgresQueryClient {
  return {
    executeQuery: async () => ({ headers: [], rows: [] }),
  };
}
|
||||
|
||||
/**
 * Builds a PGSS row for tests from defaults plus overrides.
 *
 * @param overrides - Must supply `queryid` and `query`; any other field given
 *   here replaces the default.
 * @returns A complete row.
 */
function row(overrides: Partial<PostgresPgssRow> & Pick<PostgresPgssRow, 'queryid' | 'query'>): PostgresPgssRow {
  const defaults: Omit<PostgresPgssRow, 'queryid' | 'query'> = {
    userid: '11',
    username: 'analyst',
    dbid: '5',
    database: 'warehouse',
    calls: 10,
    totalExecTime: 250,
    meanExecTime: 25,
    totalRows: 20,
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Builds a PGSS reader made of `vi.fn` spies that serve fixed data.
 *
 * @param input - Rows to serve plus optional probe/snapshot fields. Fields
 *   that are omitted or `null` fall back to: server version
 *   `PostgreSQL 16.4`, no warnings, stats reset at
 *   `2026-05-08T08:00:00.000Z`, and a dealloc count of 0.
 * @returns A reader whose `readSnapshot` cuts rows to `options.maxTemplates`.
 */
function fakeReader(input: {
  pgServerVersion?: string;
  warnings?: string[];
  statsResetAt?: string | null;
  deallocCount?: number | null;
  rows: PostgresPgssRow[];
}): PostgresPgssReader {
  return {
    probe: vi.fn(async () => {
      const pgServerVersion = input.pgServerVersion ?? 'PostgreSQL 16.4';
      const warnings = input.warnings ?? [];
      return { pgServerVersion, warnings };
    }),
    readSnapshot: vi.fn(async (_client, options) => {
      const statsResetAt = input.statsResetAt ?? '2026-05-08T08:00:00.000Z';
      const deallocCount = input.deallocCount ?? 0;
      const rows = input.rows.slice(0, options.maxTemplates);
      return { statsResetAt, deallocCount, rows };
    }),
  };
}
|
||||
|
||||
/**
 * Canned SQL analysis keyed on substrings of the SQL text: `broken` yields a
 * parse failure, `customers` the customers fingerprint, and anything else the
 * orders fingerprint. `broken` is checked first.
 */
const sqlAnalysis: SqlAnalysisPort = {
  analyzeForFingerprint: async (sql) => {
    const marker = ['broken', 'customers'].find((candidate) => sql.includes(candidate));
    switch (marker) {
      case 'broken':
        return {
          fingerprint: '',
          normalizedSql: '',
          tablesTouched: [],
          literalSlots: [],
          error: 'parse failed',
        };
      case 'customers':
        return {
          fingerprint: 'fp_customers',
          normalizedSql: 'SELECT count(*) FROM analytics.customers',
          tablesTouched: ['analytics.customers'],
          literalSlots: [],
        };
      default:
        return {
          fingerprint: 'fp_orders',
          normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
          tablesTouched: ['analytics.orders'],
          literalSlots: [],
        };
    }
  },
};
|
||||
|
||||
/**
 * Builds the Postgres pull configuration shared by these tests.
 *
 * @param maxTemplatesPerRun - Template cap for the run; defaults to 5000.
 * @returns A config with a 90-day window, no saved cursor, a `^svc_`
 *   service-account pattern, a `secret` redaction pattern and `minCalls` of 5.
 */
function postgresPullConfig(maxTemplatesPerRun = 5000) {
  const config = {
    dialect: 'postgres' as const,
    windowDays: 90,
    lastSuccessfulCursor: null,
    serviceAccountUserPatterns: ['^svc_'],
    redactionPatterns: ['secret'],
    maxTemplatesPerRun,
    minCalls: 5,
  };
  return config;
}
|
||||
|
||||
describe('stagePgStatStatementsTemplates', () => {
|
||||
it('stages first-run PGSS templates as degraded aggregate templates and builds a next baseline', async () => {
|
||||
const stagedDir = await tempDir('pgss-stage-first-');
|
||||
const baselineRootDir = await tempDir('pgss-baseline-first-');
|
||||
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
|
||||
|
||||
const result = await stagePgStatStatementsTemplates({
|
||||
stagedDir,
|
||||
connectionId: 'conn_pg',
|
||||
queryClient: fakePgClient(),
|
||||
reader: fakeReader({
|
||||
warnings: ['pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config'],
|
||||
deallocCount: 2,
|
||||
rows: [
|
||||
row({
|
||||
queryid: '101',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 10,
|
||||
totalExecTime: 250,
|
||||
totalRows: 20,
|
||||
}),
|
||||
row({
|
||||
queryid: '102',
|
||||
query: 'SELECT * FROM pg_catalog.pg_class',
|
||||
calls: 50,
|
||||
totalExecTime: 500,
|
||||
}),
|
||||
row({
|
||||
queryid: '103',
|
||||
query: 'BEGIN',
|
||||
calls: 75,
|
||||
totalExecTime: 75,
|
||||
}),
|
||||
row({
|
||||
queryid: '104',
|
||||
query: 'SELECT broken FROM analytics.orders',
|
||||
calls: 8,
|
||||
totalExecTime: 80,
|
||||
}),
|
||||
],
|
||||
}),
|
||||
sqlAnalysis,
|
||||
pullConfig: postgresPullConfig(),
|
||||
baselinePath,
|
||||
now: new Date('2026-05-08T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
|
||||
expect(manifest).toMatchObject({
|
||||
source: 'historic-sql',
|
||||
connectionId: 'conn_pg',
|
||||
dialect: 'postgres',
|
||||
fetchedAt: '2026-05-08T12:00:00.000Z',
|
||||
windowEnd: '2026-05-08T12:00:00.000Z',
|
||||
nextSuccessfulCursor: '2026-05-08T12:00:00.000Z',
|
||||
templateCount: 1,
|
||||
capped: false,
|
||||
degraded: true,
|
||||
statsResetAt: '2026-05-08T08:00:00.000Z',
|
||||
baselineFirstRun: true,
|
||||
pgServerVersion: 'PostgreSQL 16.4',
|
||||
deallocCount: 2,
|
||||
});
|
||||
expect(manifest.warnings).toEqual([
|
||||
'pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config',
|
||||
'pgss_dealloc_count:2; pg_stat_statements.max may be too low, causing template eviction churn',
|
||||
'baseline_first_run:no_previous_pgss_baseline',
|
||||
'analysis_failed:db5_q104',
|
||||
]);
|
||||
expect(manifest.templates).toEqual([
|
||||
{
|
||||
id: 'db5_q101',
|
||||
fingerprint: 'fp_orders',
|
||||
subClusterId: null,
|
||||
path: 'templates/db5_q101/page.md',
|
||||
},
|
||||
]);
|
||||
|
||||
const metadata = historicSqlMetadataSchema.parse(await readJson(stagedDir, 'templates/db5_q101/metadata.json'));
|
||||
expect(metadata).toMatchObject({
|
||||
id: 'db5_q101',
|
||||
title: 'postgres · analytics.orders [db5_q101]',
|
||||
path: 'templates/db5_q101/page.md',
|
||||
objectType: 'historic_sql_template',
|
||||
lastEditedAt: null,
|
||||
properties: {
|
||||
fingerprint: 'fp_orders',
|
||||
sub_cluster_id: null,
|
||||
dialect: 'postgres',
|
||||
tables_touched: ['analytics.orders'],
|
||||
literal_slots: [],
|
||||
},
|
||||
});
|
||||
expect(metadata.properties.triage_signals).toEqual({
|
||||
executions_bucket: 'mid',
|
||||
distinct_users_bucket: 'solo',
|
||||
error_rate_bucket: 'ok',
|
||||
recency_bucket: 'active',
|
||||
service_account_only: 'false',
|
||||
runtime_bucket: 'fast',
|
||||
});
|
||||
|
||||
const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q101/usage.json'));
|
||||
expect(usage).toEqual({
|
||||
stats: {
|
||||
executions: 10,
|
||||
distinct_users: 1,
|
||||
first_seen: '2026-05-08T12:00:00.000Z',
|
||||
last_seen: '2026-05-08T12:00:00.000Z',
|
||||
p50_runtime_ms: null,
|
||||
p95_runtime_ms: null,
|
||||
mean_runtime_ms: 25,
|
||||
error_rate: 0,
|
||||
rows_produced: 20,
|
||||
},
|
||||
literal_slots: [],
|
||||
samples: [],
|
||||
});
|
||||
|
||||
expect(await readFile(join(stagedDir, 'templates/db5_q101/page.md'), 'utf-8')).toContain(
|
||||
'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
);
|
||||
expect(result.baselinePath).toBe(baselinePath);
|
||||
expect(result.baseline.templates.db5_q101.perUser['11']).toEqual({
|
||||
calls: 10,
|
||||
totalExecTime: 250,
|
||||
totalRows: 20,
|
||||
});
|
||||
await expect(readPgssBaseline(baselinePath)).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it('warns when pg_stat_statements reports dealloc churn', async () => {
|
||||
const root = await tempDir('pgss-churn-');
|
||||
const stagedDir = join(root, 'staged');
|
||||
const baselinePath = join(root, 'cache', 'warehouse', 'pgss-baseline.json');
|
||||
|
||||
await stagePgStatStatementsTemplates({
|
||||
stagedDir,
|
||||
connectionId: 'warehouse',
|
||||
queryClient: fakePgClient(),
|
||||
reader: fakeReader({
|
||||
rows: [
|
||||
row({
|
||||
queryid: '901',
|
||||
query: 'SELECT COUNT(*) FROM public.orders WHERE status = $1',
|
||||
calls: 20,
|
||||
totalExecTime: 500,
|
||||
meanExecTime: 25,
|
||||
}),
|
||||
],
|
||||
deallocCount: 3,
|
||||
}),
|
||||
sqlAnalysis,
|
||||
pullConfig: postgresPullConfig(50),
|
||||
baselinePath,
|
||||
now: new Date('2026-05-08T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const manifest = await readJson<{ warnings: string[]; deallocCount: number }>(stagedDir, 'manifest.json');
|
||||
expect(manifest.deallocCount).toBe(3);
|
||||
expect(manifest.warnings).toContain(
|
||||
'pgss_dealloc_count:3; pg_stat_statements.max may be too low, causing template eviction churn',
|
||||
);
|
||||
});
|
||||
|
||||
it('uses the saved cumulative baseline to stage only positive deltas on later runs', async () => {
|
||||
const stagedDir = await tempDir('pgss-stage-delta-');
|
||||
const baselineRootDir = await tempDir('pgss-baseline-delta-');
|
||||
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
|
||||
const baseline: PgssBaseline = {
|
||||
version: 1,
|
||||
fetchedAt: '2026-05-08T10:00:00.000Z',
|
||||
statsResetAt: '2026-05-08T08:00:00.000Z',
|
||||
pgServerVersion: 'PostgreSQL 16.4',
|
||||
templates: {
|
||||
db5_q201: {
|
||||
firstObservedAt: '2026-05-08T09:00:00.000Z',
|
||||
perUser: {
|
||||
'11': { calls: 10, totalExecTime: 100, totalRows: 50 },
|
||||
'12': { calls: 5, totalExecTime: 50, totalRows: 25 },
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
await writePgssBaselineAtomic(baselinePath, baseline);
|
||||
|
||||
await stagePgStatStatementsTemplates({
|
||||
stagedDir,
|
||||
connectionId: 'conn_pg',
|
||||
queryClient: fakePgClient(),
|
||||
reader: fakeReader({
|
||||
rows: [
|
||||
row({
|
||||
queryid: '201',
|
||||
userid: '11',
|
||||
username: 'analyst',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 12,
|
||||
totalExecTime: 160,
|
||||
totalRows: 58,
|
||||
}),
|
||||
row({
|
||||
queryid: '201',
|
||||
userid: '12',
|
||||
username: 'svc_loader',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 5,
|
||||
totalExecTime: 50,
|
||||
totalRows: 25,
|
||||
}),
|
||||
row({
|
||||
queryid: '202',
|
||||
userid: '13',
|
||||
username: 'analyst_2',
|
||||
query: 'SELECT count(*) FROM analytics.customers',
|
||||
calls: 7,
|
||||
totalExecTime: 210,
|
||||
totalRows: 7,
|
||||
}),
|
||||
],
|
||||
}),
|
||||
sqlAnalysis,
|
||||
pullConfig: postgresPullConfig(),
|
||||
baselinePath,
|
||||
now: new Date('2026-05-08T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
|
||||
expect(manifest.baselineFirstRun).toBe(false);
|
||||
expect(manifest.windowStart).toBe('2026-05-08T10:00:00.000Z');
|
||||
expect(manifest.templateCount).toBe(2);
|
||||
expect(manifest.templates.map((template) => template.id)).toEqual(['db5_q202', 'db5_q201']);
|
||||
|
||||
const usage201 = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q201/usage.json'));
|
||||
expect(usage201.stats).toMatchObject({
|
||||
executions: 2,
|
||||
distinct_users: 1,
|
||||
first_seen: '2026-05-08T09:00:00.000Z',
|
||||
last_seen: '2026-05-08T12:00:00.000Z',
|
||||
mean_runtime_ms: 30,
|
||||
rows_produced: 8,
|
||||
});
|
||||
const metadata201 = historicSqlMetadataSchema.parse(await readJson(stagedDir, 'templates/db5_q201/metadata.json'));
|
||||
expect(metadata201.properties.triage_signals.service_account_only).toBe('false');
|
||||
|
||||
const usage202 = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q202/usage.json'));
|
||||
expect(usage202.stats).toMatchObject({
|
||||
executions: 7,
|
||||
distinct_users: 1,
|
||||
first_seen: '2026-05-08T12:00:00.000Z',
|
||||
mean_runtime_ms: 30,
|
||||
rows_produced: 7,
|
||||
});
|
||||
});
|
||||
|
||||
it('keeps matching queryid values from different databases as distinct templates and baseline entries', async () => {
|
||||
const stagedDir = await tempDir('pgss-stage-db-key-');
|
||||
const baselineRootDir = await tempDir('pgss-baseline-db-key-');
|
||||
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
|
||||
await writePgssBaselineAtomic(baselinePath, {
|
||||
version: 1,
|
||||
fetchedAt: '2026-05-08T10:00:00.000Z',
|
||||
statsResetAt: '2026-05-08T08:00:00.000Z',
|
||||
pgServerVersion: 'PostgreSQL 16.4',
|
||||
templates: {
|
||||
db5_q701: {
|
||||
firstObservedAt: '2026-05-08T09:00:00.000Z',
|
||||
perUser: {
|
||||
'11': { calls: 10, totalExecTime: 100, totalRows: 50 },
|
||||
},
|
||||
},
|
||||
db6_q701: {
|
||||
firstObservedAt: '2026-05-08T09:30:00.000Z',
|
||||
perUser: {
|
||||
'11': { calls: 4, totalExecTime: 40, totalRows: 20 },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const result = await stagePgStatStatementsTemplates({
|
||||
stagedDir,
|
||||
connectionId: 'conn_pg',
|
||||
queryClient: fakePgClient(),
|
||||
reader: fakeReader({
|
||||
rows: [
|
||||
row({
|
||||
queryid: '701',
|
||||
dbid: '5',
|
||||
database: 'warehouse',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 12,
|
||||
totalExecTime: 160,
|
||||
totalRows: 58,
|
||||
}),
|
||||
row({
|
||||
queryid: '701',
|
||||
dbid: '6',
|
||||
database: 'app',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 9,
|
||||
totalExecTime: 130,
|
||||
totalRows: 35,
|
||||
}),
|
||||
],
|
||||
}),
|
||||
sqlAnalysis,
|
||||
pullConfig: postgresPullConfig(),
|
||||
baselinePath,
|
||||
now: new Date('2026-05-08T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
|
||||
expect(manifest.templates.map((template) => template.id).sort()).toEqual(['db5_q701', 'db6_q701']);
|
||||
|
||||
const warehouseUsage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q701/usage.json'));
|
||||
expect(warehouseUsage.stats).toMatchObject({
|
||||
executions: 2,
|
||||
rows_produced: 8,
|
||||
first_seen: '2026-05-08T09:00:00.000Z',
|
||||
});
|
||||
|
||||
const appUsage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db6_q701/usage.json'));
|
||||
expect(appUsage.stats).toMatchObject({
|
||||
executions: 5,
|
||||
rows_produced: 15,
|
||||
first_seen: '2026-05-08T09:30:00.000Z',
|
||||
});
|
||||
|
||||
expect(result.baseline.templates.db5_q701.perUser['11']).toEqual({
|
||||
calls: 12,
|
||||
totalExecTime: 160,
|
||||
totalRows: 58,
|
||||
});
|
||||
expect(result.baseline.templates.db6_q701.perUser['11']).toEqual({
|
||||
calls: 9,
|
||||
totalExecTime: 130,
|
||||
totalRows: 35,
|
||||
});
|
||||
});
|
||||
|
||||
it('treats stats_reset advancement and major-version changes as fresh baselines', async () => {
|
||||
const resetStagedDir = await tempDir('pgss-stage-reset-');
|
||||
const resetBaselineRootDir = await tempDir('pgss-baseline-reset-');
|
||||
const resetBaselinePath = pgssBaselinePath(resetBaselineRootDir, 'conn_pg');
|
||||
await writePgssBaselineAtomic(resetBaselinePath, {
|
||||
version: 1,
|
||||
fetchedAt: '2026-05-08T10:00:00.000Z',
|
||||
statsResetAt: '2026-05-08T08:00:00.000Z',
|
||||
pgServerVersion: 'PostgreSQL 16.4',
|
||||
templates: {
|
||||
db5_q301: {
|
||||
firstObservedAt: '2026-05-08T09:00:00.000Z',
|
||||
perUser: {
|
||||
'11': { calls: 100, totalExecTime: 1000, totalRows: 500 },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await stagePgStatStatementsTemplates({
|
||||
stagedDir: resetStagedDir,
|
||||
connectionId: 'conn_pg',
|
||||
queryClient: fakePgClient(),
|
||||
reader: fakeReader({
|
||||
statsResetAt: '2026-05-08T11:00:00.000Z',
|
||||
rows: [
|
||||
row({
|
||||
queryid: '301',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 3,
|
||||
totalExecTime: 90,
|
||||
totalRows: 9,
|
||||
}),
|
||||
],
|
||||
}),
|
||||
sqlAnalysis,
|
||||
pullConfig: postgresPullConfig(),
|
||||
baselinePath: resetBaselinePath,
|
||||
now: new Date('2026-05-08T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const resetManifest = historicSqlManifestSchema.parse(await readJson(resetStagedDir, 'manifest.json'));
|
||||
expect(resetManifest.baselineFirstRun).toBe(true);
|
||||
expect(resetManifest.warnings).toContain(
|
||||
'baseline_reset:stats_reset advanced from 2026-05-08T08:00:00.000Z to 2026-05-08T11:00:00.000Z',
|
||||
);
|
||||
const resetUsage = historicSqlUsageSchema.parse(await readJson(resetStagedDir, 'templates/db5_q301/usage.json'));
|
||||
expect(resetUsage.stats.executions).toBe(3);
|
||||
|
||||
const versionStagedDir = await tempDir('pgss-stage-version-');
|
||||
const versionBaselineRootDir = await tempDir('pgss-baseline-version-');
|
||||
const versionBaselinePath = pgssBaselinePath(versionBaselineRootDir, 'conn_pg');
|
||||
await writePgssBaselineAtomic(versionBaselinePath, {
|
||||
version: 1,
|
||||
fetchedAt: '2026-05-08T10:00:00.000Z',
|
||||
statsResetAt: '2026-05-08T08:00:00.000Z',
|
||||
pgServerVersion: 'PostgreSQL 15.7',
|
||||
templates: {
|
||||
db5_q302: {
|
||||
firstObservedAt: '2026-05-08T09:00:00.000Z',
|
||||
perUser: {
|
||||
'11': { calls: 100, totalExecTime: 1000, totalRows: 500 },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await stagePgStatStatementsTemplates({
|
||||
stagedDir: versionStagedDir,
|
||||
connectionId: 'conn_pg',
|
||||
queryClient: fakePgClient(),
|
||||
reader: fakeReader({
|
||||
pgServerVersion: 'PostgreSQL 16.4',
|
||||
rows: [
|
||||
row({
|
||||
queryid: '302',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 4,
|
||||
totalExecTime: 80,
|
||||
totalRows: 8,
|
||||
}),
|
||||
],
|
||||
}),
|
||||
sqlAnalysis,
|
||||
pullConfig: postgresPullConfig(),
|
||||
baselinePath: versionBaselinePath,
|
||||
now: new Date('2026-05-08T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const versionManifest = historicSqlManifestSchema.parse(await readJson(versionStagedDir, 'manifest.json'));
|
||||
expect(versionManifest.baselineFirstRun).toBe(true);
|
||||
expect(versionManifest.warnings).toContain('baseline_reset:pg_server_major changed from 15 to 16');
|
||||
});
|
||||
|
||||
it('handles scoped counter regressions without forcing a global first-run baseline', async () => {
|
||||
const stagedDir = await tempDir('pgss-stage-scoped-');
|
||||
const baselineRootDir = await tempDir('pgss-baseline-scoped-');
|
||||
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
|
||||
await writePgssBaselineAtomic(baselinePath, {
|
||||
version: 1,
|
||||
fetchedAt: '2026-05-08T10:00:00.000Z',
|
||||
statsResetAt: '2026-05-08T08:00:00.000Z',
|
||||
pgServerVersion: 'PostgreSQL 16.4',
|
||||
templates: {
|
||||
db5_q401: {
|
||||
firstObservedAt: '2026-05-08T09:00:00.000Z',
|
||||
perUser: {
|
||||
'11': { calls: 100, totalExecTime: 1000, totalRows: 500 },
|
||||
'12': { calls: 50, totalExecTime: 500, totalRows: 250 },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await stagePgStatStatementsTemplates({
|
||||
stagedDir,
|
||||
connectionId: 'conn_pg',
|
||||
queryClient: fakePgClient(),
|
||||
reader: fakeReader({
|
||||
statsResetAt: '2026-05-08T08:00:00.000Z',
|
||||
rows: [
|
||||
row({
|
||||
queryid: '401',
|
||||
userid: '11',
|
||||
username: 'analyst',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 2,
|
||||
totalExecTime: 30,
|
||||
totalRows: 6,
|
||||
}),
|
||||
row({
|
||||
queryid: '401',
|
||||
userid: '12',
|
||||
username: 'svc_loader',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 55,
|
||||
totalExecTime: 650,
|
||||
totalRows: 275,
|
||||
}),
|
||||
],
|
||||
}),
|
||||
sqlAnalysis,
|
||||
pullConfig: postgresPullConfig(),
|
||||
baselinePath,
|
||||
now: new Date('2026-05-08T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
|
||||
expect(manifest.baselineFirstRun).toBe(false);
|
||||
expect(manifest.warnings).toContain('scoped_reset:dbid=5 queryid=401 userid=11');
|
||||
|
||||
const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q401/usage.json'));
|
||||
expect(usage.stats).toMatchObject({
|
||||
executions: 7,
|
||||
distinct_users: 2,
|
||||
mean_runtime_ms: 25.714285714285715,
|
||||
rows_produced: 31,
|
||||
});
|
||||
});
|
||||
|
||||
it('ranks and caps selected PGSS templates after skip and analysis filtering', async () => {
|
||||
const stagedDir = await tempDir('pgss-stage-cap-');
|
||||
const baselineRootDir = await tempDir('pgss-baseline-cap-');
|
||||
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
|
||||
|
||||
await stagePgStatStatementsTemplates({
|
||||
stagedDir,
|
||||
connectionId: 'conn_pg',
|
||||
queryClient: fakePgClient(),
|
||||
reader: fakeReader({
|
||||
rows: [
|
||||
row({
|
||||
queryid: '501',
|
||||
username: 'analyst_a',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 2,
|
||||
totalExecTime: 20,
|
||||
}),
|
||||
row({
|
||||
queryid: '502',
|
||||
username: 'analyst_b',
|
||||
query: 'SELECT count(*) FROM analytics.customers',
|
||||
calls: 20,
|
||||
totalExecTime: 200,
|
||||
}),
|
||||
row({
|
||||
queryid: '503',
|
||||
username: 'analyst_c',
|
||||
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
|
||||
calls: 10,
|
||||
totalExecTime: 100,
|
||||
}),
|
||||
],
|
||||
}),
|
||||
sqlAnalysis,
|
||||
pullConfig: postgresPullConfig(2),
|
||||
baselinePath,
|
||||
now: new Date('2026-05-08T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
|
||||
expect(manifest.capped).toBe(true);
|
||||
expect(manifest.warnings).toContain('templates_truncated: kept 2 of 3 templates');
|
||||
expect(manifest.templates.map((template) => template.id)).toEqual(['db5_q502', 'db5_q503']);
|
||||
});
|
||||
});
|
||||
508
packages/context/src/ingest/adapters/historic-sql/stage-pgss.ts
Normal file
508
packages/context/src/ingest/adapters/historic-sql/stage-pgss.ts
Normal file
|
|
@ -0,0 +1,508 @@
|
|||
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import type { SqlAnalysisFingerprintResult, SqlAnalysisPort } from '../../../sql-analysis/index.js';
|
||||
import {
|
||||
HISTORIC_SQL_OBJECT_TYPE,
|
||||
HISTORIC_SQL_SOURCE_KEY,
|
||||
historicSqlPullConfigSchema,
|
||||
type HistoricSqlManifest,
|
||||
type HistoricSqlMetadata,
|
||||
type HistoricSqlPullConfig,
|
||||
type HistoricSqlUsage,
|
||||
type KloPostgresQueryClient,
|
||||
type PostgresPgssAggregateRow,
|
||||
type PostgresPgssReader,
|
||||
type PostgresPgssRow,
|
||||
} from './types.js';
|
||||
|
||||
/** Schema version stamped on every baseline this module builds and required of every baseline it parses. */
const PGSS_BASELINE_VERSION = 1 as const;
|
||||
|
||||
/**
 * Cumulative counters stored for one user of one template.
 * `calls` and `totalRows` must be non-negative integers; `totalExecTime` may be any
 * non-negative number.
 */
const pgssCounterSchema = z.object({
  calls: z.number().int().nonnegative(),
  totalExecTime: z.number().nonnegative(),
  totalRows: z.number().int().nonnegative(),
});
|
||||
|
||||
/** One template's baseline entry: when it was first observed plus counters keyed by user id. */
const pgssBaselineTemplateSchema = z.object({
  firstObservedAt: z.string().datetime(),
  perUser: z.record(z.string(), pgssCounterSchema),
});

/**
 * On-disk shape of a PGSS baseline file. `templates` is keyed by template id;
 * `statsResetAt` may be null.
 */
const pgssBaselineSchema = z.object({
  version: z.literal(PGSS_BASELINE_VERSION),
  fetchedAt: z.string().datetime(),
  statsResetAt: z.string().datetime().nullable(),
  pgServerVersion: z.string(),
  templates: z.record(z.string(), pgssBaselineTemplateSchema),
});
|
||||
|
||||
/** Parsed baseline document, inferred from `pgssBaselineSchema`. */
export type PgssBaseline = z.infer<typeof pgssBaselineSchema>;
|
||||
|
||||
/** Arguments accepted by `stagePgStatStatementsTemplates`. */
export interface StagePgStatStatementsTemplatesInput {
  /** Directory that receives `manifest.json` and the `templates/<id>/` files. */
  stagedDir: string;
  /** Connection identifier copied into the manifest. */
  connectionId: string;
  /** Client handed to the reader's `probe` and `readSnapshot` calls. */
  queryClient: KloPostgresQueryClient;
  /** Source of the probe result and the pg_stat_statements snapshot. */
  reader: PostgresPgssReader;
  /** Fingerprints each aggregated query using the `postgres` dialect. */
  sqlAnalysis: SqlAnalysisPort;
  /** Pull configuration; validated on entry and must have dialect `postgres`. */
  pullConfig: HistoricSqlPullConfig;
  /** Location of the previous baseline file, read at the start of the run. */
  baselinePath: string;
  /** Clock override; the current time is used when omitted. */
  now?: Date;
}
|
||||
|
||||
/** Value returned by `stagePgStatStatementsTemplates`. */
export interface StagePgStatStatementsTemplatesResult {
  /** The baseline path that was supplied in the input, echoed back. */
  baselinePath: string;
  /** Baseline rebuilt from the snapshot read during this run. */
  baseline: PgssBaseline;
}
|
||||
|
||||
/** Cumulative counter triple recorded per user in a baseline. */
interface PgssBaselineCounter {
  /** Cumulative number of calls. */
  calls: number;
  /** Cumulative execution time. */
  totalExecTime: number;
  /** Cumulative number of rows. */
  totalRows: number;
}
|
||||
|
||||
/** Working accumulator for one template while snapshot rows are being folded together. */
interface PgssAggregateMutable {
  /** Template id (`db<dbid>_q<queryid>`). */
  id: string;
  queryid: string;
  dbid: string;
  database: string | null;
  /** Query text taken from the first row seen for this template. */
  query: string;
  /** Sum of the non-negative per-row call deltas. */
  deltaCalls: number;
  /** Sum of the non-negative per-row execution-time deltas. */
  deltaExecTime: number;
  /** Sum of the non-negative per-row row-count deltas. */
  deltaRows: number;
  /** Usernames (or `unknown`) whose rows had a positive call delta. */
  users: Set<string>;
  /** Carried over from the baseline entry when one exists, otherwise the fetch time. */
  firstObservedAt: string;
}
|
||||
|
||||
/** An aggregated template paired with its fingerprint analysis. */
interface AnalyzedPgssTemplate {
  /** Delta statistics for the template. */
  aggregate: PostgresPgssAggregateRow;
  /** Result of `analyzeForFingerprint` for the template's query text. */
  analysis: SqlAnalysisFingerprintResult;
}
|
||||
|
||||
/** All-zero counter used as the previous value when there is no usable baseline counter. */
const ZERO_COUNTER: PgssBaselineCounter = { calls: 0, totalExecTime: 0, totalRows: 0 };
|
||||
|
||||
/** Value passed as `maxTemplates` when reading the snapshot; the per-run cap is applied later, after analysis. */
const PGSS_SNAPSHOT_READ_LIMIT = 5000;
|
||||
/** Matches SQL whose first word, after optional leading whitespace, is one of the listed keywords (case-insensitive). */
const PGSS_HARD_SKIP_PREFIX_RE = /^\s*(SHOW|DESCRIBE|DESC|EXPLAIN|USE|SET|BEGIN|COMMIT|ROLLBACK|VACUUM|ANALYZE)\b/i;
|
||||
/** Matches SQL that mentions INFORMATION_SCHEMA, a `pg_catalog.` / `pg_toast.` qualifier, or a name starting with `pg_stat_` (case-insensitive). */
const PGSS_HARD_SKIP_TABLE_RE = /\b(INFORMATION_SCHEMA|pg_catalog\.|pg_toast\.|pg_stat_)/i;
|
||||
|
||||
/**
 * Builds the template id for a snapshot row.
 *
 * @param row - Any object carrying the row's `dbid` and `queryid`.
 * @returns `db<dbid>_q<queryid>`.
 */
function pgssTemplateId(row: Pick<PostgresPgssRow, 'dbid' | 'queryid'>): string {
  const { dbid, queryid } = row;
  return `db${dbid}_q${queryid}`;
}
|
||||
|
||||
/**
 * Resolves the baseline file path for a connection.
 *
 * @param rootDir - Cache root; when undefined, `.klo/cache/historic-sql` under the
 *   current working directory is used.
 * @param connectionId - Used as the directory name beneath the root.
 * @returns `<root>/<connectionId>/pgss-baseline.json`.
 */
export function pgssBaselinePath(rootDir: string | undefined, connectionId: string): string {
  const cacheRoot = rootDir ?? join(process.cwd(), '.klo/cache/historic-sql');
  return join(cacheRoot, connectionId, 'pgss-baseline.json');
}
|
||||
|
||||
/**
 * Loads and validates a baseline file.
 *
 * @param path - Baseline file location.
 * @returns The parsed baseline, or `null` when an error with code `ENOENT` is raised.
 * @throws Any other read, JSON or schema-validation error, unchanged.
 */
export async function readPgssBaseline(path: string): Promise<PgssBaseline | null> {
  try {
    const raw = await readFile(path, 'utf-8');
    const decoded: unknown = JSON.parse(raw);
    return pgssBaselineSchema.parse(decoded);
  } catch (error) {
    const isMissingFile = error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return null;
  }
}
|
||||
|
||||
/**
 * Validates a baseline and writes it via a sibling `.tmp` file followed by a rename.
 *
 * @param path - Final baseline location; parent directories are created as needed.
 * @param baseline - Baseline to persist; validated against the schema before any file I/O.
 * @throws When validation fails or any file-system call rejects.
 */
export async function writePgssBaselineAtomic(path: string, baseline: PgssBaseline): Promise<void> {
  const validated = pgssBaselineSchema.parse(baseline);
  const serialized = `${JSON.stringify(validated, null, 2)}\n`;
  const scratchPath = `${path}.tmp`;

  await mkdir(dirname(path), { recursive: true });
  await writeFile(scratchPath, serialized, 'utf-8');
  // Written in full to the scratch file first, then renamed onto the final path.
  await rename(scratchPath, path);
}
|
||||
|
||||
/**
 * Stages pg_stat_statements query templates for one Postgres connection.
 *
 * In order: validates the pull config, probes the reader, loads the previous baseline,
 * reads a snapshot, diffs it against the baseline, drops skippable SQL, fingerprints the
 * remaining templates, ranks and caps them, then writes `templates/<id>/metadata.json`,
 * `page.md`, `usage.json` and finally `manifest.json` under `input.stagedDir`.
 *
 * The next baseline is computed and returned; this function does not write it to disk.
 *
 * @param input - Staging inputs; `now` defaults to the current time.
 * @returns The supplied baseline path and the baseline built from this run's snapshot.
 * @throws Error when the validated pull config's dialect is not `postgres`.
 */
export async function stagePgStatStatementsTemplates(
  input: StagePgStatStatementsTemplatesInput,
): Promise<StagePgStatStatementsTemplatesResult> {
  const config = historicSqlPullConfigSchema.parse(input.pullConfig);
  if (config.dialect !== 'postgres') {
    throw new Error(`stagePgStatStatementsTemplates requires dialect postgres, got ${config.dialect}`);
  }

  const { stagedDir, connectionId, queryClient, reader, sqlAnalysis, baselinePath } = input;
  const now = input.now ?? new Date();
  const fetchedAt = now.toISOString();

  const probe = await reader.probe(queryClient);
  const warnings = [...probe.warnings];
  const previousBaseline = await readPgssBaseline(baselinePath);
  const snapshot = await reader.readSnapshot(queryClient, {
    minCalls: config.minCalls,
    maxTemplates: PGSS_SNAPSHOT_READ_LIMIT,
  });

  const { deallocCount, statsResetAt } = snapshot;
  if (deallocCount !== null && deallocCount > 0) {
    warnings.push(
      `pgss_dealloc_count:${deallocCount}; pg_stat_statements.max may be too low, causing template eviction churn`,
    );
  }

  const reset = detectBaselineReset({
    baseline: previousBaseline,
    snapshotStatsResetAt: statsResetAt,
    currentPgServerVersion: probe.pgServerVersion,
  });
  warnings.push(...reset.warnings);

  // aggregatePgssRows appends scoped-reset warnings to the shared `warnings` array.
  const candidates = aggregatePgssRows({
    rows: snapshot.rows,
    baseline: previousBaseline,
    baselineFirstRun: reset.baselineFirstRun,
    fetchedAt,
    warnings,
  });

  // Analysis runs one template at a time so warnings keep the aggregate order.
  const analyzed: AnalyzedPgssTemplate[] = [];
  for (const aggregate of candidates) {
    if (shouldSkipPgssSql(aggregate.query)) {
      continue;
    }
    const analysis = await sqlAnalysis.analyzeForFingerprint(aggregate.query, 'postgres');
    const usable = !analysis.error && analysis.fingerprint && analysis.normalizedSql;
    if (!usable) {
      warnings.push(`analysis_failed:${aggregate.id}`);
      continue;
    }
    analyzed.push({ aggregate, analysis });
  }

  const selected = selectPgssTemplates(analyzed, config.maxTemplatesPerRun);
  const capped = selected.length < analyzed.length;
  if (capped) {
    warnings.push(`templates_truncated: kept ${selected.length} of ${analyzed.length} templates`);
  }

  await mkdir(stagedDir, { recursive: true });
  const templates: HistoricSqlManifest['templates'] = [];
  for (const template of selected) {
    const { metadata, pageMarkdown, usage } = buildPgssStagedTemplate(template, config, now);
    const templateDir = `templates/${metadata.id}`;
    await writeJson(stagedDir, `${templateDir}/metadata.json`, metadata);
    await writeText(stagedDir, `${templateDir}/page.md`, pageMarkdown);
    await writeJson(stagedDir, `${templateDir}/usage.json`, usage);
    templates.push({
      id: metadata.id,
      fingerprint: metadata.properties.fingerprint,
      subClusterId: metadata.properties.sub_cluster_id,
      path: metadata.path,
    });
  }

  const manifest: HistoricSqlManifest = {
    source: HISTORIC_SQL_SOURCE_KEY,
    connectionId,
    dialect: 'postgres',
    fetchedAt,
    windowStart: previousBaseline?.fetchedAt ?? statsResetAt ?? fetchedAt,
    windowEnd: fetchedAt,
    nextSuccessfulCursor: fetchedAt,
    templateCount: selected.length,
    capped,
    warnings,
    degraded: true,
    statsResetAt,
    baselineFirstRun: reset.baselineFirstRun,
    pgServerVersion: probe.pgServerVersion,
    deallocCount,
    templates,
  };
  await writeJson(stagedDir, 'manifest.json', manifest);

  // After a reset the old baseline is discarded, so every template gets a fresh firstObservedAt.
  const nextBaseline = buildNextBaseline({
    rows: snapshot.rows,
    fetchedAt,
    statsResetAt,
    pgServerVersion: probe.pgServerVersion,
    previousBaseline: reset.baselineFirstRun ? null : previousBaseline,
  });

  return { baselinePath, baseline: nextBaseline };
}
|
||||
|
||||
/**
 * Decides whether the stored baseline can still be diffed against the current snapshot.
 *
 * The run is treated as a first run when there is no baseline, when the snapshot's
 * `stats_reset` instant is later than the baseline's, or when the server's major version
 * differs from the one recorded in the baseline. A reset is only checked when both
 * timestamps are present, and a version change only when both versions can be parsed.
 *
 * @param input - Previous baseline (or null), the snapshot's stats-reset timestamp, and
 *   the current server version string.
 * @returns `baselineFirstRun` plus the warnings explaining why.
 */
function detectBaselineReset(input: {
  baseline: PgssBaseline | null;
  snapshotStatsResetAt: string | null;
  currentPgServerVersion: string;
}): { baselineFirstRun: boolean; warnings: string[] } {
  const { baseline, snapshotStatsResetAt, currentPgServerVersion } = input;
  if (!baseline) {
    return { baselineFirstRun: true, warnings: ['baseline_first_run:no_previous_pgss_baseline'] };
  }

  const warnings: string[] = [];
  if (
    baseline.statsResetAt &&
    snapshotStatsResetAt &&
    isEarlierInstant(baseline.statsResetAt, snapshotStatsResetAt)
  ) {
    warnings.push(`baseline_reset:stats_reset advanced from ${baseline.statsResetAt} to ${snapshotStatsResetAt}`);
  }

  const previousMajor = postgresMajor(baseline.pgServerVersion);
  const currentMajor = postgresMajor(currentPgServerVersion);
  if (previousMajor && currentMajor && previousMajor !== currentMajor) {
    warnings.push(`baseline_reset:pg_server_major changed from ${previousMajor} to ${currentMajor}`);
  }

  return { baselineFirstRun: warnings.length > 0, warnings };
}

/**
 * Reports whether timestamp `left` denotes an earlier instant than `right`.
 *
 * The comparison is made on parsed instants, so the same moment written with a different
 * fractional-second precision or UTC offset is not mistaken for a change. If either
 * value cannot be parsed, it falls back to plain string ordering.
 */
function isEarlierInstant(left: string, right: string): boolean {
  const leftMs = Date.parse(left);
  const rightMs = Date.parse(right);
  if (Number.isNaN(leftMs) || Number.isNaN(rightMs)) {
    return left < right;
  }
  return leftMs < rightMs;
}

/**
 * Extracts the PostgreSQL major version from a version string.
 *
 * Accepts both the long form (`PostgreSQL 16.4 on ...`) and bare forms (`16.4`,
 * `16beta1`, `17devel`). For releases before 10 the major version is the first two
 * components (`9.6`); from 10 onwards it is the first component only.
 *
 * @returns The major version, or null when no leading version number is found.
 */
function postgresMajor(version: string): string | null {
  const match = version.match(/PostgreSQL\s+(\d+)(?:\.(\d+))?/i) ?? version.match(/^\s*(\d+)(?:\.(\d+))?/);
  const first = match?.[1];
  if (first === undefined) {
    return null;
  }
  const second = match?.[2];
  return Number(first) < 10 && second !== undefined ? `${first}.${second}` : first;
}
|
||||
|
||||
/**
 * Folds per-user snapshot rows into one delta aggregate per template.
 *
 * Each row's counters are diffed against its baseline counter (or zero when there is
 * none, on a first run, or after a scoped counter regression). Rows with a zero call
 * delta are ignored unless this is a first run. Negative deltas are clamped to zero
 * before being summed, and templates whose summed call delta is not positive are dropped.
 *
 * @param input - Snapshot rows, previous baseline, first-run flag, fetch time, and the
 *   shared `warnings` array that scoped-reset warnings are appended to.
 * @returns One aggregate per template, in first-seen order.
 */
function aggregatePgssRows(input: {
  rows: PostgresPgssRow[];
  baseline: PgssBaseline | null;
  baselineFirstRun: boolean;
  fetchedAt: string;
  warnings: string[];
}): PostgresPgssAggregateRow[] {
  const { rows, baseline, baselineFirstRun, fetchedAt, warnings } = input;
  const byTemplate = new Map<string, PgssAggregateMutable>();

  for (const row of rows) {
    const id = pgssTemplateId(row);
    const priorTemplate = baselineFirstRun ? undefined : baseline?.templates[id];
    const prior = scopedCounterBaseline(row, priorTemplate?.perUser[row.userid], baselineFirstRun, warnings);

    const callsDelta = row.calls - prior.calls;
    if (!baselineFirstRun && callsDelta === 0) {
      continue;
    }
    const execTimeDelta = row.totalExecTime - prior.totalExecTime;
    const rowsDelta = row.totalRows - prior.totalRows;

    let bucket = byTemplate.get(id);
    if (!bucket) {
      bucket = {
        id,
        queryid: row.queryid,
        dbid: row.dbid,
        database: row.database,
        query: row.query,
        deltaCalls: 0,
        deltaExecTime: 0,
        deltaRows: 0,
        users: new Set<string>(),
        firstObservedAt: priorTemplate?.firstObservedAt ?? fetchedAt,
      };
      byTemplate.set(id, bucket);
    }

    bucket.deltaCalls += Math.max(0, callsDelta);
    bucket.deltaExecTime += Math.max(0, execTimeDelta);
    bucket.deltaRows += Math.max(0, rowsDelta);
    if (callsDelta > 0) {
      bucket.users.add(row.username ?? 'unknown');
    }
  }

  const result: PostgresPgssAggregateRow[] = [];
  for (const bucket of byTemplate.values()) {
    if (!(bucket.deltaCalls > 0)) {
      continue;
    }
    result.push({
      id: bucket.id,
      queryid: bucket.queryid,
      dbid: bucket.dbid,
      database: bucket.database,
      query: bucket.query,
      deltaCalls: bucket.deltaCalls,
      deltaExecTime: bucket.deltaExecTime,
      deltaRows: bucket.deltaRows,
      meanExecTime: bucket.deltaExecTime / Math.max(bucket.deltaCalls, 1),
      distinctUsersDelta: bucket.users.size,
      users: [...bucket.users].sort(),
      firstObservedAt: bucket.firstObservedAt,
    });
  }
  return result;
}
|
||||
|
||||
/**
 * Picks the counter a snapshot row should be diffed against.
 *
 * @param row - Current snapshot row.
 * @param baselineCounter - Counter stored for this row's user in the baseline, if any.
 * @param baselineFirstRun - When true the baseline is ignored entirely.
 * @param warnings - Receives a `scoped_reset:` entry when any stored counter exceeds the
 *   row's current value.
 * @returns The zero counter when there is no usable baseline or a counter went backwards;
 *   otherwise the stored baseline counter.
 */
function scopedCounterBaseline(
  row: PostgresPgssRow,
  baselineCounter: PgssBaselineCounter | undefined,
  baselineFirstRun: boolean,
  warnings: string[],
): PgssBaselineCounter {
  if (baselineFirstRun || !baselineCounter) {
    return ZERO_COUNTER;
  }

  const wentBackwards =
    row.calls < baselineCounter.calls ||
    row.totalExecTime < baselineCounter.totalExecTime ||
    row.totalRows < baselineCounter.totalRows;
  if (!wentBackwards) {
    return baselineCounter;
  }

  // A counter that went backwards is handled for this row only, starting again from zero.
  warnings.push(`scoped_reset:dbid=${row.dbid} queryid=${row.queryid} userid=${row.userid}`);
  return ZERO_COUNTER;
}
|
||||
|
||||
/**
 * Reports whether a query should be excluded from staging.
 *
 * @param sql - Query text from the snapshot.
 * @returns True when either hard-skip pattern (leading keyword or table reference) matches.
 */
function shouldSkipPgssSql(sql: string): boolean {
  const skipPatterns = [PGSS_HARD_SKIP_PREFIX_RE, PGSS_HARD_SKIP_TABLE_RE];
  return skipPatterns.some((pattern) => pattern.test(sql));
}
|
||||
|
||||
/**
 * Ranks analyzed templates and keeps the top `maxTemplatesPerRun`.
 *
 * The score is `users.length * log1p(deltaCalls)`; higher scores come first and ties are
 * broken by `localeCompare` on the template id. The input array is not reordered.
 *
 * @param templates - Templates that passed analysis.
 * @param maxTemplatesPerRun - Maximum number of templates to return.
 * @returns The highest-ranked templates, best first.
 */
function selectPgssTemplates(templates: AnalyzedPgssTemplate[], maxTemplatesPerRun: number): AnalyzedPgssTemplate[] {
  const ranked = templates.map((template) => {
    const { users, deltaCalls } = template.aggregate;
    return { template, score: users.length * Math.log1p(deltaCalls) };
  });

  ranked.sort((left, right) => {
    const byScore = right.score - left.score;
    if (byScore) {
      return byScore;
    }
    return left.template.aggregate.id.localeCompare(right.template.aggregate.id);
  });

  const kept: AnalyzedPgssTemplate[] = [];
  for (const entry of ranked.slice(0, maxTemplatesPerRun)) {
    kept.push(entry.template);
  }
  return kept;
}
|
||||
|
||||
/**
 * Builds the three staged artifacts (metadata, markdown page, usage) for one template.
 *
 * PGSS-derived templates carry no literal slots, samples, sub-cluster id or runtime
 * percentiles, and their error rate is reported as 0. `last_seen` is always `now`.
 *
 * @param template - Aggregate plus its fingerprint analysis.
 * @param config - Pull config; only `serviceAccountUserPatterns` is read here.
 * @param now - Staging time, used for `last_seen` and the recency bucket.
 * @returns The metadata, page markdown and usage payloads.
 */
function buildPgssStagedTemplate(
  template: AnalyzedPgssTemplate,
  config: HistoricSqlPullConfig,
  now: Date,
): { metadata: HistoricSqlMetadata; pageMarkdown: string; usage: HistoricSqlUsage } {
  const { aggregate, analysis } = template;
  const id = aggregate.id;
  const tablesTouched = [...analysis.tablesTouched].sort();
  const titleTable = tablesTouched[0] ?? 'query';
  const observedAt = now.toISOString();

  const triageSignals = buildPgssTriageSignals({
    executions: aggregate.deltaCalls,
    distinctUsers: aggregate.distinctUsersDelta,
    firstSeen: aggregate.firstObservedAt,
    lastSeen: observedAt,
    meanRuntimeMs: aggregate.meanExecTime,
    serviceAccountOnly: isServiceAccountOnly(aggregate.users, config.serviceAccountUserPatterns),
    now,
  });

  const metadata: HistoricSqlMetadata = {
    id,
    title: `postgres · ${titleTable} [${id.slice(0, 12)}]`,
    path: `templates/${id}/page.md`,
    objectType: HISTORIC_SQL_OBJECT_TYPE,
    lastEditedAt: null,
    properties: {
      fingerprint: analysis.fingerprint,
      sub_cluster_id: null,
      dialect: 'postgres',
      tables_touched: tablesTouched,
      literal_slots: [],
      triage_signals: triageSignals,
    },
  };

  const usage: HistoricSqlUsage = {
    stats: {
      executions: aggregate.deltaCalls,
      distinct_users: aggregate.distinctUsersDelta,
      first_seen: aggregate.firstObservedAt,
      last_seen: observedAt,
      p50_runtime_ms: null,
      p95_runtime_ms: null,
      mean_runtime_ms: aggregate.meanExecTime,
      error_rate: 0,
      rows_produced: aggregate.deltaRows,
    },
    literal_slots: [],
    samples: [],
  };

  const pageMarkdown = renderTemplatePage(id, analysis.normalizedSql, tablesTouched);
  return { metadata, pageMarkdown, usage };
}
|
||||
|
||||
/**
 * Converts template statistics into coarse string buckets.
 *
 * Executions: below 3 is `low`, below 50 is `mid`, otherwise `high`. Distinct users: up to
 * 1 is `solo`, up to 5 is `team`, otherwise `broad`. The error-rate bucket is always `ok`.
 * `firstSeen` is accepted but not used.
 *
 * @returns A record of bucket names to bucket values.
 */
function buildPgssTriageSignals(input: {
  executions: number;
  distinctUsers: number;
  firstSeen: string;
  lastSeen: string;
  meanRuntimeMs: number;
  serviceAccountOnly: boolean;
  now: Date;
}): Record<string, string> {
  const { executions, distinctUsers } = input;

  let executionsBucket = 'high';
  if (executions < 3) {
    executionsBucket = 'low';
  } else if (executions < 50) {
    executionsBucket = 'mid';
  }

  let distinctUsersBucket = 'broad';
  if (distinctUsers <= 1) {
    distinctUsersBucket = 'solo';
  } else if (distinctUsers <= 5) {
    distinctUsersBucket = 'team';
  }

  return {
    executions_bucket: executionsBucket,
    distinct_users_bucket: distinctUsersBucket,
    error_rate_bucket: 'ok',
    recency_bucket: recencyBucket(input.lastSeen, input.now),
    service_account_only: String(input.serviceAccountOnly),
    runtime_bucket: runtimeBucket(input.meanRuntimeMs),
  };
}
|
||||
|
||||
/**
 * Buckets a mean runtime in milliseconds.
 *
 * @returns `fast` below 100, `moderate` below 1000, otherwise `slow`.
 */
function runtimeBucket(meanRuntimeMs: number): string {
  return meanRuntimeMs < 100 ? 'fast' : meanRuntimeMs < 1000 ? 'moderate' : 'slow';
}
|
||||
|
||||
/**
 * Buckets how long ago a template was last seen, relative to `now`.
 *
 * Ages are measured in days and clamped at zero, so timestamps in the future count as
 * current.
 *
 * @returns `active` up to 14 days, `warm` up to 60 days, otherwise `cold`.
 */
function recencyBucket(lastSeen: string, now: Date): string {
  const elapsedMs = now.getTime() - new Date(lastSeen).getTime();
  const ageDays = Math.max(0, elapsedMs / 86400000);
  return ageDays <= 14 ? 'active' : ageDays <= 60 ? 'warm' : 'cold';
}
|
||||
|
||||
/**
 * Reports whether every user matches at least one service-account pattern.
 *
 * @param users - Usernames attributed to the template.
 * @param patterns - Regular-expression sources; all are compiled before any user is tested.
 * @returns False when either list is empty or any user matches no pattern; true otherwise.
 */
function isServiceAccountOnly(users: string[], patterns: string[]): boolean {
  if (users.length === 0 || patterns.length === 0) {
    return false;
  }

  const matchers = patterns.map((source) => new RegExp(source));
  for (const user of users) {
    const matchesAny = matchers.some((matcher) => matcher.test(user));
    if (!matchesAny) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/**
 * Renders the markdown page for a template: a heading with the id, the normalized SQL in
 * a fenced `sql` block, and a bullet list of touched tables.
 *
 * @returns Newline-joined markdown ending with a trailing newline.
 */
function renderTemplatePage(id: string, normalizedSql: string, tablesTouched: string[]): string {
  const heading = [`# ${id}`, ''];
  const sqlSection = ['## Normalized SQL', '```sql', normalizedSql, '```', ''];
  const tableBullets = tablesTouched.map((table) => `- ${table}`);
  const tablesSection = ['## Tables touched', ...tableBullets, ''];

  return heading.concat(sqlSection, tablesSection).join('\n');
}
|
||||
|
||||
/**
 * Builds the baseline to use for the next run from the raw snapshot rows.
 *
 * Each row's cumulative counters are recorded under its template id and user id. A
 * template keeps the `firstObservedAt` from the previous baseline when it had one;
 * otherwise it is stamped with `fetchedAt`. Templates absent from the snapshot are not
 * carried forward.
 *
 * @returns A baseline at the current schema version.
 */
function buildNextBaseline(input: {
  rows: PostgresPgssRow[];
  fetchedAt: string;
  statsResetAt: string | null;
  pgServerVersion: string;
  previousBaseline: PgssBaseline | null;
}): PgssBaseline {
  const { rows, fetchedAt, statsResetAt, pgServerVersion, previousBaseline } = input;
  const templates: PgssBaseline['templates'] = {};

  for (const row of rows) {
    const id = pgssTemplateId(row);
    let entry = templates[id];
    if (!entry) {
      entry = {
        firstObservedAt: previousBaseline?.templates[id]?.firstObservedAt ?? fetchedAt,
        perUser: {},
      };
      templates[id] = entry;
    }
    const { calls, totalExecTime, totalRows } = row;
    entry.perUser[row.userid] = { calls, totalExecTime, totalRows };
  }

  return { version: PGSS_BASELINE_VERSION, fetchedAt, statsResetAt, pgServerVersion, templates };
}
|
||||
|
||||
/** Serializes `value` as 2-space-indented JSON with a trailing newline and writes it via `writeText`. */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const serialized = `${JSON.stringify(value, null, 2)}\n`;
  await writeText(root, relPath, serialized);
}
|
||||
|
||||
/**
 * Writes UTF-8 text to `relPath` beneath `root`, creating missing parent directories.
 *
 * @param root - Base directory.
 * @param relPath - Path relative to `root`.
 * @param value - File contents.
 */
async function writeText(root: string, relPath: string, value: string): Promise<void> {
  const destination = join(root, relPath);
  const parentDir = dirname(destination);
  await mkdir(parentDir, { recursive: true });
  await writeFile(destination, value, 'utf-8');
}
|
||||
798
packages/context/src/ingest/adapters/historic-sql/stage.test.ts
Normal file
798
packages/context/src/ingest/adapters/historic-sql/stage.test.ts
Normal file
|
|
@ -0,0 +1,798 @@
|
|||
import { mkdtemp, readFile, readdir } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
|
||||
import { stageHistoricSqlTemplates } from './stage.js';
|
||||
import {
|
||||
historicSqlManifestSchema,
|
||||
historicSqlMetadataSchema,
|
||||
historicSqlUsageSchema,
|
||||
type HistoricSqlQueryHistoryReader,
|
||||
type HistoricSqlRawQueryRow,
|
||||
} from './types.js';
|
||||
|
||||
/** Creates a fresh temporary directory whose name starts with `historic-sql-stage-` and returns its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-stage-');
  return mkdtemp(prefix);
}
|
||||
|
||||
/**
 * Reads `relPath` beneath `root` as UTF-8 and parses it as JSON.
 * The result is cast to `T` without validation.
 */
async function readJson<T>(root: string, relPath: string): Promise<T> {
  const contents = await readFile(join(root, relPath), 'utf-8');
  return JSON.parse(contents) as T;
}
|
||||
|
||||
/**
 * Test double for a query-history reader: `probe` resolves without doing anything and
 * `fetch` yields the supplied rows in order.
 */
function fakeReader(rows: HistoricSqlRawQueryRow[]): HistoricSqlQueryHistoryReader {
  return {
    probe: async () => {},
    fetch: async function* () {
      yield* rows;
    },
  };
}
|
||||
|
||||
/**
 * Test double for SQL analysis: any SQL containing `paid` maps to the paid-orders
 * fingerprint (two literal slots); everything else maps to the refunds fingerprint.
 */
const fakeSqlAnalysis: SqlAnalysisPort = {
  async analyzeForFingerprint(sql) {
    const mentionsPaid = sql.includes('paid');
    if (!mentionsPaid) {
      return {
        fingerprint: 'fp_refunds',
        normalizedSql: 'SELECT count(*) FROM analytics.refunds WHERE state = ?',
        tablesTouched: ['analytics.refunds'],
        literalSlots: [{ position: 1, type: 'string', exampleValue: 'complete' }],
      };
    }

    return {
      fingerprint: 'fp_paid_orders',
      normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ? AND created_at >= ?',
      tablesTouched: ['analytics.orders'],
      literalSlots: [
        { position: 1, type: 'string', exampleValue: 'paid' },
        { position: 2, type: 'date', exampleValue: '2026-04-01' },
      ],
    };
  },
};
|
||||
|
||||
/**
 * Test double that maps every query to one fingerprint with a single string slot whose
 * example value is `refunded` when the SQL contains `'refunded'`, otherwise `paid`.
 */
const categoricalSqlAnalysis: SqlAnalysisPort = {
  async analyzeForFingerprint(sql) {
    let status = 'paid';
    if (sql.includes("'refunded'")) {
      status = 'refunded';
    }
    const literalSlots = [{ position: 1, type: 'string', exampleValue: status }];
    return {
      fingerprint: 'fp_order_status',
      normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?',
      tablesTouched: ['analytics.orders'],
      literalSlots,
    };
  },
};
|
||||
|
||||
/**
 * Fixture of six successful query rows: three analysts each run the `paid` status query,
 * then the `refunded` one. Rows are one minute apart starting at 10:00, runtimes rise by
 * 10 ms per row from 100, and row counts run 11-13 for `paid` and 21-23 for `refunded`.
 */
function categoricalRows(): HistoricSqlRawQueryRow[] {
  const analysts = ['analyst-a', 'analyst-b', 'analyst-c'];
  const groups = [
    { status: 'paid', firstRowCount: 11 },
    { status: 'refunded', firstRowCount: 21 },
  ];

  return groups.flatMap(({ status, firstRowCount }, groupIndex) =>
    analysts.map((user, userIndex) => {
      // Position of this row in the overall sequence (0-5).
      const sequence = groupIndex * analysts.length + userIndex;
      return {
        id: `${status}-${userIndex + 1}`,
        sql: `SELECT count(*) FROM analytics.orders WHERE status = '${status}'`,
        user,
        startedAt: `2026-05-04T10:0${sequence}:00.000Z`,
        endedAt: null,
        runtimeMs: 100 + 10 * sequence,
        rowsProduced: firstRowCount + userIndex,
        success: true,
        errorMessage: null,
      };
    }),
  );
}
|
||||
|
||||
/**
 * Test double that maps every query to one fingerprint and reports the quoted
 * `status = '...'` literal as the slot's example value (`unknown` when absent).
 */
const diverseSqlAnalysis: SqlAnalysisPort = {
  async analyzeForFingerprint(sql) {
    const statusMatch = sql.match(/status = '([^']+)'/);
    const exampleValue = statusMatch?.[1] ?? 'unknown';
    return {
      fingerprint: 'fp_diverse_samples',
      normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?',
      tablesTouched: ['analytics.orders'],
      literalSlots: [{ position: 1, type: 'string', exampleValue }],
    };
  },
};
|
||||
|
||||
/**
 * Test double with two fingerprints. SQL mentioning `stale_orders` maps to a single
 * date-slot template. Everything else maps to a five-slot template whose example values
 * are pulled from the SQL text (region, plan, status, amount, created_at), with defaults
 * of `unknown`, `0` and `2026-05-01` when a value is missing.
 */
const classificationMatrixSqlAnalysis: SqlAnalysisPort = {
  async analyzeForFingerprint(sql) {
    if (sql.includes('stale_orders')) {
      return {
        fingerprint: 'fp_stale_date',
        normalizedSql: 'SELECT count(*) FROM analytics.stale_orders WHERE created_at >= ?',
        tablesTouched: ['analytics.stale_orders'],
        literalSlots: [{ position: 1, type: 'date', exampleValue: '2026-04-01' }],
      };
    }

    // Extracts the quoted value compared against `field`, e.g. region = 'us' -> us.
    function quotedValue(field: string): string {
      const found = sql.match(new RegExp(`${field} = '([^']+)'`));
      return found?.[1] ?? 'unknown';
    }

    const region = quotedValue('region');
    const plan = quotedValue('plan');
    const status = quotedValue('status');
    const amount = sql.match(/amount >= (\d+)/)?.[1] ?? '0';
    const asOf = sql.match(/created_at >= '([^']+)'/)?.[1] ?? '2026-05-01';

    return {
      fingerprint: 'fp_classification_matrix',
      normalizedSql:
        'SELECT count(*) FROM analytics.orders WHERE region = ? AND plan = ? AND status = ? AND amount >= ? AND created_at >= ?',
      tablesTouched: ['analytics.orders'],
      literalSlots: [
        { position: 1, type: 'string', exampleValue: region },
        { position: 2, type: 'string', exampleValue: plan },
        { position: 3, type: 'string', exampleValue: status },
        { position: 4, type: 'number', exampleValue: amount },
        { position: 5, type: 'date', exampleValue: asOf },
      ],
    };
  },
};
|
||||
|
||||
/**
 * Fixture of 21 successful query rows: twenty `analytics.orders` queries followed by one
 * `analytics.stale_orders` query.
 *
 * Across the twenty: region is always `us`; the first ten are `paid` and the rest
 * `refunded`; only the last uses plan `self_serve`; amount and runtime are `100 + index`;
 * the `created_at` literal advances one day every five rows; users cycle through
 * `analyst-1` to `analyst-4`; start times are one minute apart from 10:00.
 */
function classificationMatrixRows(): HistoricSqlRawQueryRow[] {
  const rows: HistoricSqlRawQueryRow[] = [];

  for (let index = 0; index < 20; index += 1) {
    const status = index < 10 ? 'paid' : 'refunded';
    const plan = index === 19 ? 'self_serve' : 'enterprise';
    const amount = 100 + index;
    const day = String(1 + Math.floor(index / 5)).padStart(2, '0');
    const asOf = `2026-05-${day}`;
    const minute = String(index).padStart(2, '0');

    rows.push({
      id: `matrix-${index + 1}`,
      sql: `SELECT count(*) FROM analytics.orders WHERE region = 'us' AND plan = '${plan}' AND status = '${status}' AND amount >= ${amount} AND created_at >= '${asOf}'`,
      user: `analyst-${(index % 4) + 1}`,
      startedAt: `2026-05-04T10:${minute}:00.000Z`,
      endedAt: null,
      runtimeMs: 100 + index,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    });
  }

  rows.push({
    id: 'stale-date-1',
    sql: "SELECT count(*) FROM analytics.stale_orders WHERE created_at >= '2026-04-01'",
    user: 'analyst-1',
    startedAt: '2026-05-04T11:00:00.000Z',
    endedAt: null,
    runtimeMs: 75,
    rowsProduced: 1,
    success: true,
    errorMessage: null,
  });

  return rows;
}
|
||||
|
||||
describe('stageHistoricSqlTemplates', () => {
|
||||
it('compresses rows by fingerprint into document-shaped staged templates', async () => {
  const stagedDir = await tempDir();

  // Two executions of the same statement shape; only the date and e-mail literals differ.
  const historyRows: HistoricSqlRawQueryRow[] = [
    {
      id: 'q1',
      sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-04-01' AND email = 'analyst@example.com'",
      user: 'analyst@example.com',
      startedAt: '2026-05-04T10:00:00.000Z',
      endedAt: '2026-05-04T10:00:01.000Z',
      runtimeMs: 100,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    },
    {
      id: 'q2',
      sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-05-01' AND email = 'analyst-2@example.com'",
      user: 'analyst-2@example.com',
      startedAt: '2026-05-04T11:00:00.000Z',
      endedAt: '2026-05-04T11:00:01.000Z',
      runtimeMs: 300,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    },
  ];

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_1',
    queryClient: {},
    reader: fakeReader(historyRows),
    sqlAnalysis: fakeSqlAnalysis,
    pullConfig: {
      dialect: 'snowflake',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: ['^svc_'],
      redactionPatterns: ['[\\w.+-]+@[\\w-]+\\.[\\w.-]+'],
      maxTemplatesPerRun: 5000,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  // Manifest: one template for both rows, cursor at the newest startedAt.
  const manifestJson = await readJson(stagedDir, 'manifest.json');
  const manifest = historicSqlManifestSchema.parse(manifestJson);
  expect(manifest).toMatchObject({
    source: 'historic-sql',
    connectionId: 'conn_1',
    dialect: 'snowflake',
    nextSuccessfulCursor: '2026-05-04T11:00:00.000Z',
    templateCount: 1,
    capped: false,
  });

  // The template directory contains exactly the three staged documents.
  const stagedFiles = await readdir(join(stagedDir, 'templates', 'fp_paid_orders'));
  stagedFiles.sort();
  expect(stagedFiles).toEqual(['metadata.json', 'page.md', 'usage.json']);

  const metadataJson = await readJson(stagedDir, 'templates/fp_paid_orders/metadata.json');
  const metadata = historicSqlMetadataSchema.parse(metadataJson);
  const expectedMetadata = {
    id: 'fp_paid_orders',
    title: 'snowflake · analytics.orders [fp_pai]',
    path: 'templates/fp_paid_orders/page.md',
    objectType: 'historic_sql_template',
    lastEditedAt: null,
    properties: {
      fingerprint: 'fp_paid_orders',
      sub_cluster_id: null,
      dialect: 'snowflake',
      tables_touched: ['analytics.orders'],
      literal_slots: [
        { position: 1, type: 'string', classification: 'constant' },
        { position: 2, type: 'date', classification: 'runtime' },
      ],
      triage_signals: {
        executions_bucket: 'low',
        distinct_users_bucket: 'team',
        error_rate_bucket: 'ok',
        recency_bucket: 'active',
        service_account_only: 'false',
        slot_summary: '1 constant, 1 runtime',
      },
    },
  };
  expect(metadata).toEqual(expectedMetadata);

  const page = await readFile(join(stagedDir, 'templates/fp_paid_orders/page.md'), 'utf-8');
  expect(page).toContain('## Normalized SQL');
  expect(page).toContain('SELECT count(*) FROM analytics.orders WHERE status = ? AND created_at >= ?');
  expect(page).toContain('- analytics.orders');

  const usageJson = await readJson(stagedDir, 'templates/fp_paid_orders/usage.json');
  const usage = historicSqlUsageSchema.parse(usageJson);
  expect(usage.stats).toMatchObject({
    executions: 2,
    distinct_users: 2,
    first_seen: '2026-05-04T10:00:00.000Z',
    last_seen: '2026-05-04T11:00:00.000Z',
    p50_runtime_ms: 100,
    p95_runtime_ms: 300,
    error_rate: 0,
  });

  // The persisted sample must have the e-mail literals redacted.
  expect(usage.samples).toHaveLength(1);
  const boundSql = usage.samples[0].bound_sql;
  expect(boundSql).toContain('<redacted>');
  expect(boundSql).not.toContain('analyst@example.com');
  expect(boundSql).not.toContain('analyst-2@example.com');
});
|
||||
|
||||
it('skips hard-noise SQL and caps templates deterministically', async () => {
  const stagedDir = await tempDir();

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_1',
    queryClient: {},
    reader: fakeReader([
      // Utility statement: expected to be dropped before fingerprinting.
      {
        id: 'show-1',
        sql: 'SHOW TABLES',
        user: 'analyst',
        startedAt: '2026-05-04T10:00:00.000Z',
        endedAt: null,
        runtimeMs: null,
        success: true,
        errorMessage: null,
      },
      // Two real queries; with a cap of one only a single template may survive.
      {
        id: 'q3',
        sql: "SELECT count(*) FROM analytics.refunds WHERE state = 'complete'",
        user: 'analyst',
        startedAt: '2026-05-04T11:00:00.000Z',
        endedAt: null,
        runtimeMs: 50,
        success: true,
        errorMessage: null,
      },
      {
        id: 'q4',
        sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-04-01'",
        user: 'analyst',
        startedAt: '2026-05-04T11:30:00.000Z',
        endedAt: null,
        runtimeMs: 40,
        success: true,
        errorMessage: null,
      },
    ]),
    sqlAnalysis: fakeSqlAnalysis,
    pullConfig: {
      dialect: 'bigquery',
      windowDays: 7,
      lastSuccessfulCursor: '2026-05-01T00:00:00.000Z',
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      maxTemplatesPerRun: 1,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  const manifestJson = await readJson(stagedDir, 'manifest.json');
  const manifest = historicSqlManifestSchema.parse(manifestJson);

  expect(manifest.templateCount).toBe(1);
  expect(manifest.capped).toBe(true);
  // "of 2" shows the SHOW statement never became a template candidate.
  expect(manifest.warnings).toEqual(['templates_truncated: kept 1 of 2 templates']);

  const keptIds = manifest.templates.map((entry) => entry.id);
  expect(keptIds).toEqual(['fp_paid_orders']);
});
|
||||
|
||||
it('splits categorical fingerprints into one document directory per dominant value', async () => {
  const stagedDir = await tempDir();

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_1',
    queryClient: {},
    reader: fakeReader(categoricalRows()),
    sqlAnalysis: categoricalSqlAnalysis,
    pullConfig: {
      dialect: 'snowflake',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      maxTemplatesPerRun: 5000,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  const manifestJson = await readJson(stagedDir, 'manifest.json');
  const manifest = historicSqlManifestSchema.parse(manifestJson);

  // Project each manifest entry onto the compared fields and order by id for a stable comparison.
  const templates = manifest.templates.map(({ id, fingerprint, subClusterId, path }) => ({
    id,
    fingerprint,
    subClusterId,
    path,
  }));
  templates.sort((a, b) => a.id.localeCompare(b.id));

  expect(manifest.templateCount).toBe(2);
  expect(templates).toEqual([
    {
      id: 'fp_order_status__cat_2b2ff2318877',
      fingerprint: 'fp_order_status',
      subClusterId: 'cat_2b2ff2318877',
      path: 'templates/fp_order_status__cat_2b2ff2318877/page.md',
    },
    {
      id: 'fp_order_status__cat_34f037ddcbfa',
      fingerprint: 'fp_order_status',
      subClusterId: 'cat_34f037ddcbfa',
      path: 'templates/fp_order_status__cat_34f037ddcbfa/page.md',
    },
  ]);

  // Sub-cluster for status = 'paid'.
  const paidMetadataJson = await readJson(stagedDir, 'templates/fp_order_status__cat_34f037ddcbfa/metadata.json');
  const paidMetadata = historicSqlMetadataSchema.parse(paidMetadataJson);
  expect(paidMetadata).toMatchObject({
    id: 'fp_order_status__cat_34f037ddcbfa',
    title: 'snowflake · analytics.orders [fp_ord:ddcbfa]',
    path: 'templates/fp_order_status__cat_34f037ddcbfa/page.md',
    properties: {
      fingerprint: 'fp_order_status',
      sub_cluster_id: 'cat_34f037ddcbfa',
      dialect: 'snowflake',
      tables_touched: ['analytics.orders'],
      literal_slots: [{ position: 1, type: 'string', classification: 'categorical' }],
    },
  });

  const paidUsageJson = await readJson(stagedDir, 'templates/fp_order_status__cat_34f037ddcbfa/usage.json');
  const paidUsage = historicSqlUsageSchema.parse(paidUsageJson);
  expect(paidUsage.stats).toMatchObject({
    executions: 3,
    distinct_users: 3,
    first_seen: '2026-05-04T10:00:00.000Z',
    last_seen: '2026-05-04T10:02:00.000Z',
    rows_produced: 36,
  });
  expect(paidUsage.literal_slots).toEqual([{ position: 1, distinct_values: 1, top_values: [['paid', 3]] }]);

  // Sub-cluster for status = 'refunded'.
  const refundedUsageJson = await readJson(stagedDir, 'templates/fp_order_status__cat_2b2ff2318877/usage.json');
  const refundedUsage = historicSqlUsageSchema.parse(refundedUsageJson);
  expect(refundedUsage.stats).toMatchObject({
    executions: 3,
    distinct_users: 3,
    first_seen: '2026-05-04T10:03:00.000Z',
    last_seen: '2026-05-04T10:05:00.000Z',
    rows_produced: 66,
  });
  expect(refundedUsage.literal_slots).toEqual([
    { position: 1, distinct_values: 1, top_values: [['refunded', 3]] },
  ]);
});
|
||||
|
||||
it('classifies literal slots across the spec matrix and stale-date demotion', async () => {
  const stagedDir = await tempDir();

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_1',
    queryClient: {},
    reader: fakeReader(classificationMatrixRows()),
    sqlAnalysis: classificationMatrixSqlAnalysis,
    pullConfig: {
      dialect: 'snowflake',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      maxTemplatesPerRun: 5000,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  const manifestJson = await readJson(stagedDir, 'manifest.json');
  const manifest = historicSqlManifestSchema.parse(manifestJson);

  // The matrix fingerprint must have been split into two categorical sub-clusters.
  const matrixTemplates = manifest.templates.filter((entry) => entry.fingerprint === 'fp_classification_matrix');
  expect(matrixTemplates).toHaveLength(2);
  const allCategorical = matrixTemplates.every((entry) => entry.subClusterId?.startsWith('cat_'));
  expect(allCategorical).toBe(true);

  const [matrixTemplate] = matrixTemplates;
  if (!matrixTemplate) {
    throw new Error('expected classification matrix template');
  }

  // metadata.json sits next to the page.md referenced by the manifest entry.
  const matrixMetadataPath = matrixTemplate.path.replace('/page.md', '/metadata.json');
  const matrixMetadata = historicSqlMetadataSchema.parse(await readJson(stagedDir, matrixMetadataPath));
  expect(matrixMetadata.properties.literal_slots).toMatchInlineSnapshot(`
    [
      {
        "classification": "constant",
        "position": 1,
        "type": "string",
      },
      {
        "classification": "constant",
        "position": 2,
        "type": "string",
      },
      {
        "classification": "categorical",
        "position": 3,
        "type": "string",
      },
      {
        "classification": "runtime",
        "position": 4,
        "type": "number",
      },
      {
        "classification": "runtime",
        "position": 5,
        "type": "date",
      },
    ]
  `);
  expect(matrixMetadata.properties.triage_signals.slot_summary).toBe('2 constant, 2 runtime');

  // The single-execution stale-date query keeps its date slot classified as runtime.
  const staleMetadataJson = await readJson(stagedDir, 'templates/fp_stale_date/metadata.json');
  const staleMetadata = historicSqlMetadataSchema.parse(staleMetadataJson);
  expect(staleMetadata.properties.literal_slots).toMatchInlineSnapshot(`
    [
      {
        "classification": "runtime",
        "position": 1,
        "type": "date",
      },
    ]
  `);
  expect(staleMetadata.properties.triage_signals.slot_summary).toBe('0 constant, 1 runtime');
});
|
||||
|
||||
it('applies the templates-per-run cap after categorical expansion', async () => {
  const stagedDir = await tempDir();

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_1',
    queryClient: {},
    reader: fakeReader(categoricalRows()),
    sqlAnalysis: categoricalSqlAnalysis,
    pullConfig: {
      dialect: 'snowflake',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      // A cap of one while the fixture is asserted to expand into two sub-clusters.
      maxTemplatesPerRun: 1,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  const manifestJson = await readJson(stagedDir, 'manifest.json');
  const manifest = historicSqlManifestSchema.parse(manifestJson);

  expect(manifest.templateCount).toBe(1);
  expect(manifest.capped).toBe(true);
  // "of 2" proves the cap was counted against expanded variants, not raw fingerprints.
  expect(manifest.warnings).toEqual(['templates_truncated: kept 1 of 2 templates']);
  expect(manifest.templates).toHaveLength(1);

  const survivor = manifest.templates[0];
  expect(survivor.id).toMatch(/^fp_order_status__cat_/);
});
|
||||
|
||||
it('omits rows_produced for BigQuery templates when reader rows have no row counts', async () => {
  const stagedDir = await tempDir();

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_bq',
    queryClient: {},
    reader: fakeReader([
      // Deliberately has no rowsProduced field.
      {
        id: 'bq-1',
        sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'",
        user: 'analyst-a@example.com',
        startedAt: '2026-05-04T10:00:00.000Z',
        endedAt: null,
        runtimeMs: 100,
        success: true,
        errorMessage: null,
      },
    ]),
    sqlAnalysis: fakeSqlAnalysis,
    pullConfig: {
      dialect: 'bigquery',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      maxTemplatesPerRun: 5000,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  const usageJson = await readJson(stagedDir, 'templates/fp_paid_orders/usage.json');
  const usage = historicSqlUsageSchema.parse(usageJson);

  // The key must be absent from both the aggregate stats and the persisted sample.
  expect(usage.stats).not.toHaveProperty('rows_produced');
  const firstSample = usage.samples[0];
  expect(firstSample).not.toHaveProperty('rows_produced');
});
|
||||
|
||||
it('keeps at most five diverse samples, preferring recent successful representatives per literal tuple', async () => {
  const stagedDir = await tempDir();
  const statuses = [
    'paid',
    'refunded',
    'pending',
    'failed',
    'trial',
    'cancelled',
    'draft',
    'returned',
    'review',
    'held',
    'archived',
  ];

  // For every status: an older failed execution (10:MM) followed by a newer successful one (11:MM).
  const rows: HistoricSqlRawQueryRow[] = [];
  for (const [index, status] of statuses.entries()) {
    const minute = String(index).padStart(2, '0');
    rows.push(
      {
        id: `${status}-old`,
        sql: `SELECT count(*) FROM analytics.orders WHERE status = '${status}'`,
        user: 'analyst-a',
        startedAt: `2026-05-04T10:${minute}:00.000Z`,
        endedAt: null,
        runtimeMs: 100,
        rowsProduced: 1,
        success: false,
        errorMessage: 'old failed sample',
      },
      {
        id: `${status}-new`,
        sql: `SELECT count(*) FROM analytics.orders WHERE status = '${status}'`,
        user: 'analyst-a',
        startedAt: `2026-05-04T11:${minute}:00.000Z`,
        endedAt: null,
        runtimeMs: 90,
        rowsProduced: 2,
        success: true,
        errorMessage: null,
      },
    );
  }

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_1',
    queryClient: {},
    reader: fakeReader(rows),
    sqlAnalysis: diverseSqlAnalysis,
    pullConfig: {
      dialect: 'snowflake',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      maxTemplatesPerRun: 5000,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  const usageJson = await readJson(stagedDir, 'templates/fp_diverse_samples/usage.json');
  const usage = historicSqlUsageSchema.parse(usageJson);

  expect(usage.samples).toHaveLength(5);
  expect(usage.samples.every((sample) => sample.success)).toBe(true);

  // Every kept sample must carry a different status literal.
  const sampledStatuses = usage.samples.map((sample) => sample.bound_sql.match(/status = '([^']+)'/)?.[1]);
  expect(new Set(sampledStatuses).size).toBe(5);

  // Newest successful executions first: the last five statuses in descending time order.
  const sampledStartTimes = usage.samples.map((sample) => sample.started_at);
  expect(sampledStartTimes).toEqual([
    '2026-05-04T11:10:00.000Z',
    '2026-05-04T11:09:00.000Z',
    '2026-05-04T11:08:00.000Z',
    '2026-05-04T11:07:00.000Z',
    '2026-05-04T11:06:00.000Z',
  ]);
});
|
||||
|
||||
it('uses recency as a tie-breaker when the templates-per-run cap overflows', async () => {
  const stagedDir = await tempDir();

  // Minimal analyzer: the fingerprint is derived from which of the two tables the SQL mentions.
  const sqlAnalysis: SqlAnalysisPort = {
    async analyzeForFingerprint(sql) {
      let table = 'stale_orders';
      if (sql.includes('fresh_orders')) {
        table = 'fresh_orders';
      }
      return {
        fingerprint: `fp_${table}`,
        normalizedSql: `SELECT count(*) FROM analytics.${table}`,
        tablesTouched: [`analytics.${table}`],
        literalSlots: [],
      };
    },
  };

  // One execution each; the two rows differ only in table and in how recently they ran.
  const historyRows: HistoricSqlRawQueryRow[] = [
    {
      id: 'stale-1',
      sql: 'SELECT count(*) FROM analytics.stale_orders',
      user: 'analyst-a',
      startedAt: '2026-02-04T10:00:00.000Z',
      endedAt: null,
      runtimeMs: 100,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    },
    {
      id: 'fresh-1',
      sql: 'SELECT count(*) FROM analytics.fresh_orders',
      user: 'analyst-a',
      startedAt: '2026-05-04T10:00:00.000Z',
      endedAt: null,
      runtimeMs: 100,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    },
  ];

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_1',
    queryClient: {},
    reader: fakeReader(historyRows),
    sqlAnalysis,
    pullConfig: {
      dialect: 'snowflake',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      redactionPatterns: [],
      maxTemplatesPerRun: 1,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  const manifestJson = await readJson(stagedDir, 'manifest.json');
  const manifest = historicSqlManifestSchema.parse(manifestJson);
  const keptIds = manifest.templates.map((entry) => entry.id);
  expect(keptIds).toEqual(['fp_fresh_orders']);
});
|
||||
|
||||
it('does not persist bound SQL samples when redaction patterns are invalid', async () => {
  const stagedDir = await tempDir();

  // A query whose literal is an e-mail address, i.e. something redaction would be expected to hide.
  const historyRows: HistoricSqlRawQueryRow[] = [
    {
      id: 'q1',
      sql: "SELECT * FROM analytics.orders WHERE email = 'analyst@example.com'",
      user: 'analyst@example.com',
      startedAt: '2026-05-04T10:00:00.000Z',
      endedAt: null,
      runtimeMs: 100,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    },
  ];

  await stageHistoricSqlTemplates({
    stagedDir,
    connectionId: 'conn_1',
    queryClient: {},
    reader: fakeReader(historyRows),
    sqlAnalysis: {
      async analyzeForFingerprint() {
        return {
          fingerprint: 'fp_redaction',
          normalizedSql: 'SELECT * FROM analytics.orders WHERE email = ?',
          tablesTouched: ['analytics.orders'],
          literalSlots: [{ position: 1, type: 'string', exampleValue: 'analyst@example.com' }],
        };
      },
    },
    pullConfig: {
      dialect: 'snowflake',
      windowDays: 90,
      lastSuccessfulCursor: null,
      serviceAccountUserPatterns: [],
      // '[' is not a valid regular expression.
      redactionPatterns: ['['],
      maxTemplatesPerRun: 5000,
      minCalls: 5,
    },
    now: new Date('2026-05-04T12:00:00.000Z'),
  });

  const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
  const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_redaction/usage.json'));

  // The run reports the bad pattern as a warning and stores no samples at all.
  const hasRedactionWarning = manifest.warnings.some((warning) =>
    warning.startsWith('redaction_skipped:invalid_redaction_pattern'),
  );
  expect(hasRedactionWarning).toBe(true);
  expect(usage.samples).toEqual([]);
});
|
||||
});
|
||||
630
packages/context/src/ingest/adapters/historic-sql/stage.ts
Normal file
630
packages/context/src/ingest/adapters/historic-sql/stage.ts
Normal file
|
|
@ -0,0 +1,630 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import type {
|
||||
SqlAnalysisFingerprintResult,
|
||||
SqlAnalysisLiteralSlot,
|
||||
SqlAnalysisLiteralSlotType,
|
||||
SqlAnalysisPort,
|
||||
} from '../../../sql-analysis/index.js';
|
||||
import {
|
||||
HISTORIC_SQL_OBJECT_TYPE,
|
||||
HISTORIC_SQL_SOURCE_KEY,
|
||||
historicSqlPullConfigSchema,
|
||||
historicSqlRawQueryRowSchema,
|
||||
type HistoricSqlLiteralSlotClassification,
|
||||
type HistoricSqlManifest,
|
||||
type HistoricSqlMetadata,
|
||||
type HistoricSqlPullConfig,
|
||||
type HistoricSqlQueryHistoryReader,
|
||||
type HistoricSqlRawQueryRow,
|
||||
type HistoricSqlUsage,
|
||||
} from './types.js';
|
||||
|
||||
/** Arguments accepted by {@link stageHistoricSqlTemplates}. */
interface StageHistoricSqlTemplatesInput {
  /** Directory that receives `manifest.json` and the `templates/<id>/` documents. */
  stagedDir: string;
  /** Connection identifier copied verbatim into the manifest. */
  connectionId: string;
  /** Opaque client handle; it is only ever forwarded to `reader.probe` and `reader.fetch`. */
  queryClient: unknown;
  /** Source of raw query-history rows. */
  reader: HistoricSqlQueryHistoryReader;
  /** Analyzer asked to fingerprint every non-skipped SQL statement. */
  sqlAnalysis: SqlAnalysisPort;
  /** Pull settings; validated with `historicSqlPullConfigSchema` before use. */
  pullConfig: HistoricSqlPullConfig;
  /** Run timestamp used as window end and `fetchedAt`; defaults to the current time. */
  now?: Date;
}
|
||||
|
||||
/** One observed value of a literal slot, paired with the start time of the row it came from. */
interface SlotObservation {
  /** Slot value after redaction has been applied. */
  value: string;
  /** `startedAt` of the query-history row that produced this value. */
  rowStartedAt: string;
}
|
||||
|
||||
/** Running statistics for a single literal-slot position within a template. */
interface SlotStats {
  /** Slot position as reported by SQL analysis. */
  position: number;
  /** Literal type taken from the first slot recorded at this position. */
  type: SqlAnalysisLiteralSlotType;
  /** Occurrence count per (redacted) value. */
  values: Map<string, number>;
  /** Every recorded value, in recording order, with the originating row's start time. */
  observations: SlotObservation[];
}
|
||||
|
||||
/** Everything collected for one SQL fingerprint while streaming query history. */
interface TemplateAccumulator {
  /** Fingerprint shared by all rows in this group. */
  fingerprint: string;
  /** Normalized SQL taken from the first row seen for this fingerprint. */
  normalizedSql: string;
  /** Union of the tables reported for every row in the group. */
  tablesTouched: Set<string>;
  /** Each raw row together with its analysis result, in arrival order. */
  rows: { row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }[];
  /** Per-position literal statistics across all rows in the group. */
  slotStats: Map<number, SlotStats>;
}
|
||||
|
||||
/** A literal slot together with the classification assigned to it. */
interface ClassifiedLiteralSlot {
  /** Slot position as reported by SQL analysis. */
  position: number;
  /** Literal type of the slot. */
  type: SqlAnalysisLiteralSlotType;
  /** Classification label; the tests show 'constant', 'categorical' and 'runtime'. */
  classification: HistoricSqlLiteralSlotClassification;
}
|
||||
|
||||
/**
 * A template as it will be staged: either a whole fingerprint group, or one categorical
 * sub-cluster of it.
 */
interface TemplateVariant {
  /** The fingerprint itself, or `<fingerprint>__<subClusterId>` for a sub-cluster. */
  id: string;
  /** Fingerprint of the group this variant was derived from. */
  fingerprint: string;
  /** `cat_…` identifier of the categorical tuple, or `null` when the group was not split. */
  subClusterId: string | null;
  /** Normalized SQL of the parent group. */
  normalizedSql: string;
  /** Tables touched by the parent group (shared across its sub-clusters). */
  tablesTouched: Set<string>;
  /** Rows that belong to this variant. */
  rows: { row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }[];
  /** Literal statistics restricted to this variant's rows. */
  slotStats: Map<number, SlotStats>;
  /** Slot classifications computed on the parent group. */
  slotClassifications: ClassifiedLiteralSlot[];
}
|
||||
|
||||
/** One component of a categorical tuple: the value a row holds at a categorical slot position. */
interface CategoricalTupleEntry {
  /** Position of the categorical slot. */
  position: number;
  /** Redacted slot value, or `'<missing>'` when the row has no slot at that position. */
  value: string;
}
|
||||
|
||||
/** Redaction settings compiled from the pull configuration. */
interface RedactionPolicy {
  /** Compiled patterns applied to literal values before they are persisted. */
  redactors: RegExp[];
  /**
   * Whether bound SQL samples may be written.
   * NOTE(review): presumably false when a redaction pattern failed to compile — confirm
   * against `compileRedactors`.
   */
  samplesAllowed: boolean;
}
|
||||
|
||||
// Matches statements whose first keyword (after optional whitespace, case-insensitive) is
// SHOW, DESCRIBE, DESC, EXPLAIN, USE or SET; such statements are never turned into templates.
const HARD_SKIP_PREFIX_RE = /^\s*(SHOW|DESCRIBE|DESC|EXPLAIN|USE|SET)\b/i;
|
||||
// Matches SQL that mentions, anywhere and case-insensitively, INFORMATION_SCHEMA,
// SNOWFLAKE.ACCOUNT_USAGE, any identifier starting with `pg_`, or a `system.` qualifier.
// NOTE(review): the `pg_` alternative also matches user tables named `pg_*` — confirm intended.
const HARD_SKIP_TABLE_RE = /\b(INFORMATION_SCHEMA|SNOWFLAKE\.ACCOUNT_USAGE|pg_|system\.)/i;
|
||||
|
||||
/**
 * Pulls query history through `input.reader`, groups the rows by SQL fingerprint, and writes the
 * staged result under `input.stagedDir`: a `templates/<id>/` directory (metadata.json, page.md,
 * usage.json) for every selected template, followed by a top-level `manifest.json`.
 *
 * The fetch window starts at `lastSuccessfulCursor` when the config has one, otherwise
 * `windowDays` before `now`; it always ends at `now`.
 *
 * @param input - Staging inputs; `pullConfig` is validated with `historicSqlPullConfigSchema`.
 * @throws Whatever schema parsing, the reader, the analyzer or the file system throw; nothing is
 *   caught here.
 */
export async function stageHistoricSqlTemplates(input: StageHistoricSqlTemplatesInput): Promise<void> {
  const { reader, queryClient, sqlAnalysis, stagedDir } = input;
  const config = historicSqlPullConfigSchema.parse(input.pullConfig);
  const now = input.now ?? new Date();

  let windowStart: Date;
  if (config.lastSuccessfulCursor) {
    windowStart = new Date(config.lastSuccessfulCursor);
  } else {
    windowStart = new Date(now.getTime() - config.windowDays * 24 * 60 * 60 * 1000);
  }

  const warnings: string[] = [];
  const redaction = compileRedactors(config.redactionPatterns, warnings);
  const groups = new Map<string, TemplateAccumulator>();
  let nextSuccessfulCursor: string | null = null;

  await reader.probe(queryClient);

  const history = reader.fetch(queryClient, { start: windowStart, end: now }, config.lastSuccessfulCursor);
  for await (const rawRow of history) {
    const row = historicSqlRawQueryRowSchema.parse(rawRow);

    // The cursor follows the greatest startedAt seen, including rows that are skipped or
    // fail analysis further down.
    if (!nextSuccessfulCursor || row.startedAt > nextSuccessfulCursor) {
      nextSuccessfulCursor = row.startedAt;
    }
    if (shouldSkipSql(row.sql)) {
      continue;
    }

    const analysis = await sqlAnalysis.analyzeForFingerprint(row.sql, config.dialect);
    const { fingerprint, normalizedSql } = analysis;
    if (analysis.error || !fingerprint || !normalizedSql) {
      warnings.push(`analysis_failed:${row.id}`);
      continue;
    }

    // The first row seen for a fingerprint creates the group and fixes its normalized SQL.
    let group = groups.get(fingerprint);
    if (!group) {
      group = {
        fingerprint,
        normalizedSql,
        tablesTouched: new Set<string>(),
        rows: [],
        slotStats: new Map<number, SlotStats>(),
      };
      groups.set(fingerprint, group);
    }

    for (const table of analysis.tablesTouched) {
      group.tablesTouched.add(table);
    }
    for (const slot of analysis.literalSlots) {
      recordSlot(group.slotStats, slot, redaction.redactors, row.startedAt);
    }
    group.rows.push({ row, analysis });
  }

  // Split groups into categorical variants first, then apply the per-run cap to the variants.
  const expandedTemplates = expandCategoricalTemplates([...groups.values()], redaction.redactors);
  const selected = selectTemplates(expandedTemplates, config.maxTemplatesPerRun, now);
  const capped = selected.length < expandedTemplates.length;
  if (capped) {
    warnings.push(`templates_truncated: kept ${selected.length} of ${expandedTemplates.length} templates`);
  }

  await mkdir(stagedDir, { recursive: true });

  const templates: HistoricSqlManifest['templates'] = [];
  for (const template of selected) {
    const { metadata, pageMarkdown, usage } = buildStagedTemplate(template, config, redaction, now);
    const basePath = `templates/${metadata.id}`;
    await writeJson(stagedDir, `${basePath}/metadata.json`, metadata);
    await writeText(stagedDir, `${basePath}/page.md`, pageMarkdown);
    await writeJson(stagedDir, `${basePath}/usage.json`, usage);
    templates.push({
      id: metadata.id,
      fingerprint: metadata.properties.fingerprint,
      subClusterId: metadata.properties.sub_cluster_id,
      path: metadata.path,
    });
  }

  // The manifest is written last, after all template documents.
  const fetchedAt = now.toISOString();
  await writeJson(stagedDir, 'manifest.json', {
    source: HISTORIC_SQL_SOURCE_KEY,
    connectionId: input.connectionId,
    dialect: config.dialect,
    fetchedAt,
    windowStart: windowStart.toISOString(),
    windowEnd: fetchedAt,
    nextSuccessfulCursor,
    templateCount: selected.length,
    capped,
    warnings,
    degraded: false,
    statsResetAt: null,
    baselineFirstRun: false,
    pgServerVersion: null,
    deallocCount: null,
    templates,
  } satisfies HistoricSqlManifest);
}
|
||||
|
||||
/**
 * Tells whether a statement is excluded from staging.
 *
 * @param sql - Raw SQL text of a query-history row.
 * @returns `true` when the text matches `HARD_SKIP_PREFIX_RE` or `HARD_SKIP_TABLE_RE`.
 */
function shouldSkipSql(sql: string): boolean {
  if (HARD_SKIP_PREFIX_RE.test(sql)) {
    return true;
  }
  return HARD_SKIP_TABLE_RE.test(sql);
}
|
||||
|
||||
/**
 * Adds one literal-slot observation to `slotStats`, creating the entry for the slot's position
 * on first use. The value is passed through `redactText` before it is counted or stored.
 *
 * @param slotStats - Per-position statistics; mutated in place.
 * @param slot - Literal slot reported by SQL analysis for one row.
 * @param redactors - Patterns handed to `redactText`.
 * @param rowStartedAt - `startedAt` of the row the slot belongs to.
 */
function recordSlot(
  slotStats: Map<number, SlotStats>,
  slot: SqlAnalysisLiteralSlot,
  redactors: RegExp[],
  rowStartedAt: string,
): void {
  const persistedValue = redactText(slot.exampleValue, redactors);

  let stats = slotStats.get(slot.position);
  if (stats === undefined) {
    // The first slot seen at a position determines the recorded type.
    stats = {
      position: slot.position,
      type: slot.type,
      values: new Map<string, number>(),
      observations: [],
    };
    slotStats.set(slot.position, stats);
  }

  const previousCount = stats.values.get(persistedValue) ?? 0;
  stats.values.set(persistedValue, previousCount + 1);
  stats.observations.push({ value: persistedValue, rowStartedAt });
}
|
||||
|
||||
/**
 * Expands every fingerprint group into its template variants and concatenates the results in
 * group order.
 *
 * @param groups - Accumulated fingerprint groups.
 * @param redactors - Patterns forwarded to `expandTemplateGroup`.
 * @returns All variants of all groups.
 */
function expandCategoricalTemplates(groups: TemplateAccumulator[], redactors: RegExp[]): TemplateVariant[] {
  const variants: TemplateVariant[] = [];
  for (const group of groups) {
    variants.push(...expandTemplateGroup(group, redactors));
  }
  return variants;
}
|
||||
|
||||
/**
 * Turns one fingerprint group into the template variants that will be staged.
 *
 * Slots are classified on the whole group. When no slot is categorical the group becomes a
 * single variant whose id is the fingerprint. Otherwise rows are partitioned by the values they
 * hold at the categorical positions, and each partition becomes a variant with id
 * `<fingerprint>__<subClusterId>` and its own slot statistics.
 *
 * @param group - Accumulated rows and statistics for one fingerprint.
 * @param redactors - Patterns applied to slot values before they are compared or stored.
 * @returns Variants sorted by id; empty when the group has no rows (or the earliest row has an
 *   empty `startedAt`).
 */
function expandTemplateGroup(group: TemplateAccumulator, redactors: RegExp[]): TemplateVariant[] {
  // Work on a copy so the group's own row order is left untouched.
  const rows = [...group.rows];
  rows.sort((a, b) => a.row.startedAt.localeCompare(b.row.startedAt));

  const firstSeen = rows[0]?.row.startedAt;
  if (!firstSeen) {
    return [];
  }

  const slotClassifications = classifySlots(group.slotStats, rows.length, firstSeen);

  const categoricalPositions: number[] = [];
  for (const slot of slotClassifications) {
    if (slot.classification === 'categorical') {
      categoricalPositions.push(slot.position);
    }
  }
  categoricalPositions.sort((a, b) => a - b);

  if (categoricalPositions.length === 0) {
    const wholeGroup: TemplateVariant = {
      id: group.fingerprint,
      fingerprint: group.fingerprint,
      subClusterId: null,
      normalizedSql: group.normalizedSql,
      tablesTouched: group.tablesTouched,
      rows,
      slotStats: group.slotStats,
      slotClassifications,
    };
    return [wholeGroup];
  }

  // Partition rows by their categorical tuple; the JSON form of the tuple is the partition key.
  type TupleBucket = {
    tuple: CategoricalTupleEntry[];
    rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>;
  };
  const byTuple = new Map<string, TupleBucket>();
  for (const entry of rows) {
    const tuple = categoricalTuple(entry.analysis.literalSlots, categoricalPositions, redactors);
    const key = JSON.stringify(tuple);
    let bucket = byTuple.get(key);
    if (bucket === undefined) {
      bucket = { tuple, rows: [] };
      byTuple.set(key, bucket);
    }
    bucket.rows.push(entry);
  }

  const variants: TemplateVariant[] = [];
  for (const bucket of byTuple.values()) {
    const subClusterId = subClusterIdForTuple(bucket.tuple);
    variants.push({
      id: `${group.fingerprint}__${subClusterId}`,
      fingerprint: group.fingerprint,
      subClusterId,
      normalizedSql: group.normalizedSql,
      tablesTouched: group.tablesTouched,
      rows: bucket.rows,
      // Statistics are rebuilt from this bucket's rows only; classifications stay group-wide.
      slotStats: collectSlotStats(bucket.rows, redactors),
      slotClassifications,
    });
  }
  return variants.sort((a, b) => a.id.localeCompare(b.id));
}
|
||||
|
||||
/**
 * Classifies every recorded slot of a template.
 *
 * @param slotStats - Per-position statistics for the template.
 * @param executions - Number of rows in the template; forwarded to `classifySlot`.
 * @param firstSeen - Earliest `startedAt` of the template; forwarded to `classifySlot`.
 * @returns One entry per slot, ordered by ascending position.
 */
function classifySlots(
  slotStats: Map<number, SlotStats>,
  executions: number,
  firstSeen: string,
): ClassifiedLiteralSlot[] {
  const ordered = [...slotStats.values()];
  ordered.sort((a, b) => a.position - b.position);

  const classified: ClassifiedLiteralSlot[] = [];
  for (const slot of ordered) {
    classified.push({
      position: slot.position,
      type: slot.type,
      classification: classifySlot(slot, executions, firstSeen),
    });
  }
  return classified;
}
|
||||
|
||||
/**
 * Builds fresh slot statistics from a set of analyzed rows by recording every literal slot of
 * every row, in row order.
 *
 * @param rows - Rows with their analysis results.
 * @param redactors - Patterns forwarded to `recordSlot`.
 * @returns A new map of statistics keyed by slot position.
 */
function collectSlotStats(
  rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>,
  redactors: RegExp[],
): Map<number, SlotStats> {
  const collected = new Map<number, SlotStats>();
  for (const { row, analysis } of rows) {
    for (const slot of analysis.literalSlots) {
      recordSlot(collected, slot, redactors, row.startedAt);
    }
  }
  return collected;
}
|
||||
|
||||
/**
 * Extracts the values a row holds at the categorical slot positions.
 *
 * @param literalSlots - Literal slots reported for one row.
 * @param categoricalPositions - Positions classified as categorical, in the order to emit.
 * @param redactors - Patterns applied to each slot value via `redactText`.
 * @returns One `{ position, value }` entry per requested position; `value` is `'<missing>'` when
 *   the row has no slot at that position.
 */
function categoricalTuple(
  literalSlots: SqlAnalysisLiteralSlot[],
  categoricalPositions: number[],
  redactors: RegExp[],
): CategoricalTupleEntry[] {
  // Later slots overwrite earlier ones that share a position.
  const redactedByPosition = new Map<number, string>();
  for (const slot of literalSlots) {
    redactedByPosition.set(slot.position, redactText(slot.exampleValue, redactors));
  }

  const tuple: CategoricalTupleEntry[] = [];
  for (const position of categoricalPositions) {
    const value = redactedByPosition.get(position) ?? '<missing>';
    // Key order (position, value) is kept because the tuple is serialized with JSON.stringify.
    tuple.push({ position, value });
  }
  return tuple;
}
|
||||
|
||||
/**
 * Derives the sub-cluster identifier for a categorical tuple.
 *
 * @param tuple - Categorical positions and values identifying the sub-cluster.
 * @returns `cat_` followed by the first 12 hex characters of the SHA-256 digest of the tuple's
 *   JSON serialization.
 */
function subClusterIdForTuple(tuple: CategoricalTupleEntry[]): string {
  const serialized = JSON.stringify(tuple);
  const digest = createHash('sha256').update(serialized).digest('hex');
  const shortDigest = digest.slice(0, 12);
  return `cat_${shortDigest}`;
}
|
||||
|
||||
/**
 * Assembles the staged artifacts for one template variant: its metadata
 * record, the rendered markdown page and the usage statistics.
 *
 * Reads `startedAt` from the first and last chronological row, so it throws
 * when `template.rows` is empty.
 *
 * @param template Template variant with its rows, slot stats and slot classifications.
 * @param config Pull configuration (dialect and service-account patterns are read here).
 * @param redaction Redaction policy forwarded to sample selection.
 * @param now Reference time used for the recency triage signal.
 */
function buildStagedTemplate(
  template: TemplateVariant,
  config: HistoricSqlPullConfig,
  redaction: RedactionPolicy,
  now: Date,
): { metadata: HistoricSqlMetadata; pageMarkdown: string; usage: HistoricSqlUsage } {
  // `.map` produces a fresh array, so sorting it in place leaves `template.rows` untouched.
  const chronological = template.rows.map(({ row }) => row);
  chronological.sort((earlier, later) => earlier.startedAt.localeCompare(later.startedAt));

  const executions = chronological.length;
  const firstSeen = chronological[0].startedAt;
  const lastSeen = chronological[executions - 1].startedAt;

  // Single pass over the rows: distinct users, failures and known runtimes.
  const userNames = new Set<string>();
  const runtimes: number[] = [];
  let errorCount = 0;
  for (const row of chronological) {
    if (row.user) {
      userNames.add(row.user);
    }
    if (!row.success) {
      errorCount += 1;
    }
    if (typeof row.runtimeMs === 'number') {
      runtimes.push(row.runtimeMs);
    }
  }
  runtimes.sort((a, b) => a - b);
  const distinctUsers = userNames.size;
  const errorRate = executions === 0 ? 0 : errorCount / executions;

  const triageSignals = buildTriageSignals({
    executions,
    distinctUsers,
    errorRate,
    lastSeen,
    now,
    serviceAccountOnly: isServiceAccountOnly(chronological, config.serviceAccountUserPatterns),
    slotClassifications: template.slotClassifications.map(({ classification }) => classification),
  });

  const tablesTouched = [...template.tablesTouched].sort();
  const firstTable = tablesTouched[0] ?? 'query';
  const id = template.id;
  const rowsProduced = sumRowsProduced(chronological);

  const metadata: HistoricSqlMetadata = {
    id,
    title: buildTemplateTitle(config.dialect, firstTable, template.fingerprint, template.subClusterId),
    path: `templates/${id}/page.md`,
    objectType: HISTORIC_SQL_OBJECT_TYPE,
    lastEditedAt: null,
    properties: {
      fingerprint: template.fingerprint,
      sub_cluster_id: template.subClusterId,
      dialect: config.dialect,
      tables_touched: tablesTouched,
      literal_slots: template.slotClassifications,
      triage_signals: triageSignals,
    },
  };

  // Per-slot usage: slots ordered by position, values by descending count with
  // ties broken by value, capped at the ten most frequent.
  const slotsByPosition = [...template.slotStats.values()];
  slotsByPosition.sort((a, b) => a.position - b.position);
  const literalSlotUsage = slotsByPosition.map((slot) => {
    const rankedValues = [...slot.values.entries()];
    rankedValues.sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]));
    return {
      position: slot.position,
      distinct_values: slot.values.size,
      top_values: rankedValues.slice(0, 10),
    };
  });

  const usage: HistoricSqlUsage = {
    stats: {
      executions,
      distinct_users: distinctUsers,
      first_seen: firstSeen,
      last_seen: lastSeen,
      p50_runtime_ms: percentile(runtimes, 0.5),
      p95_runtime_ms: percentile(runtimes, 0.95),
      error_rate: errorRate,
      // Omitted entirely when no row reported a row count.
      ...(rowsProduced === null ? {} : { rows_produced: rowsProduced }),
    },
    literal_slots: literalSlotUsage,
    samples: selectSamples(template.rows, redaction),
  };

  return {
    metadata,
    pageMarkdown: renderTemplatePage(id, template.normalizedSql, tablesTouched),
    usage,
  };
}
|
||||
|
||||
const TEMPORAL_SLOT_TYPES = new Set<SqlAnalysisLiteralSlotType>(['date', 'timestamp']);
|
||||
|
||||
/**
 * Reports whether `value` is a parseable literal in a `date` slot that sorts
 * (as a string) before the calendar-day prefix of `firstSeen`.
 */
function isStaleDateConstant(slot: SlotStats, value: string, firstSeen: string): boolean {
  if (slot.type !== 'date') {
    return false;
  }
  if (parseTemporalSlotValue(value) === null) {
    return false;
  }
  // First ten characters of the timestamp, i.e. its `YYYY-MM-DD` part for ISO strings.
  const firstSeenDay = firstSeen.slice(0, 10);
  return value < firstSeenDay;
}
|
||||
|
||||
/**
 * Reports whether a date/timestamp slot "moves" with execution time: ordered
 * by when each row started, its literal values never change direction.
 *
 * Returns false for non-temporal slots, for slots with fewer than two distinct
 * values, and as soon as any observation has an unparseable start time or literal.
 */
function isMovingTemporalSlot(slot: SlotStats): boolean {
  const isTemporal = TEMPORAL_SLOT_TYPES.has(slot.type);
  if (!isTemporal || slot.values.size < 2) {
    return false;
  }

  const points: Array<{ rowStartedAt: number; literalTime: number }> = [];
  for (const { rowStartedAt, value } of slot.observations) {
    const startedMs = Date.parse(rowStartedAt);
    const literalMs = parseTemporalSlotValue(value);
    if (literalMs === null || Number.isNaN(startedMs)) {
      return false;
    }
    points.push({ rowStartedAt: startedMs, literalTime: literalMs });
  }

  // Order by execution start before checking the literal sequence.
  points.sort((a, b) => a.rowStartedAt - b.rowStartedAt);
  return isMonotonic(points.map((point) => point.literalTime));
}
|
||||
|
||||
/**
 * Parses a literal with `Date.parse`.
 *
 * @returns The epoch milliseconds, or `null` when the text cannot be parsed.
 */
function parseTemporalSlotValue(value: string): number | null {
  const epochMs = Date.parse(value);
  if (Number.isNaN(epochMs)) {
    return null;
  }
  return epochMs;
}
|
||||
|
||||
/**
 * Reports whether a sequence is entirely non-decreasing or entirely
 * non-increasing. Sequences with fewer than two elements are never monotonic;
 * a sequence of equal values is.
 */
function isMonotonic(values: number[]): boolean {
  if (values.length < 2) {
    return false;
  }

  const hasDrop = values.some((value, index) => index > 0 && value < values[index - 1]);
  const hasRise = values.some((value, index) => index > 0 && value > values[index - 1]);
  // Monotonic unless the sequence both rises and falls somewhere.
  return !hasDrop || !hasRise;
}
|
||||
|
||||
/**
 * Classifies a literal slot as `constant`, `categorical` or `runtime`.
 *
 * - `constant`: a single distinct value, or one value covering at least 95% of
 *   executions, unless the top value is a stale date constant.
 * - `runtime`: a moving temporal slot (checked before the categorical rule),
 *   or anything that matches no other rule.
 * - `categorical`: 2 to 10 distinct values, each covering at least 5% of executions.
 *
 * @param slot Observed value statistics for the slot.
 * @param executions Total executions of the template.
 * @param firstSeen Start time of the earliest execution, used for the stale-date check.
 */
function classifySlot(
  slot: SlotStats,
  executions: number,
  firstSeen: string,
): HistoricSqlLiteralSlotClassification {
  const byFrequency = [...slot.values.entries()].sort((a, b) => b[1] - a[1]);
  const distinct = byFrequency.length;
  const mostCommon = byFrequency[0];
  const topValue = mostCommon?.[0] ?? '';
  const topCount = mostCommon?.[1] ?? 0;

  if (!isStaleDateConstant(slot, topValue, firstSeen)) {
    const dominant = executions > 0 && topCount / executions >= 0.95;
    if (distinct === 1 || dominant) {
      return 'constant';
    }
  }

  if (isMovingTemporalSlot(slot)) {
    return 'runtime';
  }

  const isCategorical =
    executions > 0 &&
    distinct >= 2 &&
    distinct <= 10 &&
    byFrequency.every(([, count]) => count / executions >= 0.05);
  return isCategorical ? 'categorical' : 'runtime';
}
|
||||
|
||||
/**
 * Buckets a template's raw usage numbers into string-valued triage signals.
 *
 * Buckets: executions `low` (<3) / `mid` (<50) / `high`; distinct users
 * `solo` (<=1) / `team` (<=5) / `broad`; error rate `ok` (<=1%) /
 * `noisy` (<=10%) / `broken`; recency delegated to `recencyBucket`.
 */
function buildTriageSignals(input: {
  executions: number;
  distinctUsers: number;
  errorRate: number;
  lastSeen: string;
  now: Date;
  serviceAccountOnly: boolean;
  slotClassifications: HistoricSqlLiteralSlotClassification[];
}): Record<string, string> {
  const { executions, distinctUsers, errorRate } = input;

  let constantCount = 0;
  let runtimeCount = 0;
  for (const classification of input.slotClassifications) {
    if (classification === 'constant') {
      constantCount += 1;
    } else if (classification === 'runtime') {
      runtimeCount += 1;
    }
  }

  let executionsBucket = 'high';
  if (executions < 3) {
    executionsBucket = 'low';
  } else if (executions < 50) {
    executionsBucket = 'mid';
  }

  let usersBucket = 'broad';
  if (distinctUsers <= 1) {
    usersBucket = 'solo';
  } else if (distinctUsers <= 5) {
    usersBucket = 'team';
  }

  let errorBucket = 'broken';
  if (errorRate <= 0.01) {
    errorBucket = 'ok';
  } else if (errorRate <= 0.1) {
    errorBucket = 'noisy';
  }

  return {
    executions_bucket: executionsBucket,
    distinct_users_bucket: usersBucket,
    error_rate_bucket: errorBucket,
    recency_bucket: recencyBucket(input.lastSeen, input.now),
    service_account_only: String(input.serviceAccountOnly),
    slot_summary: `${constantCount} constant, ${runtimeCount} runtime`,
  };
}
|
||||
|
||||
/**
 * Buckets the age of the last execution relative to `now`:
 * `active` up to 14 days, `warm` up to 60 days, `cold` otherwise.
 * Future timestamps count as age zero; an unparseable `lastSeen` yields `cold`.
 */
function recencyBucket(lastSeen: string, now: Date): string {
  const msPerDay = 24 * 60 * 60 * 1000;
  const elapsedMs = now.getTime() - new Date(lastSeen).getTime();
  const ageDays = Math.max(0, elapsedMs / msPerDay);
  return ageDays <= 14 ? 'active' : ageDays <= 60 ? 'warm' : 'cold';
}
|
||||
|
||||
/**
 * Reports whether every named user across the rows matches at least one
 * service-account pattern.
 *
 * Returns false when no row has a user or when no patterns are configured.
 * Patterns are compiled with `new RegExp`, so an invalid pattern throws.
 */
function isServiceAccountOnly(rows: HistoricSqlRawQueryRow[], patterns: string[]): boolean {
  const namedUsers: string[] = [];
  for (const { user } of rows) {
    if (user) {
      namedUsers.push(user);
    }
  }
  if (namedUsers.length === 0 || patterns.length === 0) {
    return false;
  }

  const matchers = patterns.map((pattern) => new RegExp(pattern));
  for (const user of namedUsers) {
    if (!matchers.some((matcher) => matcher.test(user))) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/**
 * Formats a template title as `<dialect> · <table> [<fp6>]`, where `<fp6>` is
 * the first six characters of the fingerprint. When a sub-cluster id is
 * present, its last six characters are appended after a colon.
 */
function buildTemplateTitle(
  dialect: HistoricSqlPullConfig['dialect'],
  firstTable: string,
  fingerprint: string,
  subClusterId: string | null,
): string {
  const shortFingerprint = fingerprint.slice(0, 6);
  const clusterSuffix = subClusterId ? `:${subClusterId.slice(-6)}` : '';
  return `${dialect} · ${firstTable} [${shortFingerprint}${clusterSuffix}]`;
}
|
||||
|
||||
/**
 * Renders the markdown page for a template: a heading, the normalized SQL in a
 * fenced `sql` block and a bullet list of touched tables, ending with a newline.
 *
 * @param fingerprint Text used as the page heading.
 * @param normalizedSql SQL placed verbatim inside the code fence.
 * @param tablesTouched Table names, one bullet each, in the given order.
 */
function renderTemplatePage(fingerprint: string, normalizedSql: string, tablesTouched: string[]): string {
  const lines: string[] = [`# ${fingerprint}`, ''];
  lines.push('## Normalized SQL', '```sql', normalizedSql, '```', '');
  lines.push('## Tables touched');
  for (const table of tablesTouched) {
    lines.push(`- ${table}`);
  }
  // Trailing empty element gives the page its final newline.
  lines.push('');
  return lines.join('\n');
}
|
||||
|
||||
/**
 * Picks up to five sample executions for a template, one per distinct tuple of
 * literal values.
 *
 * For each tuple the kept row is a successful one if any exists, otherwise the
 * most recent. The kept rows are then returned newest first, with the SQL text
 * passed through the redactors. Returns no samples when the policy disallows them.
 */
function selectSamples(
  rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>,
  redaction: RedactionPolicy,
): HistoricSqlUsage['samples'] {
  if (!redaction.samplesAllowed) {
    return [];
  }

  // Successful rows first, then newest first within the same outcome.
  const ranked = [...rows].sort((a, b) => {
    if (a.row.success === b.row.success) {
      return b.row.startedAt.localeCompare(a.row.startedAt);
    }
    return a.row.success ? -1 : 1;
  });

  const firstPerTuple = new Map<string, { row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>();
  for (const candidate of ranked) {
    const orderedSlots = [...candidate.analysis.literalSlots];
    orderedSlots.sort((a, b) => a.position - b.position);
    // Unit-separator-joined literal values identify the tuple.
    const tupleKey = orderedSlots.map((slot) => slot.exampleValue).join('\u001f');
    if (firstPerTuple.has(tupleKey)) {
      continue;
    }
    firstPerTuple.set(tupleKey, candidate);
  }

  const newestFirst = [...firstPerTuple.values()];
  newestFirst.sort((a, b) => b.row.startedAt.localeCompare(a.row.startedAt));

  return newestFirst.slice(0, 5).map(({ row }) => ({
    started_at: row.startedAt,
    user: row.user,
    bound_sql: redactText(row.sql, redaction.redactors),
    // Key is omitted only when the row did not carry the field at all.
    ...(row.rowsProduced === undefined ? {} : { rows_produced: row.rowsProduced ?? null }),
    runtime_ms: row.runtimeMs,
    success: row.success,
  }));
}
|
||||
|
||||
/**
 * Returns the `maxTemplatesPerRun` highest-ranked templates, ordered by
 * descending rank score with ties broken by template id.
 */
function selectTemplates(templates: TemplateVariant[], maxTemplatesPerRun: number, now: Date): TemplateVariant[] {
  const scored = templates.map((template) => ({ template, score: rankTemplate(template, now) }));
  scored.sort((a, b) => {
    const byScore = b.score - a.score;
    // Fall back to the id comparison whenever the score difference is falsy.
    return byScore || a.template.id.localeCompare(b.template.id);
  });
  const kept = scored.slice(0, maxTemplatesPerRun);
  return kept.map(({ template }) => template);
}
|
||||
|
||||
/**
 * Scores a template as `distinctUsers * log1p(executions) * recencyWeight`,
 * where `recencyWeight = 1 / (1 + ageDays / 30)` and `ageDays` is the age of
 * the latest execution relative to `now` (365 when there are no rows).
 */
function rankTemplate(template: TemplateVariant, now: Date): number {
  const userNames = new Set<string>();
  let latestStartedAt: string | null = null;
  for (const { row } of template.rows) {
    if (row.user) {
      userNames.add(row.user);
    }
    if (latestStartedAt === null || row.startedAt > latestStartedAt) {
      latestStartedAt = row.startedAt;
    }
  }

  let ageDays = 365;
  if (latestStartedAt !== null) {
    ageDays = Math.max(0, (now.getTime() - new Date(latestStartedAt).getTime()) / 86400000);
  }
  const recencyWeight = 1 / (1 + ageDays / 30);
  return userNames.size * Math.log1p(template.rows.length) * recencyWeight;
}
|
||||
|
||||
/**
 * Nearest-rank percentile over an ascending-sorted array.
 *
 * @param values Values sorted ascending.
 * @param percentileValue Fraction such as 0.5 or 0.95.
 * @returns The element at rank `ceil(n * p)` (1-based, clamped into range), or
 *   `null` for an empty array.
 */
function percentile(values: number[], percentileValue: number): number | null {
  const count = values.length;
  if (count === 0) {
    return null;
  }
  const zeroBasedRank = Math.ceil(count * percentileValue) - 1;
  const index = Math.min(count - 1, Math.max(0, zeroBasedRank));
  return values[index];
}
|
||||
|
||||
/**
 * Sums `rowsProduced` over the rows that report a numeric value.
 *
 * @returns The total, or `null` when no row carries a numeric `rowsProduced`.
 */
function sumRowsProduced(rows: HistoricSqlRawQueryRow[]): number | null {
  let total = 0;
  let sawNumber = false;
  for (const { rowsProduced } of rows) {
    if (typeof rowsProduced === 'number') {
      total += rowsProduced;
      sawNumber = true;
    }
  }
  return sawNumber ? total : null;
}
|
||||
|
||||
/**
 * Compiles redaction patterns into global regular expressions.
 *
 * A pattern that fails to compile is skipped, a `redaction_skipped:...` entry
 * is appended to `warnings`, and samples are disallowed for the whole policy.
 *
 * @param patterns Regular-expression sources.
 * @param warnings Mutable list that receives one entry per invalid pattern.
 */
function compileRedactors(patterns: string[], warnings: string[]): RedactionPolicy {
  const redactors: RegExp[] = [];
  let samplesAllowed = true;

  for (const pattern of patterns) {
    let compiled: RegExp;
    try {
      compiled = new RegExp(pattern, 'g');
    } catch (error) {
      samplesAllowed = false;
      const reason = error instanceof Error ? error.message : String(error);
      warnings.push(`redaction_skipped:invalid_redaction_pattern:${pattern}:${reason}`);
      continue;
    }
    redactors.push(compiled);
  }

  return { redactors, samplesAllowed };
}
|
||||
|
||||
/**
 * Applies each redactor in order, replacing its matches with `<redacted>`.
 * Later redactors see the output of earlier ones.
 */
function redactText(value: string, redactors: RegExp[]): string {
  let redacted = value;
  for (const redactor of redactors) {
    redacted = redacted.replace(redactor, '<redacted>');
  }
  return redacted;
}
|
||||
|
||||
/**
 * Serializes `value` as two-space-indented JSON followed by a newline and
 * writes it to `relPath` under `stagedDir` via `writeText`.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const serialized = JSON.stringify(value, null, 2);
  await writeText(stagedDir, relPath, `${serialized}\n`);
}
|
||||
|
||||
/**
 * Writes UTF-8 text to `relPath` under `stagedDir`, creating any missing
 * parent directories first.
 */
async function writeText(stagedDir: string, relPath: string, value: string): Promise<void> {
  const destination = join(stagedDir, relPath);
  const parentDir = dirname(destination);
  await mkdir(parentDir, { recursive: true });
  await writeFile(destination, value, 'utf-8');
}
|
||||
201
packages/context/src/ingest/adapters/historic-sql/types.ts
Normal file
201
packages/context/src/ingest/adapters/historic-sql/types.ts
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
import { z } from 'zod';
|
||||
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
|
||||
|
||||
export const HISTORIC_SQL_SOURCE_KEY = 'historic-sql' as const;
|
||||
export const HISTORIC_SQL_OBJECT_TYPE = 'historic_sql_template' as const;
|
||||
|
||||
const historicSqlDialectSchema = z.enum(['snowflake', 'bigquery', 'postgres']);
|
||||
export type HistoricSqlDialect = z.infer<typeof historicSqlDialectSchema>;
|
||||
|
||||
export const historicSqlPullConfigSchema = z.object({
|
||||
dialect: historicSqlDialectSchema,
|
||||
windowDays: z.number().int().min(1).max(365).default(90),
|
||||
lastSuccessfulCursor: z.string().datetime().nullable().default(null),
|
||||
serviceAccountUserPatterns: z.array(z.string()).default([]),
|
||||
redactionPatterns: z.array(z.string()).default([]),
|
||||
maxTemplatesPerRun: z.number().int().min(1).max(5000).default(5000),
|
||||
minCalls: z.number().int().min(1).default(5),
|
||||
});
|
||||
export type HistoricSqlPullConfig = z.infer<typeof historicSqlPullConfigSchema>;
|
||||
|
||||
export interface HistoricSqlTimeWindow {
|
||||
start: Date;
|
||||
end: Date;
|
||||
}
|
||||
|
||||
export const historicSqlRawQueryRowSchema = z.object({
|
||||
id: z.string().min(1),
|
||||
sql: z.string().min(1),
|
||||
user: z.string().nullable().default(null),
|
||||
startedAt: z.string().datetime(),
|
||||
endedAt: z.string().datetime().nullable().default(null),
|
||||
runtimeMs: z.number().nonnegative().nullable().default(null),
|
||||
rowsProduced: z.number().int().nonnegative().nullable().optional(),
|
||||
success: z.boolean().default(true),
|
||||
errorMessage: z.string().nullable().default(null),
|
||||
});
|
||||
export type HistoricSqlRawQueryRow = z.infer<typeof historicSqlRawQueryRowSchema>;
|
||||
|
||||
export interface HistoricSqlQueryHistoryReader {
|
||||
probe(client: unknown): Promise<void>;
|
||||
fetch(
|
||||
client: unknown,
|
||||
window: HistoricSqlTimeWindow,
|
||||
cursor?: string | null,
|
||||
): AsyncIterable<HistoricSqlRawQueryRow>;
|
||||
}
|
||||
|
||||
export interface KloPostgresQueryClient {
|
||||
executeQuery(sql: string, params?: unknown[]): Promise<{ headers: string[]; rows: unknown[][]; totalRows?: number }>;
|
||||
}
|
||||
|
||||
export interface PostgresPgssProbeResult {
|
||||
pgServerVersion: string;
|
||||
warnings: string[];
|
||||
}
|
||||
|
||||
export interface PostgresPgssSnapshot {
|
||||
statsResetAt: string | null;
|
||||
deallocCount: number | null;
|
||||
rows: PostgresPgssRow[];
|
||||
}
|
||||
|
||||
export interface PostgresPgssReader {
|
||||
probe(client: KloPostgresQueryClient): Promise<PostgresPgssProbeResult>;
|
||||
readSnapshot(
|
||||
client: KloPostgresQueryClient,
|
||||
options: { minCalls: number; maxTemplates: number },
|
||||
): Promise<PostgresPgssSnapshot>;
|
||||
}
|
||||
|
||||
export interface PostgresPgssRow {
|
||||
queryid: string;
|
||||
userid: string;
|
||||
username: string | null;
|
||||
dbid: string;
|
||||
database: string | null;
|
||||
query: string;
|
||||
calls: number;
|
||||
totalExecTime: number;
|
||||
meanExecTime: number;
|
||||
totalRows: number;
|
||||
}
|
||||
|
||||
export interface PostgresPgssAggregateRow {
|
||||
id: string;
|
||||
queryid: string;
|
||||
dbid: string;
|
||||
database: string | null;
|
||||
query: string;
|
||||
deltaCalls: number;
|
||||
deltaExecTime: number;
|
||||
deltaRows: number;
|
||||
meanExecTime: number;
|
||||
distinctUsersDelta: number;
|
||||
users: string[];
|
||||
firstObservedAt: string;
|
||||
}
|
||||
|
||||
export interface HistoricSqlSourceAdapterDeps {
|
||||
sqlAnalysis: SqlAnalysisPort;
|
||||
reader: HistoricSqlQueryHistoryReader;
|
||||
queryClient: unknown;
|
||||
postgresReader?: PostgresPgssReader;
|
||||
postgresQueryClient?: KloPostgresQueryClient;
|
||||
postgresBaselineRootDir?: string;
|
||||
now?: () => Date;
|
||||
onPullSucceeded?: (ctx: {
|
||||
connectionId: string;
|
||||
sourceKey: string;
|
||||
syncId: string;
|
||||
trigger: import('../../types.js').IngestTrigger;
|
||||
completedAt: Date;
|
||||
stagedDir: string;
|
||||
nextSuccessfulCursor: string | null;
|
||||
}) => Promise<void>;
|
||||
}
|
||||
|
||||
const historicSqlLiteralSlotClassificationSchema = z.enum(['constant', 'runtime', 'categorical']);
|
||||
export type HistoricSqlLiteralSlotClassification = z.infer<typeof historicSqlLiteralSlotClassificationSchema>;
|
||||
|
||||
export const historicSqlMetadataSchema = z.object({
|
||||
id: z.string().min(1),
|
||||
title: z.string().min(1),
|
||||
path: z.string().min(1),
|
||||
objectType: z.literal(HISTORIC_SQL_OBJECT_TYPE),
|
||||
lastEditedAt: z.null(),
|
||||
properties: z.object({
|
||||
fingerprint: z.string().min(1),
|
||||
sub_cluster_id: z.string().nullable(),
|
||||
dialect: historicSqlDialectSchema,
|
||||
tables_touched: z.array(z.string()),
|
||||
literal_slots: z.array(
|
||||
z.object({
|
||||
position: z.number().int().min(1),
|
||||
type: z.enum(['string', 'number', 'timestamp', 'date', 'boolean', 'null', 'unknown']),
|
||||
classification: historicSqlLiteralSlotClassificationSchema,
|
||||
}),
|
||||
),
|
||||
triage_signals: z.record(z.string(), z.string()),
|
||||
}),
|
||||
});
|
||||
export type HistoricSqlMetadata = z.infer<typeof historicSqlMetadataSchema>;
|
||||
|
||||
export const historicSqlUsageSchema = z.object({
|
||||
stats: z.object({
|
||||
executions: z.number().int().nonnegative(),
|
||||
distinct_users: z.number().int().nonnegative(),
|
||||
first_seen: z.string().datetime(),
|
||||
last_seen: z.string().datetime(),
|
||||
p50_runtime_ms: z.number().nonnegative().nullable(),
|
||||
p95_runtime_ms: z.number().nonnegative().nullable(),
|
||||
mean_runtime_ms: z.number().nonnegative().nullable().optional(),
|
||||
error_rate: z.number().min(0).max(1),
|
||||
rows_produced: z.number().int().nonnegative().nullable().optional(),
|
||||
}),
|
||||
literal_slots: z.array(
|
||||
z.object({
|
||||
position: z.number().int().min(1),
|
||||
distinct_values: z.number().int().nonnegative(),
|
||||
top_values: z.array(z.tuple([z.string(), z.number().int().nonnegative()])),
|
||||
}),
|
||||
),
|
||||
samples: z.array(
|
||||
z.object({
|
||||
started_at: z.string().datetime(),
|
||||
user: z.string().nullable(),
|
||||
bound_sql: z.string(),
|
||||
rows_produced: z.number().int().nonnegative().nullable().optional(),
|
||||
runtime_ms: z.number().nonnegative().nullable(),
|
||||
success: z.boolean(),
|
||||
}),
|
||||
),
|
||||
});
|
||||
export type HistoricSqlUsage = z.infer<typeof historicSqlUsageSchema>;
|
||||
|
||||
export const historicSqlManifestSchema = z.object({
|
||||
source: z.literal(HISTORIC_SQL_SOURCE_KEY),
|
||||
connectionId: z.string().min(1),
|
||||
dialect: historicSqlDialectSchema,
|
||||
fetchedAt: z.string().datetime(),
|
||||
windowStart: z.string().datetime(),
|
||||
windowEnd: z.string().datetime(),
|
||||
nextSuccessfulCursor: z.string().datetime().nullable(),
|
||||
templateCount: z.number().int().nonnegative(),
|
||||
capped: z.boolean(),
|
||||
warnings: z.array(z.string()),
|
||||
degraded: z.boolean().default(false),
|
||||
statsResetAt: z.string().datetime().nullable().default(null),
|
||||
baselineFirstRun: z.boolean().default(false),
|
||||
pgServerVersion: z.string().nullable().default(null),
|
||||
deallocCount: z.number().int().nonnegative().nullable().default(null),
|
||||
templates: z.array(
|
||||
z.object({
|
||||
id: z.string().min(1),
|
||||
fingerprint: z.string().min(1),
|
||||
subClusterId: z.string().nullable(),
|
||||
path: z.string().min(1),
|
||||
}),
|
||||
),
|
||||
});
|
||||
export type HistoricSqlManifest = z.infer<typeof historicSqlManifestSchema>;
|
||||
107
packages/context/src/ingest/adapters/live-database/chunk.test.ts
Normal file
107
packages/context/src/ingest/adapters/live-database/chunk.test.ts
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
import { mkdtemp } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import type { KloSchemaSnapshot } from '../../../scan/types.js';
|
||||
import { chunkLiveDatabaseStagedDir } from './chunk.js';
|
||||
import { liveDatabaseTablePath, writeLiveDatabaseSnapshot } from './stage.js';
|
||||
|
||||
function snapshot(): KloSchemaSnapshot {
|
||||
return {
|
||||
connectionId: 'conn-1',
|
||||
driver: 'postgres',
|
||||
extractedAt: '2026-04-27T00:00:00.000Z',
|
||||
scope: { schemas: ['public'] },
|
||||
metadata: {},
|
||||
tables: [
|
||||
{
|
||||
name: 'orders',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
kind: 'table',
|
||||
comment: null,
|
||||
estimatedRows: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
},
|
||||
{
|
||||
name: 'customers',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
kind: 'table',
|
||||
comment: null,
|
||||
estimatedRows: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
describe('chunkLiveDatabaseStagedDir', () => {
|
||||
it('emits one work unit per table on the first run', async () => {
|
||||
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-chunk-'));
|
||||
await writeLiveDatabaseSnapshot(dir, snapshot());
|
||||
|
||||
const result = await chunkLiveDatabaseStagedDir(dir);
|
||||
expect(result.workUnits.map((wu) => wu.unitKey)).toEqual([
|
||||
'live-database-public-customers',
|
||||
'live-database-public-orders',
|
||||
]);
|
||||
expect(result.workUnits[0]?.dependencyPaths).toEqual(['connection.json', 'foreign-keys.json']);
|
||||
expect(result.workUnits[0]?.peerFileIndex).toContain(
|
||||
liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('keeps only changed tables during incremental syncs and records table evictions', async () => {
|
||||
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-diff-'));
|
||||
await writeLiveDatabaseSnapshot(dir, snapshot());
|
||||
const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' });
|
||||
const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' });
|
||||
|
||||
const result = await chunkLiveDatabaseStagedDir(dir, {
|
||||
added: [],
|
||||
modified: [ordersPath],
|
||||
deleted: [customersPath],
|
||||
unchanged: ['connection.json', 'foreign-keys.json'],
|
||||
});
|
||||
|
||||
expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']);
|
||||
expect(result.eviction?.deletedRawPaths).toEqual([customersPath]);
|
||||
});
|
||||
|
||||
it('fans out all table work units when the foreign-key index changes', async () => {
|
||||
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-fk-'));
|
||||
await writeLiveDatabaseSnapshot(dir, snapshot());
|
||||
|
||||
const result = await chunkLiveDatabaseStagedDir(dir, {
|
||||
added: [],
|
||||
modified: ['foreign-keys.json'],
|
||||
deleted: [],
|
||||
unchanged: [],
|
||||
});
|
||||
|
||||
expect(result.workUnits).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
58
packages/context/src/ingest/adapters/live-database/chunk.ts
Normal file
58
packages/context/src/ingest/adapters/live-database/chunk.ts
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
|
||||
import type { KloSchemaTable } from '../../../scan/types.js';
|
||||
import { LIVE_DATABASE_FOREIGN_KEYS_FILE, LIVE_DATABASE_META_FILE, readLiveDatabaseTableFiles } from './stage.js';
|
||||
|
||||
/**
 * Builds the work-unit key for a table: `live-database-` followed by the
 * slugified catalog, db and name joined with `-`.
 *
 * Each part is lowercased, runs of characters outside `a-z0-9` become a single
 * `-`, and leading/trailing dashes are trimmed. Missing or empty parts are
 * dropped; if nothing remains the key is `live-database-table`.
 */
function unitKey(table: KloSchemaTable): string {
  const slugs: string[] = [];
  for (const part of [table.catalog, table.db, table.name]) {
    if (typeof part !== 'string' || part.length === 0) {
      continue;
    }
    const slug = part
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '');
    if (slug) {
      slugs.push(slug);
    }
  }
  return `live-database-${slugs.join('-') || 'table'}`;
}
|
||||
|
||||
/**
 * Human-readable table name: the non-empty catalog, db and name joined with dots.
 */
function displayName(table: KloSchemaTable): string {
  const segments: string[] = [];
  for (const part of [table.catalog, table.db, table.name]) {
    if (part) {
      segments.push(part);
    }
  }
  return segments.join('.');
}
|
||||
|
||||
/**
 * Reports whether a staged path is a table file: under `tables/` and ending in `.json`.
 */
function isTablePath(path: string): boolean {
  if (!path.startsWith('tables/')) {
    return false;
  }
  return path.endsWith('.json');
}
|
||||
|
||||
/**
 * Splits a staged live-database snapshot into one work unit per table file.
 *
 * Without a diff set every table yields a work unit. With a diff set only
 * added/modified table files do, unless the connection metadata file or the
 * foreign-key index was added or modified, in which case all tables fan out.
 * Deleted table paths are reported under `eviction.deletedRawPaths`; the
 * `eviction` key is absent when nothing was deleted.
 *
 * @param stagedDir Directory holding the staged snapshot.
 * @param diffSet Optional change set relative to the previous sync.
 */
export async function chunkLiveDatabaseStagedDir(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
  const tableFiles = await readLiveDatabaseTableFiles(stagedDir);
  const allTablePaths = tableFiles.map((file) => file.path);
  const globalDeps = [LIVE_DATABASE_META_FILE, LIVE_DATABASE_FOREIGN_KEYS_FILE];

  // `null` means "no diff information": treat every table as in scope.
  const touched = diffSet ? new Set([...diffSet.added, ...diffSet.modified]) : null;
  const globalTouched = touched !== null && globalDeps.some((dep) => touched.has(dep));

  const inScope = tableFiles.filter((file) => touched === null || globalTouched || touched.has(file.path));
  const workUnits: WorkUnit[] = inScope.map((file) => {
    const label = displayName(file.table);
    const columnCount = file.table.columns.length;
    const pluralSuffix = columnCount === 1 ? '' : 's';
    return {
      unitKey: unitKey(file.table),
      displayLabel: `Live database table ${label}`,
      rawFiles: [file.path],
      // Every other table file, sorted, so the unit can reference its peers.
      peerFileIndex: allTablePaths.filter((path) => path !== file.path).sort(),
      dependencyPaths: globalDeps,
      notes: `Database catalog snapshot for ${label} with ${columnCount} column${pluralSuffix}.`,
    };
  });

  const deletedRawPaths = diffSet ? diffSet.deleted.filter(isTablePath).sort() : [];
  return deletedRawPaths.length > 0 ? { workUnits, eviction: { deletedRawPaths } } : { workUnits };
}
|
||||
|
|
@ -0,0 +1,224 @@
|
|||
import { once } from 'node:events';
|
||||
import { createServer } from 'node:http';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createDaemonLiveDatabaseIntrospection } from './daemon-introspection.js';
|
||||
|
||||
const daemonResponse = {
|
||||
connection_id: 'warehouse',
|
||||
extracted_at: '2026-04-28T10:00:00+00:00',
|
||||
metadata: { driver: 'postgres', schemas: ['public'] },
|
||||
tables: [
|
||||
{
|
||||
catalog: 'warehouse',
|
||||
db: 'public',
|
||||
name: 'customers',
|
||||
comment: null,
|
||||
columns: [{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: null }],
|
||||
foreign_keys: [],
|
||||
},
|
||||
{
|
||||
catalog: 'warehouse',
|
||||
db: 'public',
|
||||
name: 'orders',
|
||||
comment: 'Order facts',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: 'Order id' },
|
||||
{ name: 'customer_id', type: 'integer', nullable: false, primary_key: false, comment: null },
|
||||
],
|
||||
foreign_keys: [
|
||||
{
|
||||
from_column: 'customer_id',
|
||||
to_table: 'customers',
|
||||
to_column: 'id',
|
||||
constraint_name: 'orders_customer_id_fkey',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
describe('createDaemonLiveDatabaseIntrospection', () => {
|
||||
it('calls the database-introspect daemon command and maps the snapshot response', async () => {
|
||||
const runJson = vi.fn(async () => daemonResponse);
|
||||
const introspection = createDaemonLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgres://localhost:5432/warehouse',
|
||||
readonly: true,
|
||||
},
|
||||
},
|
||||
schemas: ['public'],
|
||||
runJson,
|
||||
});
|
||||
|
||||
await expect(introspection.extractSchema('warehouse')).resolves.toEqual({
|
||||
connectionId: 'warehouse',
|
||||
driver: 'postgres',
|
||||
extractedAt: '2026-04-28T10:00:00+00:00',
|
||||
scope: { schemas: ['public'] },
|
||||
metadata: { driver: 'postgres', schemas: ['public'] },
|
||||
tables: [
|
||||
{
|
||||
catalog: 'warehouse',
|
||||
db: 'public',
|
||||
name: 'customers',
|
||||
kind: 'table',
|
||||
comment: null,
|
||||
estimatedRows: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
},
|
||||
{
|
||||
catalog: 'warehouse',
|
||||
db: 'public',
|
||||
name: 'orders',
|
||||
kind: 'table',
|
||||
comment: 'Order facts',
|
||||
estimatedRows: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'Order id',
|
||||
},
|
||||
{
|
||||
name: 'customer_id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [
|
||||
{
|
||||
fromColumn: 'customer_id',
|
||||
toCatalog: null,
|
||||
toDb: null,
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
constraintName: 'orders_customer_id_fkey',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(runJson).toHaveBeenCalledWith('database-introspect', {
|
||||
connection_id: 'warehouse',
|
||||
driver: 'postgres',
|
||||
url: 'postgres://localhost:5432/warehouse',
|
||||
schemas: ['public'],
|
||||
statement_timeout_ms: 30_000,
|
||||
connection_timeout_seconds: 5,
|
||||
});
|
||||
});
|
||||
|
||||
it('calls a running daemon HTTP endpoint when baseUrl is configured', async () => {
|
||||
const requests: Array<{ url: string | undefined; body: unknown }> = [];
|
||||
const server = createServer((request, response) => {
|
||||
const chunks: Buffer[] = [];
|
||||
request.on('data', (chunk: Buffer) => chunks.push(chunk));
|
||||
request.on('end', () => {
|
||||
requests.push({
|
||||
url: request.url,
|
||||
body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
|
||||
});
|
||||
response.writeHead(200, { 'content-type': 'application/json' });
|
||||
response.end(JSON.stringify(daemonResponse));
|
||||
});
|
||||
});
|
||||
|
||||
server.listen(0, '127.0.0.1');
|
||||
await once(server, 'listening');
|
||||
try {
|
||||
const address = server.address();
|
||||
if (!address || typeof address === 'string') {
|
||||
throw new Error('expected TCP server address');
|
||||
}
|
||||
const introspection = createDaemonLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgresql',
|
||||
url: 'postgres://localhost:5432/warehouse',
|
||||
readonly: true,
|
||||
},
|
||||
},
|
||||
baseUrl: `http://127.0.0.1:${address.port}`,
|
||||
});
|
||||
|
||||
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
tables: [{ name: 'customers' }, { name: 'orders' }],
|
||||
});
|
||||
|
||||
expect(requests).toEqual([
|
||||
{
|
||||
url: '/database/introspect',
|
||||
body: {
|
||||
connection_id: 'warehouse',
|
||||
driver: 'postgres',
|
||||
url: 'postgres://localhost:5432/warehouse',
|
||||
schemas: ['public'],
|
||||
statement_timeout_ms: 30_000,
|
||||
connection_timeout_seconds: 5,
|
||||
},
|
||||
},
|
||||
]);
|
||||
} finally {
|
||||
server.close();
|
||||
}
|
||||
});
|
||||
|
||||
it('requires a configured read-only postgres connection with a url', async () => {
|
||||
const introspection = createDaemonLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgres://localhost:5432/warehouse',
|
||||
readonly: false,
|
||||
},
|
||||
},
|
||||
runJson: vi.fn(async () => daemonResponse),
|
||||
});
|
||||
|
||||
await expect(introspection.extractSchema('warehouse')).rejects.toThrow(
|
||||
'Local live-database ingest requires connections.warehouse.readonly: true.',
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects unsupported local connection drivers before calling the daemon', async () => {
|
||||
const runJson = vi.fn(async () => daemonResponse);
|
||||
const introspection = createDaemonLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'snowflake',
|
||||
url: 'snowflake://example',
|
||||
readonly: true,
|
||||
},
|
||||
},
|
||||
runJson,
|
||||
});
|
||||
|
||||
await expect(introspection.extractSchema('warehouse')).rejects.toThrow(
|
||||
'Local live-database ingest cannot run driver "snowflake".',
|
||||
);
|
||||
expect(runJson).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,256 @@
|
|||
import { spawn } from 'node:child_process';
|
||||
import { request as httpRequest } from 'node:http';
|
||||
import { request as httpsRequest } from 'node:https';
|
||||
import { URL } from 'node:url';
|
||||
import type { KloProjectConnectionConfig } from '../../../project/config.js';
|
||||
import type { KloSchemaColumn, KloSchemaForeignKey, KloSchemaSnapshot, KloSchemaTable } from '../../../scan/types.js';
|
||||
import { inferKloDimensionType, normalizeKloNativeType } from '../../../scan/type-normalization.js';
|
||||
import type { LiveDatabaseIntrospectionPort } from './types.js';
|
||||
|
||||
export type KloDaemonDatabaseIntrospectionCommand = 'database-introspect';
|
||||
|
||||
export type KloDaemonDatabaseJsonRunner = (
|
||||
subcommand: KloDaemonDatabaseIntrospectionCommand,
|
||||
payload: Record<string, unknown>,
|
||||
) => Promise<Record<string, unknown>>;
|
||||
|
||||
export type KloDaemonDatabaseHttpJsonRunner = (
|
||||
path: string,
|
||||
payload: Record<string, unknown>,
|
||||
) => Promise<Record<string, unknown>>;
|
||||
|
||||
export interface DaemonLiveDatabaseIntrospectionOptions {
|
||||
connections: Record<string, KloProjectConnectionConfig>;
|
||||
schemas?: string[];
|
||||
statementTimeoutMs?: number;
|
||||
connectionTimeoutSeconds?: number;
|
||||
command?: string;
|
||||
args?: string[];
|
||||
cwd?: string;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
baseUrl?: string;
|
||||
runJson?: KloDaemonDatabaseJsonRunner;
|
||||
requestJson?: KloDaemonDatabaseHttpJsonRunner;
|
||||
now?: () => Date;
|
||||
}
|
||||
|
||||
const DEFAULT_SCHEMAS = ['public'];
|
||||
|
||||
/**
 * Parses text produced by the daemon and guarantees the result is a JSON object.
 *
 * @param raw - Raw text to decode (process stdout or an HTTP response body).
 * @param subcommand - Subcommand name or HTTP path; used only to label error messages.
 * @returns The decoded value, typed as a string-keyed record.
 * @throws Error when the text is empty, is not valid JSON, or decodes to anything
 *   other than a non-array object.
 */
function parseJsonObject(raw: string, subcommand: string): Record<string, unknown> {
  // An empty body would otherwise surface as "Unexpected end of JSON input" with no
  // hint about which daemon call produced it.
  if (raw.trim().length === 0) {
    throw new Error(`klo-daemon ${subcommand} returned empty output`);
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`klo-daemon ${subcommand} returned invalid JSON: ${reason}`);
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`klo-daemon ${subcommand} returned non-object JSON`);
  }
  return parsed as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Builds a JSON runner that spawns the daemon command once per call.
 *
 * Each call spawns `command` with `args` followed by the subcommand, writes the
 * payload as a single JSON line to the child's stdin, and collects stdout/stderr.
 *
 * @param options - Command, arguments, and optional working directory / extra
 *   environment variables (merged over `process.env`).
 * @returns A runner that resolves with the JSON object printed on stdout, and rejects
 *   when the process cannot be spawned, exits unsuccessfully, or prints output that
 *   `parseJsonObject` rejects.
 */
function runProcessJson(
  options: Required<Pick<DaemonLiveDatabaseIntrospectionOptions, 'command' | 'args'>> &
    Pick<DaemonLiveDatabaseIntrospectionOptions, 'cwd' | 'env'>,
): KloDaemonDatabaseJsonRunner {
  return async (subcommand, payload) =>
    new Promise<Record<string, unknown>>((resolve, reject) => {
      const child = spawn(options.command, [...options.args, subcommand], {
        cwd: options.cwd,
        env: { ...process.env, ...options.env },
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      const stdout: Buffer[] = [];
      const stderr: Buffer[] = [];

      child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
      child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
      child.on('error', reject);
      // A child that exits before draining stdin makes the pipe emit an error (EPIPE).
      // Without a listener that is an unhandled stream error; the real outcome is
      // still reported through the 'close' handler below.
      child.stdin.on('error', () => undefined);
      child.on('close', (code, signal) => {
        const stdoutText = Buffer.concat(stdout).toString('utf8').trim();
        const stderrText = Buffer.concat(stderr).toString('utf8').trim();
        if (code !== 0) {
          // `code` is null when the child was terminated by a signal.
          const status = code === null ? `terminated by signal ${signal ?? 'unknown'}` : `exit code ${code}`;
          reject(new Error(`klo-daemon ${subcommand} failed: ${stderrText || status}`));
          return;
        }
        try {
          resolve(parseJsonObject(stdoutText, subcommand));
        } catch (error) {
          reject(error);
        }
      });
      child.stdin.end(`${JSON.stringify(payload)}\n`);
    });
}
|
||||
|
||||
/** Returns the base URL unchanged when it already ends in `/`, otherwise with `/` appended. */
function normalizedBaseUrl(baseUrl: string): string {
  if (baseUrl.endsWith('/')) {
    return baseUrl;
  }
  return baseUrl + '/';
}
|
||||
|
||||
/**
 * Builds an HTTP runner that POSTs JSON payloads to a running daemon.
 *
 * The request path is resolved relative to `baseUrl` (one leading slash is stripped
 * so the base URL's own path prefix is kept), and the `http` or `https` client is
 * chosen from the resolved URL's protocol.
 *
 * @param baseUrl - Base URL of the daemon HTTP endpoint.
 * @returns A runner that resolves with the JSON object in a 2xx response body, and
 *   rejects on transport errors, non-2xx status codes, or a body that
 *   `parseJsonObject` rejects.
 */
function postJson(baseUrl: string): KloDaemonDatabaseHttpJsonRunner {
  return async (path, payload) =>
    new Promise<Record<string, unknown>>((resolve, reject) => {
      const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
      const body = JSON.stringify(payload);
      const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
      const request = client(
        target,
        {
          method: 'POST',
          headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            'content-length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const chunks: Buffer[] = [];
          response.on('data', (chunk: Buffer) => chunks.push(chunk));
          // Route response-stream errors to the promise: if the body fails mid-stream,
          // 'end' never fires, and an 'error' with no listener is thrown as uncaught.
          response.on('error', reject);
          response.on('end', () => {
            const text = Buffer.concat(chunks).toString('utf8');
            const statusCode = response.statusCode ?? 0;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`klo-daemon HTTP ${path} failed with ${statusCode}: ${text}`));
              return;
            }
            try {
              resolve(parseJsonObject(text, path));
            } catch (error) {
              reject(error);
            }
          });
        },
      );
      request.on('error', reject);
      request.end(body);
    });
}
|
||||
|
||||
/** Returns `value` when it is a non-null, non-array object; otherwise a fresh empty object. */
function recordValue(value: unknown): Record<string, unknown> {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Keeps only the plain-object entries of an array.
 *
 * Returns an empty array when `value` is not an array; entries that are null,
 * primitives, or nested arrays are dropped.
 */
function recordArray(value: unknown): Array<Record<string, unknown>> {
  if (!Array.isArray(value)) {
    return [];
  }
  const records: Array<Record<string, unknown>> = [];
  for (const item of value) {
    if (typeof item === 'object' && item !== null && !Array.isArray(item)) {
      records.push(item as Record<string, unknown>);
    }
  }
  return records;
}
|
||||
|
||||
/**
 * Returns `value` when it is a non-empty string.
 *
 * @param value - Candidate value taken from the daemon response.
 * @param field - Field path used in the error message.
 * @throws Error when `value` is not a string or is the empty string.
 */
function requiredString(value: unknown, field: string): string {
  if (typeof value === 'string' && value.length > 0) {
    return value;
  }
  throw new Error(`klo-daemon database introspection response is missing string field ${field}`);
}
|
||||
|
||||
/** Returns `value` when it is a string (including the empty string), otherwise `null`. */
function nullableString(value: unknown): string | null {
  if (typeof value === 'string') {
    return value;
  }
  return null;
}
|
||||
|
||||
/** Returns `value` when it is a string (including the empty string), otherwise `undefined`. */
function optionalString(value: unknown): string | undefined {
  if (typeof value === 'string') {
    return value;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Canonicalizes a configured driver name: stringifies it (nullish becomes the empty
 * string), trims, lower-cases, and maps the alias `postgresql` to `postgres`.
 */
function normalizeDriver(driver: unknown): string {
  const lowered = String(driver ?? '')
    .trim()
    .toLowerCase();
  if (lowered === 'postgresql') {
    return 'postgres';
  }
  return lowered;
}
|
||||
|
||||
/**
 * Looks up a configured connection and checks that local live-database ingest can use it.
 *
 * @param connections - Project connection configs keyed by connection id.
 * @param connectionId - Id of the connection to validate.
 * @returns The connection config, narrowed to one that has a string `url`.
 * @throws Error when the connection is not configured, its driver does not normalize
 *   to `postgres`, it is not marked `readonly: true`, or it has no non-blank `url`.
 */
function requirePostgresConnection(
  connections: Record<string, KloProjectConnectionConfig>,
  connectionId: string,
): KloProjectConnectionConfig & { url: string } {
  const connection = connections[connectionId];
  // Report an unknown connection id as such; falling through to the driver check
  // would blame a driver named "unknown" that the user never configured.
  if (connection === undefined || connection === null) {
    throw new Error(`Local live-database ingest requires connections.${connectionId} to be configured.`);
  }
  const driver = normalizeDriver(connection.driver);
  if (driver !== 'postgres') {
    throw new Error(`Local live-database ingest cannot run driver "${connection.driver ?? 'unknown'}".`);
  }
  if (connection.readonly !== true) {
    throw new Error(`Local live-database ingest requires connections.${connectionId}.readonly: true.`);
  }
  if (typeof connection.url !== 'string' || connection.url.trim().length === 0) {
    throw new Error(`Local live-database ingest requires connections.${connectionId}.url.`);
  }
  return connection as KloProjectConnectionConfig & { url: string };
}
|
||||
|
||||
/**
 * Converts one raw daemon column record into a `KloSchemaColumn`.
 *
 * `type` and `name` must be non-empty strings. A column is treated as nullable unless
 * the daemon explicitly reports `nullable: false`, and as a primary key only when it
 * reports `primary_key: true`.
 */
function mapColumn(raw: Record<string, unknown>): KloSchemaColumn {
  // The type is validated before the name, so a record missing both reports the type first.
  const nativeType = requiredString(raw.type, 'tables[].columns[].type');
  const name = requiredString(raw.name, 'tables[].columns[].name');
  const column: KloSchemaColumn = {
    name,
    nativeType,
    normalizedType: normalizeKloNativeType(nativeType),
    dimensionType: inferKloDimensionType(nativeType),
    nullable: raw.nullable !== false,
    primaryKey: raw.primary_key === true,
    comment: nullableString(raw.comment),
  };
  return column;
}
|
||||
|
||||
/**
 * Converts one raw daemon foreign-key record into a `KloSchemaForeignKey`.
 *
 * `from_column`, `to_table`, and `to_column` must be non-empty strings. The target
 * catalog and db are always set to `null` here.
 */
function mapForeignKey(raw: Record<string, unknown>): KloSchemaForeignKey {
  const fromColumn = requiredString(raw.from_column, 'tables[].foreign_keys[].from_column');
  const toTable = requiredString(raw.to_table, 'tables[].foreign_keys[].to_table');
  const toColumn = requiredString(raw.to_column, 'tables[].foreign_keys[].to_column');
  const foreignKey: KloSchemaForeignKey = {
    fromColumn,
    toCatalog: null,
    toDb: null,
    toTable,
    toColumn,
    constraintName: nullableString(raw.constraint_name),
  };
  return foreignKey;
}
|
||||
|
||||
/**
 * Converts one raw daemon table record into a `KloSchemaTable`.
 *
 * `name` must be a non-empty string. `kind` is always `'table'` and `estimatedRows`
 * is always `null`. Non-object entries in `columns` / `foreign_keys` are ignored.
 */
function mapTable(raw: Record<string, unknown>): KloSchemaTable {
  const catalog = nullableString(raw.catalog);
  const db = nullableString(raw.db);
  // The name is validated before any column or key is mapped.
  const name = requiredString(raw.name, 'tables[].name');
  const comment = nullableString(raw.comment);
  const columns = recordArray(raw.columns).map((column) => mapColumn(column));
  const foreignKeys = recordArray(raw.foreign_keys).map((foreignKey) => mapForeignKey(foreignKey));
  return {
    catalog,
    db,
    name,
    kind: 'table',
    comment,
    estimatedRows: null,
    columns,
    foreignKeys,
  };
}
|
||||
|
||||
/**
 * Converts the raw daemon introspection response into a `KloSchemaSnapshot`.
 *
 * @param raw - Decoded daemon response object.
 * @param input - Request-side values used as fallbacks and scope: the requested
 *   connection id, a locally generated timestamp, and the requested schemas.
 * @returns A snapshot with driver `'postgres'`. `connectionId` and `extractedAt` come
 *   from the response when it supplies them as strings (a non-empty string for
 *   `connectionId`), otherwise from `input`.
 * @throws Error when a table, column, or foreign-key record is missing a required string field.
 */
function mapDaemonSnapshot(
  raw: Record<string, unknown>,
  input: { connectionId: string; extractedAt: string; schemas: string[] },
): KloSchemaSnapshot {
  return {
    // Previously `requiredString(...) || input.connectionId`: requiredString throws on a
    // missing or empty value, so the fallback could never run. Read the field leniently
    // so the intended fallback to the requested connection id takes effect.
    connectionId: optionalString(raw.connection_id) || input.connectionId,
    driver: 'postgres',
    extractedAt: optionalString(raw.extracted_at) ?? input.extractedAt,
    scope: { schemas: input.schemas },
    metadata: recordValue(raw.metadata),
    tables: recordArray(raw.tables).map(mapTable),
  };
}
|
||||
|
||||
/**
 * Creates a live-database introspection port backed by the klo daemon.
 *
 * Transport selection: an injected `requestJson`, or an HTTP client built from
 * `baseUrl`, takes precedence; otherwise the daemon is invoked through `runJson`
 * (injected, or a process runner defaulting to `python -m klo_daemon`).
 *
 * @param options - Connections plus optional schemas, timeouts, transport overrides,
 *   and clock.
 * @returns A port whose `extractSchema` validates the connection, sends the
 *   introspection request, and maps the response into a `KloSchemaSnapshot`.
 */
export function createDaemonLiveDatabaseIntrospection(
  options: DaemonLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const schemas = options.schemas ?? DEFAULT_SCHEMAS;
  const runJson =
    options.runJson ??
    runProcessJson({
      command: options.command ?? 'python',
      args: options.args ?? ['-m', 'klo_daemon'],
      cwd: options.cwd,
      env: options.env,
    });
  let requestJson = options.requestJson;
  if ((requestJson === undefined || requestJson === null) && options.baseUrl) {
    requestJson = postJson(options.baseUrl);
  }
  const httpRunner = requestJson;
  const now = options.now ?? (() => new Date());

  async function extractSchema(connectionId: string): Promise<KloSchemaSnapshot> {
    const connection = requirePostgresConnection(options.connections, connectionId);
    const payload = {
      connection_id: connectionId,
      driver: normalizeDriver(connection.driver),
      url: connection.url,
      schemas,
      statement_timeout_ms: options.statementTimeoutMs ?? 30_000,
      connection_timeout_seconds: options.connectionTimeoutSeconds ?? 5,
    };
    let raw: Record<string, unknown>;
    if (httpRunner) {
      raw = await httpRunner('/database/introspect', payload);
    } else {
      raw = await runJson('database-introspect', payload);
    }
    // The fallback timestamp is taken after the daemon has answered.
    const extractedAt = now().toISOString();
    return mapDaemonSnapshot(raw, { connectionId, extractedAt, schemas });
  }

  return { extractSchema };
}
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { KloSchemaSnapshot } from '../../../scan/types.js';
|
||||
import { buildLiveDatabaseTableNaturalKey, kloSchemaSnapshotToExtractedSchema } from './extracted-schema.js';
|
||||
|
||||
function snapshot(): KloSchemaSnapshot {
|
||||
return {
|
||||
connectionId: 'conn-1',
|
||||
driver: 'postgres',
|
||||
extractedAt: '2026-04-27T00:00:00.000Z',
|
||||
scope: { schemas: ['public'] },
|
||||
metadata: { driver: 'postgres' },
|
||||
tables: [
|
||||
{
|
||||
name: 'orders',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
kind: 'table',
|
||||
comment: 'Orders placed by customers',
|
||||
estimatedRows: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'Primary key',
|
||||
},
|
||||
{
|
||||
name: 'customer_id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [
|
||||
{
|
||||
fromColumn: 'customer_id',
|
||||
toCatalog: null,
|
||||
toDb: 'public',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
constraintName: 'orders_customer_id_fkey',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'customers',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
kind: 'table',
|
||||
comment: null,
|
||||
estimatedRows: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
describe('kloSchemaSnapshotToExtractedSchema', () => {
|
||||
it('preserves structural table, column, comment, and key metadata', () => {
|
||||
const extracted = kloSchemaSnapshotToExtractedSchema(snapshot());
|
||||
|
||||
expect(extracted.tables).toEqual([
|
||||
{
|
||||
name: 'orders',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
dbComment: 'Orders placed by customers',
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
type: 'integer',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
dbComment: 'Primary key',
|
||||
},
|
||||
{
|
||||
name: 'customer_id',
|
||||
type: 'integer',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
dbComment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
constraintName: 'orders_customer_id_fkey',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'customers',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
dbComment: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
type: 'integer',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
dbComment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('builds the same natural key shape used by schema sync', () => {
|
||||
expect(buildLiveDatabaseTableNaturalKey({ catalog: null, db: 'public', name: 'orders' })).toBe('|public|orders');
|
||||
expect(buildLiveDatabaseTableNaturalKey({ catalog: 'warehouse', db: 'analytics', name: 'events' })).toBe(
|
||||
'warehouse|analytics|events',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
import type { KloSchemaSnapshot, KloSchemaTable } from '../../../scan/types.js';
|
||||
|
||||
export interface LiveDatabaseExtractedForeignKey {
|
||||
fromTable: string;
|
||||
fromColumn: string;
|
||||
toTable: string;
|
||||
toColumn: string;
|
||||
constraintName?: string;
|
||||
}
|
||||
|
||||
export interface LiveDatabaseExtractedColumn {
|
||||
name: string;
|
||||
type: string;
|
||||
nullable: boolean;
|
||||
primaryKey: boolean;
|
||||
dbComment: string | null;
|
||||
}
|
||||
|
||||
export interface LiveDatabaseExtractedTable {
|
||||
name: string;
|
||||
catalog: string | null;
|
||||
db: string | null;
|
||||
dbComment: string | null;
|
||||
columns: LiveDatabaseExtractedColumn[];
|
||||
foreignKeys: LiveDatabaseExtractedForeignKey[];
|
||||
}
|
||||
|
||||
export interface LiveDatabaseExtractedSchema {
|
||||
connectionId?: string;
|
||||
tables: LiveDatabaseExtractedTable[];
|
||||
}
|
||||
|
||||
/**
 * Builds the `catalog|db|name` natural key for a table.
 *
 * A null or undefined catalog or db contributes an empty segment, e.g. `|public|orders`.
 */
export function buildLiveDatabaseTableNaturalKey(table: Pick<KloSchemaTable, 'catalog' | 'db' | 'name'>): string {
  const catalogPart = table.catalog ?? '';
  const dbPart = table.db ?? '';
  return catalogPart + '|' + dbPart + '|' + table.name;
}
|
||||
|
||||
/**
 * Projects a schema snapshot onto the extracted-schema shape.
 *
 * Per table it keeps name, catalog, db, and comment (as `dbComment`); per column the
 * name, native type, nullability, primary-key flag, and comment; per foreign key the
 * source/target table and column. `constraintName` is included only when it is truthy.
 */
export function kloSchemaSnapshotToExtractedSchema(snapshot: KloSchemaSnapshot): LiveDatabaseExtractedSchema {
  const toExtractedTable = (table: KloSchemaTable): LiveDatabaseExtractedTable => {
    const columns = table.columns.map(
      (column): LiveDatabaseExtractedColumn => ({
        name: column.name,
        type: column.nativeType,
        nullable: column.nullable,
        primaryKey: column.primaryKey,
        dbComment: column.comment ?? null,
      }),
    );
    const foreignKeys = table.foreignKeys.map((foreignKey): LiveDatabaseExtractedForeignKey => {
      const extracted: LiveDatabaseExtractedForeignKey = {
        fromTable: table.name,
        fromColumn: foreignKey.fromColumn,
        toTable: foreignKey.toTable,
        toColumn: foreignKey.toColumn,
      };
      if (foreignKey.constraintName) {
        extracted.constraintName = foreignKey.constraintName;
      }
      return extracted;
    });
    return {
      name: table.name,
      catalog: table.catalog ?? null,
      db: table.db ?? null,
      dbComment: table.comment ?? null,
      columns,
      foreignKeys,
    };
  };

  return {
    connectionId: snapshot.connectionId,
    tables: snapshot.tables.map(toExtractedTable),
  };
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { LiveDatabaseSourceAdapter } from './live-database.adapter.js';
|
||||
|
||||
// Exercises the adapter end to end against a stubbed introspection port:
// fetch -> detect -> chunk on a temporary staging directory.
describe('LiveDatabaseSourceAdapter', () => {
  it('fetches a schema snapshot through the introspection port', async () => {
    const extractSchema = vi.fn().mockResolvedValue({
      connectionId: 'conn-1',
      driver: 'postgres',
      extractedAt: '2026-04-27T00:00:00.000Z',
      scope: { schemas: ['public'] },
      metadata: {},
      tables: [
        {
          name: 'orders',
          catalog: null,
          db: 'public',
          kind: 'table',
          comment: null,
          estimatedRows: null,
          columns: [
            {
              name: 'id',
              nativeType: 'integer',
              normalizedType: 'integer',
              dimensionType: 'number',
              nullable: false,
              primaryKey: true,
              comment: null,
            },
          ],
          foreignKeys: [],
        },
      ],
    });
    const adapter = new LiveDatabaseSourceAdapter({
      introspection: { extractSchema },
      now: () => new Date('2026-04-27T00:00:00.000Z'),
    });
    const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-adapter-'));

    try {
      await adapter.fetch(undefined, dir, { connectionId: 'conn-1', sourceKey: 'live-database' });

      expect(extractSchema).toHaveBeenCalledWith('conn-1');
      await expect(adapter.detect(dir)).resolves.toBe(true);
      const chunked = await adapter.chunk(dir);
      expect(chunked.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']);
    } finally {
      // Remove the staging directory so repeated runs do not leave temp dirs behind.
      await rm(dir, { recursive: true, force: true });
    }
  });

  it('declares the live database source and skill', () => {
    const adapter = new LiveDatabaseSourceAdapter({
      introspection: { extractSchema: vi.fn() },
    });
    expect(adapter.source).toBe('live-database');
    expect(adapter.skillNames).toEqual(['live_database_ingest']);
  });
});
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js';
|
||||
import { chunkLiveDatabaseStagedDir } from './chunk.js';
|
||||
import { detectLiveDatabaseStagedDir, writeLiveDatabaseSnapshot } from './stage.js';
|
||||
import type { LiveDatabaseSourceAdapterDeps } from './types.js';
|
||||
|
||||
/**
 * Source adapter for the `live-database` source.
 *
 * Staging-directory detection, snapshot writing, and chunking are delegated to the
 * helpers imported from `stage.js` and `chunk.js`; schema extraction goes through
 * the injected introspection port.
 */
export class LiveDatabaseSourceAdapter implements SourceAdapter {
  readonly source = 'live-database';
  readonly skillNames = ['live_database_ingest'];

  constructor(private readonly deps: LiveDatabaseSourceAdapterDeps) {}

  /** Reports whether `stagedDir` is recognized as a live-database staging directory. */
  detect(stagedDir: string): Promise<boolean> {
    return detectLiveDatabaseStagedDir(stagedDir);
  }

  /**
   * Extracts the schema for `ctx.connectionId` and writes it into `stagedDir`.
   *
   * The written snapshot always carries `ctx.connectionId`; `extractedAt` falls back
   * to the injected clock (or the current time) when the snapshot has none.
   */
  async fetch(_pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const { connectionId } = ctx;
    const snapshot = await this.deps.introspection.extractSchema(connectionId);
    const clock = this.deps.now ?? (() => new Date());
    const staged = {
      ...snapshot,
      connectionId,
      extractedAt: snapshot.extractedAt ?? clock().toISOString(),
    };
    await writeLiveDatabaseSnapshot(stagedDir, staged);
  }

  /** Splits the staged snapshot into work units, optionally restricted by `diffSet`. */
  chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    return chunkLiveDatabaseStagedDir(stagedDir, diffSet);
  }
}
|
||||
|
|
@ -0,0 +1,252 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
buildLiveDatabaseManifestShards,
|
||||
type LiveDatabaseManifestExistingDescriptions,
|
||||
type LiveDatabaseManifestJoinEntry,
|
||||
type LiveDatabaseManifestShard,
|
||||
} from './manifest.js';
|
||||
|
||||
/** Converts the shard map into a plain object whose keys are inserted in `localeCompare` order. */
function shardObject(shards: Map<string, LiveDatabaseManifestShard>): Record<string, LiveDatabaseManifestShard> {
  const entries = Array.from(shards.entries());
  entries.sort((left, right) => left[0].localeCompare(right[0]));
  return Object.fromEntries(entries);
}
|
||||
|
||||
describe('buildLiveDatabaseManifestShards', () => {
|
||||
it('builds shard objects with generated joins and preserved external descriptions', () => {
|
||||
const existingDescriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>([
|
||||
[
|
||||
'orders',
|
||||
{
|
||||
table: { user: 'Pinned analyst description', db: 'Old db description' },
|
||||
columns: new Map([['id', { user: 'Pinned id description', db: 'Old id description' }]]),
|
||||
},
|
||||
],
|
||||
]);
|
||||
|
||||
const preservedJoins = new Map<string, LiveDatabaseManifestJoinEntry[]>([
|
||||
[
|
||||
'orders',
|
||||
[
|
||||
{
|
||||
to: 'customers',
|
||||
on: 'orders.account_id = customers.id',
|
||||
relationship: 'many_to_one',
|
||||
source: 'manual',
|
||||
},
|
||||
{
|
||||
to: 'missing_accounts',
|
||||
on: 'orders.account_id = missing_accounts.id',
|
||||
relationship: 'many_to_one',
|
||||
source: 'manual',
|
||||
},
|
||||
],
|
||||
],
|
||||
]);
|
||||
|
||||
const result = buildLiveDatabaseManifestShards({
|
||||
connectionType: 'POSTGRESQL',
|
||||
mapColumnType: (nativeType) => nativeType.toLowerCase(),
|
||||
existingDescriptions,
|
||||
existingPreservedJoins: preservedJoins,
|
||||
tables: [
|
||||
{
|
||||
name: 'orders',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
descriptions: { db: 'Fresh db description', ai: 'Generated AI description' },
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
type: 'INTEGER',
|
||||
pk: true,
|
||||
nullable: false,
|
||||
descriptions: { db: 'Fresh id description' },
|
||||
},
|
||||
{
|
||||
name: 'customer_id',
|
||||
type: 'INTEGER',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'customers',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
type: 'INTEGER',
|
||||
pk: true,
|
||||
nullable: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
joins: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumns: ['customer_id'],
|
||||
toTable: 'customers',
|
||||
toColumns: ['id'],
|
||||
relationship: 'MANY_TO_ONE',
|
||||
source: 'formal',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(result.tablesProcessed).toBe(2);
|
||||
expect(shardObject(result.shards)).toEqual({
|
||||
public: {
|
||||
tables: {
|
||||
orders: {
|
||||
table: 'public.orders',
|
||||
descriptions: {
|
||||
user: 'Pinned analyst description',
|
||||
db: 'Fresh db description',
|
||||
ai: 'Generated AI description',
|
||||
},
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
type: 'integer',
|
||||
pk: true,
|
||||
nullable: false,
|
||||
descriptions: {
|
||||
user: 'Pinned id description',
|
||||
db: 'Fresh id description',
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'customer_id',
|
||||
type: 'integer',
|
||||
},
|
||||
],
|
||||
joins: [
|
||||
{
|
||||
to: 'customers',
|
||||
on: 'orders.customer_id = customers.id',
|
||||
relationship: 'many_to_one',
|
||||
source: 'formal',
|
||||
},
|
||||
{
|
||||
to: 'customers',
|
||||
on: 'orders.account_id = customers.id',
|
||||
relationship: 'many_to_one',
|
||||
source: 'manual',
|
||||
},
|
||||
],
|
||||
},
|
||||
customers: {
|
||||
table: 'public.customers',
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
type: 'integer',
|
||||
pk: true,
|
||||
nullable: false,
|
||||
},
|
||||
],
|
||||
joins: [
|
||||
{
|
||||
to: 'orders',
|
||||
on: 'customers.id = orders.customer_id',
|
||||
relationship: 'one_to_many',
|
||||
source: 'formal',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('uses warehouse and schema shard keys for snowflake-style connections', () => {
|
||||
const result = buildLiveDatabaseManifestShards({
|
||||
connectionType: 'SNOWFLAKE',
|
||||
mapColumnType: (nativeType) => nativeType.toLowerCase(),
|
||||
tables: [
|
||||
{
|
||||
name: 'accounts',
|
||||
catalog: 'ANALYTICS',
|
||||
db: 'CORE',
|
||||
columns: [{ name: 'id', type: 'NUMBER' }],
|
||||
},
|
||||
],
|
||||
joins: [],
|
||||
});
|
||||
|
||||
expect(shardObject(result.shards)).toEqual({
|
||||
'ANALYTICS.CORE': {
|
||||
tables: {
|
||||
accounts: {
|
||||
table: 'ANALYTICS.CORE.accounts',
|
||||
columns: [{ name: 'id', type: 'number' }],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('renders ordered multi-column joins in both directions', () => {
|
||||
const result = buildLiveDatabaseManifestShards({
|
||||
connectionType: 'POSTGRESQL',
|
||||
mapColumnType: (nativeType) => nativeType,
|
||||
tables: [
|
||||
{
|
||||
name: 'order_lines',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
columns: [
|
||||
{ name: 'order_id', type: 'integer' },
|
||||
{ name: 'line_number', type: 'integer' },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'order_line_allocations',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
columns: [
|
||||
{ name: 'order_id', type: 'integer' },
|
||||
{ name: 'line_number', type: 'integer' },
|
||||
],
|
||||
},
|
||||
],
|
||||
joins: [
|
||||
{
|
||||
fromTable: 'order_line_allocations',
|
||||
fromColumns: ['order_id', 'line_number'],
|
||||
toTable: 'order_lines',
|
||||
toColumns: ['order_id', 'line_number'],
|
||||
relationship: 'many_to_one',
|
||||
source: 'inferred',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(shardObject(result.shards)).toMatchObject({
|
||||
public: {
|
||||
tables: {
|
||||
order_line_allocations: {
|
||||
joins: [
|
||||
{
|
||||
to: 'order_lines',
|
||||
on: 'order_line_allocations.order_id = order_lines.order_id AND order_line_allocations.line_number = order_lines.line_number',
|
||||
relationship: 'many_to_one',
|
||||
source: 'inferred',
|
||||
},
|
||||
],
|
||||
},
|
||||
order_lines: {
|
||||
joins: [
|
||||
{
|
||||
to: 'order_line_allocations',
|
||||
on: 'order_lines.order_id = order_line_allocations.order_id AND order_lines.line_number = order_line_allocations.line_number',
|
||||
relationship: 'one_to_many',
|
||||
source: 'inferred',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
270
packages/context/src/ingest/adapters/live-database/manifest.ts
Normal file
270
packages/context/src/ingest/adapters/live-database/manifest.ts
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
const RELATIONSHIP_MAP: Record<string, string> = {
|
||||
MANY_TO_ONE: 'many_to_one',
|
||||
ONE_TO_MANY: 'one_to_many',
|
||||
ONE_TO_ONE: 'one_to_one',
|
||||
};
|
||||
|
||||
const RELATIONSHIP_INVERSE: Record<string, string> = {
|
||||
many_to_one: 'one_to_many',
|
||||
one_to_many: 'many_to_one',
|
||||
one_to_one: 'one_to_one',
|
||||
};
|
||||
|
||||
const SCAN_MANAGED_DESCRIPTION_KEYS = new Set(['db', 'ai']);
|
||||
|
||||
export interface LiveDatabaseManifestColumn {
|
||||
name: string;
|
||||
type: string;
|
||||
pk?: boolean;
|
||||
nullable?: boolean;
|
||||
descriptions?: Record<string, string>;
|
||||
}
|
||||
|
||||
export interface LiveDatabaseManifestJoinEntry {
|
||||
to: string;
|
||||
on: string;
|
||||
relationship: string;
|
||||
source: string;
|
||||
}
|
||||
|
||||
export interface LiveDatabaseManifestTableEntry {
|
||||
table: string;
|
||||
descriptions?: Record<string, string>;
|
||||
columns: LiveDatabaseManifestColumn[];
|
||||
joins?: LiveDatabaseManifestJoinEntry[];
|
||||
}
|
||||
|
||||
export interface LiveDatabaseManifestShard {
|
||||
tables: Record<string, LiveDatabaseManifestTableEntry>;
|
||||
}
|
||||
|
||||
export interface LiveDatabaseManifestTableData {
|
||||
name: string;
|
||||
catalog: string | null;
|
||||
db: string | null;
|
||||
descriptions?: Record<string, string>;
|
||||
columns: Array<{
|
||||
name: string;
|
||||
type: string;
|
||||
pk?: boolean;
|
||||
nullable?: boolean;
|
||||
descriptions?: Record<string, string>;
|
||||
}>;
|
||||
}
|
||||
|
||||
export interface LiveDatabaseManifestJoinData {
|
||||
fromTable: string;
|
||||
fromColumns: string[];
|
||||
toTable: string;
|
||||
toColumns: string[];
|
||||
relationship: string;
|
||||
source: 'formal' | 'inferred' | 'manual';
|
||||
}
|
||||
|
||||
export interface LiveDatabaseManifestExistingDescriptions {
|
||||
table?: Record<string, string>;
|
||||
columns: Map<string, Record<string, string>>;
|
||||
}
|
||||
|
||||
export interface BuildLiveDatabaseManifestShardsInput {
|
||||
connectionType: string;
|
||||
tables: LiveDatabaseManifestTableData[];
|
||||
joins: LiveDatabaseManifestJoinData[];
|
||||
mapColumnType: (nativeType: string) => string;
|
||||
existingPreservedJoins?: Map<string, LiveDatabaseManifestJoinEntry[]>;
|
||||
existingDescriptions?: Map<string, LiveDatabaseManifestExistingDescriptions>;
|
||||
}
|
||||
|
||||
export interface BuildLiveDatabaseManifestShardsResult {
|
||||
shards: Map<string, LiveDatabaseManifestShard>;
|
||||
tablesProcessed: number;
|
||||
}
|
||||
|
||||
/**
 * Merges description maps so externally authored entries survive a rescan.
 *
 * Existing entries are kept unless their key is scan-managed (`db`, `ai`); incoming
 * entries are then applied on top and win on key collisions.
 *
 * @returns The merged map, or `undefined` when both inputs are absent or the merge is empty.
 */
function mergeDescriptionsPreservingExternal(
  existing: Record<string, string> | undefined,
  incoming: Record<string, string> | undefined,
): Record<string, string> | undefined {
  if (!(existing || incoming)) {
    return undefined;
  }
  const merged: Record<string, string> = {};
  const preserved = existing
    ? Object.entries(existing).filter(([key]) => !SCAN_MANAGED_DESCRIPTION_KEYS.has(key))
    : [];
  for (const [key, value] of preserved) {
    merged[key] = value;
  }
  if (incoming) {
    Object.assign(merged, incoming);
  }
  if (Object.keys(merged).length === 0) {
    return undefined;
  }
  return merged;
}
|
||||
|
||||
/**
 * Chooses the manifest shard key for a table from its connection type.
 *
 * - SNOWFLAKE / DATABRICKS: `<catalog>.<db>`, defaulting to `default` and `public`.
 * - BIGQUERY / MYSQL / CLICKHOUSE: the db, else the catalog, else `default`.
 * - anything else: the db, else `public`.
 *
 * The connection type is matched case-insensitively.
 */
function getShardKey(connectionType: string, catalog: string | null, db: string | null): string {
  const kind = connectionType.toUpperCase();

  if (kind === 'SNOWFLAKE' || kind === 'DATABRICKS') {
    return `${catalog ?? 'default'}.${db ?? 'public'}`;
  }
  if (kind === 'BIGQUERY' || kind === 'MYSQL' || kind === 'CLICKHOUSE') {
    return db ?? catalog ?? 'default';
  }
  return db ?? 'public';
}
|
||||
|
||||
/**
 * Builds a dotted table reference: `catalog.db.name`, omitting a catalog or db that
 * is null or empty.
 */
function buildTableRef(name: string, catalog: string | null, db: string | null): string {
  const qualifiers = [catalog, db].filter((part): part is string => Boolean(part));
  return [...qualifiers, name].join('.');
}
|
||||
|
||||
/**
 * Appends `join` to the list stored for `tableName` unless that list already holds a
 * join with the same `to` and `on`. The list is (re)stored in the map either way,
 * so the table always ends up with an entry.
 */
function addJoinOnce(
  joinsByTable: Map<string, LiveDatabaseManifestJoinEntry[]>,
  tableName: string,
  join: LiveDatabaseManifestJoinEntry,
): void {
  const tableJoins = joinsByTable.get(tableName) ?? [];
  let alreadyPresent = false;
  for (const candidate of tableJoins) {
    if (candidate.to === join.to && candidate.on === join.on) {
      alreadyPresent = true;
      break;
    }
  }
  if (!alreadyPresent) {
    tableJoins.push(join);
  }
  joinsByTable.set(tableName, tableJoins);
}
|
||||
|
||||
/**
 * Renders the join condition between two tables as
 * `left.col = right.col` clauses joined with ` AND `.
 *
 * @param leftTable - Name used to qualify the left-hand columns.
 * @param leftColumns - Left-hand column names, paired by position with `rightColumns`.
 * @param rightTable - Name used to qualify the right-hand columns.
 * @param rightColumns - Right-hand column names.
 * @returns The condition string.
 * @throws Error when `leftColumns` is empty, when the two lists differ in
 *   length, or when a right-hand column is missing or empty.
 */
function joinCondition(
  leftTable: string,
  leftColumns: readonly string[],
  rightTable: string,
  rightColumns: readonly string[],
): string {
  const width = leftColumns.length;
  if (width === 0 || width !== rightColumns.length) {
    throw new Error(`Invalid relationship join from ${leftTable} to ${rightTable}: column tuple widths differ`);
  }

  const clauses: string[] = [];
  for (const [position, leftColumn] of leftColumns.entries()) {
    const rightColumn = rightColumns[position];
    if (!rightColumn) {
      throw new Error(`Invalid relationship join from ${leftTable} to ${rightTable}: missing target column`);
    }
    clauses.push(`${leftTable}.${leftColumn} = ${rightTable}.${rightColumn}`);
  }
  return clauses.join(' AND ');
}
|
||||
|
||||
/**
 * Groups join entries by owning table.
 *
 * Every extracted join whose two endpoints are both in `tableNames` is recorded
 * twice: once on the source table and once, with columns swapped and the
 * relationship inverted, on the target table. Preserved joins are merged in
 * afterwards, again only when both endpoints are known. Duplicates (same target
 * and condition) are dropped by `addJoinOnce`, so an extracted join wins over an
 * identical preserved one.
 *
 * @param tableNames - Names of the tables present in the manifest being built.
 * @param joins - Joins extracted from the live database.
 * @param preservedJoins - Previously stored joins to keep, keyed by table name.
 * @returns Map from table name to its join entries.
 * @throws Error when `joinCondition` rejects a join's column lists.
 */
function buildJoinsByTable(
  tableNames: Set<string>,
  joins: LiveDatabaseManifestJoinData[],
  preservedJoins: Map<string, LiveDatabaseManifestJoinEntry[]>,
): Map<string, LiveDatabaseManifestJoinEntry[]> {
  const grouped = new Map<string, LiveDatabaseManifestJoinEntry[]>();
  const isKnown = (name: string): boolean => tableNames.has(name);

  joins
    .filter((join) => isKnown(join.fromTable) && isKnown(join.toTable))
    .forEach((join) => {
      const { fromTable, fromColumns, toTable, toColumns, source } = join;
      // Unmapped relationship names are passed through unchanged.
      const relationship = RELATIONSHIP_MAP[join.relationship] ?? join.relationship;

      addJoinOnce(grouped, fromTable, {
        to: toTable,
        on: joinCondition(fromTable, fromColumns, toTable, toColumns),
        relationship,
        source,
      });

      // Mirror entry on the target table; relationships without a known inverse fall back to 'one_to_many'.
      addJoinOnce(grouped, toTable, {
        to: fromTable,
        on: joinCondition(toTable, toColumns, fromTable, fromColumns),
        relationship: RELATIONSHIP_INVERSE[relationship] ?? 'one_to_many',
        source,
      });
    });

  for (const [tableName, tableJoins] of preservedJoins) {
    if (!isKnown(tableName)) {
      continue;
    }
    tableJoins
      .filter((join) => isKnown(join.to))
      .forEach((join) => addJoinOnce(grouped, tableName, join));
  }

  return grouped;
}
|
||||
|
||||
/**
 * Converts the tables and joins of a live-database scan into manifest shards.
 *
 * Each table becomes one entry (qualified table ref, columns, merged
 * descriptions, joins) placed in the shard chosen by `getShardKey`. Column and
 * table descriptions are combined with previously stored ones through
 * `mergeDescriptionsPreservingExternal`. Optional fields (`pk`, `nullable`,
 * `descriptions`, `joins`) are only written when they carry information.
 *
 * @param input - Tables, joins, connection type, column type mapper and any
 *   previously stored descriptions/joins to preserve.
 * @returns The shards keyed by shard key, plus the number of tables processed.
 */
export function buildLiveDatabaseManifestShards(
  input: BuildLiveDatabaseManifestShardsInput,
): BuildLiveDatabaseManifestShardsResult {
  const knownTables = new Set(input.tables.map((table) => table.name));
  const joinsByTable = buildJoinsByTable(knownTables, input.joins, input.existingPreservedJoins ?? new Map());
  const shards = new Map<string, LiveDatabaseManifestShard>();

  for (const table of input.tables) {
    const { name, catalog, db } = table;
    const preserved = input.existingDescriptions?.get(name);

    const columns: LiveDatabaseManifestColumn[] = [];
    for (const column of table.columns) {
      const manifestColumn: LiveDatabaseManifestColumn = {
        name: column.name,
        type: input.mapColumnType(column.type),
      };
      if (column.pk) {
        manifestColumn.pk = true;
      }
      // Only an explicit NOT NULL is recorded; nullable/unknown columns omit the field.
      if (column.nullable === false) {
        manifestColumn.nullable = false;
      }
      const columnDescriptions = mergeDescriptionsPreservingExternal(
        preserved?.columns.get(column.name),
        column.descriptions,
      );
      if (columnDescriptions) {
        manifestColumn.descriptions = columnDescriptions;
      }
      columns.push(manifestColumn);
    }

    const entry: LiveDatabaseManifestTableEntry = {
      table: buildTableRef(name, catalog, db),
      columns,
    };

    const tableDescriptions = mergeDescriptionsPreservingExternal(preserved?.table, table.descriptions);
    if (tableDescriptions) {
      entry.descriptions = tableDescriptions;
    }

    const tableJoins = joinsByTable.get(name);
    if (tableJoins?.length) {
      entry.joins = tableJoins;
    }

    const shardKey = getShardKey(input.connectionType, catalog, db);
    let shard = shards.get(shardKey);
    if (!shard) {
      shard = { tables: {} };
      shards.set(shardKey, shard);
    }
    shard.tables[name] = entry;
  }

  return { shards, tablesProcessed: input.tables.length };
}
|
||||
152
packages/context/src/ingest/adapters/live-database/stage.test.ts
Normal file
152
packages/context/src/ingest/adapters/live-database/stage.test.ts
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
import { mkdtemp, readFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
detectLiveDatabaseStagedDir,
|
||||
LIVE_DATABASE_FOREIGN_KEYS_FILE,
|
||||
LIVE_DATABASE_META_FILE,
|
||||
liveDatabaseTablePath,
|
||||
readLiveDatabaseTableFiles,
|
||||
writeLiveDatabaseSnapshot,
|
||||
} from './stage.js';
|
||||
import type { KloSchemaSnapshot } from '../../../scan/types.js';
|
||||
|
||||
/**
 * Fixture: a two-table Postgres snapshot (`orders` -> `customers`) with one
 * foreign key from `orders.customer_id` to `customers.id`.
 */
function snapshot(): KloSchemaSnapshot {
  type Table = KloSchemaSnapshot['tables'][number];
  type Column = Table['columns'][number];

  // Every fixture column is a non-nullable, uncommented numeric dimension.
  const numberColumn = (name: string, sqlType: 'integer' | 'numeric', primaryKey: boolean): Column => ({
    name,
    nativeType: sqlType,
    normalizedType: sqlType,
    dimensionType: 'number',
    nullable: false,
    primaryKey,
    comment: null,
  });

  const orders: Table = {
    name: 'orders',
    catalog: null,
    db: 'public',
    kind: 'table',
    comment: 'Orders placed by customers',
    estimatedRows: 200,
    columns: [
      numberColumn('id', 'integer', true),
      numberColumn('customer_id', 'integer', false),
      numberColumn('total', 'numeric', false),
    ],
    foreignKeys: [
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'public',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: null,
      },
    ],
  };

  const customers: Table = {
    name: 'customers',
    catalog: null,
    db: 'public',
    kind: 'table',
    comment: null,
    estimatedRows: 50,
    columns: [numberColumn('id', 'integer', true)],
    foreignKeys: [],
  };

  return {
    connectionId: 'conn-1',
    driver: 'postgres',
    extractedAt: '2026-04-27T00:00:00.000Z',
    scope: { schemas: ['public'] },
    metadata: { dialect: 'postgres' },
    tables: [orders, customers],
  };
}
|
||||
|
||||
describe('live-database staged snapshot files', () => {
  // Creates a fresh temporary staging directory with the given name prefix.
  const makeStageDir = (prefix: string): Promise<string> => mkdtemp(join(tmpdir(), prefix));
  // Reads a staged file (path relative to the staging directory) as UTF-8 text.
  const readStaged = (dir: string, relativePath: string): Promise<string> =>
    readFile(join(dir, relativePath), 'utf8');

  it('writes deterministic metadata, table, and foreign-key files', async () => {
    const dir = await makeStageDir('klo-live-db-stage-');
    await writeLiveDatabaseSnapshot(dir, snapshot());

    const connectionJson = await readStaged(dir, LIVE_DATABASE_META_FILE);
    expect(connectionJson).toContain('"connectionId": "conn-1"');
    expect(await readStaged(dir, LIVE_DATABASE_FOREIGN_KEYS_FILE)).toContain('"fromTable": "orders"');
    for (const fragment of ['"driver": "postgres"', '"schemas"']) {
      expect(connectionJson).toContain(fragment);
    }

    const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' });
    const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' });
    expect(ordersPath).toMatch(/^tables\/[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.json$/);

    const ordersJson = await readStaged(dir, ordersPath);
    expect(ordersJson).toContain('"name": "orders"');
    expect(await readStaged(dir, customersPath)).toContain('"name": "customers"');
    for (const fragment of [
      '"kind": "table"',
      '"estimatedRows": 200',
      '"nativeType": "integer"',
      '"normalizedType": "integer"',
    ]) {
      expect(ordersJson).toContain(fragment);
    }
    // Columns are staged with nativeType/normalizedType, never a bare "type" field.
    expect(ordersJson).not.toContain('"type": "integer"');

    const stagedTableNames = (await readLiveDatabaseTableFiles(dir)).map((file) => file.table.name);
    expect(stagedTableNames).toEqual(['customers', 'orders']);
    expect(await detectLiveDatabaseStagedDir(dir)).toBe(true);
  });

  it('redacts sensitive snapshot metadata before writing connection metadata', async () => {
    const secretUrl = 'postgres://reader:secret@example.test/db'; // pragma: allowlist secret
    const secretKey = 'pem-value'; // pragma: allowlist secret

    const dir = await makeStageDir('klo-live-db-redacted-stage-');
    await writeLiveDatabaseSnapshot(dir, {
      ...snapshot(),
      metadata: {
        dialect: 'postgres',
        url: secretUrl,
        serviceAccountJson: {
          client_email: 'reader@example.test',
          private_key: secretKey,
        },
      },
    });

    const connectionJson = await readStaged(dir, LIVE_DATABASE_META_FILE);

    for (const expected of [
      '"dialect": "postgres"',
      '"client_email": "reader@example.test"',
      '"url": "<redacted>"',
      '"private_key": "<redacted>"',
    ]) {
      expect(connectionJson).toContain(expected);
    }
    for (const leaked of [secretUrl, secretKey]) {
      expect(connectionJson).not.toContain(leaked);
    }
  });

  it('returns false for a directory that is missing live database metadata', async () => {
    const dir = await makeStageDir('klo-live-db-empty-');
    expect(await detectLiveDatabaseStagedDir(dir)).toBe(false);
  });
});
|
||||
138
packages/context/src/ingest/adapters/live-database/stage.ts
Normal file
138
packages/context/src/ingest/adapters/live-database/stage.ts
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
import { Buffer } from 'node:buffer';
|
||||
import type { Dirent } from 'node:fs';
|
||||
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import { redactKloSensitiveMetadata } from '../../../core/redaction.js';
|
||||
import type { KloSchemaSnapshot, KloSchemaTable, KloTableRef } from '../../../scan/types.js';
|
||||
|
||||
export const LIVE_DATABASE_META_FILE = 'connection.json';
|
||||
export const LIVE_DATABASE_FOREIGN_KEYS_FILE = 'foreign-keys.json';
|
||||
const LIVE_DATABASE_TABLES_DIR = 'tables';
|
||||
|
||||
/** One table definition read back from a staged snapshot directory. */
interface LiveDatabaseTableFile {
  /** Path of the JSON file relative to the staged directory (starts with the tables sub-directory). */
  path: string;
  /** Parsed contents of that file. */
  table: KloSchemaTable;
}
|
||||
|
||||
/** One foreign key, flattened for the staged foreign-key index file. */
interface ForeignKeyIndexEntry {
  /** Name of the table that declares the foreign key. */
  fromTable: string;
  /** Staged file path of the declaring table, as produced by `liveDatabaseTablePath`. */
  fromTablePath: string;
  /** Referencing column on the declaring table. */
  fromColumn: string;
  /** Catalog of the referenced table, if any. */
  toCatalog: string | null;
  /** Database/schema of the referenced table, if any. */
  toDb: string | null;
  /** Name of the referenced table. */
  toTable: string;
  /** Referenced column. */
  toColumn: string;
  /** Constraint name, or `null` when the snapshot does not carry one. */
  constraintName: string | null;
}
|
||||
|
||||
/**
 * Encodes one component of a table identifier as base64url so it can be used
 * inside a file name.
 *
 * @param value - The component; `null`/`undefined` is encoded as if it were `'_'`.
 * @returns The base64url encoding of the UTF-8 bytes of the component.
 */
function encodePathPart(value: string | null | undefined): string {
  const text = value === null || value === undefined ? '_' : value;
  return Buffer.from(text, 'utf8').toString('base64url');
}
|
||||
|
||||
/**
 * Builds the string used to order tables: catalog, db and name joined by
 * U+0000, with a missing catalog/db treated as an empty string.
 *
 * NOTE(review): callers in this file compare these keys with `localeCompare`;
 * locale-aware collation may treat U+0000 as ignorable, in which case the
 * separator would not disambiguate component boundaries — confirm.
 *
 * @param table - Table reference to derive the key from.
 * @returns The composite sort key.
 */
function tableSortKey(table: KloTableRef): string {
  const { catalog, db, name } = table;
  return [catalog ?? '', db ?? '', name].join('\u0000');
}
|
||||
|
||||
/**
 * Returns the staged-directory-relative path of the JSON file for a table:
 * `tables/<catalog>.<db>.<name>.json`, where each component is encoded with
 * `encodePathPart`.
 *
 * @param table - Table reference (catalog, db, name).
 * @returns The relative file path, using `/` as separator.
 */
export function liveDatabaseTablePath(table: KloTableRef): string {
  const fileStem = [table.catalog, table.db, table.name].map((part) => encodePathPart(part)).join('.');
  return `${LIVE_DATABASE_TABLES_DIR}/${fileStem}.json`;
}
|
||||
|
||||
/**
 * Recursively lists the regular files below `dir`.
 *
 * @param root - Directory the returned paths are made relative to.
 * @param dir - Directory to list; defaults to `root` and is used for recursion.
 * @returns Sorted paths relative to `root`, with `/` as separator. A directory
 *   that cannot be read (for example because it does not exist) yields `[]`.
 */
async function walkFiles(root: string, dir = root): Promise<string[]> {
  let entries: Dirent[];
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    // Best effort: an unreadable directory is treated as empty.
    return [];
  }

  const collected: string[] = [];
  for (const entry of entries) {
    const absolute = join(dir, entry.name);
    if (entry.isDirectory()) {
      const nested = await walkFiles(root, absolute);
      collected.push(...nested);
      continue;
    }
    if (entry.isFile()) {
      // Normalise Windows separators so the result is platform independent.
      collected.push(relative(root, absolute).split('\\').join('/'));
    }
  }

  collected.sort();
  return collected;
}
|
||||
|
||||
/**
 * Serialises a value as two-space-indented JSON followed by a newline.
 * Property order is whatever `JSON.stringify` produces for the input; keys are
 * not re-sorted here.
 *
 * @param value - Value to serialise.
 * @returns The JSON text terminated by `\n`.
 */
function stableJson(value: unknown): string {
  const pretty = JSON.stringify(value, null, 2);
  return pretty + '\n';
}
|
||||
|
||||
/**
 * Flattens every foreign key in the snapshot into one sorted list for the
 * staged foreign-key index file.
 *
 * Entries are ordered by source table name, source column, target table and
 * target column. Entries that tie on those four fields (for example the same
 * relationship declared in two schemas that contain identically named tables)
 * are further ordered by source file path, target catalog, target db and
 * constraint name, so the result does not depend on the order in which tables
 * appear in the snapshot.
 *
 * @param snapshot - Snapshot whose tables' foreign keys are indexed.
 * @returns A new, sorted array of index entries.
 */
function foreignKeyIndex(snapshot: KloSchemaSnapshot): ForeignKeyIndexEntry[] {
  const entries: ForeignKeyIndexEntry[] = [];
  for (const table of snapshot.tables) {
    const fromTablePath = liveDatabaseTablePath(table);
    for (const fk of table.foreignKeys) {
      entries.push({
        fromTable: table.name,
        fromTablePath,
        fromColumn: fk.fromColumn,
        toCatalog: fk.toCatalog,
        toDb: fk.toDb,
        toTable: fk.toTable,
        toColumn: fk.toColumn,
        constraintName: fk.constraintName,
      });
    }
  }

  // Plain code-unit comparison for the tie-breakers: they only need to be total and reproducible.
  const byCodeUnit = (left: string, right: string): number => (left < right ? -1 : left > right ? 1 : 0);

  entries.sort(
    (a, b) =>
      a.fromTable.localeCompare(b.fromTable) ||
      a.fromColumn.localeCompare(b.fromColumn) ||
      a.toTable.localeCompare(b.toTable) ||
      a.toColumn.localeCompare(b.toColumn) ||
      // Tie-breakers: without them, equal keys keep snapshot order (the sort is stable).
      byCodeUnit(a.fromTablePath, b.fromTablePath) ||
      byCodeUnit(a.toCatalog ?? '', b.toCatalog ?? '') ||
      byCodeUnit(a.toDb ?? '', b.toDb ?? '') ||
      byCodeUnit(a.constraintName ?? '', b.constraintName ?? ''),
  );
  return entries;
}
|
||||
|
||||
/**
 * Writes a schema snapshot into a staging directory as:
 * - the connection metadata file (ids, driver, scope, redacted metadata, table count),
 * - the foreign-key index file,
 * - one JSON file per table under the tables sub-directory.
 *
 * The tables sub-directory is created if needed. Connection metadata is passed
 * through `redactKloSensitiveMetadata` before it is written.
 *
 * @param stagedDir - Directory to write into.
 * @param snapshot - Snapshot to persist.
 */
export async function writeLiveDatabaseSnapshot(stagedDir: string, snapshot: KloSchemaSnapshot): Promise<void> {
  const inStage = (relativePath: string): string => join(stagedDir, relativePath);

  await mkdir(inStage(LIVE_DATABASE_TABLES_DIR), { recursive: true });

  const sortedTables = snapshot.tables
    .slice()
    .sort((left, right) => tableSortKey(left).localeCompare(tableSortKey(right)));

  const { connectionId, driver, extractedAt, scope } = snapshot;
  const connectionMeta = {
    connectionId,
    driver,
    extractedAt,
    scope,
    metadata: redactKloSensitiveMetadata(snapshot.metadata),
    tableCount: sortedTables.length,
  };

  await writeFile(inStage(LIVE_DATABASE_META_FILE), stableJson(connectionMeta));
  await writeFile(inStage(LIVE_DATABASE_FOREIGN_KEYS_FILE), stableJson({ foreignKeys: foreignKeyIndex(snapshot) }));

  for (const table of sortedTables) {
    await writeFile(inStage(liveDatabaseTablePath(table)), stableJson(table));
  }
}
|
||||
|
||||
/**
 * Loads every staged table file from the tables sub-directory of `stagedDir`.
 *
 * Only `.json` files are read. A file is kept when its parsed content has a
 * string `name` and an array `columns`; other JSON payloads are skipped.
 * Results are ordered by `tableSortKey`.
 *
 * @param stagedDir - Staging directory to read from.
 * @returns The table files with their staged-directory-relative paths.
 * @throws If a `.json` file cannot be read or does not contain valid JSON.
 */
export async function readLiveDatabaseTableFiles(stagedDir: string): Promise<LiveDatabaseTableFile[]> {
  const relativeFiles = await walkFiles(join(stagedDir, LIVE_DATABASE_TABLES_DIR));
  const tableFiles: LiveDatabaseTableFile[] = [];

  for (const file of relativeFiles) {
    if (!file.endsWith('.json')) {
      continue;
    }
    const path = `${LIVE_DATABASE_TABLES_DIR}/${file}`;
    const parsed = JSON.parse(await readFile(join(stagedDir, path), 'utf8')) as KloSchemaTable;
    // Minimal shape check; the cast above is not a validation.
    const looksLikeTable = Boolean(parsed) && typeof parsed.name === 'string' && Array.isArray(parsed.columns);
    if (looksLikeTable) {
      tableFiles.push({ path, table: parsed });
    }
  }

  return tableFiles.sort((left, right) => tableSortKey(left.table).localeCompare(tableSortKey(right.table)));
}
|
||||
|
||||
/**
 * Reports whether `stagedDir` looks like a staged live-database snapshot: the
 * connection metadata file parses to a JSON object and at least one table file
 * can be loaded.
 *
 * @param stagedDir - Directory to inspect.
 * @returns `true` when both conditions hold; `false` otherwise, including when
 *   any file is missing, unreadable or not valid JSON.
 */
export async function detectLiveDatabaseStagedDir(stagedDir: string): Promise<boolean> {
  try {
    const rawMeta = await readFile(join(stagedDir, LIVE_DATABASE_META_FILE), 'utf8');
    const meta: unknown = JSON.parse(rawMeta);
    const isJsonObject = typeof meta === 'object' && meta !== null && !Array.isArray(meta);
    if (!isJsonObject) {
      return false;
    }
    const tableFiles = await readLiveDatabaseTableFiles(stagedDir);
    return tableFiles.length > 0;
  } catch {
    // Any read/parse failure means this is not a usable staged directory.
    return false;
  }
}
|
||||
|
|
@ -0,0 +1,428 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { type LiveDatabaseSyncedSchema, planLiveDatabaseStructuralSync } from './structural-sync.js';
|
||||
|
||||
/** Returns a generator of predictable ids: `id-1`, `id-2`, ... (each factory starts over at 1). */
function idFactory(): () => string {
  let issued = 0;
  return () => {
    issued += 1;
    return `id-${issued}`;
  };
}
|
||||
|
||||
describe('planLiveDatabaseStructuralSync', () => {
  type SyncInput = Parameters<typeof planLiveDatabaseStructuralSync>[0];
  type ExtractedSchema = SyncInput['extracted'];
  type ExtractedTable = ExtractedSchema['tables'][number];
  type ExtractedColumn = ExtractedTable['columns'][number];
  type SyncedTable = LiveDatabaseSyncedSchema['tables'][number];
  type SyncedColumn = SyncedTable['columns'][number];
  type SyncedLink = LiveDatabaseSyncedSchema['links'][number];

  // Synced column with the defaults shared by most fixtures: a non-null, non-key number without metadata.
  const syncedColumn = (id: string, name: string, overrides: Partial<SyncedColumn> = {}): SyncedColumn => ({
    id,
    name,
    type: 'number',
    nullable: false,
    primaryKey: false,
    parentColumnId: null,
    descriptions: {},
    embedding: null,
    sampleValues: null,
    cardinality: null,
    ...overrides,
  });

  // Enabled synced table in the `public` schema.
  const syncedTable = (
    id: string,
    name: string,
    columns: SyncedColumn[],
    descriptions: Record<string, string> = {},
  ): SyncedTable => ({
    id,
    name,
    catalog: null,
    db: 'public',
    enabled: true,
    descriptions,
    columns,
  });

  // MANY_TO_ONE link that references a primary key; endpoints are [tableId, columnId] pairs.
  const manyToOneLink = (
    id: string,
    source: SyncedLink['source'],
    confidence: number,
    from: readonly [string, string],
    to: readonly [string, string],
  ): SyncedLink => ({
    id,
    fromTableId: from[0],
    fromColumnId: from[1],
    toTableId: to[0],
    toColumnId: to[1],
    source,
    confidence,
    relationshipType: 'MANY_TO_ONE',
    isPrimaryKeyReference: true,
  });

  // Extracted column; every fixture column is non-nullable.
  const extractedColumn = (
    name: string,
    type: ExtractedColumn['type'],
    primaryKey: boolean,
    dbComment: ExtractedColumn['dbComment'] = null,
  ): ExtractedColumn => ({ name, type, nullable: false, primaryKey, dbComment });

  // Extracted table in the `public` schema.
  const extractedTable = (
    name: string,
    dbComment: ExtractedTable['dbComment'],
    columns: ExtractedColumn[],
    foreignKeys: ExtractedTable['foreignKeys'] = [],
  ): ExtractedTable => ({ name, catalog: null, db: 'public', dbComment, columns, foreignKeys });

  it('plans table and column creates, updates, deletes, and metadata invalidation', () => {
    const current: LiveDatabaseSyncedSchema = {
      connectionId: 'conn-1',
      tables: [
        syncedTable(
          'tbl-orders',
          'orders',
          [
            syncedColumn('col-order-id', 'id', {
              primaryKey: true,
              descriptions: { db: 'Order id' },
              embedding: [1, 2, 3],
            }),
            syncedColumn('col-order-total', 'total', {
              nullable: true,
              descriptions: { ai: 'Old AI total text', db: 'Old total text' },
              embedding: [4, 5, 6],
              sampleValues: ['10'],
              cardinality: 12,
            }),
            syncedColumn('col-order-removed', 'removed', { type: 'string', nullable: true }),
          ],
          { ai: 'Old AI order text', db: 'Old DB order text' },
        ),
        syncedTable('tbl-removed', 'removed_table', [syncedColumn('col-removed-id', 'id', { primaryKey: true })]),
      ],
      links: [
        manyToOneLink(
          'inferred-total-link',
          'inferred',
          0.7,
          ['tbl-orders', 'col-order-total'],
          ['tbl-orders', 'col-order-id'],
        ),
      ],
    };

    const extracted: ExtractedSchema = {
      connectionId: 'conn-1',
      tables: [
        extractedTable('orders', 'Fresh DB order text', [
          extractedColumn('id', 'number', true, 'Order id'),
          extractedColumn('total', 'string', false, 'Fresh total text'),
          extractedColumn('created_at', 'time', false, 'Creation timestamp'),
        ]),
        extractedTable('customers', 'Customer table', [extractedColumn('id', 'number', true)]),
      ],
    };

    const plan = planLiveDatabaseStructuralSync({
      connectionId: 'conn-1',
      current,
      extracted,
      idFactory: idFactory(),
    });

    expect(plan.stats).toEqual({
      tablesCreated: 1,
      tablesDeleted: 1,
      columnsCreated: 2,
      columnsDeleted: 2,
      columnsModified: 1,
      formalLinksCreated: 0,
      formalLinksDeleted: 0,
    });

    const { operations } = plan;
    expect(operations.deleteTableIds).toEqual(['tbl-removed']);
    expect(operations.deleteColumnIds).toEqual(['col-order-removed']);
    expect(operations.insertTables).toEqual([
      {
        id: 'id-2',
        connectionId: 'conn-1',
        name: 'customers',
        catalog: null,
        db: 'public',
        enabled: true,
      },
    ]);
    expect(operations.insertColumns).toEqual([
      { id: 'id-1', tableId: 'tbl-orders', name: 'created_at', parentColumnId: null },
      { id: 'id-3', tableId: 'id-2', name: 'id', parentColumnId: null },
    ]);
    expect(operations.touchColumnIds).toEqual(['col-order-total']);
    expect(operations.invalidateColumnEmbeddingIds).toEqual(['col-order-total']);
    expect(plan.inferredLinksToValidate).toEqual(['inferred-total-link']);
    expect(plan.changes).toEqual({
      newTableIds: ['id-2'],
      newColumnIds: ['id-1', 'id-3'],
      tablesWithStructuralChanges: ['tbl-orders', 'id-2'],
      columnsWithTypeChange: ['col-order-total'],
      columnsWithDescriptionChange: ['col-order-total'],
      tablesWithDescriptionChange: ['tbl-orders'],
    });

    const orders = plan.schema.tables.find((table) => table.name === 'orders');
    expect(orders?.descriptions).toEqual({ db: 'Fresh DB order text' });
    expect(orders?.columns.map((column) => column.name)).toEqual(['id', 'total', 'created_at']);
    expect(orders?.columns.find((column) => column.name === 'total')).toMatchObject({
      id: 'col-order-total',
      type: 'string',
      nullable: false,
      primaryKey: false,
      descriptions: { db: 'Fresh total text' },
      embedding: null,
      sampleValues: ['10'],
      cardinality: 12,
    });
  });

  it('builds formal links from extracted foreign keys and preserves valid inferred links', () => {
    const current: LiveDatabaseSyncedSchema = {
      connectionId: 'conn-1',
      tables: [
        syncedTable('tbl-orders', 'orders', [
          syncedColumn('col-orders-id', 'id', { primaryKey: true }),
          syncedColumn('col-orders-customer', 'customer_id'),
        ]),
        syncedTable('tbl-customers', 'customers', [syncedColumn('col-customers-id', 'id', { primaryKey: true })]),
      ],
      links: [
        manyToOneLink(
          'formal-existing',
          'formal',
          1,
          ['tbl-orders', 'col-orders-customer'],
          ['tbl-customers', 'col-customers-id'],
        ),
        manyToOneLink(
          'inferred-existing',
          'inferred',
          0.6,
          ['tbl-orders', 'col-orders-id'],
          ['tbl-customers', 'col-customers-id'],
        ),
      ],
    };

    // The only foreign key used in this test: orders.customer_id -> customers.id.
    const customerForeignKeys = (): ExtractedTable['foreignKeys'] => [
      {
        fromTable: 'orders',
        fromColumn: 'customer_id',
        toTable: 'customers',
        toColumn: 'id',
      },
    ];

    // Same two extracted tables each time; only the foreign keys on `orders` vary.
    const extractedWith = (ordersForeignKeys: ExtractedTable['foreignKeys']): ExtractedSchema => ({
      connectionId: 'conn-1',
      tables: [
        extractedTable(
          'orders',
          null,
          [extractedColumn('id', 'number', true), extractedColumn('customer_id', 'number', false)],
          ordersForeignKeys,
        ),
        extractedTable('customers', null, [extractedColumn('id', 'number', true)]),
      ],
    });

    const plan = planLiveDatabaseStructuralSync({
      connectionId: 'conn-1',
      current,
      extracted: extractedWith(customerForeignKeys()),
      idFactory: idFactory(),
    });

    expect(plan.stats.formalLinksCreated).toBe(0);
    expect(plan.stats.formalLinksDeleted).toBe(0);
    expect(plan.schema.links.map((link) => link.id)).toEqual(['formal-existing', 'inferred-existing']);

    const planAfterForeignKeyRemoval = planLiveDatabaseStructuralSync({
      connectionId: 'conn-1',
      current,
      extracted: extractedWith([]),
      idFactory: idFactory(),
    });

    expect(planAfterForeignKeyRemoval.stats.formalLinksDeleted).toBe(1);
    expect(planAfterForeignKeyRemoval.schema.links.map((link) => link.id)).toEqual(['inferred-existing']);

    const planAfterForeignKeyCreation = planLiveDatabaseStructuralSync({
      connectionId: 'conn-1',
      current: { ...current, links: [current.links[1]] },
      extracted: extractedWith(customerForeignKeys()),
      idFactory: idFactory(),
    });

    expect(planAfterForeignKeyCreation.stats.formalLinksCreated).toBe(1);
    expect(planAfterForeignKeyCreation.schema.links[0]).toMatchObject({
      id: 'id-1',
      fromTableId: 'tbl-orders',
      fromColumnId: 'col-orders-customer',
      toTableId: 'tbl-customers',
      toColumnId: 'col-customers-id',
      source: 'formal',
      confidence: 1,
      relationshipType: 'MANY_TO_ONE',
      isPrimaryKeyReference: true,
    });
  });
});
|
||||
|
|
@ -0,0 +1,525 @@
|
|||
import type { LiveDatabaseExtractedSchema, LiveDatabaseExtractedTable } from './extracted-schema.js';
|
||||
import { buildLiveDatabaseTableNaturalKey } from './extracted-schema.js';
|
||||
|
||||
/** A column as tracked in the synced schema model. */
export interface LiveDatabaseSyncedColumn {
  /** Identifier of the column record. */
  id: string;
  /** Column name; used to match extracted columns against existing ones. */
  name: string;
  /** Column type as stored by the sync. */
  type: string;
  /** Whether the column accepts NULL. */
  nullable: boolean;
  /** Whether the column is part of the primary key. */
  primaryKey: boolean;
  /** Id of a parent column, or `null`; columns created by the planner get `null`. */
  parentColumnId: string | null;
  /** Description texts keyed by origin; this file reads and writes the `db` and `ai` keys. */
  descriptions: Record<string, string>;
  /** Embedding vector, or `null` when none is stored. */
  embedding: number[] | null;
  /** Sample values, or `null` when none are stored. */
  sampleValues: string[] | null;
  /** Cardinality, or `null` when unknown. */
  cardinality: number | null;
}
|
||||
|
||||
/** A table as tracked in the synced schema model, together with its columns. */
export interface LiveDatabaseSyncedTable {
  /** Identifier of the table record. */
  id: string;
  /** Table name. */
  name: string;
  /** Catalog qualifier, if any. */
  catalog: string | null;
  /** Database/schema qualifier, if any. */
  db: string | null;
  /** Enabled flag of the table record. */
  enabled: boolean;
  /** Description texts keyed by origin; this file reads and writes the `db` and `ai` keys. */
  descriptions: Record<string, string>;
  /** Columns of the table. */
  columns: LiveDatabaseSyncedColumn[];
}
|
||||
|
||||
/** A column-to-column relationship between two synced tables. */
export interface LiveDatabaseSyncedLink {
  /** Identifier of the link record. */
  id: string;
  /** Id of the table the link starts from. */
  fromTableId: string;
  /** Id of the column the link starts from. */
  fromColumnId: string;
  /** Id of the table the link points to. */
  toTableId: string;
  /** Id of the column the link points to. */
  toColumnId: string;
  /** Origin of the link. */
  source: 'formal' | 'inferred' | 'manual';
  /** Confidence score attached to the link. */
  confidence: number;
  /** Relationship type label (the fixtures in this repository use `MANY_TO_ONE`). */
  relationshipType: string;
  /** Whether the link references a primary key. */
  isPrimaryKeyReference: boolean;
}
|
||||
|
||||
/** The synced schema of one connection: its tables and the links between them. */
export interface LiveDatabaseSyncedSchema {
  /** Connection the schema belongs to. */
  connectionId: string;
  /** Tables with their columns. */
  tables: LiveDatabaseSyncedTable[];
  /** Relationships between columns of those tables. */
  links: LiveDatabaseSyncedLink[];
}
|
||||
|
||||
/** Ids of the tables and columns affected by a structural sync, grouped by kind of change. */
export interface LiveDatabaseStructuralChanges {
  /** Ids assigned to newly created tables. */
  newTableIds: string[];
  /** Ids assigned to newly created columns. */
  newColumnIds: string[];
  /** Ids of tables reported as structurally changed. */
  tablesWithStructuralChanges: string[];
  /** Ids of existing columns whose type changed. */
  columnsWithTypeChange: string[];
  /** Ids of existing columns whose description changed. */
  columnsWithDescriptionChange: string[];
  /** Ids of existing tables whose description changed. */
  tablesWithDescriptionChange: string[];
}
|
||||
|
||||
/** Counters summarising a structural sync plan. */
export interface LiveDatabaseStructuralSyncStats {
  /** Number of tables created. */
  tablesCreated: number;
  /** Number of tables deleted. */
  tablesDeleted: number;
  /** Number of columns created. */
  columnsCreated: number;
  /** Number of columns deleted. */
  columnsDeleted: number;
  /** Number of existing columns modified. */
  columnsModified: number;
  /** Number of formal links created. */
  formalLinksCreated: number;
  /** Number of formal links deleted. */
  formalLinksDeleted: number;
}
|
||||
|
||||
/** The write operations a structural sync plan asks the caller to perform. */
export interface LiveDatabaseStructuralSyncOperations {
  /** Ids of table records to delete. */
  deleteTableIds: string[];
  /** Ids of column records to delete. */
  deleteColumnIds: string[];
  /** Table records to insert. */
  insertTables: Array<{
    id: string;
    connectionId: string;
    name: string;
    catalog: string | null;
    db: string | null;
    enabled: boolean;
  }>;
  /** Column records to insert. */
  insertColumns: Array<{
    id: string;
    tableId: string;
    name: string;
    parentColumnId: string | null;
  }>;
  /** Ids of existing columns to touch (the test fixtures list the modified column here). */
  touchColumnIds: string[];
  /** Ids of columns whose stored embedding should be invalidated. */
  invalidateColumnEmbeddingIds: string[];
}
|
||||
|
||||
/** Result of planning a structural sync. */
export interface LiveDatabaseStructuralSyncPlan {
  /** The synced schema as it looks once the plan is applied. */
  schema: LiveDatabaseSyncedSchema;
  /** Ids of inferred links that should be re-validated. */
  inferredLinksToValidate: string[];
  /** Counters summarising the plan. */
  stats: LiveDatabaseStructuralSyncStats;
  /** Ids of affected tables and columns, grouped by kind of change. */
  changes: LiveDatabaseStructuralChanges;
  /** Write operations to perform. */
  operations: LiveDatabaseStructuralSyncOperations;
}
|
||||
|
||||
/** Input of `planLiveDatabaseStructuralSync`. */
export interface PlanLiveDatabaseStructuralSyncInput {
  /** Connection being synced. */
  connectionId: string;
  /** Currently stored schema, or `null` when there is none. */
  current: LiveDatabaseSyncedSchema | null;
  /** Schema freshly extracted from the live database. */
  extracted: LiveDatabaseExtractedSchema;
  /** Supplies ids for newly created records. */
  idFactory: () => string;
}
|
||||
|
||||
/** Outcome of planning the update of one existing table (returned by `planUpdatedTable`). */
interface UpdatedTableResult {
  /** The table as it looks after the update. */
  table: LiveDatabaseSyncedTable;
  /** Number of columns created on the table. */
  columnsCreated: number;
  /** Number of columns deleted from the table. */
  columnsDeleted: number;
  /** Number of existing columns modified. */
  columnsModified: number;
  /** Ids assigned to the created columns. */
  newColumnIds: string[];
  /** Ids of existing columns whose type changed. */
  columnsWithTypeChange: string[];
  /** Ids of existing columns whose description changed. */
  columnsWithDescriptionChange: string[];
  /** Whether the table's own `db` description differs from the extracted comment. */
  tableDescriptionChanged: boolean;
}
|
||||
|
||||
/**
 * Returns a copy of `descriptions` refreshed from the database comment.
 *
 * - The `db` entry is set to `dbComment` when that is a non-empty string and
 *   removed otherwise.
 * - The `ai` entry is dropped when `changed` is true.
 * - All other entries are carried over unchanged, in their original order.
 *
 * The input object is not modified.
 *
 * @param descriptions - Existing descriptions keyed by origin.
 * @param dbComment - Comment read from the database, if any.
 * @param changed - Whether the AI-generated text should be discarded.
 * @returns The updated descriptions.
 */
function updateDescription(
  descriptions: Record<string, string>,
  dbComment: string | null | undefined,
  changed: boolean,
): Record<string, string> {
  const keptEntries = Object.entries(descriptions)
    .filter(([origin]) => {
      if (origin === 'ai') {
        return !changed;
      }
      if (origin === 'db') {
        return Boolean(dbComment);
      }
      return true;
    })
    // An existing `db` entry is overwritten in place so key order is preserved.
    .map(([origin, text]): [string, string] => [origin, origin === 'db' && dbComment ? dbComment : text]);

  const updated: Record<string, string> = Object.fromEntries(keptEntries);
  if (dbComment && updated.db === undefined) {
    updated.db = dbComment;
  }
  return updated;
}
|
||||
|
||||
/**
 * Builds the initial descriptions for a newly created record.
 *
 * @param dbComment - Comment read from the database, if any.
 * @returns `{ db: dbComment }` for a non-empty comment, otherwise an empty object.
 */
function descriptionFromDbComment(dbComment: string | null | undefined): Record<string, string> {
  if (!dbComment) {
    return {};
  }
  return { db: dbComment };
}
|
||||
|
||||
/**
 * Plans the column-level changes that bring one already-synced table in line
 * with its freshly extracted definition.
 *
 * Columns are matched by name. Columns found only in the current table are
 * queued for deletion; columns found only in the extracted table are queued for
 * insertion with a fresh id; surviving columns are compared on type,
 * nullability, primary-key flag, and DB comment.
 *
 * Side effects: appends to `args.operations` and to `args.inferredLinksToValidate`
 * (both shared with the caller across tables).
 *
 * @param args.currentTable Table as it is currently synced.
 * @param args.extractedTable Table as extracted from the live database.
 * @param args.currentLinks All currently synced links; only `inferred` ones are consulted.
 * @param args.inferredLinksToValidate Shared list of inferred link ids to re-validate; ids are
 *   appended (without duplicates) when one of their columns changes type.
 * @param args.operations Shared operation lists that this function appends to.
 * @param args.idFactory Mints ids for newly created columns.
 * @returns The updated table snapshot plus per-table counters and changed-id lists.
 */
function planUpdatedTable(args: {
  currentTable: LiveDatabaseSyncedTable;
  extractedTable: LiveDatabaseExtractedTable;
  currentLinks: LiveDatabaseSyncedLink[];
  inferredLinksToValidate: string[];
  operations: LiveDatabaseStructuralSyncOperations;
  idFactory: () => string;
}): UpdatedTableResult {
  const { currentTable, extractedTable, currentLinks, inferredLinksToValidate, operations, idFactory } = args;

  let columnsCreated = 0;
  let columnsDeleted = 0;
  let columnsModified = 0;
  const newColumnIds: string[] = [];
  const columnsWithTypeChange: string[] = [];
  const columnsWithDescriptionChange: string[] = [];
  const updatedColumns: LiveDatabaseSyncedColumn[] = [];

  const tableDescriptionChanged = (currentTable.descriptions.db ?? null) !== (extractedTable.dbComment ?? null);
  const currentColumnsByName = new Map(currentTable.columns.map((column) => [column.name, column]));
  const extractedColumnsByName = new Map(extractedTable.columns.map((column) => [column.name, column]));

  // Only inferred links are ever re-validated, so filter them once up front.
  const inferredLinks = currentLinks.filter((link) => link.source === 'inferred');
  // Seeded from the shared list so ids queued while planning earlier tables are
  // not appended twice; replaces a linear `includes` scan per link.
  const queuedLinkIds = new Set(inferredLinksToValidate);

  // Columns that disappeared from the database.
  for (const [name, currentColumn] of currentColumnsByName) {
    if (!extractedColumnsByName.has(name)) {
      operations.deleteColumnIds.push(currentColumn.id);
      columnsDeleted++;
    }
  }

  for (const [name, extractedColumn] of extractedColumnsByName) {
    const currentColumn = currentColumnsByName.get(name);

    // Brand-new column: mint an id and start with empty derived data.
    if (!currentColumn) {
      const columnId = idFactory();
      operations.insertColumns.push({
        id: columnId,
        tableId: currentTable.id,
        name: extractedColumn.name,
        parentColumnId: null,
      });
      columnsCreated++;
      newColumnIds.push(columnId);
      updatedColumns.push({
        id: columnId,
        name: extractedColumn.name,
        type: extractedColumn.type,
        nullable: extractedColumn.nullable,
        primaryKey: extractedColumn.primaryKey,
        descriptions: descriptionFromDbComment(extractedColumn.dbComment),
        parentColumnId: null,
        embedding: null,
        sampleValues: null,
        cardinality: null,
      });
      continue;
    }

    const typeChanged = currentColumn.type !== extractedColumn.type;
    const nullableChanged = currentColumn.nullable !== extractedColumn.nullable;
    const primaryKeyChanged = currentColumn.primaryKey !== extractedColumn.primaryKey;
    const dbDescriptionChanged = (currentColumn.descriptions.db ?? null) !== (extractedColumn.dbComment ?? null);
    // Single source of truth for "this column's embedding is stale", used both
    // for the invalidation operation and for the returned snapshot below.
    const embeddingInvalidated = typeChanged || dbDescriptionChanged;

    if (typeChanged || nullableChanged || primaryKeyChanged || dbDescriptionChanged) {
      operations.touchColumnIds.push(currentColumn.id);
      columnsModified++;

      if (embeddingInvalidated) {
        operations.invalidateColumnEmbeddingIds.push(currentColumn.id);
      }

      if (typeChanged) {
        columnsWithTypeChange.push(currentColumn.id);
        // A type change can break an inferred relationship on either end.
        for (const link of inferredLinks) {
          const touchesColumn = link.fromColumnId === currentColumn.id || link.toColumnId === currentColumn.id;
          if (touchesColumn && !queuedLinkIds.has(link.id)) {
            queuedLinkIds.add(link.id);
            inferredLinksToValidate.push(link.id);
          }
        }
      }

      if (dbDescriptionChanged) {
        columnsWithDescriptionChange.push(currentColumn.id);
      }
    }

    updatedColumns.push({
      ...currentColumn,
      type: extractedColumn.type,
      nullable: extractedColumn.nullable,
      primaryKey: extractedColumn.primaryKey,
      descriptions: updateDescription(currentColumn.descriptions, extractedColumn.dbComment, dbDescriptionChanged),
      // Keep the snapshot consistent with `invalidateColumnEmbeddingIds`: an
      // embedding queued for invalidation must not survive in the returned schema.
      embedding: embeddingInvalidated ? null : currentColumn.embedding,
    });
  }

  return {
    table: {
      ...currentTable,
      descriptions: updateDescription(currentTable.descriptions, extractedTable.dbComment, tableDescriptionChanged),
      columns: updatedColumns,
    },
    columnsCreated,
    columnsDeleted,
    columnsModified,
    newColumnIds,
    columnsWithTypeChange,
    columnsWithDescriptionChange,
    tableDescriptionChanged,
  };
}
|
||||
|
||||
/**
 * Plans the insertion of a table that exists only in the extracted schema.
 *
 * Mints one id for the table and then one per column (in extraction order),
 * appending the matching entries to `operations.insertTables` and
 * `operations.insertColumns`.
 *
 * @param args.connectionId Connection recorded on the inserted table.
 * @param args.extractedTable Table definition extracted from the live database.
 * @param args.operations Shared operation lists that this function appends to.
 * @param args.idFactory Mints the table and column ids.
 * @returns The synced-table snapshot for the new table, enabled, with no derived column data yet.
 */
function planCreatedTable(args: {
  connectionId: string;
  extractedTable: LiveDatabaseExtractedTable;
  operations: LiveDatabaseStructuralSyncOperations;
  idFactory: () => string;
}): LiveDatabaseSyncedTable {
  const source = args.extractedTable;
  const pending = args.operations;

  const tableId = args.idFactory();
  pending.insertTables.push({
    id: tableId,
    connectionId: args.connectionId,
    name: source.name,
    catalog: source.catalog,
    db: source.db,
    enabled: true,
  });

  const columns: LiveDatabaseSyncedColumn[] = [];
  for (const extractedColumn of source.columns) {
    const id = args.idFactory();
    pending.insertColumns.push({ id, tableId, name: extractedColumn.name, parentColumnId: null });
    columns.push({
      id,
      name: extractedColumn.name,
      type: extractedColumn.type,
      nullable: extractedColumn.nullable,
      primaryKey: extractedColumn.primaryKey,
      descriptions: descriptionFromDbComment(extractedColumn.dbComment),
      parentColumnId: null,
      embedding: null,
      sampleValues: null,
      cardinality: null,
    });
  }

  return {
    id: tableId,
    name: source.name,
    catalog: source.catalog,
    db: source.db,
    enabled: true,
    descriptions: descriptionFromDbComment(source.dbComment),
    columns,
  };
}
|
||||
|
||||
/**
 * Reconciles formal (foreign-key) links against the extracted schema.
 *
 * Each extracted foreign key is resolved to synced table/column ids. The target
 * table is looked up in the same catalog/db as the source table. Foreign keys
 * whose table or column cannot be resolved are ignored. Links are identified by
 * their `fromColumnId->toColumnId` pair: current formal links without a matching
 * extracted pair are dropped, and extracted pairs without a current link are
 * created.
 *
 * Extracted foreign keys that resolve to the same column pair are collapsed, so
 * at most one link is created per pair and `created` counts links, not
 * constraints.
 *
 * @param args.extracted Extracted schema whose `foreignKeys` are the source of truth.
 * @param args.tables Synced tables (after updates/creations) used to resolve column ids.
 * @param args.tableNaturalKeyToId Natural table key to synced table id.
 * @param args.currentLinks All currently synced links; only `formal` ones are considered.
 * @param args.idFactory Mints ids for newly created links.
 * @returns Surviving plus new formal links, and how many were created and deleted.
 */
function syncFormalLinks(args: {
  extracted: LiveDatabaseExtractedSchema;
  tables: LiveDatabaseSyncedTable[];
  tableNaturalKeyToId: Map<string, string>;
  currentLinks: LiveDatabaseSyncedLink[];
  idFactory: () => string;
}): { links: LiveDatabaseSyncedLink[]; created: number; deleted: number } {
  const { extracted, tables, tableNaturalKeyToId, currentLinks, idFactory } = args;

  const linkKey = (link: { fromColumnId: string; toColumnId: string }): string =>
    `${link.fromColumnId}->${link.toColumnId}`;

  const columnKeyToId = new Map<string, string>();
  for (const table of tables) {
    const tableKey = buildLiveDatabaseTableNaturalKey(table);
    for (const column of table.columns) {
      columnKeyToId.set(`${tableKey}.${column.name}`, column.id);
    }
  }

  // Keyed by column pair so repeated foreign keys collapse to a single link;
  // Map iteration keeps first-seen order.
  const extractedByKey = new Map<
    string,
    { fromTableId: string; fromColumnId: string; toTableId: string; toColumnId: string }
  >();

  for (const table of extracted.tables) {
    const fromTableKey = buildLiveDatabaseTableNaturalKey(table);
    const fromTableId = tableNaturalKeyToId.get(fromTableKey);
    if (!fromTableId) {
      continue;
    }

    for (const foreignKey of table.foreignKeys) {
      const toTableKey = buildLiveDatabaseTableNaturalKey({
        catalog: table.catalog,
        db: table.db,
        name: foreignKey.toTable,
      });
      const toTableId = tableNaturalKeyToId.get(toTableKey);
      if (!toTableId) {
        continue;
      }

      const fromColumnId = columnKeyToId.get(`${fromTableKey}.${foreignKey.fromColumn}`);
      const toColumnId = columnKeyToId.get(`${toTableKey}.${foreignKey.toColumn}`);
      if (!fromColumnId || !toColumnId) {
        continue;
      }

      const candidate = { fromTableId, fromColumnId, toTableId, toColumnId };
      const key = linkKey(candidate);
      if (!extractedByKey.has(key)) {
        extractedByKey.set(key, candidate);
      }
    }
  }

  const currentFormalLinks = currentLinks.filter((link) => link.source === 'formal');
  const currentLinkKeys = new Set(currentFormalLinks.map((link) => linkKey(link)));

  // A current formal link survives only if the database still declares it.
  const preservedFormalLinks = currentFormalLinks.filter((link) => extractedByKey.has(linkKey(link)));
  const deleted = currentFormalLinks.length - preservedFormalLinks.length;

  const newLinks = [...extractedByKey.values()]
    .filter((link) => !currentLinkKeys.has(linkKey(link)))
    .map((linkData) => ({
      id: idFactory(),
      fromTableId: linkData.fromTableId,
      fromColumnId: linkData.fromColumnId,
      toTableId: linkData.toTableId,
      toColumnId: linkData.toColumnId,
      source: 'formal' as const,
      confidence: 1,
      relationshipType: 'MANY_TO_ONE',
      isPrimaryKeyReference: true,
    }));

  return {
    links: [...preservedFormalLinks, ...newLinks],
    created: newLinks.length,
    deleted,
  };
}
|
||||
|
||||
/**
 * Computes the structural sync plan that turns the currently synced schema into
 * the freshly extracted one.
 *
 * Tables are matched by natural key (`buildLiveDatabaseTableNaturalKey`):
 * - only in `current`: queued for deletion (their columns count as deleted);
 * - in both: reconciled column by column via `planUpdatedTable`;
 * - only in `extracted`: planned as new via `planCreatedTable`.
 *
 * Formal links are then re-derived from extracted foreign keys. Inferred links
 * are carried over unless they reference a deleted table or a deleted column,
 * so the returned schema never contains links to things the plan removes.
 * `inferredLinksToValidate` is restricted to links that are still in the schema.
 *
 * The function does not perform any I/O; it only returns the plan.
 *
 * @param input Connection id, current and extracted schemas, and the id factory.
 * @returns The post-sync schema snapshot, link ids to re-validate, counters,
 *   changed-id lists, and the low-level operations to apply.
 */
export function planLiveDatabaseStructuralSync(
  input: PlanLiveDatabaseStructuralSyncInput,
): LiveDatabaseStructuralSyncPlan {
  const operations: LiveDatabaseStructuralSyncOperations = {
    deleteTableIds: [],
    deleteColumnIds: [],
    insertTables: [],
    insertColumns: [],
    touchColumnIds: [],
    invalidateColumnEmbeddingIds: [],
  };
  const stats: LiveDatabaseStructuralSyncStats = {
    tablesCreated: 0,
    tablesDeleted: 0,
    columnsCreated: 0,
    columnsDeleted: 0,
    columnsModified: 0,
    formalLinksCreated: 0,
    formalLinksDeleted: 0,
  };
  const changes: LiveDatabaseStructuralChanges = {
    newTableIds: [],
    newColumnIds: [],
    tablesWithStructuralChanges: [],
    columnsWithTypeChange: [],
    columnsWithDescriptionChange: [],
    tablesWithDescriptionChange: [],
  };
  const inferredLinksToValidate: string[] = [];
  const currentLinks = input.current?.links ?? [];

  const currentTablesByKey = new Map<string, LiveDatabaseSyncedTable>();
  const extractedTablesByKey = new Map<string, LiveDatabaseExtractedTable>();

  if (input.current) {
    for (const table of input.current.tables) {
      currentTablesByKey.set(buildLiveDatabaseTableNaturalKey(table), table);
    }
  }
  for (const table of input.extracted.tables) {
    extractedTablesByKey.set(buildLiveDatabaseTableNaturalKey(table), table);
  }

  // Classify every table as deleted, updated, or created.
  const tablesToDelete: LiveDatabaseSyncedTable[] = [];
  const tablesToUpdate: Array<{
    current: LiveDatabaseSyncedTable;
    extracted: LiveDatabaseExtractedTable;
  }> = [];
  const tablesToCreate: LiveDatabaseExtractedTable[] = [];

  for (const [key, table] of currentTablesByKey) {
    const extractedTable = extractedTablesByKey.get(key);
    if (!extractedTable) {
      tablesToDelete.push(table);
    } else {
      tablesToUpdate.push({ current: table, extracted: extractedTable });
    }
  }

  for (const [key, table] of extractedTablesByKey) {
    if (!currentTablesByKey.has(key)) {
      tablesToCreate.push(table);
    }
  }

  // Deleted tables take their columns with them; only the table id is queued.
  for (const table of tablesToDelete) {
    operations.deleteTableIds.push(table.id);
    stats.tablesDeleted++;
    stats.columnsDeleted += table.columns.length;
  }

  const updatedTables: LiveDatabaseSyncedTable[] = [];
  for (const { current, extracted } of tablesToUpdate) {
    const result = planUpdatedTable({
      currentTable: current,
      extractedTable: extracted,
      currentLinks,
      inferredLinksToValidate,
      operations,
      idFactory: input.idFactory,
    });
    updatedTables.push(result.table);
    stats.columnsCreated += result.columnsCreated;
    stats.columnsDeleted += result.columnsDeleted;
    stats.columnsModified += result.columnsModified;
    changes.newColumnIds.push(...result.newColumnIds);
    changes.columnsWithTypeChange.push(...result.columnsWithTypeChange);
    changes.columnsWithDescriptionChange.push(...result.columnsWithDescriptionChange);
    if (result.tableDescriptionChanged) {
      changes.tablesWithDescriptionChange.push(current.id);
    }
    // Nullability / primary-key / comment changes alone are not "structural".
    if (result.columnsCreated > 0 || result.columnsDeleted > 0 || result.columnsWithTypeChange.length > 0) {
      changes.tablesWithStructuralChanges.push(current.id);
    }
  }

  const createdTables: LiveDatabaseSyncedTable[] = [];
  for (const extractedTable of tablesToCreate) {
    const table = planCreatedTable({
      connectionId: input.connectionId,
      extractedTable,
      operations,
      idFactory: input.idFactory,
    });
    createdTables.push(table);
    stats.tablesCreated++;
    stats.columnsCreated += table.columns.length;
    changes.newTableIds.push(table.id);
    changes.newColumnIds.push(...table.columns.map((column) => column.id));
    changes.tablesWithStructuralChanges.push(table.id);
  }

  const allTables = [...updatedTables, ...createdTables];
  const tableNaturalKeyToId = new Map<string, string>();
  for (const table of allTables) {
    tableNaturalKeyToId.set(buildLiveDatabaseTableNaturalKey(table), table.id);
  }

  const formalLinkResult = syncFormalLinks({
    extracted: input.extracted,
    tables: allTables,
    tableNaturalKeyToId,
    currentLinks,
    idFactory: input.idFactory,
  });
  stats.formalLinksCreated = formalLinkResult.created;
  stats.formalLinksDeleted = formalLinkResult.deleted;

  // An inferred link survives only if both of its tables AND both of its
  // columns survive. `deleteColumnIds` holds columns dropped from tables that
  // still exist; columns of deleted tables are covered by the table check.
  const deletedTableIds = new Set(tablesToDelete.map((table) => table.id));
  const deletedColumnIds = new Set(operations.deleteColumnIds);
  const preservedInferredLinks = currentLinks.filter(
    (link) =>
      link.source === 'inferred' &&
      !deletedTableIds.has(link.fromTableId) &&
      !deletedTableIds.has(link.toTableId) &&
      !deletedColumnIds.has(link.fromColumnId) &&
      !deletedColumnIds.has(link.toColumnId),
  );

  // Do not ask callers to re-validate links that are no longer in the schema.
  const preservedInferredLinkIds = new Set(preservedInferredLinks.map((link) => link.id));
  const survivingLinksToValidate = inferredLinksToValidate.filter((linkId) => preservedInferredLinkIds.has(linkId));

  return {
    schema: {
      connectionId: input.connectionId,
      tables: allTables,
      links: [...formalLinkResult.links, ...preservedInferredLinks],
    },
    inferredLinksToValidate: survivingLinksToValidate,
    stats,
    changes,
    operations,
  };
}
|
||||
10
packages/context/src/ingest/adapters/live-database/types.ts
Normal file
10
packages/context/src/ingest/adapters/live-database/types.ts
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
import type { KloSchemaSnapshot } from '../../../scan/types.js';
|
||||
|
||||
/** Port through which the live-database adapter obtains a schema snapshot. */
export interface LiveDatabaseIntrospectionPort {
  /**
   * Extracts the schema for one connection.
   *
   * @param connectionId Identifier of the connection to introspect.
   * @returns A promise for the extracted schema snapshot.
   */
  extractSchema(connectionId: string): Promise<KloSchemaSnapshot>;
}
|
||||
|
||||
/** Dependencies handed to the live-database source adapter. */
export interface LiveDatabaseSourceAdapterDeps {
  /** Schema introspection implementation. */
  introspection: LiveDatabaseIntrospectionPort;
  /** Optional clock override. NOTE(review): presumably for deterministic timestamps in tests; usage is not visible here. */
  now?: () => Date;
}
|
||||
154
packages/context/src/ingest/adapters/looker/chunk.test.ts
Normal file
154
packages/context/src/ingest/adapters/looker/chunk.test.ts
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { chunkLookerStagedDir } from './chunk.js';
|
||||
import { writeLookerEvidenceDocuments } from './evidence-documents.js';
|
||||
|
||||
/**
 * Test helper: writes `value` as pretty-printed JSON (2-space indent, trailing
 * newline) to `relPath` under `stagedDir`, creating parent directories first.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const target = join(stagedDir, relPath);
  const parentDir = join(target, '..');
  const body = `${JSON.stringify(value, null, 2)}\n`;

  await mkdir(parentDir, { recursive: true });
  await writeFile(target, body, 'utf-8');
}
|
||||
|
||||
/**
 * Test helper: stages a minimal Looker project (one model/explore, one
 * dashboard, one Look, folders, one user, and usage signals) into `stagedDir`,
 * then generates the evidence documents for it.
 */
async function writeSmallFixture(stagedDir: string): Promise<void> {
  const stagedFiles: Array<[string, unknown]> = [
    [
      'sync-config.json',
      {
        lookerConnectionId: '11111111-1111-4111-8111-111111111111',
        fetchedAt: '2026-04-30T12:30:00.000Z',
      },
    ],
    [
      'lookml_models.json',
      {
        models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
      },
    ],
    [
      'explores/b2b/sales_pipeline.json',
      {
        modelName: 'b2b',
        exploreName: 'sales_pipeline',
        label: 'Sales Pipeline',
        description: null,
        fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
        joins: [],
      },
    ],
    [
      'dashboards/10.json',
      {
        lookerId: '10',
        title: 'Sales Pipeline',
        description: null,
        folderId: '7',
        ownerId: '3',
        updatedAt: '2026-04-30T12:00:00.000Z',
        tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
      },
    ],
    [
      'looks/20.json',
      {
        lookerId: '20',
        title: 'Open Pipeline',
        description: null,
        folderId: '7',
        ownerId: '3',
        updatedAt: '2026-04-30T12:00:00.000Z',
        query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
      },
    ],
    [
      'folders/tree.json',
      {
        folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }],
      },
    ],
    ['users/3.json', { id: '3', displayName: 'Ada Lovelace', email: null }],
    ['signals/dashboard_usage.json', [{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8 }]],
    ['signals/look_usage.json', [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5 }]],
    [
      'signals/scheduled_plans.json',
      [{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 }],
    ],
    ['signals/favorites.json', [{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }]],
  ];

  // Written one at a time, in declaration order.
  for (const [relPath, value] of stagedFiles) {
    await writeJson(stagedDir, relPath, value);
  }

  // Evidence documents are derived from the staged files, so this runs last.
  await writeLookerEvidenceDocuments(stagedDir);
}
|
||||
|
||||
// Chunking behavior for a staged Looker project: first run, diff-driven run,
// and deletion handling.
describe('chunkLookerStagedDir', () => {
  let stagedDir: string;

  type Chunked = Awaited<ReturnType<typeof chunkLookerStagedDir>>;

  const ALL_UNIT_KEYS = ['looker-dashboard-10', 'looker-explore-b2b-sales_pipeline', 'looker-look-20'];
  const DASHBOARD_RAW_FILES = [
    'dashboards/10.json',
    'evidence/dashboards/10/metadata.json',
    'evidence/dashboards/10/page.md',
  ];

  const sortedUnitKeys = (chunked: Chunked): string[] => chunked.workUnits.map((unit) => unit.unitKey).sort();
  const unitByKey = (chunked: Chunked, unitKey: string) => chunked.workUnits.find((unit) => unit.unitKey === unitKey);

  // Each test gets a fresh temp directory holding the small fixture.
  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-chunk-'));
    await writeSmallFixture(stagedDir);
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('emits one WU per explore, dashboard, and Look with readable dependencies', async () => {
    const chunked = await chunkLookerStagedDir(stagedDir);

    expect(chunked.reconcileNotes).toEqual([
      expect.stringContaining('emit_artifact_resolution with actionType="subsumed"'),
    ]);
    expect(sortedUnitKeys(chunked)).toEqual(ALL_UNIT_KEYS);

    const dashboardUnit = unitByKey(chunked, 'looker-dashboard-10');
    expect(dashboardUnit?.rawFiles).toEqual(DASHBOARD_RAW_FILES);
    expect(dashboardUnit?.notes).toContain('context_candidate_write');
    expect(dashboardUnit?.notes).not.toContain('wiki_write');
    expect(dashboardUnit?.dependencyPaths.sort()).toEqual([
      'explores/b2b/sales_pipeline.json',
      'folders/tree.json',
      'signals/dashboard_usage.json',
      'signals/favorites.json',
      'signals/scheduled_plans.json',
      'users/3.json',
    ]);

    const exploreUnit = unitByKey(chunked, 'looker-explore-b2b-sales_pipeline');
    expect(exploreUnit?.rawFiles).toEqual([
      'explores/b2b/sales_pipeline.json',
      'evidence/explores/b2b/sales_pipeline/metadata.json',
      'evidence/explores/b2b/sales_pipeline/page.md',
    ]);
    expect(exploreUnit?.dependencyPaths).toEqual(['lookml_models.json']);
  });

  it('keeps downstream dashboard and Look WUs when an explore dependency changes', async () => {
    // Only the explore file changed; the dashboard and Look depend on it.
    const diffSet = {
      added: [],
      modified: ['explores/b2b/sales_pipeline.json'],
      deleted: [],
      unchanged: [
        'dashboards/10.json',
        'looks/20.json',
        'lookml_models.json',
        'folders/tree.json',
        'users/3.json',
        'signals/dashboard_usage.json',
        'signals/look_usage.json',
        'signals/scheduled_plans.json',
        'signals/favorites.json',
      ],
    };
    const chunked = await chunkLookerStagedDir(stagedDir, diffSet);

    expect(sortedUnitKeys(chunked)).toEqual(ALL_UNIT_KEYS);
    expect(unitByKey(chunked, 'looker-dashboard-10')?.rawFiles).toEqual(DASHBOARD_RAW_FILES);
  });

  it('returns an EvictionUnit for deleted runtime entity raw paths', async () => {
    const diffSet = {
      added: [],
      modified: [],
      deleted: ['looks/20.json'],
      unchanged: ['dashboards/10.json', 'explores/b2b/sales_pipeline.json'],
    };
    const chunked = await chunkLookerStagedDir(stagedDir, diffSet);

    expect(chunked.eviction).toEqual({ deletedRawPaths: ['looks/20.json'] });
  });
});
|
||||
198
packages/context/src/ingest/adapters/looker/chunk.ts
Normal file
198
packages/context/src/ingest/adapters/looker/chunk.ts
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
import { readdir, readFile } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
|
||||
import { buildLookerReconcileNotes } from './reconcile.js';
|
||||
import {
|
||||
STAGED_FILES,
|
||||
type StagedDashboardFile,
|
||||
type StagedLookerQuery,
|
||||
type StagedLookFile,
|
||||
stagedDashboardFileSchema,
|
||||
stagedExploreFileSchema,
|
||||
stagedLookFileSchema,
|
||||
} from './types.js';
|
||||
|
||||
/** In-memory view of a staged Looker directory, as produced by `loadProject`. */
interface LoadedLookerProject {
  /** Every file under the staged directory, as sorted, `/`-separated relative paths. */
  allPaths: string[];
  /** Parsed `dashboards/*.json` files keyed by their relative path. */
  dashboardsByPath: Map<string, StagedDashboardFile>;
  /** Parsed `looks/*.json` files keyed by their relative path. */
  looksByPath: Map<string, StagedLookFile>;
  /** Sorted, de-duplicated explore paths derived from each explore file's model and explore names. */
  explorePaths: string[];
}
|
||||
|
||||
/**
 * Lists every regular file under `root`, recursively.
 *
 * @param root Directory to scan.
 * @returns Sorted paths relative to `root`, with backslashes normalised to `/`.
 */
async function walk(root: string): Promise<string[]> {
  const dirents = await readdir(root, { withFileTypes: true, recursive: true });

  const files: string[] = [];
  for (const dirent of dirents) {
    if (!dirent.isFile()) {
      continue;
    }
    const absolute = join(dirent.parentPath, dirent.name);
    files.push(relative(root, absolute).replace(/\\/g, '/'));
  }

  return files.sort();
}
|
||||
|
||||
/**
 * Loads the staged Looker directory into memory.
 *
 * Walks `stagedDir`, then reads and schema-validates every dashboard, Look, and
 * explore file (one at a time, in path order). Explore paths are rebuilt from
 * the `modelName` / `exploreName` inside each explore file rather than taken
 * from the file's own location.
 *
 * @param stagedDir Root of the staged Looker export.
 * @returns The loaded project; rejects if a file cannot be read, parsed, or validated.
 */
async function loadProject(stagedDir: string): Promise<LoadedLookerProject> {
  const dashboardFile = /^dashboards\/[^/]+\.json$/;
  const lookFile = /^looks\/[^/]+\.json$/;
  const exploreFile = /^explores\/[^/]+\/[^/]+\.json$/;

  const readStagedJson = async (relPath: string): Promise<unknown> =>
    JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8'));

  const allPaths = await walk(stagedDir);
  const dashboardsByPath = new Map<string, StagedDashboardFile>();
  const looksByPath = new Map<string, StagedLookFile>();
  const uniqueExplorePaths = new Set<string>();

  for (const path of allPaths) {
    if (dashboardFile.test(path)) {
      dashboardsByPath.set(path, stagedDashboardFileSchema.parse(await readStagedJson(path)));
    } else if (lookFile.test(path)) {
      looksByPath.set(path, stagedLookFileSchema.parse(await readStagedJson(path)));
    } else if (exploreFile.test(path)) {
      const explore = stagedExploreFileSchema.parse(await readStagedJson(path));
      uniqueExplorePaths.add(explorePath(explore.modelName, explore.exploreName));
    }
  }

  return { allPaths, dashboardsByPath, looksByPath, explorePaths: [...uniqueExplorePaths].sort() };
}
|
||||
|
||||
/**
 * Splits a staged Looker directory into work units.
 *
 * Without a `diffSet`, every explore, dashboard, and Look yields a unit. With a
 * `diffSet`, only units whose raw files or dependencies were added or modified
 * are kept, and any deleted paths are reported (sorted) as the eviction unit.
 * Reconcile notes are attached whenever there is at least one work unit or an
 * eviction.
 *
 * @param stagedDir Root of the staged Looker export.
 * @param diffSet Optional change set relative to the previous run.
 * @returns Work units, optional eviction, and reconcile notes.
 */
export async function chunkLookerStagedDir(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
  const project = await loadProject(stagedDir);
  const firstRunUnits = emitFirstRunWorkUnits(project);

  let result: ChunkResult = { workUnits: firstRunUnits };
  if (diffSet) {
    result = applyDiffSet(firstRunUnits, diffSet);
  }

  let eviction = result.eviction;
  if (diffSet && diffSet.deleted.length > 0) {
    eviction = { deletedRawPaths: [...diffSet.deleted].sort() };
  }

  const hasWork = result.workUnits.length > 0 || eviction;
  return {
    ...result,
    eviction,
    reconcileNotes: hasWork ? buildLookerReconcileNotes() : [],
  };
}
|
||||
|
||||
/**
 * Builds the full set of work units for a project, as if nothing had been
 * processed before: one per explore, one per dashboard, one per Look.
 *
 * Dependencies are only listed when the corresponding file exists in the
 * staged directory.
 *
 * @param project Loaded staged project.
 * @returns Work units sorted by `unitKey`.
 */
function emitFirstRunWorkUnits(project: LoadedLookerProject): WorkUnit[] {
  const units: WorkUnit[] = [];

  const byPath = <T>([left]: [string, T], [right]: [string, T]): number => left.localeCompare(right);

  // Dashboards and Looks share the same instructions apart from the subject noun.
  const candidateNotes = (subject: string): string =>
    `Extract generalizable metric, segment, and domain knowledge from this ${subject}. Treat usage, owner, and folder data as prioritization/provenance context only. Use context_evidence_search/context_evidence_read and context_candidate_write for wiki-bound knowledge; do not write wiki pages directly from this WorkUnit.`;

  // Dependencies common to dashboards and Looks: folders, signals, and the owner's user file.
  const contentDeps = (
    usageSignalPath: string,
    ownerId: StagedDashboardFile['ownerId'] | StagedLookFile['ownerId'],
  ): Set<string> => {
    const deps = new Set<string>();
    for (const shared of [
      STAGED_FILES.foldersTree,
      usageSignalPath,
      STAGED_FILES.signals.scheduledPlans,
      STAGED_FILES.signals.favorites,
    ]) {
      addIfPresent(project, deps, shared);
    }
    if (ownerId) {
      addIfPresent(project, deps, `users/${ownerId}.json`);
    }
    return deps;
  };

  // Explores: each depends on the LookML models index when it is staged.
  const hasLookmlModels = project.allPaths.includes(STAGED_FILES.lookmlModels);
  for (const path of project.explorePaths) {
    const match = /^explores\/([^/]+)\/([^/]+)\.json$/.exec(path);
    if (match === null) {
      continue;
    }
    const [, modelName, exploreName] = match;
    units.push(
      buildUnit(project, {
        unitKey: `looker-explore-${modelName}-${exploreName}`,
        displayLabel: `Looker explore ${modelName}.${exploreName}`,
        rawFiles: [path, ...evidencePathsForExplore(project, modelName, exploreName)],
        dependencyPaths: hasLookmlModels ? [STAGED_FILES.lookmlModels] : [],
        notes: `Write API-derived SL source looker__${modelName}__${exploreName} and durable domain knowledge for this Looker explore.`,
      }),
    );
  }

  // Dashboards: additionally depend on the explore behind every tile query.
  for (const [path, dashboard] of [...project.dashboardsByPath.entries()].sort(byPath)) {
    const deps = contentDeps(STAGED_FILES.signals.dashboardUsage, dashboard.ownerId);
    for (const tile of dashboard.tiles) {
      addExploreDependency(project, deps, tile.query);
    }
    units.push(
      buildUnit(project, {
        unitKey: `looker-dashboard-${dashboard.lookerId}`,
        displayLabel: `Looker dashboard "${dashboard.title}"`,
        rawFiles: [path, ...evidencePathsForDashboard(project, dashboard.lookerId)],
        dependencyPaths: [...deps].sort(),
        notes: candidateNotes('dashboard'),
      }),
    );
  }

  // Looks: additionally depend on the explore behind their single query.
  for (const [path, look] of [...project.looksByPath.entries()].sort(byPath)) {
    const deps = contentDeps(STAGED_FILES.signals.lookUsage, look.ownerId);
    addExploreDependency(project, deps, look.query);
    units.push(
      buildUnit(project, {
        unitKey: `looker-look-${look.lookerId}`,
        displayLabel: `Looker Look "${look.title}"`,
        rawFiles: [path, ...evidencePathsForLook(project, look.lookerId)],
        dependencyPaths: [...deps].sort(),
        notes: candidateNotes('Look'),
      }),
    );
  }

  return units.sort((left, right) => left.unitKey.localeCompare(right.unitKey));
}
|
||||
|
||||
/**
 * Completes a work unit by adding its `peerFileIndex`: every staged path that
 * is neither one of the unit's raw files nor one of its dependencies, sorted.
 *
 * @param project Loaded staged project supplying the full path list.
 * @param input The unit's identifying fields, raw files, dependencies, and notes.
 * @returns `input` plus the computed `peerFileIndex`.
 */
function buildUnit(
  project: LoadedLookerProject,
  input: Pick<WorkUnit, 'unitKey' | 'displayLabel' | 'rawFiles' | 'dependencyPaths' | 'notes'>,
): WorkUnit {
  const claimed = new Set<string>(input.rawFiles);
  for (const dependency of input.dependencyPaths) {
    claimed.add(dependency);
  }

  const peerFileIndex = project.allPaths.filter((candidate) => !claimed.has(candidate));
  peerFileIndex.sort();

  return { ...input, peerFileIndex };
}
|
||||
|
||||
/**
 * Narrows the first-run work units to those affected by a change set.
 *
 * A unit is kept when any of its raw files or dependency paths appears in
 * `diffSet.added` or `diffSet.modified`. Deleted and unchanged paths are not
 * consulted here.
 *
 * @param firstRunUnits All units the project would produce on a first run.
 * @param diffSet Change set relative to the previous run.
 * @returns A chunk result containing only the affected units.
 */
function applyDiffSet(firstRunUnits: WorkUnit[], diffSet: DiffSet): ChunkResult {
  const touched = new Set<string>(diffSet.added);
  for (const modifiedPath of diffSet.modified) {
    touched.add(modifiedPath);
  }

  const isTouched = (path: string): boolean => touched.has(path);
  const workUnits = firstRunUnits.filter(
    (unit) => unit.rawFiles.some(isTouched) || unit.dependencyPaths.some(isTouched),
  );

  return { workUnits };
}
|
||||
|
||||
/**
 * Adds `path` to `deps` only when that file exists in the staged project.
 *
 * @param project Loaded staged project supplying the list of existing paths.
 * @param deps Dependency set to mutate.
 * @param path Candidate relative path.
 */
function addIfPresent(project: LoadedLookerProject, deps: Set<string>, path: string): void {
  const isStaged = project.allPaths.includes(path);
  if (!isStaged) {
    return;
  }
  deps.add(path);
}
|
||||
|
||||
/**
 * Adds the explore file behind `query` (its model + view) to `deps`, provided
 * the query is present and that explore file is staged.
 *
 * @param project Loaded staged project.
 * @param deps Dependency set to mutate.
 * @param query Staged query, or `null` when the content has none.
 */
function addExploreDependency(project: LoadedLookerProject, deps: Set<string>, query: StagedLookerQuery | null): void {
  if (query) {
    const queriedExplore = explorePath(query.model, query.view);
    addIfPresent(project, deps, queriedExplore);
  }
}
|
||||
|
||||
/**
 * Lists the evidence documents (metadata, then page) that exist for an explore.
 *
 * @param project Loaded staged project.
 * @param modelName LookML model name.
 * @param exploreName Explore name within the model.
 * @returns The staged evidence paths, in metadata/page order.
 */
function evidencePathsForExplore(project: LoadedLookerProject, modelName: string, exploreName: string): string[] {
  const metadataPath = `evidence/explores/${modelName}/${exploreName}/metadata.json`;
  const pagePath = `evidence/explores/${modelName}/${exploreName}/page.md`;
  return existingPaths(project, [metadataPath, pagePath]);
}
|
||||
|
||||
/**
 * Lists the evidence documents (metadata, then page) that exist for a dashboard.
 *
 * @param project Loaded staged project.
 * @param dashboardId Looker dashboard id.
 * @returns The staged evidence paths, in metadata/page order.
 */
function evidencePathsForDashboard(project: LoadedLookerProject, dashboardId: string): string[] {
  const metadataPath = `evidence/dashboards/${dashboardId}/metadata.json`;
  const pagePath = `evidence/dashboards/${dashboardId}/page.md`;
  return existingPaths(project, [metadataPath, pagePath]);
}
|
||||
|
||||
/**
 * Lists the evidence documents (metadata, then page) that exist for a Look.
 *
 * @param project Loaded staged project.
 * @param lookId Looker Look id.
 * @returns The staged evidence paths, in metadata/page order.
 */
function evidencePathsForLook(project: LoadedLookerProject, lookId: string): string[] {
  const metadataPath = `evidence/looks/${lookId}/metadata.json`;
  const pagePath = `evidence/looks/${lookId}/page.md`;
  return existingPaths(project, [metadataPath, pagePath]);
}
|
||||
|
||||
/**
 * Keeps only the candidate paths that exist in the staged project, preserving
 * the candidates' order.
 *
 * @param project Loaded staged project supplying the list of existing paths.
 * @param paths Candidate relative paths.
 * @returns The subset of `paths` that is staged.
 */
function existingPaths(project: LoadedLookerProject, paths: string[]): string[] {
  const staged: string[] = [];
  for (const candidate of paths) {
    if (project.allPaths.includes(candidate)) {
      staged.push(candidate);
    }
  }
  return staged;
}
|
||||
|
||||
/**
 * Builds the staged relative path of an explore file.
 *
 * @param modelName LookML model name.
 * @param exploreName Explore name within the model.
 * @returns `explores/<modelName>/<exploreName>.json`.
 */
function explorePath(modelName: string, exploreName: string): string {
  const fileName = `${exploreName}.json`;
  return ['explores', modelName, fileName].join('/');
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
// Guards the package boundary: the Looker client source must not reference
// server-side or NestJS modules.
describe('LookerClient boundary', () => {
  it('does not import server or NestJS modules', async () => {
    const clientSourceUrl = new URL('./client.ts', import.meta.url);
    const source = await readFile(clientSourceUrl, 'utf-8');

    // Checked in this order; the first match fails the test.
    const forbiddenReferences = [/@nestjs\/common/, /DataSourceClient/, /\.\.\/interfaces/, /\.\.\/types/, /server\/src/];
    for (const forbidden of forbiddenReferences) {
      expect(source).not.toMatch(forbidden);
    }
  });
});
|
||||
455
packages/context/src/ingest/adapters/looker/client.test.ts
Normal file
455
packages/context/src/ingest/adapters/looker/client.test.ts
Normal file
|
|
@ -0,0 +1,455 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { LookerClient, type LookerSdkPort } from './client.js';
|
||||
|
||||
const clientSecretParam = 'client_secret'; // pragma: allowlist secret
|
||||
|
||||
/** Test helper: connection parameters (base URL, client id, client secret) for a fake Looker instance. */
function params(): Record<string, unknown> {
  const connection: Record<string, unknown> = {
    base_url: 'https://example.looker.com',
    client_id: 'id',
  };
  connection[clientSecretParam] = 'credential'; // pragma: allowlist secret
  return connection;
}
|
||||
|
||||
/**
 * Builds a fake {@link LookerSdkPort} whose methods are vitest mocks resolving to canned fixtures.
 *
 * @param overrides - Port methods that replace the default mocks for a single test.
 * @returns A complete port: the default mocks with `overrides` applied on top.
 */
function sdk(overrides: Partial<LookerSdkPort> = {}): LookerSdkPort {
  // Dashboard "10" with one query-backed tile.
  const dashboardFixture = {
    id: '10',
    title: 'Revenue Dashboard',
    description: 'Revenue concepts',
    folder_id: '20',
    user_id: '1',
    updated_at: '2026-04-30T00:00:00.000Z',
    dashboard_elements: [
      {
        id: '99',
        title: 'ARR',
        look_id: null,
        query: {
          id: 'q1',
          model: 'b2b',
          view: 'sales_pipeline',
          fields: ['opportunities.arr', 'opportunities.stage'],
          filters: { 'opportunities.stage': 'open' },
          sorts: ['opportunities.arr desc'],
          limit: '500',
        },
      },
    ],
  };

  // Look "30" backed by a query on the same explore.
  const lookFixture = {
    id: '30',
    title: 'Open Pipeline ARR',
    description: 'ARR for open opportunities',
    folder_id: '20',
    user_id: '1',
    updated_at: '2026-04-30T00:00:00.000Z',
    query: {
      id: 'q2',
      model: 'b2b',
      view: 'sales_pipeline',
      fields: ['opportunities.arr'],
      filters: { 'opportunities.stage': 'open' },
    },
  };

  // The only warehouse connection known to the fake instance.
  const connectionFixture = {
    name: 'b2b_sandbox_bq',
    host: 'warehouse.example.com',
    database: 'analytics',
    schema: 'public',
    dialect_name: 'bigquery_standard_sql',
  };

  // Explore with one dimension, one measure and one join. The LookML "${...}" references are
  // assembled from two string pieces, exactly as in the expectations of the tests.
  const exploreFixture = {
    name: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: 'Opportunity pipeline',
    sql_table_name: 'proj.dataset.opportunities AS opportunities',
    connection_name: 'b2b_sandbox_bq',
    view_name: 'opportunities',
    fields: {
      dimensions: [{ name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '$' + '{TABLE}.stage' }],
      measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '$' + '{TABLE}.arr' }],
    },
    joins: [
      {
        name: 'accounts',
        type: 'left_outer',
        relationship: 'many_to_one',
        sql_table_name: 'proj.dataset.accounts',
        sql_on: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
        from: null,
      },
    ],
  };

  const defaults: LookerSdkPort = {
    me: vi.fn().mockResolvedValue({ id: '1', display_name: 'API User', email: 'api@example.com' }),
    search_dashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
    dashboard: vi.fn().mockResolvedValue(dashboardFixture),
    search_looks: vi.fn().mockResolvedValue([{ id: '30' }]),
    search_scheduled_plans: vi.fn().mockResolvedValue([]),
    look: vi.fn().mockResolvedValue(lookFixture),
    all_folders: vi.fn().mockResolvedValue([{ id: '20', name: 'Executive', parent_id: null }]),
    all_users: vi.fn().mockResolvedValue([{ id: '1', display_name: 'API User', email: 'api@example.com' }]),
    all_groups: vi.fn().mockResolvedValue([{ id: '2', name: 'Finance' }]),
    all_connections: vi.fn().mockResolvedValue([connectionFixture]),
    all_lookml_models: vi
      .fn()
      .mockResolvedValue([
        { name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] },
      ]),
    lookml_model_explore: vi.fn().mockResolvedValue(exploreFixture),
    run_inline_query: vi.fn().mockResolvedValue('[]'),
    logout: vi.fn().mockResolvedValue(undefined),
  };

  // Overrides win over the defaults, key by key.
  return { ...defaults, ...overrides };
}
|
||||
|
||||
// Behavioural tests for LookerClient. Most run against the fake port from `sdk()`;
// the last two construct the real SDK against a placeholder host.
describe('LookerClient', () => {
  it('validates credentials with me()', async () => {
    const client = new LookerClient(params(), { sdkFactory: () => sdk() });

    const expected = {
      success: true,
      metadata: { userId: '1', displayName: 'API User', email: 'api@example.com' },
    };
    await expect(client.testConnection()).resolves.toEqual(expected);
  });

  it('maps dashboards, looks, folders, models, explores, users, and groups to staged DTOs', async () => {
    const port = sdk();
    const client = new LookerClient(params(), { sdkFactory: () => port });

    // Listings and single-entity lookups.
    await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);
    await expect(client.getDashboard('10')).resolves.toMatchObject({
      lookerId: '10',
      title: 'Revenue Dashboard',
      tiles: [{ id: '99', query: { model: 'b2b', view: 'sales_pipeline' } }],
    });
    await expect(client.listLooks()).resolves.toEqual([{ id: '30', updatedAt: null }]);
    await expect(client.getLook('30')).resolves.toMatchObject({
      lookerId: '30',
      title: 'Open Pipeline ARR',
      query: { model: 'b2b', view: 'sales_pipeline' },
    });

    // Instance-wide metadata.
    await expect(client.listFolders()).resolves.toEqual({
      folders: [{ id: '20', name: 'Executive', parentId: null, path: ['Executive'] }],
    });
    await expect(client.listLookmlModels()).resolves.toEqual({
      models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
    });
    await expect(client.listLookerConnections()).resolves.toEqual([
      {
        name: 'b2b_sandbox_bq',
        host: 'warehouse.example.com',
        database: 'analytics',
        schema: 'public',
        dialect: 'bigquery_standard_sql',
      },
    ]);

    // Explore details, including the fields the client leaves unresolved (null).
    await expect(client.getExplore('b2b', 'sales_pipeline')).resolves.toMatchObject({
      modelName: 'b2b',
      exploreName: 'sales_pipeline',
      rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
      connectionName: 'b2b_sandbox_bq',
      viewName: 'opportunities',
      fields: { dimensions: [{ name: 'opportunities.stage' }], measures: [{ name: 'opportunities.arr' }] },
      joins: [
        {
          name: 'accounts',
          rawSqlTableName: 'proj.dataset.accounts',
          sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
          from: null,
          targetTable: null,
        },
      ],
      targetWarehouseConnectionId: null,
      targetTable: null,
    });

    // The client must request exactly these field lists from the SDK.
    expect(port.dashboard).toHaveBeenCalledWith(
      '10',
      'id,title,description,folder_id,user_id,updated_at,dashboard_elements(id,title,look_id,query(id,model,view,fields,filters,sorts,limit,dynamic_fields))',
    );
    expect(port.look).toHaveBeenCalledWith(
      '30',
      'id,title,description,folder_id,user_id,updated_at,query(id,model,view,fields,filters,sorts,limit,dynamic_fields)',
    );
    expect(port.lookml_model_explore).toHaveBeenCalledWith(
      'b2b',
      'sales_pipeline',
      'name,label,description,sql_table_name,connection_name,view_name,fields,joins(name,type,relationship,sql_table_name,sql_on,from)',
    );
    expect(port.all_connections).toHaveBeenCalledWith('name,host,database,schema,dialect_name');
  });

  it('returns empty usage signals when system activity access fails', async () => {
    const client = new LookerClient(params(), {
      sdkFactory: () =>
        sdk({
          run_inline_query: vi.fn().mockRejectedValue(new Error('access denied')),
          search_dashboards: vi.fn().mockResolvedValue([{ id: '10', favorite_count: 4 }]),
          search_looks: vi.fn().mockResolvedValue([{ id: '30', favorite_count: 2 }]),
          search_scheduled_plans: vi.fn().mockResolvedValue([]),
        }),
    });

    // Usage comes back empty; favorites are still reported.
    await expect(client.getSignals()).resolves.toEqual({
      dashboardUsage: [],
      lookUsage: [],
      scheduledPlans: [],
      favorites: [
        { contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
        { contentId: '30', contentType: 'look', favoriteCount: 2 },
      ],
    });
  });

  it('paginates dashboard and Look searches', async () => {
    // A full first page (500 rows) followed by a single-row second page.
    const firstDashboardPage = Array.from({ length: 500 }, (_, index) => ({ id: String(index + 1) }));
    const firstLookPage = Array.from({ length: 500 }, (_, index) => ({ id: String(index + 1001) }));
    const port = sdk({
      search_dashboards: vi
        .fn()
        .mockResolvedValueOnce(firstDashboardPage)
        .mockResolvedValueOnce([{ id: '501' }]),
      search_looks: vi
        .fn()
        .mockResolvedValueOnce(firstLookPage)
        .mockResolvedValueOnce([{ id: '1501' }]),
    });
    const client = new LookerClient(params(), { sdkFactory: () => port });

    await expect(client.listDashboards()).resolves.toHaveLength(501);
    await expect(client.listLooks()).resolves.toHaveLength(501);

    expect(port.search_dashboards).toHaveBeenNthCalledWith(
      1,
      expect.objectContaining({
        deleted: false,
        fields: 'id,updated_at',
        limit: 500,
        offset: 0,
        sorts: 'id',
      }),
    );
    expect(port.search_dashboards).toHaveBeenNthCalledWith(
      2,
      expect.objectContaining({
        limit: 500,
        offset: 500,
      }),
    );
    expect(port.search_looks).toHaveBeenNthCalledWith(
      1,
      expect.objectContaining({
        deleted: false,
        fields: 'id,updated_at',
        limit: 500,
        offset: 0,
        sorts: 'id',
      }),
    );
    expect(port.search_looks).toHaveBeenNthCalledWith(
      2,
      expect.objectContaining({
        limit: 500,
        offset: 500,
      }),
    );
  });

  it('returns updatedAt cursors from dashboard and Look listing rows', async () => {
    const port = sdk({
      search_dashboards: vi.fn().mockResolvedValue([{ id: '10', updated_at: '2026-04-30T12:00:00.000Z' }]),
      search_looks: vi.fn().mockResolvedValue([{ id: '30', updated_at: '2026-04-30T11:00:00.000Z' }]),
    });
    const client = new LookerClient(params(), { sdkFactory: () => port });

    await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' }]);
    await expect(client.listLooks()).resolves.toEqual([{ id: '30', updatedAt: '2026-04-30T11:00:00.000Z' }]);
  });

  it('logs out the SDK session during cleanup', async () => {
    const port = sdk();
    const client = new LookerClient(params(), { sdkFactory: () => port });

    // The first call makes the client obtain its SDK; cleanup must then log it out once.
    await client.testConnection();
    await client.cleanup();

    expect(port.logout).toHaveBeenCalledTimes(1);
  });

  it('aggregates usage, scheduled-plan, and favorite signals', async () => {
    // First resolved value answers the dashboard usage query, the second the Look usage query.
    const dashboardHistoryRows = [
      {
        'dashboard.id': '10',
        'history.query_run_count': 3,
        'history.created_date': '2026-04-30',
        'user.id': 'user-1',
      },
      {
        'dashboard.id': '10',
        'history.query_run_count': '2',
        'history.created_date': '2026-04-29',
        'user.id': 'user-2',
      },
    ];
    const lookHistoryRows = [
      {
        'look.id': '30',
        'history.query_run_count': 7,
        'history.created_date': '2026-04-28',
        'user.id': 'user-1',
      },
    ];
    const runInlineQuery = vi
      .fn()
      .mockResolvedValueOnce(JSON.stringify(dashboardHistoryRows))
      .mockResolvedValueOnce(JSON.stringify(lookHistoryRows));
    const port = sdk({
      run_inline_query: runInlineQuery,
      search_dashboards: vi.fn().mockResolvedValueOnce([{ id: '10', favorite_count: 4 }]),
      search_looks: vi.fn().mockResolvedValueOnce([{ id: '30', favorite_count: 2 }]),
      search_scheduled_plans: vi.fn().mockResolvedValueOnce([
        {
          id: 'sp-dashboard',
          dashboard_id: '10',
          look_id: null,
          enabled: true,
          scheduled_plan_destination: [{ id: 'dest-1' }, { id: 'dest-2' }],
        },
        {
          id: 'sp-look',
          dashboard_id: null,
          look_id: '30',
          enabled: true,
          scheduled_plan_destination: [{ id: 'dest-3' }],
        },
      ]),
    });
    const client = new LookerClient(params(), { sdkFactory: () => port });

    await expect(client.getSignals()).resolves.toEqual({
      dashboardUsage: [
        {
          contentId: '10',
          queryCount30d: 5,
          uniqueUsers30d: 2,
          lastRunAt: '2026-04-30',
          topUsers: ['user-1', 'user-2'],
        },
      ],
      lookUsage: [
        {
          contentId: '30',
          queryCount30d: 7,
          uniqueUsers30d: 1,
          lastRunAt: '2026-04-28',
          topUsers: ['user-1'],
        },
      ],
      scheduledPlans: [
        {
          contentId: '10',
          contentType: 'dashboard',
          isScheduled: true,
          scheduleCount: 1,
          recipientCount: 2,
        },
        {
          contentId: '30',
          contentType: 'look',
          isScheduled: true,
          scheduleCount: 1,
          recipientCount: 1,
        },
      ],
      favorites: [
        { contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
        { contentId: '30', contentType: 'look', favoriteCount: 2 },
      ],
    });

    expect(runInlineQuery).toHaveBeenNthCalledWith(
      1,
      expect.objectContaining({
        result_format: 'json',
        body: expect.objectContaining({
          model: 'system__activity',
          view: 'history',
          fields: ['dashboard.id', 'history.query_run_count', 'history.created_date', 'user.id'],
        }),
      }),
    );
    expect(port.search_scheduled_plans).toHaveBeenCalledWith(
      expect.objectContaining({
        all_users: true,
        fields: 'id,dashboard_id,look_id,enabled,scheduled_plan_destination',
        limit: 500,
        offset: 0,
        sorts: 'id',
      }),
    );
  });

  it('retries a 429 response once using Retry-After seconds', async () => {
    const sleep = vi.fn().mockResolvedValue(undefined);
    const rateLimitError = new Error('rate limited');
    Object.assign(rateLimitError, { statusCode: 429, headers: { 'retry-after': '2' } });
    const port = sdk({
      search_dashboards: vi
        .fn()
        .mockRejectedValueOnce(rateLimitError)
        .mockResolvedValueOnce([{ id: '10' }]),
    });
    const client = new LookerClient(params(), { sdkFactory: () => port, sleep });

    await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);

    // Retry-After of "2" seconds must become a 2000 ms sleep, followed by exactly one retry.
    expect(sleep).toHaveBeenCalledWith(2000);
    expect(port.search_dashboards).toHaveBeenCalledTimes(2);
  });

  it('does not retry non-429 errors', async () => {
    const sleep = vi.fn().mockResolvedValue(undefined);
    const serverError = new Error('broken dashboard');
    Object.assign(serverError, { statusCode: 500 });
    const port = sdk({ dashboard: vi.fn().mockRejectedValue(serverError) });
    const client = new LookerClient(params(), { sdkFactory: () => port, sleep });

    await expect(client.getDashboard('10')).rejects.toThrow('broken dashboard');

    expect(sleep).not.toHaveBeenCalled();
    expect(port.dashboard).toHaveBeenCalledTimes(1);
  });

  it('initializes the real @looker/sdk-node SDK with inline credentials without throwing', async () => {
    const client = new LookerClient(params());

    const result = await client.testConnection();

    // With no sdkFactory injected, the real SDK is built through InlineLookerSettings.
    // That path once threw "Missing required configuration values like base_url": the parent
    // NodeSettingsIniFile constructor validated the config before the override could supply
    // credentials. An auth or network failure against the bogus example URL is acceptable
    // here — a synchronous SDK-init throw is not.
    expect(result.success).toBe(false);
    expect(result.error).toBeDefined();
    expect(result.error).not.toMatch(/Missing required configuration values/i);

    await client.cleanup();
  });

  it('strips trailing /api/4.0 from base_url so the SDK does not double-prefix it', async () => {
    const clientWithSuffix = new LookerClient({
      base_url: 'https://example.looker.com/api/4.0',
      client_id: 'id',
      [clientSecretParam]: 'credential', // pragma: allowlist secret
    });

    const result = await clientWithSuffix.testConnection();

    expect(result.success).toBe(false);
    // A double-prefixed base_url would make the SDK call /api/4.0/api/4.0/login. Whether the URL
    // is normalized (transport-level network failure) or not (404/HTML response), the failure
    // must not be a config-validation throw.
    expect(result.error).not.toMatch(/Missing required configuration values/i);

    await clientWithSuffix.cleanup();
  });
});
|
||||
732
packages/context/src/ingest/adapters/looker/client.ts
Normal file
732
packages/context/src/ingest/adapters/looker/client.ts
Normal file
|
|
@ -0,0 +1,732 @@
|
|||
import type {
|
||||
IRequestRunInlineQuery,
|
||||
IRequestSearchDashboards,
|
||||
IRequestSearchLooks,
|
||||
IRequestSearchScheduledPlans,
|
||||
} from '@looker/sdk';
|
||||
import type { IApiSection, IApiSettings } from '@looker/sdk-rtl';
|
||||
import { LookerNodeSDK, NodeSettings } from '@looker/sdk-node';
|
||||
import type { LookerRuntimeClient } from './fetch.js';
|
||||
import type {
|
||||
StagedDashboardFile,
|
||||
StagedExploreFile,
|
||||
StagedFoldersTreeFile,
|
||||
StagedGroupFile,
|
||||
StagedLookerQuery,
|
||||
StagedLookerSignalsFile,
|
||||
StagedLookFile,
|
||||
StagedLookmlModelsFile,
|
||||
StagedUserFile,
|
||||
} from './types.js';
|
||||
|
||||
// Loosely typed object as returned by the SDK port; individual fields are narrowed by helper functions before use.
type LookerRecord = Record<string, unknown>;
|
||||
|
||||
/** Outcome of a connection check against a Looker instance. */
export interface TestConnectionResult {
  /** `true` when the check succeeded. */
  success: boolean;
  /** Failure message; set when the check did not succeed. */
  error?: string;
  /** Extra details about the connection (for example the authenticated user). */
  metadata?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Credentials and endpoint needed to talk to a Looker instance.
 * Extends an open record, so additional keys are permitted.
 */
export interface LookerConnectionParams extends Record<string, unknown> {
  /** Base URL of the Looker instance. */
  base_url: string;
  /** API client id. */
  client_id: string;
  /** API client secret. */
  client_secret: string;
}
|
||||
|
||||
/** Summary of a database connection configured inside Looker. */
export interface LookerWarehouseConnectionInfo {
  /** Connection name as defined in Looker. */
  name: string;
  /** Warehouse host, or `null` when not reported. */
  host: string | null;
  /** Database name, or `null` when not reported. */
  database: string | null;
  /** Schema name, or `null` when not reported. */
  schema: string | null;
  /** SQL dialect name, or `null` when not reported. */
  dialect: string | null;
}
|
||||
|
||||
/** Number of rows requested per page from the paginated search endpoints. */
const LOOKER_PAGE_SIZE = 500;

// Fragments shared by the dashboard and Look field lists.
const LOOKER_CONTENT_FIELDS = 'id,title,description,folder_id,user_id,updated_at';
const LOOKER_QUERY_FIELDS = 'query(id,model,view,fields,filters,sorts,limit,dynamic_fields)';

/** `fields` selector used when fetching a single dashboard, including the query of each tile. */
const LOOKER_DASHBOARD_FIELDS = `${LOOKER_CONTENT_FIELDS},dashboard_elements(id,title,look_id,${LOOKER_QUERY_FIELDS})`;

/** `fields` selector used when fetching a single Look, including its query. */
const LOOKER_LOOK_FIELDS = `${LOOKER_CONTENT_FIELDS},${LOOKER_QUERY_FIELDS}`;

/** `fields` selector used when fetching a single explore, including its joins. */
const LOOKER_EXPLORE_FIELDS =
  'name,label,description,sql_table_name,connection_name,view_name,fields,joins(name,type,relationship,sql_table_name,sql_on,from)';
|
||||
|
||||
/**
 * The slice of the Looker SDK that {@link LookerClient} depends on.
 * Method names mirror the SDK calls they wrap; results are exposed as loosely typed records.
 */
export interface LookerSdkPort {
  // Identity of the authenticated API user.
  me(fields?: string): Promise<LookerRecord>;

  // Content searches and single-entity lookups.
  search_dashboards(request?: LookerRecord): Promise<LookerRecord[]>;
  dashboard(id: string, fields?: string): Promise<LookerRecord>;
  search_looks(request?: LookerRecord): Promise<LookerRecord[]>;
  search_scheduled_plans(request?: LookerRecord): Promise<LookerRecord[]>;
  look(id: string, fields?: string): Promise<LookerRecord>;

  // Instance-wide listings.
  all_folders(fields?: string): Promise<LookerRecord[]>;
  all_users(fields?: string): Promise<LookerRecord[]>;
  all_groups(fields?: string): Promise<LookerRecord[]>;
  all_connections(fields?: string): Promise<LookerRecord[]>;
  all_lookml_models(fields?: string): Promise<LookerRecord[]>;

  // LookML metadata and ad-hoc queries.
  lookml_model_explore(modelName: string, exploreName: string, fields?: string): Promise<LookerRecord>;
  run_inline_query(request: IRequestRunInlineQuery): Promise<string>;

  // Ends the API session.
  logout(): Promise<void>;
}
|
||||
|
||||
/** Minimal logging contract accepted by {@link LookerClient}. */
export interface LookerClientLogger {
  /** Informational message. */
  log(message: string): void;
  /** Recoverable problem. */
  warn(message: string): void;
  /** Failure. */
  error(message: string): void;
  /** Verbose diagnostics; implementations may omit this method. */
  debug?(message: string): void;
}
|
||||
|
||||
/** Optional collaborators that can be injected into {@link LookerClient}, e.g. by tests. */
export interface LookerClientDeps {
  /** Creates the SDK port for the given connection parameters. */
  sdkFactory?: (params: LookerConnectionParams) => LookerSdkPort;
  /** Waits for the given number of milliseconds. */
  sleep?: (ms: number) => Promise<void>;
  /** Destination for the client's log output. */
  logger?: LookerClientLogger;
}
|
||||
|
||||
/** Console-backed logger used when no logger is injected. */
const defaultLogger: LookerClientLogger = {
  log(message) {
    console.log(message);
  },
  warn(message) {
    console.warn(message);
  },
  error(message) {
    console.error(message);
  },
  debug(message) {
    console.debug(message);
  },
};
|
||||
|
||||
/**
 * Builds the SDK configuration section from in-memory connection parameters.
 * The base URL is passed through `normalizeBaseUrl`; SSL verification is on and the timeout is "120".
 */
function inlineLookerSection(params: LookerConnectionParams): IApiSection {
  return {
    base_url: normalizeBaseUrl(params.base_url),
    client_id: params.client_id,
    client_secret: params.client_secret, // pragma: allowlist secret
    verify_ssl: 'true',
    timeout: '120',
  };
}

/**
 * SDK settings backed by in-memory credentials.
 *
 * The same values are handed to the parent constructor and returned from `readConfig`.
 * NOTE(review): presumably the parent validates its configuration during construction, before
 * the `readConfig` override can be consulted — confirm against the SDK.
 */
class InlineLookerSettings extends NodeSettings {
  constructor(private readonly params: LookerConnectionParams) {
    super('', inlineLookerSection(params) as unknown as IApiSettings);
  }

  /** Returns the inline configuration; the requested section name is ignored. */
  override readConfig(_section?: string): IApiSection {
    return inlineLookerSection(this.params);
  }
}
|
||||
|
||||
/**
 * Default SDK factory: initializes the Looker 4.0 Node SDK with inline settings and adapts it to
 * {@link LookerSdkPort}.
 *
 * Each port method unwraps the SDK response with `ok(...)` and converts it with
 * `toRecord` / `toRecordArray`, except `run_inline_query`, whose result is returned as-is.
 *
 * @param params - Connection parameters used to build the SDK settings.
 * @returns A port bound to a freshly initialized SDK instance.
 */
function createLookerSdkPort(params: LookerConnectionParams): LookerSdkPort {
  const settings = new InlineLookerSettings(params);
  const looker = LookerNodeSDK.init40(settings);

  return {
    me: (fields) => looker.ok(looker.me(fields)).then(toRecord),

    // Search endpoints take a request object; a missing request becomes an empty one.
    search_dashboards: (request) =>
      looker.ok(looker.search_dashboards((request ?? {}) as IRequestSearchDashboards)).then(toRecordArray),
    dashboard: (id, fields) => looker.ok(looker.dashboard(id, fields)).then(toRecord),
    search_looks: (request) =>
      looker.ok(looker.search_looks((request ?? {}) as IRequestSearchLooks)).then(toRecordArray),
    search_scheduled_plans: (request) =>
      looker.ok(looker.search_scheduled_plans((request ?? {}) as IRequestSearchScheduledPlans)).then(toRecordArray),
    look: (id, fields) => looker.ok(looker.look(id, fields)).then(toRecord),

    // Some "all_*" SDK calls take the field list positionally, others inside a request object.
    all_folders: (fields) => looker.ok(looker.all_folders(fields)).then(toRecordArray),
    all_users: (fields) => looker.ok(looker.all_users({ fields })).then(toRecordArray),
    all_groups: (fields) => looker.ok(looker.all_groups({ fields })).then(toRecordArray),
    all_connections: (fields) => looker.ok(looker.all_connections(fields)).then(toRecordArray),
    all_lookml_models: (fields) => looker.ok(looker.all_lookml_models({ fields })).then(toRecordArray),

    lookml_model_explore: (modelName, exploreName, fields) =>
      looker
        .ok(looker.lookml_model_explore({ lookml_model_name: modelName, explore_name: exploreName, fields }))
        .then(toRecord),
    run_inline_query: (request) => looker.ok(looker.run_inline_query(request)),

    logout: async () => {
      await looker.authSession.logout();
    },
  };
}
|
||||
|
||||
export class LookerClient implements LookerRuntimeClient {
|
||||
private readonly logger: LookerClientLogger;
|
||||
private readonly params: LookerConnectionParams;
|
||||
private sdkInstance: LookerSdkPort | null = null;
|
||||
|
||||
/**
 * @param connectionParams - Raw connection parameters; parsed by `parseLookerConnectionParams`.
 * @param deps - Optional injected collaborators; the console-backed logger is used when none is given.
 */
constructor(
  connectionParams: Record<string, unknown>,
  private readonly deps: LookerClientDeps = {},
) {
  const parsedParams = parseLookerConnectionParams(connectionParams);
  this.params = parsedParams;
  this.logger = deps.logger ?? defaultLogger;
}
|
||||
|
||||
/** Data source type tag of this client; always `'LOOKER'`. */
get dataSourceType(): string {
  const sourceType = 'LOOKER';
  return sourceType;
}
|
||||
|
||||
/**
 * Checks the credentials by fetching the authenticated user.
 *
 * @returns `success: true` with the user's id, display name and email, or `success: false`
 * with the error message. Errors are reported in the result rather than thrown.
 */
async testConnection(): Promise<TestConnectionResult> {
  try {
    const profile = await this.withRateLimitRetry(() => this.sdk().me('id,display_name,email'));
    const metadata = {
      userId: stringValue(profile.id),
      displayName: nullableString(profile.display_name),
      email: nullableString(profile.email),
    };
    return { success: true, metadata };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { success: false, error: message };
  }
}
|
||||
|
||||
/**
 * Lists dashboards page by page, requesting rows with `deleted: false`, sorted by id.
 *
 * @returns One reference per dashboard row, as produced by `entityRef`.
 */
async listDashboards(): Promise<Array<{ id: string; updatedAt: string | null }>> {
  const loadPage = (offset: number) =>
    this.sdk().search_dashboards({
      deleted: false,
      fields: 'id,updated_at',
      limit: LOOKER_PAGE_SIZE,
      offset,
      sorts: 'id',
    });
  const rows = await this.collectPaged(loadPage);
  return rows.flatMap(entityRef);
}
|
||||
|
||||
/**
 * Fetches one dashboard and maps it, together with its tiles, to the staged file shape.
 *
 * @param id - Looker dashboard id.
 * @returns The staged dashboard; each dashboard element becomes one tile.
 */
async getDashboard(id: string): Promise<StagedDashboardFile> {
  const raw = await this.withRateLimitRetry(() => this.sdk().dashboard(id, LOOKER_DASHBOARD_FIELDS));
  const rawTiles = arrayValue(raw.dashboard_elements);
  return {
    lookerId: stringValue(raw.id),
    title: stringValue(raw.title),
    description: nullableString(raw.description),
    folderId: nullableString(raw.folder_id),
    ownerId: nullableString(raw.user_id),
    updatedAt: nullableString(raw.updated_at),
    tiles: rawTiles.map((element) => ({
      id: stringValue(element.id),
      title: nullableString(element.title),
      lookId: nullableString(element.look_id),
      query: queryValue(element.query),
    })),
  };
}
|
||||
|
||||
/**
 * Lists Looks page by page, requesting rows with `deleted: false`, sorted by id.
 *
 * @returns One reference per Look row, as produced by `entityRef`.
 */
async listLooks(): Promise<Array<{ id: string; updatedAt: string | null }>> {
  const loadPage = (offset: number) =>
    this.sdk().search_looks({
      deleted: false,
      fields: 'id,updated_at',
      limit: LOOKER_PAGE_SIZE,
      offset,
      sorts: 'id',
    });
  const rows = await this.collectPaged(loadPage);
  return rows.flatMap(entityRef);
}
|
||||
|
||||
/**
 * Fetches one Look and maps it to the staged file shape.
 *
 * @param id - Looker Look id.
 * @returns The staged Look including its query.
 */
async getLook(id: string): Promise<StagedLookFile> {
  const raw = await this.withRateLimitRetry(() => this.sdk().look(id, LOOKER_LOOK_FIELDS));
  return {
    lookerId: stringValue(raw.id),
    title: stringValue(raw.title),
    description: nullableString(raw.description),
    folderId: nullableString(raw.folder_id),
    ownerId: nullableString(raw.user_id),
    updatedAt: nullableString(raw.updated_at),
    query: queryValue(raw.query),
  };
}
|
||||
|
||||
/**
 * Fetches all folders and returns them with a `path` computed by `folderPath`.
 *
 * @returns The staged folder tree file.
 */
async listFolders(): Promise<StagedFoldersTreeFile> {
  const rawFolders = await this.withRateLimitRetry(() => this.sdk().all_folders('id,name,parent_id'));

  // Index folders by id so `folderPath` can look up other folders; a later duplicate id wins.
  const foldersById = new Map<string, LookerRecord>(
    rawFolders.map((folder): [string, LookerRecord] => [stringValue(folder.id), folder]),
  );

  const folders = rawFolders.map((folder) => ({
    id: stringValue(folder.id),
    name: stringValue(folder.name),
    parentId: nullableString(folder.parent_id),
    path: folderPath(folder, foldersById),
  }));
  return { folders };
}
|
||||
|
||||
/**
 * Fetches all users.
 *
 * @returns One staged entry per user with id, display name and email.
 */
async listUsers(): Promise<StagedUserFile[]> {
  const rawUsers = await this.withRateLimitRetry(() => this.sdk().all_users('id,display_name,email'));
  const staged: StagedUserFile[] = [];
  for (const rawUser of rawUsers) {
    staged.push({
      id: stringValue(rawUser.id),
      displayName: nullableString(rawUser.display_name),
      email: nullableString(rawUser.email),
    });
  }
  return staged;
}
|
||||
|
||||
/**
 * Fetches all groups.
 *
 * @returns One staged entry per group with id and name.
 */
async listGroups(): Promise<StagedGroupFile[]> {
  const rawGroups = await this.withRateLimitRetry(() => this.sdk().all_groups('id,name'));
  const staged: StagedGroupFile[] = [];
  for (const rawGroup of rawGroups) {
    staged.push({
      id: stringValue(rawGroup.id),
      name: stringValue(rawGroup.name),
    });
  }
  return staged;
}
|
||||
|
||||
/**
 * Fetches all LookML models with the name and label of each of their explores.
 *
 * @returns The staged models file.
 */
async listLookmlModels(): Promise<StagedLookmlModelsFile> {
  const rawModels = await this.withRateLimitRetry(() => this.sdk().all_lookml_models('name,label,explores'));
  const models = rawModels.map((rawModel) => ({
    name: stringValue(rawModel.name),
    label: nullableString(rawModel.label),
    explores: arrayValue(rawModel.explores).map((rawExplore) => ({
      name: stringValue(rawExplore.name),
      label: nullableString(rawExplore.label),
    })),
  }));
  return { models };
}
|
||||
|
||||
/**
 * Fetches the database connections configured in Looker.
 *
 * @returns One entry per connection; the dialect is read from `dialect_name`, falling back to `dialect`.
 */
async listLookerConnections(): Promise<LookerWarehouseConnectionInfo[]> {
  const rawConnections = await this.withRateLimitRetry(() =>
    this.sdk().all_connections('name,host,database,schema,dialect_name'),
  );
  const infos: LookerWarehouseConnectionInfo[] = [];
  for (const raw of rawConnections) {
    infos.push({
      name: stringValue(raw.name),
      host: nullableString(raw.host),
      database: nullableString(raw.database),
      schema: nullableString(raw.schema),
      dialect: nullableString(raw.dialect_name ?? raw.dialect),
    });
  }
  return infos;
}
|
||||
|
||||
/**
 * Fetches one explore and maps it to the staged file shape.
 *
 * Snake-case keys are preferred, with camel-case keys as fallback. The `targetTable` and
 * `targetWarehouseConnectionId` fields are always returned as `null` here.
 *
 * @param modelName - LookML model name; copied into the result unchanged.
 * @param exploreName - Explore to fetch; the result's `exploreName` comes from the response.
 * @returns The staged explore with its dimensions, measures and joins.
 */
async getExplore(modelName: string, exploreName: string): Promise<StagedExploreFile> {
  const raw = await this.withRateLimitRetry(() =>
    this.sdk().lookml_model_explore(modelName, exploreName, LOOKER_EXPLORE_FIELDS),
  );
  const fieldGroups = recordValue(raw.fields);
  return {
    modelName,
    exploreName: stringValue(raw.name),
    label: nullableString(raw.label),
    description: nullableString(raw.description),
    rawSqlTableName: nullableString(raw.sql_table_name ?? raw.sqlTableName),
    connectionName: nullableString(raw.connection_name ?? raw.connectionName),
    viewName: nullableString(raw.view_name ?? raw.viewName),
    fields: {
      dimensions: arrayValue(fieldGroups.dimensions).map(stagedField),
      measures: arrayValue(fieldGroups.measures).map(stagedField),
    },
    joins: arrayValue(raw.joins).map((rawJoin) => ({
      name: stringValue(rawJoin.name),
      type: nullableString(rawJoin.type),
      relationship: nullableString(rawJoin.relationship),
      rawSqlTableName: nullableString(rawJoin.sql_table_name ?? rawJoin.sqlTableName),
      sqlOn: nullableString(rawJoin.sql_on ?? rawJoin.sqlOn),
      from: nullableString(rawJoin.from),
      targetTable: null,
    })),
    targetWarehouseConnectionId: null,
    targetTable: null,
  };
}
|
||||
|
||||
/**
 * Collects usage, scheduled-plan and favorite signals concurrently.
 *
 * Each source is best-effort: if one fails, a warning is logged and that source
 * contributes an empty list, so this method does not reject because of a single source.
 *
 * @returns The four signal lists.
 */
async getSignals(): Promise<StagedLookerSignalsFile> {
  // Builds a rejection handler that logs `message` and substitutes an empty list.
  const orEmpty = (message: string) => (error: unknown) => this.warnAndReturnEmpty(message, error);

  const dashboardUsageTask = this.getUsageSignals('dashboard').catch(
    orEmpty('Looker system__activity dashboard usage unavailable'),
  );
  const lookUsageTask = this.getUsageSignals('look').catch(
    orEmpty('Looker system__activity Look usage unavailable'),
  );
  const scheduledPlansTask = this.getScheduledPlanSignals().catch(
    orEmpty('Looker scheduled-plan signals unavailable'),
  );
  const favoritesTask = this.getFavoriteSignals().catch(orEmpty('Looker favorite signals unavailable'));

  const [dashboardUsage, lookUsage, scheduledPlans, favorites] = await Promise.all([
    dashboardUsageTask,
    lookUsageTask,
    scheduledPlansTask,
    favoritesTask,
  ]);

  return { dashboardUsage, lookUsage, scheduledPlans, favorites };
}
|
||||
|
||||
/**
 * Logs out the SDK session, if one was created, and forgets the SDK instance.
 *
 * The instance reference is cleared even when `logout()` rejects, so a failed logout does not
 * leave the client holding a session in an unknown state. The logout error still propagates.
 * Calling this when no SDK instance exists is a no-op.
 */
async cleanup(): Promise<void> {
  const sdk = this.sdkInstance;
  if (!sdk) {
    return;
  }
  try {
    await sdk.logout();
  } finally {
    // Runs on success and on failure alike.
    this.sdkInstance = null;
  }
}
|
||||
|
||||
/**
 * Queries the `system__activity` history explore for the last 30 days of runs of one content
 * type and aggregates the rows per content id.
 *
 * @param contentType - Which content id column (`dashboard.id` or `look.id`) to group by.
 * @returns Aggregated usage rows as produced by `aggregateUsageRows`.
 */
private async getUsageSignals(contentType: 'dashboard' | 'look'): Promise<StagedLookerSignalsFile['dashboardUsage']> {
  const idField = contentType === 'dashboard' ? 'dashboard.id' : 'look.id';
  const request: IRequestRunInlineQuery = {
    result_format: 'json',
    body: {
      model: 'system__activity',
      view: 'history',
      fields: [idField, 'history.query_run_count', 'history.created_date', 'user.id'],
      filters: {
        'history.created_date': '30 days',
        // Only rows that are attached to a piece of content of this type.
        [idField]: '-NULL',
      },
      sorts: ['history.query_run_count desc'],
      limit: '5000',
    },
  };
  const rawJson = await this.withRateLimitRetry(() => this.sdk().run_inline_query(request));
  const rows = parseJsonRows(rawJson);
  return aggregateUsageRows(rows, idField);
}
|
||||
|
||||
/**
 * Summarizes scheduled plans per dashboard or Look.
 *
 * Plans are fetched page by page for all users. A plan is attributed to its dashboard when it
 * has a non-empty dashboard id, otherwise to its Look when it has a non-empty Look id; plans
 * with neither are ignored. Plans with `enabled === false` are not counted.
 *
 * @returns One entry per content item with at least one counted plan, sorted with
 * `compareContentSignals`.
 */
private async getScheduledPlanSignals(): Promise<StagedLookerSignalsFile['scheduledPlans']> {
  const plans = await this.collectPaged((offset) =>
    this.sdk().search_scheduled_plans({
      all_users: true,
      fields: 'id,dashboard_id,look_id,enabled,scheduled_plan_destination',
      limit: LOOKER_PAGE_SIZE,
      offset,
      sorts: 'id',
    }),
  );
  const byContent = new Map<
    string,
    {
      contentId: string;
      contentType: 'dashboard' | 'look';
      isScheduled: boolean;
      scheduleCount: number;
      recipientCount: number;
    }
  >();

  for (const plan of plans) {
    const dashboardId = nullableString(plan.dashboard_id);
    const lookId = nullableString(plan.look_id);

    // Take the type and the id from the same branch. Previously the type was chosen by
    // truthiness and the id by `??`, so an empty-string dashboard id next to a valid Look id
    // produced type 'look' with id '' and the plan was dropped.
    let contentType: 'dashboard' | 'look';
    let contentId: string;
    if (dashboardId) {
      contentType = 'dashboard';
      contentId = dashboardId;
    } else if (lookId) {
      contentType = 'look';
      contentId = lookId;
    } else {
      continue;
    }

    const key = `${contentType}:${contentId}`;
    const current =
      byContent.get(key) ??
      ({
        contentId,
        contentType,
        isScheduled: false,
        scheduleCount: 0,
        recipientCount: 0,
      } satisfies StagedLookerSignalsFile['scheduledPlans'][number]);
    // A missing `enabled` flag counts as enabled; only an explicit `false` is skipped.
    if (plan.enabled !== false) {
      current.isScheduled = true;
      current.scheduleCount += 1;
      current.recipientCount += arrayValue(plan.scheduled_plan_destination).length;
    }
    byContent.set(key, current);
  }

  // Content whose plans are all disabled ends up with a zero count and is left out.
  return [...byContent.values()].filter((signal) => signal.scheduleCount > 0).sort(compareContentSignals);
}
|
||||
|
||||
/**
 * Collects favorite counts for every non-deleted dashboard and Look.
 *
 * Dashboards are paged first, then Looks; rows without an id are dropped by
 * `favoriteSignal`.
 *
 * @returns Favorite signals sorted with `compareContentSignals`.
 */
private async getFavoriteSignals(): Promise<StagedLookerSignalsFile['favorites']> {
  const pageRequest = (offset: number) => ({
    deleted: false,
    fields: 'id,favorite_count',
    limit: LOOKER_PAGE_SIZE,
    offset,
    sorts: 'id',
  });

  const dashboards = await this.collectPaged((offset) => this.sdk().search_dashboards(pageRequest(offset)));
  const looks = await this.collectPaged((offset) => this.sdk().search_looks(pageRequest(offset)));

  const dashboardFavorites = dashboards.flatMap((dashboard) => favoriteSignal(dashboard, 'dashboard'));
  const lookFavorites = looks.flatMap((look) => favoriteSignal(look, 'look'));
  return dashboardFavorites.concat(lookFavorites).sort(compareContentSignals);
}
|
||||
|
||||
/**
 * Logs a warning for a failed signal fetch and returns an empty list in its place.
 *
 * @param message Short description of what was unavailable.
 * @param error The failure that triggered the fallback.
 */
private warnAndReturnEmpty(message: string, error: unknown): never[] {
  const reason = errorMessage(error);
  this.logger.warn(`${message}; continuing without that prioritization input: ${reason}`);
  return [];
}
|
||||
|
||||
/**
 * Fetches consecutive pages until one comes back shorter than `LOOKER_PAGE_SIZE`.
 *
 * Each page load goes through `withRateLimitRetry`.
 *
 * @param loadPage Loads the page that starts at the given offset.
 * @returns All rows from every page, in fetch order.
 */
private async collectPaged(loadPage: (offset: number) => Promise<LookerRecord[]>): Promise<LookerRecord[]> {
  const rows: LookerRecord[] = [];
  let nextOffset = 0;
  for (;;) {
    // Freeze the offset for this iteration so a retry re-requests the same page.
    const pageOffset = nextOffset;
    const page = await this.withRateLimitRetry(() => loadPage(pageOffset));
    rows.push(...page);
    if (page.length < LOOKER_PAGE_SIZE) {
      return rows;
    }
    nextOffset += LOOKER_PAGE_SIZE;
  }
}
|
||||
|
||||
/**
 * Runs `load`, retrying exactly once when the failure carries HTTP status 429.
 *
 * Before the retry it waits for the delay derived from the error by
 * `retryAfterMs`, using the injected `deps.sleep` when provided. Any other
 * failure, and a failure of the retry itself, is propagated unchanged.
 */
private async withRateLimitRetry<T>(load: () => Promise<T>): Promise<T> {
  let failure: unknown;
  try {
    return await load();
  } catch (error) {
    failure = error;
  }

  if (lookerStatusCode(failure) !== 429) {
    throw failure;
  }
  const pause = this.deps.sleep ?? sleep;
  await pause(retryAfterMs(failure));
  return load();
}
|
||||
|
||||
/**
 * Returns the cached SDK port, creating it on first use.
 *
 * Creation prefers the injected `deps.sdkFactory` and falls back to
 * `createLookerSdkPort` when the factory is absent or yields a nullish value.
 */
private sdk(): LookerSdkPort {
  if (this.sdkInstance) {
    return this.sdkInstance;
  }
  const created = this.deps.sdkFactory?.(this.params) ?? createLookerSdkPort(this.params);
  this.sdkInstance = created;
  return created;
}
|
||||
}
|
||||
|
||||
/**
 * Validates raw connection settings and returns them as typed Looker params.
 *
 * Each of `base_url`, `client_id` and `client_secret` must be a string that is
 * not blank after trimming; values are returned as given (not trimmed).
 *
 * @throws Error naming the first missing or blank field, checked in the order above.
 */
function parseLookerConnectionParams(raw: Record<string, unknown>): LookerConnectionParams {
  const isPresentText = (value: unknown): value is string => typeof value === 'string' && value.trim() !== '';

  const { base_url: baseUrl, client_id: clientId } = raw;
  const apiCredential = raw.client_secret; // pragma: allowlist secret

  if (!isPresentText(baseUrl)) {
    throw new Error('Looker base_url is required');
  }
  if (!isPresentText(clientId)) {
    throw new Error('Looker client_id is required');
  }
  if (!isPresentText(apiCredential)) {
    throw new Error('Looker client_secret is required'); // pragma: allowlist secret
  }
  return { base_url: baseUrl, client_id: clientId, client_secret: apiCredential }; // pragma: allowlist secret
}
|
||||
|
||||
/** Re-types an arbitrary object as a `LookerRecord`; the same reference is returned. */
function toRecord(value: object): LookerRecord {
  const record = value as LookerRecord;
  return record;
}
|
||||
|
||||
/** Re-types every element of `values` as a `LookerRecord`, returning a new array of the same references. */
function toRecordArray(values: object[]): LookerRecord[] {
  return values.map((value) => value as LookerRecord);
}
|
||||
|
||||
/**
 * Reduces a user-supplied Looker URL to its bare base form.
 *
 * Trims whitespace, drops trailing slashes, removes a trailing `/api/4.0` or
 * `/api/3.1` suffix, and drops any slashes left in front of that suffix
 * (e.g. `https://host//api/4.0` becomes `https://host`, not `https://host/`).
 *
 * @param baseUrl URL as entered in the connection settings.
 * @returns The URL without API suffix and without trailing slash.
 */
function normalizeBaseUrl(baseUrl: string): string {
  return baseUrl
    .trim()
    .replace(/\/+$/, '')
    .replace(/\/api\/(4\.0|3\.1)$/, '')
    .replace(/\/+$/, '');
}
|
||||
|
||||
/**
 * Builds a zero-or-one element list describing a row's identity.
 *
 * @returns An empty list when the row has no `id`; otherwise one entry with
 *   the stringified id and the row's `updated_at` (null when absent).
 */
function entityRef(row: LookerRecord): Array<{ id: string; updatedAt: string | null }> {
  const rawId = row.id;
  if (rawId == null) {
    return [];
  }
  const updatedAt = nullableString(row.updated_at);
  return [{ id: String(rawId), updatedAt }];
}
|
||||
|
||||
/**
 * Converts a raw Looker query payload into the staged query shape.
 *
 * @returns `null` when `value` is not an object or lacks string `model` /
 *   `view`; otherwise the staged query with warehouse targeting left unset.
 */
function queryValue(value: unknown): StagedLookerQuery | null {
  if (typeof value !== 'object' || !value) {
    return null;
  }
  const record = value as LookerRecord;
  const model = record.model;
  const view = record.view;
  if (typeof model !== 'string' || typeof view !== 'string') {
    return null;
  }

  const rawLimit = record.limit;
  // Accept either spelling of the dynamic-fields key.
  const rawDynamicFields = record.dynamic_fields ?? record.dynamicFields;
  return {
    id: nullableString(record.id) ?? undefined,
    model,
    view,
    fields: stringArray(record.fields),
    filters: recordValue(record.filters),
    sorts: stringArray(record.sorts),
    limit: typeof rawLimit === 'string' || typeof rawLimit === 'number' ? rawLimit : null,
    dynamicFields: nullableString(rawDynamicFields),
    targetWarehouseConnectionId: null,
    targetTable: null,
  };
}
|
||||
|
||||
/**
 * Parses a JSON string and keeps only its object rows.
 *
 * @returns The non-null object elements when the JSON is an array; an empty
 *   list for any other JSON value.
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function parseJsonRows(raw: string): LookerRecord[] {
  const parsed: unknown = JSON.parse(raw);
  if (!Array.isArray(parsed)) {
    return [];
  }
  return parsed.filter((row): row is LookerRecord => typeof row === 'object' && row !== null);
}
|
||||
|
||||
/**
 * Aggregates history rows into one usage signal per content id.
 *
 * For every content id the run counts are summed, distinct users are counted,
 * and the greatest `history.created_date` string is kept as `lastRunAt`.
 * `topUsers` lists up to five user ids ranked by their summed run count
 * (highest first), with ties broken by user id so the output is deterministic.
 * Rows without a content id are ignored.
 *
 * @param rows Parsed result rows of the usage query.
 * @param idField Column holding the content id.
 * @returns Signals sorted by content id.
 */
function aggregateUsageRows(
  rows: LookerRecord[],
  idField: 'dashboard.id' | 'look.id',
): StagedLookerSignalsFile['dashboardUsage'] {
  const byContent = new Map<
    string,
    {
      contentId: string;
      queryCount30d: number;
      lastRunAt: string | null;
      runsByUser: Map<string, number>;
    }
  >();

  for (const row of rows) {
    const contentId = nullableString(row[idField]);
    if (!contentId) {
      continue;
    }
    const current = byContent.get(contentId) ?? {
      contentId,
      queryCount30d: 0,
      lastRunAt: null,
      runsByUser: new Map<string, number>(),
    };
    const runCount = numberValue(row['history.query_run_count']);
    current.queryCount30d += runCount;
    const userId = nullableString(row['user.id']);
    if (userId) {
      current.runsByUser.set(userId, (current.runsByUser.get(userId) ?? 0) + runCount);
    }
    const lastRunAt = nullableString(row['history.created_date']);
    // Plain string comparison; relies on the date strings sorting chronologically.
    if (lastRunAt && (!current.lastRunAt || lastRunAt > current.lastRunAt)) {
      current.lastRunAt = lastRunAt;
    }
    byContent.set(contentId, current);
  }

  return [...byContent.values()]
    .map((signal) => ({
      contentId: signal.contentId,
      queryCount30d: signal.queryCount30d,
      uniqueUsers30d: signal.runsByUser.size,
      lastRunAt: signal.lastRunAt,
      topUsers: [...signal.runsByUser.entries()]
        .sort(([idA, runsA], [idB, runsB]) => runsB - runsA || (idA < idB ? -1 : idA > idB ? 1 : 0))
        .slice(0, 5)
        .map(([userId]) => userId),
    }))
    .sort((a, b) => a.contentId.localeCompare(b.contentId));
}
|
||||
|
||||
/**
 * Turns a dashboard or Look row into a zero-or-one element favorites list.
 *
 * @returns An empty list when the row has no usable id; otherwise one signal
 *   carrying the numeric `favorite_count`.
 */
function favoriteSignal(row: LookerRecord, contentType: 'dashboard' | 'look'): StagedLookerSignalsFile['favorites'] {
  const contentId = nullableString(row.id);
  return contentId ? [{ contentId, contentType, favoriteCount: numberValue(row.favorite_count) }] : [];
}
|
||||
|
||||
/**
 * Sort comparator ordering signals by `contentType:contentId`.
 *
 * A missing content type is treated as the empty string; the comparison is
 * delegated to `String.prototype.localeCompare`.
 */
function compareContentSignals(
  a: { contentType?: string; contentId: string },
  b: { contentType?: string; contentId: string },
): number {
  const sortKey = (signal: { contentType?: string; contentId: string }): string =>
    `${signal.contentType ?? ''}:${signal.contentId}`;
  return sortKey(a).localeCompare(sortKey(b));
}
|
||||
|
||||
/**
 * Coerces an unknown value to a finite number, defaulting to 0.
 *
 * Finite numbers pass through; non-blank strings are converted with
 * `Number()`; everything else, and any non-finite result, yields 0.
 */
function numberValue(value: unknown): number {
  switch (typeof value) {
    case 'number':
      return Number.isFinite(value) ? value : 0;
    case 'string': {
      if (value.trim() === '') {
        return 0;
      }
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : 0;
    }
    default:
      return 0;
  }
}
|
||||
|
||||
/** Extracts a printable message: `error.message` for `Error` instances, `String(error)` otherwise. */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/** Resolves after `ms` milliseconds, scheduled with `setTimeout`. */
async function sleep(ms: number): Promise<void> {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Extracts an HTTP status code from an error-like value.
 *
 * Reads `statusCode` (falling back to `status`) on the value itself. A number
 * is returned as is; a string is converted with `Number()` and yields `null`
 * when not finite. When neither is a number or string, the lookup recurses
 * into the value's `response` object.
 *
 * @returns The status code, or `null` when none can be found.
 */
function lookerStatusCode(error: unknown): number | null {
  if (typeof error !== 'object' || !error) {
    return null;
  }
  const record = error as Record<string, unknown>;
  const direct = record.statusCode ?? record.status;

  switch (typeof direct) {
    case 'number':
      return direct;
    case 'string': {
      const parsed = Number(direct);
      return Number.isFinite(parsed) ? parsed : null;
    }
    default:
      break;
  }

  const nested = record.response;
  return nested && typeof nested === 'object' ? lookerStatusCode(nested) : null;
}
|
||||
|
||||
/**
 * Computes how long to wait before retrying, in milliseconds.
 *
 * Uses the retry-after value found by `retryAfterHeader`: a numeric value is
 * read as seconds, anything else is parsed as a date and converted to the
 * time remaining from now. Results are never negative. Falls back to 1000 ms
 * when the header is missing or unparseable.
 */
function retryAfterMs(error: unknown): number {
  const fallbackMs = 1000;
  const header = retryAfterHeader(error);
  if (!header) {
    return fallbackMs;
  }

  const seconds = Number(header);
  if (Number.isFinite(seconds)) {
    return Math.max(0, seconds * 1000);
  }

  const retryAt = Date.parse(header);
  if (!Number.isFinite(retryAt)) {
    return fallbackMs;
  }
  return Math.max(0, retryAt - Date.now());
}
|
||||
|
||||
/**
 * Finds the `Retry-After` header value on an error-like object.
 *
 * Headers are taken from `error.headers`, falling back to
 * `error.response.headers`. When the headers object exposes a `get` function
 * it is called with `'retry-after'`; otherwise the object's own keys are
 * searched case-insensitively, because HTTP header names are not
 * case-sensitive and plain-object headers may use any casing.
 *
 * @returns The header value when it is a string, else `null`.
 */
function retryAfterHeader(error: unknown): string | null {
  if (!error || typeof error !== 'object') {
    return null;
  }
  const record = error as Record<string, unknown>;
  const response = record.response;
  const responseRecord = response && typeof response === 'object' ? (response as Record<string, unknown>) : null;
  const headers = record.headers ?? responseRecord?.headers;
  if (!headers || typeof headers !== 'object') {
    return null;
  }

  const getter = (headers as { get?: unknown }).get;
  if (typeof getter === 'function') {
    const value: unknown = getter.call(headers, 'retry-after');
    return typeof value === 'string' ? value : null;
  }

  for (const [name, value] of Object.entries(headers as Record<string, unknown>)) {
    if (name.toLowerCase() === 'retry-after' && typeof value === 'string') {
      return value;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Projects a raw field record onto the staged field shape.
 *
 * `name` is always a string (empty when absent); the remaining properties are
 * strings or `null`.
 */
function stagedField(value: LookerRecord) {
  const { name, label, type, sql, description } = value;
  return {
    name: stringValue(name),
    label: nullableString(label),
    type: nullableString(type),
    sql: nullableString(sql),
    description: nullableString(description),
  };
}
|
||||
|
||||
/**
 * Builds the list of folder names from the outermost known ancestor down to `folder`.
 *
 * Parents are resolved through `byId` via `parent_id`. The walk stops at a
 * folder without a known parent, or as soon as an id repeats, which guards
 * against cyclic parent links.
 *
 * @param folder Folder to start from.
 * @param byId Lookup of folder records by id.
 */
function folderPath(folder: LookerRecord, byId: Map<string, LookerRecord>): string[] {
  const segments: string[] = [];
  const visited = new Set<string>();

  for (let node: LookerRecord | undefined = folder; node; ) {
    const id = stringValue(node.id);
    if (visited.has(id)) {
      break;
    }
    visited.add(id);
    segments.unshift(stringValue(node.name));

    const parentId = nullableString(node.parent_id);
    node = parentId ? byId.get(parentId) : undefined;
  }
  return segments;
}
|
||||
|
||||
/** Returns the non-null object elements of `value` when it is an array, else an empty list. */
function arrayValue(value: unknown): LookerRecord[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter((item): item is LookerRecord => typeof item === 'object' && item !== null);
}
|
||||
|
||||
/** Returns a shallow copy of `value` when it is a non-array object, else an empty object. */
function recordValue(value: unknown): Record<string, unknown> {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  return { ...(value as Record<string, unknown>) };
}
|
||||
|
||||
/** Returns the string elements of `value` when it is an array, else an empty list. */
function stringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter((item): item is string => typeof item === 'string');
}
|
||||
|
||||
/** Stringifies `value` with `String()`, mapping `null` and `undefined` to the empty string. */
function stringValue(value: unknown): string {
  return value == null ? '' : String(value);
}
|
||||
|
||||
/** Stringifies `value` with `String()`, mapping `null` and `undefined` to `null`. */
function nullableString(value: unknown): string | null {
  return value == null ? null : String(value);
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createDaemonLookerTableIdentifierParser } from './daemon-table-identifier-parser.js';
|
||||
|
||||
// Covers the request/response contract of the daemon-backed parser using a stubbed HTTP runner.
describe('createDaemonLookerTableIdentifierParser', () => {
  it('posts parse items to the daemon endpoint', async () => {
    // Built fresh on every call so the stubbed response and the expectation never share a reference.
    const parsedOrders = () => ({
      ok: true,
      catalog: null,
      schema: 'public',
      name: 'orders',
      canonical_table: 'public.orders',
    });
    const requestJson = vi.fn(async () => ({
      results: {
        orders: parsedOrders(),
      },
    }));
    const parser = createDaemonLookerTableIdentifierParser({
      baseUrl: 'http://127.0.0.1:8765',
      requestJson,
    });

    const parsed = parser.parse([{ key: 'orders', sql_table_name: 'public.orders', dialect: 'postgres' }]);

    await expect(parsed).resolves.toEqual({
      orders: parsedOrders(),
    });
    expect(requestJson).toHaveBeenCalledWith('/sql/parse-table-identifier', {
      items: [{ key: 'orders', sql_table_name: 'public.orders', dialect: 'postgres' }],
    });
  });

  it('rejects non-object daemon responses', async () => {
    const nullResults = async () => ({ results: null });
    const parser = createDaemonLookerTableIdentifierParser({
      baseUrl: 'http://127.0.0.1:8765',
      requestJson: nullResults,
    });

    await expect(parser.parse([])).rejects.toThrow('klo-daemon table identifier parser returned invalid results');
  });
});
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
import { request as httpRequest } from 'node:http';
|
||||
import { request as httpsRequest } from 'node:https';
|
||||
import { URL } from 'node:url';
|
||||
import type {
|
||||
LookerParsedIdentifier,
|
||||
LookerTableIdentifierParseItem,
|
||||
LookerTableIdentifierParser,
|
||||
} from './mapping.js';
|
||||
|
||||
/**
 * Sends a JSON payload to a daemon path and resolves with the decoded JSON object.
 *
 * Injectable so callers (and tests) can replace the built-in HTTP transport.
 */
export type KloDaemonTableIdentifierHttpJsonRunner = (
  path: string,
  payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;
|
||||
|
||||
/** Options for `createDaemonLookerTableIdentifierParser`. */
export interface DaemonLookerTableIdentifierParserOptions {
  /** Daemon base URL; only used to build the default transport when `requestJson` is omitted. */
  baseUrl: string;
  /** Optional transport override used instead of the built-in HTTP POST. */
  requestJson?: KloDaemonTableIdentifierHttpJsonRunner;
}
|
||||
|
||||
/**
 * Creates a table-identifier parser that delegates to the daemon's
 * `/sql/parse-table-identifier` endpoint.
 *
 * The returned `parse` sends `{ items }` through `options.requestJson` (or the
 * default HTTP transport) and returns the response's `results` object.
 *
 * @throws Error (from `parse`) when `results` is missing, not an object, or an array.
 */
export function createDaemonLookerTableIdentifierParser(
  options: DaemonLookerTableIdentifierParserOptions,
): LookerTableIdentifierParser {
  const send = options.requestJson ?? postJson(options.baseUrl);

  const parse = async (items: LookerTableIdentifierParseItem[]): Promise<Record<string, LookerParsedIdentifier>> => {
    const raw = await send('/sql/parse-table-identifier', { items });
    const results = raw.results;
    const isPlainObject = typeof results === 'object' && results !== null && !Array.isArray(results);
    if (!isPlainObject) {
      throw new Error('klo-daemon table identifier parser returned invalid results');
    }
    return results as Record<string, LookerParsedIdentifier>;
  };

  return { parse };
}
|
||||
|
||||
/** Ensures the base URL ends with a slash so relative paths resolve beneath it. */
function normalizedBaseUrl(baseUrl: string): string {
  if (baseUrl.endsWith('/')) {
    return baseUrl;
  }
  return `${baseUrl}/`;
}
|
||||
|
||||
/**
 * Builds the default transport: an HTTP(S) JSON POST against the daemon.
 *
 * The returned runner resolves `path` relative to `baseUrl`, posts the payload
 * as JSON and resolves with the decoded body.
 *
 * The runner rejects when:
 * - the request or the response stream emits an error (including a
 *   connection dropped while the body is still being read),
 * - the status code is outside 200-299 (the body text is included in the message),
 * - the body is not valid JSON, or is JSON that is not a plain object.
 *
 * @param baseUrl Daemon base URL; its protocol selects the http or https client.
 */
function postJson(baseUrl: string): KloDaemonTableIdentifierHttpJsonRunner {
  return async (path, payload) =>
    new Promise((resolve, reject) => {
      // Strip the leading slash so the path is appended to the base URL's own path.
      const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
      const body = JSON.stringify(payload);
      const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
      const request = client(
        target,
        {
          method: 'POST',
          headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            'content-length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const chunks: Buffer[] = [];
          response.on('data', (chunk: Buffer) => chunks.push(chunk));
          // Without this listener a mid-body failure is never turned into a rejection.
          response.on('error', reject);
          response.on('end', () => {
            const text = Buffer.concat(chunks).toString('utf8');
            const statusCode = response.statusCode ?? 0;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`klo-daemon HTTP ${path} failed with ${statusCode}: ${text}`));
              return;
            }
            try {
              const parsed = JSON.parse(text) as unknown;
              if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
                reject(new Error(`klo-daemon HTTP ${path} returned non-object JSON`));
                return;
              }
              resolve(parsed as Record<string, unknown>);
            } catch (error) {
              reject(error);
            }
          });
        },
      );
      request.on('error', reject);
      request.end(body);
    });
}
|
||||
47
packages/context/src/ingest/adapters/looker/detect.test.ts
Normal file
47
packages/context/src/ingest/adapters/looker/detect.test.ts
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { detectLookerStagedDir } from './detect.js';
|
||||
|
||||
/**
 * Writes a fixture file below `stagedDir`, creating parent directories first.
 *
 * @param relPath Path relative to `stagedDir`.
 * @param body File contents; defaults to an empty JSON object.
 */
async function touch(stagedDir: string, relPath: string, body = '{}\n'): Promise<void> {
  const filePath = join(stagedDir, relPath);
  const parentDir = join(filePath, '..');
  await mkdir(parentDir, { recursive: true });
  await writeFile(filePath, body, 'utf-8');
}
|
||||
|
||||
// Each case stages files in a throwaway temp directory and checks the detection verdict.
describe('detectLookerStagedDir', () => {
  let stagedDir: string;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-detect-'));
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('returns true when sync-config.json and at least one runtime entity are present', async () => {
    await touch(stagedDir, 'sync-config.json');
    await touch(stagedDir, 'explores/b2b/sales_pipeline.json');

    const detected = await detectLookerStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns true for dashboard-only staged dirs', async () => {
    await touch(stagedDir, 'sync-config.json');
    await touch(stagedDir, 'dashboards/10.json');

    const detected = await detectLookerStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns false without sync-config.json', async () => {
    await touch(stagedDir, 'looks/20.json');

    const detected = await detectLookerStagedDir(stagedDir);
    expect(detected).toBe(false);
  });

  it('returns false when only control files are present', async () => {
    await touch(stagedDir, 'sync-config.json');
    await touch(stagedDir, 'lookml_models.json');
    await touch(stagedDir, 'signals/dashboard_usage.json', '[]\n');

    const detected = await detectLookerStagedDir(stagedDir);
    expect(detected).toBe(false);
  });
});
|
||||
28
packages/context/src/ingest/adapters/looker/detect.ts
Normal file
28
packages/context/src/ingest/adapters/looker/detect.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import { readdir, stat } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import { STAGED_FILES } from './types.js';
|
||||
|
||||
// Matches staged entity files by relative path: explores/<dir>/<name>.json, dashboards/<name>.json, looks/<name>.json.
const LOOKER_ENTITY_FILE_RE = /^(explores\/[^/]+\/[^/]+|dashboards\/[^/]+|looks\/[^/]+)\.json$/;
|
||||
|
||||
/**
 * Lists every file beneath `root`, recursively.
 *
 * @returns Paths relative to `root`, with backslashes converted to forward
 *   slashes, in sorted order.
 */
async function walk(root: string): Promise<string[]> {
  const relPaths: string[] = [];
  for (const entry of await readdir(root, { withFileTypes: true, recursive: true })) {
    if (!entry.isFile()) {
      continue;
    }
    const absPath = join(entry.parentPath, entry.name);
    relPaths.push(relative(root, absPath).replace(/\\/g, '/'));
  }
  return relPaths.sort();
}
|
||||
|
||||
/**
 * Reports whether `stagedDir` looks like a Looker staged directory.
 *
 * That requires the sync-config file to exist and at least one file whose
 * relative path matches `LOOKER_ENTITY_FILE_RE`. Any filesystem error along
 * the way is treated as "not detected".
 */
export async function detectLookerStagedDir(stagedDir: string): Promise<boolean> {
  try {
    // stat() throws when the sync-config file is absent.
    await stat(join(stagedDir, STAGED_FILES.syncConfig));
    const files = await walk(stagedDir);
    return files.some((relPath) => LOOKER_ENTITY_FILE_RE.test(relPath));
  } catch {
    return false;
  }
}
|
||||
|
|
@ -0,0 +1,188 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { getLookerTriageSignals, writeLookerEvidenceDocuments } from './evidence-documents.js';
|
||||
|
||||
/** Writes `value` as pretty-printed JSON (plus trailing newline) to `relPath` under `root`, creating directories as needed. */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const filePath = join(root, relPath);
  const serialized = `${JSON.stringify(value, null, 2)}\n`;
  await mkdir(dirname(filePath), { recursive: true });
  await writeFile(filePath, serialized, 'utf-8');
}
|
||||
|
||||
/** Reads `relPath` under `root` and parses it as JSON; the result is cast to `T` without validation. */
async function readJson<T>(root: string, relPath: string): Promise<T> {
  const text = await readFile(join(root, relPath), 'utf-8');
  return JSON.parse(text) as T;
}
|
||||
|
||||
// Stages one explore, one dashboard, one Look and all four signal files, then checks the
// generated evidence documents and the triage signals derived from them.
describe('Looker evidence documents', () => {
  let stagedDir: string;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-evidence-docs-'));

    // Runtime entities.
    await writeJson(stagedDir, 'explores/b2b/sales_pipeline.json', {
      modelName: 'b2b',
      exploreName: 'sales_pipeline',
      label: 'Sales Pipeline',
      description: 'Pipeline analysis explore.',
      fields: {
        dimensions: [
          { name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '${TABLE}.stage', description: null },
        ],
        measures: [
          {
            name: 'opportunities.arr',
            label: 'ARR',
            type: 'sum',
            sql: '${TABLE}.arr',
            description: 'Annual recurring revenue.',
          },
        ],
      },
      joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }],
    });
    await writeJson(stagedDir, 'dashboards/10.json', {
      lookerId: '10',
      title: 'Sales Pipeline Overview',
      description: 'Executive dashboard for open pipeline ARR.',
      folderId: '7',
      ownerId: '3',
      updatedAt: '2026-04-30T10:00:00.000Z',
      tiles: [
        {
          id: '100',
          title: 'Open Pipeline ARR',
          lookId: null,
          query: {
            model: 'b2b',
            view: 'sales_pipeline',
            fields: ['opportunities.arr', 'opportunities.stage'],
            filters: { 'opportunities.stage': 'open' },
            sorts: ['opportunities.arr desc'],
            limit: '500',
          },
        },
      ],
    });
    await writeJson(stagedDir, 'looks/20.json', {
      lookerId: '20',
      title: 'Active Opportunity Pipeline',
      description: 'Saved Look for active opportunity pipeline review.',
      folderId: '7',
      ownerId: '3',
      updatedAt: '2026-04-30T11:00:00.000Z',
      query: {
        model: 'b2b',
        view: 'sales_pipeline',
        fields: ['opportunities.arr'],
        filters: { 'opportunities.stage': 'open' },
        sorts: [],
        limit: '500',
      },
    });

    // Prioritization signals.
    await writeJson(stagedDir, 'signals/dashboard_usage.json', [
      {
        contentId: '10',
        queryCount30d: 80,
        uniqueUsers30d: 12,
        lastRunAt: '2026-04-30T09:00:00.000Z',
        topUsers: ['3'],
      },
    ]);
    await writeJson(stagedDir, 'signals/look_usage.json', [
      {
        contentId: '20',
        queryCount30d: 2,
        uniqueUsers30d: 1,
        lastRunAt: '2026-04-29T09:00:00.000Z',
        topUsers: ['3'],
      },
    ]);
    await writeJson(stagedDir, 'signals/scheduled_plans.json', [
      { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 2, recipientCount: 5 },
    ]);
    await writeJson(stagedDir, 'signals/favorites.json', [
      { contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
    ]);
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('writes indexable metadata and markdown for explores, dashboards, and Looks', async () => {
    await writeLookerEvidenceDocuments(stagedDir);

    const exploreMetadata = readJson(stagedDir, 'evidence/explores/b2b/sales_pipeline/metadata.json');
    await expect(exploreMetadata).resolves.toMatchObject({
      objectType: 'looker_explore',
      id: 'looker:explore:b2b.sales_pipeline',
      title: 'Sales Pipeline',
      path: 'Looker / Explores / b2b.sales_pipeline',
      properties: {
        rawPath: 'explores/b2b/sales_pipeline.json',
        modelName: 'b2b',
        exploreName: 'sales_pipeline',
      },
    });

    const dashboardMetadata = readJson(stagedDir, 'evidence/dashboards/10/metadata.json');
    await expect(dashboardMetadata).resolves.toMatchObject({
      objectType: 'looker_dashboard',
      id: 'looker:dashboard:10',
      title: 'Sales Pipeline Overview',
      path: 'Looker / Dashboards / Sales Pipeline Overview',
      lastEditedAt: '2026-04-30T10:00:00.000Z',
      properties: {
        rawPath: 'dashboards/10.json',
        lookerId: '10',
      },
    });

    const lookMetadata = readJson(stagedDir, 'evidence/looks/20/metadata.json');
    await expect(lookMetadata).resolves.toMatchObject({
      objectType: 'looker_look',
      id: 'looker:look:20',
      title: 'Active Opportunity Pipeline',
      path: 'Looker / Looks / Active Opportunity Pipeline',
      properties: {
        rawPath: 'looks/20.json',
        lookerId: '20',
      },
    });

    const dashboardMarkdown = await readFile(join(stagedDir, 'evidence/dashboards/10/page.md'), 'utf-8');
    const expectedFragments = [
      '# Sales Pipeline Overview',
      'Executive dashboard for open pipeline ARR.',
      '## Tile: Open Pipeline ARR',
      '- model: b2b',
      '- explore: sales_pipeline',
      '- opportunities.stage = open',
    ];
    for (const fragment of expectedFragments) {
      expect(dashboardMarkdown).toContain(fragment);
    }
    // Usage, scheduling, favorite and ownership details must not leak into the prose.
    const forbiddenFragments = ['80', 'queryCount30d', 'recipient', 'favorite', 'owner'];
    for (const fragment of forbiddenFragments) {
      expect(dashboardMarkdown).not.toContain(fragment);
    }
  });

  it('returns usage-aware triage signals without exposing usage as document prose', async () => {
    await writeLookerEvidenceDocuments(stagedDir);

    const dashboardSignals = getLookerTriageSignals(stagedDir, 'looker:dashboard:10');
    await expect(dashboardSignals).resolves.toEqual({
      objectType: 'looker_dashboard',
      propertyHints: {
        contentType: 'dashboard',
        queryCount30d: '80',
        uniqueUsers30d: '12',
        isScheduled: 'true',
        favoriteCount: '4',
      },
      lastEditedAt: '2026-04-30T10:00:00.000Z',
    });

    const lookSignals = getLookerTriageSignals(stagedDir, 'looker:look:20');
    await expect(lookSignals).resolves.toEqual({
      objectType: 'looker_look',
      propertyHints: {
        contentType: 'look',
        queryCount30d: '2',
        uniqueUsers30d: '1',
        isScheduled: 'false',
        favoriteCount: '0',
      },
      lastEditedAt: '2026-04-30T11:00:00.000Z',
    });
  });
});
|
||||
|
|
@ -0,0 +1,378 @@
|
|||
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join, relative } from 'node:path';
|
||||
import type { TriageSignals } from '../../types.js';
|
||||
import {
|
||||
STAGED_FILES,
|
||||
type StagedDashboardFile,
|
||||
type StagedExploreFile,
|
||||
type StagedLookerSignalsFile,
|
||||
type StagedLookFile,
|
||||
stagedDashboardFileSchema,
|
||||
stagedExploreFileSchema,
|
||||
stagedLookerSignalsFileSchema,
|
||||
stagedLookFileSchema,
|
||||
} from './types.js';
|
||||
|
||||
/** Loosely typed JSON object; used for the metadata payload of an evidence document. */
type JsonObject = Record<string, unknown>;
|
||||
|
||||
/** One rendered evidence document, ready to be written beneath the staged directory. */
interface EvidenceDocument {
  /** Directory, relative to the staged dir, that receives `metadata.json` and `page.md`. */
  relDir: string;
  /** Contents of `metadata.json`. */
  metadata: JsonObject;
  /** Contents of `page.md`. */
  markdown: string;
}
|
||||
|
||||
/**
 * Renders evidence documents for every staged explore, dashboard and Look.
 *
 * All staged entity files are read and rendered first; then each document's
 * `metadata.json` and `page.md` are written, followed by a
 * `signals-summary.json` holding the number of entries per signal list.
 *
 * @param stagedDir Root of the staged Looker directory.
 */
export async function writeLookerEvidenceDocuments(stagedDir: string): Promise<void> {
  const explorePath = /^explores\/[^/]+\/[^/]+\.json$/;
  const dashboardPath = /^dashboards\/[^/]+\.json$/;
  const lookPath = /^looks\/[^/]+\.json$/;

  const paths = await walkJson(stagedDir);
  const signals = await readSignals(stagedDir);
  const documents: EvidenceDocument[] = [];

  for (const relPath of paths) {
    if (explorePath.test(relPath)) {
      const explore = await readJson(stagedDir, relPath, stagedExploreFileSchema);
      documents.push(renderExploreEvidence(relPath, explore));
    } else if (dashboardPath.test(relPath)) {
      const dashboard = await readJson(stagedDir, relPath, stagedDashboardFileSchema);
      documents.push(renderDashboardEvidence(relPath, dashboard));
    } else if (lookPath.test(relPath)) {
      const look = await readJson(stagedDir, relPath, stagedLookFileSchema);
      documents.push(renderLookEvidence(relPath, look));
    }
  }

  for (const { relDir, metadata, markdown } of documents) {
    await writeJson(stagedDir, join(relDir, 'metadata.json'), metadata);
    await writeText(stagedDir, join(relDir, 'page.md'), markdown);
  }

  const { dashboardUsage, lookUsage, scheduledPlans, favorites } = signals;
  await writeJson(stagedDir, join(STAGED_FILES.evidenceRoot, 'signals-summary.json'), {
    dashboardUsageCount: dashboardUsage.length,
    lookUsageCount: lookUsage.length,
    scheduledPlanCount: scheduledPlans.length,
    favoriteCount: favorites.length,
  });
}
|
||||
|
||||
/**
 * Builds triage signals for one evidence document, identified by its external id.
 *
 * - `looker:dashboard:<id>` / `looker:look:<id>`: combines the staged entity's
 *   `updatedAt` (falling back to the last usage run) with usage, schedule and
 *   favorite signals, all rendered as strings and defaulting to 0 / false.
 * - `looker:explore:<model>.<explore>`: returns only model and explore names.
 * - Anything else: a bare `looker_runtime` object type.
 *
 * @param stagedDir Root of the staged Looker directory.
 * @param externalId Evidence document id.
 */
export async function getLookerTriageSignals(stagedDir: string, externalId: string): Promise<TriageSignals> {
  const signals = await readSignals(stagedDir);

  const dashboardId = externalId.match(/^looker:dashboard:(.+)$/)?.[1];
  if (dashboardId) {
    const isThisDashboard = (item: { contentType: string; contentId: string }): boolean =>
      item.contentType === 'dashboard' && item.contentId === dashboardId;
    const dashboard = await readOptionalJson(
      stagedDir,
      `dashboards/${safePathSegment(dashboardId)}.json`,
      stagedDashboardFileSchema,
    );
    const usage = signals.dashboardUsage.find((item) => item.contentId === dashboardId);
    const schedule = signals.scheduledPlans.find(isThisDashboard);
    const favorite = signals.favorites.find(isThisDashboard);
    return {
      objectType: 'looker_dashboard',
      lastEditedAt: dashboard?.updatedAt ?? usage?.lastRunAt ?? undefined,
      propertyHints: {
        contentType: 'dashboard',
        queryCount30d: String(usage?.queryCount30d ?? 0),
        uniqueUsers30d: String(usage?.uniqueUsers30d ?? 0),
        isScheduled: String(schedule?.isScheduled ?? false),
        favoriteCount: String(favorite?.favoriteCount ?? 0),
      },
    };
  }

  const lookId = externalId.match(/^looker:look:(.+)$/)?.[1];
  if (lookId) {
    const isThisLook = (item: { contentType: string; contentId: string }): boolean =>
      item.contentType === 'look' && item.contentId === lookId;
    const look = await readOptionalJson(stagedDir, `looks/${safePathSegment(lookId)}.json`, stagedLookFileSchema);
    const usage = signals.lookUsage.find((item) => item.contentId === lookId);
    const schedule = signals.scheduledPlans.find(isThisLook);
    const favorite = signals.favorites.find(isThisLook);
    return {
      objectType: 'looker_look',
      lastEditedAt: look?.updatedAt ?? usage?.lastRunAt ?? undefined,
      propertyHints: {
        contentType: 'look',
        queryCount30d: String(usage?.queryCount30d ?? 0),
        uniqueUsers30d: String(usage?.uniqueUsers30d ?? 0),
        isScheduled: String(schedule?.isScheduled ?? false),
        favoriteCount: String(favorite?.favoriteCount ?? 0),
      },
    };
  }

  // The model name is everything up to the first dot; the explore name is the rest.
  const exploreMatch = externalId.match(/^looker:explore:([^.]+)\.(.+)$/);
  if (exploreMatch) {
    return {
      objectType: 'looker_explore',
      propertyHints: {
        contentType: 'explore',
        modelName: exploreMatch[1],
        exploreName: exploreMatch[2],
      },
    };
  }

  return { objectType: 'looker_runtime' };
}
|
||||
|
||||
/**
 * Renders one staged Looker explore as an evidence document.
 *
 * The document directory is `<evidenceRoot>/explores/<model>/<explore>`; both
 * trailing segments pass through `safePathSegment`, which throws on values it
 * rejects. The markdown lists the explore's identity, dimensions, measures and
 * joins.
 *
 * @param rawPath - Staged path of the explore file; stored as `metadata.properties.rawPath`.
 * @param explore - Parsed staged explore file.
 * @returns The evidence directory, metadata and normalized markdown body.
 */
function renderExploreEvidence(rawPath: string, explore: StagedExploreFile): EvidenceDocument {
  const { modelName, exploreName } = explore;
  const qualifiedName = `${modelName}.${exploreName}`;
  const title = explore.label ?? qualifiedName;
  const relDir = join(STAGED_FILES.evidenceRoot, 'explores', safePathSegment(modelName), safePathSegment(exploreName));

  const dimensionItems = fieldLines(explore.fields.dimensions);
  const measureItems = fieldLines(explore.fields.measures);
  const joinItems =
    explore.joins.length > 0
      ? explore.joins.map((item) => {
          // The relationship is appended in parentheses only when it is set.
          const relationship = item.relationship ? ` (${item.relationship})` : '';
          return `- ${item.name}${relationship}`;
        })
      : ['- none'];

  const lines = [
    `# ${title}`,
    '',
    explore.description || '',
    '',
    '## Explore',
    '',
    `- model: ${modelName}`,
    `- explore: ${exploreName}`,
    '',
    '## Dimensions',
    '',
    ...dimensionItems,
    '',
    '## Measures',
    '',
    ...measureItems,
    '',
    '## Joins',
    '',
    ...joinItems,
  ];

  return {
    relDir,
    metadata: {
      objectType: 'looker_explore',
      id: `looker:explore:${qualifiedName}`,
      title,
      path: `Looker / Explores / ${qualifiedName}`,
      url: null,
      parentId: null,
      databaseId: null,
      dataSourceId: null,
      lastEditedAt: null,
      lastEditedBy: null,
      properties: {
        rawPath,
        modelName,
        exploreName,
      },
    },
    markdown: normalizeMarkdown(lines),
  };
}
|
||||
|
||||
/**
 * Renders one staged Looker dashboard as an evidence document.
 *
 * The document directory is `<evidenceRoot>/dashboards/<lookerId>`; the id
 * passes through `safePathSegment`, which throws on values it rejects. Each
 * tile contributes a `## Tile: …` section holding its inline query, or a
 * placeholder bullet when the tile has no query.
 *
 * @param rawPath - Staged path of the dashboard file; stored as `metadata.properties.rawPath`.
 * @param dashboard - Parsed staged dashboard file.
 * @returns The evidence directory, metadata and normalized markdown body.
 */
function renderDashboardEvidence(rawPath: string, dashboard: StagedDashboardFile): EvidenceDocument {
  const { lookerId, title } = dashboard;
  const relDir = join(STAGED_FILES.evidenceRoot, 'dashboards', safePathSegment(lookerId));

  const lines: string[] = [`# ${title}`, '', dashboard.description ?? '', '', '## Dashboard Queries', ''];
  for (const tile of dashboard.tiles) {
    // A tile title falls back to the tile id when the title is null/undefined.
    const heading = `## Tile: ${tile.title ?? tile.id}`;
    const body = tile.query ? queryLines(tile.query) : ['- no inline query captured'];
    lines.push(heading, '', ...body, '');
  }

  return {
    relDir,
    metadata: {
      objectType: 'looker_dashboard',
      id: `looker:dashboard:${lookerId}`,
      title,
      path: `Looker / Dashboards / ${title}`,
      url: null,
      parentId: dashboard.folderId,
      databaseId: null,
      dataSourceId: null,
      lastEditedAt: dashboard.updatedAt,
      lastEditedBy: null,
      properties: {
        rawPath,
        lookerId,
      },
    },
    markdown: normalizeMarkdown(lines),
  };
}
|
||||
|
||||
/**
 * Renders one staged Looker look as an evidence document.
 *
 * The document directory is `<evidenceRoot>/looks/<lookerId>`; the id passes
 * through `safePathSegment`, which throws on values it rejects. The markdown
 * holds the title, description and the look's query (or a placeholder bullet
 * when no query was captured).
 *
 * @param rawPath - Staged path of the look file; stored as `metadata.properties.rawPath`.
 * @param look - Parsed staged look file.
 * @returns The evidence directory, metadata and normalized markdown body.
 */
function renderLookEvidence(rawPath: string, look: StagedLookFile): EvidenceDocument {
  const { lookerId, title } = look;
  const relDir = join(STAGED_FILES.evidenceRoot, 'looks', safePathSegment(lookerId));

  const queryBody = look.query ? queryLines(look.query) : ['- no query captured'];
  const lines: string[] = [`# ${title}`, '', look.description ?? '', '', '## Look Query', ''].concat(queryBody);

  return {
    relDir,
    metadata: {
      objectType: 'looker_look',
      id: `looker:look:${lookerId}`,
      title,
      path: `Looker / Looks / ${title}`,
      url: null,
      parentId: look.folderId,
      databaseId: null,
      dataSourceId: null,
      lastEditedAt: look.updatedAt,
      lastEditedBy: null,
      properties: {
        rawPath,
        lookerId,
      },
    },
    markdown: normalizeMarkdown(lines),
  };
}
|
||||
|
||||
/**
 * Formats explore fields as markdown bullet lines.
 *
 * Each bullet joins the field name with its `label`, `type` and `description`
 * using `'; '`; any of these that is empty or null is left out. The `sql`
 * property is accepted but not rendered.
 *
 * @param fields - Dimension or measure descriptors.
 * @returns One bullet per field, or `['- none']` when `fields` is empty.
 */
function fieldLines(
  fields: Array<{
    name: string;
    label: string | null;
    type: string | null;
    sql: string | null;
    description: string | null;
  }>,
): string[] {
  const bullets: string[] = [];
  for (const { name, label, type, description } of fields) {
    const parts: string[] = [];
    if (name) {
      parts.push(name);
    }
    if (label) {
      parts.push(`label: ${label}`);
    }
    if (type) {
      parts.push(`type: ${type}`);
    }
    if (description) {
      parts.push(`description: ${description}`);
    }
    bullets.push(`- ${parts.join('; ')}`);
  }
  return bullets.length > 0 ? bullets : ['- none'];
}
|
||||
|
||||
/**
 * Formats a staged Looker query as markdown lines.
 *
 * The output names the model and explore (the query's `view`), then lists the
 * selected fields under `### Fields` and the filters under `### Filters`.
 *
 * @param query - Staged query, or a falsy value when none was captured.
 * @returns Markdown lines; `['- no query captured']` when `query` is falsy.
 */
function queryLines(query: StagedDashboardFile['tiles'][number]['query']): string[] {
  if (!query) {
    return ['- no query captured'];
  }

  const identity = [`- model: ${query.model}`, `- explore: ${query.view}`, ''];
  const fieldItems = query.fields.length > 0 ? query.fields.map((field) => `- ${field}`) : ['- none'];
  const filterItems = filterLines(query.filters);

  return identity.concat(['### Fields', ''], fieldItems, ['', '### Filters', ''], filterItems);
}
|
||||
|
||||
/**
 * Formats query filters as markdown bullet lines of the form `- field = value`.
 *
 * Entries whose value is `null`, `undefined`, or whose `String(value)` is blank
 * after trimming are omitted. Primitive values are rendered with `String()`.
 * Non-null objects and arrays are rendered as JSON, because `String()` on a
 * plain object yields the uninformative `[object Object]`.
 *
 * @param filters - Filter expressions keyed by field name.
 * @returns One bullet per retained filter, or `['- none']` when none remain.
 */
function filterLines(filters: Record<string, unknown>): string[] {
  const renderValue = (value: unknown): string => {
    if (typeof value !== 'object' || value === null) {
      return String(value);
    }
    try {
      // JSON.stringify returns undefined for some inputs (e.g. toJSON returning
      // undefined); fall back to String() so a bullet is always produced.
      return JSON.stringify(value) ?? String(value);
    } catch {
      // Cyclic structures or BigInt members cannot be serialized as JSON.
      return String(value);
    }
  };

  const entries = Object.entries(filters).filter(
    ([, value]) => value !== null && value !== undefined && String(value).trim() !== '',
  );
  if (entries.length === 0) {
    return ['- none'];
  }
  return entries.map(([field, value]) => `- ${field} = ${renderValue(value)}`);
}
|
||||
|
||||
/**
 * Loads the four staged signal files (dashboard usage, look usage, scheduled
 * plans, favorites) concurrently and validates them as one signals object.
 *
 * @param stagedDir - Root directory of the staged bundle.
 * @returns The result of `stagedLookerSignalsFileSchema.parse` on the combined arrays.
 */
async function readSignals(stagedDir: string): Promise<StagedLookerSignalsFile> {
  const signalPaths = STAGED_FILES.signals;
  const load = (relPath: string): Promise<unknown[]> => readOptionalArray(stagedDir, relPath);

  const [dashboardUsage, lookUsage, scheduledPlans, favorites] = await Promise.all([
    load(signalPaths.dashboardUsage),
    load(signalPaths.lookUsage),
    load(signalPaths.scheduledPlans),
    load(signalPaths.favorites),
  ]);

  const combined = { dashboardUsage, lookUsage, scheduledPlans, favorites };
  return stagedLookerSignalsFileSchema.parse(combined);
}
|
||||
|
||||
/**
 * Reads a staged JSON file that is expected to contain an array.
 *
 * @param stagedDir - Root directory of the staged bundle.
 * @param relPath - File path relative to `stagedDir`.
 * @returns The parsed array; `[]` when the file does not exist (`ENOENT`) or
 *   when its JSON content is not an array.
 * @throws Any other read error, and JSON syntax errors, are rethrown unchanged.
 */
async function readOptionalArray(stagedDir: string, relPath: string): Promise<unknown[]> {
  try {
    const raw = await readFile(join(stagedDir, relPath), 'utf-8');
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed;
    }
    return [];
  } catch (error) {
    // Only a missing file counts as "no data"; everything else propagates.
    const code = typeof error === 'object' && error !== null && 'code' in error ? error.code : undefined;
    if (code === 'ENOENT') {
      return [];
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Reads and validates a staged JSON file that may legitimately be absent.
 *
 * @param stagedDir - Root directory of the staged bundle.
 * @param relPath - File path relative to `stagedDir`.
 * @param schema - Validator whose `parse` turns the decoded JSON into `T`.
 * @returns The validated value, or `null` when the read fails with `ENOENT`.
 * @throws Any other error raised while reading, decoding or validating.
 */
async function readOptionalJson<T>(
  stagedDir: string,
  relPath: string,
  schema: { parse(value: unknown): T },
): Promise<T | null> {
  let loaded: T;
  try {
    loaded = await readJson(stagedDir, relPath, schema);
  } catch (error) {
    // A missing file maps to null; every other failure propagates.
    const code = typeof error === 'object' && error !== null && 'code' in error ? error.code : undefined;
    if (code === 'ENOENT') {
      return null;
    }
    throw error;
  }
  return loaded;
}
|
||||
|
||||
/**
 * Reads a staged file as UTF-8, decodes it as JSON and validates it.
 *
 * @param stagedDir - Root directory of the staged bundle.
 * @param relPath - File path relative to `stagedDir`.
 * @param schema - Validator whose `parse` turns the decoded JSON into `T`.
 * @returns The value returned by `schema.parse`.
 */
async function readJson<T>(stagedDir: string, relPath: string, schema: { parse(value: unknown): T }): Promise<T> {
  const raw = await readFile(join(stagedDir, relPath), 'utf-8');
  const decoded: unknown = JSON.parse(raw);
  return schema.parse(decoded);
}
|
||||
|
||||
/**
 * Serializes `value` as 2-space-indented JSON followed by a newline and writes
 * it through `writeText`.
 *
 * @param stagedDir - Root directory of the staged bundle.
 * @param relPath - Destination path relative to `stagedDir`.
 * @param value - Value to serialize.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const serialized = JSON.stringify(value, null, 2);
  await writeText(stagedDir, relPath, `${serialized}\n`);
}
|
||||
|
||||
/**
 * Writes `body` as UTF-8 to `relPath` under `stagedDir`, creating any missing
 * parent directories first.
 *
 * @param stagedDir - Root directory of the staged bundle.
 * @param relPath - Destination path relative to `stagedDir`.
 * @param body - Text content to write.
 */
async function writeText(stagedDir: string, relPath: string, body: string): Promise<void> {
  const destination = join(stagedDir, relPath);
  const parentDir = dirname(destination);
  await mkdir(parentDir, { recursive: true });
  await writeFile(destination, body, 'utf-8');
}
|
||||
|
||||
/**
 * Recursively collects the `.json` files below `dir`.
 *
 * @param root - Directory that returned paths are made relative to.
 * @param dir - Directory to scan; defaults to `root`.
 * @returns Sorted paths relative to `root`, with backslashes replaced by `/`.
 */
async function walkJson(root: string, dir = root): Promise<string[]> {
  const found: string[] = [];
  for (const dirent of await readdir(dir, { withFileTypes: true })) {
    const fullPath = join(dir, dirent.name);
    if (dirent.isDirectory()) {
      const nested = await walkJson(root, fullPath);
      found.push(...nested);
    } else if (dirent.isFile() && dirent.name.endsWith('.json')) {
      // Normalize Windows separators so staged paths use forward slashes.
      found.push(relative(root, fullPath).replace(/\\/g, '/'));
    }
  }
  found.sort();
  return found;
}
|
||||
|
||||
/**
 * Validates a value before it is used as a single path segment.
 *
 * Only non-empty strings made of ASCII letters, digits, `_` and `-` are
 * accepted, so separators and `..` can never reach a joined path.
 *
 * @param value - Candidate path segment.
 * @returns `value` unchanged when it is accepted.
 * @throws Error when `value` contains any other character or is empty.
 */
function safePathSegment(value: string): string {
  const isSafe = /^[a-zA-Z0-9_-]+$/.test(value);
  if (isSafe) {
    return value;
  }
  throw new Error(`Unsafe Looker evidence path segment: ${value}`);
}
|
||||
|
||||
/**
 * Joins markdown lines into one document.
 *
 * Runs of consecutive empty lines collapse to a single empty line, the joined
 * text is trimmed, and exactly one trailing newline is appended.
 *
 * @param lines - Markdown lines without line terminators.
 * @returns The normalized markdown text.
 */
function normalizeMarkdown(lines: string[]): string {
  const kept: string[] = [];
  let previous: string | undefined;
  for (const line of lines) {
    // Drop an empty line only when the line right before it was also empty.
    if (line !== '' || previous !== '') {
      kept.push(line);
    }
    previous = line;
  }
  return `${kept.join('\n').trim()}\n`;
}
|
||||
74
packages/context/src/ingest/adapters/looker/factory.test.ts
Normal file
74
packages/context/src/ingest/adapters/looker/factory.test.ts
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { FetchContext } from '../../types.js';
|
||||
import type { LookerSdkPort } from './client.js';
|
||||
import {
|
||||
DefaultLookerClientFactory,
|
||||
DefaultLookerConnectionClientFactory,
|
||||
type LookerCredentialResolver,
|
||||
} from './factory.js';
|
||||
import type { LookerRuntimeClient } from './fetch.js';
|
||||
import type { LookerPullConfig } from './types.js';
|
||||
|
||||
/**
 * Builds a stubbed Looker SDK port for these tests: list-style calls resolve
 * to canned values, while `dashboard`, `look` and `lookml_model_explore` are
 * bare mocks.
 */
function sdk(): LookerSdkPort {
  const resolvesTo = (value: unknown) => vi.fn().mockResolvedValue(value);
  const apiUser = { id: '1', display_name: 'API User', email: 'api@example.com' };

  return {
    me: resolvesTo(apiUser),
    search_dashboards: resolvesTo([{ id: '10' }]),
    dashboard: vi.fn(),
    search_looks: resolvesTo([]),
    search_scheduled_plans: resolvesTo([]),
    look: vi.fn(),
    all_folders: resolvesTo([]),
    all_users: resolvesTo([]),
    all_groups: resolvesTo([]),
    all_connections: resolvesTo([]),
    all_lookml_models: resolvesTo([]),
    lookml_model_explore: vi.fn(),
    run_inline_query: resolvesTo('[]'),
    logout: resolvesTo(undefined),
  };
}
|
||||
|
||||
// Checks that the connection-level factory looks up credentials by connection
// id and hands them to a client backed by the injected SDK.
describe('DefaultLookerConnectionClientFactory', () => {
  it('resolves credentials by Looker connection id and creates a KLO Looker client', async () => {
    const fakeSdk = sdk();
    const resolve = vi.fn().mockResolvedValue({
      base_url: 'https://example.looker.com',
      client_id: 'id',
      client_secret: 'credential', // pragma: allowlist secret
    });
    const resolver: LookerCredentialResolver = { resolve };
    const factory = new DefaultLookerConnectionClientFactory(resolver, { sdkFactory: () => fakeSdk });

    const client = await factory.createClient('prod-looker');
    const dashboards = await client.listDashboards();

    expect(dashboards).toEqual([{ id: '10', updatedAt: null }]);
    expect(resolve).toHaveBeenCalledWith('prod-looker');
  });
});
|
||||
|
||||
// Checks which connection id the pull-level factory forwards to the inner
// connection factory.
describe('DefaultLookerClientFactory', () => {
  const ctx: FetchContext = { connectionId: 'ctx-looker', sourceKey: 'looker' };

  // Fresh inner-factory mock and factory under test for each case.
  function setup() {
    const runtimeClient = { listDashboards: vi.fn() } as unknown as LookerRuntimeClient;
    const inner = { createClient: vi.fn().mockResolvedValue(runtimeClient) };
    return { runtimeClient, inner, factory: new DefaultLookerClientFactory(inner) };
  }

  it('uses pullConfig.lookerConnectionId when present', async () => {
    const { runtimeClient, inner, factory } = setup();
    const config = { lookerConnectionId: 'prod-looker' } as LookerPullConfig;

    const created = await factory.createClient(config, ctx);

    expect(created).toBe(runtimeClient);
    expect(inner.createClient).toHaveBeenCalledWith('prod-looker');
  });

  it('falls back to ctx.connectionId when pullConfig.lookerConnectionId is absent', async () => {
    const { runtimeClient, inner, factory } = setup();
    const config = {} as LookerPullConfig;

    const created = await factory.createClient(config, ctx);

    expect(created).toBe(runtimeClient);
    expect(inner.createClient).toHaveBeenCalledWith('ctx-looker');
  });
});
|
||||
32
packages/context/src/ingest/adapters/looker/factory.ts
Normal file
32
packages/context/src/ingest/adapters/looker/factory.ts
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
import type { FetchContext } from '../../types.js';
|
||||
import { LookerClient, type LookerClientDeps, type LookerConnectionParams } from './client.js';
|
||||
import type { LookerClientFactory, LookerRuntimeClient } from './fetch.js';
|
||||
import type { LookerPullConfig } from './types.js';
|
||||
|
||||
/** Looks up the connection parameters for a Looker connection id. */
export interface LookerCredentialResolver {
  /**
   * @param lookerConnectionId - Identifier of the Looker connection to resolve.
   * @returns The parameters used to construct a `LookerClient`.
   */
  resolve(lookerConnectionId: string): Promise<LookerConnectionParams>;
}
|
||||
|
||||
/** Creates a Looker runtime client for a given Looker connection id. */
export interface LookerConnectionClientFactory {
  /**
   * @param lookerConnectionId - Identifier of the Looker connection to connect to.
   * @returns A runtime client for that connection.
   */
  createClient(lookerConnectionId: string): Promise<LookerRuntimeClient>;
}
|
||||
|
||||
/**
 * Connection-level factory: resolves connection parameters through a
 * `LookerCredentialResolver` and wraps them in a `LookerClient`.
 */
export class DefaultLookerConnectionClientFactory implements LookerConnectionClientFactory {
  private readonly resolver: LookerCredentialResolver;
  private readonly deps: LookerClientDeps;

  /**
   * @param resolver - Source of connection parameters per connection id.
   * @param deps - Dependencies forwarded to every `LookerClient`; defaults to `{}`.
   */
  constructor(resolver: LookerCredentialResolver, deps: LookerClientDeps = {}) {
    this.resolver = resolver;
    this.deps = deps;
  }

  /**
   * @param lookerConnectionId - Identifier passed to the resolver.
   * @returns A new `LookerClient` built from the resolved parameters.
   */
  async createClient(lookerConnectionId: string): Promise<LookerRuntimeClient> {
    const connectionParams = await this.resolver.resolve(lookerConnectionId);
    const client = new LookerClient(connectionParams, this.deps);
    return client;
  }
}
|
||||
|
||||
/**
 * Pull-level factory: picks the Looker connection id for a pull and delegates
 * client creation to a `LookerConnectionClientFactory`.
 */
export class DefaultLookerClientFactory implements LookerClientFactory {
  private readonly inner: LookerConnectionClientFactory;

  /** @param inner - Factory that creates the client once the id is chosen. */
  constructor(inner: LookerConnectionClientFactory) {
    this.inner = inner;
  }

  /**
   * @param config - Pull configuration; its `lookerConnectionId` wins when it is not null/undefined.
   * @param ctx - Fetch context whose `connectionId` is the fallback.
   * @returns The client created by the inner factory.
   */
  async createClient(config: LookerPullConfig, ctx: FetchContext): Promise<LookerRuntimeClient> {
    const lookerConnectionId = config.lookerConnectionId ?? ctx.connectionId;
    return this.inner.createClient(lookerConnectionId);
  }
}
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { readLookerFetchReport, writeLookerFetchReport } from './fetch-report.js';
|
||||
|
||||
// Exercises the fetch report reader/writer against a throwaway staged directory.
describe('Looker staged fetch report', () => {
  let stagedDir: string;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-fetch-report-'));
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('returns null when a staged bundle has no fetch report', async () => {
    const report = await readLookerFetchReport(stagedDir);

    expect(report).toBeNull();
  });

  it('round-trips partial fetch issues', async () => {
    // One skipped entity (retryable rate limit) and one non-retryable warning.
    const report: Parameters<typeof writeLookerFetchReport>[1] = {
      status: 'partial',
      retryRecommended: true,
      skipped: [
        {
          rawPath: 'dashboards/10.json',
          entityType: 'dashboard',
          entityId: '10',
          severity: 'error',
          statusCode: 429,
          message: 'Looker API rate limit remained after retry',
          retryRecommended: true,
        },
      ],
      warnings: [
        {
          rawPath: 'signals/dashboard_usage.json',
          entityType: 'signals',
          entityId: null,
          severity: 'warning',
          statusCode: 403,
          message: 'system__activity unavailable',
          retryRecommended: false,
        },
      ],
    };

    await writeLookerFetchReport(stagedDir, report);
    const reloaded = await readLookerFetchReport(stagedDir);

    expect(reloaded).toEqual({
      status: 'partial',
      retryRecommended: true,
      skipped: [
        {
          rawPath: 'dashboards/10.json',
          entityType: 'dashboard',
          entityId: '10',
          severity: 'error',
          statusCode: 429,
          message: 'Looker API rate limit remained after retry',
          retryRecommended: true,
        },
      ],
      warnings: [
        {
          rawPath: 'signals/dashboard_usage.json',
          entityType: 'signals',
          entityId: null,
          severity: 'warning',
          statusCode: 403,
          message: 'system__activity unavailable',
          retryRecommended: false,
        },
      ],
    });
  });
});
|
||||
22
packages/context/src/ingest/adapters/looker/fetch-report.ts
Normal file
22
packages/context/src/ingest/adapters/looker/fetch-report.ts
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { STAGED_FILES, type StagedLookerFetchReport, stagedLookerFetchReportSchema } from './types.js';
|
||||
|
||||
/**
 * Reads and validates the fetch report of a staged Looker bundle.
 *
 * @param stagedDir - Root directory of the staged bundle.
 * @returns The parsed report, or `null` when the read fails with `ENOENT`.
 * @throws Any other error raised while reading, decoding or validating.
 */
export async function readLookerFetchReport(stagedDir: string): Promise<StagedLookerFetchReport | null> {
  const reportPath = join(stagedDir, STAGED_FILES.fetchReport);
  try {
    const decoded: unknown = JSON.parse(await readFile(reportPath, 'utf-8'));
    return stagedLookerFetchReportSchema.parse(decoded);
  } catch (error) {
    // A bundle without a report is valid; every other failure propagates.
    const code = typeof error === 'object' && error !== null && 'code' in error ? error.code : undefined;
    if (code === 'ENOENT') {
      return null;
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Validates a fetch report and writes it into the staged bundle as
 * 2-space-indented JSON with a trailing newline, creating parent directories
 * as needed.
 *
 * @param stagedDir - Root directory of the staged bundle.
 * @param report - Report to persist; it is passed through the schema's `parse` before writing.
 */
export async function writeLookerFetchReport(stagedDir: string, report: StagedLookerFetchReport): Promise<void> {
  // Validate first so an invalid report never touches the file system.
  const validated = stagedLookerFetchReportSchema.parse(report);
  const reportPath = join(stagedDir, STAGED_FILES.fetchReport);
  const serialized = JSON.stringify(validated, null, 2);

  await mkdir(dirname(reportPath), { recursive: true });
  await writeFile(reportPath, `${serialized}\n`, 'utf-8');
}
|
||||
645
packages/context/src/ingest/adapters/looker/fetch.test.ts
Normal file
645
packages/context/src/ingest/adapters/looker/fetch.test.ts
Normal file
|
|
@ -0,0 +1,645 @@
|
|||
import { mkdtemp, readdir, readFile, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { chunkLookerStagedDir } from './chunk.js';
|
||||
import { fetchLookerRuntimeBundle, type LookerRuntimeClient } from './fetch.js';
|
||||
|
||||
const connectionId = '11111111-1111-4111-8111-111111111111';
|
||||
|
||||
/**
 * Builds a fully mocked Looker runtime client with one dashboard (`10`), one
 * look (`20`), one folder, user, group, model/explore, and usage signals.
 * Individual tests override single methods via `vi.mocked(...)`.
 */
function makeClient(): LookerRuntimeClient {
  const resolvesTo = (value: unknown) => vi.fn().mockResolvedValue(value);

  const dashboard = {
    lookerId: '10',
    title: 'Sales Pipeline',
    description: 'Pipeline health',
    folderId: '7',
    ownerId: '3',
    updatedAt: '2026-04-30T12:00:00.000Z',
    tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
  };
  const look = {
    lookerId: '20',
    title: 'Open Pipeline',
    description: null,
    folderId: '7',
    ownerId: '3',
    updatedAt: '2026-04-30T12:00:00.000Z',
    query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
  };
  const explore = {
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
    joins: [],
  };
  const signals = {
    dashboardUsage: [{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] }],
    lookUsage: [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] }],
    scheduledPlans: [
      { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
    ],
    favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }],
  };

  return {
    listDashboards: resolvesTo([{ id: '10' }]),
    getDashboard: resolvesTo(dashboard),
    listLooks: resolvesTo([{ id: '20' }]),
    getLook: resolvesTo(look),
    listFolders: resolvesTo({ folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }] }),
    listUsers: resolvesTo([{ id: '3', displayName: 'Ada Lovelace', email: null }]),
    listGroups: resolvesTo([{ id: '4', name: 'Sales' }]),
    listLookmlModels: resolvesTo({
      models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
    }),
    getExplore: resolvesTo(explore),
    getSignals: resolvesTo(signals),
    cleanup: resolvesTo(undefined),
  };
}
|
||||
|
||||
describe('fetchLookerRuntimeBundle', () => {
|
||||
let stagedDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
stagedDir = await mkdtemp(join(tmpdir(), 'looker-fetch-'));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(stagedDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('writes dashboards, looks, folders, users, groups, models, explores, signals, and sync config', async () => {
|
||||
const client = makeClient();
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: { lookerConnectionId: connectionId, instanceBaseUrl: 'https://example.looker.com' },
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
expect(await readdir(join(stagedDir, 'dashboards'))).toEqual(['10.json']);
|
||||
expect(await readdir(join(stagedDir, 'looks'))).toEqual(['20.json']);
|
||||
expect(await readdir(join(stagedDir, 'users'))).toEqual(['3.json']);
|
||||
expect(await readdir(join(stagedDir, 'groups'))).toEqual(['4.json']);
|
||||
expect(await readdir(join(stagedDir, 'explores/b2b'))).toEqual(['sales_pipeline.json']);
|
||||
|
||||
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
|
||||
expect(syncConfig).toEqual({
|
||||
lookerConnectionId: connectionId,
|
||||
fetchedAt: '2026-04-30T12:30:00.000Z',
|
||||
instanceBaseUrl: 'https://example.looker.com',
|
||||
previousCursors: {
|
||||
dashboardsLastSyncedAt: null,
|
||||
looksLastSyncedAt: null,
|
||||
},
|
||||
nextCursors: {
|
||||
dashboardsLastSyncedAt: null,
|
||||
looksLastSyncedAt: null,
|
||||
},
|
||||
});
|
||||
|
||||
const scope = JSON.parse(await readFile(join(stagedDir, 'looker-scope.json'), 'utf-8'));
|
||||
expect(scope).toEqual({
|
||||
mode: 'full',
|
||||
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
|
||||
fetchedRawPaths: ['dashboards/10.json', 'looks/20.json'],
|
||||
});
|
||||
|
||||
const dashboardUsage = JSON.parse(await readFile(join(stagedDir, 'signals/dashboard_usage.json'), 'utf-8'));
|
||||
expect(dashboardUsage).toEqual([
|
||||
{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] },
|
||||
]);
|
||||
|
||||
const lookUsage = JSON.parse(await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8'));
|
||||
const scheduledPlans = JSON.parse(await readFile(join(stagedDir, 'signals/scheduled_plans.json'), 'utf-8'));
|
||||
const favorites = JSON.parse(await readFile(join(stagedDir, 'signals/favorites.json'), 'utf-8'));
|
||||
|
||||
expect(lookUsage).toEqual([
|
||||
{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] },
|
||||
]);
|
||||
expect(scheduledPlans).toEqual([
|
||||
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
|
||||
]);
|
||||
expect(favorites).toEqual([{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }]);
|
||||
});
|
||||
|
||||
it('stages only changed Dashboard and Look entity bodies during incremental pulls', async () => {
|
||||
const client = makeClient();
|
||||
vi.mocked(client.listDashboards).mockResolvedValue([
|
||||
{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' },
|
||||
{ id: '11', updatedAt: '2026-04-30T12:10:00.000Z' },
|
||||
]);
|
||||
vi.mocked(client.getDashboard).mockImplementation(async (id: string) => ({
|
||||
lookerId: id,
|
||||
title: `Dashboard ${id}`,
|
||||
description: null,
|
||||
folderId: '7',
|
||||
ownerId: '3',
|
||||
updatedAt: id === '11' ? '2026-04-30T12:10:00.000Z' : '2026-04-30T12:00:00.000Z',
|
||||
tiles: [],
|
||||
}));
|
||||
vi.mocked(client.listLooks).mockResolvedValue([
|
||||
{ id: '20', updatedAt: '2026-04-30T11:00:00.000Z' },
|
||||
{ id: '21', updatedAt: null },
|
||||
]);
|
||||
vi.mocked(client.getLook).mockImplementation(async (id: string) => ({
|
||||
lookerId: id,
|
||||
title: `Look ${id}`,
|
||||
description: null,
|
||||
folderId: '7',
|
||||
ownerId: '3',
|
||||
updatedAt: id === '21' ? null : '2026-04-30T11:00:00.000Z',
|
||||
query: null,
|
||||
}));
|
||||
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: {
|
||||
lookerConnectionId: connectionId,
|
||||
dashboardUpdatedSince: '2026-04-30T12:00:00.000Z',
|
||||
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
|
||||
},
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
expect(client.getDashboard).toHaveBeenCalledTimes(1);
|
||||
expect(client.getDashboard).toHaveBeenCalledWith('11');
|
||||
expect(client.getLook).toHaveBeenCalledTimes(1);
|
||||
expect(client.getLook).toHaveBeenCalledWith('21');
|
||||
|
||||
await expect(readdir(join(stagedDir, 'dashboards'))).resolves.toEqual(['11.json']);
|
||||
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['21.json']);
|
||||
|
||||
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
|
||||
expect(syncConfig.previousCursors).toEqual({
|
||||
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
|
||||
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
|
||||
});
|
||||
expect(syncConfig.nextCursors).toEqual({
|
||||
dashboardsLastSyncedAt: '2026-04-30T12:10:00.000Z',
|
||||
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
|
||||
});
|
||||
|
||||
const scope = JSON.parse(await readFile(join(stagedDir, 'looker-scope.json'), 'utf-8'));
|
||||
expect(scope).toEqual({
|
||||
mode: 'incremental',
|
||||
knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json', 'looks/21.json'],
|
||||
fetchedRawPaths: ['dashboards/11.json', 'looks/21.json'],
|
||||
});
|
||||
});
|
||||
|
||||
it('falls back to empty signal files when the client has no signal support', async () => {
|
||||
const client = makeClient();
|
||||
delete client.getSignals;
|
||||
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: { lookerConnectionId: connectionId },
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
expect(JSON.parse(await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8'))).toEqual([]);
|
||||
});
|
||||
|
||||
it('stamps explore warehouse targets from pull config and reports unmapped Looker connections', async () => {
|
||||
const client = makeClient();
|
||||
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
|
||||
vi.mocked(client.listLookmlModels).mockResolvedValue({
|
||||
models: [
|
||||
{
|
||||
name: 'b2b',
|
||||
label: 'B2B',
|
||||
explores: [
|
||||
{ name: 'sales_pipeline', label: 'Sales Pipeline' },
|
||||
{ name: 'marketing', label: 'Marketing' },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
vi.mocked(client.getExplore).mockImplementation(async (_modelName: string, exploreName: string) => {
|
||||
if (exploreName === 'marketing') {
|
||||
return {
|
||||
modelName: 'b2b',
|
||||
exploreName: 'marketing',
|
||||
label: 'Marketing',
|
||||
description: null,
|
||||
rawSqlTableName: 'proj.dataset.marketing',
|
||||
connectionName: 'missing_mapping',
|
||||
viewName: 'marketing',
|
||||
fields: {
|
||||
dimensions: [{ name: 'marketing.id', label: null, type: null, sql: null, description: null }],
|
||||
measures: [{ name: 'marketing.spend', label: null, type: null, sql: null, description: null }],
|
||||
},
|
||||
joins: [],
|
||||
targetWarehouseConnectionId: null,
|
||||
targetTable: null,
|
||||
};
|
||||
}
|
||||
return {
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
label: 'Sales Pipeline',
|
||||
description: null,
|
||||
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
|
||||
connectionName: 'b2b_sandbox_bq',
|
||||
viewName: 'opportunities',
|
||||
fields: {
|
||||
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
|
||||
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
|
||||
},
|
||||
joins: [
|
||||
{
|
||||
name: 'accounts',
|
||||
type: 'left_outer',
|
||||
relationship: 'many_to_one',
|
||||
rawSqlTableName: 'proj.dataset.accounts',
|
||||
sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
|
||||
from: null,
|
||||
targetTable: null,
|
||||
},
|
||||
],
|
||||
targetWarehouseConnectionId: null,
|
||||
targetTable: null,
|
||||
};
|
||||
});
|
||||
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: {
|
||||
lookerConnectionId: connectionId,
|
||||
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
|
||||
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
|
||||
parsedTargetTables: {
|
||||
'b2b.sales_pipeline': {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'opportunities',
|
||||
canonicalTable: 'proj.dataset.opportunities',
|
||||
},
|
||||
'b2b.sales_pipeline.accounts': {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'accounts',
|
||||
canonicalTable: 'proj.dataset.accounts',
|
||||
},
|
||||
},
|
||||
},
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
const salesPipeline = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8'));
|
||||
expect(salesPipeline).toMatchObject({
|
||||
connectionName: 'b2b_sandbox_bq',
|
||||
targetWarehouseConnectionId: warehouseConnectionId,
|
||||
targetTable: {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'opportunities',
|
||||
canonicalTable: 'proj.dataset.opportunities',
|
||||
},
|
||||
joins: [
|
||||
{
|
||||
name: 'accounts',
|
||||
targetTable: {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'accounts',
|
||||
canonicalTable: 'proj.dataset.accounts',
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const marketing = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/marketing.json'), 'utf-8'));
|
||||
expect(marketing).toMatchObject({
|
||||
connectionName: 'missing_mapping',
|
||||
targetWarehouseConnectionId: null,
|
||||
targetTable: {
|
||||
ok: false,
|
||||
reason: 'no_connection_mapping',
|
||||
},
|
||||
});
|
||||
|
||||
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
|
||||
expect(report.status).toBe('partial');
|
||||
expect(report.skipped).toEqual([]);
|
||||
expect(report.warnings).toEqual([
|
||||
{
|
||||
rawPath: 'looker_connection_mappings/missing_mapping',
|
||||
entityType: 'looker_connection_mapping',
|
||||
entityId: 'missing_mapping',
|
||||
severity: 'warning',
|
||||
statusCode: null,
|
||||
message: 'Looker connection missing_mapping is not mapped to a warehouse connection; 1 explore will be wiki-only.',
|
||||
retryRecommended: false,
|
||||
kind: 'unmapped_looker_connection',
|
||||
details: {
|
||||
lookerConnectionName: 'missing_mapping',
|
||||
affectedExplores: ['b2b.marketing'],
|
||||
},
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('reports parsed target table failures without retrying the Looker fetch', async () => {
|
||||
const client = makeClient();
|
||||
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
|
||||
vi.mocked(client.getExplore).mockResolvedValue({
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
label: 'Sales Pipeline',
|
||||
description: null,
|
||||
rawSqlTableName: '$' + '{derived.SQL_TABLE_NAME}',
|
||||
connectionName: 'b2b_sandbox_bq',
|
||||
viewName: 'opportunities',
|
||||
fields: {
|
||||
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
|
||||
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
|
||||
},
|
||||
joins: [],
|
||||
targetWarehouseConnectionId: null,
|
||||
targetTable: null,
|
||||
});
|
||||
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: {
|
||||
lookerConnectionId: connectionId,
|
||||
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
|
||||
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
|
||||
parsedTargetTables: {
|
||||
'b2b.sales_pipeline': {
|
||||
ok: false,
|
||||
reason: 'looker_template_unresolved',
|
||||
detail: 'Looker template markers cannot be resolved before parsing.',
|
||||
},
|
||||
},
|
||||
},
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
const explore = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8'));
|
||||
expect(explore).toMatchObject({
|
||||
targetWarehouseConnectionId: warehouseConnectionId,
|
||||
targetTable: {
|
||||
ok: false,
|
||||
reason: 'looker_template_unresolved',
|
||||
},
|
||||
});
|
||||
|
||||
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
|
||||
expect(report).toMatchObject({
|
||||
status: 'partial',
|
||||
retryRecommended: false,
|
||||
skipped: [],
|
||||
warnings: [
|
||||
{
|
||||
rawPath: 'looker_connection_mappings/b2b_sandbox_bq',
|
||||
entityType: 'looker_connection_mapping',
|
||||
entityId: 'b2b_sandbox_bq',
|
||||
severity: 'warning',
|
||||
statusCode: null,
|
||||
message:
|
||||
'Looker explore b2b.sales_pipeline has sql_table_name that cannot be mapped to a physical warehouse table: looker_template_unresolved.',
|
||||
retryRecommended: false,
|
||||
kind: 'looker_template_unresolved',
|
||||
details: {
|
||||
lookerConnectionName: 'b2b_sandbox_bq',
|
||||
rawSqlTableName: '$' + '{derived.SQL_TABLE_NAME}',
|
||||
reason: 'looker_template_unresolved',
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('propagates parent explore warehouse targets onto Dashboard tile and Look queries', async () => {
|
||||
const client = makeClient();
|
||||
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
|
||||
vi.mocked(client.getExplore).mockResolvedValue({
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
label: 'Sales Pipeline',
|
||||
description: null,
|
||||
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
|
||||
connectionName: 'b2b_sandbox_bq',
|
||||
viewName: 'opportunities',
|
||||
fields: {
|
||||
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
|
||||
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
|
||||
},
|
||||
joins: [],
|
||||
targetWarehouseConnectionId: null,
|
||||
targetTable: null,
|
||||
});
|
||||
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: {
|
||||
lookerConnectionId: connectionId,
|
||||
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
|
||||
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
|
||||
parsedTargetTables: {
|
||||
'b2b.sales_pipeline': {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'opportunities',
|
||||
canonicalTable: 'proj.dataset.opportunities',
|
||||
},
|
||||
},
|
||||
},
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
const dashboard = JSON.parse(await readFile(join(stagedDir, 'dashboards/10.json'), 'utf-8'));
|
||||
expect(dashboard.tiles[0].query).toMatchObject({
|
||||
model: 'b2b',
|
||||
view: 'sales_pipeline',
|
||||
targetWarehouseConnectionId: warehouseConnectionId,
|
||||
targetTable: {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'opportunities',
|
||||
canonicalTable: 'proj.dataset.opportunities',
|
||||
},
|
||||
});
|
||||
|
||||
const look = JSON.parse(await readFile(join(stagedDir, 'looks/20.json'), 'utf-8'));
|
||||
expect(look.query).toMatchObject({
|
||||
model: 'b2b',
|
||||
view: 'sales_pipeline',
|
||||
targetWarehouseConnectionId: warehouseConnectionId,
|
||||
targetTable: {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'opportunities',
|
||||
canonicalTable: 'proj.dataset.opportunities',
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('records skipped detail entities and keeps cursors pinned for affected entity types', async () => {
|
||||
const client = makeClient();
|
||||
vi.mocked(client.listDashboards).mockResolvedValue([
|
||||
{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' },
|
||||
{ id: '11', updatedAt: '2026-04-30T12:10:00.000Z' },
|
||||
]);
|
||||
vi.mocked(client.getDashboard).mockImplementation(async (id: string) => {
|
||||
if (id === '11') {
|
||||
const error = new Error('Looker API rate limit remained after retry');
|
||||
Object.assign(error, { statusCode: 429 });
|
||||
throw error;
|
||||
}
|
||||
return {
|
||||
lookerId: id,
|
||||
title: `Dashboard ${id}`,
|
||||
description: null,
|
||||
folderId: '7',
|
||||
ownerId: '3',
|
||||
updatedAt: '2026-04-30T12:00:00.000Z',
|
||||
tiles: [],
|
||||
};
|
||||
});
|
||||
vi.mocked(client.listLooks).mockResolvedValue([{ id: '20', updatedAt: '2026-04-30T11:15:00.000Z' }]);
|
||||
vi.mocked(client.getLook).mockResolvedValue({
|
||||
lookerId: '20',
|
||||
title: 'Look 20',
|
||||
description: null,
|
||||
folderId: '7',
|
||||
ownerId: '3',
|
||||
updatedAt: '2026-04-30T11:15:00.000Z',
|
||||
query: null,
|
||||
});
|
||||
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: {
|
||||
lookerConnectionId: connectionId,
|
||||
dashboardUpdatedSince: '2026-04-30T12:00:00.000Z',
|
||||
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
|
||||
},
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
await expect(readdir(join(stagedDir, 'dashboards'))).rejects.toMatchObject({ code: 'ENOENT' });
|
||||
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['20.json']);
|
||||
|
||||
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
|
||||
expect(syncConfig.nextCursors).toEqual({
|
||||
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
|
||||
looksLastSyncedAt: '2026-04-30T11:15:00.000Z',
|
||||
});
|
||||
|
||||
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
|
||||
expect(report).toEqual({
|
||||
status: 'partial',
|
||||
retryRecommended: true,
|
||||
skipped: [
|
||||
{
|
||||
rawPath: 'dashboards/11.json',
|
||||
entityType: 'dashboard',
|
||||
entityId: '11',
|
||||
severity: 'error',
|
||||
statusCode: 429,
|
||||
message: 'Looker API rate limit remained after retry',
|
||||
retryRecommended: true,
|
||||
},
|
||||
],
|
||||
warnings: [],
|
||||
});
|
||||
});
|
||||
|
||||
it('continues without explore bootstrap when LookML model listing is denied', async () => {
|
||||
const client = makeClient();
|
||||
const error = new Error('LookML model access denied');
|
||||
Object.assign(error, { statusCode: 403 });
|
||||
vi.mocked(client.listLookmlModels).mockRejectedValue(error);
|
||||
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: { lookerConnectionId: connectionId },
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
await expect(readdir(join(stagedDir, 'dashboards'))).resolves.toEqual(['10.json']);
|
||||
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['20.json']);
|
||||
await expect(readFile(join(stagedDir, 'lookml_models.json'), 'utf-8')).resolves.toBe('{\n "models": []\n}\n');
|
||||
await expect(readdir(join(stagedDir, 'explores'))).rejects.toMatchObject({ code: 'ENOENT' });
|
||||
expect(client.getExplore).not.toHaveBeenCalled();
|
||||
|
||||
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
|
||||
expect(report).toEqual({
|
||||
status: 'success',
|
||||
retryRecommended: false,
|
||||
skipped: [],
|
||||
warnings: [
|
||||
{
|
||||
rawPath: 'lookml_models.json',
|
||||
entityType: 'lookml_models',
|
||||
entityId: null,
|
||||
severity: 'warning',
|
||||
statusCode: 403,
|
||||
message: 'LookML model access denied',
|
||||
retryRecommended: false,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const chunked = await chunkLookerStagedDir(stagedDir);
|
||||
expect(chunked.workUnits.map((wu) => wu.unitKey).sort()).toEqual(['looker-dashboard-10', 'looker-look-20']);
|
||||
expect(chunked.workUnits.flatMap((wu) => wu.dependencyPaths)).not.toContain('explores/b2b/sales_pipeline.json');
|
||||
});
|
||||
|
||||
it('cleans up the Looker client after a successful fetch', async () => {
|
||||
const client = makeClient();
|
||||
|
||||
await fetchLookerRuntimeBundle({
|
||||
pullConfig: { lookerConnectionId: connectionId },
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
});
|
||||
|
||||
expect(client.cleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('cleans up the Looker client when fetch throws', async () => {
|
||||
const client = makeClient();
|
||||
vi.mocked(client.listDashboards).mockRejectedValue(new Error('Looker API unavailable'));
|
||||
|
||||
await expect(
|
||||
fetchLookerRuntimeBundle({
|
||||
pullConfig: { lookerConnectionId: connectionId },
|
||||
stagedDir,
|
||||
ctx: { connectionId, sourceKey: 'looker' },
|
||||
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
|
||||
now: () => new Date('2026-04-30T12:30:00.000Z'),
|
||||
}),
|
||||
).rejects.toThrow('Looker API unavailable');
|
||||
|
||||
expect(client.cleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
555
packages/context/src/ingest/adapters/looker/fetch.ts
Normal file
555
packages/context/src/ingest/adapters/looker/fetch.ts
Normal file
|
|
@ -0,0 +1,555 @@
|
|||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import type { FetchContext } from '../../types.js';
|
||||
import { writeLookerEvidenceDocuments } from './evidence-documents.js';
|
||||
import { writeLookerFetchReport } from './fetch-report.js';
|
||||
import {
|
||||
type LookerPullConfig,
|
||||
type ParsedTargetTable,
|
||||
parseLookerPullConfig,
|
||||
STAGED_FILES,
|
||||
type StagedDashboardFile,
|
||||
type StagedExploreFile,
|
||||
type StagedFoldersTreeFile,
|
||||
type StagedGroupFile,
|
||||
type StagedLookerFetchIssue,
|
||||
type StagedLookerFetchReport,
|
||||
type StagedLookerQuery,
|
||||
type StagedLookerSignalsFile,
|
||||
type StagedLookFile,
|
||||
type StagedLookmlModelsFile,
|
||||
type StagedUserFile,
|
||||
stagedDashboardFileSchema,
|
||||
stagedExploreFileSchema,
|
||||
stagedFoldersTreeFileSchema,
|
||||
stagedGroupFileSchema,
|
||||
stagedLookerScopeFileSchema,
|
||||
stagedLookerSignalsFileSchema,
|
||||
stagedLookFileSchema,
|
||||
stagedLookmlModelsFileSchema,
|
||||
stagedSyncConfigSchema,
|
||||
stagedUserFileSchema,
|
||||
} from './types.js';
|
||||
|
||||
/** Lightweight reference to a Looker entity, as returned by the client's listing calls. */
export interface LookerEntityRef {
  /** Identifier of the entity; passed back to the matching detail call. */
  id: string;
  /** Last-modified timestamp string, if the listing provides one. May be absent or null. */
  updatedAt?: string | null;
}
|
||||
|
||||
/**
 * Client surface used by `fetchLookerRuntimeBundle` to read content from a Looker instance.
 *
 * `getSignals` and `cleanup` are optional; callers must check for their presence before invoking them.
 */
export interface LookerRuntimeClient {
  /** Lists references to dashboards. */
  listDashboards(): Promise<LookerEntityRef[]>;
  /** Loads the full dashboard for the given id. */
  getDashboard(id: string): Promise<StagedDashboardFile>;
  /** Lists references to Looks. */
  listLooks(): Promise<LookerEntityRef[]>;
  /** Loads the full Look for the given id. */
  getLook(id: string): Promise<StagedLookFile>;
  /** Loads the folder tree. */
  listFolders(): Promise<StagedFoldersTreeFile>;
  /** Lists users. */
  listUsers(): Promise<StagedUserFile[]>;
  /** Lists groups. */
  listGroups(): Promise<StagedGroupFile[]>;
  /** Lists LookML models together with their explore references. */
  listLookmlModels(): Promise<StagedLookmlModelsFile>;
  /** Loads a single explore of the given model. */
  getExplore(modelName: string, exploreName: string): Promise<StagedExploreFile>;
  /** Optionally loads usage/schedule/favorite signals. */
  getSignals?(): Promise<StagedLookerSignalsFile>;
  /** Optional teardown hook, invoked once the fetch finishes or fails. */
  cleanup?(): Promise<void>;
}
|
||||
|
||||
/** Creates the runtime client used for one fetch. */
export interface LookerClientFactory {
  /**
   * Builds a client for the parsed pull config and fetch context.
   * May return the client directly or a promise for it.
   */
  createClient(config: LookerPullConfig, ctx: FetchContext): Promise<LookerRuntimeClient> | LookerRuntimeClient;
}
|
||||
|
||||
/** Warehouse target resolved for an explore; copied onto queries that reference that explore. */
interface ExploreTargetSummary {
  /** Mapped warehouse connection id, or null when the explore's Looker connection is unmapped. */
  targetWarehouseConnectionId: string | null;
  /** Parsed target table (success or failure record), or null when none was resolved. */
  targetTable: ParsedTargetTable | null;
}
|
||||
|
||||
/** Result of stamping warehouse target information onto an explore. */
interface StampedExploreResult {
  /** The explore with `targetWarehouseConnectionId`, `targetTable` and join targets filled in. */
  explore: StagedExploreFile;
  /** The explore-level target, extracted for lookup by dashboard and Look queries. */
  targetSummary: ExploreTargetSummary;
}
|
||||
|
||||
/** A value held in memory together with the staged-directory-relative path it will be written to. */
interface StagedJsonFile<T> {
  /** Path relative to the staged directory. */
  rawPath: string;
  /** The payload to serialize as JSON. */
  value: T;
}
|
||||
|
||||
/** The `reason` values carried by the failed (`ok: false`) variant of `ParsedTargetTable`. */
type ParsedTargetTableFailureReason = Extract<ParsedTargetTable, { ok: false }>['reason'];
|
||||
|
||||
/** Inputs to `fetchLookerRuntimeBundle`. */
interface FetchLookerRuntimeBundleParams {
  /** Raw pull configuration; validated with `parseLookerPullConfig` before use. */
  pullConfig: unknown;
  /** Directory that receives all staged JSON files. */
  stagedDir: string;
  /** Fetch context; its `connectionId` is the fallback when the config has no `lookerConnectionId`. */
  ctx: FetchContext;
  /** Factory that produces the Looker client for this fetch. */
  clientFactory: LookerClientFactory;
  /** Clock override used for the `fetchedAt` timestamp; defaults to the current time. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Fetches Looker content through a runtime client and stages it as JSON files under `params.stagedDir`.
 *
 * Steps, in order:
 * 1. List dashboards and Looks, and fetch details for those selected by `shouldFetchEntity` against the
 *    previous cursors. A failed detail fetch is recorded in `skipped` and keeps that entity type's
 *    cursor at its previous value.
 * 2. Write the sync config (previous/next cursors) and the scope file (known vs. fetched raw paths).
 * 3. Write folders, users and groups.
 * 4. List LookML models; on failure a warning is recorded and an empty model list is staged instead.
 * 5. Fetch each explore, stamp its warehouse target, and write it; failures are recorded in `skipped`.
 * 6. Write the fetched dashboards and Looks, stamping their queries with the parent explore's target.
 * 7. Write signal files; on failure a warning is recorded and the schema's empty-input result is staged.
 * 8. Invoke `writeLookerEvidenceDocuments` and write the fetch report.
 *
 * The client's optional `cleanup` hook runs in a `finally` block, whether the fetch succeeds or throws.
 *
 * @param params - Pull config, staged directory, fetch context, client factory and optional clock.
 * @throws Whatever config parsing, listing calls, schema validation of list results, or file writes throw;
 *   only per-entity detail fetches, model listing and signals are caught and reported.
 */
export async function fetchLookerRuntimeBundle(params: FetchLookerRuntimeBundleParams): Promise<void> {
  const config = parseLookerPullConfig(params.pullConfig);
  const connectionId = config.lookerConnectionId ?? params.ctx.connectionId;
  const client = await params.clientFactory.createClient(config, params.ctx);
  try {
    const now = params.now ?? (() => new Date());
    const skipped: StagedLookerFetchIssue[] = [];
    const warnings: StagedLookerFetchIssue[] = [];
    let dashboardFetchHadSkips = false;
    let lookFetchHadSkips = false;
    // Dashboards and Looks are held in memory until explores are known, so their queries can be stamped.
    const fetchedDashboards: Array<StagedJsonFile<StagedDashboardFile>> = [];
    const fetchedLooks: Array<StagedJsonFile<StagedLookFile>> = [];

    const previousCursors = {
      dashboardsLastSyncedAt: config.dashboardUpdatedSince ?? null,
      looksLastSyncedAt: config.lookUpdatedSince ?? null,
    };

    const dashboards = await client.listDashboards();
    const dashboardRawPaths = dashboards.map((dashboardRef) => `dashboards/${safePathSegment(dashboardRef.id)}.json`);
    const dashboardsToFetch = dashboards.filter((dashboardRef) =>
      shouldFetchEntity(dashboardRef, previousCursors.dashboardsLastSyncedAt),
    );
    const fetchedRawPaths: string[] = [];
    for (const dashboardRef of dashboardsToFetch) {
      const rawPath = `dashboards/${safePathSegment(dashboardRef.id)}.json`;
      try {
        const dashboard = stagedDashboardFileSchema.parse(await client.getDashboard(dashboardRef.id));
        const dashboardRawPath = `dashboards/${safePathSegment(dashboard.lookerId)}.json`;
        fetchedRawPaths.push(dashboardRawPath);
        fetchedDashboards.push({ rawPath: dashboardRawPath, value: dashboard });
      } catch (error) {
        dashboardFetchHadSkips = true;
        skipped.push(issueForFetchError({ rawPath, entityType: 'dashboard', entityId: dashboardRef.id, error }));
      }
    }

    const looks = await client.listLooks();
    const lookRawPaths = looks.map((lookRef) => `looks/${safePathSegment(lookRef.id)}.json`);
    const looksToFetch = looks.filter((lookRef) => shouldFetchEntity(lookRef, previousCursors.looksLastSyncedAt));
    for (const lookRef of looksToFetch) {
      const rawPath = `looks/${safePathSegment(lookRef.id)}.json`;
      try {
        const look = stagedLookFileSchema.parse(await client.getLook(lookRef.id));
        const lookRawPath = `looks/${safePathSegment(look.lookerId)}.json`;
        fetchedRawPaths.push(lookRawPath);
        fetchedLooks.push({ rawPath: lookRawPath, value: look });
      } catch (error) {
        lookFetchHadSkips = true;
        skipped.push(issueForFetchError({ rawPath, entityType: 'look', entityId: lookRef.id, error }));
      }
    }

    // A cursor only advances when every selected entity of that type was fetched successfully.
    const nextCursors = {
      dashboardsLastSyncedAt: dashboardFetchHadSkips
        ? previousCursors.dashboardsLastSyncedAt
        : maxUpdatedAt(dashboards, previousCursors.dashboardsLastSyncedAt),
      looksLastSyncedAt: lookFetchHadSkips
        ? previousCursors.looksLastSyncedAt
        : maxUpdatedAt(looks, previousCursors.looksLastSyncedAt),
    };
    const fetchMode =
      previousCursors.dashboardsLastSyncedAt || previousCursors.looksLastSyncedAt ? 'incremental' : 'full';

    await writeJson(
      params.stagedDir,
      STAGED_FILES.syncConfig,
      stagedSyncConfigSchema.parse({
        lookerConnectionId: connectionId,
        fetchedAt: now().toISOString(),
        ...(config.instanceBaseUrl ? { instanceBaseUrl: config.instanceBaseUrl } : {}),
        previousCursors,
        nextCursors,
      }),
    );

    await writeJson(
      params.stagedDir,
      STAGED_FILES.scope,
      stagedLookerScopeFileSchema.parse({
        mode: fetchMode,
        knownCurrentRawPaths: [...dashboardRawPaths, ...lookRawPaths].sort(),
        fetchedRawPaths: fetchedRawPaths.sort(),
      }),
    );

    const folders = stagedFoldersTreeFileSchema.parse(await client.listFolders());
    await writeJson(params.stagedDir, STAGED_FILES.foldersTree, folders);

    const users = await client.listUsers();
    for (const rawUser of users) {
      const user = stagedUserFileSchema.parse(rawUser);
      await writeJson(params.stagedDir, `users/${safePathSegment(user.id)}.json`, user);
    }

    const groups = await client.listGroups();
    for (const rawGroup of groups) {
      const group = stagedGroupFileSchema.parse(rawGroup);
      await writeJson(params.stagedDir, `groups/${safePathSegment(group.id)}.json`, group);
    }

    let models: StagedLookmlModelsFile;
    try {
      models = stagedLookmlModelsFileSchema.parse(await client.listLookmlModels());
    } catch (error) {
      // Model listing failure is downgraded to a warning; the fetch continues without explores.
      warnings.push(
        issueForFetchError({
          rawPath: STAGED_FILES.lookmlModels,
          entityType: 'lookml_models',
          entityId: null,
          error,
          severity: 'warning',
        }),
      );
      models = stagedLookmlModelsFileSchema.parse({ models: [] });
    }
    await writeJson(params.stagedDir, STAGED_FILES.lookmlModels, models);
    const exploreTargetsByKey = new Map<string, ExploreTargetSummary>();
    const stagedExplores: StagedExploreFile[] = [];
    for (const model of models.models) {
      for (const exploreRef of model.explores) {
        const rawPath = `explores/${safePathSegment(model.name)}/${safePathSegment(exploreRef.name)}.json`;
        try {
          const result = stampExploreWarehouseTarget(await client.getExplore(model.name, exploreRef.name), config);
          stagedExplores.push(result.explore);
          exploreTargetsByKey.set(
            exploreKey(result.explore.modelName, result.explore.exploreName),
            result.targetSummary,
          );
          await writeJson(
            params.stagedDir,
            `explores/${safePathSegment(result.explore.modelName)}/${safePathSegment(result.explore.exploreName)}.json`,
            result.explore,
          );
        } catch (error) {
          skipped.push(
            issueForFetchError({
              rawPath,
              entityType: 'explore',
              entityId: `${model.name}.${exploreRef.name}`,
              error,
            }),
          );
        }
      }
    }
    warnings.push(...warehouseTargetWarnings(stagedExplores));

    for (const dashboard of fetchedDashboards) {
      await writeJson(params.stagedDir, dashboard.rawPath, stampDashboardQueries(dashboard.value, exploreTargetsByKey));
    }

    for (const look of fetchedLooks) {
      await writeJson(params.stagedDir, look.rawPath, stampLookQuery(look.value, exploreTargetsByKey));
    }

    let signals: StagedLookerSignalsFile;
    try {
      signals = stagedLookerSignalsFileSchema.parse(client.getSignals ? await client.getSignals() : {});
    } catch (error) {
      warnings.push(
        issueForFetchError({
          rawPath: STAGED_FILES.signals.dashboardUsage,
          entityType: 'signals',
          entityId: null,
          error,
        }),
      );
      signals = stagedLookerSignalsFileSchema.parse({});
    }
    await writeJson(params.stagedDir, STAGED_FILES.signals.dashboardUsage, signals.dashboardUsage);
    await writeJson(params.stagedDir, STAGED_FILES.signals.lookUsage, signals.lookUsage);
    await writeJson(params.stagedDir, STAGED_FILES.signals.scheduledPlans, signals.scheduledPlans);
    await writeJson(params.stagedDir, STAGED_FILES.signals.favorites, signals.favorites);

    await writeLookerEvidenceDocuments(params.stagedDir);
    await writeLookerFetchReport(params.stagedDir, buildFetchReport(skipped, warnings));
  } finally {
    await client.cleanup?.();
  }
}
|
||||
|
||||
/**
 * Writes `value` as pretty-printed (2-space) JSON with a trailing newline to `relPath` inside `stagedDir`.
 * Missing parent directories are created first.
 *
 * @param stagedDir - Base directory.
 * @param relPath - File path relative to `stagedDir`.
 * @param value - Value to serialize with `JSON.stringify`.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const abs = join(stagedDir, relPath);
  await mkdir(dirname(abs), { recursive: true });
  await writeFile(abs, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
|
||||
|
||||
/**
 * Validates that `value` can be used as a single path segment in the staged directory.
 *
 * Only ASCII letters, digits, `_` and `-` are accepted, which excludes separators, dots and empty strings.
 *
 * @param value - Candidate segment (for example an entity id or model name).
 * @returns `value` unchanged when it is safe.
 * @throws Error when `value` contains any other character or is empty.
 */
function safePathSegment(value: string): string {
  if (!/^[a-zA-Z0-9_-]+$/.test(value)) {
    throw new Error(`Unsafe Looker staged path segment: ${value}`);
  }
  return value;
}
|
||||
|
||||
/**
 * Decides whether an entity's details must be fetched during this run.
 *
 * @param ref - Entity reference from a listing call.
 * @param updatedSince - Previous cursor, or null when there is none.
 * @returns `true` when there is no cursor, when the entity has no `updatedAt`, when either timestamp
 *   cannot be parsed, or when the entity was updated strictly after the cursor; otherwise `false`.
 */
function shouldFetchEntity(ref: LookerEntityRef, updatedSince: string | null): boolean {
  if (!updatedSince || !ref.updatedAt) {
    return true;
  }
  const updatedAtMs = Date.parse(ref.updatedAt);
  const updatedSinceMs = Date.parse(updatedSince);
  // Comparisons against NaN are always false, which would silently skip the entity.
  // When a timestamp is unreadable, fetch rather than risk missing a change.
  if (Number.isNaN(updatedAtMs) || Number.isNaN(updatedSinceMs)) {
    return true;
  }
  return updatedAtMs > updatedSinceMs;
}
|
||||
|
||||
/**
 * Computes the latest timestamp among `fallback` and the `updatedAt` values of `refs`.
 *
 * Timestamps are compared as parsed milliseconds. Missing or unparseable values are ignored, so a
 * single malformed entry cannot discard the valid ones.
 *
 * @param refs - Entity references whose `updatedAt` values are considered.
 * @param fallback - Starting value (typically the previous cursor), or null.
 * @returns The latest timestamp normalized with `Date.prototype.toISOString`, or null when no
 *   parseable timestamp exists.
 */
function maxUpdatedAt(refs: LookerEntityRef[], fallback: string | null): string | null {
  let maxMs = fallback ? Date.parse(fallback) : Number.NaN;
  for (const ref of refs) {
    if (!ref.updatedAt) {
      continue;
    }
    const candidateMs = Date.parse(ref.updatedAt);
    if (Number.isNaN(candidateMs)) {
      continue;
    }
    if (Number.isNaN(maxMs) || candidateMs > maxMs) {
      maxMs = candidateMs;
    }
  }
  return Number.isNaN(maxMs) ? null : new Date(maxMs).toISOString();
}
|
||||
|
||||
/**
 * Validates a raw explore and fills in its warehouse target information.
 *
 * The explore's Looker connection name is mapped to a warehouse connection id, and a target table is
 * resolved for the explore itself (key `model.explore`) and for every join that has a raw
 * `sql_table_name` (key `model.explore.join`). Joins without a raw table name get a null target.
 *
 * @param rawExplore - Unvalidated explore payload from the client.
 * @param config - Parsed pull config providing connection mappings and parsed target tables.
 * @returns The re-validated explore plus a summary of its explore-level target.
 * @throws When the payload or the stamped result fails `stagedExploreFileSchema` validation.
 */
function stampExploreWarehouseTarget(rawExplore: unknown, config: LookerPullConfig): StampedExploreResult {
  const parsed = stagedExploreFileSchema.parse(rawExplore);
  const key = exploreKey(parsed.modelName, parsed.exploreName);
  const targetWarehouseConnectionId = connectionMappingFor(parsed.connectionName, config);
  const targetTable = targetTableFor({
    key,
    rawSqlTableName: parsed.rawSqlTableName,
    targetWarehouseConnectionId,
    config,
    entityLabel: `Looker explore ${key}`,
  });

  const explore = stagedExploreFileSchema.parse({
    ...parsed,
    targetWarehouseConnectionId,
    targetTable,
    // The callback parameter is not named `join` so it does not shadow the `node:path` import.
    joins: parsed.joins.map((exploreJoin) => ({
      ...exploreJoin,
      targetTable: exploreJoin.rawSqlTableName
        ? targetTableFor({
            key: `${key}.${exploreJoin.name}`,
            rawSqlTableName: exploreJoin.rawSqlTableName,
            targetWarehouseConnectionId,
            config,
            entityLabel: `Looker join ${key}.${exploreJoin.name}`,
          })
        : null,
    })),
  });

  return {
    explore,
    targetSummary: {
      targetWarehouseConnectionId: explore.targetWarehouseConnectionId,
      targetTable: explore.targetTable,
    },
  };
}
|
||||
|
||||
/**
 * Looks up the warehouse connection id mapped to a Looker connection name.
 *
 * Only own properties of `config.connectionMappings` are consulted, so names that collide with
 * inherited object members (for example `constructor` or `toString`) are treated as unmapped.
 *
 * @param connectionName - Looker connection name, or null when the explore has none.
 * @param config - Parsed pull config holding `connectionMappings`.
 * @returns The mapped warehouse connection id, or null when there is no name or no mapping.
 */
function connectionMappingFor(connectionName: string | null, config: LookerPullConfig): string | null {
  if (!connectionName) {
    return null;
  }
  if (!Object.prototype.hasOwnProperty.call(config.connectionMappings, connectionName)) {
    return null;
  }
  return config.connectionMappings[connectionName] ?? null;
}
|
||||
|
||||
/**
 * Resolves the target-table record for an explore or join.
 *
 * Resolution order:
 * 1. No raw table name and no mapped connection: null.
 * 2. No mapped connection: a `no_connection_mapping` failure.
 * 3. An entry in `config.parsedTargetTables` under `input.key`: that entry (success or failure).
 * 4. No raw table name: null.
 * 5. Otherwise: a `parse_error` failure, because a raw table name exists without a parsed entry.
 *
 * @param input.key - Lookup key into `parsedTargetTables`.
 * @param input.rawSqlTableName - Raw `sql_table_name`, or null.
 * @param input.targetWarehouseConnectionId - Mapped warehouse connection id, or null.
 * @param input.config - Parsed pull config.
 * @param input.entityLabel - Human-readable label used in failure details.
 * @returns The resolved record, or null when there is nothing to resolve.
 */
function targetTableFor(input: {
  key: string;
  rawSqlTableName: string | null;
  targetWarehouseConnectionId: string | null;
  config: LookerPullConfig;
  entityLabel: string;
}): ParsedTargetTable | null {
  if (!input.rawSqlTableName && !input.targetWarehouseConnectionId) {
    return null;
  }

  if (!input.targetWarehouseConnectionId) {
    return {
      ok: false,
      reason: 'no_connection_mapping',
      detail: `${input.entityLabel} has no mapped warehouse connection.`,
    };
  }

  const parsed = input.config.parsedTargetTables[input.key];
  if (parsed) {
    return parsed;
  }

  if (!input.rawSqlTableName) {
    return null;
  }

  return {
    ok: false,
    reason: 'parse_error',
    detail: `${input.entityLabel} has raw sql_table_name but no parsedTargetTables entry for key ${input.key}.`,
  };
}
|
||||
|
||||
/**
 * Builds the `model.explore` key used to index explores and their parsed target tables.
 *
 * @param modelName - LookML model name.
 * @param exploreName - Explore name within the model.
 * @returns The two names joined by a dot.
 */
function exploreKey(modelName: string, exploreName: string): string {
  return `${modelName}.${exploreName}`;
}
|
||||
|
||||
/**
 * Copies the parent explore's warehouse target onto a query.
 *
 * The explore is looked up by `exploreKey(query.model, query.view)`.
 *
 * @param query - Query to stamp, or null.
 * @param exploreTargetsByKey - Explore targets indexed by `model.explore` key.
 * @returns null for a null query; the query unchanged when its explore is unknown; otherwise a copy
 *   with `targetWarehouseConnectionId` and `targetTable` overwritten from the explore.
 */
function stampQueryWarehouseTarget(
  query: StagedLookerQuery | null,
  exploreTargetsByKey: Map<string, ExploreTargetSummary>,
): StagedLookerQuery | null {
  if (!query) {
    return null;
  }

  const target = exploreTargetsByKey.get(exploreKey(query.model, query.view));
  if (!target) {
    return query;
  }

  return {
    ...query,
    targetWarehouseConnectionId: target.targetWarehouseConnectionId,
    targetTable: target.targetTable,
  };
}
|
||||
|
||||
/**
 * Returns a copy of the dashboard in which every tile's query carries its explore's warehouse target.
 *
 * @param dashboard - Validated dashboard.
 * @param exploreTargetsByKey - Explore targets indexed by `model.explore` key.
 * @returns The stamped dashboard, re-validated with `stagedDashboardFileSchema`.
 * @throws When the stamped dashboard fails schema validation.
 */
function stampDashboardQueries(
  dashboard: StagedDashboardFile,
  exploreTargetsByKey: Map<string, ExploreTargetSummary>,
): StagedDashboardFile {
  return stagedDashboardFileSchema.parse({
    ...dashboard,
    tiles: dashboard.tiles.map((tile) => ({
      ...tile,
      query: stampQueryWarehouseTarget(tile.query, exploreTargetsByKey),
    })),
  });
}
|
||||
|
||||
/**
 * Returns a copy of the Look whose query carries its explore's warehouse target.
 *
 * @param look - Validated Look.
 * @param exploreTargetsByKey - Explore targets indexed by `model.explore` key.
 * @returns The stamped Look, re-validated with `stagedLookFileSchema`.
 * @throws When the stamped Look fails schema validation.
 */
function stampLookQuery(look: StagedLookFile, exploreTargetsByKey: Map<string, ExploreTargetSummary>): StagedLookFile {
  return stagedLookFileSchema.parse({
    ...look,
    query: stampQueryWarehouseTarget(look.query, exploreTargetsByKey),
  });
}
|
||||
|
||||
/**
 * Builds fetch-report warnings for explores whose explore-level target table failed to resolve.
 *
 * Explores with a null or successful target are ignored. Failures with reason
 * `no_connection_mapping` are grouped into one warning per Looker connection name (connections and
 * their affected explores both sorted); every other failure produces one warning per explore. The
 * per-explore warnings come first, followed by the grouped ones. Only `explore.targetTable` is
 * inspected; join targets are not.
 *
 * @param explores - Explores that were already stamped with target information.
 * @returns Warnings to append to the fetch report.
 */
function warehouseTargetWarnings(explores: StagedExploreFile[]): StagedLookerFetchIssue[] {
  const unmapped = new Map<string, string[]>();
  const warnings: StagedLookerFetchIssue[] = [];

  for (const explore of explores) {
    const targetTable = explore.targetTable;
    if (!targetTable || targetTable.ok) {
      continue;
    }

    const sourceKey = exploreKey(explore.modelName, explore.exploreName);
    // Placeholder used for grouping and paths when the explore carries no connection name.
    const lookerConnectionName = explore.connectionName ?? 'missing_connection_name';

    if (targetTable.reason === 'no_connection_mapping') {
      const existing = unmapped.get(lookerConnectionName) ?? [];
      existing.push(sourceKey);
      unmapped.set(lookerConnectionName, existing);
      continue;
    }

    warnings.push({
      rawPath: `looker_connection_mappings/${safeWarningPathSegment(lookerConnectionName)}`,
      entityType: 'looker_connection_mapping',
      entityId: explore.connectionName,
      severity: 'warning',
      statusCode: null,
      message: `Looker explore ${sourceKey} has sql_table_name that cannot be mapped to a physical warehouse table: ${targetTable.reason}.`,
      retryRecommended: false,
      kind: warningKindForReason(targetTable.reason),
      details: {
        lookerConnectionName,
        rawSqlTableName: explore.rawSqlTableName,
        reason: targetTable.reason,
      },
    });
  }

  for (const [lookerConnectionName, affectedExplores] of [...unmapped.entries()].sort(([a], [b]) =>
    a.localeCompare(b),
  )) {
    const sortedAffectedExplores = [...affectedExplores].sort();
    warnings.push({
      rawPath: `looker_connection_mappings/${safeWarningPathSegment(lookerConnectionName)}`,
      entityType: 'looker_connection_mapping',
      entityId: lookerConnectionName === 'missing_connection_name' ? null : lookerConnectionName,
      severity: 'warning',
      statusCode: null,
      message: `Looker connection ${lookerConnectionName} is not mapped to a warehouse connection; ${sortedAffectedExplores.length} explore${sortedAffectedExplores.length === 1 ? '' : 's'} will be wiki-only.`,
      retryRecommended: false,
      kind: 'unmapped_looker_connection',
      details: {
        lookerConnectionName,
        affectedExplores: sortedAffectedExplores,
      },
    });
  }

  return warnings;
}
|
||||
|
||||
/**
 * Maps a target-table failure reason to the `kind` recorded on the fetch-report warning.
 *
 * @param reason - Failure reason from a parsed target table.
 * @returns `looker_template_unresolved` and `derived_table_not_supported` map to themselves;
 *   every other reason maps to `unparseable_sql_table_name`.
 */
function warningKindForReason(reason: ParsedTargetTableFailureReason): StagedLookerFetchIssue['kind'] {
  if (reason === 'looker_template_unresolved') {
    return 'looker_template_unresolved';
  }
  if (reason === 'derived_table_not_supported') {
    return 'derived_table_not_supported';
  }
  return 'unparseable_sql_table_name';
}
|
||||
|
||||
/**
 * Sanitizes a value for use in a warning's `rawPath`.
 *
 * Unlike `safePathSegment`, this never throws: each run of characters outside `[a-zA-Z0-9_-]` is
 * replaced with a single underscore.
 *
 * @param value - Arbitrary string, such as a Looker connection name.
 * @returns The sanitized string.
 */
function safeWarningPathSegment(value: string): string {
  return value.replace(/[^a-zA-Z0-9_-]+/g, '_');
}
|
||||
|
||||
/**
 * Converts a caught error into a fetch-report issue.
 *
 * @param input.rawPath - Staged path the failed entity would have been written to.
 * @param input.entityType - Type of the entity that failed.
 * @param input.entityId - Identifier of the entity, or null when not applicable.
 * @param input.error - The caught value.
 * @param input.severity - Optional override; defaults to `warning` for `signals` and `error` otherwise.
 * @returns An issue whose `retryRecommended` is true only when the extracted status code is 429.
 */
function issueForFetchError(input: {
  rawPath: string;
  entityType: StagedLookerFetchIssue['entityType'];
  entityId: string | null;
  error: unknown;
  severity?: StagedLookerFetchIssue['severity'];
}): StagedLookerFetchIssue {
  const statusCode = errorStatusCode(input.error);
  return {
    rawPath: input.rawPath,
    entityType: input.entityType,
    entityId: input.entityId,
    severity: input.severity ?? (input.entityType === 'signals' ? 'warning' : 'error'),
    statusCode,
    message: errorMessage(input.error),
    retryRecommended: statusCode === 429,
  };
}
|
||||
|
||||
/**
 * Extracts a human-readable message from a caught value.
 *
 * @param error - Any thrown value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}
|
||||
|
||||
/**
 * Extracts an HTTP-style status code from a caught value.
 *
 * On each object, `statusCode` is read first and `status` second. A finite number is returned as-is;
 * a string is trimmed and converted with `Number`. When neither field is a number or string, the
 * search continues on the object's `response` property. Objects already visited are not revisited,
 * so a self-referential `response` chain terminates.
 *
 * @param error - Any thrown value.
 * @returns The status code, or null when none is found or the value found is blank or non-finite.
 */
function errorStatusCode(error: unknown): number | null {
  const visited = new Set<object>();
  let current: unknown = error;
  while (current && typeof current === 'object' && !visited.has(current)) {
    visited.add(current);
    const record = current as Record<string, unknown>;
    const direct = record.statusCode ?? record.status;
    if (typeof direct === 'number') {
      return Number.isFinite(direct) ? direct : null;
    }
    if (typeof direct === 'string') {
      const trimmed = direct.trim();
      // Number('') is 0, which is not a meaningful status code.
      if (trimmed === '') {
        return null;
      }
      const parsed = Number(trimmed);
      return Number.isFinite(parsed) ? parsed : null;
    }
    current = record.response;
  }
  return null;
}
|
||||
|
||||
/**
 * Assembles the fetch report from the collected issues.
 *
 * @param skipped - Entities that could not be fetched.
 * @param warnings - Non-fatal issues.
 * @returns A report whose `status` is `partial` when anything was skipped or any warning has entity
 *   type `looker_connection_mapping`, and `success` otherwise. `retryRecommended` is true when any
 *   issue in either list recommends a retry.
 */
function buildFetchReport(
  skipped: StagedLookerFetchIssue[],
  warnings: StagedLookerFetchIssue[],
): StagedLookerFetchReport {
  const retryRecommended = [...skipped, ...warnings].some((issue) => issue.retryRecommended);
  const hasWarehouseTargetWarnings = warnings.some((issue) => issue.entityType === 'looker_connection_mapping');
  return {
    status: skipped.length > 0 || hasWarehouseTargetWarnings ? 'partial' : 'success',
    retryRecommended,
    skipped,
    warnings,
  };
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
import type { KloLocalProject, KloProjectConnectionConfig } from '../../../project/index.js';
|
||||
import {
|
||||
DefaultLookerClientFactory,
|
||||
DefaultLookerConnectionClientFactory,
|
||||
type LookerCredentialResolver,
|
||||
} from './factory.js';
|
||||
import { LookerSourceAdapter } from './looker.adapter.js';
|
||||
|
||||
/**
 * Normalises an untyped config value into a usable string.
 *
 * @param value - Any value read from configuration or the environment.
 * @returns The trimmed string when `value` is a string with non-whitespace content, otherwise `null`.
 */
function stringField(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
||||
|
||||
/**
 * Resolves an `env:NAME` reference against an environment map.
 *
 * @param ref - Reference text; only values starting with `env:` are recognised.
 * @param env - Environment to read the named variable from.
 * @returns The variable's value as normalised by `stringField`, or `null` when
 *   `ref` is not an `env:` reference.
 */
function resolveEnvReference(ref: string, env: NodeJS.ProcessEnv): string | null {
  const prefix = 'env:';
  if (ref.startsWith(prefix)) {
    const variableName = ref.slice(prefix.length);
    return stringField(env[variableName]);
  }
  return null;
}
|
||||
|
||||
/**
 * Builds Looker API credentials from a local project connection entry.
 *
 * Accepts snake_case and camelCase spellings of each field. The client secret
 * may be given inline (`client_secret` / `clientSecret`) or as an `env:NAME`
 * reference (`client_secret_ref` / `clientSecretRef`) resolved against `env`.
 *
 * @param connectionId - Connection key, used only in error messages.
 * @param connection - The connection config entry, or `undefined` when absent.
 * @param env - Environment used to resolve `env:` secret references.
 * @returns `{ base_url, client_id, client_secret }` with trimmed values.
 * @throws Error when the connection is missing or its driver is not `looker`
 *   (case-insensitive), or when base URL, client id, or client secret cannot be determined.
 */
export function lookerCredentialsFromLocalConnection(
  connectionId: string,
  connection: KloProjectConnectionConfig | undefined,
  env: NodeJS.ProcessEnv = process.env,
) {
  if (!connection || String(connection.driver).toLowerCase() !== 'looker') {
    throw new Error(`Connection "${connectionId}" is not a Looker connection`);
  }

  // Resolve a secret reference using the same trimmed text that validated it, so a
  // whitespace-padded `env:NAME` value resolves instead of silently failing the prefix check.
  const secretFromRef = (rawRef: unknown): string | null => {
    const ref = stringField(rawRef);
    return ref ? resolveEnvReference(ref, env) : null;
  };

  const baseUrl = stringField(connection.base_url) ?? stringField(connection.baseUrl) ?? stringField(connection.url);
  const clientId = stringField(connection.client_id) ?? stringField(connection.clientId);
  // Inline secrets take precedence over references; snake_case precedes camelCase.
  const clientSecret =
    stringField(connection.client_secret) ??
    stringField(connection.clientSecret) ??
    secretFromRef(connection.client_secret_ref) ??
    secretFromRef(connection.clientSecretRef);

  if (!baseUrl) {
    throw new Error(`Connection "${connectionId}" is missing Looker base_url`);
  }
  if (!clientId) {
    throw new Error(`Connection "${connectionId}" is missing Looker client_id`);
  }
  if (!clientSecret) {
    throw new Error(`Connection "${connectionId}" is missing Looker client_secret or client_secret_ref`);
  }

  return { base_url: baseUrl, client_id: clientId, client_secret: clientSecret };
}
|
||||
|
||||
/**
 * Creates a credential resolver backed by the connections in a local project config.
 *
 * @param project - Local project whose `config.connections` map is consulted on every resolve.
 * @param env - Environment forwarded to `lookerCredentialsFromLocalConnection`.
 * @returns A resolver whose `resolve(id)` looks up `project.config.connections[id]` and
 *   converts it to Looker credentials; errors from that conversion reject the promise.
 */
export function createLocalLookerCredentialResolver(
  project: KloLocalProject,
  env: NodeJS.ProcessEnv = process.env,
): LookerCredentialResolver {
  const resolver: LookerCredentialResolver = {
    async resolve(lookerConnectionId) {
      // Read the connection at call time so the lookup reflects the config as it is then.
      const connection = project.config.connections[lookerConnectionId];
      return lookerCredentialsFromLocalConnection(lookerConnectionId, connection, env);
    },
  };
  return resolver;
}
|
||||
|
||||
/**
 * Assembles a `LookerSourceAdapter` for a local project.
 *
 * Wiring: local credential resolver -> `DefaultLookerConnectionClientFactory` ->
 * `DefaultLookerClientFactory` -> adapter.
 *
 * @param project - Local project supplying the connection configuration.
 * @param env - Environment used when resolving credential references.
 * @returns The adapter, constructed with only `clientFactory` set.
 */
export function createLocalLookerSourceAdapter(
  project: KloLocalProject,
  env: NodeJS.ProcessEnv = process.env,
): LookerSourceAdapter {
  const credentialResolver = createLocalLookerCredentialResolver(project, env);
  const connectionFactory = new DefaultLookerConnectionClientFactory(credentialResolver);
  const clientFactory = new DefaultLookerClientFactory(connectionFactory);
  return new LookerSourceAdapter({ clientFactory });
}
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
import { mkdtemp } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { LocalLookerRuntimeStore } from './local-runtime-store.js';
|
||||
|
||||
describe('LocalLookerRuntimeStore', () => {
  const lookerConnectionId = 'prod-looker';

  // Builds a store on a fresh temp directory with a fixed clock so timestamps are deterministic.
  async function store() {
    const dir = await mkdtemp(join(tmpdir(), 'klo-looker-store-'));
    const dbPath = join(dir, 'db.sqlite');
    return new LocalLookerRuntimeStore({
      dbPath,
      now: () => new Date('2026-05-05T12:00:00.000Z'),
    });
  }

  it('stores cursors and connection mappings', async () => {
    const local = await store();
    const cursors = {
      dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
      looksLastSyncedAt: null,
    };

    await local.setCursors(lookerConnectionId, cursors);
    await local.upsertConnectionMapping({
      lookerConnectionId,
      lookerConnectionName: 'bq_reporting',
      kloConnectionId: 'prod-warehouse',
      source: 'cli',
    });

    // Cursors round-trip unchanged.
    await expect(local.readCursors(lookerConnectionId)).resolves.toEqual({
      dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
      looksLastSyncedAt: null,
    });
    // readMappings omits `source`; metadata stays null until a refresh fills it in.
    const expectedMapping = {
      lookerConnectionName: 'bq_reporting',
      kloConnectionId: 'prod-warehouse',
      lookerHost: null,
      lookerDatabase: null,
      lookerDialect: null,
    };
    await expect(local.readMappings(lookerConnectionId)).resolves.toEqual([expectedMapping]);
  });

  it('refreshes discovered metadata without dropping local targets', async () => {
    const local = await store();
    await local.upsertConnectionMapping({
      lookerConnectionId,
      lookerConnectionName: 'bq_reporting',
      kloConnectionId: 'prod-warehouse',
      source: 'cli',
    });

    const discoveredConnection = {
      name: 'bq_reporting',
      host: 'bigquery.googleapis.com',
      database: 'analytics',
      schema: null,
      dialect: 'bigquery_standard_sql',
    };
    await local.refreshDiscoveredConnections({
      lookerConnectionId,
      discovered: [discoveredConnection],
    });

    // The target survives the refresh; metadata and `source` are overwritten by it.
    await expect(local.listConnectionMappings(lookerConnectionId)).resolves.toEqual([
      {
        lookerConnectionName: 'bq_reporting',
        kloConnectionId: 'prod-warehouse',
        lookerHost: 'bigquery.googleapis.com',
        lookerDatabase: 'analytics',
        lookerDialect: 'bigquery_standard_sql',
        source: 'refresh',
      },
    ]);
  });

  it('applies yaml mapping intent while preserving refresh metadata and cli overrides', async () => {
    const local = await store();
    await local.refreshDiscoveredConnections({
      lookerConnectionId,
      discovered: [{ name: 'analytics', host: 'looker-db.test', database: 'warehouse', schema: null, dialect: 'postgres' }],
    });
    await local.upsertConnectionMapping({
      lookerConnectionId,
      lookerConnectionName: 'manual',
      kloConnectionId: 'cli-warehouse',
      source: 'cli',
    });

    // YAML targets both rows; only the unmapped refresh row should take the YAML target.
    const yamlMappings = ['analytics', 'manual'].map((lookerConnectionName) => ({
      lookerConnectionName,
      kloConnectionId: 'yaml-warehouse',
    }));
    await local.applyYamlBootstrap({ lookerConnectionId, mappings: yamlMappings });

    const refreshedRow = {
      lookerConnectionName: 'analytics',
      kloConnectionId: 'yaml-warehouse',
      lookerHost: 'looker-db.test',
      lookerDatabase: 'warehouse',
      lookerDialect: 'postgres',
      source: 'klo.yaml',
    };
    const cliRow = {
      lookerConnectionName: 'manual',
      kloConnectionId: 'cli-warehouse',
      source: 'cli',
    };
    await expect(local.listConnectionMappings(lookerConnectionId)).resolves.toMatchObject([refreshedRow, cliRow]);
  });
});
|
||||
|
|
@ -0,0 +1,280 @@
|
|||
import { mkdirSync } from 'node:fs';
|
||||
import { dirname } from 'node:path';
|
||||
import Database from 'better-sqlite3';
|
||||
import type { LookerWarehouseConnectionInfo } from './client.js';
|
||||
import type { LookerConnectionMapping } from './mapping.js';
|
||||
import type { LookerRuntimeCursors } from './types.js';
|
||||
|
||||
/** Value of the `source` column recorded on each stored mapping row. */
export type LocalLookerMappingSource = 'klo.yaml' | 'cli' | 'refresh';

/** Construction options for `LocalLookerRuntimeStore`. */
interface LocalLookerRuntimeStoreOptions {
  /** SQLite database file path; the parent directory is created if missing. */
  dbPath: string;
  /** Clock override used for `updated_at` timestamps; defaults to the current time. */
  now?: () => Date;
}

/** A connection mapping together with the source that last wrote it. */
export interface LocalLookerConnectionMappingListRow extends LookerConnectionMapping {
  source: LocalLookerMappingSource;
}

/** Input for `upsertConnectionMapping`. */
export interface UpsertLocalLookerConnectionMappingInput {
  lookerConnectionId: string;
  lookerConnectionName: string;
  /** Target connection id, or `null` to store the row without a target. */
  kloConnectionId: string | null;
  source: LocalLookerMappingSource;
}

/** Input for `applyYamlBootstrap`: the mappings declared for one Looker connection. */
interface ApplyLocalLookerYamlBootstrapInput {
  lookerConnectionId: string;
  mappings: Array<{
    lookerConnectionName: string;
    kloConnectionId: string | null;
  }>;
}

/** Input for `refreshDiscoveredConnections`. */
export interface RefreshLocalLookerDiscoveredConnectionsInput {
  lookerConnectionId: string;
  /** Connections to upsert; each one's name, host, database and dialect are stored. */
  discovered: LookerWarehouseConnectionInfo[];
}

/** Input for `clearConnectionMappings`. */
export interface ClearLocalLookerMappingsInput {
  lookerConnectionId: string;
  /** When set (and non-empty), only this mapping is removed; otherwise all mappings for the connection are. */
  lookerConnectionName?: string;
}

/** Read-only view of stored Looker source state. */
export interface LookerSourceStateReader {
  readMappings(lookerConnectionId: string): Promise<LookerConnectionMapping[]>;
  readCursors(lookerConnectionId: string): Promise<LookerRuntimeCursors>;
}
|
||||
|
||||
/**
 * SQLite-backed store for local Looker runtime state.
 *
 * Holds two tables: per-connection sync cursors (`local_looker_runtime_config`) and
 * Looker-connection-name -> target-connection mappings (`local_looker_connection_mappings`).
 * Methods are declared `async` but perform their database work synchronously.
 */
export class LocalLookerRuntimeStore implements LookerSourceStateReader {
  private readonly db: Database.Database;
  private readonly now: () => Date;

  /**
   * Opens (creating if needed) the database at `options.dbPath`, enables WAL journaling
   * and foreign keys, and creates both tables when they do not exist.
   */
  constructor(options: LocalLookerRuntimeStoreOptions) {
    const { dbPath } = options;
    mkdirSync(dirname(dbPath), { recursive: true });
    this.db = new Database(dbPath);
    this.db.pragma('journal_mode = WAL');
    this.db.pragma('foreign_keys = ON');
    this.now = options.now ?? (() => new Date());
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS local_looker_runtime_config (
        looker_connection_id TEXT PRIMARY KEY,
        dashboards_last_synced_at TEXT,
        looks_last_synced_at TEXT,
        updated_at TEXT NOT NULL
      );

      CREATE TABLE IF NOT EXISTS local_looker_connection_mappings (
        looker_connection_id TEXT NOT NULL,
        looker_connection_name TEXT NOT NULL,
        klo_connection_id TEXT,
        looker_host TEXT,
        looker_database TEXT,
        looker_dialect TEXT,
        source TEXT NOT NULL,
        updated_at TEXT NOT NULL,
        PRIMARY KEY (looker_connection_id, looker_connection_name)
      );
    `);
  }

  /**
   * Applies YAML-declared mappings inside one transaction.
   *
   * A mapping is inserted (source `klo.yaml`) when no row exists for its name. An existing
   * row is updated only when it came from a refresh and has no target yet; every other
   * existing row is left untouched.
   */
  async applyYamlBootstrap(input: ApplyLocalLookerYamlBootstrapInput): Promise<void> {
    const timestamp = this.now().toISOString();
    const { lookerConnectionId } = input;

    const runBootstrap = this.db.transaction(() => {
      const selectExisting = this.db.prepare(`
        SELECT klo_connection_id, source
        FROM local_looker_connection_mappings
        WHERE looker_connection_id = ? AND looker_connection_name = ?
      `);
      const insertYamlRow = this.db.prepare(`
        INSERT INTO local_looker_connection_mappings (
          looker_connection_id,
          looker_connection_name,
          klo_connection_id,
          looker_host,
          looker_database,
          looker_dialect,
          source,
          updated_at
        )
        VALUES (?, ?, ?, NULL, NULL, NULL, 'klo.yaml', ?)
      `);
      const claimRefreshRow = this.db.prepare(`
        UPDATE local_looker_connection_mappings
        SET klo_connection_id = ?,
            source = 'klo.yaml',
            updated_at = ?
        WHERE looker_connection_id = ?
          AND looker_connection_name = ?
          AND source = 'refresh'
          AND klo_connection_id IS NULL
      `);

      for (const { lookerConnectionName, kloConnectionId } of input.mappings) {
        const current = selectExisting.get(lookerConnectionId, lookerConnectionName) as
          | { klo_connection_id: string | null; source: LocalLookerMappingSource }
          | undefined;

        if (!current) {
          insertYamlRow.run(lookerConnectionId, lookerConnectionName, kloConnectionId, timestamp);
        } else if (current.source === 'refresh' && current.klo_connection_id === null) {
          claimRefreshRow.run(kloConnectionId, timestamp, lookerConnectionId, lookerConnectionName);
        }
      }
    });

    runBootstrap();
  }

  /** Reads the stored sync cursors; both are `null` when no row exists for the connection. */
  async readCursors(lookerConnectionId: string): Promise<LookerRuntimeCursors> {
    const select = this.db.prepare(
      `
        SELECT dashboards_last_synced_at, looks_last_synced_at
        FROM local_looker_runtime_config
        WHERE looker_connection_id = ?
      `,
    );
    const stored = select.get(lookerConnectionId) as
      | { dashboards_last_synced_at: string | null; looks_last_synced_at: string | null }
      | undefined;

    return {
      dashboardsLastSyncedAt: stored?.dashboards_last_synced_at ?? null,
      looksLastSyncedAt: stored?.looks_last_synced_at ?? null,
    };
  }

  /** Inserts or replaces both sync cursors for the connection and stamps `updated_at`. */
  async setCursors(lookerConnectionId: string, cursors: LookerRuntimeCursors): Promise<void> {
    const upsert = this.db.prepare(
      `
        INSERT INTO local_looker_runtime_config (
          looker_connection_id,
          dashboards_last_synced_at,
          looks_last_synced_at,
          updated_at
        )
        VALUES (?, ?, ?, ?)
        ON CONFLICT(looker_connection_id) DO UPDATE SET
          dashboards_last_synced_at = excluded.dashboards_last_synced_at,
          looks_last_synced_at = excluded.looks_last_synced_at,
          updated_at = excluded.updated_at
      `,
    );
    upsert.run(lookerConnectionId, cursors.dashboardsLastSyncedAt, cursors.looksLastSyncedAt, this.now().toISOString());
  }

  /** Returns the connection's mappings (ordered by name) with the `source` field removed. */
  async readMappings(lookerConnectionId: string): Promise<LookerConnectionMapping[]> {
    const listed = await this.listConnectionMappings(lookerConnectionId);
    return listed.map((entry) => {
      const { source: _source, ...mapping } = entry;
      return mapping;
    });
  }

  /** Lists the connection's mapping rows, including `source`, ordered by Looker connection name. */
  async listConnectionMappings(lookerConnectionId: string): Promise<LocalLookerConnectionMappingListRow[]> {
    const select = this.db.prepare(
      `
        SELECT
          looker_connection_name,
          klo_connection_id,
          looker_host,
          looker_database,
          looker_dialect,
          source
        FROM local_looker_connection_mappings
        WHERE looker_connection_id = ?
        ORDER BY looker_connection_name
      `,
    );
    const records = select.all(lookerConnectionId) as Array<{
      looker_connection_name: string;
      klo_connection_id: string | null;
      looker_host: string | null;
      looker_database: string | null;
      looker_dialect: string | null;
      source: LocalLookerMappingSource;
    }>;

    // Translate snake_case columns into the camelCase row shape.
    const result: LocalLookerConnectionMappingListRow[] = [];
    for (const record of records) {
      result.push({
        lookerConnectionName: record.looker_connection_name,
        kloConnectionId: record.klo_connection_id,
        lookerHost: record.looker_host,
        lookerDatabase: record.looker_database,
        lookerDialect: record.looker_dialect,
        source: record.source,
      });
    }
    return result;
  }

  /**
   * Inserts a mapping or, on conflict, overwrites its target and source.
   * Discovered host/database/dialect on an existing row are left as they were.
   */
  async upsertConnectionMapping(input: UpsertLocalLookerConnectionMappingInput): Promise<void> {
    const { lookerConnectionId, lookerConnectionName, kloConnectionId, source } = input;
    const upsert = this.db.prepare(
      `
        INSERT INTO local_looker_connection_mappings (
          looker_connection_id,
          looker_connection_name,
          klo_connection_id,
          looker_host,
          looker_database,
          looker_dialect,
          source,
          updated_at
        )
        VALUES (?, ?, ?, NULL, NULL, NULL, ?, ?)
        ON CONFLICT(looker_connection_id, looker_connection_name) DO UPDATE SET
          klo_connection_id = excluded.klo_connection_id,
          source = excluded.source,
          updated_at = excluded.updated_at
      `,
    );
    upsert.run(lookerConnectionId, lookerConnectionName, kloConnectionId, source, this.now().toISOString());
  }

  /**
   * Upserts every discovered connection inside one transaction.
   *
   * New rows are stored without a target. Existing rows keep their target but have
   * host, database, dialect, `source` (set to `refresh`) and `updated_at` overwritten.
   */
  async refreshDiscoveredConnections(input: RefreshLocalLookerDiscoveredConnectionsInput): Promise<void> {
    const timestamp = this.now().toISOString();
    const runRefresh = this.db.transaction(() => {
      const upsertDiscovered = this.db.prepare(`
        INSERT INTO local_looker_connection_mappings (
          looker_connection_id,
          looker_connection_name,
          klo_connection_id,
          looker_host,
          looker_database,
          looker_dialect,
          source,
          updated_at
        )
        VALUES (?, ?, NULL, ?, ?, ?, 'refresh', ?)
        ON CONFLICT(looker_connection_id, looker_connection_name) DO UPDATE SET
          looker_host = excluded.looker_host,
          looker_database = excluded.looker_database,
          looker_dialect = excluded.looker_dialect,
          source = excluded.source,
          updated_at = excluded.updated_at
      `);
      for (const { name, host, database, dialect } of input.discovered) {
        upsertDiscovered.run(input.lookerConnectionId, name, host, database, dialect, timestamp);
      }
    });
    runRefresh();
  }

  /**
   * Deletes mappings for a Looker connection: only the named one when
   * `lookerConnectionName` is truthy, otherwise all of them.
   */
  async clearConnectionMappings(input: ClearLocalLookerMappingsInput): Promise<void> {
    const { lookerConnectionId, lookerConnectionName } = input;

    if (!lookerConnectionName) {
      this.db.prepare('DELETE FROM local_looker_connection_mappings WHERE looker_connection_id = ?').run(lookerConnectionId);
      return;
    }

    const deleteOne = this.db.prepare(
      `
        DELETE FROM local_looker_connection_mappings
        WHERE looker_connection_id = ? AND looker_connection_name = ?
      `,
    );
    deleteOne.run(lookerConnectionId, lookerConnectionName);
  }
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { LookerRuntimeClient } from './fetch.js';
|
||||
import { LookerSourceAdapter } from './looker.adapter.js';
|
||||
|
||||
const connectionId = '11111111-1111-4111-8111-111111111111';
|
||||
|
||||
/**
 * Builds a stub Looker runtime client: list endpoints resolve to empty results, while the
 * model listing and explore lookup resolve to a single `b2b` / `sales_pipeline` explore.
 */
function makeClient(): LookerRuntimeClient {
  const lookmlModels = {
    models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
  };
  const salesPipelineExplore = {
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    fields: { dimensions: [], measures: [] },
    joins: [],
  };

  return {
    listDashboards: vi.fn().mockResolvedValue([]),
    getDashboard: vi.fn(),
    listLooks: vi.fn().mockResolvedValue([]),
    getLook: vi.fn(),
    listFolders: vi.fn().mockResolvedValue({ folders: [] }),
    listUsers: vi.fn().mockResolvedValue([]),
    listGroups: vi.fn().mockResolvedValue([]),
    listLookmlModels: vi.fn().mockResolvedValue(lookmlModels),
    getExplore: vi.fn().mockResolvedValue(salesPipelineExplore),
  };
}
|
||||
|
||||
describe('LookerSourceAdapter', () => {
  let stagedDir: string;

  // Every test gets its own staging directory, removed afterwards.
  beforeEach(async () => {
    const prefix = join(tmpdir(), 'looker-adapter-');
    stagedDir = await mkdtemp(prefix);
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('exposes source="looker" and skillNames=["looker_ingest"]', () => {
    const { source, skillNames } = new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } });

    expect(source).toBe('looker');
    expect(skillNames).toEqual(['looker_ingest']);
  });

  it('enables context evidence indexing and delegates triage signals', async () => {
    const adapter = new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } });

    expect(adapter.evidenceIndexing).toBe('documents');
    expect(adapter.triageSupported).toBe(true);

    const signals = adapter.getTriageSignals?.(stagedDir, 'looker:dashboard:10');
    await expect(signals).resolves.toMatchObject({
      objectType: 'looker_dashboard',
    });
  });

  it('fetches, detects, and chunks a runtime bundle through the composed adapter', async () => {
    const createClient = vi.fn().mockResolvedValue(makeClient());
    const adapter = new LookerSourceAdapter({
      clientFactory: { createClient },
      now: () => new Date('2026-04-30T12:30:00.000Z'),
    });

    await mkdir(stagedDir, { recursive: true });
    await adapter.fetch({ lookerConnectionId: connectionId }, stagedDir, { connectionId, sourceKey: 'looker' });

    expect(await adapter.detect(stagedDir)).toBe(true);
    const stagedExplore = await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8');
    expect(stagedExplore).toContain('sales_pipeline');

    const { workUnits } = await adapter.chunk(stagedDir);
    expect(workUnits.map((wu) => wu.unitKey)).toEqual(['looker-explore-b2b-sales_pipeline']);
  });

  it('passes pull success notifications to the server callback', async () => {
    const onPullSucceeded = vi.fn().mockResolvedValue(undefined);
    const adapter = new LookerSourceAdapter({
      clientFactory: { createClient: () => makeClient() },
      onPullSucceeded,
    });
    const completedAt = new Date('2026-04-30T12:00:00.000Z');

    await adapter.onPullSucceeded({
      connectionId,
      sourceKey: 'looker',
      syncId: 'sync-1',
      trigger: 'scheduled_pull',
      completedAt,
      stagedDir: '/tmp/staged',
    });

    // The callback must receive the context exactly as the adapter was given it.
    expect(onPullSucceeded).toHaveBeenCalledWith({
      connectionId,
      sourceKey: 'looker',
      syncId: 'sync-1',
      trigger: 'scheduled_pull',
      completedAt,
      stagedDir: '/tmp/staged',
    });
  });

  it('describes incremental fetch scope from the staged scope file', async () => {
    await mkdir(join(stagedDir, 'dashboards'), { recursive: true });
    const scopeFile = {
      mode: 'incremental',
      knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json'],
      fetchedRawPaths: ['dashboards/11.json'],
    };
    await writeFile(join(stagedDir, 'looker-scope.json'), JSON.stringify(scopeFile, null, 2));
    const adapter = new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } });

    const scope = await adapter.describeScope(stagedDir);

    // Known-but-unfetched is out of scope; fetched and unknown paths are in scope.
    expect(scope.isPathInScope('dashboards/10.json')).toBe(false);
    expect(scope.isPathInScope('dashboards/11.json')).toBe(true);
    expect(scope.isPathInScope('dashboards/12.json')).toBe(true);
  });
});
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
import type { ChunkResult, DiffSet, FetchContext, IngestTrigger, ScopeDescriptor, SourceAdapter } from '../../types.js';
|
||||
import { chunkLookerStagedDir } from './chunk.js';
|
||||
import { detectLookerStagedDir } from './detect.js';
|
||||
import { getLookerTriageSignals } from './evidence-documents.js';
|
||||
import { fetchLookerRuntimeBundle, type LookerClientFactory } from './fetch.js';
|
||||
import { readLookerFetchReport } from './fetch-report.js';
|
||||
import { describeLookerScope } from './scope.js';
|
||||
import { listLookerTargetConnectionIds } from './target-connections.js';
|
||||
|
||||
/** Context handed to the pull-succeeded hook of `LookerSourceAdapter`. */
interface LookerPullSucceededContext {
  connectionId: string;
  sourceKey: string;
  syncId: string;
  /** What initiated the ingest run. */
  trigger: IngestTrigger;
  completedAt: Date;
  /** Directory holding the staged files for the pull. */
  stagedDir: string;
}
|
||||
|
||||
/** Collaborators injected into `LookerSourceAdapter`. */
export interface LookerSourceAdapterDeps {
  /** Factory forwarded to the fetch step to obtain a Looker client. */
  clientFactory: LookerClientFactory;
  /** Optional clock forwarded to the fetch step. */
  now?: () => Date;
  /** Optional hook awaited when the adapter's `onPullSucceeded` is called. */
  onPullSucceeded?: (ctx: LookerPullSucceededContext) => Promise<void>;
}
|
||||
|
||||
/**
 * `SourceAdapter` for Looker. Every operation delegates to a module-level helper in
 * this adapter package; the class only carries the injected dependencies.
 */
export class LookerSourceAdapter implements SourceAdapter {
  readonly source = 'looker';
  readonly skillNames: string[] = ['looker_ingest'];
  readonly evidenceIndexing = 'documents' as const;
  readonly triageSupported = true;

  constructor(private readonly deps: LookerSourceAdapterDeps) {}

  /** Reports whether `stagedDir` is recognised as a Looker staged directory. */
  detect(stagedDir: string): Promise<boolean> {
    return detectLookerStagedDir(stagedDir);
  }

  /** Fetches the Looker runtime bundle into `stagedDir` using the injected client factory and clock. */
  fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const { clientFactory, now } = this.deps;
    return fetchLookerRuntimeBundle({ pullConfig, stagedDir, ctx, clientFactory, now });
  }

  /** Splits the staged directory into work units, optionally limited by `diffSet`. */
  chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    return chunkLookerStagedDir(stagedDir, diffSet);
  }

  /** Reads the fetch report for the staged directory. */
  readFetchReport(stagedDir: string) {
    return readLookerFetchReport(stagedDir);
  }

  /** Lists the target connection ids referenced by the staged directory. */
  listTargetConnectionIds(stagedDir: string): Promise<string[]> {
    return listLookerTargetConnectionIds(stagedDir);
  }

  /** Returns triage signals for one staged object, identified by its external id. */
  getTriageSignals(stagedDir: string, externalId: string) {
    return getLookerTriageSignals(stagedDir, externalId);
  }

  /** Describes which paths of the staged directory are in scope. */
  describeScope(stagedDir: string): Promise<ScopeDescriptor> {
    return describeLookerScope(stagedDir);
  }

  /** Awaits the optional `onPullSucceeded` dependency; does nothing further when it is absent. */
  async onPullSucceeded(ctx: LookerPullSucceededContext): Promise<void> {
    await this.deps.onPullSucceeded?.(ctx);
  }
}
|
||||
384
packages/context/src/ingest/adapters/looker/mapping.test.ts
Normal file
384
packages/context/src/ingest/adapters/looker/mapping.test.ts
Normal file
|
|
@ -0,0 +1,384 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { StagedExploreFile, StagedLookmlModelsFile } from './types.js';
|
||||
import {
|
||||
buildLookerPullConfigFromInputs,
|
||||
collectExploreParseItems,
|
||||
computeLookerMappingDrift,
|
||||
discoverLookerConnections,
|
||||
lookerDialectToConnectionType,
|
||||
projectParsedIdentifier,
|
||||
refreshLookerMappingPlaceholders,
|
||||
sqlglotDialectForConnectionType,
|
||||
suggestKloConnectionForLookerConnection,
|
||||
validateLookerMappings,
|
||||
validateLookerWarehouseTarget,
|
||||
} from './mapping.js';
|
||||
|
||||
// Connections as reported live by Looker. Tests index into this array, so order matters:
// [0] is the BigQuery connection, [1] the Postgres connection (host carries a port).
const liveConnections = [
  {
    name: 'b2b_sandbox_bq',
    host: 'warehouse.example.com',
    database: 'analytics',
    schema: null,
    dialect: 'bigquery_standard_sql',
  },
  {
    name: 'pg_runtime',
    host: 'pg.internal:5432',
    database: 'app',
    schema: 'public',
    dialect: 'postgres',
  },
];

// An explore on the BigQuery connection with one join; target fields are still unresolved (null).
const mappedExplore: StagedExploreFile = {
  modelName: 'b2b',
  exploreName: 'sales_pipeline',
  label: 'Sales Pipeline',
  description: null,
  rawSqlTableName: 'proj.analytics.opportunities AS opportunities',
  connectionName: 'b2b_sandbox_bq',
  viewName: 'opportunities',
  fields: { dimensions: [], measures: [] },
  joins: [
    {
      name: 'accounts',
      type: 'left_outer',
      relationship: 'many_to_one',
      rawSqlTableName: 'proj.analytics.accounts',
      sqlOn: null,
      from: null,
      targetTable: null,
    },
  ],
  targetWarehouseConnectionId: null,
  targetTable: null,
};

// Model listing that contains exactly the explore above.
const models: StagedLookmlModelsFile = {
  models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
};
|
||||
|
||||
describe('discoverLookerConnections', () => {
  it('delegates to the runtime client connection discovery method', async () => {
    const listLookerConnections = vi.fn().mockResolvedValue(liveConnections);
    const client = { listLookerConnections };

    // The helper should return exactly what the client reports, after a single call.
    const discovered = discoverLookerConnections(client);
    await expect(discovered).resolves.toEqual(liveConnections);
    expect(listLookerConnections).toHaveBeenCalledTimes(1);
  });
});
|
||||
|
||||
describe('looker dialect and target validation helpers', () => {
  it('maps Looker dialect names to KLO connection types', () => {
    const bigQuery = lookerDialectToConnectionType('bigquery_standard_sql');
    const postgres = lookerDialectToConnectionType('postgres');
    const sqlServer = lookerDialectToConnectionType('mssql');
    const unrecognised = lookerDialectToConnectionType('unknown');

    expect(bigQuery).toBe('BIGQUERY');
    expect(postgres).toBe('POSTGRESQL');
    expect(sqlServer).toBe('SQLSERVER');
    expect(unrecognised).toBeNull();
  });

  it('maps supported warehouse connection types to sqlglot dialects', () => {
    const bigQuery = sqlglotDialectForConnectionType('BIGQUERY');
    const postgres = sqlglotDialectForConnectionType('POSTGRESQL');
    const looker = sqlglotDialectForConnectionType('LOOKER');

    expect(bigQuery).toBe('bigquery');
    expect(postgres).toBe('postgres');
    // LOOKER has no sqlglot dialect.
    expect(looker).toBeNull();
  });

  it('returns a structured failure for unsupported Looker warehouse targets', () => {
    const expectedFailure = {
      ok: false,
      reason: 'Connection type LOOKER cannot be used as a Looker warehouse mapping target',
    };

    expect(validateLookerWarehouseTarget('LOOKER')).toEqual(expectedFailure);
  });
});
|
||||
|
||||
describe('suggestKloConnectionForLookerConnection', () => {
  // All cases start from the Postgres live connection (host 'pg.internal:5432', database 'app').
  const lookerConnection = liveConnections[1];

  it('returns the single deterministic target with matching type, host, and database', () => {
    const wrongType = {
      id: 'wrong-type',
      connection_type: 'MYSQL',
      connection_params: { host: 'pg.internal', database: 'app' },
    };
    // Differs from the live connection only in letter case and the missing port.
    const matching = {
      id: 'pg-target',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'PG.INTERNAL', database: 'APP' },
    };

    const suggestion = suggestKloConnectionForLookerConnection({
      lookerConnection,
      candidateConnections: [wrongType, matching],
    });

    expect(suggestion).toBe('pg-target');
  });

  it('returns null when more than one target matches', () => {
    const first = {
      id: 'first',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'pg.internal', database: 'app' },
    };
    const second = {
      id: 'second',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'pg.internal:5432', database: 'APP' },
    };

    const suggestion = suggestKloConnectionForLookerConnection({
      lookerConnection,
      candidateConnections: [first, second],
    });

    expect(suggestion).toBeNull();
  });
});
|
||||
|
||||
describe('refreshLookerMappingPlaceholders', () => {
  it('adds newly discovered placeholders and refreshes live metadata without dropping saved targets', () => {
    // One saved mapping with a target but no metadata yet.
    const stored = [
      {
        lookerConnectionName: 'b2b_sandbox_bq',
        kloConnectionId: 'warehouse',
        lookerHost: null,
        lookerDatabase: null,
        lookerDialect: null,
      },
    ];

    const refreshed = refreshLookerMappingPlaceholders({ stored, live: liveConnections });

    // Saved target kept and metadata filled in; the unseen connection becomes an unmapped placeholder.
    const savedWithMetadata = {
      lookerConnectionName: 'b2b_sandbox_bq',
      kloConnectionId: 'warehouse',
      lookerHost: 'warehouse.example.com',
      lookerDatabase: 'analytics',
      lookerDialect: 'bigquery_standard_sql',
    };
    const newPlaceholder = {
      lookerConnectionName: 'pg_runtime',
      kloConnectionId: null,
      lookerHost: 'pg.internal:5432',
      lookerDatabase: 'app',
      lookerDialect: 'postgres',
    };
    expect(refreshed).toEqual({
      changed: true,
      mappings: [savedWithMetadata, newPlaceholder],
    });
  });
});
|
||||
|
||||
describe('computeLookerMappingDrift and validateLookerMappings', () => {
  // Stored mapping with a target and no discovered metadata.
  const storedMapping = (lookerConnectionName: string, kloConnectionId: string) => ({
    lookerConnectionName,
    kloConnectionId,
    lookerHost: null,
    lookerDatabase: null,
    lookerDialect: null,
  });

  it('reports unmapped live connections, stale stored mappings, and in-sync mappings', () => {
    const drift = computeLookerMappingDrift({
      storedMappings: [storedMapping('b2b_sandbox_bq', 'warehouse'), storedMapping('stale_runtime', 'warehouse')],
      discovered: liveConnections,
    });

    expect(drift).toEqual({
      unmappedDiscovered: [liveConnections[1]],
      staleMappings: [{ lookerConnectionName: 'stale_runtime', reason: 'looker_connection_not_found' }],
      inSync: [{ lookerConnectionName: 'b2b_sandbox_bq', kloConnectionId: 'warehouse' }],
    });
  });

  it('validates missing and unsupported target connection ids', () => {
    // 'missing' is not a known connection; 'looker-target' is known but of an unsupported type.
    const validation = validateLookerMappings({
      mappings: [storedMapping('b2b_sandbox_bq', 'missing'), storedMapping('pg_runtime', 'looker-target')],
      knownKloConnectionIds: new Set(['looker-target']),
      knownConnectionTypes: new Map([['looker-target', 'LOOKER']]),
    });

    const missingTargetError = { key: 'b2b_sandbox_bq', reason: 'KLO connection missing does not exist' };
    const unsupportedTargetError = {
      key: 'pg_runtime',
      reason: 'Connection type LOOKER cannot be used as a Looker warehouse mapping target',
    };
    expect(validation).toEqual({
      ok: false,
      errors: [missingTargetError, unsupportedTargetError],
    });
  });
});
|
||||
|
||||
describe('collectExploreParseItems and projectParsedIdentifier', () => {
  it('collects base explore and join parser inputs for mapped explores', () => {
    const collected = collectExploreParseItems({
      explore: mappedExplore,
      connectionMappings: { b2b_sandbox_bq: 'warehouse' },
      targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
    });

    // One item for the explore's base table and one per join, keyed `model.explore[.join]`.
    const baseItem = {
      key: 'b2b.sales_pipeline',
      sql_table_name: 'proj.analytics.opportunities AS opportunities',
      dialect: 'bigquery',
    };
    const joinItem = {
      key: 'b2b.sales_pipeline.accounts',
      sql_table_name: 'proj.analytics.accounts',
      dialect: 'bigquery',
    };
    expect(collected).toEqual({
      parsedTargetTables: {},
      parseItems: [baseItem, joinItem],
    });
  });

  it('projects successful and failed parser rows into KLO parsed target tables', () => {
    // Success rows: snake_case `canonical_table` becomes camelCase `canonicalTable`.
    const projectedSuccess = projectParsedIdentifier({
      ok: true,
      catalog: 'proj',
      schema: 'analytics',
      name: 'accounts',
      canonical_table: 'proj.analytics.accounts',
    });
    expect(projectedSuccess).toEqual({
      ok: true,
      catalog: 'proj',
      schema: 'analytics',
      name: 'accounts',
      canonicalTable: 'proj.analytics.accounts',
    });

    // Failure rows pass through with their reason.
    const projectedFailure = projectParsedIdentifier({ ok: false, reason: 'derived_table_not_supported' });
    expect(projectedFailure).toEqual({
      ok: false,
      reason: 'derived_table_not_supported',
    });
  });
});
|
||||
|
||||
describe('buildLookerPullConfigFromInputs', () => {
|
||||
it('builds the hosted-equivalent Looker pull config from caller-loaded inputs', async () => {
|
||||
const parser = {
|
||||
parse: vi.fn().mockResolvedValue({
|
||||
'b2b.sales_pipeline': {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'analytics',
|
||||
name: 'opportunities',
|
||||
canonical_table: 'proj.analytics.opportunities',
|
||||
},
|
||||
'b2b.sales_pipeline.accounts': {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'analytics',
|
||||
name: 'accounts',
|
||||
canonical_table: 'proj.analytics.accounts',
|
||||
},
|
||||
}),
|
||||
};
|
||||
const client = {
|
||||
listLookmlModels: vi.fn().mockResolvedValue(models),
|
||||
getExplore: vi.fn().mockResolvedValue(mappedExplore),
|
||||
};
|
||||
|
||||
await expect(
|
||||
buildLookerPullConfigFromInputs({
|
||||
lookerConnectionId: 'prod-looker',
|
||||
cursors: {
|
||||
dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
|
||||
looksLastSyncedAt: null,
|
||||
},
|
||||
refreshedMappings: [
|
||||
{
|
||||
lookerConnectionName: 'b2b_sandbox_bq',
|
||||
kloConnectionId: 'warehouse',
|
||||
lookerHost: 'warehouse.example.com',
|
||||
lookerDatabase: 'analytics',
|
||||
lookerDialect: 'bigquery_standard_sql',
|
||||
},
|
||||
],
|
||||
targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
|
||||
client,
|
||||
parser,
|
||||
}),
|
||||
).resolves.toEqual({
|
||||
lookerConnectionId: 'prod-looker',
|
||||
dashboardUpdatedSince: '2026-05-01T00:00:00.000Z',
|
||||
lookUpdatedSince: null,
|
||||
connectionMappings: { b2b_sandbox_bq: 'warehouse' },
|
||||
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
|
||||
parsedTargetTables: {
|
||||
'b2b.sales_pipeline': {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'analytics',
|
||||
name: 'opportunities',
|
||||
canonicalTable: 'proj.analytics.opportunities',
|
||||
},
|
||||
'b2b.sales_pipeline.accounts': {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'analytics',
|
||||
name: 'accounts',
|
||||
canonicalTable: 'proj.analytics.accounts',
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('marks parser failures as parse_error without blocking pull-config construction', async () => {
|
||||
const parser = { parse: vi.fn().mockRejectedValue(new Error('python unavailable')) };
|
||||
const client = {
|
||||
listLookmlModels: vi.fn().mockResolvedValue(models),
|
||||
getExplore: vi.fn().mockResolvedValue(mappedExplore),
|
||||
};
|
||||
|
||||
const config = await buildLookerPullConfigFromInputs({
|
||||
lookerConnectionId: 'prod-looker',
|
||||
cursors: { dashboardsLastSyncedAt: null, looksLastSyncedAt: null },
|
||||
refreshedMappings: [
|
||||
{
|
||||
lookerConnectionName: 'b2b_sandbox_bq',
|
||||
kloConnectionId: 'warehouse',
|
||||
lookerHost: null,
|
||||
lookerDatabase: null,
|
||||
lookerDialect: null,
|
||||
},
|
||||
],
|
||||
targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
|
||||
client,
|
||||
parser,
|
||||
});
|
||||
|
||||
expect(config.parsedTargetTables).toMatchObject({
|
||||
'b2b.sales_pipeline': { ok: false, reason: 'parse_error' },
|
||||
'b2b.sales_pipeline.accounts': { ok: false, reason: 'parse_error' },
|
||||
});
|
||||
});
|
||||
});
|
||||
442
packages/context/src/ingest/adapters/looker/mapping.ts
Normal file
442
packages/context/src/ingest/adapters/looker/mapping.ts
Normal file
|
|
@ -0,0 +1,442 @@
|
|||
import type { LookerWarehouseConnectionInfo } from './client.js';
|
||||
import type {
|
||||
LookerPullConfig,
|
||||
LookerRuntimeCursors,
|
||||
ParsedTargetTable,
|
||||
StagedExploreFile,
|
||||
StagedLookmlModelsFile,
|
||||
} from './types.js';
|
||||
|
||||
/**
 * Lookup table from lower-case Looker dialect names to warehouse connection types.
 * Several dialect spellings may point at the same connection type.
 */
export const LOOKER_DIALECT_TO_CONNECTION_TYPE = {
  bigquery: 'BIGQUERY',
  bigquery_standard_sql: 'BIGQUERY',
  snowflake: 'SNOWFLAKE',
  postgres: 'POSTGRESQL',
  postgresql: 'POSTGRESQL',
  mysql: 'MYSQL',
  sqlite: 'SQLITE',
  sqlserver: 'SQLSERVER',
  mssql: 'SQLSERVER',
  tsql: 'SQLSERVER',
  clickhouse: 'CLICKHOUSE',
} as const;
|
||||
|
||||
export type LookerWarehouseTargetConnectionType =
|
||||
(typeof LOOKER_DIALECT_TO_CONNECTION_TYPE)[keyof typeof LOOKER_DIALECT_TO_CONNECTION_TYPE];
|
||||
|
||||
export interface LookerConnectionMapping {
|
||||
lookerConnectionName: string;
|
||||
kloConnectionId: string | null;
|
||||
lookerHost: string | null;
|
||||
lookerDatabase: string | null;
|
||||
lookerDialect: string | null;
|
||||
}
|
||||
|
||||
export interface LookerTargetConnection {
|
||||
id: string;
|
||||
connection_type: string;
|
||||
connection_params?: Record<string, unknown> | null;
|
||||
}
|
||||
|
||||
export interface LookerMappingCandidateConnection extends LookerTargetConnection {}
|
||||
|
||||
export interface LookerMappingDrift {
|
||||
unmappedDiscovered: LookerWarehouseConnectionInfo[];
|
||||
staleMappings: Array<{ lookerConnectionName: string; reason: 'looker_connection_not_found' }>;
|
||||
inSync: Array<{ lookerConnectionName: string; kloConnectionId: string }>;
|
||||
}
|
||||
|
||||
export type LookerMappingValidationResult =
|
||||
| { ok: true }
|
||||
| { ok: false; errors: Array<{ key: string; reason: string }> };
|
||||
|
||||
export interface LookerTableIdentifierParseItem {
|
||||
key: string;
|
||||
sql_table_name: string;
|
||||
dialect: string;
|
||||
}
|
||||
|
||||
type ParsedTargetTableFailureReason = Extract<ParsedTargetTable, { ok: false }>['reason'];
|
||||
|
||||
export interface LookerParsedIdentifier {
|
||||
ok: boolean;
|
||||
catalog?: string | null;
|
||||
schema?: string | null;
|
||||
name?: string | null;
|
||||
canonical_table?: string | null;
|
||||
reason?: ParsedTargetTableFailureReason | null;
|
||||
detail?: string | null;
|
||||
}
|
||||
|
||||
export interface LookerTableIdentifierParser {
|
||||
parse(items: LookerTableIdentifierParseItem[]): Promise<Record<string, LookerParsedIdentifier>>;
|
||||
}
|
||||
|
||||
export interface LookerMappingClient {
|
||||
listLookerConnections(): Promise<LookerWarehouseConnectionInfo[]>;
|
||||
listLookmlModels(): Promise<StagedLookmlModelsFile>;
|
||||
getExplore(modelName: string, exploreName: string): Promise<StagedExploreFile>;
|
||||
}
|
||||
|
||||
/**
 * Dialect name handed to the table-identifier parser for each warehouse connection type.
 * A connection type without an entry here is rejected as a Looker warehouse mapping target.
 */
const SQLGLOT_DIALECT_BY_CONNECTION_TYPE: Partial<Record<LookerWarehouseTargetConnectionType, string>> = {
  BIGQUERY: 'bigquery',
  SNOWFLAKE: 'snowflake',
  POSTGRESQL: 'postgres',
  MYSQL: 'mysql',
  SQLITE: 'sqlite',
  SQLSERVER: 'tsql',
  CLICKHOUSE: 'clickhouse',
};
|
||||
|
||||
/**
 * List the warehouse connections known to Looker.
 *
 * @param client - Any object exposing `listLookerConnections`.
 * @returns Whatever the client reports, unmodified.
 */
export async function discoverLookerConnections(
  client: Pick<LookerMappingClient, 'listLookerConnections'>,
): Promise<LookerWarehouseConnectionInfo[]> {
  const connections = await client.listLookerConnections();
  return connections;
}
|
||||
|
||||
/**
 * Translate a Looker dialect name into the warehouse connection type it maps to.
 *
 * The lookup is case-insensitive.
 *
 * @param dialect - Dialect name reported by Looker, or `null` when unknown.
 * @returns The matching connection type, or `null` when the dialect is empty or not in
 *   `LOOKER_DIALECT_TO_CONNECTION_TYPE`.
 */
export function lookerDialectToConnectionType(dialect: string | null): LookerWarehouseTargetConnectionType | null {
  if (!dialect) {
    return null;
  }
  const normalized = dialect.toLowerCase();
  // Own-property check: a bare bracket lookup on a plain object also resolves inherited
  // Object.prototype members (e.g. "constructor", "__proto__"), which would leak a
  // non-null, non-string value for those dialect names.
  if (!Object.prototype.hasOwnProperty.call(LOOKER_DIALECT_TO_CONNECTION_TYPE, normalized)) {
    return null;
  }
  return LOOKER_DIALECT_TO_CONNECTION_TYPE[normalized as keyof typeof LOOKER_DIALECT_TO_CONNECTION_TYPE];
}
|
||||
|
||||
/**
 * Resolve the parser dialect name for a warehouse connection type.
 *
 * @param connectionType - Connection type string (matched case-sensitively).
 * @returns The dialect from `SQLGLOT_DIALECT_BY_CONNECTION_TYPE`, or `null` when the
 *   connection type has no entry.
 */
export function sqlglotDialectForConnectionType(connectionType: string): string | null {
  // Only own entries count: an unchecked bracket lookup would return inherited
  // Object.prototype members for names such as "constructor" or "toString", and callers
  // treat any truthy result as "supported".
  if (!Object.prototype.hasOwnProperty.call(SQLGLOT_DIALECT_BY_CONNECTION_TYPE, connectionType)) {
    return null;
  }
  return SQLGLOT_DIALECT_BY_CONNECTION_TYPE[connectionType as LookerWarehouseTargetConnectionType] ?? null;
}
|
||||
|
||||
/**
 * Decide whether a connection type may be the target of a Looker warehouse mapping.
 *
 * A type is accepted exactly when `sqlglotDialectForConnectionType` yields a dialect for it.
 *
 * @param connectionType - Connection type of the candidate target.
 * @returns `{ ok: true }`, or `{ ok: false, reason }` with a human-readable explanation.
 */
export function validateLookerWarehouseTarget(connectionType: string): { ok: true } | { ok: false; reason: string } {
  if (sqlglotDialectForConnectionType(connectionType)) {
    return { ok: true };
  }
  return {
    ok: false,
    reason: `Connection type ${connectionType} cannot be used as a Looker warehouse mapping target`,
  };
}
|
||||
|
||||
/**
 * Read the host-like field from a connection's parameter bag.
 *
 * PostgreSQL, SQL Server, MySQL and ClickHouse use the `host` parameter; Snowflake uses
 * `account`. Every other connection type yields `null`.
 *
 * @param params - Raw connection parameters; anything that is not a plain object is treated as empty.
 * @param connectionType - Connection type deciding which parameter is read.
 * @returns The parameter value when it is a string, otherwise `null`.
 */
export function extractWarehouseHost(params: unknown, connectionType: string): string | null {
  let hostKey: string;
  if (connectionType === 'SNOWFLAKE') {
    hostKey = 'account';
  } else if (['POSTGRESQL', 'SQLSERVER', 'MYSQL', 'CLICKHOUSE'].includes(connectionType)) {
    hostKey = 'host';
  } else {
    return null;
  }
  return readString(isRecord(params) ? params : {}, hostKey);
}
|
||||
|
||||
/**
 * Read the database-like field from a connection's parameter bag.
 *
 * PostgreSQL, SQL Server, MySQL, ClickHouse and Snowflake use the `database` parameter;
 * BigQuery uses `dataset_id`. Every other connection type yields `null`.
 *
 * @param params - Raw connection parameters; anything that is not a plain object is treated as empty.
 * @param connectionType - Connection type deciding which parameter is read.
 * @returns The parameter value when it is a string, otherwise `null`.
 */
export function extractWarehouseDatabase(params: unknown, connectionType: string): string | null {
  let databaseKey: string;
  if (connectionType === 'BIGQUERY') {
    databaseKey = 'dataset_id';
  } else if (['POSTGRESQL', 'SQLSERVER', 'MYSQL', 'CLICKHOUSE', 'SNOWFLAKE'].includes(connectionType)) {
    databaseKey = 'database';
  } else {
    return null;
  }
  return readString(isRecord(params) ? params : {}, databaseKey);
}
|
||||
|
||||
/**
 * Normalize a host string for comparison: lower-case it and drop a trailing `:port`.
 *
 * Bare IPv6 literals (more than one colon, no brackets) are returned lower-cased but
 * otherwise intact, because their last numeric group is part of the address, not a port.
 * Bracketed IPv6 literals (`[addr]:port`) lose only the port.
 *
 * @param value - Host string, or `null`.
 * @returns The normalized host, or `null` for `null`/empty input.
 */
export function normalizeHost(value: string | null): string | null {
  if (!value) {
    return null;
  }
  const lowered = value.toLowerCase();
  if (lowered.startsWith('[')) {
    // Bracketed IPv6: a port can only follow the closing bracket.
    return lowered.replace(/\]:\d+$/, ']');
  }
  if (lowered.indexOf(':') !== lowered.lastIndexOf(':')) {
    // Several colons without brackets: an IPv6 address, so there is no port to strip.
    // Stripping here would collapse distinct addresses (e.g. ...::1 and ...::2) together.
    return lowered;
  }
  return lowered.replace(/:\d+$/, '');
}
|
||||
|
||||
/**
 * Lower-case a name for case-insensitive comparison.
 *
 * @param value - Name to normalize, or `null`.
 * @returns The lower-cased name, or `null` for `null`/empty input.
 */
export function normalizeName(value: string | null): string | null {
  if (!value) {
    return null;
  }
  return value.toLowerCase();
}
|
||||
|
||||
/**
 * Propose the single candidate connection that matches a Looker connection.
 *
 * A candidate matches when its connection type equals the type derived from the Looker
 * dialect and both its normalized host and normalized database equal the Looker
 * connection's. No suggestion is made when the Looker connection lacks a host, database
 * or recognised dialect, or when zero or several candidates match.
 *
 * @returns The id of the unique matching candidate, otherwise `null`.
 */
export function suggestKloConnectionForLookerConnection(args: {
  lookerConnection: LookerWarehouseConnectionInfo;
  candidateConnections: LookerMappingCandidateConnection[];
}): string | null {
  const { lookerConnection, candidateConnections } = args;
  const expectedType = lookerDialectToConnectionType(lookerConnection.dialect);
  if (!expectedType || !lookerConnection.host || !lookerConnection.database || !lookerConnection.dialect) {
    return null;
  }

  const wantedHost = normalizeHost(lookerConnection.host);
  const wantedDatabase = normalizeName(lookerConnection.database);

  const matchingIds: string[] = [];
  for (const candidate of candidateConnections) {
    if (candidate.connection_type !== expectedType) {
      continue;
    }
    const candidateHost = normalizeHost(extractWarehouseHost(candidate.connection_params, candidate.connection_type));
    if (candidateHost !== wantedHost) {
      continue;
    }
    const candidateDatabase = normalizeName(
      extractWarehouseDatabase(candidate.connection_params, candidate.connection_type),
    );
    if (candidateDatabase === wantedDatabase) {
      matchingIds.push(candidate.id);
    }
  }

  // Ambiguous (several) or absent matches produce no suggestion.
  return matchingIds.length === 1 ? matchingIds[0] : null;
}
|
||||
|
||||
/**
 * Compare stored Looker connection mappings with the connections currently discovered.
 *
 * @returns
 * - `unmappedDiscovered`: discovered connections with no stored mapping, or a stored
 *   mapping without a target connection id;
 * - `staleMappings`: stored mappings whose Looker connection name was not discovered;
 * - `inSync`: stored mappings that were discovered and carry a target connection id.
 */
export function computeLookerMappingDrift(args: {
  storedMappings: LookerConnectionMapping[];
  discovered: LookerWarehouseConnectionInfo[];
}): LookerMappingDrift {
  const discoveredNames = new Set<string>();
  for (const connection of args.discovered) {
    discoveredNames.add(connection.name);
  }

  // Later entries win when a Looker connection name is stored more than once.
  const storedByName = new Map<string, LookerConnectionMapping>();
  for (const mapping of args.storedMappings) {
    storedByName.set(mapping.lookerConnectionName, mapping);
  }

  const staleMappings: LookerMappingDrift['staleMappings'] = [];
  const inSync: LookerMappingDrift['inSync'] = [];
  for (const mapping of args.storedMappings) {
    const { lookerConnectionName, kloConnectionId } = mapping;
    if (!discoveredNames.has(lookerConnectionName)) {
      staleMappings.push({ lookerConnectionName, reason: 'looker_connection_not_found' });
    } else if (kloConnectionId) {
      inSync.push({ lookerConnectionName, kloConnectionId });
    }
  }

  const unmappedDiscovered = args.discovered.filter(
    (connection) => !storedByName.get(connection.name)?.kloConnectionId,
  );

  return { unmappedDiscovered, staleMappings, inSync };
}
|
||||
|
||||
/**
 * Validate every mapping that has a target connection id.
 *
 * A mapping fails when its target id is not in `knownKloConnectionIds`, or when the
 * target's connection type (looked up in `knownConnectionTypes`, `'unknown'` if absent)
 * is rejected by `validateLookerWarehouseTarget`. Mappings without a target id are skipped.
 *
 * @returns `{ ok: true }` when nothing failed, otherwise `{ ok: false, errors }` with one
 *   entry per failing mapping, keyed by Looker connection name.
 */
export function validateLookerMappings(args: {
  mappings: LookerConnectionMapping[];
  knownKloConnectionIds: Set<string>;
  knownConnectionTypes: ReadonlyMap<string, string>;
}): LookerMappingValidationResult {
  const errors: Array<{ key: string; reason: string }> = [];

  for (const { lookerConnectionName: key, kloConnectionId } of args.mappings) {
    if (!kloConnectionId) {
      continue;
    }
    if (args.knownKloConnectionIds.has(kloConnectionId)) {
      const verdict = validateLookerWarehouseTarget(args.knownConnectionTypes.get(kloConnectionId) ?? 'unknown');
      if (!verdict.ok) {
        errors.push({ key, reason: verdict.reason });
      }
    } else {
      errors.push({ key, reason: `KLO connection ${kloConnectionId} does not exist` });
    }
  }

  if (errors.length > 0) {
    return { ok: false, errors };
  }
  return { ok: true };
}
|
||||
|
||||
/**
 * Merge live Looker connection metadata into the stored mappings.
 *
 * Live connections without a stored mapping get a placeholder with a `null` target id.
 * Existing mappings keep their target id and have host, database and dialect overwritten
 * with the live values. Stored mappings that are not in `live` are kept as they are.
 *
 * @returns The merged mappings, and `changed` = whether any placeholder was added or any
 *   host/database/dialect value differed from what was stored.
 */
export function refreshLookerMappingPlaceholders(args: {
  stored: LookerConnectionMapping[];
  live: LookerWarehouseConnectionInfo[];
}): { mappings: LookerConnectionMapping[]; changed: boolean } {
  const merged = new Map<string, LookerConnectionMapping>();
  for (const mapping of args.stored) {
    merged.set(mapping.lookerConnectionName, mapping);
  }

  let changed = false;
  for (const { name, host, database, dialect } of args.live) {
    const current = merged.get(name);
    if (
      current &&
      current.lookerHost === host &&
      current.lookerDatabase === database &&
      current.lookerDialect === dialect
    ) {
      // Stored metadata already matches the live connection.
      continue;
    }
    merged.set(
      name,
      current
        ? { ...current, lookerHost: host, lookerDatabase: database, lookerDialect: dialect }
        : {
            lookerConnectionName: name,
            kloConnectionId: null,
            lookerHost: host,
            lookerDatabase: database,
            lookerDialect: dialect,
          },
    );
    changed = true;
  }

  return { mappings: Array.from(merged.values()), changed };
}
|
||||
|
||||
/**
 * Gather the table-identifier parser inputs for one explore.
 *
 * Nothing is collected when the explore has no Looker connection name or that name has no
 * entry in `connectionMappings`. When the mapped target has no parser dialect, the explore
 * key (`<model>.<explore>`) is recorded as an `unsupported_dialect` failure instead.
 * Otherwise one item is emitted for the explore's raw SQL table name and one per join
 * (key `<model>.<explore>.<join>`), skipping entries without a raw SQL table name.
 *
 * @returns Failures decided up front (`parsedTargetTables`) and the items to parse (`parseItems`).
 */
export function collectExploreParseItems(args: {
  explore: StagedExploreFile;
  connectionMappings: Record<string, string>;
  targetConnections: ReadonlyMap<string, Pick<LookerTargetConnection, 'id' | 'connection_type'>>;
}): { parsedTargetTables: Record<string, ParsedTargetTable>; parseItems: LookerTableIdentifierParseItem[] } {
  const { explore, connectionMappings, targetConnections } = args;
  const parsedTargetTables: Record<string, ParsedTargetTable> = {};
  const parseItems: LookerTableIdentifierParseItem[] = [];

  const lookerConnectionName = explore.connectionName;
  const targetConnectionId = lookerConnectionName ? connectionMappings[lookerConnectionName] : undefined;
  if (!lookerConnectionName || !targetConnectionId) {
    return { parsedTargetTables, parseItems };
  }

  const targetConnection = targetConnections.get(targetConnectionId);
  const dialect = targetConnection ? sqlglotDialectForConnectionType(targetConnection.connection_type) : null;
  const key = `${explore.modelName}.${explore.exploreName}`;

  if (!dialect) {
    parsedTargetTables[key] = {
      ok: false,
      reason: 'unsupported_dialect',
      detail: `Connection type ${targetConnection?.connection_type ?? 'unknown'} does not map to a supported sqlglot dialect.`,
    };
    return { parsedTargetTables, parseItems };
  }

  // Base explore first, then joins in declaration order.
  const candidates = [
    { key, rawSqlTableName: explore.rawSqlTableName },
    ...explore.joins.map((join) => ({ key: `${key}.${join.name}`, rawSqlTableName: join.rawSqlTableName })),
  ];
  for (const candidate of candidates) {
    if (candidate.rawSqlTableName) {
      parseItems.push({ key: candidate.key, sql_table_name: candidate.rawSqlTableName, dialect });
    }
  }

  return { parsedTargetTables, parseItems };
}
|
||||
|
||||
/**
 * Convert one parser response row into a `ParsedTargetTable`.
 *
 * - A missing row becomes a `parse_error`.
 * - A row with `ok`, `name` and `canonical_table` becomes a success entry (`catalog` and
 *   `schema` default to `null`).
 * - Anything else becomes a failure carrying the row's `reason` (default `parse_error`)
 *   and the parser's own `detail` when it supplied one.
 *
 * @param row - Parser output for a single key, or `undefined` when the key was absent.
 * @returns The projected success or failure entry.
 */
export function projectParsedIdentifier(row: LookerParsedIdentifier | undefined): ParsedTargetTable {
  if (!row) {
    return { ok: false, reason: 'parse_error', detail: 'Python parser response was missing this key.' };
  }
  if (row.ok && row.name && row.canonical_table) {
    return {
      ok: true,
      catalog: row.catalog ?? null,
      schema: row.schema ?? null,
      name: row.name,
      canonicalTable: row.canonical_table,
    };
  }

  // Explain rows that carry no reason of their own, and say which kind of malformed row it was.
  let fallbackDetail: string | undefined;
  if (!row.reason) {
    fallbackDetail = row.ok
      ? 'Python parser returned an invalid success row without name or canonical_table.'
      : 'Python parser returned a failure row without a reason.';
  }

  return {
    ok: false,
    reason: row.reason ?? 'parse_error',
    // Keep the parser-provided explanation instead of discarding it.
    detail: row.detail ?? fallbackDetail,
  };
}
|
||||
|
||||
/**
 * Assemble a `LookerPullConfig` from inputs the caller has already loaded.
 *
 * Only mappings that have a target id, whose target is present in `targetConnections`,
 * and whose connection type passes `validateLookerWarehouseTarget` are included in
 * `connectionMappings` / `connectionTypes`. Explore target tables are then resolved via
 * `parseExploreTargets` using those accepted mappings.
 *
 * @returns The pull config, with the dashboard and Look cursors copied from `args.cursors`.
 */
export async function buildLookerPullConfigFromInputs(args: {
  lookerConnectionId: string;
  cursors: LookerRuntimeCursors;
  refreshedMappings: LookerConnectionMapping[];
  targetConnections: ReadonlyMap<string, Pick<LookerTargetConnection, 'id' | 'connection_type'>>;
  client: Pick<LookerMappingClient, 'listLookmlModels' | 'getExplore'>;
  parser: LookerTableIdentifierParser;
}): Promise<LookerPullConfig> {
  const { lookerConnectionId, cursors, refreshedMappings, targetConnections, client, parser } = args;
  const connectionMappings: Record<string, string> = {};
  const connectionTypes: Record<string, LookerWarehouseTargetConnectionType> = {};

  for (const { lookerConnectionName, kloConnectionId } of refreshedMappings) {
    if (!kloConnectionId) {
      continue;
    }
    const target = targetConnections.get(kloConnectionId);
    if (!target) {
      continue;
    }
    if (!validateLookerWarehouseTarget(target.connection_type).ok) {
      continue;
    }
    connectionMappings[lookerConnectionName] = kloConnectionId;
    connectionTypes[lookerConnectionName] = target.connection_type as LookerWarehouseTargetConnectionType;
  }

  const parsedTargetTables = await parseExploreTargets({
    client,
    connectionMappings,
    targetConnections,
    parser,
  });

  return {
    lookerConnectionId,
    dashboardUpdatedSince: cursors.dashboardsLastSyncedAt,
    lookUpdatedSince: cursors.looksLastSyncedAt,
    connectionMappings,
    connectionTypes,
    parsedTargetTables,
  };
}
|
||||
|
||||
/**
 * Resolve target tables for every explore of every LookML model.
 *
 * Failures never abort the projection:
 * - if listing models fails, an empty result is returned;
 * - an explore that cannot be fetched is skipped;
 * - if the parser call fails, every collected item is marked `parse_error`.
 *
 * @returns Parsed target tables keyed by `<model>.<explore>` or `<model>.<explore>.<join>`.
 */
async function parseExploreTargets(args: {
  client: Pick<LookerMappingClient, 'listLookmlModels' | 'getExplore'>;
  connectionMappings: Record<string, string>;
  targetConnections: ReadonlyMap<string, Pick<LookerTargetConnection, 'id' | 'connection_type'>>;
  parser: LookerTableIdentifierParser;
}): Promise<Record<string, ParsedTargetTable>> {
  const { client, connectionMappings, targetConnections, parser } = args;
  const outcome: Record<string, ParsedTargetTable> = {};
  const pending: LookerTableIdentifierParseItem[] = [];

  let modelsFile: StagedLookmlModelsFile;
  try {
    modelsFile = await client.listLookmlModels();
  } catch {
    // Without the model listing there is nothing to resolve.
    return outcome;
  }

  for (const model of modelsFile.models) {
    for (const exploreRef of model.explores) {
      let explore: StagedExploreFile;
      try {
        explore = await client.getExplore(model.name, exploreRef.name);
      } catch {
        // An explore that cannot be fetched does not block the others.
        continue;
      }
      const { parsedTargetTables, parseItems } = collectExploreParseItems({
        explore,
        connectionMappings,
        targetConnections,
      });
      Object.assign(outcome, parsedTargetTables);
      for (const item of parseItems) {
        pending.push(item);
      }
    }
  }

  if (pending.length === 0) {
    return outcome;
  }

  let parsedRows: Record<string, LookerParsedIdentifier>;
  try {
    parsedRows = await parser.parse(pending);
  } catch {
    // The whole batch failed: flag every requested key.
    for (const { key } of pending) {
      outcome[key] = {
        ok: false,
        reason: 'parse_error',
        detail: 'Python parse-table-identifier failed during Looker pull-config projection.',
      };
    }
    return outcome;
  }

  for (const { key } of pending) {
    outcome[key] = projectParsedIdentifier(parsedRows[key]);
  }
  return outcome;
}
|
||||
|
||||
/** Type guard: true for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  return !Array.isArray(value);
}
|
||||
|
||||
/** Return `record[key]` when it is a string, otherwise `null`. */
function readString(record: Record<string, unknown>, key: string): string | null {
  const candidate = record[key];
  if (typeof candidate !== 'string') {
    return null;
  }
  return candidate;
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { buildLookerReconcileNotes, lookerRuntimeSourceToFileAdapterSource } from './reconcile.js';
|
||||
|
||||
describe('lookerRuntimeSourceToFileAdapterSource', () => {
|
||||
it('maps API-derived Looker source names to file-adapter source names', () => {
|
||||
expect(lookerRuntimeSourceToFileAdapterSource('looker__b2b__sales_pipeline')).toBe('b2b__sales_pipeline');
|
||||
expect(lookerRuntimeSourceToFileAdapterSource('looker__finance__orders')).toBe('finance__orders');
|
||||
});
|
||||
|
||||
it('ignores non-Looker and malformed source names', () => {
|
||||
expect(lookerRuntimeSourceToFileAdapterSource('b2b__sales_pipeline')).toBeNull();
|
||||
expect(lookerRuntimeSourceToFileAdapterSource('looker__missing_explore')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildLookerReconcileNotes', () => {
|
||||
it('instructs reconciliation to record subsumed provenance', () => {
|
||||
expect(buildLookerReconcileNotes()).toEqual([
|
||||
[
|
||||
'Looker runtime API-derived SL sources use looker__<model>__<explore>.',
|
||||
'If the unprefixed file-adapter source <model>__<explore> exists, prefer it in wiki sl_refs, delete or avoid the API-derived source, and call emit_artifact_resolution with actionType="subsumed" for the API raw explore path.',
|
||||
].join(' '),
|
||||
]);
|
||||
});
|
||||
});
|
||||
21
packages/context/src/ingest/adapters/looker/reconcile.ts
Normal file
21
packages/context/src/ingest/adapters/looker/reconcile.ts
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
/**
 * Map an API-derived source name (`looker__<model>__<explore>`) to the unprefixed
 * file-adapter name (`<model>__<explore>`).
 *
 * @param sourceName - Source name to translate.
 * @returns The name without the `looker__` prefix, or `null` when the prefix is missing,
 *   fewer than two `__`-separated parts remain, or any part is empty.
 */
export function lookerRuntimeSourceToFileAdapterSource(sourceName: string): string | null {
  const prefix = 'looker__';
  if (!sourceName.startsWith(prefix)) {
    return null;
  }
  const segments = sourceName.slice(prefix.length).split('__');
  const wellFormed = segments.length >= 2 && segments.every((segment) => segment.length > 0);
  // Re-joining the validated segments yields `<model>__<explore...>`.
  return wellFormed ? segments.join('__') : null;
}
|
||||
|
||||
/**
 * Build the reconciliation notes for Looker runtime sources.
 *
 * @returns A single-element array holding the naming rule and the preference rule joined
 *   by one space.
 */
export function buildLookerReconcileNotes(): string[] {
  const namingRule = 'Looker runtime API-derived SL sources use looker__<model>__<explore>.';
  const preferenceRule =
    'If the unprefixed file-adapter source <model>__<explore> exists, prefer it in wiki sl_refs, delete or avoid the API-derived source, and call emit_artifact_resolution with actionType="subsumed" for the API raw explore path.';
  return [`${namingRule} ${preferenceRule}`];
}
|
||||
101
packages/context/src/ingest/adapters/looker/scope.test.ts
Normal file
101
packages/context/src/ingest/adapters/looker/scope.test.ts
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { describeLookerScope, hashLookerScope, isPathInLookerScope } from './scope.js';
|
||||
|
||||
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
|
||||
const abs = join(stagedDir, relPath);
|
||||
await mkdir(join(abs, '..'), { recursive: true });
|
||||
await writeFile(abs, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
|
||||
}
|
||||
|
||||
describe('Looker runtime fetch scope', () => {
|
||||
let stagedDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
stagedDir = await mkdtemp(join(tmpdir(), 'looker-scope-'));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(stagedDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('keeps omitted known-current entity files out of the deletion baseline', () => {
|
||||
const scope = {
|
||||
mode: 'incremental' as const,
|
||||
knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json'],
|
||||
fetchedRawPaths: ['dashboards/11.json'],
|
||||
};
|
||||
|
||||
expect(isPathInLookerScope('dashboards/10.json', scope)).toBe(false);
|
||||
expect(isPathInLookerScope('looks/20.json', scope)).toBe(false);
|
||||
expect(isPathInLookerScope('dashboards/11.json', scope)).toBe(true);
|
||||
expect(isPathInLookerScope('looks/21.json', scope)).toBe(true);
|
||||
expect(isPathInLookerScope('signals/dashboard_usage.json', scope)).toBe(true);
|
||||
expect(isPathInLookerScope('explores/b2b/sales_pipeline.json', scope)).toBe(true);
|
||||
});
|
||||
|
||||
it('keeps omitted unchanged evidence documents out of incremental delete scope', () => {
|
||||
const scope = {
|
||||
mode: 'incremental' as const,
|
||||
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
|
||||
fetchedRawPaths: ['dashboards/10.json'],
|
||||
};
|
||||
|
||||
expect(isPathInLookerScope('evidence/dashboards/10/page.md', scope)).toBe(true);
|
||||
expect(isPathInLookerScope('evidence/dashboards/10/metadata.json', scope)).toBe(true);
|
||||
expect(isPathInLookerScope('evidence/looks/20/page.md', scope)).toBe(false);
|
||||
expect(isPathInLookerScope('evidence/looks/20/metadata.json', scope)).toBe(false);
|
||||
});
|
||||
|
||||
it('treats full scope as all raw paths in scope', () => {
|
||||
const scope = {
|
||||
mode: 'full' as const,
|
||||
knownCurrentRawPaths: ['dashboards/10.json'],
|
||||
fetchedRawPaths: ['dashboards/10.json'],
|
||||
};
|
||||
|
||||
expect(isPathInLookerScope('dashboards/10.json', scope)).toBe(true);
|
||||
expect(isPathInLookerScope('dashboards/99.json', scope)).toBe(true);
|
||||
expect(isPathInLookerScope('looks/20.json', scope)).toBe(true);
|
||||
});
|
||||
|
||||
it('hashes scope order-insensitively', () => {
|
||||
const a = hashLookerScope({
|
||||
mode: 'incremental',
|
||||
knownCurrentRawPaths: ['looks/20.json', 'dashboards/10.json'],
|
||||
fetchedRawPaths: ['dashboards/10.json'],
|
||||
});
|
||||
const b = hashLookerScope({
|
||||
mode: 'incremental',
|
||||
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
|
||||
fetchedRawPaths: ['dashboards/10.json'],
|
||||
});
|
||||
|
||||
expect(a).toBe(b);
|
||||
expect(a).toMatch(/^[0-9a-f]{64}$/);
|
||||
});
|
||||
|
||||
it('reads staged scope and returns a SourceAdapter ScopeDescriptor', async () => {
|
||||
await writeJson(stagedDir, 'looker-scope.json', {
|
||||
mode: 'incremental',
|
||||
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
|
||||
fetchedRawPaths: ['dashboards/10.json'],
|
||||
});
|
||||
|
||||
const descriptor = await describeLookerScope(stagedDir);
|
||||
|
||||
expect(descriptor.fingerprint).toMatch(/^[0-9a-f]{64}$/);
|
||||
expect(descriptor.isPathInScope('dashboards/10.json')).toBe(true);
|
||||
expect(descriptor.isPathInScope('looks/20.json')).toBe(false);
|
||||
expect(descriptor.isPathInScope('looks/99.json')).toBe(true);
|
||||
});
|
||||
|
||||
it('falls back to full scope when old fixtures do not have a scope file', async () => {
|
||||
const descriptor = await describeLookerScope(stagedDir);
|
||||
|
||||
expect(descriptor.isPathInScope('dashboards/10.json')).toBe(true);
|
||||
expect(descriptor.isPathInScope('looks/20.json')).toBe(true);
|
||||
});
|
||||
});
|
||||
63
packages/context/src/ingest/adapters/looker/scope.ts
Normal file
63
packages/context/src/ingest/adapters/looker/scope.ts
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { readFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import type { ScopeDescriptor } from '../../types.js';
|
||||
import { STAGED_FILES, type StagedLookerScopeFile, stagedLookerScopeFileSchema } from './types.js';
|
||||
|
||||
const LOOKER_ENTITY_PATH_RE = /^(dashboards|looks)\/[^/]+\.json$/;
|
||||
const LOOKER_EVIDENCE_ENTITY_PATH_RE = /^evidence\/(dashboards|looks)\/([^/]+)\/(?:metadata\.json|page\.md)$/;
|
||||
|
||||
/**
 * Load the staged Looker scope and expose it as a `ScopeDescriptor`.
 *
 * @param stagedDir - Directory containing the staged files.
 * @returns A descriptor whose fingerprint is the scope hash and whose `isPathInScope`
 *   delegates to `isPathInLookerScope` with the loaded scope.
 */
export async function describeLookerScope(stagedDir: string): Promise<ScopeDescriptor> {
  const loadedScope = await readLookerScope(stagedDir);
  const fingerprint = hashLookerScope(loadedScope);
  return {
    fingerprint,
    isPathInScope: (rawPath) => isPathInLookerScope(rawPath, loadedScope),
  };
}
|
||||
|
||||
/**
 * Read and validate the staged scope file.
 *
 * @param stagedDir - Directory containing the staged files.
 * @returns The parsed scope, or a `full` scope with empty path lists when an `ENOENT`
 *   error is raised (no scope file).
 * @throws Any other error raised while reading, JSON-parsing or schema-validating the file.
 */
export async function readLookerScope(stagedDir: string): Promise<StagedLookerScopeFile> {
  try {
    const scopePath = join(stagedDir, STAGED_FILES.scope);
    const contents = await readFile(scopePath, 'utf-8');
    return stagedLookerScopeFileSchema.parse(JSON.parse(contents));
  } catch (error) {
    const scopeFileMissing =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!scopeFileMissing) {
      throw error;
    }
    // No scope file staged: treat everything as in scope.
    return { mode: 'full', knownCurrentRawPaths: [], fetchedRawPaths: [] };
  }
}
|
||||
|
||||
/**
 * Compute a SHA-256 hex fingerprint of a scope.
 *
 * Both path lists are sorted on a copy before hashing, so the fingerprint does not depend
 * on their order and the input is not mutated.
 *
 * @param scope - Scope to fingerprint.
 * @returns The hex digest.
 */
export function hashLookerScope(scope: StagedLookerScopeFile): string {
  const sortedCopy = (paths: readonly string[]): string[] => paths.slice().sort();
  const hasher = createHash('sha256');
  hasher.update(
    JSON.stringify({
      mode: scope.mode,
      knownCurrentRawPaths: sortedCopy(scope.knownCurrentRawPaths),
      fetchedRawPaths: sortedCopy(scope.fetchedRawPaths),
    }),
  );
  return hasher.digest('hex');
}
|
||||
|
||||
/**
 * Decide whether a raw path belongs to the fetch scope.
 *
 * Everything is in scope in `full` mode, and so is any path that does not resolve to a
 * dashboard/look entity. An entity path is out of scope only when it is listed as known
 * current but was not fetched.
 *
 * @param rawPath - Raw path to test.
 * @param scope - Scope to test against.
 */
export function isPathInLookerScope(rawPath: string, scope: StagedLookerScopeFile): boolean {
  if (scope.mode === 'full') {
    return true;
  }

  const entityRawPath = scopedEntityRawPath(rawPath);
  if (!entityRawPath) {
    return true;
  }

  const wasFetched = scope.fetchedRawPaths.includes(entityRawPath);
  const isKnownCurrent = scope.knownCurrentRawPaths.includes(entityRawPath);
  const omittedButCurrent = isKnownCurrent && !wasFetched;
  return !omittedButCurrent;
}
|
||||
|
||||
/**
 * Resolve a raw path to the dashboard/look entity file it belongs to.
 *
 * Entity files map to themselves; evidence files map to `<kind>/<id>.json` built from the
 * two groups captured by `LOOKER_EVIDENCE_ENTITY_PATH_RE`.
 *
 * @returns The entity path, or `null` when the path matches neither pattern.
 */
function scopedEntityRawPath(rawPath: string): string | null {
  if (LOOKER_ENTITY_PATH_RE.test(rawPath)) {
    return rawPath;
  }
  const evidenceMatch = LOOKER_EVIDENCE_ENTITY_PATH_RE.exec(rawPath);
  if (!evidenceMatch) {
    return null;
  }
  const [, entityKind, entityId] = evidenceMatch;
  return `${entityKind}/${entityId}.json`;
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { listLookerTargetConnectionIds } from './target-connections.js';
|
||||
|
||||
describe('listLookerTargetConnectionIds', () => {
|
||||
let stagedDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
stagedDir = await mkdtemp(join(tmpdir(), 'looker-targets-'));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(stagedDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('collects unique target warehouse IDs from explores, dashboard queries, and Look queries', async () => {
|
||||
await mkdir(join(stagedDir, 'explores', 'b2b'), { recursive: true });
|
||||
await mkdir(join(stagedDir, 'dashboards'), { recursive: true });
|
||||
await mkdir(join(stagedDir, 'looks'), { recursive: true });
|
||||
|
||||
await writeFile(
|
||||
join(stagedDir, 'explores', 'b2b', 'sales_pipeline.json'),
|
||||
JSON.stringify({
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
label: null,
|
||||
description: null,
|
||||
fields: { dimensions: [], measures: [] },
|
||||
joins: [],
|
||||
targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222',
|
||||
}),
|
||||
);
|
||||
await writeFile(
|
||||
join(stagedDir, 'dashboards', '1.json'),
|
||||
JSON.stringify({
|
||||
lookerId: '1',
|
||||
title: 'Pipeline',
|
||||
description: null,
|
||||
folderId: null,
|
||||
ownerId: null,
|
||||
updatedAt: null,
|
||||
tiles: [
|
||||
{
|
||||
id: '11',
|
||||
title: 'ARR',
|
||||
lookId: null,
|
||||
query: {
|
||||
model: 'b2b',
|
||||
view: 'sales_pipeline',
|
||||
fields: [],
|
||||
filters: {},
|
||||
sorts: [],
|
||||
targetWarehouseConnectionId: '33333333-3333-4333-8333-333333333333',
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
await writeFile(
|
||||
join(stagedDir, 'looks', '2.json'),
|
||||
JSON.stringify({
|
||||
lookerId: '2',
|
||||
title: 'Customers',
|
||||
description: null,
|
||||
folderId: null,
|
||||
ownerId: null,
|
||||
updatedAt: null,
|
||||
query: {
|
||||
model: 'b2b',
|
||||
view: 'sales_pipeline',
|
||||
fields: [],
|
||||
filters: {},
|
||||
sorts: [],
|
||||
targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222',
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
await expect(listLookerTargetConnectionIds(stagedDir)).resolves.toEqual([
|
||||
'22222222-2222-4222-8222-222222222222',
|
||||
'33333333-3333-4333-8333-333333333333',
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
import { readdir, readFile } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import { stagedDashboardFileSchema, stagedExploreFileSchema, stagedLookFileSchema } from './types.js';
|
||||
|
||||
/**
 * Recursively lists the regular files found beneath `root`.
 *
 * @param root Directory to scan.
 * @returns Paths relative to `root`, with backslashes converted to `/`, in default `sort()` order.
 */
async function walk(root: string): Promise<string[]> {
  const dirents = await readdir(root, { withFileTypes: true, recursive: true });
  const files: string[] = [];
  for (const dirent of dirents) {
    if (!dirent.isFile()) {
      continue;
    }
    const absolute = join(dirent.parentPath, dirent.name);
    // Normalise Windows separators so callers can match paths with `/`-based patterns.
    files.push(relative(root, absolute).replace(/\\/g, '/'));
  }
  return files.sort();
}
|
||||
|
||||
/**
 * Adds `value` to `targets` unless it is null, undefined, or an empty string.
 *
 * @param targets Set that accumulates connection ids; mutated in place.
 * @param value Candidate id.
 */
function addTarget(targets: Set<string>, value: string | null | undefined): void {
  if (!value) {
    return;
  }
  targets.add(value);
}
|
||||
|
||||
/**
 * Collects the distinct `targetWarehouseConnectionId` values referenced by staged Looker files.
 *
 * Three kinds of staged file are inspected, selected by their path relative to `stagedDir`:
 * - `explores/<model>/<explore>.json`: the explore's own target id
 * - `dashboards/<id>.json`: the target id of every tile query
 * - `looks/<id>.json`: the target id of the Look's query
 *
 * Any other file is ignored. Files are read one at a time in sorted path order, and a file
 * that is not valid JSON or fails its schema makes the call reject.
 *
 * @param stagedDir Root directory of the staged Looker pull.
 * @returns The unique non-empty target ids, sorted.
 */
export async function listLookerTargetConnectionIds(stagedDir: string): Promise<string[]> {
  const collected = new Set<string>();
  const loadJson = async (relativePath: string): Promise<unknown> =>
    JSON.parse(await readFile(join(stagedDir, relativePath), 'utf-8'));

  for (const relativePath of await walk(stagedDir)) {
    if (/^explores\/[^/]+\/[^/]+\.json$/.test(relativePath)) {
      const explore = stagedExploreFileSchema.parse(await loadJson(relativePath));
      addTarget(collected, explore.targetWarehouseConnectionId);
    } else if (/^dashboards\/[^/]+\.json$/.test(relativePath)) {
      const { tiles } = stagedDashboardFileSchema.parse(await loadJson(relativePath));
      for (const tile of tiles) {
        addTarget(collected, tile.query?.targetWarehouseConnectionId);
      }
    } else if (/^looks\/[^/]+\.json$/.test(relativePath)) {
      const look = stagedLookFileSchema.parse(await loadJson(relativePath));
      addTarget(collected, look.query?.targetWarehouseConnectionId);
    }
  }

  return Array.from(collected).sort();
}
|
||||
|
|
@ -0,0 +1,243 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { ToolOutput } from '../../../../tools/index.js';
|
||||
import { buildLookerSlProposal, createLookerQueryToSlTool, type LookerSlProposal } from './looker-query-to-sl.tool.js';
|
||||
|
||||
describe('buildLookerSlProposal', () => {
  // Fixtures shared by the target-metadata cases.
  const warehouseId = '22222222-2222-4222-8222-222222222222';
  const opportunitiesTable = {
    ok: true as const,
    catalog: 'proj',
    schema: 'dataset',
    name: 'opportunities',
    canonicalTable: 'proj.dataset.opportunities',
  };
  // Note emitted whenever the staged target table could not be used.
  const wikiOnlyNote = (reason: string): string =>
    `targetTable.ok is false (${reason}): keep this query wiki-only and pass the reason through emit_unmapped_fallback.`;

  it('suggests a measure and segment for an aggregated filtered Looker query', () => {
    const stageFilter = { 'opportunities.stage': 'open' };
    const result = buildLookerSlProposal({
      contentTitle: 'Open Pipeline ARR',
      contentType: 'look',
      usage: { queryCount30d: 42, uniqueUsers30d: 7 },
      query: {
        model: 'b2b',
        view: 'sales_pipeline',
        fields: ['opportunities.arr', 'opportunities.stage'],
        filters: stageFilter,
        sorts: ['opportunities.arr desc'],
        limit: '500',
      },
    });

    expect(result.sourceName).toBe('looker__b2b__sales_pipeline');
    expect(result.triageLane).toBe('full');
    expect(result.decision).toBe('measure_added');

    const expectedMeasure = {
      name: 'arr',
      lookerField: 'opportunities.arr',
      expr: 'sum(opportunities.arr)',
      description: 'Suggested from Looker look "Open Pipeline ARR"; verify against explore field SQL before writing.',
    };
    expect(result.measures).toEqual([expectedMeasure]);
    expect(result.dimensions).toEqual([{ name: 'stage', lookerField: 'opportunities.stage' }]);

    const expectedSegment = {
      name: 'open_pipeline_arr',
      filters: { 'opportunities.stage': 'open' },
      suggestedPredicate: "opportunities.stage = 'open'",
      description: 'Reusable filter candidate from Looker look "Open Pipeline ARR".',
    };
    expect(result.segments).toEqual([expectedSegment]);
    expect(result.notes).toContain(
      'Usage signals can raise priority, but query counts, users, owners, and folders must not be written as wiki narrative.',
    );
  });

  it('keeps simple saved views as wiki-only candidates', () => {
    const result = buildLookerSlProposal({
      contentTitle: 'Accounts By Region',
      query: {
        model: 'b2b',
        view: 'accounts',
        fields: ['accounts.region', 'accounts.segment'],
        filters: {},
      },
    });

    expect(result.sourceName).toBe('looker__b2b__accounts');
    expect(result.triageLane).toBe('light');
    expect(result.decision).toBe('wiki_only');
    expect(result.measures).toEqual([]);
    expect(result.dimensions).toEqual([
      { name: 'region', lookerField: 'accounts.region' },
      { name: 'segment', lookerField: 'accounts.segment' },
    ]);
    expect(result.segments).toEqual([]);
  });

  it('promotes high-usage filter-only queries as derived-source candidates', () => {
    const customerFilters = { 'customers.status': 'active', 'customers.is_test': '-yes' };
    const result = buildLookerSlProposal({
      contentTitle: 'Active Customers',
      usage: { queryCount30d: 15, uniqueUsers30d: 4 },
      query: {
        model: 'b2b',
        view: 'customers',
        fields: ['customers.id', 'customers.name'],
        filters: customerFilters,
      },
    });

    expect(result.sourceName).toBe('looker__b2b__customers');
    expect(result.decision).toBe('source_created');
    expect(result.segments).toEqual([
      {
        name: 'active_customers',
        filters: { 'customers.status': 'active', 'customers.is_test': '-yes' },
        suggestedPredicate: "customers.status = 'active' AND customers.is_test != 'yes'",
        description: 'Reusable filter candidate from Looker look "Active Customers".',
      },
    ]);
  });

  it('surfaces mapped warehouse target metadata for direct SL writes', () => {
    const result = buildLookerSlProposal({
      contentTitle: 'Open Pipeline ARR',
      contentType: 'dashboard_tile',
      usage: { queryCount30d: 42, uniqueUsers30d: 7 },
      query: {
        model: 'b2b',
        view: 'sales_pipeline',
        fields: ['opportunities.arr', 'opportunities.stage'],
        filters: { 'opportunities.stage': 'open' },
        targetWarehouseConnectionId: warehouseId,
        targetTable: { ...opportunitiesTable },
      },
    });

    expect(result.sourceName).toBe('looker__b2b__sales_pipeline');
    expect(result.targetStatus).toBe('mapped');
    expect(result.targetWarehouseConnectionId).toBe(warehouseId);
    expect(result.sourceTable).toBe('proj.dataset.opportunities');
    expect(result.canWriteStandaloneSource).toBe(true);
    expect(result.targetTable).toEqual(opportunitiesTable);
    expect(result.notes).toContain(
      'targetTable.ok is true: write or edit SL on targetWarehouseConnectionId using targetTable.canonicalTable as source.table.',
    );
  });

  it('surfaces unmapped and unparseable target reasons for wiki-only fallback', () => {
    // Case 1: the Looker connection has no warehouse mapping at all.
    const withoutMapping = buildLookerSlProposal({
      contentTitle: 'Revenue Trend',
      query: {
        model: 'b2b',
        view: 'revenue',
        fields: ['revenue.arr'],
        filters: {},
        targetWarehouseConnectionId: null,
        targetTable: {
          ok: false,
          reason: 'no_connection_mapping',
        },
      },
    });

    expect(withoutMapping.targetStatus).toBe('unmapped');
    expect(withoutMapping.targetWarehouseConnectionId).toBeNull();
    expect(withoutMapping.sourceTable).toBeNull();
    expect(withoutMapping.canWriteStandaloneSource).toBe(false);
    expect(withoutMapping.notes).toContain(wikiOnlyNote('no_connection_mapping'));

    // Case 2: a mapping exists, but the table name could not be resolved.
    const withTemplate = buildLookerSlProposal({
      contentTitle: 'Templated Source',
      query: {
        model: 'b2b',
        view: 'templated',
        fields: ['templated.count'],
        filters: {},
        targetWarehouseConnectionId: warehouseId,
        targetTable: {
          ok: false,
          reason: 'looker_template_unresolved',
          detail: 'The sql_table_name contains ${derived.SQL_TABLE_NAME}.',
        },
      },
    });

    expect(withTemplate.targetStatus).toBe('unparseable');
    expect(withTemplate.targetWarehouseConnectionId).toBe(warehouseId);
    expect(withTemplate.sourceTable).toBeNull();
    expect(withTemplate.canWriteStandaloneSource).toBe(false);
    expect(withTemplate.notes).toContain(wikiOnlyNote('looker_template_unresolved'));
  });
});
|
||||
|
||||
describe('createLookerQueryToSlTool', () => {
  // Both cases run the same dashboard-tile query and differ only in the staged target metadata.
  const paidOrdersQuery = {
    model: 'finance',
    view: 'orders',
    fields: ['orders.total_revenue', 'orders.created_month'],
    filters: { 'orders.status': 'paid' },
    sorts: [],
  };

  it('returns markdown plus the structured proposal', async () => {
    const slTool = createLookerQueryToSlTool();
    if (!slTool.execute) {
      throw new Error('looker_query_to_sl tool must be executable');
    }

    const result = (await slTool.execute(
      {
        contentTitle: 'Revenue Trend',
        contentType: 'dashboard_tile',
        query: { ...paidOrdersQuery, targetWarehouseConnectionId: null, targetTable: null },
      },
      { toolCallId: 'call-1', messages: [] } as never,
    )) as ToolOutput<LookerSlProposal>;

    const { markdown, structured } = result;
    expect(markdown).toContain('Looker query SL proposal');
    expect(markdown).toContain('looker__finance__orders');
    expect(structured.sourceName).toBe('looker__finance__orders');
    expect(structured.measures[0]?.name).toBe('total_revenue');
  });

  it('prints target connection and canonical table in markdown output', async () => {
    const slTool = createLookerQueryToSlTool();
    if (!slTool.execute) {
      throw new Error('looker_query_to_sl tool must be executable');
    }

    const result = (await slTool.execute(
      {
        contentTitle: 'Revenue Trend',
        contentType: 'dashboard_tile',
        query: {
          ...paidOrdersQuery,
          targetWarehouseConnectionId: '33333333-3333-4333-8333-333333333333',
          targetTable: {
            ok: true,
            catalog: 'proj',
            schema: 'finance',
            name: 'orders',
            canonicalTable: 'proj.finance.orders',
          },
        },
      },
      { toolCallId: 'call-1', messages: [] } as never,
    )) as ToolOutput<LookerSlProposal>;

    const { markdown, structured } = result;
    expect(markdown).toContain('- targetStatus: mapped');
    expect(markdown).toContain('- targetWarehouseConnectionId: 33333333-3333-4333-8333-333333333333');
    expect(markdown).toContain('- sourceTable: proj.finance.orders');
    expect(structured.canWriteStandaloneSource).toBe(true);
  });
});
|
||||
|
|
@ -0,0 +1,305 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { ToolOutput } from '../../../../tools/index.js';
|
||||
import { type ParsedTargetTable, stagedLookerQuerySchema } from '../types.js';
|
||||
|
||||
/** One 30-day usage counter: a non-negative integer that defaults to 0 when omitted. */
const usageCounterSchema = z.number().int().nonnegative().default(0);

/** Optional usage signals attached to a piece of Looker content. */
const lookerUsageInputSchema = z.object({
  queryCount30d: usageCounterSchema,
  uniqueUsers30d: usageCounterSchema,
});

/**
 * Input accepted by the `looker_query_to_sl` tool and by `buildLookerSlProposal`.
 *
 * - `query`: the staged Looker query to analyse
 * - `contentTitle`: optional, non-empty title of the Look or dashboard tile
 * - `contentType`: which kind of content the query came from; defaults to `'look'`
 * - `usage`: optional usage counters used for triage
 */
export const lookerQueryToSlInputSchema = z.object({
  query: stagedLookerQuerySchema,
  contentTitle: z.string().min(1).optional(),
  contentType: z.enum(['look', 'dashboard_tile']).default('look'),
  usage: lookerUsageInputSchema.optional(),
});

/** Pre-parse input shape, i.e. fields that have schema defaults may be omitted. */
export type LookerQueryToSlInput = z.input<typeof lookerQueryToSlInputSchema>;

/** How the warehouse target of a staged query was classified (see `targetStatus`). */
type LookerTargetStatus = 'mapped' | 'unmapped' | 'unparseable' | 'missing_target_table';
|
||||
|
||||
/** A Looker field paired with the semantic-layer name suggested for it. */
export interface LookerSlFieldProposal {
  /** Normalised semantic-layer name derived from the last segment of the Looker field. */
  name: string;
  /** Field reference exactly as it appears in the Looker query. */
  lookerField: string;
}

/** A field that looks like a measure, with a suggested aggregate expression. */
export interface LookerSlMeasureProposal extends LookerSlFieldProposal {
  /** Suggested aggregate, e.g. `sum(<field>)`; a heuristic that needs verification. */
  expr: string;
  /** Human-readable provenance of the suggestion. */
  description: string;
}

/** A reusable filter candidate built from the query's non-empty filters. */
export interface LookerSlSegmentProposal {
  /** Normalised semantic-layer name of the segment. */
  name: string;
  /** The non-empty Looker filters the segment was built from. */
  filters: Record<string, unknown>;
  /** SQL-like predicate with one clause per filter, joined by `AND`. */
  suggestedPredicate: string;
  /** Human-readable provenance of the suggestion. */
  description: string;
}

/** Advisory result returned by `buildLookerSlProposal`. */
export interface LookerSlProposal {
  /** `looker__<model>__<view>`, with both parts normalised. */
  sourceName: string;
  /** Warehouse connection id staged on the query, or `null` when absent. */
  targetWarehouseConnectionId: string | null;
  /** Parsed target table staged on the query, or `null` when absent. */
  targetTable: ParsedTargetTable | null;
  /** Classification of the two target fields above. */
  targetStatus: LookerTargetStatus;
  /** `targetTable.canonicalTable` when the target table parsed successfully, otherwise `null`. */
  sourceTable: string | null;
  /** `true` only when `targetStatus` is `'mapped'`. */
  canWriteStandaloneSource: boolean;
  /** `'skip'` for zero recorded usage, `'full'` for high usage, otherwise `'light'`. */
  triageLane: 'skip' | 'light' | 'full';
  /** Suggested outcome for this query. */
  decision: 'wiki_only' | 'measure_added' | 'source_created';
  /** Query fields that were not classified as measures. */
  dimensions: LookerSlFieldProposal[];
  /** Query fields classified as measures by name. */
  measures: LookerSlMeasureProposal[];
  /** Empty, or a single segment covering all non-empty filters. */
  segments: LookerSlSegmentProposal[];
  /** Guidance strings for whoever acts on the proposal. */
  notes: string[];
}
|
||||
|
||||
/**
 * Case-insensitive, whole-word match for terms that suggest a field is a measure.
 * `\b` does not treat `_` as a word break, so callers replace underscores with spaces first.
 */
const MEASURE_FIELD_RE =
  /\b(count|sum|total|revenue|arr|mrr|amount|avg|average|rate|ratio|percent|pct|margin|profit|value|score)\b/i;
|
||||
|
||||
/**
 * Classifies the warehouse target staged on a Looker query.
 *
 * - `'unmapped'`: the table failed with reason `no_connection_mapping`
 * - `'unparseable'`: the table failed for any other reason
 * - `'mapped'`: the table parsed and a warehouse connection id is present
 * - `'missing_target_table'`: everything else, including a parsed table without a connection id
 *
 * @param targetWarehouseConnectionId Staged warehouse connection id, if any.
 * @param targetTable Staged parse result for the target table, if any.
 */
function targetStatus(
  targetWarehouseConnectionId: string | null,
  targetTable: ParsedTargetTable | null,
): LookerTargetStatus {
  if (targetTable?.ok === false) {
    return targetTable.reason === 'no_connection_mapping' ? 'unmapped' : 'unparseable';
  }
  const isMapped = targetTable?.ok === true && Boolean(targetWarehouseConnectionId);
  return isMapped ? 'mapped' : 'missing_target_table';
}
|
||||
|
||||
/**
 * Produces the target-related guidance notes for a proposal.
 *
 * @param status Classification of the staged target.
 * @param targetTable Staged parse result for the target table, if any.
 * @returns Notes matching the situation:
 *   - mapped: how to use the canonical table for SL writes
 *   - table failed to parse or map: keep wiki-only, with the failure reason
 *   - table parsed but the status is not `'mapped'` (no warehouse connection id): keep wiki-only
 *   - no table staged: read the parent explore first
 */
function targetNotes(status: LookerTargetStatus, targetTable: ParsedTargetTable | null): string[] {
  if (status === 'mapped') {
    return [
      'targetTable.ok is true: write or edit SL on targetWarehouseConnectionId using targetTable.canonicalTable as source.table.',
      'Use targetTable.catalog, targetTable.schema, and targetTable.name only for source_tables preflight matching.',
      'Never use rawSqlTableName as source.table; it may contain aliases, templates, or derived-table SQL.',
    ];
  }
  if (targetTable?.ok === false) {
    return [
      `targetTable.ok is false (${targetTable.reason}): keep this query wiki-only and pass the reason through emit_unmapped_fallback.`,
    ];
  }
  if (targetTable?.ok === true) {
    // A table was staged and parsed, so claiming "No targetTable was staged" would be false.
    // The only way to get here is a missing warehouse connection id.
    return [
      'targetTable.ok is true but targetWarehouseConnectionId is missing: keep this query wiki-only until the Looker connection is mapped to a warehouse connection.',
    ];
  }
  return [
    'No targetTable was staged for this query; read the parent explore dependency before attempting any SL write.',
  ];
}
|
||||
|
||||
/**
 * Builds an advisory semantic-layer (SL) proposal from one staged Looker query.
 *
 * Steps:
 * 1. Validate `raw` against `lookerQueryToSlInputSchema`.
 * 2. Classify the staged warehouse target and derive `sourceTable` / `canWriteStandaloneSource`.
 * 3. Pick a triage lane from the usage counters.
 * 4. Split the query fields into measures and dimensions by field name.
 * 5. Turn the non-empty filters into at most one reusable segment.
 * 6. Pick a decision and assemble the guidance notes.
 *
 * @param raw Unparsed tool input.
 * @returns The proposal. It is a suggestion only; nothing is written.
 * @throws If `raw` fails schema validation, or if the model, view, or a field name
 *   cannot be normalised into a semantic-layer name.
 */
export function buildLookerSlProposal(raw: LookerQueryToSlInput): LookerSlProposal {
  const input = lookerQueryToSlInputSchema.parse(raw);
  const { query, usage } = input;

  const sourceName = `looker__${toSlName(query.model)}__${toSlName(query.view)}`;
  const targetWarehouseConnectionId = query.targetWarehouseConnectionId ?? null;
  const targetTable = query.targetTable ?? null;
  const status = targetStatus(targetWarehouseConnectionId, targetTable);
  const sourceTable = targetTable?.ok === true ? targetTable.canonicalTable : null;
  const canWriteStandaloneSource = status === 'mapped';

  const highUsage = isHighUsage(usage);
  // Explicit zero usage means "skip"; absent usage data falls through to "light".
  const triageLane =
    usage && usage.queryCount30d === 0 && usage.uniqueUsers30d === 0 ? 'skip' : highUsage ? 'full' : 'light';

  const label = contentLabel(input);
  const dimensions: LookerSlFieldProposal[] = [];
  const measures: LookerSlMeasureProposal[] = [];
  for (const field of query.fields) {
    const proposal = { name: toSlName(fieldLeaf(field)), lookerField: field };
    if (isMeasureLikeField(field)) {
      measures.push({
        ...proposal,
        expr: suggestedMeasureExpr(field),
        description: `Suggested from Looker ${label}; verify against explore field SQL before writing.`,
      });
    } else {
      dimensions.push(proposal);
    }
  }

  const filters = nonEmptyFilters(query.filters);
  const filterFields = Object.keys(filters);
  const segments: LookerSlSegmentProposal[] =
    filterFields.length === 0
      ? []
      : [
          {
            name: segmentNameFor(input.contentTitle, filterFields),
            filters,
            suggestedPredicate: Object.entries(filters)
              .map(([field, value]) => filterValueToPredicate(field, value))
              .join(' AND '),
            description: `Reusable filter candidate from Looker ${label}.`,
          },
        ];

  const decision =
    measures.length > 0 ? 'measure_added' : segments.length > 0 && highUsage ? 'source_created' : 'wiki_only';

  const notes = [
    ...targetNotes(status, targetTable),
    'Treat this as a proposal, not an instruction to write SL blindly.',
    'Verify field SQL, source shape, and existing SL overlap with sl_discover/sl_read_source before sl_write_source or sl_edit_source.',
    'Usage signals can raise priority, but query counts, users, owners, and folders must not be written as wiki narrative.',
  ];
  if (triageLane === 'skip') {
    notes.push('Zero recent usage is a skip signal unless the raw content clearly defines durable business semantics.');
  }

  return {
    sourceName,
    targetWarehouseConnectionId,
    targetTable,
    targetStatus: status,
    sourceTable,
    canWriteStandaloneSource,
    triageLane,
    decision,
    dimensions,
    measures,
    segments,
    notes,
  };
}

/**
 * Picks the segment name: the content title when it can be normalised, otherwise the
 * filter field leaves joined by `_`.
 *
 * `toSlName` keeps only ASCII letters and digits and throws when none remain. A title made
 * purely of other characters (emoji, non-Latin script, punctuation) would otherwise abort the
 * whole proposal, even though the title is only a naming hint.
 */
function segmentNameFor(contentTitle: string | undefined, filterFields: string[]): string {
  if (contentTitle !== undefined && /[a-z0-9]/.test(contentTitle.toLowerCase())) {
    return toSlName(contentTitle);
  }
  return toSlName(filterFields.map(fieldLeaf).join('_'));
}
|
||||
|
||||
/**
 * Creates the `looker_query_to_sl` tool.
 *
 * `execute` builds the proposal and returns it both as markdown and as structured data.
 * `toModelOutput` forwards only the markdown as a single text content part. If the output
 * has no `markdown` property, the whole output is stringified instead.
 */
export function createLookerQueryToSlTool() {
  return tool({
    description:
      'Given one staged Looker query JSON, return a conservative proposal for SL measures, dimensions, reusable filters, and triage priority. The proposal is advisory; verify with SL tools before writing.',
    inputSchema: lookerQueryToSlInputSchema,
    execute: async (input): Promise<ToolOutput<LookerSlProposal>> => {
      const proposal = buildLookerSlProposal(input);
      return { markdown: formatLookerSlProposal(proposal), structured: proposal };
    },
    toModelOutput: ({ output }) => {
      let text: string;
      if (output && typeof output === 'object' && 'markdown' in output) {
        text = String((output as { markdown: unknown }).markdown);
      } else {
        text = String(output);
      }
      return { type: 'content', value: [{ type: 'text', text }] };
    },
  });
}
|
||||
|
||||
/**
 * Renders a proposal as markdown: a summary bullet list followed by Measures, Dimensions,
 * Segments, and Notes sections.
 *
 * Empty Measures, Dimensions, and Segments sections show `- (none)`; an empty Notes section
 * shows only its heading. Null target fields are rendered as `(none)`.
 *
 * @param proposal Proposal to render.
 * @returns Markdown text with `\n` line breaks and no trailing newline.
 */
export function formatLookerSlProposal(proposal: LookerSlProposal): string {
  const orNone = (bullets: string[]): string[] => (bullets.length === 0 ? ['- (none)'] : bullets);

  const summary = [
    `- sourceName: ${proposal.sourceName}`,
    `- targetStatus: ${proposal.targetStatus}`,
    `- targetWarehouseConnectionId: ${proposal.targetWarehouseConnectionId ?? '(none)'}`,
    `- sourceTable: ${proposal.sourceTable ?? '(none)'}`,
    `- canWriteStandaloneSource: ${proposal.canWriteStandaloneSource}`,
    `- triageLane: ${proposal.triageLane}`,
    `- decision: ${proposal.decision}`,
  ];
  const measureBullets = proposal.measures.map((entry) => `- ${entry.name}: ${entry.expr} (${entry.lookerField})`);
  const dimensionBullets = proposal.dimensions.map((entry) => `- ${entry.name}: ${entry.lookerField}`);
  const segmentBullets = proposal.segments.map((entry) => `- ${entry.name}: ${entry.suggestedPredicate}`);
  const noteBullets = proposal.notes.map((entry) => `- ${entry}`);

  return [
    '## Looker query SL proposal',
    '',
    ...summary,
    '',
    '### Measures',
    ...orNone(measureBullets),
    '',
    '### Dimensions',
    ...orNone(dimensionBullets),
    '',
    '### Segments',
    ...orNone(segmentBullets),
    '',
    '### Notes',
    ...noteBullets,
  ].join('\n');
}
|
||||
|
||||
/**
 * Tells whether the usage counters mark the content as heavily used: at least 10 queries or
 * at least 3 unique users in the last 30 days. Missing usage data counts as not high.
 */
function isHighUsage(usage: z.infer<typeof lookerUsageInputSchema> | undefined): boolean {
  if (!usage) {
    return false;
  }
  return usage.queryCount30d >= 10 || usage.uniqueUsers30d >= 3;
}
|
||||
|
||||
/**
 * Heuristic: a field is treated as a measure when the last segment of its name contains one
 * of the measure keywords as a whole word. Underscores count as word separators.
 */
function isMeasureLikeField(field: string): boolean {
  const spacedLeaf = fieldLeaf(field).replace(/_/g, ' ');
  return MEASURE_FIELD_RE.test(spacedLeaf);
}
|
||||
|
||||
/**
 * Suggests an aggregate expression for a measure-like field, based on keywords in the last
 * segment of its name: count-like names get `count(...)`, average/rate-like names get
 * `avg(...)`, and everything else gets `sum(...)`. The full field reference is the argument.
 */
function suggestedMeasureExpr(field: string): string {
  const words = fieldLeaf(field).replace(/_/g, ' ');
  let aggregate = 'sum';
  if (/\b(count|count_distinct)\b/i.test(words)) {
    aggregate = 'count';
  } else if (/\b(avg|average|rate|ratio|percent|pct|margin|score)\b/i.test(words)) {
    aggregate = 'avg';
  }
  return `${aggregate}(${field})`;
}
|
||||
|
||||
/**
 * Returns the part of a dotted field reference after the last `.`.
 * If that part is empty (for example `"view."`), the whole input is returned unchanged.
 */
function fieldLeaf(field: string): string {
  const tail = field.slice(field.lastIndexOf('.') + 1);
  return tail || field;
}
|
||||
|
||||
/**
 * Returns a copy of `filters` without entries that carry no value: `null`, `undefined`,
 * blank strings, and empty arrays. All other values (including `0`, `false`, and objects)
 * are kept.
 */
function nonEmptyFilters(filters: Record<string, unknown>): Record<string, unknown> {
  const carriesValue = (candidate: unknown): boolean => {
    if (candidate == null) {
      return false;
    }
    if (typeof candidate === 'string') {
      return candidate.trim().length > 0;
    }
    if (Array.isArray(candidate)) {
      return candidate.length > 0;
    }
    return true;
  };
  const kept = Object.entries(filters).filter(([, candidate]) => carriesValue(candidate));
  return Object.fromEntries(kept);
}
|
||||
|
||||
/**
 * Translates one Looker filter entry into a suggested SQL-like predicate.
 *
 * The result is a hint for a human or agent to verify, not executable SQL; the field type is
 * unknown here, so string-filter conventions are assumed.
 *
 * Rules, in order:
 * - arrays: `field IN (...)`
 * - numbers and booleans: `field = value`, unquoted
 * - comma-separated text without quotes: `field IN (...)`, or `field NOT IN (...)` when every
 *   item is a negated non-numeric value such as `-a,-b`. Empty items from stray commas are
 *   dropped. Items like `-1` stay literal so lists of negative numbers are not inverted.
 * - a leading `-` negates a single value: `!=`, or `NOT LIKE` when the value contains `%`
 * - a value containing `%`: `LIKE`
 * - anything else: `=`
 *
 * @param field Looker field reference, used verbatim on the left-hand side.
 * @param value Raw filter value.
 */
function filterValueToPredicate(field: string, value: unknown): string {
  if (Array.isArray(value)) {
    return `${field} IN (${value.map(sqlLiteral).join(', ')})`;
  }
  if (typeof value === 'number' || typeof value === 'boolean') {
    return `${field} = ${String(value)}`;
  }

  const raw = String(value).trim();
  const isNegated = (text: string): boolean => text.startsWith('-') && text.length > 1;

  if (raw.includes(',') && !raw.includes('"') && !raw.includes("'")) {
    const items = raw
      .split(',')
      .map((part) => part.trim())
      .filter((part) => part.length > 0);
    if (items.length > 0) {
      // Only treat the list as an exclusion when every item is negated text; `-1,-2` is far
      // more likely a list of negative numbers than "not 1 and not 2".
      const excludesAll = items.every((item) => isNegated(item) && Number.isNaN(Number(item.slice(1))));
      const operands = excludesAll ? items.map((item) => item.slice(1).trim()) : items;
      const operator = excludesAll ? 'NOT IN' : 'IN';
      return `${field} ${operator} (${operands.map((operand) => sqlLiteral(operand)).join(', ')})`;
    }
  }

  const negated = isNegated(raw);
  const operand = negated ? raw.slice(1).trim() : raw;
  if (operand.includes('%')) {
    // Previously a negated wildcard fell into the `!=` branch and compared against the
    // pattern text literally.
    return `${field} ${negated ? 'NOT LIKE' : 'LIKE'} ${sqlLiteral(operand)}`;
  }
  return `${field} ${negated ? '!=' : '='} ${sqlLiteral(operand)}`;
}

/**
 * Renders a value as a SQL literal: numbers and booleans verbatim, everything else as a
 * single-quoted string with embedded single quotes doubled.
 */
function sqlLiteral(value: unknown): string {
  if (typeof value === 'number' || typeof value === 'boolean') {
    return String(value);
  }
  return `'${String(value).replace(/'/g, "''")}'`;
}
|
||||
|
||||
/**
 * Describes where a query came from, for use in generated descriptions: `look` or
 * `dashboard tile`, followed by the quoted title when one is present.
 */
function contentLabel(input: z.infer<typeof lookerQueryToSlInputSchema>): string {
  let noun = 'look';
  if (input.contentType === 'dashboard_tile') {
    noun = 'dashboard tile';
  }
  if (!input.contentTitle) {
    return noun;
  }
  return `${noun} "${input.contentTitle}"`;
}
|
||||
|
||||
/**
 * Normalises an arbitrary Looker label into a semantic-layer identifier.
 *
 * camelCase boundaries become underscores, the text is lowercased, every run of characters
 * outside `a-z0-9` collapses to a single `_`, and leading/trailing underscores are removed.
 * A result that starts with a digit is prefixed with `n_`.
 *
 * @throws Error when nothing is left after normalisation.
 */
function toSlName(value: string): string {
  const snakeCased = value
    .trim()
    .replace(/([a-z0-9])([A-Z])/g, '$1_$2')
    .toLowerCase();
  const identifier = snakeCased
    .replace(/[^a-z0-9]+/g, '_')
    .replace(/^_+|_+$/g, '')
    .replace(/_+/g, '_');

  if (identifier.length === 0) {
    throw new Error('Cannot derive semantic-layer name from empty Looker value');
  }
  const startsWithDigit = /^[0-9]/.test(identifier);
  return startsWithDigit ? `n_${identifier}` : identifier;
}
|
||||
329
packages/context/src/ingest/adapters/looker/types.test.ts
Normal file
329
packages/context/src/ingest/adapters/looker/types.test.ts
Normal file
|
|
@ -0,0 +1,329 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
lookerPullConfigSchema,
|
||||
parseLookerPullConfig,
|
||||
parsedTargetTableSchema,
|
||||
stagedDashboardFileSchema,
|
||||
stagedExploreFileSchema,
|
||||
stagedLookerFetchIssueSchema,
|
||||
stagedLookerQuerySchema,
|
||||
stagedLookerScopeFileSchema,
|
||||
stagedLookerSignalsFileSchema,
|
||||
stagedLookFileSchema,
|
||||
stagedSyncConfigSchema,
|
||||
} from './types.js';
|
||||
|
||||
describe('Looker staged runtime schemas', () => {
|
||||
it('parses pull config and staged sync config', () => {
|
||||
expect(
|
||||
lookerPullConfigSchema.parse({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
instanceBaseUrl: 'https://example.looker.com',
|
||||
}),
|
||||
).toEqual({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
instanceBaseUrl: 'https://example.looker.com',
|
||||
connectionMappings: {},
|
||||
connectionTypes: {},
|
||||
parsedTargetTables: {},
|
||||
});
|
||||
|
||||
expect(
|
||||
stagedSyncConfigSchema.parse({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
fetchedAt: '2026-04-30T12:00:00.000Z',
|
||||
instanceBaseUrl: 'https://example.looker.com',
|
||||
}),
|
||||
).toMatchObject({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
instanceBaseUrl: 'https://example.looker.com',
|
||||
});
|
||||
});
|
||||
|
||||
it('parses incremental pull cursors and scope manifests', () => {
|
||||
expect(
|
||||
parseLookerPullConfig({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
dashboardUpdatedSince: '2026-04-30T10:00:00.000Z',
|
||||
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
|
||||
}),
|
||||
).toEqual({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
dashboardUpdatedSince: '2026-04-30T10:00:00.000Z',
|
||||
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
|
||||
connectionMappings: {},
|
||||
connectionTypes: {},
|
||||
parsedTargetTables: {},
|
||||
});
|
||||
|
||||
expect(
|
||||
stagedLookerScopeFileSchema.parse({
|
||||
mode: 'incremental',
|
||||
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
|
||||
fetchedRawPaths: ['dashboards/10.json'],
|
||||
}),
|
||||
).toEqual({
|
||||
mode: 'incremental',
|
||||
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
|
||||
fetchedRawPaths: ['dashboards/10.json'],
|
||||
});
|
||||
|
||||
expect(
|
||||
stagedSyncConfigSchema.parse({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
fetchedAt: '2026-04-30T12:30:00.000Z',
|
||||
previousCursors: {
|
||||
dashboardsLastSyncedAt: null,
|
||||
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
|
||||
},
|
||||
nextCursors: {
|
||||
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
|
||||
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
|
||||
},
|
||||
}).nextCursors,
|
||||
).toEqual({
|
||||
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
|
||||
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
|
||||
});
|
||||
});
|
||||
|
||||
it('normalizes numeric Looker ids to strings', () => {
|
||||
const dashboard = stagedDashboardFileSchema.parse({
|
||||
lookerId: 10,
|
||||
title: 'Sales Pipeline',
|
||||
description: null,
|
||||
folderId: 7,
|
||||
ownerId: 3,
|
||||
updatedAt: '2026-04-30T12:00:00.000Z',
|
||||
tiles: [{ id: 100, title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
|
||||
});
|
||||
|
||||
expect(dashboard.lookerId).toBe('10');
|
||||
expect(dashboard.folderId).toBe('7');
|
||||
expect(dashboard.ownerId).toBe('3');
|
||||
expect(dashboard.tiles[0].id).toBe('100');
|
||||
});
|
||||
|
||||
it('parses explores, looks, and signal files with defaults', () => {
|
||||
expect(
|
||||
stagedExploreFileSchema.parse({
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
label: 'Sales Pipeline',
|
||||
description: null,
|
||||
fields: {
|
||||
dimensions: [{ name: 'opportunities.id', label: 'Opportunity ID', type: 'number', sql: '${TABLE}.id' }],
|
||||
measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '${TABLE}.arr' }],
|
||||
},
|
||||
joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }],
|
||||
}),
|
||||
).toMatchObject({
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
|
||||
});
|
||||
|
||||
expect(
|
||||
stagedLookFileSchema.parse({
|
||||
lookerId: '20',
|
||||
title: 'Open Pipeline',
|
||||
description: null,
|
||||
folderId: null,
|
||||
ownerId: null,
|
||||
updatedAt: null,
|
||||
query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
|
||||
}),
|
||||
).toMatchObject({ lookerId: '20', query: { fields: ['opportunities.arr'] } });
|
||||
|
||||
expect(stagedLookerSignalsFileSchema.parse({}).dashboardUsage).toEqual([]);
|
||||
});
|
||||
|
||||
it('parses warehouse SL mapping pull config and staged target table fields', () => {
|
||||
const targetConnectionId = '22222222-2222-4222-8222-222222222222';
|
||||
const parsedTargetTable = {
|
||||
ok: true as const,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'opportunities',
|
||||
canonicalTable: 'proj.dataset.opportunities',
|
||||
};
|
||||
|
||||
expect(parsedTargetTableSchema.parse(parsedTargetTable)).toEqual(parsedTargetTable);
|
||||
|
||||
expect(
|
||||
parseLookerPullConfig({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
connectionMappings: { b2b_sandbox_bq: targetConnectionId },
|
||||
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
|
||||
parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable },
|
||||
}),
|
||||
).toEqual({
|
||||
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
|
||||
connectionMappings: { b2b_sandbox_bq: targetConnectionId },
|
||||
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
|
||||
parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable },
|
||||
});
|
||||
|
||||
expect(
|
||||
stagedExploreFileSchema.parse({
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
label: 'Sales Pipeline',
|
||||
description: null,
|
||||
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
|
||||
connectionName: 'b2b_sandbox_bq',
|
||||
viewName: 'opportunities',
|
||||
fields: {
|
||||
dimensions: [{ name: 'opportunities.id', label: 'Opportunity ID', type: 'number', sql: '${TABLE}.id' }],
|
||||
measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '${TABLE}.arr' }],
|
||||
},
|
||||
joins: [
|
||||
{
|
||||
name: 'accounts',
|
||||
type: 'left_outer',
|
||||
relationship: 'many_to_one',
|
||||
rawSqlTableName: 'proj.dataset.accounts',
|
||||
sqlOn: '${opportunities.account_id} = ${accounts.id}',
|
||||
from: null,
|
||||
targetTable: {
|
||||
ok: true,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'accounts',
|
||||
canonicalTable: 'proj.dataset.accounts',
|
||||
},
|
||||
},
|
||||
],
|
||||
targetWarehouseConnectionId: targetConnectionId,
|
||||
targetTable: parsedTargetTable,
|
||||
}),
|
||||
).toMatchObject({
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
connectionName: 'b2b_sandbox_bq',
|
||||
targetWarehouseConnectionId: targetConnectionId,
|
||||
targetTable: parsedTargetTable,
|
||||
joins: [{ name: 'accounts', targetTable: { ok: true, name: 'accounts' } }],
|
||||
});
|
||||
});
|
||||
|
||||
it('parses structured Looker mapping fetch warnings', () => {
|
||||
expect(
|
||||
stagedLookerFetchIssueSchema.parse({
|
||||
rawPath: 'looker_connection_mappings/b2b_sandbox_bq',
|
||||
entityType: 'looker_connection_mapping',
|
||||
entityId: 'b2b_sandbox_bq',
|
||||
severity: 'warning',
|
||||
statusCode: null,
|
||||
message: 'Looker connection b2b_sandbox_bq is not mapped to a warehouse connection.',
|
||||
retryRecommended: false,
|
||||
kind: 'unmapped_looker_connection',
|
||||
details: {
|
||||
lookerConnectionName: 'b2b_sandbox_bq',
|
||||
affectedExplores: ['b2b.sales_pipeline'],
|
||||
},
|
||||
}),
|
||||
).toMatchObject({
|
||||
entityType: 'looker_connection_mapping',
|
||||
kind: 'unmapped_looker_connection',
|
||||
details: {
|
||||
lookerConnectionName: 'b2b_sandbox_bq',
|
||||
affectedExplores: ['b2b.sales_pipeline'],
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('parses LookML model listing warnings in fetch reports', () => {
|
||||
expect(
|
||||
stagedLookerFetchIssueSchema.parse({
|
||||
rawPath: 'lookml_models.json',
|
||||
entityType: 'lookml_models',
|
||||
entityId: null,
|
||||
severity: 'warning',
|
||||
statusCode: 403,
|
||||
message: 'LookML model access denied',
|
||||
retryRecommended: false,
|
||||
}),
|
||||
).toEqual({
|
||||
rawPath: 'lookml_models.json',
|
||||
entityType: 'lookml_models',
|
||||
entityId: null,
|
||||
severity: 'warning',
|
||||
statusCode: 403,
|
||||
message: 'LookML model access denied',
|
||||
retryRecommended: false,
|
||||
});
|
||||
});
|
||||
|
||||
it('accepts slug-shaped connection ids inside KLO Looker runtime schemas', () => {
|
||||
const parsedTargetTable = {
|
||||
ok: true as const,
|
||||
catalog: 'proj',
|
||||
schema: 'dataset',
|
||||
name: 'opportunities',
|
||||
canonicalTable: 'proj.dataset.opportunities',
|
||||
};
|
||||
|
||||
expect(
|
||||
parseLookerPullConfig({
|
||||
lookerConnectionId: 'prod-looker',
|
||||
connectionMappings: { b2b_sandbox_bq: 'prod-warehouse' },
|
||||
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
|
||||
parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable },
|
||||
}),
|
||||
).toMatchObject({
|
||||
lookerConnectionId: 'prod-looker',
|
||||
connectionMappings: { b2b_sandbox_bq: 'prod-warehouse' },
|
||||
});
|
||||
|
||||
expect(
|
||||
stagedSyncConfigSchema.parse({
|
||||
lookerConnectionId: 'prod-looker',
|
||||
fetchedAt: '2026-04-30T12:00:00.000Z',
|
||||
}),
|
||||
).toMatchObject({
|
||||
lookerConnectionId: 'prod-looker',
|
||||
});
|
||||
|
||||
expect(
|
||||
stagedLookerQuerySchema.parse({
|
||||
model: 'b2b',
|
||||
view: 'sales_pipeline',
|
||||
targetWarehouseConnectionId: 'prod-warehouse',
|
||||
targetTable: parsedTargetTable,
|
||||
}),
|
||||
).toMatchObject({
|
||||
targetWarehouseConnectionId: 'prod-warehouse',
|
||||
targetTable: parsedTargetTable,
|
||||
});
|
||||
|
||||
expect(
|
||||
stagedExploreFileSchema.parse({
|
||||
modelName: 'b2b',
|
||||
exploreName: 'sales_pipeline',
|
||||
label: 'Sales Pipeline',
|
||||
description: null,
|
||||
fields: { dimensions: [], measures: [] },
|
||||
targetWarehouseConnectionId: 'prod-warehouse',
|
||||
targetTable: parsedTargetTable,
|
||||
}),
|
||||
).toMatchObject({
|
||||
targetWarehouseConnectionId: 'prod-warehouse',
|
||||
targetTable: parsedTargetTable,
|
||||
});
|
||||
});
|
||||
|
||||
it('rejects unsafe KLO Looker connection ids', () => {
|
||||
expect(() =>
|
||||
parseLookerPullConfig({
|
||||
lookerConnectionId: '../prod-looker',
|
||||
}),
|
||||
).toThrow();
|
||||
|
||||
expect(() =>
|
||||
parseLookerPullConfig({
|
||||
connectionMappings: { b2b_sandbox_bq: 'prod/warehouse' },
|
||||
}),
|
||||
).toThrow();
|
||||
});
|
||||
});
|
||||
255
packages/context/src/ingest/adapters/looker/types.ts
Normal file
255
packages/context/src/ingest/adapters/looker/types.ts
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
import { z } from 'zod';
|
||||
import { connectionTypeSchema } from '../../../connections/connection-type.js';
|
||||
import { parsedTargetTableSchema } from '../../parsed-target-table.js';
|
||||
|
||||
/** Looker ids may arrive as strings or integers; both are normalized to a string. */
const lookerIdSchema = z
  .union([z.string(), z.number().int()])
  .transform((id) => String(id));

/** Optional id reference: `null` is accepted, and an omitted value parses to `null`. */
const nullableLookerIdSchema = z.union([lookerIdSchema, z.null()]).default(null);

/** Characters permitted in a connection id: ASCII letters, digits, `_` and `-` only. */
const CONNECTION_ID_PATTERN = /^[A-Za-z0-9_-]+$/;

/**
 * Connection id: a non-empty string made only of the characters in
 * `CONNECTION_ID_PATTERN`, so values containing `/` or `.` are rejected.
 */
export const lookerConnectionIdSchema = z.string().min(1).regex(CONNECTION_ID_PATTERN);
|
||||
|
||||
export { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js';
|
||||
|
||||
/** Builds one cursor field: an ISO datetime string or `null`; an omitted value parses to `null`. */
const syncCursorField = () => z.iso.datetime().nullable().default(null);

/** Per-content-type "last synced at" cursors for dashboards and looks. */
export const lookerRuntimeCursorsSchema = z.object({
  dashboardsLastSyncedAt: syncCursorField(),
  looksLastSyncedAt: syncCursorField(),
});

export type LookerRuntimeCursors = z.infer<typeof lookerRuntimeCursorsSchema>;
|
||||
|
||||
/**
 * Field definitions for the pull configuration. Every field is optional or has
 * a default, so an empty object is a valid config.
 */
const lookerPullConfigShape = {
  lookerConnectionId: lookerConnectionIdSchema.optional(),
  instanceBaseUrl: z.url().optional(),
  dashboardUpdatedSince: z.iso.datetime().nullable().optional(),
  lookUpdatedSince: z.iso.datetime().nullable().optional(),
  // String-keyed map whose values must be valid connection ids.
  // NOTE(review): keys look like Looker connection names — confirm with callers.
  connectionMappings: z.record(z.string(), lookerConnectionIdSchema).default({}),
  // String-keyed map of connection types.
  connectionTypes: z.record(z.string(), connectionTypeSchema).default({}),
  // String-keyed map of already-parsed target tables.
  parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}),
};

/** Schema for the configuration accepted by `parseLookerPullConfig`. */
export const lookerPullConfigSchema = z.object(lookerPullConfigShape);

export type LookerPullConfig = z.infer<typeof lookerPullConfigSchema>;
|
||||
|
||||
/**
 * Validate a raw pull configuration against `lookerPullConfigSchema`.
 *
 * @param raw - Unvalidated config value; `null` and `undefined` are treated as an empty object.
 * @returns The parsed configuration with schema defaults applied.
 * @throws Whatever `lookerPullConfigSchema.parse` throws when validation fails.
 */
export function parseLookerPullConfig(raw: unknown): LookerPullConfig {
  const candidate = raw ?? {};
  return lookerPullConfigSchema.parse(candidate);
}
|
||||
|
||||
/**
 * Schema for the staged sync configuration: the connection id, the fetch
 * timestamp, an optional instance URL, and the cursor pairs before and after
 * the fetch. Each cursor pair defaults to "never synced" (both cursors `null`).
 */
export const stagedSyncConfigSchema = z.object({
  lookerConnectionId: lookerConnectionIdSchema,
  fetchedAt: z.iso.datetime(),
  instanceBaseUrl: z.url().optional(),
  previousCursors: lookerRuntimeCursorsSchema.default({ dashboardsLastSyncedAt: null, looksLastSyncedAt: null }),
  nextCursors: lookerRuntimeCursorsSchema.default({ dashboardsLastSyncedAt: null, looksLastSyncedAt: null }),
});
|
||||
|
||||
/** Field definitions for a staged Looker query. Only `model` and `view` are required. */
const stagedLookerQueryShape = {
  id: lookerIdSchema.optional(),
  model: z.string(),
  view: z.string(),
  fields: z.array(z.string()).default([]),
  filters: z.record(z.string(), z.unknown()).default({}),
  sorts: z.array(z.string()).default([]),
  // May be a string or a number, and may also be omitted or `null`.
  limit: z.union([z.string(), z.number()]).optional().nullable(),
  dynamicFields: z.string().optional().nullable(),
  // Resolved warehouse target; both parse to `null` when not supplied.
  targetWarehouseConnectionId: lookerConnectionIdSchema.nullable().default(null),
  targetTable: parsedTargetTableSchema.nullable().default(null),
};

/** Schema for a query attached to a staged look or dashboard tile. */
export const stagedLookerQuerySchema = z.object(stagedLookerQueryShape);

export type StagedLookerQuery = z.infer<typeof stagedLookerQuerySchema>;
|
||||
|
||||
/** One dashboard tile: its id plus an optional title, look reference and query. */
const stagedDashboardTileSchema = z.object({
  id: lookerIdSchema,
  title: z.string().nullable().default(null),
  lookId: nullableLookerIdSchema,
  query: stagedLookerQuerySchema.nullable().default(null),
});

/** Field definitions for a staged dashboard file. */
const stagedDashboardFileShape = {
  lookerId: lookerIdSchema,
  title: z.string(),
  // `description` and `updatedAt` must be present but may be `null` (no default).
  description: z.string().nullable(),
  folderId: nullableLookerIdSchema,
  ownerId: nullableLookerIdSchema,
  updatedAt: z.string().nullable(),
  tiles: z.array(stagedDashboardTileSchema).default([]),
};

/** Schema for a staged dashboard and its tiles. */
export const stagedDashboardFileSchema = z.object(stagedDashboardFileShape);

export type StagedDashboardFile = z.infer<typeof stagedDashboardFileSchema>;
|
||||
|
||||
/** Field definitions for a staged look file. */
const stagedLookFileShape = {
  lookerId: lookerIdSchema,
  title: z.string(),
  // `description` and `updatedAt` must be present but may be `null` (no default).
  description: z.string().nullable(),
  folderId: nullableLookerIdSchema,
  ownerId: nullableLookerIdSchema,
  updatedAt: z.string().nullable(),
  // The look's query; parses to `null` when omitted.
  query: stagedLookerQuerySchema.nullable().default(null),
};

/** Schema for a staged look. */
export const stagedLookFileSchema = z.object(stagedLookFileShape);

export type StagedLookFile = z.infer<typeof stagedLookFileSchema>;
|
||||
|
||||
/** One folder entry: id, name, optional parent id, and a `path` string list (default empty). */
const stagedFolderSchema = z.object({
  id: lookerIdSchema,
  name: z.string(),
  parentId: nullableLookerIdSchema,
  path: z.array(z.string()).default([]),
});

/** Schema for the staged folders tree file: a required list of folder entries. */
export const stagedFoldersTreeFileSchema = z.object({ folders: z.array(stagedFolderSchema) });

export type StagedFoldersTreeFile = z.infer<typeof stagedFoldersTreeFileSchema>;
|
||||
|
||||
/**
 * Schema for a staged user. `displayName` must be present (it may be `null`);
 * `email` parses to `null` when omitted.
 */
export const stagedUserFileSchema = z.object({
  id: lookerIdSchema,
  displayName: z.string().nullable(),
  email: z.string().nullable().default(null),
});

export type StagedUserFile = z.infer<typeof stagedUserFileSchema>;
|
||||
|
||||
/** Schema for a staged group: a normalized id and a required name. */
export const stagedGroupFileSchema = z.object({ id: lookerIdSchema, name: z.string() });

export type StagedGroupFile = z.infer<typeof stagedGroupFileSchema>;
|
||||
|
||||
/** Explore entry in a model listing: a name and an optional label (default `null`). */
const stagedLookmlModelExploreSchema = z.object({
  name: z.string(),
  label: z.string().nullable().default(null),
});

/** One LookML model in the listing, with its required explores array. */
const stagedLookmlModelSchema = z.object({
  name: z.string(),
  label: z.string().nullable().default(null),
  explores: z.array(stagedLookmlModelExploreSchema),
});

/** Schema for the staged LookML models listing file. */
export const stagedLookmlModelsFileSchema = z.object({ models: z.array(stagedLookmlModelSchema) });

export type StagedLookmlModelsFile = z.infer<typeof stagedLookmlModelsFileSchema>;
|
||||
|
||||
/** Builds an optional text attribute for a field: string or `null`, defaulting to `null`. */
const optionalFieldText = () => z.string().nullable().default(null);

/** A dimension or measure entry: only `name` is required. */
const stagedLookerFieldSchema = z.object({
  name: z.string(),
  label: optionalFieldText(),
  type: optionalFieldText(),
  sql: optionalFieldText(),
  description: optionalFieldText(),
});
|
||||
|
||||
/** Builds an optional text attribute for a join: string or `null`, defaulting to `null`. */
const optionalJoinText = () => z.string().nullable().default(null);

/** A join entry on an explore: only `name` is required. */
const stagedLookerJoinSchema = z.object({
  name: z.string(),
  type: optionalJoinText(),
  relationship: optionalJoinText(),
  rawSqlTableName: optionalJoinText(),
  sqlOn: optionalJoinText(),
  from: optionalJoinText(),
  // Parsed target table for the join; `null` when not supplied.
  targetTable: parsedTargetTableSchema.nullable().default(null),
});
|
||||
|
||||
/** Builds an optional text attribute for an explore: string or `null`, defaulting to `null`. */
const optionalExploreText = () => z.string().nullable().default(null);

/**
 * Schema for a staged explore. `modelName`, `exploreName` and the `fields`
 * object are required; everything else has a default.
 */
export const stagedExploreFileSchema = z.object({
  modelName: z.string(),
  exploreName: z.string(),
  label: optionalExploreText(),
  description: optionalExploreText(),
  rawSqlTableName: optionalExploreText(),
  connectionName: optionalExploreText(),
  viewName: optionalExploreText(),
  // `fields` itself is required; each list inside it defaults to empty.
  fields: z.object({
    dimensions: z.array(stagedLookerFieldSchema).default([]),
    measures: z.array(stagedLookerFieldSchema).default([]),
  }),
  joins: z.array(stagedLookerJoinSchema).default([]),
  // Resolved warehouse target; both parse to `null` when not supplied.
  targetWarehouseConnectionId: lookerConnectionIdSchema.nullable().default(null),
  targetTable: parsedTargetTableSchema.nullable().default(null),
});

export type StagedExploreFile = z.infer<typeof stagedExploreFileSchema>;
|
||||
|
||||
/** Builds a counter field: a non-negative integer that defaults to 0. */
const signalCount = () => z.number().int().nonnegative().default(0);

/** Builds the content-type discriminator shared by scheduled-plan and favorite signals. */
const signalContentType = () => z.enum(['dashboard', 'look']);

/** Usage counters for one piece of content (field names carry a `30d` suffix). */
const stagedUsageSignalSchema = z.object({
  contentId: lookerIdSchema,
  queryCount30d: signalCount(),
  uniqueUsers30d: signalCount(),
  lastRunAt: z.string().nullable().default(null),
  topUsers: z.array(lookerIdSchema).default([]),
});

/** Scheduling information for one dashboard or look. */
const stagedScheduledPlanSignalSchema = z.object({
  contentId: lookerIdSchema,
  contentType: signalContentType(),
  isScheduled: z.boolean(),
  scheduleCount: signalCount(),
  recipientCount: signalCount(),
});

/** Favorite count for one dashboard or look. */
const stagedFavoriteSignalSchema = z.object({
  contentId: lookerIdSchema,
  contentType: signalContentType(),
  favoriteCount: signalCount(),
});
|
||||
|
||||
/** Field definitions for the combined signals file; every list defaults to empty. */
const stagedLookerSignalsFileShape = {
  dashboardUsage: z.array(stagedUsageSignalSchema).default([]),
  lookUsage: z.array(stagedUsageSignalSchema).default([]),
  scheduledPlans: z.array(stagedScheduledPlanSignalSchema).default([]),
  favorites: z.array(stagedFavoriteSignalSchema).default([]),
};

/** Schema for the staged signals (usage, scheduled plans, favorites). */
export const stagedLookerSignalsFileSchema = z.object(stagedLookerSignalsFileShape);

export type StagedLookerSignalsFile = z.infer<typeof stagedLookerSignalsFileSchema>;
|
||||
|
||||
/** Builds a list-of-raw-paths field that defaults to an empty list. */
const rawPathList = () => z.array(z.string()).default([]);

/**
 * Schema for the staged scope file: the sync mode (`full` or `incremental`)
 * and two raw-path lists (known-current and fetched).
 */
export const stagedLookerScopeFileSchema = z.object({
  mode: z.enum(['full', 'incremental']),
  knownCurrentRawPaths: rawPathList(),
  fetchedRawPaths: rawPathList(),
});

export type StagedLookerScopeFile = z.infer<typeof stagedLookerScopeFileSchema>;
|
||||
|
||||
/** Machine-readable classification that may accompany a fetch issue. */
const stagedLookerFetchIssueKindSchema = z.enum([
  'unmapped_looker_connection',
  'unparseable_sql_table_name',
  'looker_template_unresolved',
  'derived_table_not_supported',
  'lookml_connection_mismatch',
]);

/** Entity categories a fetch issue can be attached to. */
const FETCH_ISSUE_ENTITY_TYPES = [
  'dashboard',
  'look',
  'explore',
  'signals',
  'lookml_models',
  'looker_connection_mapping',
] as const;

/**
 * Schema for one issue recorded while fetching. `rawPath`, `entityType`,
 * `severity` and `message` are required; `kind` and `details` are optional
 * structured extras.
 */
export const stagedLookerFetchIssueSchema = z.object({
  rawPath: z.string().min(1),
  entityType: z.enum(FETCH_ISSUE_ENTITY_TYPES),
  entityId: z.string().nullable().default(null),
  severity: z.enum(['warning', 'error']),
  statusCode: z.number().int().nullable().default(null),
  message: z.string().min(1),
  retryRecommended: z.boolean().default(false),
  kind: stagedLookerFetchIssueKindSchema.optional(),
  details: z.record(z.string(), z.unknown()).optional(),
});

export type StagedLookerFetchIssue = z.infer<typeof stagedLookerFetchIssueSchema>;
|
||||
|
||||
/** Builds an issue-list field that defaults to an empty list. */
const fetchIssueList = () => z.array(stagedLookerFetchIssueSchema).default([]);

/**
 * Schema for the fetch report: an overall status (`success` or `partial`),
 * a retry hint, and the lists of skipped items and warnings.
 */
export const stagedLookerFetchReportSchema = z.object({
  status: z.enum(['success', 'partial']),
  retryRecommended: z.boolean().default(false),
  skipped: fetchIssueList(),
  warnings: fetchIssueList(),
});

export type StagedLookerFetchReport = z.infer<typeof stagedLookerFetchReportSchema>;
|
||||
|
||||
/** Relative paths of the per-signal files. */
const STAGED_SIGNAL_FILES = {
  dashboardUsage: 'signals/dashboard_usage.json',
  lookUsage: 'signals/look_usage.json',
  scheduledPlans: 'signals/scheduled_plans.json',
  favorites: 'signals/favorites.json',
} as const;

/** Well-known relative paths (files and one root directory name) used for staged output. */
export const STAGED_FILES = {
  syncConfig: 'sync-config.json',
  scope: 'looker-scope.json',
  fetchReport: 'looker-fetch-report.json',
  evidenceRoot: 'evidence',
  lookmlModels: 'lookml_models.json',
  foldersTree: 'folders/tree.json',
  signals: STAGED_SIGNAL_FILES,
} as const;
|
||||
230
packages/context/src/ingest/adapters/lookml/chunk.test.ts
Normal file
230
packages/context/src/ingest/adapters/lookml/chunk.test.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { chunkLookmlProject } from './chunk.js';
|
||||
import { type ParsedLookmlProject, parseLookmlStagedDir } from './parse.js';
|
||||
|
||||
const FIXTURE_ROOT = join(__dirname, '../../../../test/fixtures/lookml');
|
||||
|
||||
describe('chunkLookmlProject — first run', () => {
  /** Parse the named fixture directory into a project. */
  const parseFixture = (fixtureName: string) => parseLookmlStagedDir(join(FIXTURE_ROOT, fixtureName));

  it('single-model bundle → 1 WU with model + all views in rawFiles', async () => {
    const { workUnits, eviction } = chunkLookmlProject(await parseFixture('single-model'));

    expect(workUnits).toHaveLength(1);
    const [unit] = workUnits;
    expect(unit.unitKey).toBe('lookml-orders');
    expect(unit.rawFiles.sort()).toEqual(['orders.model.lkml', 'views/customers.view.lkml', 'views/orders.view.lkml']);
    expect(unit.peerFileIndex).toEqual([]);
    expect(unit.dependencyPaths).toEqual([]);
    expect(eviction).toBeUndefined();
  });

  it('multi-model bundle → 1 WU per model; shared view owned by lex-first model; others see it in dependencyPaths + peerFileIndex is pathless-index', async () => {
    const { workUnits } = chunkLookmlProject(await parseFixture('multi-model'));
    expect(workUnits).toHaveLength(2);

    const marketing = workUnits.find(({ unitKey }) => unitKey === 'lookml-marketing');
    const orders = workUnits.find(({ unitKey }) => unitKey === 'lookml-orders');
    expect(marketing).toBeDefined();
    expect(orders).toBeDefined();
    if (marketing === undefined || orders === undefined) {
      throw new Error('expected marketing and orders work units');
    }

    // "marketing" sorts ahead of "orders", so it is the owner of shared_dims.
    for (const ownedPath of ['views/shared_dims.view.lkml', 'views/campaigns.view.lkml', 'marketing.model.lkml']) {
      expect(marketing.rawFiles).toContain(ownedPath);
    }
    expect(marketing.rawFiles).not.toContain('views/orders.view.lkml');
    expect(marketing.dependencyPaths).toEqual([]);

    // "orders" only reads shared_dims: it shows up as a dependency, not as a raw file.
    expect(orders.rawFiles).not.toContain('views/shared_dims.view.lkml');
    expect(orders.dependencyPaths).toEqual(['views/shared_dims.view.lkml']);
    for (const ownedPath of ['views/orders.view.lkml', 'orders.model.lkml']) {
      expect(orders.rawFiles).toContain(ownedPath);
    }

    // The peer index holds the other model's files as bare paths.
    for (const peerPath of ['marketing.model.lkml', 'views/campaigns.view.lkml']) {
      expect(orders.peerFileIndex).toContain(peerPath);
    }
    // A path that is already a dependency must not be repeated in the peer index.
    expect(orders.peerFileIndex).not.toContain('views/shared_dims.view.lkml');
  });

  it('extends-chain fixture: single WU contains base + orders + orders_ext; chain order visible via graph', async () => {
    const { workUnits } = chunkLookmlProject(await parseFixture('extends-chain'));

    // The lone "orders" model includes views/*.view.lkml, so every view ends up in its WU.
    expect(workUnits).toHaveLength(1);
    const [unit] = workUnits;
    expect(unit.unitKey).toBe('lookml-orders');
    expect(unit.rawFiles.sort()).toEqual([
      'orders.model.lkml',
      'views/base.view.lkml',
      'views/orders.view.lkml',
      'views/orders_ext.view.lkml',
    ]);
    // On a first run every ancestor is already a raw file, so nothing is left to depend on.
    expect(unit.dependencyPaths).toEqual([]);
    expect(unit.notes).toMatch(/orders/);
  });

  it('is deterministic: two calls on the same project return structurally identical WorkUnits', async () => {
    const project = await parseFixture('multi-model');
    const firstPass = chunkLookmlProject(project);
    const secondPass = chunkLookmlProject(project);
    expect(firstPass.workUnits).toEqual(secondPass.workUnits);
  });

  it('unitKey is model-name-derived (stable across parse+chunk cycles and across re-syncs)', async () => {
    const { workUnits } = chunkLookmlProject(await parseFixture('multi-model'));
    const unitKeys = workUnits.map((wu) => wu.unitKey).sort();
    expect(unitKeys).toEqual(['lookml-marketing', 'lookml-orders']);
  });

  it('marks mismatched model WorkUnits as SL-disallowed and keeps wiki ingest enabled', () => {
    const project: ParsedLookmlProject = {
      models: [
        {
          path: 'b2b.model.lkml',
          name: 'b2b',
          includes: ['views/orders.view.lkml'],
          explores: ['orders'],
          connectionName: 'wrong_connection',
        },
      ],
      views: [{ path: 'views/orders.view.lkml', name: 'orders', extendsFrom: [], rawSqlTableName: 'public.orders' }],
      dashboards: [],
      allPaths: ['b2b.model.lkml', 'views/orders.view.lkml'],
    };
    const mismatchedModelNames = new Set(['b2b']);

    const [unit] = chunkLookmlProject(project, { mismatchedModelNames }).workUnits;

    expect(unit.unitKey).toBe('lookml-b2b');
    expect(unit.rawFiles).toEqual(['b2b.model.lkml', 'views/orders.view.lkml']);
    expect(unit.slDisallowed).toBe(true);
    expect(unit.slDisallowedReason).toBe('lookml_connection_mismatch');
    for (const expectedFragment of [
      '[LOOKML SL WRITES DISALLOWED]',
      'reason: lookml_connection_mismatch',
      'Do not call sl_write_source or sl_edit_source for this WorkUnit.',
    ]) {
      expect(unit.notes).toContain(expectedFragment);
    }
  });
});
|
||||
|
||||
describe('chunkLookmlProject — re-sync', () => {
  /** Parse the named fixture directory into a project. */
  const parseFixture = (fixtureName: string) => parseLookmlStagedDir(join(FIXTURE_ROOT, fixtureName));

  /** Build a complete diff set from the buckets a test cares about; the rest stay empty. */
  const diffSetOf = (buckets: { added?: string[]; modified?: string[]; deleted?: string[]; unchanged?: string[] }) => ({
    added: buckets.added ?? [],
    modified: buckets.modified ?? [],
    deleted: buckets.deleted ?? [],
    unchanged: buckets.unchanged ?? [],
  });

  it("modified file in one model only emits that model's WU", async () => {
    const project = await parseFixture('multi-model');
    const diffSet = diffSetOf({
      modified: ['views/campaigns.view.lkml'],
      unchanged: [
        'marketing.model.lkml',
        'orders.model.lkml',
        'views/orders.view.lkml',
        'views/shared_dims.view.lkml',
      ],
    });

    const { workUnits } = chunkLookmlProject(project, { diffSet });

    expect(workUnits).toHaveLength(1);
    expect(workUnits[0].unitKey).toBe('lookml-marketing');
  });

  it("added file under a model emits that model's WU with the new path in rawFiles", async () => {
    const project = await parseFixture('single-model');
    const diffSet = diffSetOf({
      added: ['views/customers.view.lkml'],
      unchanged: ['orders.model.lkml', 'views/orders.view.lkml'],
    });

    const { workUnits } = chunkLookmlProject(project, { diffSet });

    expect(workUnits).toHaveLength(1);
    expect(workUnits[0].rawFiles).toContain('views/customers.view.lkml');
  });

  it('widens dependencyPaths with transitive extends ancestors on re-sync', async () => {
    const project = await parseFixture('extends-chain');
    // Only orders_ext changes; base and orders are its upstream ancestors. In this
    // single-model fixture all three views are raw files of the same WU, so there is
    // nothing to widen and dependencyPaths must stay empty. This pins the baseline.
    const diffSet = diffSetOf({
      modified: ['views/orders_ext.view.lkml'],
      unchanged: ['orders.model.lkml', 'views/base.view.lkml', 'views/orders.view.lkml'],
    });

    const { workUnits } = chunkLookmlProject(project, { diffSet });

    expect(workUnits).toHaveLength(1);
    const [unit] = workUnits;
    expect(unit.rawFiles).toContain('views/orders_ext.view.lkml');
    // Ancestors that are raw files of the WU are never copied into dependencyPaths.
    expect(unit.dependencyPaths).toEqual([]);
  });

  it('widens dependencyPaths when an ancestor is OUTSIDE the WU (synthesized cross-model case)', () => {
    // In-memory project with two models: "a" includes base.view.lkml, while "b"
    // includes both base and derived.view.lkml, where derived extends base. The diff
    // touches only derived. Model b already lists base under `include:`, so base is
    // expected in b's dependencyPaths before any widening; the assertion below
    // therefore also confirms that widening is idempotent.
    const project: ParsedLookmlProject = {
      models: [
        { path: 'a.model.lkml', name: 'a', includes: ['views/base.view.lkml'], explores: [], connectionName: null },
        {
          path: 'b.model.lkml',
          name: 'b',
          includes: ['views/base.view.lkml', 'views/derived.view.lkml'],
          explores: [],
          connectionName: null,
        },
      ],
      views: [
        { path: 'views/base.view.lkml', name: 'base', extendsFrom: [], rawSqlTableName: null },
        { path: 'views/derived.view.lkml', name: 'derived', extendsFrom: ['base'], rawSqlTableName: null },
      ],
      dashboards: [],
      allPaths: ['a.model.lkml', 'b.model.lkml', 'views/base.view.lkml', 'views/derived.view.lkml'],
    };
    const diffSet = diffSetOf({
      modified: ['views/derived.view.lkml'],
      unchanged: ['a.model.lkml', 'b.model.lkml', 'views/base.view.lkml'],
    });

    const { workUnits } = chunkLookmlProject(project, { diffSet });

    const unitB = workUnits.find(({ unitKey }) => unitKey === 'lookml-b');
    expect(unitB).toBeDefined();
    if (unitB === undefined) {
      throw new Error('expected lookml-b work unit');
    }
    expect(unitB.dependencyPaths).toContain('views/base.view.lkml');
  });

  it('passes through diffSet.deleted as an EvictionUnit', async () => {
    const project = await parseFixture('single-model');
    const diffSet = diffSetOf({
      deleted: ['views/zombie.view.lkml'],
      unchanged: ['orders.model.lkml', 'views/customers.view.lkml', 'views/orders.view.lkml'],
    });

    const { workUnits, eviction } = chunkLookmlProject(project, { diffSet });

    expect(eviction).toEqual({ deletedRawPaths: ['views/zombie.view.lkml'] });
    // Nothing that still exists was added or modified, so no WU is emitted.
    expect(workUnits).toEqual([]);
  });
});
|
||||
159
packages/context/src/ingest/adapters/lookml/chunk.ts
Normal file
159
packages/context/src/ingest/adapters/lookml/chunk.ts
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
|
||||
import { buildLookmlGraph, type LookmlGraph } from './graph.js';
|
||||
import type { ParsedLookmlProject } from './parse.js';
|
||||
|
||||
/** Optional inputs that tune how a parsed LookML project is chunked. */
interface ChunkOptions {
  /** Names of models whose WorkUnits are emitted with SL writes disallowed. */
  mismatchedModelNames?: Set<string>;
  /** Diff for a re-sync; when omitted, every first-run WorkUnit is returned. */
  diffSet?: DiffSet;
}
|
||||
|
||||
/**
 * Prefix a WorkUnit's notes with a banner saying SL writes are disallowed for
 * the given model because of a LookML connection mismatch.
 *
 * @param modelName - Model name interpolated into the banner.
 * @param existingNotes - Notes appended after the banner, separated by a blank line.
 * @returns The banner, a blank line, then `existingNotes`.
 */
function lookmlSlDisallowedNotes(modelName: string, existingNotes: string): string {
  const bannerLines = [
    '[LOOKML SL WRITES DISALLOWED]',
    'reason: lookml_connection_mismatch',
    `model: ${modelName}`,
    'Do not call sl_write_source or sl_edit_source for this WorkUnit.',
    'Continue wiki extraction and context candidates from the raw LookML files.',
    '[/LOOKML SL WRITES DISALLOWED]',
  ];
  return `${bannerLines.join('\n')}\n\n${existingNotes}`;
}
|
||||
|
||||
/**
 * Split a parsed LookML project into WorkUnits.
 *
 * Without `opts.diffSet` (first run) the result holds one WorkUnit per model,
 * a `lookml-orphans` unit when some views have no owning model, and one
 * `lookml-dashboard-<name>` unit per dashboard.
 *
 * With `opts.diffSet` (re-sync) only the units whose rawFiles intersect
 * added ∪ modified are kept. For every view in a kept unit's rawFiles, the
 * paths of its extends ancestors that are not rawFiles of that unit are added
 * to dependencyPaths. A single eviction entry is emitted for `diffSet.deleted`
 * when that list is non-empty.
 *
 * @param project - Parsed LookML project to chunk.
 * @param opts - Optional diff set and mismatched model names.
 * @returns The WorkUnits and, on re-sync with deletions, the eviction.
 */
export function chunkLookmlProject(project: ParsedLookmlProject, opts: ChunkOptions = {}): ChunkResult {
  const graph = buildLookmlGraph(project);
  const workUnits = emitFirstRunWorkUnits(project, graph, opts);
  const { diffSet } = opts;
  return diffSet ? applyDiffSet(workUnits, project, graph, diffSet) : { workUnits };
}
|
||||
|
||||
/**
 * Build the full (first-run) list of WorkUnits for a project: one per model,
 * then an optional orphan-views unit, then one per dashboard.
 *
 * @param project - Parsed project supplying models, views and dashboards.
 * @param graph - Graph providing `viewsIncludedByModel` and `ownerByViewPath`.
 * @param opts - Only `mismatchedModelNames` is read here.
 * @returns WorkUnits in emission order (models by name, orphans, dashboards by name).
 */
function emitFirstRunWorkUnits(project: ParsedLookmlProject, graph: LookmlGraph, opts: ChunkOptions): WorkUnit[] {
  const allModelPaths = [...new Set(project.models.map((m) => m.path))].sort();
  const allDashboardPaths = [...new Set(project.dashboards.map((d) => d.path))].sort();
  // Deduplicate: several parsed views can share one path (a .view.lkml file with
  // multiple `view:` blocks).
  const allViewPaths = [...new Set(project.views.map((v) => v.path))].sort();
  // Set form of allViewPaths so membership tests inside the model loop are O(1).
  const knownViewPaths = new Set(allViewPaths);

  const workUnits: WorkUnit[] = [];

  // One WorkUnit per model, ordered by model name.
  // NOTE(review): localeCompare with no locale argument uses the runtime's default
  // locale, so ordering of non-ASCII / mixed-case names may vary by environment.
  const sortedModels = [...project.models].sort((a, b) => a.name.localeCompare(b.name));

  for (const model of sortedModels) {
    // Ignore included paths that do not correspond to a parsed view.
    const includedViewPaths = (graph.viewsIncludedByModel.get(model.name) ?? []).filter((p) =>
      knownViewPaths.has(p),
    );
    // Included views for which the graph names this model as owner.
    const ownedViewPaths = includedViewPaths.filter((p) => graph.ownerByViewPath.get(p) === model.name);
    // Included views owned by some other model: listed as dependencies so they can
    // be read from this unit, while they remain raw files of the owner's unit.
    const nonOwnedDepViewPaths = includedViewPaths.filter((p) => graph.ownerByViewPath.get(p) !== model.name).sort();

    const rawFiles = [model.path, ...ownedViewPaths].sort();
    // Paths already carried as raw files or dependencies are left out of the peer index.
    const excludedFromPeers = new Set([...rawFiles, ...nonOwnedDepViewPaths]);
    const peerFileIndex = [
      ...allModelPaths.filter((p) => p !== model.path),
      ...allViewPaths.filter((p) => !excludedFromPeers.has(p)),
      ...allDashboardPaths,
    ].sort();

    const isMismatched = opts.mismatchedModelNames?.has(model.name) ?? false;
    const notes =
      model.explores.length > 0
        ? `LookML model "${model.name}" (explores: ${model.explores.join(', ')})`
        : `LookML model "${model.name}"`;

    workUnits.push({
      unitKey: `lookml-${model.name}`,
      displayLabel: `LookML model "${model.name}"`,
      rawFiles,
      peerFileIndex,
      dependencyPaths: nonOwnedDepViewPaths,
      notes: isMismatched ? lookmlSlDisallowedNotes(model.name, notes) : notes,
      slDisallowed: isMismatched ? true : undefined,
      slDisallowedReason: isMismatched ? 'lookml_connection_mismatch' : undefined,
    });
  }

  // Orphan unit: views with no owner in the graph. Omitted when there are none.
  const orphanViewPaths = allViewPaths.filter((p) => !graph.ownerByViewPath.has(p)).sort();
  if (orphanViewPaths.length > 0) {
    workUnits.push({
      unitKey: 'lookml-orphans',
      displayLabel: 'LookML orphan views',
      rawFiles: orphanViewPaths,
      peerFileIndex: [...allModelPaths, ...allDashboardPaths].sort(),
      dependencyPaths: [],
      notes: 'Views not referenced by any .model.lkml (orphaned)',
    });
  }

  // One unit per dashboard, ordered by dashboard name.
  for (const dashboard of [...project.dashboards].sort((a, b) => a.name.localeCompare(b.name))) {
    workUnits.push({
      unitKey: `lookml-dashboard-${dashboard.name}`,
      displayLabel: `LookML dashboard "${dashboard.name}"`,
      rawFiles: [dashboard.path],
      peerFileIndex: [...allModelPaths, ...allViewPaths].sort(),
      dependencyPaths: [],
      notes: `LookML dashboard "${dashboard.name}"`,
    });
  }

  return workUnits;
}
|
||||
|
||||
/**
 * Narrow first-run WorkUnits to those affected by a diff and widen their
 * dependencies.
 *
 * A unit is kept only when at least one of its rawFiles is in
 * `diffSet.added` or `diffSet.modified`. For each kept unit, every view found
 * in its rawFiles contributes the paths of its extends ancestors (as recorded
 * in the graph) to dependencyPaths, unless the path is already a raw file of
 * the unit. Paths that end up in dependencyPaths are removed from
 * peerFileIndex so a path is never listed in both. Input units are not mutated.
 *
 * @param firstRunUnits - Units as produced for a first run.
 * @param _project - Unused; kept so the signature stays stable for callers.
 * @param graph - Graph providing `viewNamesByPath`, `extendsAncestorsByViewName`
 *   and `pathsByViewName`.
 * @param diffSet - Added / modified / deleted paths for this re-sync.
 * @returns The kept units plus an eviction listing the sorted deleted paths,
 *   or `eviction: undefined` when nothing was deleted.
 */
function applyDiffSet(
  firstRunUnits: WorkUnit[],
  _project: ParsedLookmlProject,
  graph: LookmlGraph,
  diffSet: DiffSet,
): ChunkResult {
  const touched = new Set([...diffSet.added, ...diffSet.modified]);
  const keptUnits: WorkUnit[] = [];

  for (const wu of firstRunUnits) {
    if (!wu.rawFiles.some((p) => touched.has(p))) {
      continue;
    }

    // Set form of rawFiles so the innermost membership test is O(1).
    const ownedPaths = new Set(wu.rawFiles);
    const widenedDeps = new Set(wu.dependencyPaths);
    for (const rawPath of wu.rawFiles) {
      for (const viewName of graph.viewNamesByPath.get(rawPath) ?? []) {
        for (const ancestorName of graph.extendsAncestorsByViewName.get(viewName) ?? []) {
          for (const ancestorPath of graph.pathsByViewName.get(ancestorName) ?? []) {
            if (!ownedPaths.has(ancestorPath)) {
              widenedDeps.add(ancestorPath);
            }
          }
        }
      }
    }

    keptUnits.push({
      ...wu,
      // A newly added ancestor may have been listed as a peer on the first run;
      // drop it so dependency paths are not duplicated in the peer index.
      peerFileIndex: wu.peerFileIndex.filter((p) => !widenedDeps.has(p)),
      dependencyPaths: [...widenedDeps].sort(),
    });
  }

  const eviction = diffSet.deleted.length > 0 ? { deletedRawPaths: [...diffSet.deleted].sort() } : undefined;
  return { workUnits: keptUnits, eviction };
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue