Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,42 @@
import { describe, expect, it } from 'vitest';
import { actionTargetConnectionId, memoryActionIdentity } from './action-identity.js';
// Covers how a memory action is attributed to a connection and how its identity key is built.
describe('memory action target identity', () => {
it('keys SL actions by target connection and wiki actions by run connection', () => {
// An SL action that names its own targetConnectionId is keyed by it, not by the run connection.
expect(
memoryActionIdentity(
{ target: 'sl', type: 'created', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
'looker-run',
),
).toBe('sl:warehouse-b:orders');
// Without a targetConnectionId the run connection is used instead.
expect(memoryActionIdentity({ target: 'sl', type: 'created', key: 'orders', detail: '' }, 'warehouse-a')).toBe(
'sl:warehouse-a:orders',
);
// Wiki actions ignore targetConnectionId and are always keyed by the run connection.
expect(
memoryActionIdentity(
{
target: 'wiki',
type: 'created',
key: 'knowledge/global/orders.md',
detail: '',
targetConnectionId: 'ignored',
},
'looker-run',
),
).toBe('wiki:looker-run:knowledge/global/orders.md');
});
it('resolves action target connection only for SL actions', () => {
// SL action: the explicit target connection wins.
expect(
actionTargetConnectionId(
{ target: 'sl', type: 'updated', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
'looker-run',
),
).toBe('warehouse-b');
// Non-SL action: the run connection is returned.
expect(actionTargetConnectionId({ target: 'wiki', type: 'updated', key: 'orders', detail: '' }, 'looker-run')).toBe(
'looker-run',
);
});
});

View file

@ -0,0 +1,9 @@
import type { MemoryAction } from '../memory/index.js';
/**
 * Resolves the connection a memory action applies to.
 *
 * Only actions whose `target` is `'sl'` may override the connection through their own
 * `targetConnectionId`; for every other target that field is ignored.
 *
 * @param action - The memory action being attributed.
 * @param runConnectionId - Connection of the run that produced the action; used as the fallback.
 * @returns The action's `targetConnectionId` for `'sl'` actions that define one, otherwise `runConnectionId`.
 */
export function actionTargetConnectionId(action: MemoryAction, runConnectionId: string): string {
  if (action.target !== 'sl') {
    return runConnectionId;
  }
  return action.targetConnectionId ?? runConnectionId;
}
/**
 * Builds the identity key of a memory action in the form `<target>:<connectionId>:<key>`.
 *
 * The connection segment comes from `actionTargetConnectionId`, so the same key on two
 * different connections yields two distinct identities.
 *
 * @param action - The memory action to identify.
 * @param runConnectionId - Connection of the run that produced the action.
 * @returns The colon-separated identity string.
 */
export function memoryActionIdentity(action: MemoryAction, runConnectionId: string): string {
  const connectionId = actionTargetConnectionId(action, runConnectionId);
  return `${action.target}:${connectionId}:${action.key}`;
}

View file

@ -0,0 +1,75 @@
import { describe, expect, it } from 'vitest';
import type { DbtParsedTable } from './parse-schema.js';
import { findMatchingKloTable, matchDbtTables, type DbtHostTableLite } from './match-tables.js';
// Host-side fixture: two 'orders' tables that differ only by `db` ('analytics' vs 'staging'),
// plus a 'customers' table with no catalog or db. A name-only lookup is therefore ambiguous
// for 'orders' and unique for 'customers'.
const hostTables: DbtHostTableLite[] = [
{ id: '1', name: 'orders', catalog: 'warehouse', db: 'analytics', columns: [{ id: 'c1', name: 'id' }] },
{ id: '2', name: 'orders', catalog: 'warehouse', db: 'staging', columns: [{ id: 'c2', name: 'id' }] },
{ id: '3', name: 'customers', catalog: null, db: null, columns: [{ id: 'c3', name: 'id' }] },
];
/**
 * Test helper: builds a parsed dbt table from defaults (a model named 'orders' with no
 * description, database, schema or columns), overridden by whatever `input` provides.
 */
function table(input: Partial<DbtParsedTable>): DbtParsedTable {
  const defaults: DbtParsedTable = {
    name: 'orders',
    description: null,
    database: null,
    schema: null,
    columns: [],
    resourceType: 'model',
  };
  return Object.assign({}, defaults, input);
}
// Exercises the lookup order of findMatchingKloTable and the per-table summary of matchDbtTables.
describe('dbt descriptions table matching', () => {
it('uses schema plus name first and checks catalog when dbt database is present', () => {
// schema 'analytics' + name 'orders' selects host table 1; its catalog equals the dbt database.
expect(
findMatchingKloTable(table({ database: 'warehouse', schema: 'analytics' }), hostTables, null)?.id,
).toBe('1');
});
it('does not fall back to name-only for source tables', () => {
expect(findMatchingKloTable(table({ resourceType: 'source' }), hostTables, null)).toBeUndefined();
});
it('uses targetSchema for models and name-only only when unique', () => {
// targetSchema stands in for the missing dbt schema.
expect(findMatchingKloTable(table({ resourceType: 'model' }), hostTables, 'staging')?.id).toBe('2');
// 'customers' exists once, so the name-only fallback resolves it.
expect(findMatchingKloTable(table({ name: 'customers', resourceType: 'model' }), hostTables, null)?.id).toBe(
'3',
);
// 'orders' exists twice, so the name-only fallback refuses to guess.
expect(findMatchingKloTable(table({ resourceType: 'model' }), hostTables, null)).toBeUndefined();
});
it('summarizes matched columns and descriptions', () => {
const matches = matchDbtTables(
[
table({
name: 'customers',
description: 'Customers',
columns: [
{ name: 'id', description: 'Primary key', dataType: null },
{ name: 'missing', description: 'Missing', dataType: null },
],
}),
],
hostTables,
null,
);
// 'missing' has no host column, so only 'id' counts as matched, described and importable.
expect(matches).toEqual([
{
dbtTable: 'customers',
dbtSchema: null,
dbtDatabase: null,
hostTableId: '3',
hostTableName: 'customers',
matched: true,
tableDescriptionAction: 'import',
tableDescriptionFound: true,
columnsToImport: 1,
columnsMatched: 1,
columnsTotal: 2,
columnDescriptionsFound: 1,
},
]);
});
});

View file

@ -0,0 +1,127 @@
import type { DbtParsedTable } from './parse-schema.js';
/** Minimal view of a host-side table used when matching dbt tables against it. */
export interface DbtHostTableLite {
id: string;
name: string;
/** Compared (case-insensitively) with the dbt table's `database` when both are set. */
catalog: string | null;
/** Compared (case-insensitively) with the dbt table's schema, or with `targetSchema` when the dbt schema is null. */
db: string | null;
columns: Array<{ id: string; name: string }>;
}
/** Summary of how one parsed dbt table lines up with the host tables. */
export interface DbtTableMatch {
dbtTable: string;
dbtSchema: string | null;
dbtDatabase: string | null;
/** Null when no host table matched. */
hostTableId: string | null;
/** Null when no host table matched. */
hostTableName: string | null;
matched: boolean;
/** 'import' only for a matched table that has a dbt description; otherwise 'skip'. */
tableDescriptionAction: 'skip' | 'import';
/** Whether the dbt table carries a description, regardless of matching. */
tableDescriptionFound: boolean;
/** Matched columns that have a dbt description; 0 for an unmatched table. */
columnsToImport: number;
/** dbt columns whose name (case-insensitive) exists on the host table; 0 for an unmatched table. */
columnsMatched: number;
/** Number of columns declared on the dbt table. */
columnsTotal: number;
/**
 * Columns with a dbt description. For a matched table only matched columns are counted;
 * for an unmatched table every described dbt column is counted.
 */
columnDescriptionsFound: number;
}
/**
 * Matches every parsed dbt table against the host tables and reports, per dbt table,
 * whether a host table was found and how many descriptions could be imported.
 *
 * @param dbtTables - Tables parsed from dbt schema files.
 * @param hostTables - Candidate host tables.
 * @param targetSchema - Schema to assume for dbt tables that do not declare one.
 * @returns One summary per dbt table, in input order.
 */
export function matchDbtTables(
  dbtTables: DbtParsedTable[],
  hostTables: DbtHostTableLite[],
  targetSchema?: string | null,
): DbtTableMatch[] {
  const summaries: DbtTableMatch[] = [];
  for (const dbtTable of dbtTables) {
    const hasDescription = Boolean(dbtTable.description);
    const hostTable = findMatchingKloTable(dbtTable, hostTables, targetSchema);

    if (hostTable) {
      summaries.push({
        dbtTable: dbtTable.name,
        dbtSchema: dbtTable.schema,
        dbtDatabase: dbtTable.database,
        hostTableId: hostTable.id,
        hostTableName: hostTable.name,
        matched: true,
        tableDescriptionAction: dbtTable.description ? 'import' : 'skip',
        tableDescriptionFound: hasDescription,
        ...analyzeColumns(dbtTable, hostTable),
      });
      continue;
    }

    // No host table: nothing can be imported, but still report what the dbt side declares.
    const describedColumns = dbtTable.columns.filter((column) => Boolean(column.description));
    summaries.push({
      dbtTable: dbtTable.name,
      dbtSchema: dbtTable.schema,
      dbtDatabase: dbtTable.database,
      hostTableId: null,
      hostTableName: null,
      matched: false,
      tableDescriptionAction: 'skip',
      tableDescriptionFound: hasDescription,
      columnsToImport: 0,
      columnsMatched: 0,
      columnsTotal: dbtTable.columns.length,
      columnDescriptionsFound: describedColumns.length,
    });
  }
  return summaries;
}
/**
 * Finds the host table that corresponds to a parsed dbt table. All comparisons are
 * case-insensitive.
 *
 * Lookup order:
 * 1. Strict: same name and same schema (the dbt schema, or `targetSchema` when the dbt
 *    table has none). If both the dbt table and the host table specify a database/catalog,
 *    those must agree as well. The first host table that qualifies wins.
 * 2. Name-only fallback, never used for `source` tables, and only when exactly one host
 *    table carries that name.
 *
 * @returns The matching host table, or `undefined` when none (or no unique one) exists.
 */
export function findMatchingKloTable(
  dbtTable: DbtParsedTable,
  hostTables: DbtHostTableLite[],
  targetSchema?: string | null,
): DbtHostTableLite | undefined {
  const wantedName = dbtTable.name.toLowerCase();
  const schema = dbtTable.schema ?? targetSchema ?? null;

  if (schema) {
    const wantedSchema = schema.toLowerCase();
    const wantedCatalog = dbtTable.database ? dbtTable.database.toLowerCase() : null;
    for (const candidate of hostTables) {
      if (candidate.name.toLowerCase() !== wantedName) {
        continue;
      }
      if (candidate.db?.toLowerCase() !== wantedSchema) {
        continue;
      }
      // A catalog mismatch only disqualifies when both sides actually name one.
      if (wantedCatalog !== null && candidate.catalog && candidate.catalog.toLowerCase() !== wantedCatalog) {
        continue;
      }
      return candidate;
    }
  }

  if (dbtTable.resourceType === 'source') {
    return undefined;
  }

  let sameNameCount = 0;
  let firstSameName: DbtHostTableLite | undefined;
  for (const candidate of hostTables) {
    if (candidate.name.toLowerCase() === wantedName) {
      sameNameCount += 1;
      firstSameName ??= candidate;
    }
  }
  return sameNameCount === 1 ? firstSameName : undefined;
}
/**
 * Counts how the columns of a dbt table line up with a matched host table.
 *
 * Column names are compared case-insensitively. Only dbt columns that exist on the host
 * table are considered for descriptions, so `columnsToImport` and `columnDescriptionsFound`
 * are always equal here.
 *
 * @returns The four column counters of a `DbtTableMatch`.
 */
function analyzeColumns(
  dbtTable: DbtParsedTable,
  hostTable: DbtHostTableLite,
): Pick<DbtTableMatch, 'columnsToImport' | 'columnsMatched' | 'columnsTotal' | 'columnDescriptionsFound'> {
  const hostColumnNames = new Set(hostTable.columns.map((column) => column.name.toLowerCase()));
  const matchedColumns = dbtTable.columns.filter((column) => hostColumnNames.has(column.name.toLowerCase()));
  const describedMatches = matchedColumns.filter((column) => Boolean(column.description)).length;

  return {
    columnsToImport: describedMatches,
    columnsMatched: matchedColumns.length,
    columnsTotal: dbtTable.columns.length,
    columnDescriptionsFound: describedMatches,
  };
}

View file

@ -0,0 +1,62 @@
import { describe, expect, it } from 'vitest';
import type { ParsedSemanticModel } from '../metricflow/deep-parse.js';
import { mergeSemanticModelTables } from './merge-semantic-model-tables.js';
import type { DbtSchemaParseResult } from './parse-schema.js';
// Fixture: a semantic model that points at the dbt model 'fct_orders' and exposes one
// categorical dimension (with a description) and one time dimension (without).
const semanticModel: ParsedSemanticModel = {
name: 'orders_semantic',
description: 'Order facts',
modelRef: 'fct_orders',
dimensions: [
{ name: 'status', column: 'status', type: 'categorical', description: 'Order status' },
{ name: 'ordered_at', column: 'ordered_at', type: 'time' },
],
measures: [],
entities: [],
defaultTimeDimension: null,
};
describe('mergeSemanticModelTables', () => {
it('adds missing MetricFlow model refs as dbt model tables', () => {
const input: DbtSchemaParseResult = { projectName: null, dbtVersion: null, tables: [], relationships: [] };
// The table is named after modelRef (not the semantic model name); time dimensions become TIMESTAMP columns.
expect(mergeSemanticModelTables(input, [semanticModel])).toEqual({
projectName: null,
dbtVersion: null,
relationships: [],
tables: [
{
name: 'fct_orders',
description: 'Order facts',
database: null,
schema: null,
resourceType: 'model',
columns: [
{ name: 'status', description: 'Order status', dataType: null },
{ name: 'ordered_at', description: null, dataType: 'TIMESTAMP' },
],
},
],
});
});
it('does not add a duplicate table when schema parsing already found the model ref', () => {
// The existing name differs only by case, which still counts as the same table.
const input: DbtSchemaParseResult = {
projectName: null,
dbtVersion: null,
relationships: [],
tables: [
{
name: 'FCT_ORDERS',
description: 'Existing',
database: null,
schema: null,
resourceType: 'model',
columns: [],
},
],
};
expect(mergeSemanticModelTables(input, [semanticModel]).tables).toHaveLength(1);
});
});

View file

@ -0,0 +1,37 @@
import type { ParsedSemanticModel } from '../metricflow/deep-parse.js';
import type { DbtSchemaParseResult } from './parse-schema.js';
/**
 * Adds a dbt model table for every semantic model whose `modelRef` is not already present
 * in the parse result (names compared case-insensitively).
 *
 * Each added table takes its description from the semantic model and one column per
 * dimension; `time` dimensions get the data type `'TIMESTAMP'`, all others `null`.
 * The input is not mutated: the `tables` and `relationships` arrays of the result are copies.
 *
 * @param parseResult - Result of parsing the dbt schema files.
 * @param semanticModels - Parsed semantic models to merge in.
 * @returns A new parse result including the added tables.
 */
export function mergeSemanticModelTables(
  parseResult: DbtSchemaParseResult,
  semanticModels: ParsedSemanticModel[],
): DbtSchemaParseResult {
  const tables = [...parseResult.tables];
  const knownNames = new Set(tables.map((existing) => existing.name.toLowerCase()));

  for (const model of semanticModels) {
    const normalizedRef = model.modelRef.toLowerCase();
    if (knownNames.has(normalizedRef)) {
      continue;
    }
    const columns = model.dimensions.map((dimension) => ({
      name: dimension.column,
      description: dimension.description ?? null,
      dataType: dimension.type === 'time' ? 'TIMESTAMP' : null,
    }));
    tables.push({
      name: model.modelRef,
      description: model.description,
      database: null,
      schema: null,
      columns,
      resourceType: 'model',
    });
    // Remember the name so a second semantic model with the same ref is not added twice.
    knownNames.add(normalizedRef);
  }

  return {
    ...parseResult,
    tables,
    relationships: [...parseResult.relationships],
  };
}

View file

@ -0,0 +1,214 @@
import { describe, expect, it } from 'vitest';
import { parseDbtSchemaFile, parseDbtSchemaFiles } from './parse-schema.js';
// End-to-end checks of the schema parser: variable resolution, multi-file merging,
// tolerance of malformed input, test/constraint extraction and relationship parsing.
describe('dbt descriptions schema parser', () => {
// Variables are substituted before YAML parsing; a var() default is used when the variable is not supplied.
it('resolves shared dbt vars and defaults before parsing schema YAML', () => {
const result = parseDbtSchemaFile(
`
version: 2
sources:
- name: raw
database: "{{ var('database') }}"
schema: "{{ var('schema', 'fallback_schema') }}"
tables:
- name: orders
identifier: fct_orders
description: "Orders from {{ var('database') }}"
columns:
- name: customer_id
description: "Customer id"
tests:
- relationships:
to: ref('customers')
field: id
models:
- name: "{{ var('model_name', 'orders_model') }}"
schema: "{{ var('model_schema') }}"
columns:
- name: id
description: "Order id"
`,
{ path: 'models/schema.yml', variables: new Map([['database', 'analytics'], ['model_schema', 'mart']]) },
);
// The source table is reported under its `identifier`, not its `name`.
expect(result.tables).toEqual([
{
name: 'fct_orders',
description: 'Orders from analytics',
database: 'analytics',
schema: 'fallback_schema',
columns: [
{
name: 'customer_id',
description: 'Customer id',
dataType: null,
dataTests: [{ name: 'relationships', package: 'dbt', kwargs: { to: "ref('customers')", field: 'id' } }],
},
],
resourceType: 'source',
},
{
name: 'orders_model',
description: null,
database: null,
schema: 'mart',
columns: [{ name: 'id', description: 'Order id', dataType: null }],
resourceType: 'model',
},
]);
expect(result.relationships).toEqual([
{
fromTable: 'fct_orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
fromSchema: 'fallback_schema',
},
]);
});
// The same model declared in two files collapses into one table; the first non-null value of each field wins.
it('deduplicates tables by database schema and name while merging columns', () => {
const result = parseDbtSchemaFiles([
{
path: 'models/a.yml',
content: `
version: 2
models:
- name: orders
description: Orders
columns:
- name: id
description: Primary key
`,
},
{
path: 'models/b.yml',
content: `
version: 2
models:
- name: orders
columns:
- name: status
description: Status
- name: id
data_type: integer
`,
},
]);
expect(result.tables).toEqual([
{
name: 'orders',
description: 'Orders',
database: null,
schema: null,
resourceType: 'model',
columns: [
{ name: 'id', description: 'Primary key', dataType: 'integer' },
{ name: 'status', description: 'Status', dataType: null },
],
},
]);
});
// Broken YAML must not throw, and an unresolved var() expression is kept verbatim.
it('returns an empty result for malformed YAML and preserves unresolved Jinja text', () => {
expect(parseDbtSchemaFile('{{{{ invalid yaml', { path: 'broken.yml' })).toEqual({
projectName: null,
dbtVersion: null,
tables: [],
relationships: [],
});
const unresolved = parseDbtSchemaFile(
`
version: 2
models:
- name: "{{ var('missing_model') }}"
`,
{ variables: new Map() },
);
expect(unresolved.tables[0]?.name).toBe("{{ var('missing_model') }}");
});
// Covers both `tests` and `data_tests` keys, string and mapping test forms, and package-qualified test names.
it('extracts data tests, constraints, enum values, tags, and freshness', () => {
const result = parseDbtSchemaFile(`
version: 2
sources:
- name: raw
schema: jaffle
tags: ["raw"]
tables:
- name: customers
tags: ["core"]
loaded_at_field: updated_at
freshness:
warn_after: { count: 12, period: hour }
columns:
- name: id
tests:
- not_null
- unique
- name: status
data_tests:
- accepted_values:
values: ['active', 'inactive']
models:
- name: orders
tags: ["finance"]
loaded_at_field: run_at
columns:
- name: status
data_tests:
- dbt_utils.expression_is_true:
expression: "status is not null"
- accepted_values: ['placed', 'shipped']
`);
const customers = result.tables.find((table) => table.name === 'customers');
// Source-level tags come first, followed by table-level tags.
expect(customers?.tagsDbt).toEqual(['raw', 'core']);
expect(customers?.freshnessDbt?.loadedAtField).toBe('updated_at');
expect(customers?.freshnessDbt?.raw).toBeDefined();
const id = customers?.columns.find((column) => column.name === 'id');
expect(id?.constraints?.dbt).toEqual({ not_null: true, unique: true });
const status = customers?.columns.find((column) => column.name === 'status');
expect(status?.enumValuesDbt).toEqual(['active', 'inactive']);
const orders = result.tables.find((table) => table.name === 'orders');
expect(orders?.tagsDbt).toEqual(['finance']);
expect(orders?.freshnessDbt?.loadedAtField).toBe('run_at');
const ordersStatus = orders?.columns.find((column) => column.name === 'status');
expect(ordersStatus?.enumValuesDbt).toEqual(['placed', 'shipped']);
expect(ordersStatus?.dataTests).toEqual(
expect.arrayContaining([
expect.objectContaining({ package: 'dbt_utils', name: 'expression_is_true' }),
expect.objectContaining({ package: 'dbt', name: 'accepted_values' }),
]),
);
});
// Relationship arguments nested under `arguments:` are accepted as well.
it('parses relationships from model column data tests', () => {
const result = parseDbtSchemaFile(`
version: 2
models:
- name: orders
schema: public
columns:
- name: customer_id
data_tests:
- relationships:
arguments:
to: "ref('customers')"
field: id
`);
expect(result.relationships).toEqual([
{
fromTable: 'orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
fromSchema: 'public',
},
]);
});
});

View file

@ -0,0 +1,655 @@
import { createHash } from 'node:crypto';
import { parse as parseYaml } from 'yaml';
import { type KloLogger, noopLogger } from '../../../core/index.js';
import { resolveJinjaVariables } from '../../dbt-shared/project-vars.js';
/** A column declared in a dbt schema file. */
export interface DbtParsedColumn {
name: string;
/** Trimmed description, or null when absent or blank. */
description: string | null;
/** The column's `data_type`, or null when not declared. */
dataType: string | null;
/** Tests declared on the column; omitted when there are none. */
dataTests?: DbtDataTestRef[];
/** Present only when a `not_null` or `unique` test was found. */
constraints?: DbtColumnConstraints;
/** Stringified values of `accepted_values` tests; omitted when there are none. */
enumValuesDbt?: string[];
}
/** Reference to a single data test attached to a column. */
export interface DbtDataTestRef {
name: string;
/** 'dbt' for unqualified test names; otherwise the part before the first dot of the test name. */
package: string;
/** The test's mapping-style arguments, when it was declared with any. */
kwargs?: Record<string, unknown>;
}
/** Constraints derived from dbt tests on a column. */
export interface DbtColumnConstraints {
dbt: {
not_null?: boolean;
unique?: boolean;
};
}
/** A relationship derived from a dbt `relationships` test. */
export interface DbtParsedRelationship {
fromTable: string;
fromColumn: string;
/** Table name extracted from the test's `ref(...)` or `source(...)` expression. */
toTable: string;
toColumn: string;
/** Schema of the declaring table, when known. */
fromSchema?: string;
// NOTE(review): toSchema and description are not populated by the parser in this file.
toSchema?: string;
description?: string;
}
/** A source table or model declared in a dbt schema file. */
export interface DbtParsedTable {
/** For source tables this is the `identifier` when given, otherwise the table name. */
name: string;
description: string | null;
database: string | null;
/** For source tables this falls back to the source name when no schema is declared. */
schema: string | null;
columns: DbtParsedColumn[];
resourceType?: 'source' | 'model';
/** De-duplicated tags; for source tables the source-level tags come first. */
tagsDbt?: string[];
freshnessDbt?: {
/** The `freshness` value exactly as it appeared in the YAML. */
raw?: unknown;
/** Trimmed `loaded_at_field`. */
loadedAtField?: string | null;
};
}
/** Combined outcome of parsing one or more schema files. */
export interface DbtSchemaParseResult {
projectName: string | null;
// NOTE(review): the parser in this file always sets dbtVersion to null.
dbtVersion: string | null;
tables: DbtParsedTable[];
relationships: DbtParsedRelationship[];
}
/** A schema file to parse: its raw text plus the path used in log messages and hashing. */
export interface DbtSchemaFile {
content: string;
path: string;
}
/** Options accepted when parsing a single schema file. */
interface ParseDbtSchemaOptions {
/** Only used in log messages. */
path?: string;
/** When provided, Jinja variables are resolved against this map before the YAML is parsed. */
variables?: Map<string, string>;
projectName?: string | null;
/** Defaults to a no-op logger. */
logger?: KloLogger;
}
// The interfaces below describe the expected shape of the YAML. The parsed document is only
// cast to them, not validated, so the fields may be missing or malformed at runtime.
interface DbtSchemaYaml {
version?: number;
sources?: DbtSchemaSource[];
models?: DbtSchemaModel[];
}
interface DbtSchemaSource {
name: string;
description?: string;
database?: string;
schema?: string;
tags?: string[];
tables?: DbtSchemaTable[];
}
interface DbtSchemaTable {
name: string;
description?: string;
/** When present, used as the table name instead of `name`. */
identifier?: string;
tags?: string[];
loaded_at_field?: string;
freshness?: unknown;
columns?: DbtSchemaColumn[];
}
interface DbtSchemaModel {
name: string;
description?: string;
database?: string;
schema?: string;
tags?: string[];
loaded_at_field?: string;
freshness?: unknown;
columns?: DbtSchemaColumn[];
}
interface DbtSchemaColumn {
name: string;
description?: string;
data_type?: string;
/** Takes precedence over `tests` when both keys are present. */
data_tests?: DbtSchemaDataTest[];
tests?: DbtSchemaDataTest[];
}
/** A test entry: either a bare test name or a mapping from test name to its arguments. */
type DbtSchemaDataTest =
| string
| {
relationships?: {
to?: string;
field?: string;
arguments?: { to?: string; field?: string };
};
not_null?: unknown;
unique?: unknown;
accepted_values?: { values?: unknown } | unknown;
[key: string]: unknown;
};
/**
 * Parses the text of a single dbt schema YAML file.
 *
 * @param content - Raw YAML text.
 * @param options - Optional path (for log messages), variables, project name and logger.
 *   Without a logger, a no-op logger is used.
 * @returns The tables and relationships found in the file.
 */
export function parseDbtSchemaFile(content: string, options: ParseDbtSchemaOptions = {}): DbtSchemaParseResult {
  const logger = options.logger ?? noopLogger;
  const parser = new DbtSchemaParser(logger);
  return parser.parseFile(content, options);
}
/**
 * Parses several dbt schema YAML files and merges the results, de-duplicating tables
 * and relationships across files.
 *
 * @param files - Files to parse, in order.
 * @param variables - Variables used to resolve Jinja expressions in every file.
 * @param options - Optional project name (defaults to null) and logger (defaults to a no-op logger).
 * @returns The merged parse result.
 */
export function parseDbtSchemaFiles(
  files: DbtSchemaFile[],
  variables?: Map<string, string>,
  options: { projectName?: string | null; logger?: KloLogger } = {},
): DbtSchemaParseResult {
  const logger = options.logger ?? noopLogger;
  const projectName = options.projectName ?? null;
  const parser = new DbtSchemaParser(logger);
  return parser.parseFiles(files, variables, projectName);
}
/**
 * Computes a short fingerprint of a set of schema files, independent of the order in
 * which the files are passed.
 *
 * Files are ordered by path using plain code-unit comparison rather than `localeCompare`:
 * locale-aware collation depends on the runtime's default locale and ICU data, so the same
 * files could otherwise hash differently on different machines.
 *
 * @param files - Files to fingerprint; the array is not mutated.
 * @returns The first 16 hex characters of the SHA-256 of the ordered `path:content` lines.
 */
export function computeDbtSchemaHash(files: DbtSchemaFile[]): string {
  const ordered = [...files].sort((a, b) => {
    if (a.path < b.path) {
      return -1;
    }
    return a.path > b.path ? 1 : 0;
  });
  const combined = ordered.map((file) => `${file.path}:${file.content}`).join('\n');
  return createHash('sha256').update(combined).digest('hex').substring(0, 16);
}
/**
 * Parses dbt schema YAML (`sources:` and `models:` sections) into tables, columns and
 * relationships, and merges the results of several files.
 *
 * Parsing is best-effort: malformed YAML yields an empty result, and malformed entries
 * (source tables or models without a name, test entries that are not a string or a
 * mapping) are skipped instead of throwing.
 */
class DbtSchemaParser {
  constructor(private readonly logger: KloLogger) {}

  /**
   * Parses one schema file.
   *
   * @param yamlContent - Raw YAML text.
   * @param options - Path (log messages only), variables and project name.
   * @returns Tables and relationships of the file; an empty result when the YAML cannot
   *   be parsed or does not contain a mapping/sequence at the top level.
   */
  parseFile(yamlContent: string, options: ParseDbtSchemaOptions = {}): DbtSchemaParseResult {
    this.logger.debug(`Parsing schema file: ${options.path ?? 'unknown'}`);
    // Variables are substituted in the raw text before YAML parsing; without a variables
    // map the text is parsed as-is.
    const resolved = options.variables
      ? resolveJinjaVariables(yamlContent, options.variables)
      : { content: yamlContent, unresolvedVars: [] };
    if (resolved.unresolvedVars.length > 0) {
      this.logger.warn(
        `Unresolved dbt variables in ${options.path ?? 'schema file'}: ${resolved.unresolvedVars.join(', ')}`,
      );
    }
    let schema: DbtSchemaYaml;
    try {
      schema = parseYaml(resolved.content) as DbtSchemaYaml;
    } catch (error) {
      this.logger.warn(`Failed to parse YAML${options.path ? ` at ${options.path}` : ''}: ${error}`);
      return this.emptyResult(options.projectName ?? null);
    }
    if (!schema || typeof schema !== 'object') {
      return this.emptyResult(options.projectName ?? null);
    }
    const tables = [...this.parseSources(schema.sources), ...this.parseModels(schema.models)];
    const relationships = [
      ...this.parseSourceRelationships(schema.sources),
      ...this.parseModelRelationships(schema.models),
    ];
    return {
      projectName: options.projectName ?? null,
      dbtVersion: null,
      tables,
      relationships,
    };
  }

  /**
   * Parses several files and merges them: tables are de-duplicated by
   * database/schema/name and relationships by their endpoints.
   */
  parseFiles(
    files: DbtSchemaFile[],
    variables?: Map<string, string>,
    projectName: string | null = null,
  ): DbtSchemaParseResult {
    const allTables: DbtParsedTable[] = [];
    const allRelationships: DbtParsedRelationship[] = [];
    for (const file of files) {
      const result = this.parseFile(file.content, { path: file.path, variables, projectName });
      allTables.push(...result.tables);
      allRelationships.push(...result.relationships);
    }
    return {
      projectName,
      dbtVersion: null,
      tables: this.deduplicateTables(allTables),
      relationships: this.deduplicateRelationships(allRelationships),
    };
  }

  /**
   * Converts the `sources:` section into tables. Each table inherits the source's database,
   * schema (falling back to the source name) and tags. Tables with neither `identifier`
   * nor `name` are skipped.
   */
  private parseSources(sources: DbtSchemaSource[] | undefined): DbtParsedTable[] {
    if (!sources || !Array.isArray(sources)) {
      return [];
    }
    const tables: DbtParsedTable[] = [];
    for (const source of sources) {
      const sourceSchema = source.schema ?? source.name;
      const sourceDatabase = source.database ?? null;
      const sourceTags = this.normalizeTagList(source.tags);
      if (!source.tables || !Array.isArray(source.tables)) {
        continue;
      }
      for (const table of source.tables) {
        const tableName = table.identifier ?? table.name;
        // A nameless table cannot be matched and would break later name-based merging.
        if (!tableName) {
          continue;
        }
        const tagsDbt = this.mergeTagsDbt(sourceTags, this.normalizeTagList(table.tags));
        const freshnessDbt = this.buildFreshnessDbt(table.freshness, table.loaded_at_field);
        tables.push({
          name: tableName,
          description: this.normalizeDescription(table.description),
          database: sourceDatabase,
          schema: sourceSchema,
          columns: this.parseColumns(table.columns),
          resourceType: 'source',
          ...(tagsDbt ? { tagsDbt } : {}),
          ...(freshnessDbt ? { freshnessDbt } : {}),
        });
      }
    }
    return tables;
  }

  /** Converts the `models:` section into tables. Models without a name are skipped. */
  private parseModels(models: DbtSchemaModel[] | undefined): DbtParsedTable[] {
    if (!models || !Array.isArray(models)) {
      return [];
    }
    const tables: DbtParsedTable[] = [];
    for (const model of models) {
      if (!model.name) {
        continue;
      }
      const tagsDbt = this.mergeTagsDbt(this.normalizeTagList(model.tags));
      const freshnessDbt = this.buildFreshnessDbt(model.freshness, model.loaded_at_field);
      tables.push({
        name: model.name,
        description: this.normalizeDescription(model.description),
        database: model.database ?? null,
        schema: model.schema ?? null,
        columns: this.parseColumns(model.columns),
        resourceType: 'model',
        ...(tagsDbt ? { tagsDbt } : {}),
        ...(freshnessDbt ? { freshnessDbt } : {}),
      });
    }
    return tables;
  }

  /**
   * Converts YAML columns into parsed columns. Tests are read from `data_tests`, falling
   * back to `tests`; optional fields are only emitted when they carry data.
   */
  private parseColumns(columns: DbtSchemaColumn[] | undefined): DbtParsedColumn[] {
    if (!columns || !Array.isArray(columns)) {
      return [];
    }
    return columns.map((column) => {
      const { refs, constraints, enumValues } = this.parseDataTests(column.data_tests ?? column.tests);
      return {
        name: column.name,
        description: this.normalizeDescription(column.description),
        dataType: column.data_type ?? null,
        ...(refs.length > 0 ? { dataTests: refs } : {}),
        ...(constraints ? { constraints } : {}),
        ...(enumValues.length > 0 ? { enumValuesDbt: enumValues } : {}),
      };
    });
  }

  /**
   * Extracts test references, `not_null`/`unique` constraints and `accepted_values` enum
   * values from a column's test list.
   *
   * Accepted forms per entry: a bare test name (`not_null`, `pkg.test`) or a mapping from
   * test name to arguments. `accepted_values` may list its values directly, under `values`,
   * or under `arguments.values`. Anything that is not a list, and any entry that is neither
   * a string nor a mapping, is ignored.
   */
  private parseDataTests(tests: DbtSchemaDataTest[] | undefined): {
    refs: DbtDataTestRef[];
    constraints: DbtColumnConstraints | undefined;
    enumValues: string[];
  } {
    const refs: DbtDataTestRef[] = [];
    const dbt: { not_null?: boolean; unique?: boolean } = {};
    const enumValues: string[] = [];
    // The YAML is not validated, so `tests` may be a scalar or mapping at runtime.
    if (!Array.isArray(tests) || tests.length === 0) {
      return { refs, constraints: undefined, enumValues };
    }
    for (const test of tests) {
      if (typeof test === 'string') {
        const parsed = this.parseTestNameString(test);
        refs.push(parsed);
        if (parsed.package === 'dbt' && parsed.name === 'not_null') {
          dbt.not_null = true;
        }
        if (parsed.package === 'dbt' && parsed.name === 'unique') {
          dbt.unique = true;
        }
        continue;
      }
      // An empty list item parses to null; Object.entries would throw on it.
      if (!test || typeof test !== 'object') {
        continue;
      }
      for (const [key, value] of Object.entries(test)) {
        if (key === 'relationships') {
          refs.push({
            name: 'relationships',
            package: 'dbt',
            ...(value && typeof value === 'object' && !Array.isArray(value)
              ? { kwargs: value as Record<string, unknown> }
              : {}),
          });
          continue;
        }
        if (key === 'not_null') {
          refs.push({ name: 'not_null', package: 'dbt' });
          dbt.not_null = true;
          continue;
        }
        if (key === 'unique') {
          refs.push({ name: 'unique', package: 'dbt' });
          dbt.unique = true;
          continue;
        }
        if (key === 'accepted_values') {
          if (Array.isArray(value)) {
            enumValues.push(...value.map((item) => String(item)));
            refs.push({ name: 'accepted_values', package: 'dbt', kwargs: { values: value } });
            continue;
          }
          if (value && typeof value === 'object' && !Array.isArray(value)) {
            const direct = (value as { values?: unknown }).values;
            // Same `arguments:` nesting that relationship tests already accept.
            const nested = (value as { arguments?: { values?: unknown } | null }).arguments?.values;
            const values = Array.isArray(direct) ? direct : nested;
            if (Array.isArray(values)) {
              enumValues.push(...values.map((item) => String(item)));
            }
            refs.push({ name: 'accepted_values', package: 'dbt', kwargs: value as Record<string, unknown> });
            continue;
          }
          // Any other accepted_values shape falls through and is recorded as a generic test.
        }
        refs.push({
          ...this.parseTestNameString(key),
          ...(value && typeof value === 'object' && !Array.isArray(value)
            ? { kwargs: value as Record<string, unknown> }
            : {}),
        });
      }
    }
    const constraints = dbt.not_null || dbt.unique ? { dbt } : undefined;
    return { refs, constraints, enumValues };
  }

  /** Splits `pkg.test` into package and name; unqualified names belong to package `dbt`. */
  private parseTestNameString(value: string): { name: string; package: string } {
    const parts = value.split('.');
    if (parts.length >= 2) {
      return { package: parts[0]!, name: parts.slice(1).join('.') };
    }
    return { package: 'dbt', name: value };
  }

  /** Collects relationships declared by `relationships` tests on source table columns. */
  private parseSourceRelationships(sources: DbtSchemaSource[] | undefined): DbtParsedRelationship[] {
    if (!sources || !Array.isArray(sources)) {
      return [];
    }
    const relationships: DbtParsedRelationship[] = [];
    for (const source of sources) {
      const sourceSchema = source.schema ?? source.name;
      if (!source.tables || !Array.isArray(source.tables)) {
        continue;
      }
      for (const table of source.tables) {
        const tableName = table.identifier ?? table.name;
        // Skipped for the same reason as in parseSources: no table is emitted for it.
        if (!tableName || !table.columns || !Array.isArray(table.columns)) {
          continue;
        }
        for (const column of table.columns) {
          const tests = column.data_tests ?? column.tests ?? [];
          for (const test of tests) {
            const relationship = this.parseRelationshipTest(test, tableName, column.name, sourceSchema);
            if (relationship) {
              relationships.push(relationship);
            }
          }
        }
      }
    }
    return relationships;
  }

  /** Collects relationships declared by `relationships` tests on model columns. */
  private parseModelRelationships(models: DbtSchemaModel[] | undefined): DbtParsedRelationship[] {
    if (!models || !Array.isArray(models)) {
      return [];
    }
    const relationships: DbtParsedRelationship[] = [];
    for (const model of models) {
      if (!model.name || !model.columns || !Array.isArray(model.columns)) {
        continue;
      }
      for (const column of model.columns) {
        const tests = column.data_tests ?? column.tests ?? [];
        for (const test of tests) {
          const relationship = this.parseRelationshipTest(test, model.name, column.name, model.schema ?? undefined);
          if (relationship) {
            relationships.push(relationship);
          }
        }
      }
    }
    return relationships;
  }

  /**
   * Turns a single test entry into a relationship when it is a complete `relationships`
   * test. `to` and `field` are read directly or from the nested `arguments` mapping.
   *
   * @returns The relationship, or null when the entry is not a relationships test, is
   *   incomplete, or its target cannot be parsed.
   */
  private parseRelationshipTest(
    test: DbtSchemaDataTest,
    fromTable: string,
    fromColumn: string,
    fromSchema?: string,
  ): DbtParsedRelationship | null {
    // `!test` guards against null list items, which would otherwise throw on property access.
    if (!test || typeof test !== 'object' || !test.relationships) {
      return null;
    }
    const relationship = test.relationships;
    const toRef = relationship.to ?? relationship.arguments?.to;
    const toColumn = relationship.field ?? relationship.arguments?.field;
    if (!toRef || !toColumn) {
      this.logger.debug(`Skipping incomplete relationship test for ${fromTable}.${fromColumn}`);
      return null;
    }
    const toTable = this.parseRef(toRef);
    if (!toTable) {
      this.logger.debug(`Could not parse ref: ${toRef}`);
      return null;
    }
    return {
      fromTable,
      fromColumn,
      toTable,
      toColumn,
      ...(fromSchema ? { fromSchema } : {}),
    };
  }

  /**
   * Extracts the referenced table name from a `ref(...)` or `source(...)` expression.
   *
   * Supported forms: `ref('model')`, `ref('package', 'model')`, either of those with a
   * trailing `v=`/`version=` argument, and `source('source_name', 'table')`. For the
   * two-argument `ref` the second argument is the model name.
   *
   * @returns The table/model name, or null when the expression is not recognised.
   */
  private parseRef(refString: string): string | null {
    const refMatch = refString.match(
      /ref\s*\(\s*['"]([^'"]+)['"]\s*(?:,\s*['"]([^'"]+)['"]\s*)?(?:,\s*(?:v|version)\s*=\s*[^,)]+)?\)/,
    );
    if (refMatch) {
      return refMatch[2] ?? refMatch[1] ?? null;
    }
    const sourceMatch = refString.match(/source\s*\(\s*['"][^'"]+['"]\s*,\s*['"]([^'"]+)['"]\s*\)/);
    if (sourceMatch) {
      return sourceMatch[1] ?? null;
    }
    return null;
  }

  /** Trims a description; absent or blank descriptions become null. */
  private normalizeDescription(description: string | undefined): string | null {
    if (!description) {
      return null;
    }
    const trimmed = description.trim();
    return trimmed.length > 0 ? trimmed : null;
  }

  /** Returns the tags as strings, or an empty list when `tags` is not an array. */
  private normalizeTagList(tags: string[] | undefined): string[] {
    if (!tags || !Array.isArray(tags)) {
      return [];
    }
    return tags.map((tag) => String(tag));
  }

  /**
   * Concatenates the lists in order, dropping repeated items.
   *
   * @returns The merged list, or undefined when it would be empty.
   */
  private mergeTagsDbt(...lists: Array<string[] | undefined>): string[] | undefined {
    const merged: string[] = [];
    const seen = new Set<string>();
    for (const list of lists) {
      for (const item of list ?? []) {
        if (!seen.has(item)) {
          seen.add(item);
          merged.push(item);
        }
      }
    }
    return merged.length > 0 ? merged : undefined;
  }

  /**
   * Builds the freshness metadata of a table.
   *
   * With a `freshness` value, `loadedAtField` is always emitted (null when no
   * `loaded_at_field` was given). Without one, `loadedAtField` is emitted only when it is
   * non-empty after trimming. Returns undefined when neither is available.
   */
  private buildFreshnessDbt(freshness: unknown, loadedAtField: string | undefined): DbtParsedTable['freshnessDbt'] {
    const loadedTrim = loadedAtField?.trim();
    const hasFreshness = freshness !== undefined && freshness !== null;
    if (!hasFreshness && !loadedTrim) {
      return undefined;
    }
    return {
      ...(hasFreshness ? { raw: freshness } : {}),
      ...(hasFreshness ? { loadedAtField: loadedTrim ?? null } : loadedTrim ? { loadedAtField: loadedTrim } : {}),
    };
  }

  /**
   * Merges tables that share database, schema and name (case-insensitive). The first
   * occurrence wins for scalar fields; columns, tags and freshness are merged.
   */
  private deduplicateTables(tables: DbtParsedTable[]): DbtParsedTable[] {
    const seen = new Map<string, DbtParsedTable>();
    for (const table of tables) {
      const key = `${table.database ?? ''}.${table.schema ?? ''}.${table.name}`.toLowerCase();
      const existing = seen.get(key);
      if (!existing) {
        seen.set(key, table);
        continue;
      }
      seen.set(key, {
        ...existing,
        description: existing.description ?? table.description,
        columns: this.mergeColumns(existing.columns, table.columns),
        tagsDbt: this.mergeTagsDbt(existing.tagsDbt, table.tagsDbt),
        freshnessDbt: this.mergeFreshnessDbt(existing.freshnessDbt, table.freshnessDbt),
      });
    }
    return Array.from(seen.values());
  }

  /**
   * Merges two column lists by case-insensitive name, keeping first-seen order. For a
   * column present in both, existing non-null values win and tests, constraints and enum
   * values are combined.
   */
  private mergeColumns(existing: DbtParsedColumn[], incoming: DbtParsedColumn[]): DbtParsedColumn[] {
    const seen = new Map<string, DbtParsedColumn>();
    for (const column of existing) {
      seen.set(column.name.toLowerCase(), column);
    }
    for (const column of incoming) {
      const key = column.name.toLowerCase();
      const existingColumn = seen.get(key);
      if (!existingColumn) {
        seen.set(key, column);
        continue;
      }
      seen.set(key, {
        ...existingColumn,
        description: existingColumn.description ?? column.description,
        dataType: existingColumn.dataType ?? column.dataType,
        dataTests: this.mergeDbtDataTests(existingColumn.dataTests, column.dataTests),
        constraints: this.mergeDbtConstraints(existingColumn.constraints, column.constraints),
        enumValuesDbt: this.mergeStringList(existingColumn.enumValuesDbt, column.enumValuesDbt),
      });
    }
    return Array.from(seen.values());
  }

  /**
   * Drops repeated relationships, keeping the first occurrence. Two relationships are the
   * same when their from/to table and column names match case-insensitively; schemas are
   * not part of the comparison.
   */
  private deduplicateRelationships(relationships: DbtParsedRelationship[]): DbtParsedRelationship[] {
    const seen = new Set<string>();
    const result: DbtParsedRelationship[] = [];
    for (const relationship of relationships) {
      const key =
        `${relationship.fromTable}.${relationship.fromColumn}->${relationship.toTable}.${relationship.toColumn}`.toLowerCase();
      if (!seen.has(key)) {
        seen.add(key);
        result.push(relationship);
      }
    }
    return result;
  }

  /** Merges freshness metadata field by field, preferring the existing value. */
  private mergeFreshnessDbt(
    existing?: DbtParsedTable['freshnessDbt'],
    incoming?: DbtParsedTable['freshnessDbt'],
  ): DbtParsedTable['freshnessDbt'] {
    if (!existing && !incoming) {
      return undefined;
    }
    const raw = existing?.raw !== undefined ? existing.raw : incoming?.raw;
    const loadedAtField = existing?.loadedAtField ?? incoming?.loadedAtField;
    return {
      ...(raw !== undefined ? { raw } : {}),
      ...(loadedAtField !== undefined ? { loadedAtField } : {}),
    };
  }

  /** A constraint holds when either side declares it; undefined when neither holds. */
  private mergeDbtConstraints(
    existing?: DbtColumnConstraints,
    incoming?: DbtColumnConstraints,
  ): DbtColumnConstraints | undefined {
    const notNull = !!(existing?.dbt.not_null || incoming?.dbt.not_null);
    const unique = !!(existing?.dbt.unique || incoming?.dbt.unique);
    if (!notNull && !unique) {
      return undefined;
    }
    return { dbt: { ...(notNull ? { not_null: true } : {}), ...(unique ? { unique: true } : {}) } };
  }

  /** Ordered union of two string lists; undefined when the union is empty. */
  private mergeStringList(existing?: string[], incoming?: string[]): string[] | undefined {
    return this.mergeTagsDbt(existing, incoming);
  }

  /**
   * Combines two test lists, treating tests with the same package, name and serialized
   * kwargs as one. When a test appears on both sides the incoming entry replaces the
   * existing one at its original position.
   */
  private mergeDbtDataTests(existing?: DbtDataTestRef[], incoming?: DbtDataTestRef[]): DbtDataTestRef[] | undefined {
    if (!existing?.length) {
      return incoming?.length ? [...incoming] : undefined;
    }
    if (!incoming?.length) {
      return [...existing];
    }
    const tests = new Map<string, DbtDataTestRef>();
    for (const test of [...existing, ...incoming]) {
      // Hash the kwargs so the map key stays short even for large argument lists.
      const kwargsKey =
        test.kwargs && Object.keys(test.kwargs).length > 0
          ? `:${createHash('sha256').update(JSON.stringify(test.kwargs)).digest('hex').slice(0, 16)}`
          : '';
      tests.set(`${test.package}:${test.name}${kwargsKey}`, test);
    }
    return [...tests.values()];
  }

  /** Result used when a file yields nothing. */
  private emptyResult(projectName: string | null): DbtSchemaParseResult {
    return {
      projectName,
      dbtVersion: null,
      tables: [],
      relationships: [],
    };
  }
}

View file

@ -0,0 +1,102 @@
import { describe, expect, it } from 'vitest';
import type { DbtSchemaParseResult } from './parse-schema.js';
import { toDescriptionUpdates } from './to-description-updates.js';
import type { DbtHostTableLite } from './match-tables.js';
// Host-side fixture: a single `orders` table in `warehouse.analytics` with columns `id` and `amount`.
const hostTables: DbtHostTableLite[] = [
{
id: '1',
name: 'orders',
catalog: 'warehouse',
db: 'analytics',
columns: [
{ id: 'c1', name: 'id' },
{ id: 'c2', name: 'amount' },
],
},
];
/**
 * Builds a parse result containing one `orders` model in `warehouse.analytics`.
 *
 * The model has an `id` column carrying `columnDescription` and a `missing` column
 * that always carries the description `'not imported'`.
 *
 * @param description Table-level description for `orders`.
 * @param columnDescription Description for the `id` column.
 */
function parseResult(description: string | null, columnDescription: string | null): DbtSchemaParseResult {
  const idColumn = { name: 'id', description: columnDescription, dataType: null };
  const unmatchedColumn = { name: 'missing', description: 'not imported', dataType: null };
  const result: DbtSchemaParseResult = {
    projectName: null,
    dbtVersion: null,
    relationships: [],
    tables: [
      {
        name: 'orders',
        description,
        database: 'warehouse',
        schema: 'analytics',
        resourceType: 'model',
        columns: [idColumn, unmatchedColumn],
      },
    ],
  };
  return result;
}
// Exercises toDescriptionUpdates against the single-table fixtures defined above in this file.
describe('dbt descriptions update payloads', () => {
// Table and column descriptions present: expect one dbt write and one ai entry for the same table.
it('emits dbt writes and matching ai invalidations when descriptions exist', () => {
expect(
toDescriptionUpdates({
connectionId: 'conn-1',
parseResult: parseResult('Orders table', 'Primary key'),
hostTables,
targetSchema: null,
}),
).toEqual({
dbt: [
{
connectionId: 'conn-1',
table: { catalog: 'warehouse', db: 'analytics', name: 'orders' },
source: 'dbt',
tableDescription: 'Orders table',
columnDescriptions: { id: 'Primary key' },
},
],
aiInvalidations: [
{
connectionId: 'conn-1',
table: { catalog: 'warehouse', db: 'analytics', name: 'orders' },
source: 'ai',
},
],
});
});
// The described `missing` column has no host counterpart, so with null descriptions nothing is emitted.
it('does not emit spurious dbt writes or ai invalidations when no descriptions exist', () => {
expect(
toDescriptionUpdates({
connectionId: 'conn-1',
parseResult: parseResult(null, null),
hostTables,
targetSchema: null,
}),
).toEqual({ dbt: [], aiInvalidations: [] });
});
// Tags alone count as a metadata change: an ai entry is expected but no dbt description write.
it('emits ai invalidation without a dbt description write when only structural metadata exists', () => {
const result = parseResult(null, null);
result.tables[0]!.tagsDbt = ['finance'];
expect(
toDescriptionUpdates({
connectionId: 'conn-1',
parseResult: result,
hostTables,
targetSchema: null,
}),
).toEqual({
dbt: [],
aiInvalidations: [
{
connectionId: 'conn-1',
table: { catalog: 'warehouse', db: 'analytics', name: 'orders' },
source: 'ai',
},
],
});
});
});

View file

@ -0,0 +1,70 @@
import type { KloDescriptionUpdate } from '../../../scan/enrichment-types.js';
import { findMatchingKloTable, type DbtHostTableLite } from './match-tables.js';
import type { DbtSchemaParseResult } from './parse-schema.js';
/** Result of `toDescriptionUpdates`: dbt description writes plus the accompanying `ai`-source entries. */
export interface DbtDescriptionUpdates {
/** Updates with `source: 'dbt'` carrying a table description and/or column descriptions. */
dbt: KloDescriptionUpdate[];
/** Updates with `source: 'ai'` and no description payload, one per matched table that has dbt descriptions or metadata. */
aiInvalidations: KloDescriptionUpdate[];
}
/**
 * Turns a parsed dbt schema into description update payloads for the matching host tables.
 *
 * For every dbt table that `findMatchingKloTable` resolves to a host table:
 * - non-empty dbt column descriptions are collected for columns that exist on the host table
 *   (names compared case-insensitively, keyed by the host column name);
 * - a `source: 'dbt'` update is emitted when there is a table description or at least one
 *   collected column description;
 * - a `source: 'ai'` entry is emitted when there is a description change or the dbt table
 *   carries tags, freshness, or any column with constraints, enum values or data tests.
 *
 * Tables with neither descriptions nor such metadata are skipped entirely.
 *
 * @param input Connection id, parsed dbt schema, host tables to match against, and the
 *   target schema forwarded to `findMatchingKloTable`.
 * @returns The dbt writes and ai entries, in dbt table order.
 */
export function toDescriptionUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
  targetSchema: string | null;
}): DbtDescriptionUpdates {
  const { connectionId, parseResult, hostTables, targetSchema } = input;
  const dbt: KloDescriptionUpdate[] = [];
  const aiInvalidations: KloDescriptionUpdate[] = [];

  for (const dbtTable of parseResult.tables) {
    const hostTable = findMatchingKloTable(dbtTable, hostTables, targetSchema);
    if (!hostTable) {
      continue;
    }

    // Collect descriptions only for described dbt columns that also exist on the host table.
    const columnDescriptions: Record<string, string | null> = {};
    for (const dbtColumn of dbtTable.columns) {
      const { description } = dbtColumn;
      if (!description) {
        continue;
      }
      const hostColumn = hostTable.columns.find(
        (candidate) => candidate.name.toLowerCase() === dbtColumn.name.toLowerCase(),
      );
      if (hostColumn) {
        columnDescriptions[hostColumn.name] = description;
      }
    }

    const tableDescription = dbtTable.description ?? undefined;
    const hasColumnDescriptions = Object.keys(columnDescriptions).length > 0;
    const hasDescriptionChange = hasColumnDescriptions || tableDescription !== undefined;
    const hasMetadataChange =
      Boolean(dbtTable.tagsDbt?.length) ||
      dbtTable.freshnessDbt !== undefined ||
      dbtTable.columns.some(
        (column) =>
          column.constraints !== undefined ||
          Boolean(column.enumValuesDbt?.length) ||
          Boolean(column.dataTests?.length),
      );
    if (!(hasDescriptionChange || hasMetadataChange)) {
      continue;
    }

    // The same table reference object is shared by the dbt write and the ai entry.
    const { catalog, db, name } = hostTable;
    const tableRef = { catalog, db, name };
    if (hasDescriptionChange) {
      dbt.push({
        connectionId,
        table: tableRef,
        source: 'dbt',
        ...(tableDescription !== undefined ? { tableDescription } : {}),
        ...(hasColumnDescriptions ? { columnDescriptions } : {}),
      });
    }
    aiInvalidations.push({ connectionId, table: tableRef, source: 'ai' });
  }

  return { dbt, aiInvalidations };
}

View file

@ -0,0 +1,70 @@
import { describe, expect, it } from 'vitest';
import { toMetadataUpdates } from './to-metadata-updates.js';
// Pins the payload shape produced by toMetadataUpdates for one matched table and one matched column.
describe('toMetadataUpdates', () => {
it('emits source-keyed dbt metadata updates for matched tables and columns', () => {
const updates = toMetadataUpdates({
connectionId: 'conn_1',
targetSchema: 'analytics',
hostTables: [
{
id: 'orders-id',
name: 'orders',
catalog: 'warehouse',
db: 'analytics',
columns: [
{ id: 'status-id', name: 'status' },
{ id: 'created-id', name: 'created_at' },
],
},
],
parseResult: {
projectName: null,
dbtVersion: null,
relationships: [],
tables: [
{
name: 'orders',
description: null,
database: 'warehouse',
schema: 'analytics',
resourceType: 'model',
tagsDbt: ['finance'],
freshnessDbt: { loadedAtField: 'created_at' },
columns: [
{
name: 'status',
description: null,
dataType: null,
enumValuesDbt: ['placed', 'shipped'],
constraints: { dbt: { not_null: true } },
dataTests: [{ name: 'accepted_values', package: 'dbt', kwargs: { values: ['placed', 'shipped'] } }],
},
],
},
],
},
});
// Expected output uses snake_case keys (loaded_at_field, enum_values) and unwraps constraints from their `dbt` wrapper.
expect(updates).toEqual([
{
connectionId: 'conn_1',
table: { catalog: 'warehouse', db: 'analytics', name: 'orders' },
source: 'dbt',
tableFields: {
tags: ['finance'],
freshness: { loaded_at_field: 'created_at' },
},
columnFields: {
status: {
constraints: { not_null: true },
enum_values: ['placed', 'shipped'],
tests: [
{ name: 'accepted_values', package: 'dbt', kwargs: { values: ['placed', 'shipped'] } },
],
},
},
},
]);
});
});

View file

@ -0,0 +1,74 @@
import type { KloMetadataUpdate } from '../../../scan/enrichment-types.js';
import { findMatchingKloTable, type DbtHostTableLite } from './match-tables.js';
import type { DbtSchemaParseResult } from './parse-schema.js';
/**
 * Turns a parsed dbt schema into `source: 'dbt'` metadata update payloads.
 *
 * For every dbt table that `findMatchingKloTable` resolves to a host table:
 * - table fields: `tags` (when non-empty) and `freshness` (`raw` / `loaded_at_field`, each only
 *   when defined);
 * - column fields, for dbt columns that exist on the host table (names compared
 *   case-insensitively, keyed by the host column name): `constraints`, `enum_values`, `tests`.
 *
 * Tables that yield neither table fields nor column fields produce no update.
 *
 * @param input Connection id, parsed dbt schema, host tables to match against, and the
 *   target schema forwarded to `findMatchingKloTable`.
 * @returns One update per matched table that carries metadata, in dbt table order.
 */
export function toMetadataUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
  targetSchema: string | null;
}): KloMetadataUpdate[] {
  const { connectionId, parseResult, hostTables, targetSchema } = input;
  const updates: KloMetadataUpdate[] = [];

  for (const dbtTable of parseResult.tables) {
    const hostTable = findMatchingKloTable(dbtTable, hostTables, targetSchema);
    if (!hostTable) {
      continue;
    }

    const { tagsDbt, freshnessDbt } = dbtTable;
    const tableFields: Record<string, unknown> = {};
    if (tagsDbt?.length) {
      tableFields.tags = tagsDbt;
    }
    if (freshnessDbt) {
      const freshness: Record<string, unknown> = {};
      if (freshnessDbt.raw !== undefined) {
        freshness.raw = freshnessDbt.raw;
      }
      if (freshnessDbt.loadedAtField !== undefined) {
        freshness.loaded_at_field = freshnessDbt.loadedAtField;
      }
      tableFields.freshness = freshness;
    }

    const columnFields: Record<string, Record<string, unknown>> = {};
    for (const dbtColumn of dbtTable.columns) {
      const hostColumn = hostTable.columns.find(
        (candidate) => candidate.name.toLowerCase() === dbtColumn.name.toLowerCase(),
      );
      if (!hostColumn) {
        continue;
      }

      const { constraints, enumValuesDbt, dataTests } = dbtColumn;
      const fields: Record<string, unknown> = {};
      if (constraints) {
        // Only the inner `dbt` object is emitted, not the wrapper.
        fields.constraints = constraints.dbt;
      }
      if (enumValuesDbt?.length) {
        fields.enum_values = enumValuesDbt;
      }
      if (dataTests?.length) {
        fields.tests = dataTests.map(({ name, package: pkg, kwargs }) =>
          kwargs ? { name, package: pkg, kwargs } : { name, package: pkg },
        );
      }
      if (Object.keys(fields).length > 0) {
        columnFields[hostColumn.name] = fields;
      }
    }

    const hasTableFields = Object.keys(tableFields).length > 0;
    const hasColumnFields = Object.keys(columnFields).length > 0;
    if (!hasTableFields && !hasColumnFields) {
      continue;
    }

    const { catalog, db, name } = hostTable;
    updates.push({
      connectionId,
      table: { catalog, db, name },
      source: 'dbt',
      ...(hasTableFields ? { tableFields } : {}),
      ...(hasColumnFields ? { columnFields } : {}),
    });
  }

  return updates;
}

View file

@ -0,0 +1,62 @@
import { describe, expect, it } from 'vitest';
import type { DbtHostTableLite } from './match-tables.js';
import type { DbtSchemaParseResult } from './parse-schema.js';
import { toRelationshipUpdates } from './to-relationship-updates.js';
// Expected author e-mail, assembled from two fragments exactly as the module under test does.
const DBT_SYSTEM_EMAIL = ['system@kae', 'lio.dev'].join('');
// Host fixture: `orders` lives in `analytics`, `customers` lives in `staging`.
const hostTables: DbtHostTableLite[] = [
{
id: '1',
name: 'orders',
catalog: 'warehouse',
db: 'analytics',
columns: [{ id: 'c1', name: 'customer_id' }],
},
{
id: '2',
name: 'customers',
catalog: 'warehouse',
db: 'staging',
columns: [{ id: 'c2', name: 'id' }],
},
];
// Three relationships: one resolvable by table/column name, one with an unknown column, one with an unknown table.
const parseResult: DbtSchemaParseResult = {
projectName: null,
dbtVersion: null,
tables: [],
relationships: [
{
fromTable: 'orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
fromSchema: 'analytics',
toSchema: 'analytics',
description: 'schema intentionally differs from the host customers table',
},
{ fromTable: 'orders', fromColumn: 'missing', toTable: 'customers', toColumn: 'id' },
{ fromTable: 'orders', fromColumn: 'customer_id', toTable: 'missing_table', toColumn: 'id' },
],
};
// Pins toRelationshipUpdates: the first fixture relationship becomes a join, the other two count as skipped.
describe('dbt relationship update payloads', () => {
it('validates relationships using the current name-only matching behavior and dbt provenance', () => {
expect(toRelationshipUpdates({ connectionId: 'conn-1', parseResult, hostTables })).toEqual({
joins: [
{
connectionId: 'conn-1',
fromTable: 'orders',
fromColumns: ['customer_id'],
toTable: 'customers',
toColumns: ['id'],
relationship: 'many_to_one',
author: 'dbt',
authorEmail: DBT_SYSTEM_EMAIL,
},
],
skippedNoMatch: 2,
});
});
});

View file

@ -0,0 +1,57 @@
import type { KloJoinUpdate } from '../../../scan/enrichment-types.js';
import type { DbtHostTableLite } from './match-tables.js';
import type { DbtSchemaParseResult } from './parse-schema.js';
/** Result of `toRelationshipUpdates`. */
export interface DbtRelationshipUpdates {
/** Join payloads for relationships whose tables and columns were all found among the host tables. */
joins: KloJoinUpdate[];
/** Number of relationships dropped because a table or a column could not be matched. */
skippedNoMatch: number;
}
/** Author e-mail stamped on every dbt-derived join; assembled from two fragments at module load. */
const DBT_SYSTEM_EMAIL = ['system@kae', 'lio.dev'].join('');

/**
 * Turns parsed dbt relationships into join update payloads.
 *
 * Tables are resolved by lower-cased name only; when several host tables share a name the last
 * one in `hostTables` wins. Columns are matched case-insensitively on the resolved tables. A
 * relationship whose tables or columns cannot all be resolved is counted in `skippedNoMatch`.
 *
 * @param input Connection id, parsed dbt schema, and the host tables to resolve against.
 * @returns Joins (always `many_to_one`, authored by `dbt`) using host table/column names, plus
 *   the number of skipped relationships.
 */
export function toRelationshipUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
}): DbtRelationshipUpdates {
  const tablesByName = new Map<string, DbtHostTableLite>(
    input.hostTables.map((table): [string, DbtHostTableLite] => [table.name.toLowerCase(), table]),
  );
  const columnOf = (table: DbtHostTableLite, wanted: string) =>
    table.columns.find((column) => column.name.toLowerCase() === wanted.toLowerCase());

  const joins: KloJoinUpdate[] = [];
  let skippedNoMatch = 0;

  for (const relationship of input.parseResult.relationships) {
    const fromTable = tablesByName.get(relationship.fromTable.toLowerCase());
    const toTable = tablesByName.get(relationship.toTable.toLowerCase());
    if (fromTable === undefined || toTable === undefined) {
      skippedNoMatch += 1;
      continue;
    }

    const fromColumn = columnOf(fromTable, relationship.fromColumn);
    const toColumn = columnOf(toTable, relationship.toColumn);
    if (fromColumn === undefined || toColumn === undefined) {
      skippedNoMatch += 1;
      continue;
    }

    joins.push({
      connectionId: input.connectionId,
      fromTable: fromTable.name,
      fromColumns: [fromColumn.name],
      toTable: toTable.name,
      toColumns: [toColumn.name],
      relationship: 'many_to_one',
      author: 'dbt',
      authorEmail: DBT_SYSTEM_EMAIL,
    });
  }

  return { joins, skippedNoMatch };
}

View file

@ -0,0 +1,410 @@
import { describe, expect, it } from 'vitest';
import { type DbtHostTableLite, matchDbtTables } from './dbt-descriptions/match-tables.js';
import { mergeSemanticModelTables } from './dbt-descriptions/merge-semantic-model-tables.js';
import { parseDbtSchemaFiles } from './dbt-descriptions/parse-schema.js';
import { toDescriptionUpdates } from './dbt-descriptions/to-description-updates.js';
import { toRelationshipUpdates } from './dbt-descriptions/to-relationship-updates.js';
import { parseMetricflowFiles } from './metricflow/deep-parse.js';
import { mapCrossModelMetricToSource, mapSemanticModelToSource } from './metricflow/semantic-models.js';
// Expected author e-mail on dbt-derived joins, assembled from two fragments.
const DBT_SYSTEM_EMAIL = ['system@kae', 'lio.dev'].join('');
// MetricFlow fixture: two semantic models (orders, customers), two simple metrics and one derived metric.
const metricflowYaml = `
semantic_models:
- name: orders_semantic
description: MetricFlow order facts
model: ref('fct_orders')
defaults:
agg_time_dimension: ordered_at
entities:
- name: customer
type: foreign
expr: customer_id
description: Customer relationship
dimensions:
- name: status
type: categorical
expr: status
description: Order status
- name: ordered_at
type: time
expr: ordered_at
measures:
- name: total_revenue
agg: sum
expr: amount
description: Revenue
- name: customers_semantic
description: Customer dimension
model: ref('dim_customers')
entities:
- name: customer
type: primary
expr: id
dimensions:
- name: country
type: categorical
expr: country
description: Customer country
measures:
- name: customer_count
agg: count
expr: id
description: Customer count
metrics:
- name: total_revenue
type: simple
type_params:
measure: total_revenue
- name: customer_count
type: simple
type_params:
measure: customer_count
- name: revenue_per_customer
description: Revenue per customer
type: derived
type_params:
expr: total_revenue / NULLIF(customer_count, 0)
metrics:
- name: total_revenue
alias: total_revenue
- name: customer_count
alias: customer_count
`;
// dbt schema fixture: one source table (identifier dim_customers) and one model whose name and schema use var() templates.
const schemaYaml = `
version: 2
sources:
- name: raw
database: warehouse
schema: landing
tables:
- name: customers
identifier: dim_customers
description: Raw customer dimension
columns:
- name: id
description: Customer primary key
- name: country
description: Country name
models:
- name: "{{ var('orders_model', 'fct_orders') }}"
schema: "{{ var('mart_schema', 'analytics') }}"
description: Modeled orders
columns:
- name: customer_id
description: Linked customer id
tests:
- relationships:
to: ref('dim_customers')
field: id
- name: status
description: Order status
- name: amount
description: Gross amount
`;
// Host fixture: fct_orders in warehouse.analytics and dim_customers in warehouse.landing.
const hostTables: DbtHostTableLite[] = [
{
id: 'orders-table',
name: 'fct_orders',
catalog: 'warehouse',
db: 'analytics',
columns: [
{ id: 'orders-customer-id', name: 'customer_id' },
{ id: 'orders-status', name: 'status' },
{ id: 'orders-amount', name: 'amount' },
{ id: 'orders-ordered-at', name: 'ordered_at' },
],
},
{
id: 'customers-table',
name: 'dim_customers',
catalog: 'warehouse',
db: 'landing',
columns: [
{ id: 'customers-id', name: 'id' },
{ id: 'customers-country', name: 'country' },
],
},
];
// Golden test: runs the MetricFlow parser, the source mappers, the dbt schema parser, the semantic-model
// merge, table matching, and the description/relationship payload builders over the fixtures above,
// pinning the exact output of every step.
describe('dbt extraction golden parity fixture', () => {
it('freezes the relocated MetricFlow and dbt-description contract together', () => {
// Step 1: parse the MetricFlow YAML and pin the complete parsed structure.
const metricflow = parseMetricflowFiles([{ path: 'semantic_models/orders.yml', content: metricflowYaml }]);
expect(metricflow).toEqual({
semanticModels: [
{
name: 'orders_semantic',
description: 'MetricFlow order facts',
modelRef: 'fct_orders',
dimensions: [
{
name: 'status',
column: 'status',
type: 'string',
label: 'Status',
description: 'Order status',
},
{
name: 'ordered_at',
column: 'ordered_at',
type: 'time',
label: 'Ordered At',
description: undefined,
},
],
measures: [
{
type: 'simple',
name: 'total_revenue',
column: 'amount',
aggregation: 'sum',
label: 'Total Revenue',
description: 'Revenue',
},
],
entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id', description: 'Customer relationship' }],
defaultTimeDimension: 'ordered_at',
},
{
name: 'customers_semantic',
description: 'Customer dimension',
modelRef: 'dim_customers',
dimensions: [
{
name: 'country',
column: 'country',
type: 'string',
label: 'Country',
description: 'Customer country',
},
],
measures: [
{
type: 'simple',
name: 'customer_count',
column: 'id',
aggregation: 'count',
label: 'Customer Count',
description: 'Customer count',
},
],
entities: [{ name: 'customer', type: 'primary', expr: 'id' }],
defaultTimeDimension: null,
},
],
crossModelMetrics: [
{
name: 'revenue_per_customer',
label: null,
description: 'Revenue per customer',
type: 'derived',
expr: 'total_revenue / NULLIF(customer_count, 0)',
dependsOn: [
{ metricName: 'orders_semantic', alias: 'total_revenue' },
{ metricName: 'customers_semantic', alias: 'customer_count' },
],
filter: null,
},
],
relationships: [
{
fromTable: 'fct_orders',
fromColumn: 'customer_id',
toTable: 'dim_customers',
toColumn: 'id',
description: 'Customer relationship',
},
],
warnings: [],
});
// Step 2: map the first semantic model and the derived metric to source definitions.
expect(mapSemanticModelToSource(metricflow.semanticModels[0], 'analytics.fct_orders')).toEqual({
name: 'fct-orders',
table: 'analytics.fct_orders',
grain: ['status', 'ordered_at'],
columns: [
{ name: 'status', type: 'string', description: 'Order status' },
{ name: 'ordered_at', type: 'time' },
],
measures: [
{
name: 'total_revenue',
expr: 'sum(amount)',
description: 'Revenue',
},
],
joins: [],
descriptions: { dbt: 'MetricFlow order facts' },
});
expect(mapCrossModelMetricToSource(metricflow.crossModelMetrics[0])).toEqual({
name: 'revenue-per-customer',
sql: 'total_revenue / NULLIF(customer_count, 0)',
descriptions: { dbt: 'Revenue per customer' },
grain: [],
columns: [],
measures: [
{
name: 'revenue_per_customer',
expr: 'total_revenue / NULLIF(customer_count, 0)',
description: 'Revenue per customer',
},
],
joins: [],
});
// Step 3: parse the dbt schema YAML with explicit variable values, then merge in the semantic models.
const schema = parseDbtSchemaFiles(
[{ path: 'models/schema.yml', content: schemaYaml }],
new Map([
['orders_model', 'fct_orders'],
['mart_schema', 'analytics'],
]),
);
const merged = mergeSemanticModelTables(schema, metricflow.semanticModels);
expect(merged).toEqual({
projectName: null,
dbtVersion: null,
tables: [
{
name: 'dim_customers',
description: 'Raw customer dimension',
database: 'warehouse',
schema: 'landing',
columns: [
{ name: 'id', description: 'Customer primary key', dataType: null },
{ name: 'country', description: 'Country name', dataType: null },
],
resourceType: 'source',
},
{
name: 'fct_orders',
description: 'Modeled orders',
database: null,
schema: 'analytics',
columns: [
{
name: 'customer_id',
description: 'Linked customer id',
dataType: null,
dataTests: [
{
name: 'relationships',
package: 'dbt',
kwargs: { to: "ref('dim_customers')", field: 'id' },
},
],
},
{ name: 'status', description: 'Order status', dataType: null },
{ name: 'amount', description: 'Gross amount', dataType: null },
],
resourceType: 'model',
},
],
relationships: [
{
fromTable: 'fct_orders',
fromColumn: 'customer_id',
toTable: 'dim_customers',
toColumn: 'id',
fromSchema: 'analytics',
},
],
});
// Step 4: match the merged dbt tables against the host tables with target schema `analytics`.
expect(matchDbtTables(merged.tables, hostTables, 'analytics')).toEqual([
{
dbtTable: 'dim_customers',
dbtSchema: 'landing',
dbtDatabase: 'warehouse',
hostTableId: 'customers-table',
hostTableName: 'dim_customers',
matched: true,
tableDescriptionAction: 'import',
tableDescriptionFound: true,
columnsToImport: 2,
columnsMatched: 2,
columnsTotal: 2,
columnDescriptionsFound: 2,
},
{
dbtTable: 'fct_orders',
dbtSchema: 'analytics',
dbtDatabase: null,
hostTableId: 'orders-table',
hostTableName: 'fct_orders',
matched: true,
tableDescriptionAction: 'import',
tableDescriptionFound: true,
columnsToImport: 3,
columnsMatched: 3,
columnsTotal: 3,
columnDescriptionsFound: 3,
},
]);
// Step 5: description payloads — one dbt write and one ai entry per matched table.
expect(
toDescriptionUpdates({
connectionId: 'warehouse-1',
parseResult: merged,
hostTables,
targetSchema: 'analytics',
}),
).toEqual({
dbt: [
{
connectionId: 'warehouse-1',
table: { catalog: 'warehouse', db: 'landing', name: 'dim_customers' },
source: 'dbt',
tableDescription: 'Raw customer dimension',
columnDescriptions: {
id: 'Customer primary key',
country: 'Country name',
},
},
{
connectionId: 'warehouse-1',
table: { catalog: 'warehouse', db: 'analytics', name: 'fct_orders' },
source: 'dbt',
tableDescription: 'Modeled orders',
columnDescriptions: {
customer_id: 'Linked customer id',
status: 'Order status',
amount: 'Gross amount',
},
},
],
aiInvalidations: [
{
connectionId: 'warehouse-1',
table: { catalog: 'warehouse', db: 'landing', name: 'dim_customers' },
source: 'ai',
},
{
connectionId: 'warehouse-1',
table: { catalog: 'warehouse', db: 'analytics', name: 'fct_orders' },
source: 'ai',
},
],
});
// Step 6: relationship payloads — the single dbt relationship resolves to one join, nothing skipped.
expect(toRelationshipUpdates({ connectionId: 'warehouse-1', parseResult: merged, hostTables })).toEqual({
joins: [
{
connectionId: 'warehouse-1',
fromTable: 'fct_orders',
fromColumns: ['customer_id'],
toTable: 'dim_customers',
toColumns: ['id'],
relationship: 'many_to_one',
author: 'dbt',
authorEmail: DBT_SYSTEM_EMAIL,
},
],
skippedNoMatch: 0,
});
});
});

View file

@ -0,0 +1,36 @@
import { describe, expect, it } from 'vitest';
import { chunkDbtProject } from './chunk.js';
// Covers chunkDbtProject on projects with more than 25 YAML files (per-model work units).
describe('chunkDbtProject', () => {
// Builds a diff set in which only `modified` is populated.
const diffSet = (modified: string[]) => ({ added: [], modified, deleted: [], unchanged: [] });
// 202 paths in total, so each model unit has 201 peers and the index must be cut to 200.
it('caps peerFileIndex when the project has very many yaml files', () => {
const modelPaths = Array.from({ length: 201 }, (_, i) => `models/m${i}.yml`);
const allPaths = ['dbt_project.yml', ...modelPaths].sort();
const { workUnits } = chunkDbtProject({ allPaths });
const [first] = workUnits;
expect(first).toBeDefined();
expect(first?.peerFileIndex).toHaveLength(200);
expect(first?.notes).toMatch(/capped at 200/);
});
// A change to the project file alone keeps every per-model unit, with raw files unchanged.
it('keeps large-project model work units when dbt_project.yml changes', () => {
const modelPaths = Array.from({ length: 30 }, (_, i) => `models/m${i}.yml`);
const allPaths = ['dbt_project.yml', ...modelPaths].sort();
const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: diffSet(['dbt_project.yml']) });
expect(workUnits).toHaveLength(30);
expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']);
expect(workUnits[0]?.dependencyPaths).toContain('dbt_project.yml');
});
// A changed peer outside models/ keeps every unit and is surfaced as a dependency path.
it('keeps large-project model work units when non-model yaml peers change', () => {
const modelPaths = Array.from({ length: 30 }, (_, i) => `models/m${i}.yml`);
const allPaths = ['dbt_project.yml', 'seeds/seed_properties.yml', ...modelPaths].sort();
const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: diffSet(['seeds/seed_properties.yml']) });
expect(workUnits).toHaveLength(30);
expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']);
expect(workUnits[0]?.dependencyPaths).toContain('seeds/seed_properties.yml');
});
});

View file

@ -0,0 +1,130 @@
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
import type { ParsedDbtProject } from './parse.js';
/** Options accepted by `chunkDbtProject`. */
interface ChunkOptions {
/** When provided, the first-run work units are filtered down to those affected by added/modified paths. */
diffSet?: DiffSet;
}
/**
 * Per-model work units (when the project has more than 25 YAML files) only name `rawFiles` under
 * `models/**`. Other `.yml` files (e.g. some `seeds/` or custom layouts) still appear in
 * `peerFileIndex`, or in the small-project / no-models fallbacks; v1 does not emit one work unit
 * per non-models file.
 */
const MODELS_PREFIX = 'models/';
/** `peerFileIndex` is a hint only (agents may not read those paths). Capped to limit prompt size. */
const MAX_PEER_FILE_INDEX = 200;
/**
 * Finds the dbt project file among the given paths.
 *
 * @param allPaths Paths to search; only exact matches at the root are considered.
 * @returns `'dbt_project.yml'` if present, otherwise `'dbt_project.yaml'` if present, otherwise `undefined`.
 */
function projectYamlPath(allPaths: string[]): string | undefined {
  // Order matters: the `.yml` spelling takes precedence over `.yaml`.
  const candidates = ['dbt_project.yml', 'dbt_project.yaml'];
  return candidates.find((candidate) => allPaths.includes(candidate));
}
/**
 * Selects the paths that live under `models/` (backslashes are treated as path separators for
 * the prefix check only; the returned paths are the original strings).
 *
 * @returns A new, sorted array of the matching paths.
 */
function modelRelativePaths(allPaths: string[]): string[] {
  const underModels = allPaths.filter((path) => path.split('\\').join('/').startsWith(MODELS_PREFIX));
  underModels.sort();
  return underModels;
}
/**
 * Derives a work-unit key from a model file path.
 *
 * The `.yml` / `.yaml` extension is dropped, every run of non-alphanumeric characters becomes a
 * single `-`, leading/trailing dashes are trimmed, and the result is lower-cased and prefixed
 * with `dbt-`.
 *
 * @param mf Model file path (forward or backward slashes).
 */
function unitKeyForModelFile(mf: string): string {
  const withoutExtension = mf.replace(/\.(ya?ml)$/i, '');
  const posix = withoutExtension.replace(/\\/g, '/');
  const dashed = posix.replace(/[^a-zA-Z0-9]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  return 'dbt-' + trimmed.toLowerCase();
}
/**
 * Produces the work units for a full (first) run over a dbt project.
 *
 * - No paths: no work units.
 * - At most 25 paths: a single `dbt-all` unit containing every path.
 * - More than 25 paths but none under `models/`: a single `dbt-all` unit containing every path,
 *   with the project file (if any) as a dependency.
 * - Otherwise: one unit per file under `models/`, each listing the other paths as
 *   `peerFileIndex` (capped at `MAX_PEER_FILE_INDEX`) and the project file as a dependency.
 *
 * @param allPaths All YAML paths of the project.
 * @param dbtDep Path of the dbt project file, if one was found.
 */
function emitFirstRunWorkUnits(allPaths: string[], dbtDep: string | undefined): WorkUnit[] {
  const fileCount = allPaths.length;
  if (fileCount === 0) {
    return [];
  }

  if (fileCount <= 25) {
    const everything: WorkUnit = {
      unitKey: 'dbt-all',
      displayLabel: 'dbt project (all yaml)',
      rawFiles: [...allPaths],
      peerFileIndex: [],
      dependencyPaths: [],
      notes: 'dbt project — all YAML in one WorkUnit (≤25 files)',
    };
    return [everything];
  }

  const modelFiles = modelRelativePaths(allPaths);
  if (modelFiles.length === 0) {
    const everything: WorkUnit = {
      unitKey: 'dbt-all',
      displayLabel: 'dbt project (all yaml, no models/**)',
      rawFiles: [...allPaths],
      peerFileIndex: [],
      dependencyPaths: dbtDep ? [dbtDep] : [],
      notes: 'dbt: no models/**/*.yml — single slice with dbt_project as dependency if present',
    };
    return [everything];
  }

  const units: WorkUnit[] = [];
  for (const mf of modelFiles) {
    const allPeers = allPaths.filter((p) => p !== mf).sort();
    const truncated = allPeers.length > MAX_PEER_FILE_INDEX;
    let notes = 'dbt model schema slice';
    if (truncated) {
      notes = `dbt model schema slice (peer index capped at ${MAX_PEER_FILE_INDEX} of ${allPeers.length} paths)`;
    }
    units.push({
      unitKey: unitKeyForModelFile(mf),
      displayLabel: `dbt ${mf}`,
      rawFiles: [mf],
      // slice() leaves shorter lists intact and cuts longer ones to the cap.
      peerFileIndex: allPeers.slice(0, MAX_PEER_FILE_INDEX),
      dependencyPaths: dbtDep && mf !== dbtDep && allPaths.includes(dbtDep) ? [dbtDep] : [],
      notes,
    });
  }
  return units;
}
/**
 * Narrows first-run work units to those affected by a diff.
 *
 * A unit is kept when any of its raw files, dependency paths or peer-index paths was added or
 * modified. For a kept unit:
 * - `rawFiles` becomes the touched raw files, or all raw files when none of them was touched;
 * - `dependencyPaths` becomes the sorted union of the existing dependencies, the untouched raw
 *   files (only when some raw file was touched) and the touched peer files.
 *
 * The input units and their arrays are never mutated.
 *
 * @param firstRunUnits Work units as produced for a full run.
 * @param diffSet Added / modified / deleted paths since the previous run.
 * @returns The kept units plus, when paths were deleted, an `eviction` listing them sorted.
 */
function applyDiffSet(firstRunUnits: WorkUnit[], diffSet: DiffSet): ChunkResult {
  const touched = new Set<string>([...diffSet.added, ...diffSet.modified]);
  const isTouched = (p: string): boolean => touched.has(p);
  const kept: WorkUnit[] = [];

  for (const wu of firstRunUnits) {
    const touchedRawFiles = wu.rawFiles.filter(isTouched);
    const touchedPeerFiles = wu.peerFileIndex.filter(isTouched);
    const dependencyTouched = wu.dependencyPaths.some(isTouched);
    if (touchedRawFiles.length === 0 && touchedPeerFiles.length === 0 && !dependencyTouched) {
      continue;
    }

    const someRawTouched = touchedRawFiles.length > 0;
    // Copy before sorting so the caller's `wu.rawFiles` keeps its original order.
    const rawFiles = someRawTouched ? touchedRawFiles : [...wu.rawFiles];
    const unchangedRaw = someRawTouched ? wu.rawFiles.filter((p) => !touched.has(p)) : [];
    const combinedDeps = new Set<string>([...wu.dependencyPaths, ...unchangedRaw, ...touchedPeerFiles]);

    kept.push({
      ...wu,
      rawFiles: rawFiles.sort(),
      dependencyPaths: [...combinedDeps].sort(),
    });
  }

  const eviction = diffSet.deleted.length > 0 ? { deletedRawPaths: [...diffSet.deleted].sort() } : undefined;
  return { workUnits: kept, eviction };
}
/**
 * Splits a parsed dbt project into work units.
 *
 * @param project Parsed project; only `allPaths` is read.
 * @param opts When `opts.diffSet` is given, the first-run units are narrowed via `applyDiffSet`;
 *   otherwise the first-run units are returned unchanged.
 */
export function chunkDbtProject(project: ParsedDbtProject, opts: ChunkOptions = {}): ChunkResult {
  const { allPaths } = project;
  const { diffSet } = opts;
  const firstRun = emitFirstRunWorkUnits(allPaths, projectYamlPath(allPaths));
  return diffSet ? applyDiffSet(firstRun, diffSet) : { workUnits: firstRun };
}

View file

@ -0,0 +1,51 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { SourceAdapter } from '../../types.js';
import { DbtSourceAdapter } from './dbt.adapter.js';
// Exercises DbtSourceAdapter against a fresh temporary staged directory per test.
describe('DbtSourceAdapter', () => {
let stagedDir: string;
let adapter: SourceAdapter;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'dbt-adapter-'));
adapter = new DbtSourceAdapter();
});
// Remove the temporary directory even if the test failed part-way.
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('declares the expected source key and skill list', () => {
expect(adapter.source).toBe('dbt');
expect(adapter.skillNames).toEqual(['dbt_ingest']);
});
it('detects a staged dbt project root (dbt_project.yml)', async () => {
await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\nversion: '1.0.0'\n", 'utf-8');
expect(await adapter.detect(stagedDir)).toBe(true);
});
// Two YAML files only, so the single `dbt-all` unit is expected alongside the parsed artifacts.
it('chunk: dbt_project.yml + models/a.yml yields one WU (≤25 files)', async () => {
await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\n", 'utf-8');
await mkdir(join(stagedDir, 'models'), { recursive: true });
await writeFile(
join(stagedDir, 'models/a.yml'),
'version: 2\nmodels:\n - name: orders\n description: Orders\n',
'utf-8',
);
const result = await adapter.chunk(stagedDir);
expect(result.workUnits).toHaveLength(1);
expect(result.workUnits[0].unitKey).toBe('dbt-all');
expect(result.parseArtifacts).toMatchObject({
projectName: 'jaffle',
tables: [{ name: 'orders', description: 'Orders' }],
});
});
it('implements fetch() for git-backed dbt source setup', () => {
expect(adapter.fetch).toBeTypeOf('function');
});
});

View file

@ -0,0 +1,48 @@
import { join } from 'node:path';
import type { ChunkResult, DiffSet, SourceAdapter } from '../../types.js';
import type { FetchContext } from '../../types.js';
import { loadProjectInfo } from '../../dbt-shared/project-vars.js';
import { loadDbtSchemaFiles } from '../../dbt-shared/schema-files.js';
import { parseDbtSchemaFiles } from '../dbt-descriptions/parse-schema.js';
import { chunkDbtProject } from './chunk.js';
import { detectDbtStagedDir } from './detect.js';
import { fetchDbtRepo, type DbtPullConfig } from './fetch.js';
import { parseDbtStagedDir } from './parse.js';
/** Construction options for `DbtSourceAdapter`. */
interface DbtSourceAdapterOptions {
/** Base directory for the repo cache used by `fetch()`; `'.klo/cache'` is used when omitted. */
homeDir?: string;
}
/**
 * Source adapter for dbt projects: detects a staged dbt project, fetches one from a git
 * repository, and chunks its YAML into work units together with parsed schema artifacts.
 */
export class DbtSourceAdapter implements SourceAdapter {
  readonly source = 'dbt' as const;
  /** Runner merges: ingest_triage, sl_capture, knowledge_capture (see ingest-bundle.runner.ts) */
  readonly skillNames: string[] = ['dbt_ingest'];

  constructor(private readonly options: DbtSourceAdapterOptions = {}) {}

  /** Reports whether `stagedDir` looks like a dbt project (delegates to `detectDbtStagedDir`). */
  detect(stagedDir: string): Promise<boolean> {
    return detectDbtStagedDir(stagedDir);
  }

  /**
   * Fetches the configured dbt repository into `stagedDir`.
   *
   * @param pullConfig Expected to be a `DbtPullConfig`; only `repoUrl` is validated here.
   * @param stagedDir Destination directory for the fetched YAML files.
   * @param ctx Fetch context; `connectionId` scopes the cache directory.
   * @throws Error when `pullConfig` is missing or has no `repoUrl`.
   */
  async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const config = pullConfig as DbtPullConfig | undefined;
    if (!config?.repoUrl) {
      throw new Error('dbt fetch requires repoUrl');
    }
    const cacheRoot = this.options.homeDir ?? '.klo/cache';
    const cacheDir = join(cacheRoot, 'dbt', ctx.connectionId);
    await fetchDbtRepo({ config, cacheDir, stagedDir });
  }

  /**
   * Chunks the staged project into work units and attaches the parsed schema as `parseArtifacts`.
   *
   * @param stagedDir Directory containing the staged dbt YAML files.
   * @param diffSet Optional diff forwarded to `chunkDbtProject`.
   */
  async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    const project = await parseDbtStagedDir(stagedDir);
    const { projectName, variables } = await loadProjectInfo(stagedDir);
    const schemaFiles = await loadDbtSchemaFiles(stagedDir);
    const parseArtifacts = parseDbtSchemaFiles(schemaFiles, variables, { projectName });
    const chunked = chunkDbtProject(project, { diffSet });
    return { ...chunked, parseArtifacts };
  }
}

View file

@ -0,0 +1,12 @@
import { access } from 'node:fs/promises';
import { join } from 'node:path';
/**
 * Reports whether `stagedDir` contains a dbt project file.
 *
 * @param stagedDir Directory to probe.
 * @returns `true` when `dbt_project.yml` or `dbt_project.yaml` is accessible directly inside
 *   `stagedDir`; `false` otherwise (any access failure counts as "not present").
 */
export async function detectDbtStagedDir(stagedDir: string): Promise<boolean> {
  const isAccessible = async (path: string): Promise<boolean> => {
    try {
      await access(path);
    } catch {
      // Access failures are deliberately treated as "file not there".
      return false;
    }
    return true;
  };

  const markerFiles = ['dbt_project.yml', 'dbt_project.yaml'] as const;
  for (const markerFile of markerFiles) {
    if (await isAccessible(join(stagedDir, markerFile))) {
      return true;
    }
  }
  return false;
}

View file

@ -0,0 +1,38 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { fetchDbtRepo } from './fetch.js';
// Exercises fetchDbtRepo with an injected cloneOrPull stub, so no real git access happens.
describe('fetchDbtRepo', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'klo-dbt-fetch-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('copies dbt yaml files from a fetched repo subpath into staged dir', async () => {
const cacheDir = join(tempDir, 'cache');
const stagedDir = join(tempDir, 'staged');
// Pre-populate the cache as if the repo had already been cloned, with the project under `analytics/`.
await mkdir(join(cacheDir, 'analytics', 'models'), { recursive: true });
await writeFile(join(cacheDir, 'analytics', 'dbt_project.yml'), 'name: analytics\n', 'utf-8');
await writeFile(join(cacheDir, 'analytics', 'models', 'orders.yml'), 'models: []\n', 'utf-8');
const cloneOrPull = vi.fn(async () => ({ commitHash: 'abc123' }));
await expect(
fetchDbtRepo({
config: { repoUrl: 'https://github.com/acme/dbt.git', path: 'analytics' },
cacheDir,
stagedDir,
deps: { cloneOrPull },
}),
).resolves.toEqual({ commitHash: 'abc123', filesCopied: 2 });
// Files land in stagedDir relative to the configured sub-path, not the repo root.
await expect(readFile(join(stagedDir, 'dbt_project.yml'), 'utf-8')).resolves.toContain('analytics');
await expect(readFile(join(stagedDir, 'models', 'orders.yml'), 'utf-8')).resolves.toContain('models');
});
});

View file

@ -0,0 +1,60 @@
import { access, copyFile, mkdir, readdir } from 'node:fs/promises';
import { dirname, join, relative } from 'node:path';
import { cloneOrPull, sanitizeRepoError } from '../../repo-fetch.js';
/** Pull configuration for fetching a dbt project from a git repository. */
export interface DbtPullConfig {
/** Repository URL passed to `cloneOrPull`. */
repoUrl: string;
/** Branch to fetch; `fetchDbtRepo` uses `'main'` when omitted. */
branch?: string;
/** Sub-path inside the fetched repo to copy YAML from; the cache root is used when omitted or empty. */
path?: string;
/** Token passed to `cloneOrPull` and to `sanitizeRepoError` when a failure is reported. */
authToken?: string | null;
}
/** Arguments for `fetchDbtRepo`. */
export interface FetchDbtRepoParams {
/** Repository location, branch, sub-path and credentials. */
config: DbtPullConfig;
/** Directory handed to `cloneOrPull` as its cache location; YAML is copied out of it afterwards. */
cacheDir: string;
/** Destination directory that receives the copied YAML files. */
stagedDir: string;
/** Optional overrides for collaborators (the tests inject a stub `cloneOrPull`). */
deps?: {
cloneOrPull?: typeof cloneOrPull;
};
}
/**
 * Clones or pulls the configured repository into `cacheDir`, then copies its YAML files
 * (optionally from `config.path` inside the repo) into `stagedDir`.
 *
 * @param params Repository config, cache and staging directories, and optional dependency overrides.
 * @returns The commit hash reported by `cloneOrPull` and the number of YAML files copied.
 * @throws Error whose message is produced by `sanitizeRepoError` for any failure in the
 *   clone/pull or copy steps.
 */
export async function fetchDbtRepo(params: FetchDbtRepoParams): Promise<{ commitHash: string; filesCopied: number }> {
  const { config, cacheDir, stagedDir } = params;
  try {
    const pull = params.deps?.cloneOrPull ?? cloneOrPull;
    const { commitHash } = await pull({
      repoUrl: config.repoUrl,
      authToken: config.authToken,
      cacheDir,
      branch: config.branch ?? 'main',
    });

    let sourceRoot = cacheDir;
    if (config.path) {
      sourceRoot = join(cacheDir, config.path);
    }
    const filesCopied = await copyYamlFilesRecursive(sourceRoot, stagedDir);
    return { commitHash, filesCopied };
  } catch (error) {
    // Re-throw with a message built by sanitizeRepoError, which is given the auth token.
    throw new Error(sanitizeRepoError(error, config.authToken));
  }
}
/**
 * Mirrors every `.yml` / `.yaml` file below `sourceRoot` into `destRoot`, keeping the
 * relative directory layout.
 *
 * @param sourceRoot - Directory to scan recursively.
 * @param destRoot - Directory that receives the copies (created when missing).
 * @returns Number of files copied; `0` when `sourceRoot` cannot be accessed.
 */
async function copyYamlFilesRecursive(sourceRoot: string, destRoot: string): Promise<number> {
  let sourceAccessible = true;
  try {
    await access(sourceRoot);
  } catch {
    sourceAccessible = false;
  }
  if (!sourceAccessible) {
    return 0;
  }

  await mkdir(destRoot, { recursive: true });
  const dirents = await readdir(sourceRoot, { withFileTypes: true, recursive: true });
  const yamlFiles = dirents.filter((dirent) => dirent.isFile() && /\.ya?ml$/i.test(dirent.name));

  // Copy sequentially so directory creation and file writes happen in listing order.
  for (const dirent of yamlFiles) {
    const from = join(dirent.parentPath, dirent.name);
    const to = join(destRoot, relative(sourceRoot, from));
    await mkdir(dirname(to), { recursive: true });
    await copyFile(from, to);
  }
  return yamlFiles.length;
}

View file

@ -0,0 +1,8 @@
import { describe, expect, it } from 'vitest';
import { normalizeDbtPath } from './parse.js';
// Checks that a backslash-separated path is returned with forward slashes.
describe('normalizeDbtPath', () => {
it('normalizes Windows separators to POSIX separators', () => {
expect(normalizeDbtPath('models\\marts\\orders.yml')).toBe('models/marts/orders.yml');
});
});

View file

@ -0,0 +1,32 @@
import { readdir } from 'node:fs/promises';
import { join, relative } from 'node:path';
/** Matches names ending in `.yml` or `.yaml`, case-insensitively. */
const YAML_EXT_RE = /\.(ya?ml)$/i;
/**
 * Rewrites every backslash in `path` as a forward slash.
 *
 * @param path - Path that may contain Windows-style separators.
 * @returns The same path using `/` as the only separator.
 */
export function normalizeDbtPath(path: string): string {
  const segments = path.split('\\');
  return segments.join('/');
}
/**
 * Lists every YAML file beneath `stagedDir`.
 *
 * @param stagedDir - Directory to scan recursively.
 * @returns Paths relative to `stagedDir`, normalized via `normalizeDbtPath` and sorted
 *   with the default string ordering.
 */
async function collectYamlFiles(stagedDir: string): Promise<string[]> {
  const dirents = await readdir(stagedDir, { withFileTypes: true, recursive: true });
  return dirents
    .filter((dirent) => dirent.isFile() && YAML_EXT_RE.test(dirent.name))
    .map((dirent) => normalizeDbtPath(relative(stagedDir, join(dirent.parentPath, dirent.name))))
    .sort();
}
/** Result returned by `parseDbtStagedDir`. */
export interface ParsedDbtProject {
/** All `.yml` / `.yaml` paths under stagedDir, relative + sorted. */
allPaths: string[];
}
/**
 * Scans a staged dbt directory and reports the YAML files it contains.
 *
 * @param stagedDir - Directory previously populated with dbt YAML files.
 * @returns An object whose `allPaths` holds the collected YAML paths.
 */
export async function parseDbtStagedDir(stagedDir: string): Promise<ParsedDbtProject> {
  return { allPaths: await collectYamlFiles(stagedDir) };
}

View file

@ -0,0 +1,48 @@
import { readdir } from 'node:fs/promises';
import { join, relative } from 'node:path';
import type { ChunkResult, DiffSet, SourceAdapter, WorkUnit } from '../../types.js';
/**
 * `SourceAdapter` stand-in that turns each immediate sub-directory of the staged
 * directory into one work unit containing all files found beneath it.
 */
export class FakeSourceAdapter implements SourceAdapter {
  readonly source = 'fake';
  readonly skillNames: string[] = [];

  /** Always resolves to `true`. */
  async detect(): Promise<boolean> {
    return true;
  }

  /**
   * Builds one work unit per non-empty sub-directory of `stagedDir`.
   *
   * @param stagedDir - Directory whose immediate sub-directories are enumerated (sorted by name).
   * @param diffSet - When given, a sub-directory is kept only if at least one of its files
   *   is listed in `added` or `modified`; a non-empty `deleted` list is echoed back as eviction.
   * @returns The work units plus an optional eviction record.
   */
  async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    const topLevel = await readdir(stagedDir, { withFileTypes: true });
    const dirNames = topLevel.flatMap((entry) => (entry.isDirectory() ? [entry.name] : [])).sort();

    const workUnits: WorkUnit[] = [];
    for (const dirName of dirNames) {
      const nested = await readdir(join(stagedDir, dirName), { withFileTypes: true, recursive: true });
      const rawFiles: string[] = [];
      for (const item of nested) {
        if (item.isFile()) {
          // Paths are kept relative to the staged root, not to the sub-directory.
          rawFiles.push(relative(stagedDir, join(item.parentPath, item.name)));
        }
      }
      rawFiles.sort();

      if (rawFiles.length === 0) {
        continue;
      }
      if (diffSet) {
        const changed = new Set([...diffSet.added, ...diffSet.modified]);
        if (!rawFiles.some((file) => changed.has(file))) {
          continue;
        }
      }

      workUnits.push({
        unitKey: `fake-${dirName}`,
        displayLabel: dirName,
        rawFiles,
        peerFileIndex: [],
        dependencyPaths: [],
      });
    }

    const eviction = diffSet?.deleted.length ? { deletedRawPaths: [...diffSet.deleted] } : undefined;
    return { workUnits, eviction };
  }
}

View file

@ -0,0 +1,146 @@
{
"name": "eviction-churn",
"now": "2026-05-08T12:00:00.000Z",
"connectionId": "warehouse",
"probe": {
"pgServerVersion": "PostgreSQL 16.4",
"warnings": [
"pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn"
]
},
"snapshot": {
"statsResetAt": "2026-05-08T08:00:00.000Z",
"deallocCount": 3,
"rows": [
{
"queryid": "501",
"userid": "11",
"username": "analyst",
"dbid": "5",
"database": "analytics",
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"calls": 20,
"totalExecTime": 500,
"meanExecTime": 25,
"totalRows": 40
}
]
},
"pullConfig": {
"dialect": "postgres",
"windowDays": 90,
"lastSuccessfulCursor": null,
"serviceAccountUserPatterns": [],
"redactionPatterns": [],
"maxTemplatesPerRun": 5000,
"minCalls": 5
},
"analysisBySql": {
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
"fingerprint": "fp_orders_status",
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"tablesTouched": [
"analytics.orders"
],
"literalSlots": []
}
},
"baseline": null,
"expectedBaseline": {
"version": 1,
"fetchedAt": "2026-05-08T12:00:00.000Z",
"statsResetAt": "2026-05-08T08:00:00.000Z",
"pgServerVersion": "PostgreSQL 16.4",
"templates": {
"db5_q501": {
"firstObservedAt": "2026-05-08T12:00:00.000Z",
"perUser": {
"11": {
"calls": 20,
"totalExecTime": 500,
"totalRows": 40
}
}
}
}
},
"expectedFiles": {
"manifest.json": {
"json": {
"source": "historic-sql",
"connectionId": "warehouse",
"dialect": "postgres",
"fetchedAt": "2026-05-08T12:00:00.000Z",
"windowStart": "2026-05-08T08:00:00.000Z",
"windowEnd": "2026-05-08T12:00:00.000Z",
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
"templateCount": 1,
"capped": false,
"warnings": [
"pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn",
"pgss_dealloc_count:3; pg_stat_statements.max may be too low, causing template eviction churn",
"baseline_first_run:no_previous_pgss_baseline"
],
"degraded": true,
"statsResetAt": "2026-05-08T08:00:00.000Z",
"baselineFirstRun": true,
"pgServerVersion": "PostgreSQL 16.4",
"deallocCount": 3,
"templates": [
{
"id": "db5_q501",
"fingerprint": "fp_orders_status",
"subClusterId": null,
"path": "templates/db5_q501/page.md"
}
]
}
},
"templates/db5_q501/metadata.json": {
"json": {
"id": "db5_q501",
"title": "postgres · analytics.orders [db5_q501]",
"path": "templates/db5_q501/page.md",
"objectType": "historic_sql_template",
"lastEditedAt": null,
"properties": {
"fingerprint": "fp_orders_status",
"sub_cluster_id": null,
"dialect": "postgres",
"tables_touched": [
"analytics.orders"
],
"literal_slots": [],
"triage_signals": {
"executions_bucket": "mid",
"distinct_users_bucket": "solo",
"error_rate_bucket": "ok",
"recency_bucket": "active",
"service_account_only": "false",
"runtime_bucket": "fast"
}
}
}
},
"templates/db5_q501/page.md": {
"text": "# db5_q501\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
},
"templates/db5_q501/usage.json": {
"json": {
"stats": {
"executions": 20,
"distinct_users": 1,
"first_seen": "2026-05-08T12:00:00.000Z",
"last_seen": "2026-05-08T12:00:00.000Z",
"p50_runtime_ms": null,
"p95_runtime_ms": null,
"mean_runtime_ms": 25,
"error_rate": 0,
"rows_produced": 40
},
"literal_slots": [],
"samples": []
}
}
}
}

View file

@ -0,0 +1,144 @@
{
"name": "first-run",
"now": "2026-05-08T12:00:00.000Z",
"connectionId": "warehouse",
"probe": {
"pgServerVersion": "PostgreSQL 16.4",
"warnings": []
},
"snapshot": {
"statsResetAt": "2026-05-08T08:00:00.000Z",
"deallocCount": 0,
"rows": [
{
"queryid": "101",
"userid": "11",
"username": "analyst",
"dbid": "5",
"database": "analytics",
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"calls": 10,
"totalExecTime": 250,
"meanExecTime": 25,
"totalRows": 20
}
]
},
"pullConfig": {
"dialect": "postgres",
"windowDays": 90,
"lastSuccessfulCursor": null,
"serviceAccountUserPatterns": [
"^svc_"
],
"redactionPatterns": [],
"maxTemplatesPerRun": 5000,
"minCalls": 5
},
"analysisBySql": {
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
"fingerprint": "fp_orders_status",
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"tablesTouched": [
"analytics.orders"
],
"literalSlots": []
}
},
"baseline": null,
"expectedBaseline": {
"version": 1,
"fetchedAt": "2026-05-08T12:00:00.000Z",
"statsResetAt": "2026-05-08T08:00:00.000Z",
"pgServerVersion": "PostgreSQL 16.4",
"templates": {
"db5_q101": {
"firstObservedAt": "2026-05-08T12:00:00.000Z",
"perUser": {
"11": {
"calls": 10,
"totalExecTime": 250,
"totalRows": 20
}
}
}
}
},
"expectedFiles": {
"manifest.json": {
"json": {
"source": "historic-sql",
"connectionId": "warehouse",
"dialect": "postgres",
"fetchedAt": "2026-05-08T12:00:00.000Z",
"windowStart": "2026-05-08T08:00:00.000Z",
"windowEnd": "2026-05-08T12:00:00.000Z",
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
"templateCount": 1,
"capped": false,
"warnings": [
"baseline_first_run:no_previous_pgss_baseline"
],
"degraded": true,
"statsResetAt": "2026-05-08T08:00:00.000Z",
"baselineFirstRun": true,
"pgServerVersion": "PostgreSQL 16.4",
"deallocCount": 0,
"templates": [
{
"id": "db5_q101",
"fingerprint": "fp_orders_status",
"subClusterId": null,
"path": "templates/db5_q101/page.md"
}
]
}
},
"templates/db5_q101/metadata.json": {
"json": {
"id": "db5_q101",
"title": "postgres · analytics.orders [db5_q101]",
"path": "templates/db5_q101/page.md",
"objectType": "historic_sql_template",
"lastEditedAt": null,
"properties": {
"fingerprint": "fp_orders_status",
"sub_cluster_id": null,
"dialect": "postgres",
"tables_touched": [
"analytics.orders"
],
"literal_slots": [],
"triage_signals": {
"executions_bucket": "mid",
"distinct_users_bucket": "solo",
"error_rate_bucket": "ok",
"recency_bucket": "active",
"service_account_only": "false",
"runtime_bucket": "fast"
}
}
}
},
"templates/db5_q101/page.md": {
"text": "# db5_q101\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
},
"templates/db5_q101/usage.json": {
"json": {
"stats": {
"executions": 10,
"distinct_users": 1,
"first_seen": "2026-05-08T12:00:00.000Z",
"last_seen": "2026-05-08T12:00:00.000Z",
"p50_runtime_ms": null,
"p95_runtime_ms": null,
"mean_runtime_ms": 25,
"error_rate": 0,
"rows_produced": 20
},
"literal_slots": [],
"samples": []
}
}
}
}

View file

@ -0,0 +1,181 @@
{
"name": "normal-delta",
"now": "2026-05-08T12:00:00.000Z",
"connectionId": "warehouse",
"probe": {
"pgServerVersion": "PostgreSQL 16.4",
"warnings": []
},
"snapshot": {
"statsResetAt": "2026-05-08T08:00:00.000Z",
"deallocCount": 0,
"rows": [
{
"queryid": "201",
"userid": "11",
"username": "analyst",
"dbid": "5",
"database": "analytics",
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"calls": 12,
"totalExecTime": 160,
"meanExecTime": 13.333333333333334,
"totalRows": 58
},
{
"queryid": "201",
"userid": "12",
"username": "svc_loader",
"dbid": "5",
"database": "analytics",
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"calls": 5,
"totalExecTime": 50,
"meanExecTime": 10,
"totalRows": 25
}
]
},
"pullConfig": {
"dialect": "postgres",
"windowDays": 90,
"lastSuccessfulCursor": null,
"serviceAccountUserPatterns": [
"^svc_"
],
"redactionPatterns": [],
"maxTemplatesPerRun": 5000,
"minCalls": 5
},
"analysisBySql": {
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
"fingerprint": "fp_orders_status",
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"tablesTouched": [
"analytics.orders"
],
"literalSlots": []
}
},
"baseline": {
"version": 1,
"fetchedAt": "2026-05-08T10:00:00.000Z",
"statsResetAt": "2026-05-08T08:00:00.000Z",
"pgServerVersion": "PostgreSQL 16.4",
"templates": {
"db5_q201": {
"firstObservedAt": "2026-05-08T09:00:00.000Z",
"perUser": {
"11": {
"calls": 10,
"totalExecTime": 100,
"totalRows": 50
},
"12": {
"calls": 5,
"totalExecTime": 50,
"totalRows": 25
}
}
}
}
},
"expectedBaseline": {
"version": 1,
"fetchedAt": "2026-05-08T12:00:00.000Z",
"statsResetAt": "2026-05-08T08:00:00.000Z",
"pgServerVersion": "PostgreSQL 16.4",
"templates": {
"db5_q201": {
"firstObservedAt": "2026-05-08T09:00:00.000Z",
"perUser": {
"11": {
"calls": 12,
"totalExecTime": 160,
"totalRows": 58
},
"12": {
"calls": 5,
"totalExecTime": 50,
"totalRows": 25
}
}
}
}
},
"expectedFiles": {
"manifest.json": {
"json": {
"source": "historic-sql",
"connectionId": "warehouse",
"dialect": "postgres",
"fetchedAt": "2026-05-08T12:00:00.000Z",
"windowStart": "2026-05-08T10:00:00.000Z",
"windowEnd": "2026-05-08T12:00:00.000Z",
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
"templateCount": 1,
"capped": false,
"warnings": [],
"degraded": true,
"statsResetAt": "2026-05-08T08:00:00.000Z",
"baselineFirstRun": false,
"pgServerVersion": "PostgreSQL 16.4",
"deallocCount": 0,
"templates": [
{
"id": "db5_q201",
"fingerprint": "fp_orders_status",
"subClusterId": null,
"path": "templates/db5_q201/page.md"
}
]
}
},
"templates/db5_q201/metadata.json": {
"json": {
"id": "db5_q201",
"title": "postgres · analytics.orders [db5_q201]",
"path": "templates/db5_q201/page.md",
"objectType": "historic_sql_template",
"lastEditedAt": null,
"properties": {
"fingerprint": "fp_orders_status",
"sub_cluster_id": null,
"dialect": "postgres",
"tables_touched": [
"analytics.orders"
],
"literal_slots": [],
"triage_signals": {
"executions_bucket": "low",
"distinct_users_bucket": "solo",
"error_rate_bucket": "ok",
"recency_bucket": "active",
"service_account_only": "false",
"runtime_bucket": "fast"
}
}
}
},
"templates/db5_q201/page.md": {
"text": "# db5_q201\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
},
"templates/db5_q201/usage.json": {
"json": {
"stats": {
"executions": 2,
"distinct_users": 1,
"first_seen": "2026-05-08T09:00:00.000Z",
"last_seen": "2026-05-08T12:00:00.000Z",
"p50_runtime_ms": null,
"p95_runtime_ms": null,
"mean_runtime_ms": 30,
"error_rate": 0,
"rows_produced": 8
},
"literal_slots": [],
"samples": []
}
}
}
}

View file

@ -0,0 +1,159 @@
{
"name": "reset-detected",
"now": "2026-05-08T12:00:00.000Z",
"connectionId": "warehouse",
"probe": {
"pgServerVersion": "PostgreSQL 16.4",
"warnings": []
},
"snapshot": {
"statsResetAt": "2026-05-08T11:00:00.000Z",
"deallocCount": 0,
"rows": [
{
"queryid": "301",
"userid": "11",
"username": "analyst",
"dbid": "5",
"database": "analytics",
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"calls": 3,
"totalExecTime": 90,
"meanExecTime": 30,
"totalRows": 9
}
]
},
"pullConfig": {
"dialect": "postgres",
"windowDays": 90,
"lastSuccessfulCursor": null,
"serviceAccountUserPatterns": [],
"redactionPatterns": [],
"maxTemplatesPerRun": 5000,
"minCalls": 5
},
"analysisBySql": {
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
"fingerprint": "fp_orders_status",
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"tablesTouched": [
"analytics.orders"
],
"literalSlots": []
}
},
"baseline": {
"version": 1,
"fetchedAt": "2026-05-08T10:00:00.000Z",
"statsResetAt": "2026-05-08T08:00:00.000Z",
"pgServerVersion": "PostgreSQL 16.4",
"templates": {
"db5_q301": {
"firstObservedAt": "2026-05-08T09:00:00.000Z",
"perUser": {
"11": {
"calls": 100,
"totalExecTime": 1000,
"totalRows": 500
}
}
}
}
},
"expectedBaseline": {
"version": 1,
"fetchedAt": "2026-05-08T12:00:00.000Z",
"statsResetAt": "2026-05-08T11:00:00.000Z",
"pgServerVersion": "PostgreSQL 16.4",
"templates": {
"db5_q301": {
"firstObservedAt": "2026-05-08T12:00:00.000Z",
"perUser": {
"11": {
"calls": 3,
"totalExecTime": 90,
"totalRows": 9
}
}
}
}
},
"expectedFiles": {
"manifest.json": {
"json": {
"source": "historic-sql",
"connectionId": "warehouse",
"dialect": "postgres",
"fetchedAt": "2026-05-08T12:00:00.000Z",
"windowStart": "2026-05-08T10:00:00.000Z",
"windowEnd": "2026-05-08T12:00:00.000Z",
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
"templateCount": 1,
"capped": false,
"warnings": [
"baseline_reset:stats_reset advanced from 2026-05-08T08:00:00.000Z to 2026-05-08T11:00:00.000Z"
],
"degraded": true,
"statsResetAt": "2026-05-08T11:00:00.000Z",
"baselineFirstRun": true,
"pgServerVersion": "PostgreSQL 16.4",
"deallocCount": 0,
"templates": [
{
"id": "db5_q301",
"fingerprint": "fp_orders_status",
"subClusterId": null,
"path": "templates/db5_q301/page.md"
}
]
}
},
"templates/db5_q301/metadata.json": {
"json": {
"id": "db5_q301",
"title": "postgres · analytics.orders [db5_q301]",
"path": "templates/db5_q301/page.md",
"objectType": "historic_sql_template",
"lastEditedAt": null,
"properties": {
"fingerprint": "fp_orders_status",
"sub_cluster_id": null,
"dialect": "postgres",
"tables_touched": [
"analytics.orders"
],
"literal_slots": [],
"triage_signals": {
"executions_bucket": "mid",
"distinct_users_bucket": "solo",
"error_rate_bucket": "ok",
"recency_bucket": "active",
"service_account_only": "false",
"runtime_bucket": "fast"
}
}
}
},
"templates/db5_q301/page.md": {
"text": "# db5_q301\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
},
"templates/db5_q301/usage.json": {
"json": {
"stats": {
"executions": 3,
"distinct_users": 1,
"first_seen": "2026-05-08T12:00:00.000Z",
"last_seen": "2026-05-08T12:00:00.000Z",
"p50_runtime_ms": null,
"p95_runtime_ms": null,
"mean_runtime_ms": 30,
"error_rate": 0,
"rows_produced": 9
},
"literal_slots": [],
"samples": []
}
}
}
}

View file

@ -0,0 +1,159 @@
{
"name": "version-change",
"now": "2026-05-08T12:00:00.000Z",
"connectionId": "warehouse",
"probe": {
"pgServerVersion": "PostgreSQL 16.4",
"warnings": []
},
"snapshot": {
"statsResetAt": "2026-05-08T08:00:00.000Z",
"deallocCount": 0,
"rows": [
{
"queryid": "401",
"userid": "11",
"username": "analyst",
"dbid": "5",
"database": "analytics",
"query": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"calls": 4,
"totalExecTime": 80,
"meanExecTime": 20,
"totalRows": 8
}
]
},
"pullConfig": {
"dialect": "postgres",
"windowDays": 90,
"lastSuccessfulCursor": null,
"serviceAccountUserPatterns": [],
"redactionPatterns": [],
"maxTemplatesPerRun": 5000,
"minCalls": 5
},
"analysisBySql": {
"SELECT count(*) FROM analytics.orders WHERE status = $1": {
"fingerprint": "fp_orders_status",
"normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1",
"tablesTouched": [
"analytics.orders"
],
"literalSlots": []
}
},
"baseline": {
"version": 1,
"fetchedAt": "2026-05-08T10:00:00.000Z",
"statsResetAt": "2026-05-08T08:00:00.000Z",
"pgServerVersion": "PostgreSQL 15.7",
"templates": {
"db5_q401": {
"firstObservedAt": "2026-05-08T09:00:00.000Z",
"perUser": {
"11": {
"calls": 100,
"totalExecTime": 1000,
"totalRows": 500
}
}
}
}
},
"expectedBaseline": {
"version": 1,
"fetchedAt": "2026-05-08T12:00:00.000Z",
"statsResetAt": "2026-05-08T08:00:00.000Z",
"pgServerVersion": "PostgreSQL 16.4",
"templates": {
"db5_q401": {
"firstObservedAt": "2026-05-08T12:00:00.000Z",
"perUser": {
"11": {
"calls": 4,
"totalExecTime": 80,
"totalRows": 8
}
}
}
}
},
"expectedFiles": {
"manifest.json": {
"json": {
"source": "historic-sql",
"connectionId": "warehouse",
"dialect": "postgres",
"fetchedAt": "2026-05-08T12:00:00.000Z",
"windowStart": "2026-05-08T10:00:00.000Z",
"windowEnd": "2026-05-08T12:00:00.000Z",
"nextSuccessfulCursor": "2026-05-08T12:00:00.000Z",
"templateCount": 1,
"capped": false,
"warnings": [
"baseline_reset:pg_server_major changed from 15 to 16"
],
"degraded": true,
"statsResetAt": "2026-05-08T08:00:00.000Z",
"baselineFirstRun": true,
"pgServerVersion": "PostgreSQL 16.4",
"deallocCount": 0,
"templates": [
{
"id": "db5_q401",
"fingerprint": "fp_orders_status",
"subClusterId": null,
"path": "templates/db5_q401/page.md"
}
]
}
},
"templates/db5_q401/metadata.json": {
"json": {
"id": "db5_q401",
"title": "postgres · analytics.orders [db5_q401]",
"path": "templates/db5_q401/page.md",
"objectType": "historic_sql_template",
"lastEditedAt": null,
"properties": {
"fingerprint": "fp_orders_status",
"sub_cluster_id": null,
"dialect": "postgres",
"tables_touched": [
"analytics.orders"
],
"literal_slots": [],
"triage_signals": {
"executions_bucket": "mid",
"distinct_users_bucket": "solo",
"error_rate_bucket": "ok",
"recency_bucket": "active",
"service_account_only": "false",
"runtime_bucket": "fast"
}
}
}
},
"templates/db5_q401/page.md": {
"text": "# db5_q401\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n"
},
"templates/db5_q401/usage.json": {
"json": {
"stats": {
"executions": 4,
"distinct_users": 1,
"first_seen": "2026-05-08T12:00:00.000Z",
"last_seen": "2026-05-08T12:00:00.000Z",
"p50_runtime_ms": null,
"p95_runtime_ms": null,
"mean_runtime_ms": 20,
"error_rate": 0,
"rows_produced": 8
},
"literal_slots": [],
"samples": []
}
}
}
}

View file

@ -0,0 +1,200 @@
import { describe, expect, it, vi } from 'vitest';
import { BigQueryHistoricSqlQueryHistoryReader } from './bigquery-query-history-reader.js';
import { HistoricSqlGrantsMissingError } from './errors.js';
/** Shape of a canned result handed back by the stubbed `executeQuery` in these tests. */
interface FakeQueryResult {
headers: string[];
rows: unknown[][];
totalRows: number;
/** When present, the test expects the reader to treat the result as a failure. */
error?: string;
}
/**
 * Builds a stub query client whose `executeQuery` mock hands out `results` in order
 * (consuming the array) and throws `unexpected query` once none are left.
 */
function queryClient(results: FakeQueryResult[]) {
  return {
    executeQuery: vi.fn(async (_query: string) => {
      const queued = results.shift();
      if (queued) {
        return queued;
      }
      throw new Error('unexpected query');
    }),
  };
}
/**
 * Returns the SQL text passed to the first recorded `executeQuery` call.
 *
 * @throws Error when the mock has not been called yet.
 */
function firstQuery(client: ReturnType<typeof queryClient>): string {
  const [earliestCall] = client.executeQuery.mock.calls;
  if (earliestCall) {
    return earliestCall[0];
  }
  throw new Error('expected query client to be called');
}
// Behavioural tests for BigQueryHistoricSqlQueryHistoryReader: probing, fetching/mapping rows,
// error translation, and validation of the project/region identifiers.
describe('BigQueryHistoricSqlQueryHistoryReader', () => {
it('probes region-qualified INFORMATION_SCHEMA.JOBS_BY_PROJECT', async () => {
const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]);
// Region 'US' is expected to show up lower-cased as `region-us` in the probed view path.
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
await expect(reader.probe(client)).resolves.toBeUndefined();
expect(client.executeQuery).toHaveBeenCalledWith(
'SELECT 1 FROM `project-1.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT` LIMIT 1',
);
});
it('turns probe result errors into HistoricSqlGrantsMissingError', async () => {
// The client resolves normally but reports the failure through the result's `error` field.
const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Access Denied: jobs.listAll' }]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'us-central1' });
await expect(reader.probe(client)).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'bigquery',
remediation:
'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.',
});
});
it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => {
// Here the client rejects instead of returning an error-bearing result.
const client = {
executeQuery: vi.fn(async () => {
throw new Error('permission denied');
}),
};
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
it('fetches BigQuery jobs with cursor and maps them into RawQueryRow shape without rowsProduced', async () => {
const client = queryClient([
{
headers: [
'job_id',
'query',
'user_email',
'creation_time',
'end_time',
'runtime_ms',
'total_slot_ms',
'total_bytes_processed',
'state',
'error_reason',
'error_message',
'statement_type',
],
rows: [
[
'bquxjob_1',
"SELECT COUNT(*) FROM `project-1.analytics.orders` WHERE status = 'paid'",
'analyst-a@example.test',
'2026-05-04T10:00:00.000Z',
'2026-05-04T10:00:01.250Z',
1250,
3106,
161164718,
'DONE',
null,
null,
'SELECT',
],
// Second row: creation_time is a Date instance, end_time/runtime_ms are null, and an error is set.
[
'bquxjob_2',
'SELECT * FROM `project-1.analytics.missing_table`',
'analyst-b@example.test',
new Date('2026-05-04T10:05:00.000Z'),
null,
null,
0,
0,
'DONE',
'notFound',
'Not found: Table project-1.analytics.missing_table',
'SELECT',
],
],
totalRows: 2,
},
]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
const rows = [];
// The cursor (third argument) is later than window.start; the assertions below expect it as the lower bound.
for await (const row of reader.fetch(
client,
{
start: new Date('2026-05-01T00:00:00.000Z'),
end: new Date('2026-05-04T12:00:00.000Z'),
},
'2026-05-03T00:00:00.000Z',
)) {
rows.push(row);
}
expect(client.executeQuery).toHaveBeenCalledTimes(1);
const sql = firstQuery(client);
expect(sql).toContain('FROM `project-1.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT`');
expect(sql).toContain("creation_time >= TIMESTAMP('2026-05-03T00:00:00.000Z')");
expect(sql).toContain("creation_time < TIMESTAMP('2026-05-04T12:00:00.000Z')");
expect(sql).toContain("job_type = 'QUERY'");
expect(sql).toContain("(statement_type IS NULL OR statement_type != 'SCRIPT')");
expect(sql).toContain('ORDER BY creation_time ASC, job_id ASC');
expect(sql).toContain('total_slot_ms');
expect(sql).toContain('total_bytes_processed');
expect(sql).not.toMatch(/total_rows/i);
expect(rows).toEqual([
{
id: 'bquxjob_1',
sql: "SELECT COUNT(*) FROM `project-1.analytics.orders` WHERE status = 'paid'",
user: 'analyst-a@example.test',
startedAt: '2026-05-04T10:00:00.000Z',
endedAt: '2026-05-04T10:00:01.250Z',
runtimeMs: 1250,
success: true,
errorMessage: null,
},
{
id: 'bquxjob_2',
sql: 'SELECT * FROM `project-1.analytics.missing_table`',
user: 'analyst-b@example.test',
startedAt: '2026-05-04T10:05:00.000Z',
endedAt: null,
runtimeMs: null,
success: false,
errorMessage: 'notFound: Not found: Table project-1.analytics.missing_table',
},
]);
});
it('uses the window start when no cursor is available', async () => {
const client = queryClient([{ headers: ['job_id'], rows: [], totalRows: 0 }]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'EU' });
// The stub returns no rows, so entering the loop body would be a failure.
for await (const _row of reader.fetch(client, {
start: new Date('2026-02-03T12:00:00.000Z'),
end: new Date('2026-05-04T12:00:00.000Z'),
})) {
throw new Error('empty result should not yield rows');
}
const sql = firstQuery(client);
expect(sql).toContain('FROM `project-1.region-eu.INFORMATION_SCHEMA.JOBS_BY_PROJECT`');
expect(sql).toContain("creation_time >= TIMESTAMP('2026-02-03T12:00:00.000Z')");
});
it('throws a clear error when the query client cannot execute SQL', async () => {
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
// `{}` has no executeQuery method, so iteration is expected to reject before yielding anything.
await expect(async () => {
for await (const _row of reader.fetch({}, { start: new Date(), end: new Date() })) {
throw new Error('unreachable');
}
}).rejects.toThrow('Historic SQL BigQuery reader requires a query client with executeQuery(query)');
});
it('rejects unsafe project and region identifiers before building SQL', () => {
expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project`1', region: 'US' })).toThrow(
'Invalid BigQuery project id for historic-SQL ingest: project`1',
);
expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US;DROP' })).toThrow(
'Invalid BigQuery region for historic-SQL ingest: US;DROP',
);
});
});

View file

@ -0,0 +1,219 @@
import { HistoricSqlGrantsMissingError } from './errors.js';
import type { HistoricSqlQueryHistoryReader, HistoricSqlRawQueryRow, HistoricSqlTimeWindow } from './types.js';
/** Minimal result shape this reader expects back from `executeQuery`. */
interface QueryResultLike {
/** Column names; looked up case-insensitively when mapping rows. */
headers: string[];
/** Row values, addressed by the position of the matching header. */
rows: unknown[][];
/** Not read by the code in this file. */
totalRows: number;
/** When set, `probe` and `fetch` raise a grants error built from this text. */
error?: string;
}
/** Minimal client contract required by this reader: a single `executeQuery(query)` method. */
interface QueryClientLike {
executeQuery(query: string): Promise<QueryResultLike>;
}
/** Constructor options for `BigQueryHistoricSqlQueryHistoryReader`. */
export interface BigQueryHistoricSqlQueryHistoryReaderOptions {
/** BigQuery project id; must consist only of letters, digits, `_` and `-`. */
projectId: string;
/** BigQuery region; trimmed, lower-cased and stripped of a leading `region-` before validation. */
region: string;
}
/** Remediation hint attached to every `HistoricSqlGrantsMissingError` built by `grantsError`. */
const BIGQUERY_GRANTS_REMEDIATION =
'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.';
/**
 * Narrows an untyped client to `QueryClientLike`.
 *
 * @param client - Candidate object supplied by the caller.
 * @returns The same object, typed as a query client.
 * @throws Error when `client` is not an object exposing an `executeQuery` function.
 */
function queryClient(client: unknown): QueryClientLike {
  const hasExecuteQuery =
    typeof client === 'object' &&
    client !== null &&
    'executeQuery' in client &&
    typeof (client as { executeQuery?: unknown }).executeQuery === 'function';
  if (!hasExecuteQuery) {
    throw new Error('Historic SQL BigQuery reader requires a query client with executeQuery(query)');
  }
  return client as QueryClientLike;
}
/**
 * Wraps an arbitrary failure in a `HistoricSqlGrantsMissingError` for the BigQuery dialect.
 *
 * @param cause - Thrown value or error text; an `Error`'s message or a string is quoted in the
 *   resulting message, anything else falls back to a generic description.
 * @returns The constructed error carrying the shared remediation hint and the original cause.
 */
function grantsError(cause: unknown): HistoricSqlGrantsMissingError {
  let detail = 'BigQuery principal cannot query INFORMATION_SCHEMA.JOBS_BY_PROJECT.';
  if (cause instanceof Error) {
    detail = cause.message;
  } else if (typeof cause === 'string') {
    detail = cause;
  }
  return new HistoricSqlGrantsMissingError({
    dialect: 'bigquery',
    message: `Missing BigQuery audit grants for historic-SQL ingest: ${detail}`,
    remediation: BIGQUERY_GRANTS_REMEDIATION,
    cause,
  });
}
/**
 * Validates a BigQuery project id before it is interpolated into SQL.
 *
 * @param value - Project id as configured.
 * @returns `value` unchanged when it contains only letters, digits, `_` and `-`.
 * @throws Error for an empty id or one containing any other character.
 */
function normalizeProjectId(value: string): string {
  const isSafe = /^[A-Za-z0-9_-]+$/.test(value);
  if (isSafe) {
    return value;
  }
  throw new Error(`Invalid BigQuery project id for historic-SQL ingest: ${value}`);
}
/**
 * Canonicalizes a BigQuery region name before it is interpolated into SQL.
 *
 * @param value - Region as configured, with or without a `region-` prefix, in any case.
 * @returns The trimmed, lower-cased region without the `region-` prefix.
 * @throws Error (quoting the original input) when the result is empty or contains
 *   anything other than lowercase letters, digits and `-`.
 */
function normalizeRegion(value: string): string {
  const prefix = 'region-';
  const lowered = value.trim().toLowerCase();
  const bare = lowered.startsWith(prefix) ? lowered.slice(prefix.length) : lowered;
  if (/^[a-z0-9-]+$/.test(bare)) {
    return bare;
  }
  throw new Error(`Invalid BigQuery region for historic-SQL ingest: ${value}`);
}
/**
 * Renders a point in time as a BigQuery `TIMESTAMP('…')` literal.
 *
 * @param value - A `Date`, or a string that `new Date(...)` can parse.
 * @returns The literal, using the ISO-8601 form produced by `toISOString()`.
 * @throws Error when the value does not represent a valid date.
 */
function timestampExpression(value: Date | string): string {
  const parsed = value instanceof Date ? value : new Date(value);
  const isValid = !Number.isNaN(parsed.getTime());
  if (!isValid) {
    throw new Error(`Invalid BigQuery query-history timestamp: ${String(value)}`);
  }
  // Escape single quotes before embedding the text in the quoted literal.
  const literal = parsed.toISOString().replace(/'/g, "\\'");
  return "TIMESTAMP('" + literal + "')";
}
/**
 * Builds a lookup from upper-cased header name to column position.
 *
 * @param headers - Column names in result order.
 * @returns Map keyed by upper-cased name; when names collide after upper-casing, the
 *   last position wins.
 */
function indexByHeader(headers: string[]): Map<string, number> {
  return new Map<string, number>(
    headers.map((header, position): [string, number] => [header.toUpperCase(), position]),
  );
}
/**
 * Reads one cell from a row by column name (case-insensitive).
 *
 * @param row - Row values in result order.
 * @param indexes - Lookup keyed by upper-cased column name.
 * @param name - Column to read.
 * @returns The cell at the mapped position, or `null` when the column is not in `indexes`.
 */
function value(row: unknown[], indexes: Map<string, number>, name: string): unknown {
  const position = indexes.get(name.toUpperCase());
  if (position === undefined) {
    return null;
  }
  return row[position];
}
/**
 * Converts a raw cell to text, treating absent and empty values as `null`.
 *
 * @param raw - Cell value of any type.
 * @returns `String(raw)`, or `null` when `raw` is `null`/`undefined` or stringifies to `''`.
 */
function nullableString(raw: unknown): string | null {
  if (raw == null) {
    return null;
  }
  const text = String(raw);
  return text === '' ? null : text;
}
/**
 * Converts a raw cell to non-empty text.
 *
 * @param raw - Cell value of any type.
 * @param field - Column name used in the error message.
 * @returns The stringified value.
 * @throws Error when the value is absent or stringifies to an empty string.
 */
function requiredString(raw: unknown, field: string): string {
  const text = nullableString(raw);
  if (text) {
    return text;
  }
  throw new Error(`BigQuery JOBS_BY_PROJECT row is missing ${field}`);
}
/**
 * Converts a raw cell to a non-negative finite number.
 *
 * @param raw - Cell value of any type.
 * @returns `null` for `null`, `undefined`, `''` or anything that does not convert to a
 *   finite number; otherwise the number, with negative values raised to `0`.
 */
function nullableNumber(raw: unknown): number | null {
  const isBlank = raw === null || raw === undefined || raw === '';
  if (isBlank) {
    return null;
  }
  const parsed = typeof raw === 'number' ? raw : Number(raw);
  return Number.isFinite(parsed) ? Math.max(0, parsed) : null;
}
/**
 * Converts a raw timestamp cell to an ISO-8601 string.
 *
 * @param raw - A `Date`, or a value whose string form `new Date(...)` can parse.
 * @param field - Column name used in error messages.
 * @returns The timestamp as produced by `toISOString()`.
 * @throws Error when the value is missing, or is a `Date`/string that does not represent
 *   a valid point in time.
 */
function isoTimestamp(raw: unknown, field: string): string {
  if (raw instanceof Date) {
    // An invalid Date would make toISOString() throw a bare RangeError; report it with the
    // same field-specific message used for unparsable strings instead.
    if (Number.isNaN(raw.getTime())) {
      throw new Error(`BigQuery JOBS_BY_PROJECT row has invalid ${field}: ${String(raw)}`);
    }
    return raw.toISOString();
  }
  const text = requiredString(raw, field);
  const date = new Date(text);
  if (Number.isNaN(date.getTime())) {
    throw new Error(`BigQuery JOBS_BY_PROJECT row has invalid ${field}: ${text}`);
  }
  return date.toISOString();
}
/**
 * Converts an optional timestamp cell to an ISO-8601 string.
 *
 * @param raw - Cell value of any type.
 * @returns `null` for `null`, `undefined` or `''`; otherwise the result of `isoTimestamp`,
 *   which reports failures against the `end_time` column.
 */
function nullableIsoTimestamp(raw: unknown): string | null {
  const isBlank = raw === null || raw === undefined || raw === '';
  return isBlank ? null : isoTimestamp(raw, 'end_time');
}
/**
 * Decides whether a job row represents a successful execution.
 *
 * @param state - Job state, or `null` when unavailable.
 * @param errorReason - Error reason, if any.
 * @param errorMessage - Error message, if any.
 * @returns `false` when either error field is non-empty; otherwise `true` when the state
 *   is `null` or equals `DONE` ignoring case.
 */
function executionSucceeded(state: string | null, errorReason: string | null, errorMessage: string | null): boolean {
  const failed = Boolean(errorReason || errorMessage);
  return !failed && (state === null || state.toUpperCase() === 'DONE');
}
/**
 * Merges the error reason and message into one display string.
 *
 * @param errorReason - Error reason, if any.
 * @param errorMessage - Error message, if any.
 * @returns `"<reason>: <message>"` when both are non-empty; otherwise the message when it
 *   is not `null`, else the reason (which may itself be `null`).
 */
function combinedErrorMessage(errorReason: string | null, errorMessage: string | null): string | null {
  if (!errorReason || !errorMessage) {
    return errorMessage ?? errorReason;
  }
  return `${errorReason}: ${errorMessage}`;
}
/**
 * Converts one `JOBS_BY_PROJECT` result row into a `HistoricSqlRawQueryRow`.
 *
 * @param row - Row values in result order.
 * @param indexes - Lookup from upper-cased column name to position.
 * @returns The mapped row; `job_id`, `query` and `creation_time` are required.
 * @throws Error when a required column is missing or a timestamp cannot be parsed.
 */
function mapRow(row: unknown[], indexes: Map<string, number>): HistoricSqlRawQueryRow {
  const cell = (column: string): unknown => value(row, indexes, column);

  const reason = nullableString(cell('error_reason'));
  const message = nullableString(cell('error_message'));

  const id = requiredString(cell('job_id'), 'job_id');
  const sql = requiredString(cell('query'), 'query');
  const user = nullableString(cell('user_email'));
  const startedAt = isoTimestamp(cell('creation_time'), 'creation_time');
  const endedAt = nullableIsoTimestamp(cell('end_time'));
  const runtimeMs = nullableNumber(cell('runtime_ms'));
  const success = executionSucceeded(nullableString(cell('state')), reason, message);

  return {
    id,
    sql,
    user,
    startedAt,
    endedAt,
    runtimeMs,
    success,
    errorMessage: combinedErrorMessage(reason, message),
  };
}
/**
 * Reads query history from the region-qualified
 * `INFORMATION_SCHEMA.JOBS_BY_PROJECT` view of a BigQuery project.
 */
export class BigQueryHistoricSqlQueryHistoryReader implements HistoricSqlQueryHistoryReader {
  /** Backtick-quoted path of the jobs view, built once from validated options. */
  private readonly viewPath: string;

  /**
   * @param options - Project id and region; both are validated before being placed in SQL.
   * @throws Error when the project id or region contains characters outside the allowed set.
   */
  constructor(options: BigQueryHistoricSqlQueryHistoryReaderOptions) {
    const safeProject = normalizeProjectId(options.projectId);
    const safeRegion = normalizeRegion(options.region);
    this.viewPath = '`' + safeProject + '.region-' + safeRegion + '.INFORMATION_SCHEMA.JOBS_BY_PROJECT`';
  }

  /**
   * Runs a one-row query against the jobs view to check that it is readable.
   *
   * @param client - Object exposing `executeQuery(query)`.
   * @throws HistoricSqlGrantsMissingError when the client is unusable, the query throws,
   *   or the result carries an `error`.
   */
  async probe(client: unknown): Promise<void> {
    const probeSql = `SELECT 1 FROM ${this.viewPath} LIMIT 1`;
    let outcome: QueryResultLike;
    try {
      outcome = await queryClient(client).executeQuery(probeSql);
    } catch (failure) {
      throw grantsError(failure);
    }
    // Checked outside the try so the error built here is not wrapped a second time.
    if (outcome.error) {
      throw grantsError(outcome.error);
    }
  }

  /**
   * Yields query jobs created in `[cursor ?? window.start, window.end)`, ordered by
   * creation time and job id.
   *
   * @param client - Object exposing `executeQuery(query)`.
   * @param window - Time window; `end` is the exclusive upper bound.
   * @param cursor - Optional lower bound that replaces `window.start` when provided.
   * @throws Error when the client is unusable or a timestamp is invalid.
   * @throws HistoricSqlGrantsMissingError when the result carries an `error`.
   */
  async *fetch(
    client: unknown,
    window: HistoricSqlTimeWindow,
    cursor?: string | null,
  ): AsyncIterable<HistoricSqlRawQueryRow> {
    const lowerBound = timestampExpression(cursor ?? window.start);
    const upperBound = timestampExpression(window.end);
    const sql = `
SELECT
job_id,
query,
user_email,
creation_time,
end_time,
TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND) AS runtime_ms,
total_slot_ms,
total_bytes_processed,
state,
error_result.reason AS error_reason,
error_result.message AS error_message,
statement_type
FROM ${this.viewPath}
WHERE creation_time >= ${lowerBound}
AND creation_time < ${upperBound}
AND job_type = 'QUERY'
AND query IS NOT NULL
AND (statement_type IS NULL OR statement_type != 'SCRIPT')
ORDER BY creation_time ASC, job_id ASC`.trim();

    const outcome = await queryClient(client).executeQuery(sql);
    if (outcome.error) {
      throw grantsError(outcome.error);
    }

    const columnPositions = indexByHeader(outcome.headers);
    for (const rawRow of outcome.rows) {
      yield mapRow(rawRow, columnPositions);
    }
  }
}

View file

@ -0,0 +1,251 @@
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { chunkHistoricSqlStagedDir, describeHistoricSqlScope } from './chunk.js';
/** Creates a fresh, uniquely named scratch directory for a chunking test and resolves to its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-chunk-');
  return mkdtemp(prefix);
}
/**
 * Writes `value` as pretty-printed JSON (2-space indent, trailing newline) to
 * `relPath` under `root`, creating missing parent directories first.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const filePath = join(root, relPath);
  const parentDir = join(filePath, '..');
  await mkdir(parentDir, { recursive: true });
  const body = `${JSON.stringify(value, null, 2)}\n`;
  await writeFile(filePath, body, 'utf-8');
}
/**
 * Stages a single-template historic-SQL fixture (`fp_1`) under `root`:
 * `manifest.json`, then the template's `metadata.json`, `page.md`, and `usage.json`.
 */
async function writeTemplate(root: string): Promise<void> {
  const templateDir = 'templates/fp_1';
  const pagePath = `${templateDir}/page.md`;

  const manifest = {
    source: 'historic-sql',
    connectionId: 'conn_1',
    dialect: 'snowflake',
    fetchedAt: '2026-05-04T12:00:00.000Z',
    windowStart: '2026-02-03T12:00:00.000Z',
    windowEnd: '2026-05-04T12:00:00.000Z',
    nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
    templateCount: 1,
    capped: false,
    warnings: ['source warning'],
    templates: [{ id: 'fp_1', fingerprint: 'fp_1', subClusterId: null, path: pagePath }],
  };

  const metadata = {
    id: 'fp_1',
    title: 'snowflake · analytics.orders [fp_1]',
    path: pagePath,
    objectType: 'historic_sql_template',
    lastEditedAt: null,
    properties: {
      fingerprint: 'fp_1',
      sub_cluster_id: null,
      dialect: 'snowflake',
      tables_touched: ['analytics.orders'],
      literal_slots: [{ position: 1, type: 'string', classification: 'constant' }],
      triage_signals: {
        executions_bucket: 'high',
        distinct_users_bucket: 'team',
        error_rate_bucket: 'ok',
        recency_bucket: 'active',
        service_account_only: 'false',
        slot_summary: '1 constant, 0 runtime',
      },
    },
  };

  const usage = {
    stats: {
      executions: 20,
      distinct_users: 3,
      first_seen: '2026-05-01T00:00:00.000Z',
      last_seen: '2026-05-04T11:55:00.000Z',
      p50_runtime_ms: 100,
      p95_runtime_ms: 200,
      error_rate: 0,
      rows_produced: 20,
    },
    literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 20]] }],
    samples: [],
  };

  await writeJson(root, 'manifest.json', manifest);
  // Writing metadata.json first also creates the template directory that page.md needs.
  await writeJson(root, `${templateDir}/metadata.json`, metadata);
  await writeFile(join(root, pagePath), '# fp_1\n', 'utf-8');
  await writeJson(root, `${templateDir}/usage.json`, usage);
}
/**
 * Stages a fixture with two categorical sub-cluster templates that share the
 * `fp_order_status` fingerprint: one manifest plus metadata, page, and usage
 * files for each sub-cluster.
 */
async function writeSubclusterTemplates(root: string): Promise<void> {
  const fingerprint = 'fp_order_status';
  const templates = ['cat_2b2ff2318877', 'cat_34f037ddcbfa'].map((subClusterId) => ({
    id: `${fingerprint}__${subClusterId}`,
    subClusterId,
  }));

  await writeJson(root, 'manifest.json', {
    source: 'historic-sql',
    connectionId: 'conn_1',
    dialect: 'snowflake',
    fetchedAt: '2026-05-04T12:00:00.000Z',
    windowStart: '2026-02-03T12:00:00.000Z',
    windowEnd: '2026-05-04T12:00:00.000Z',
    nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
    templateCount: 2,
    capped: false,
    warnings: [],
    templates: templates.map(({ id, subClusterId }) => ({
      id,
      fingerprint,
      subClusterId,
      path: `templates/${id}/page.md`,
    })),
  });

  for (const { id, subClusterId } of templates) {
    const dir = `templates/${id}`;
    await writeJson(root, `${dir}/metadata.json`, {
      id,
      title: `snowflake · analytics.orders [fp_ord:${subClusterId.slice(-6)}]`,
      path: `${dir}/page.md`,
      objectType: 'historic_sql_template',
      lastEditedAt: null,
      properties: {
        fingerprint,
        sub_cluster_id: subClusterId,
        dialect: 'snowflake',
        tables_touched: ['analytics.orders'],
        literal_slots: [{ position: 1, type: 'string', classification: 'categorical' }],
        triage_signals: {
          executions_bucket: 'mid',
          distinct_users_bucket: 'team',
          error_rate_bucket: 'ok',
          recency_bucket: 'active',
          service_account_only: 'false',
          slot_summary: '0 constant, 0 runtime',
        },
      },
    });
    await writeFile(join(root, `${dir}/page.md`), `# ${id}\n`, 'utf-8');
    await writeJson(root, `${dir}/usage.json`, {
      stats: {
        executions: 3,
        distinct_users: 3,
        first_seen: '2026-05-04T10:00:00.000Z',
        last_seen: '2026-05-04T10:05:00.000Z',
        p50_runtime_ms: 120,
        p95_runtime_ms: 150,
        error_rate: 0,
        rows_produced: 36,
      },
      literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 3]] }],
      samples: [],
    });
  }
}
// Suite for the historic-SQL chunker: WorkUnit emission, eviction, and scope description.
describe('chunkHistoricSqlStagedDir', () => {
  // A changed metadata.json yields one WorkUnit; usage.json rides along as a dependency only.
  it('emits one WorkUnit per changed template and keeps usage as dependency', async () => {
    const root = await tempDir();
    await writeTemplate(root);

    const { workUnits, contextReport } = await chunkHistoricSqlStagedDir(root, {
      added: ['templates/fp_1/metadata.json'],
      modified: [],
      deleted: [],
      unchanged: ['templates/fp_1/page.md', 'templates/fp_1/usage.json', 'manifest.json'],
    });

    expect(workUnits).toEqual([
      {
        unitKey: 'historic-sql-fp-1',
        displayLabel: 'snowflake · analytics.orders [fp_1]',
        rawFiles: ['templates/fp_1/metadata.json'],
        dependencyPaths: ['manifest.json', 'templates/fp_1/usage.json'],
        peerFileIndex: ['templates/fp_1/page.md'],
        notes:
          'Infer canonical query intent for this single historic-SQL template only. Read metadata.json, page.md, and usage.json for this template; do not group sibling templates in this WorkUnit.',
      },
    ]);
    expect(contextReport).toEqual({ capped: false, warnings: ['source warning'] });
  });

  // Sub-clusters of the same fingerprint are never merged into one WorkUnit.
  it('emits one WorkUnit per changed categorical sub-cluster', async () => {
    const root = await tempDir();
    await writeSubclusterTemplates(root);

    const { workUnits } = await chunkHistoricSqlStagedDir(root, {
      added: [
        'templates/fp_order_status__cat_2b2ff2318877/metadata.json',
        'templates/fp_order_status__cat_34f037ddcbfa/metadata.json',
      ],
      modified: [],
      deleted: [],
      unchanged: [
        'manifest.json',
        'templates/fp_order_status__cat_2b2ff2318877/page.md',
        'templates/fp_order_status__cat_2b2ff2318877/usage.json',
        'templates/fp_order_status__cat_34f037ddcbfa/page.md',
        'templates/fp_order_status__cat_34f037ddcbfa/usage.json',
      ],
    });

    const summaries = workUnits.map(({ unitKey, displayLabel, rawFiles, dependencyPaths }) => ({
      unitKey,
      displayLabel,
      rawFiles,
      dependencyPaths,
    }));
    expect(summaries).toEqual([
      {
        unitKey: 'historic-sql-fp-order-status-cat-2b2ff2318877',
        displayLabel: 'snowflake · analytics.orders [fp_ord:318877]',
        rawFiles: ['templates/fp_order_status__cat_2b2ff2318877/metadata.json'],
        dependencyPaths: ['manifest.json', 'templates/fp_order_status__cat_2b2ff2318877/usage.json'],
      },
      {
        unitKey: 'historic-sql-fp-order-status-cat-34f037ddcbfa',
        displayLabel: 'snowflake · analytics.orders [fp_ord:ddcbfa]',
        rawFiles: ['templates/fp_order_status__cat_34f037ddcbfa/metadata.json'],
        dependencyPaths: ['manifest.json', 'templates/fp_order_status__cat_34f037ddcbfa/usage.json'],
      },
    ]);
  });

  // usage.json alone changing must not trigger any work.
  it('emits zero WorkUnits for usage-only diffs', async () => {
    const root = await tempDir();
    await writeTemplate(root);

    const chunked = await chunkHistoricSqlStagedDir(root, {
      added: [],
      modified: ['templates/fp_1/usage.json'],
      deleted: [],
      unchanged: ['templates/fp_1/metadata.json', 'templates/fp_1/page.md', 'manifest.json'],
    });

    expect(chunked.workUnits).toEqual([]);
    expect(chunked.eviction).toBeUndefined();
  });

  // Deleted usage.json files are ignored; only metadata/page deletions evict.
  it('emits eviction only for deleted metadata or page files', async () => {
    const root = await tempDir();
    await writeTemplate(root);

    const { eviction } = await chunkHistoricSqlStagedDir(root, {
      added: [],
      modified: [],
      deleted: ['templates/fp_1/usage.json', 'templates/fp_2/page.md'],
      unchanged: [],
    });

    expect(eviction).toEqual({ deletedRawPaths: ['templates/fp_2/page.md'] });
  });

  it('describes historic-sql scope without including unrelated paths', async () => {
    const root = await tempDir();
    await writeTemplate(root);

    const { fingerprint, isPathInScope } = await describeHistoricSqlScope(root);

    // 64 characters: the length of a hex-encoded SHA-256 digest.
    expect(fingerprint).toHaveLength(64);
    expect(isPathInScope('manifest.json')).toBe(true);
    expect(isPathInScope('templates/fp_1/usage.json')).toBe(true);
    expect(isPathInScope('pages/notion/page.md')).toBe(false);
  });
});

View file

@ -0,0 +1,86 @@
import { createHash } from 'node:crypto';
import { readFile, readdir } from 'node:fs/promises';
import { join, relative } from 'node:path';
import type { ChunkResult, DiffSet, ScopeDescriptor, WorkUnit } from '../../types.js';
import { historicSqlManifestSchema, historicSqlMetadataSchema } from './types.js';
/**
 * Recursively lists every regular file under `root`.
 *
 * @returns Paths relative to `root`, with backslashes rewritten to `/`, in sorted order.
 */
async function walk(root: string): Promise<string[]> {
  const dirents = await readdir(root, { withFileTypes: true, recursive: true });
  const relPaths: string[] = [];
  for (const dirent of dirents) {
    if (!dirent.isFile()) {
      continue;
    }
    const absolute = join(dirent.parentPath, dirent.name);
    relPaths.push(relative(root, absolute).replace(/\\/g, '/'));
  }
  relPaths.sort();
  return relPaths;
}
/**
 * Builds a WorkUnit key from a template id: every run of non-alphanumeric
 * characters collapses to a single `-`, leading and trailing dashes are
 * stripped, and the result is prefixed with `historic-sql-`.
 */
function safeUnitKey(id: string): string {
  const dashed = id.replace(/[^a-zA-Z0-9]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  return `historic-sql-${trimmed}`;
}
/**
 * Loads `manifest.json` from `stagedDir` and validates it against the manifest schema.
 *
 * @throws Error prefixed with `Invalid historic-SQL manifest:` for any read,
 *   JSON-parse, or schema failure.
 */
async function readManifest(stagedDir: string) {
  try {
    const text = await readFile(join(stagedDir, 'manifest.json'), 'utf-8');
    return historicSqlManifestSchema.parse(JSON.parse(text));
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid historic-SQL manifest: ${reason}`);
  }
}
/**
 * Splits a staged historic-SQL directory into one WorkUnit per template.
 *
 * Templates are discovered through their `templates/<id>/page.md` file. When a
 * `diffSet` is given, a template produces a WorkUnit only if its `metadata.json`
 * or `page.md` was added or modified. `usage.json` and `manifest.json` are
 * carried as dependencies and never trigger work on their own.
 *
 * @param stagedDir Directory holding `manifest.json` and `templates/`.
 * @param diffSet Optional change set; without it every template is emitted.
 * @returns WorkUnits, an eviction entry for deleted metadata/page files (if any),
 *   a reconcile note, and the manifest's `capped` / `warnings` as the context report.
 */
export async function chunkHistoricSqlStagedDir(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
  const files = await walk(stagedDir);
  const manifest = await readManifest(stagedDir);
  const present = new Set(files);
  const changed = diffSet ? new Set([...diffSet.added, ...diffSet.modified]) : null;
  const isChanged = (path: string): boolean => changed === null || changed.has(path);
  const workUnits: WorkUnit[] = [];

  for (const pagePath of files) {
    if (!/^templates\/[^/]+\/page\.md$/.test(pagePath)) {
      continue;
    }
    const metadataPath = pagePath.replace(/\/page\.md$/, '/metadata.json');
    const usagePath = pagePath.replace(/\/page\.md$/, '/usage.json');

    // Primary files that exist and, when a diff is supplied, were added or modified.
    const rawFiles = [metadataPath, pagePath].filter((path) => present.has(path) && isChanged(path)).sort();
    if (rawFiles.length === 0) {
      // Only reachable with a diff: page.md always exists here, so the no-diff list is never empty.
      continue;
    }

    const metadataText = await readFile(join(stagedDir, metadataPath), 'utf-8');
    const metadata = historicSqlMetadataSchema.parse(JSON.parse(metadataText));

    const dependencyPaths: string[] = [];
    for (const candidate of ['manifest.json', present.has(usagePath) ? usagePath : null]) {
      if (typeof candidate === 'string' && !rawFiles.includes(candidate)) {
        dependencyPaths.push(candidate);
      }
    }
    dependencyPaths.sort();

    // Every other staged file is listed as a peer.
    const claimed = new Set([...rawFiles, ...dependencyPaths]);
    const peerFileIndex = files.filter((path) => !claimed.has(path)).sort();

    workUnits.push({
      unitKey: safeUnitKey(metadata.id),
      displayLabel: metadata.title,
      rawFiles,
      dependencyPaths,
      peerFileIndex,
      notes:
        'Infer canonical query intent for this single historic-SQL template only. Read metadata.json, page.md, and usage.json for this template; do not group sibling templates in this WorkUnit.',
    });
  }

  const deletedPrimary = diffSet?.deleted.filter((path) => /^templates\/[^/]+\/(metadata\.json|page\.md)$/.test(path));
  const hasDeletions = deletedPrimary !== undefined && deletedPrimary.length > 0;

  return {
    workUnits,
    eviction: hasDeletions ? { deletedRawPaths: deletedPrimary.sort() } : undefined,
    reconcileNotes: [`Historic-SQL staged templates=${manifest.templateCount}`],
    contextReport: {
      capped: manifest.capped,
      warnings: manifest.warnings,
    },
  };
}
/**
 * Describes the ingest scope of a staged historic-SQL directory.
 *
 * The fingerprint is the hex SHA-256 of the manifest's connection id, dialect,
 * and window bounds serialized as JSON. A path is in scope when it is
 * `manifest.json` or lies under `templates/`.
 */
export async function describeHistoricSqlScope(stagedDir: string): Promise<ScopeDescriptor> {
  const { connectionId, dialect, windowStart, windowEnd } = await readManifest(stagedDir);
  // Key order is part of the hashed text, so it must stay fixed.
  const scopeKey = JSON.stringify({ connectionId, dialect, windowStart, windowEnd });
  const hasher = createHash('sha256');
  hasher.update(scopeKey);
  return {
    fingerprint: hasher.digest('hex'),
    isPathInScope: (rawPath) => {
      if (rawPath === 'manifest.json') {
        return true;
      }
      return rawPath.startsWith('templates/');
    },
  };
}

View file

@ -0,0 +1,197 @@
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { detectHistoricSqlStagedDir } from './detect.js';
import {
HISTORIC_SQL_SOURCE_KEY,
historicSqlManifestSchema,
historicSqlMetadataSchema,
historicSqlPullConfigSchema,
historicSqlUsageSchema,
} from './types.js';
/** Allocates a unique temporary directory for a detection test and resolves to its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-detect-');
  const created = await mkdtemp(prefix);
  return created;
}
/**
 * Serializes `value` as 2-space-indented JSON with a trailing newline and writes
 * it to `relPath` under `root`, creating the containing directory when needed.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const destination = join(root, relPath);
  await mkdir(join(destination, '..'), { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  await writeFile(destination, `${serialized}\n`, 'utf-8');
}
// Suite for detectHistoricSqlStagedDir: manifest-based and structural detection.
describe('historic-sql staged dir detection', () => {
  it('detects manifest source', async () => {
    const root = await tempDir();
    const manifest = {
      source: HISTORIC_SQL_SOURCE_KEY,
      connectionId: 'conn_1',
      dialect: 'snowflake',
      fetchedAt: '2026-05-04T12:00:00.000Z',
      windowStart: '2026-02-03T12:00:00.000Z',
      windowEnd: '2026-05-04T12:00:00.000Z',
      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
      templateCount: 0,
      capped: false,
      warnings: [],
      templates: [],
    };
    await writeJson(root, 'manifest.json', manifest);

    await expect(detectHistoricSqlStagedDir(root)).resolves.toBe(true);
  });

  // With no manifest, a template folder holding both metadata.json and page.md is enough.
  it('detects document-shaped template structure without manifest', async () => {
    const root = await tempDir();
    const templateDir = join(root, 'templates', 'fp_1');
    await writeFile(join(root, 'not-a-match.txt'), 'x', 'utf-8');
    await mkdir(templateDir, { recursive: true });
    await writeFile(join(templateDir, 'metadata.json'), '{}', 'utf-8');
    await writeFile(join(templateDir, 'page.md'), '# fp_1\n', 'utf-8');

    await expect(detectHistoricSqlStagedDir(root)).resolves.toBe(true);
  });

  // A manifest that names a different source is rejected.
  it('does not detect unrelated directories', async () => {
    const root = await tempDir();
    await writeJson(root, 'manifest.json', { source: 'notion' });

    await expect(detectHistoricSqlStagedDir(root)).resolves.toBe(false);
  });
});
// Suite pinning defaults and accepted shapes of the historic-SQL schemas.
describe('historic-sql schemas', () => {
  // Defaults the pull-config parser is expected to fill in when fields are omitted.
  const pullConfigDefaults = {
    windowDays: 90,
    lastSuccessfulCursor: null,
    serviceAccountUserPatterns: [],
    redactionPatterns: [],
    maxTemplatesPerRun: 5000,
  };

  it('defaults disabled optional pull-config fields through the parser', () => {
    const parsed = historicSqlPullConfigSchema.parse({ dialect: 'bigquery' });

    expect(parsed).toEqual({ dialect: 'bigquery', ...pullConfigDefaults, minCalls: 5 });
  });

  it('accepts postgres pull config with a minCalls floor', () => {
    const parsed = historicSqlPullConfigSchema.parse({ dialect: 'postgres', minCalls: 12 });

    expect(parsed).toEqual({ dialect: 'postgres', ...pullConfigDefaults, minCalls: 12 });
  });

  it('accepts postgres manifest fields with defaults for older dialects', () => {
    const postgresExtras = {
      degraded: true,
      statsResetAt: '2026-05-01T00:00:00.000Z',
      baselineFirstRun: true,
      pgServerVersion: 'PostgreSQL 16.4',
      deallocCount: 3,
    };

    const postgresManifest = historicSqlManifestSchema.parse({
      source: HISTORIC_SQL_SOURCE_KEY,
      connectionId: 'conn_pg',
      dialect: 'postgres',
      fetchedAt: '2026-05-08T12:00:00.000Z',
      windowStart: '2026-05-08T11:00:00.000Z',
      windowEnd: '2026-05-08T12:00:00.000Z',
      nextSuccessfulCursor: '2026-05-08T12:00:00.000Z',
      templateCount: 0,
      capped: false,
      warnings: [],
      templates: [],
      ...postgresExtras,
    });
    expect(postgresManifest).toMatchObject({ dialect: 'postgres', ...postgresExtras });

    // A manifest without the postgres-only fields parses with neutral defaults.
    const snowflakeManifest = historicSqlManifestSchema.parse({
      source: HISTORIC_SQL_SOURCE_KEY,
      connectionId: 'conn_sf',
      dialect: 'snowflake',
      fetchedAt: '2026-05-08T12:00:00.000Z',
      windowStart: '2026-05-01T12:00:00.000Z',
      windowEnd: '2026-05-08T12:00:00.000Z',
      nextSuccessfulCursor: null,
      templateCount: 0,
      capped: false,
      warnings: [],
      templates: [],
    });
    expect(snowflakeManifest).toMatchObject({
      degraded: false,
      statsResetAt: null,
      baselineFirstRun: false,
      pgServerVersion: null,
      deallocCount: null,
    });
  });

  it('accepts postgres usage stats with mean_runtime_ms and empty samples', () => {
    const { stats, samples } = historicSqlUsageSchema.parse({
      stats: {
        executions: 25,
        distinct_users: 2,
        first_seen: '2026-05-08T10:00:00.000Z',
        last_seen: '2026-05-08T12:00:00.000Z',
        p50_runtime_ms: null,
        p95_runtime_ms: null,
        mean_runtime_ms: 32.5,
        error_rate: 0,
        rows_produced: 1042,
      },
      literal_slots: [],
      samples: [],
    });

    expect(stats.mean_runtime_ms).toBe(32.5);
    expect(samples).toEqual([]);
  });

  it('pins the Notion-compatible metadata envelope', () => {
    const envelope = {
      id: 'fp_1',
      title: 'snowflake · analytics.orders [fp_1]',
      path: 'templates/fp_1/page.md',
      objectType: 'historic_sql_template',
      lastEditedAt: null,
      properties: {
        fingerprint: 'fp_1',
        sub_cluster_id: null,
        dialect: 'snowflake',
        tables_touched: ['analytics.orders'],
        literal_slots: [{ position: 1, type: 'string', classification: 'constant' }],
        triage_signals: {
          executions_bucket: 'high',
          distinct_users_bucket: 'team',
          error_rate_bucket: 'ok',
          recency_bucket: 'active',
          service_account_only: 'false',
          slot_summary: '1 constant, 0 runtime',
        },
      },
    };

    const parsed = historicSqlMetadataSchema.parse(envelope);

    expect(parsed.objectType).toBe('historic_sql_template');
    expect(parsed.lastEditedAt).toBeNull();
    expect(parsed.properties.triage_signals.service_account_only).toBe('false');
  });
});

View file

@ -0,0 +1,37 @@
import { readFile, readdir } from 'node:fs/promises';
import { join } from 'node:path';
import { HISTORIC_SQL_SOURCE_KEY } from './types.js';
/**
 * Decides whether `stagedDir` looks like a historic-SQL staging directory.
 *
 * 1. If `manifest.json` parses and declares a `source`, that value decides:
 *    `true` when it equals the historic-SQL source key, `false` for any other source.
 * 2. Otherwise (no manifest, unreadable manifest, or no `source` field) the
 *    directory matches when some folder under `templates/` contains both a
 *    `metadata.json` and a `page.md` file.
 *
 * Never rejects: filesystem and parse failures resolve to the structural check or `false`.
 */
export async function detectHistoricSqlStagedDir(stagedDir: string): Promise<boolean> {
  try {
    const manifestText = await readFile(join(stagedDir, 'manifest.json'), 'utf-8');
    const { source } = JSON.parse(manifestText) as { source?: unknown };
    if (source === HISTORIC_SQL_SOURCE_KEY) {
      return true;
    }
    if (source !== undefined) {
      return false;
    }
  } catch {
    // Fall through to structural detection for stage-only fixtures.
  }

  try {
    const entries = await readdir(join(stagedDir, 'templates'), { withFileTypes: true, recursive: true });
    // Directories that directly contain a regular file with the given name.
    const dirsContaining = (fileName: string): Set<string> =>
      new Set(entries.filter((entry) => entry.isFile() && entry.name === fileName).map((entry) => entry.parentPath));

    const metadataDirs = dirsContaining('metadata.json');
    const pageDirs = dirsContaining('page.md');
    for (const dir of metadataDirs) {
      if (pageDirs.has(dir)) {
        return true;
      }
    }
    return false;
  } catch {
    return false;
  }
}

View file

@ -0,0 +1,61 @@
import type { HistoricSqlDialect } from './types.js';
/** Constructor options for {@link HistoricSqlGrantsMissingError}. */
interface HistoricSqlGrantsMissingErrorOptions {
  dialect: HistoricSqlDialect;
  message: string;
  remediation: string;
  cause?: unknown;
}

/**
 * Grants-related failure for a historic-SQL source. Carries the affected
 * dialect and a remediation hint alongside the standard message and optional cause.
 */
export class HistoricSqlGrantsMissingError extends Error {
  readonly dialect: HistoricSqlDialect;
  readonly remediation: string;

  constructor({ dialect, message, remediation, cause }: HistoricSqlGrantsMissingErrorOptions) {
    // Only pass an options bag to Error when a cause was actually supplied.
    super(message, cause === undefined ? undefined : { cause });
    this.name = 'HistoricSqlGrantsMissingError';
    this.dialect = dialect;
    this.remediation = remediation;
  }
}
/** Constructor options for {@link HistoricSqlExtensionMissingError}. */
interface HistoricSqlExtensionMissingErrorOptions {
  dialect: HistoricSqlDialect;
  message: string;
  remediation: string;
  cause?: unknown;
}

/**
 * Extension-related failure for a historic-SQL source. Exposes the dialect and
 * a remediation hint, and forwards an optional underlying cause to `Error`.
 */
export class HistoricSqlExtensionMissingError extends Error {
  readonly dialect: HistoricSqlDialect;
  readonly remediation: string;

  constructor(options: HistoricSqlExtensionMissingErrorOptions) {
    const hasCause = options.cause !== undefined;
    super(options.message, hasCause ? { cause: options.cause } : undefined);
    const { dialect, remediation } = options;
    this.name = 'HistoricSqlExtensionMissingError';
    this.dialect = dialect;
    this.remediation = remediation;
  }
}
/** Constructor options for {@link HistoricSqlVersionUnsupportedError}. */
interface HistoricSqlVersionUnsupportedErrorOptions {
  dialect: HistoricSqlDialect;
  detectedVersion: string;
  minimumVersion: string;
}

/**
 * Signals that the detected server version is older than historic-SQL ingest
 * requires. The message is derived from the options, which are also exposed as fields.
 */
export class HistoricSqlVersionUnsupportedError extends Error {
  readonly dialect: HistoricSqlDialect;
  readonly detectedVersion: string;
  readonly minimumVersion: string;

  constructor({ dialect, detectedVersion, minimumVersion }: HistoricSqlVersionUnsupportedErrorOptions) {
    super(
      `Unsupported ${dialect} version for historic-SQL ingest: detected ${detectedVersion}; requires ${minimumVersion} or newer.`,
    );
    this.name = 'HistoricSqlVersionUnsupportedError';
    this.dialect = dialect;
    this.detectedVersion = detectedVersion;
    this.minimumVersion = minimumVersion;
  }
}

View file

@ -0,0 +1,304 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
import { HistoricSqlSourceAdapter } from './historic-sql.adapter.js';
import { pgssBaselinePath } from './stage-pgss.js';
import type { HistoricSqlQueryHistoryReader, PostgresPgssReader } from './types.js';
/** Makes a new uniquely named temp directory for an adapter test and resolves to its path. */
async function tempDir(): Promise<string> {
  const scratchPrefix = join(tmpdir(), 'historic-sql-adapter-');
  return mkdtemp(scratchPrefix);
}
/**
 * Writes `value` to `relPath` inside `root` as indented JSON followed by a
 * newline. Parent directories are created on demand.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const outFile = join(root, relPath);
  const outDir = join(outFile, '..');
  await mkdir(outDir, { recursive: true });
  const json = JSON.stringify(value, null, 2);
  await writeFile(outFile, `${json}\n`, 'utf-8');
}
// Stub SQL analyzer: every query maps to the same fingerprint and one string literal slot.
const sqlAnalysis: SqlAnalysisPort = {
  analyzeForFingerprint: async () => ({
    fingerprint: 'fp_1',
    normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?',
    tablesTouched: ['analytics.orders'],
    literalSlots: [{ position: 1, type: 'string', exampleValue: 'paid' }],
  }),
};
// Stub per-execution reader: probing always succeeds and fetch yields one successful query.
const reader: HistoricSqlQueryHistoryReader = {
  async probe() {
    // Nothing to verify for the stub.
  },
  async *fetch() {
    const onlyRow = {
      id: 'q1',
      sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'",
      user: 'analyst',
      startedAt: '2026-05-04T11:00:00.000Z',
      endedAt: null,
      runtimeMs: 10,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    };
    yield onlyRow;
  },
};
// Suite for HistoricSqlSourceAdapter: metadata, fetch dispatch, triage signals, and pull hooks.
describe('HistoricSqlSourceAdapter', () => {
  // Pull-config fields shared by every fetch call in this suite.
  const basePullConfig = {
    windowDays: 90,
    lastSuccessfulCursor: null,
    serviceAccountUserPatterns: [],
    redactionPatterns: [],
    maxTemplatesPerRun: 5000,
  };

  // Snowflake manifest fixture; only the template list varies between tests.
  const snowflakeManifest = (
    templates: Array<{ id: string; fingerprint: string; subClusterId: string | null; path: string }>,
  ) => ({
    source: 'historic-sql',
    connectionId: 'conn_1',
    dialect: 'snowflake',
    fetchedAt: '2026-05-04T12:00:00.000Z',
    windowStart: '2026-02-03T12:00:00.000Z',
    windowEnd: '2026-05-04T12:00:00.000Z',
    nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
    templateCount: templates.length,
    capped: false,
    warnings: [],
    templates,
  });

  it('declares canonical adapter metadata', () => {
    const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });

    expect(adapter.source).toBe('historic-sql');
    expect(adapter.skillNames).toEqual(['historic_sql_ingest']);
    expect(adapter.reconcileSkillNames).toEqual(['historic_sql_curator']);
    expect(adapter.evidenceIndexing).toBe('documents');
    expect(adapter.triageSupported).toBe(true);
  });

  it('fetches staged templates through injected reader and SqlAnalysisPort', async () => {
    const stagedDir = await tempDir();
    const adapter = new HistoricSqlSourceAdapter({
      sqlAnalysis,
      reader,
      queryClient: {},
      now: () => new Date('2026-05-04T12:00:00.000Z'),
    });

    await adapter.fetch({ dialect: 'snowflake', ...basePullConfig }, stagedDir, {
      connectionId: 'conn_1',
      sourceKey: 'historic-sql',
    });

    await expect(adapter.detect(stagedDir)).resolves.toBe(true);
  });

  it('reads triage signals from usage.json and metadata properties', async () => {
    const stagedDir = await tempDir();
    const triageSignals = {
      executions_bucket: 'high',
      distinct_users_bucket: 'team',
      error_rate_bucket: 'ok',
      recency_bucket: 'active',
      service_account_only: 'false',
      slot_summary: '1 constant, 0 runtime',
    };

    await writeJson(
      stagedDir,
      'manifest.json',
      snowflakeManifest([{ id: 'fp_1', fingerprint: 'fp_1', subClusterId: null, path: 'templates/fp_1/page.md' }]),
    );
    await writeJson(stagedDir, 'templates/fp_1/metadata.json', {
      id: 'fp_1',
      title: 'snowflake · analytics.orders [fp_1]',
      path: 'templates/fp_1/page.md',
      objectType: 'historic_sql_template',
      lastEditedAt: null,
      properties: {
        fingerprint: 'fp_1',
        sub_cluster_id: null,
        dialect: 'snowflake',
        tables_touched: ['analytics.orders'],
        literal_slots: [{ position: 1, type: 'string', classification: 'constant' }],
        triage_signals: { ...triageSignals },
      },
    });
    await writeFile(join(stagedDir, 'templates/fp_1/page.md'), '# fp_1\n', 'utf-8');
    await writeJson(stagedDir, 'templates/fp_1/usage.json', {
      stats: {
        executions: 20,
        distinct_users: 3,
        first_seen: '2026-05-01T00:00:00.000Z',
        last_seen: '2026-05-04T11:55:00.000Z',
        p50_runtime_ms: 100,
        p95_runtime_ms: 200,
        error_rate: 0,
      },
      literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 20]] }],
      samples: [],
    });

    const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });

    // lastEditedAt comes from usage stats (last_seen), not from the metadata's null lastEditedAt.
    await expect(adapter.getTriageSignals(stagedDir, 'fp_1')).resolves.toEqual({
      objectType: 'historic_sql_template',
      lastEditedAt: '2026-05-04T11:55:00.000Z',
      propertyHints: triageSignals,
    });
  });

  it('dispatches postgres fetches through PGSS staging and writes the baseline only after pull success', async () => {
    const stagedDir = await tempDir();
    const baselineRootDir = await tempDir();
    const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');

    // Any use of the per-execution reader on the postgres path fails the test.
    const forbidden = 'per-execution reader must not be used for postgres';
    const unusedPerExecutionReader: HistoricSqlQueryHistoryReader = {
      async probe() {
        throw new Error(forbidden);
      },
      async *fetch() {
        throw new Error(forbidden);
      },
    };

    const postgresReader: PostgresPgssReader = {
      async probe() {
        return { pgServerVersion: 'PostgreSQL 16.4', warnings: [] };
      },
      async readSnapshot() {
        return {
          statsResetAt: '2026-05-08T08:00:00.000Z',
          deallocCount: 0,
          rows: [
            {
              queryid: '901',
              userid: '11',
              username: 'analyst',
              dbid: '5',
              database: 'warehouse',
              query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
              calls: 9,
              totalExecTime: 90,
              meanExecTime: 10,
              totalRows: 18,
            },
          ],
        };
      },
    };

    const adapter = new HistoricSqlSourceAdapter({
      sqlAnalysis,
      reader: unusedPerExecutionReader,
      queryClient: {},
      postgresReader,
      postgresQueryClient: {
        async executeQuery() {
          return { headers: [], rows: [] };
        },
      },
      postgresBaselineRootDir: baselineRootDir,
      now: () => new Date('2026-05-08T12:00:00.000Z'),
    });

    await adapter.fetch({ dialect: 'postgres', ...basePullConfig, minCalls: 5 }, stagedDir, {
      connectionId: 'conn_pg',
      sourceKey: 'historic-sql',
    });

    const manifestText = await readFile(join(stagedDir, 'manifest.json'), 'utf-8');
    const manifest = JSON.parse(manifestText) as {
      dialect: string;
      baselineFirstRun: boolean;
      templates: Array<{ id: string }>;
    };
    expect(manifest.dialect).toBe('postgres');
    expect(manifest.baselineFirstRun).toBe(true);
    expect(manifest.templates).toEqual([
      { id: 'db5_q901', fingerprint: 'fp_1', subClusterId: null, path: 'templates/db5_q901/page.md' },
    ]);

    // The baseline must not exist until the pull is reported as succeeded.
    await expect(readFile(baselinePath, 'utf-8')).rejects.toMatchObject({ code: 'ENOENT' });

    await adapter.onPullSucceeded({
      connectionId: 'conn_pg',
      sourceKey: 'historic-sql',
      syncId: 'sync_pg',
      trigger: 'scheduled_pull',
      completedAt: new Date('2026-05-08T12:01:00.000Z'),
      stagedDir,
    });

    const baselineText = await readFile(baselinePath, 'utf-8');
    const baseline = JSON.parse(baselineText) as {
      fetchedAt: string;
      templates: Record<string, { perUser: Record<string, { calls: number }> }>;
    };
    expect(baseline.fetchedAt).toBe('2026-05-08T12:00:00.000Z');
    expect(baseline.templates.db5_q901.perUser['11'].calls).toBe(9);
  });

  it('fails postgres fetches clearly when no PGSS reader is configured', async () => {
    const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });
    const stagedDir = await tempDir();

    const attempt = adapter.fetch({ dialect: 'postgres', ...basePullConfig, minCalls: 5 }, stagedDir, {
      connectionId: 'conn_pg',
      sourceKey: 'historic-sql',
    });

    await expect(attempt).rejects.toThrow('Historic SQL Postgres fetch requires deps.postgresReader');
  });

  it('forwards manifest cursor through onPullSucceeded without changing the SourceAdapter signature', async () => {
    const stagedDir = await tempDir();
    await writeJson(stagedDir, 'manifest.json', snowflakeManifest([]));

    const onPullSucceeded = vi.fn(async () => {});
    const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {}, onPullSucceeded });
    const completedAt = new Date('2026-05-04T12:01:00.000Z');

    await adapter.onPullSucceeded({
      connectionId: 'conn_1',
      sourceKey: 'historic-sql',
      syncId: 'sync_1',
      trigger: 'scheduled_pull',
      completedAt,
      stagedDir,
    });

    // The hook receives the original context plus the cursor read from the manifest.
    expect(onPullSucceeded).toHaveBeenCalledWith({
      connectionId: 'conn_1',
      sourceKey: 'historic-sql',
      syncId: 'sync_1',
      trigger: 'scheduled_pull',
      completedAt,
      stagedDir,
      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
    });
  });
});

View file

@ -0,0 +1,135 @@
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';
import type {
ChunkResult,
DiffSet,
FetchContext,
IngestTrigger,
ScopeDescriptor,
SourceAdapter,
TriageSignals,
} from '../../types.js';
import { chunkHistoricSqlStagedDir, describeHistoricSqlScope } from './chunk.js';
import { detectHistoricSqlStagedDir } from './detect.js';
import { stageHistoricSqlTemplates } from './stage.js';
import {
pgssBaselinePath,
stagePgStatStatementsTemplates,
writePgssBaselineAtomic,
type StagePgStatStatementsTemplatesResult,
} from './stage-pgss.js';
import {
historicSqlManifestSchema,
historicSqlMetadataSchema,
historicSqlPullConfigSchema,
historicSqlUsageSchema,
type HistoricSqlSourceAdapterDeps,
} from './types.js';
export class HistoricSqlSourceAdapter implements SourceAdapter {
readonly source = 'historic-sql';
readonly skillNames = ['historic_sql_ingest'];
readonly reconcileSkillNames = ['historic_sql_curator'];
readonly evidenceIndexing = 'documents' as const;
readonly triageSupported = true;
private readonly pendingPgssBaselines = new Map<string, StagePgStatStatementsTemplatesResult>();
constructor(private readonly deps: HistoricSqlSourceAdapterDeps) {}
/** Reports whether `stagedDir` is a historic-SQL staging directory; delegates to `detectHistoricSqlStagedDir`. */
detect(stagedDir: string): Promise<boolean> {
return detectHistoricSqlStagedDir(stagedDir);
}
/**
 * Validates the pull config and stages templates into `stagedDir`.
 *
 * The `postgres` dialect is staged through `stagePgStatStatementsTemplates`,
 * which needs `deps.postgresReader` and a query client exposing `executeQuery`.
 * Its result is stored in `pendingPgssBaselines`, keyed by `stagedDir`. Every
 * other dialect is staged through `stageHistoricSqlTemplates` using the
 * per-execution reader.
 *
 * @throws Error when the postgres reader or a usable postgres query client is missing.
 */
async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
  const config = historicSqlPullConfigSchema.parse(pullConfig);

  if (config.dialect !== 'postgres') {
    await stageHistoricSqlTemplates({
      stagedDir,
      connectionId: ctx.connectionId,
      queryClient: this.deps.queryClient,
      reader: this.deps.reader,
      sqlAnalysis: this.deps.sqlAnalysis,
      pullConfig: config,
      now: this.deps.now?.(),
    });
    return;
  }

  const pgssReader = this.deps.postgresReader;
  if (!pgssReader) {
    throw new Error('Historic SQL Postgres fetch requires deps.postgresReader');
  }

  // Fall back to the generic client, then check at runtime that it can run queries.
  const candidate = this.deps.postgresQueryClient ?? this.deps.queryClient;
  const canExecute =
    typeof candidate === 'object' &&
    candidate !== null &&
    'executeQuery' in candidate &&
    typeof (candidate as { executeQuery?: unknown }).executeQuery === 'function';
  if (!canExecute) {
    throw new Error('Historic SQL Postgres fetch requires deps.postgresQueryClient with executeQuery(sql, params?)');
  }

  const staged = await stagePgStatStatementsTemplates({
    stagedDir,
    connectionId: ctx.connectionId,
    queryClient: candidate as NonNullable<HistoricSqlSourceAdapterDeps['postgresQueryClient']>,
    reader: pgssReader,
    sqlAnalysis: this.deps.sqlAnalysis,
    pullConfig: config,
    baselinePath: pgssBaselinePath(this.deps.postgresBaselineRootDir, ctx.connectionId),
    now: this.deps.now?.(),
  });
  this.pendingPgssBaselines.set(stagedDir, staged);
}
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
return chunkHistoricSqlStagedDir(stagedDir, diffSet);
}
describeScope(stagedDir: string): Promise<ScopeDescriptor> {
return describeHistoricSqlScope(stagedDir);
}
async getTriageSignals(stagedDir: string, externalId: string): Promise<TriageSignals> {
const manifest = historicSqlManifestSchema.parse(
JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')),
);
const template = manifest.templates.find((entry) => entry.id === externalId);
if (!template) {
return {};
}
const templateDir = template.path.replace(/\/page\.md$/, '');
const metadata = historicSqlMetadataSchema.parse(
JSON.parse(await readFile(join(stagedDir, templateDir, 'metadata.json'), 'utf-8')),
);
const usage = historicSqlUsageSchema.parse(
JSON.parse(await readFile(join(stagedDir, templateDir, 'usage.json'), 'utf-8')),
);
return {
objectType: metadata.objectType,
lastEditedAt: usage.stats.last_seen,
propertyHints: metadata.properties.triage_signals,
};
}
async onPullSucceeded(ctx: {
connectionId: string;
sourceKey: string;
syncId: string;
trigger: IngestTrigger;
completedAt: Date;
stagedDir: string;
}): Promise<void> {
const manifest = historicSqlManifestSchema.parse(
JSON.parse(await readFile(join(ctx.stagedDir, 'manifest.json'), 'utf-8')),
);
if (manifest.dialect === 'postgres') {
const pending = this.pendingPgssBaselines.get(ctx.stagedDir);
if (pending) {
await writePgssBaselineAtomic(pending.baselinePath, pending.baseline);
this.pendingPgssBaselines.delete(ctx.stagedDir);
}
}
await this.deps.onPullSucceeded?.({ ...ctx, nextSuccessfulCursor: manifest.nextSuccessfulCursor });
}
}

View file

@ -0,0 +1,281 @@
import { describe, expect, it, vi } from 'vitest';
import {
HistoricSqlExtensionMissingError,
HistoricSqlGrantsMissingError,
HistoricSqlVersionUnsupportedError,
} from './errors.js';
import { PostgresPgssQueryHistoryReader } from './postgres-pgss-query-history-reader.js';
/**
 * Shape of a canned query result handed back by the fake query client below.
 * `error`, when set to a non-empty string, is how a result-level failure is signalled.
 */
interface FakeQueryResult {
// Column names, positionally aligned with each entry of `rows`.
headers: string[];
// One inner array per result row.
rows: unknown[][];
totalRows?: number;
error?: string;
}
/**
 * Builds a fake query client whose `executeQuery` mock replays `results` in order.
 *
 * Each call consumes the next queued entry (the passed array is drained in place):
 * an `Error` entry is thrown, a result entry is returned, and running out of
 * entries throws `unexpected query`.
 */
function queryClient(results: Array<FakeQueryResult | Error>) {
  return {
    executeQuery: vi.fn(async (_query: string, _params?: unknown[]) => {
      const upcoming = results.shift();
      if (upcoming instanceof Error) {
        throw upcoming;
      }
      if (!upcoming) {
        throw new Error('unexpected query');
      }
      return upcoming;
    }),
  };
}
/**
 * Returns the SQL text passed to the `index`-th recorded `executeQuery` call.
 *
 * @throws Error when the mock was not called that many times.
 */
function executedSql(client: ReturnType<typeof queryClient>, index: number): string {
  const recorded = client.executeQuery.mock.calls[index];
  if (recorded === undefined) {
    throw new Error(`expected query client call ${index}`);
  }
  const [sql] = recorded;
  return sql;
}
// Behavioural tests for PostgresPgssQueryHistoryReader, driven by a fake query
// client that replays queued results in the order the reader issues queries.
describe('PostgresPgssQueryHistoryReader', () => {
// Happy path: the five queued results answer the five probe queries asserted below, in order.
it('probes version, extension presence, grants, and tracking state', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4 on x86_64-apple-darwin']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['top']] },
{ headers: ['max'], rows: [['5000']] },
]);
const reader = new PostgresPgssQueryHistoryReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4 on x86_64-apple-darwin',
warnings: [],
});
expect(executedSql(client, 0)).toContain("current_setting('server_version_num')::int");
expect(executedSql(client, 1)).toBe('SELECT 1 FROM pg_stat_statements LIMIT 1');
expect(executedSql(client, 2)).toBe(
"SELECT pg_has_role(current_user, 'pg_read_all_stats', 'USAGE') AS has_role",
);
expect(executedSql(client, 3)).toBe("SELECT current_setting('pg_stat_statements.track') AS track");
expect(executedSql(client, 4)).toBe("SELECT current_setting('pg_stat_statements.max') AS max");
});
// Version gate: server_version_num below 140000 must fail after exactly one query.
// NOTE(review): the second queued result is never consumed (the call count is asserted to be 1).
it('rejects PostgreSQL versions older than 14 without probing the extension', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[130012, 'PostgreSQL 13.12']],
},
{
headers: ['stats_reset', 'dealloc'],
rows: [[new Date('2026-05-01T00:00:00.000Z'), 7]],
},
]);
const reader = new PostgresPgssQueryHistoryReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlVersionUnsupportedError',
dialect: 'postgres',
detectedVersion: 'PostgreSQL 13.12',
minimumVersion: 'PostgreSQL 14',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlVersionUnsupportedError);
expect(client.executeQuery).toHaveBeenCalledTimes(1);
});
// The extension probe throws a "relation does not exist" error, which must be mapped.
it('maps a missing pg_stat_statements relation to HistoricSqlExtensionMissingError', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
new Error('relation "pg_stat_statements" does not exist'),
]);
const reader = new PostgresPgssQueryHistoryReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlExtensionMissingError',
dialect: 'postgres',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError);
});
// Same error class as above, but with the preload-specific message.
it('maps pg_stat_statements preload failures to HistoricSqlExtensionMissingError with preload remediation', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
new Error('pg_stat_statements must be loaded via shared_preload_libraries'),
]);
const reader = new PostgresPgssQueryHistoryReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlExtensionMissingError',
dialect: 'postgres',
message: 'pg_stat_statements is installed but not loaded via shared_preload_libraries.',
remediation: expect.stringContaining("shared_preload_libraries includes 'pg_stat_statements'"),
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError);
});
// has_role = false on the grant probe must surface as a grants error.
it('maps missing pg_read_all_stats membership to HistoricSqlGrantsMissingError', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[false]] },
]);
const reader = new PostgresPgssQueryHistoryReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'postgres',
remediation: 'GRANT pg_read_all_stats TO <connection role>;',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
// track = 'none' is reported as a warning; the probe still resolves.
it('returns a warning instead of failing when pg_stat_statements.track is none', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['none']] },
{ headers: ['max'], rows: [['5000']] },
]);
const reader = new PostgresPgssQueryHistoryReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4',
warnings: [
"pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config",
],
});
});
// max = '1000' (a string) is parsed and compared against the 5000 floor named in the warning.
it('warns when pg_stat_statements.max is below the recommended floor', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['top']] },
{ headers: ['max'], rows: [['1000']] },
]);
const reader = new PostgresPgssQueryHistoryReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4',
warnings: [
'pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn',
],
});
});
// Snapshot read: the first row supplies numerics as strings, the second as numbers;
// both must come out as numbers. Query parameters are passed as [minCalls, maxTemplates].
it('reads a parameterized pg_stat_statements snapshot and stats info', async () => {
const client = queryClient([
{
headers: [
'queryid',
'userid',
'username',
'dbid',
'database',
'query',
'calls',
'total_exec_time',
'mean_exec_time',
'total_rows',
],
rows: [
[
'922337203685477580',
'16384',
'analyst',
'16385',
'warehouse',
'SELECT count(*) FROM public.orders WHERE status = $1',
'42',
'2100.5',
'50.0119',
'9001',
],
[
'922337203685477581',
'16386',
'unknown',
'16385',
'warehouse',
'SELECT * FROM public.customers WHERE id = $1',
5,
30,
6,
5,
],
],
},
{
headers: ['stats_reset', 'dealloc'],
rows: [[new Date('2026-05-01T00:00:00.000Z'), 7]],
},
]);
const reader = new PostgresPgssQueryHistoryReader();
await expect(reader.readSnapshot(client, { minCalls: 5, maxTemplates: 500 })).resolves.toEqual({
statsResetAt: '2026-05-01T00:00:00.000Z',
deallocCount: 7,
rows: [
{
queryid: '922337203685477580',
userid: '16384',
username: 'analyst',
dbid: '16385',
database: 'warehouse',
query: 'SELECT count(*) FROM public.orders WHERE status = $1',
calls: 42,
totalExecTime: 2100.5,
meanExecTime: 50.0119,
totalRows: 9001,
},
{
queryid: '922337203685477581',
userid: '16386',
username: 'unknown',
dbid: '16385',
database: 'warehouse',
query: 'SELECT * FROM public.customers WHERE id = $1',
calls: 5,
totalExecTime: 30,
meanExecTime: 6,
totalRows: 5,
},
],
});
const snapshotSql = executedSql(client, 0);
expect(snapshotSql).toContain('FROM pg_stat_statements s');
expect(snapshotSql).toContain('LEFT JOIN pg_roles');
expect(snapshotSql).toContain('LEFT JOIN pg_database');
expect(snapshotSql).toContain('WHERE s.toplevel = true');
expect(snapshotSql).toContain('AND s.calls >= $1');
expect(snapshotSql).toContain('ORDER BY s.total_exec_time DESC');
expect(snapshotSql).toContain('LIMIT $2');
expect(client.executeQuery.mock.calls[0]?.[1]).toEqual([5, 500]);
expect(executedSql(client, 1)).toBe('SELECT stats_reset, dealloc FROM pg_stat_statements_info');
});
});

View file

@ -0,0 +1,262 @@
import {
HistoricSqlExtensionMissingError,
HistoricSqlGrantsMissingError,
HistoricSqlVersionUnsupportedError,
} from './errors.js';
import type {
KloPostgresQueryClient,
PostgresPgssProbeResult,
PostgresPgssReader,
PostgresPgssRow,
PostgresPgssSnapshot,
} from './types.js';
/**
 * Minimal result shape this reader consumes from the query client.
 * A non-empty string `error` is treated as a failed query by `execute`.
 */
interface QueryResultLike {
// Column names, positionally aligned with each entry of `rows`.
headers: string[];
rows: unknown[][];
totalRows?: number;
error?: string;
}
// Probe 1: numeric server version (compared against 140000) plus the full version banner.
const VERSION_SQL = `
SELECT current_setting('server_version_num')::int AS server_version_num,
version() AS server_version
`.trim();
// Probe 2: touching the view fails when the extension is missing or not preloaded.
const EXTENSION_PROBE_SQL = 'SELECT 1 FROM pg_stat_statements LIMIT 1';
// Probe 3: does the connection role have pg_read_all_stats?
const GRANTS_PROBE_SQL = "SELECT pg_has_role(current_user, 'pg_read_all_stats', 'USAGE') AS has_role";
// Probe 4: tracking mode; 'none' produces a warning.
const TRACKING_PROBE_SQL = "SELECT current_setting('pg_stat_statements.track') AS track";
// Probe 5: configured pg_stat_statements.max, compared against RECOMMENDED_PGSS_MAX.
const MAX_SETTING_PROBE_SQL = "SELECT current_setting('pg_stat_statements.max') AS max";
// A pg_stat_statements.max below this value produces a probe warning.
const RECOMMENDED_PGSS_MAX = 5000;
// Reset timestamp and deallocation counter read alongside each snapshot.
const STATS_INFO_SQL = 'SELECT stats_reset, dealloc FROM pg_stat_statements_info';
// Snapshot query. $1 = minimum call count, $2 = row limit. Ids are cast to text,
// and role/database names are resolved via LEFT JOINs with 'unknown' as the role fallback.
const SNAPSHOT_SQL = `
SELECT
s.queryid::text AS queryid,
s.userid::text AS userid,
COALESCE(r.rolname, 'unknown') AS username,
s.dbid::text AS dbid,
d.datname AS database,
s.query,
s.calls,
s.total_exec_time,
s.mean_exec_time,
s.rows AS total_rows
FROM pg_stat_statements s
LEFT JOIN pg_roles r ON s.userid = r.oid
LEFT JOIN pg_database d ON s.dbid = d.oid
WHERE s.toplevel = true
AND s.calls >= $1
ORDER BY s.total_exec_time DESC
LIMIT $2
`.trim();
// Remediation text attached to HistoricSqlExtensionMissingError.
const POSTGRES_EXTENSION_REMEDIATION = [
'Run CREATE EXTENSION pg_stat_statements; against the connection database.',
"Ensure shared_preload_libraries includes 'pg_stat_statements' in the Postgres parameter group or config.",
].join(' ');
// Remediation text attached to HistoricSqlGrantsMissingError.
const POSTGRES_GRANTS_REMEDIATION = 'GRANT pg_read_all_stats TO <connection role>;';
/**
 * Validates that `client` is an object exposing an `executeQuery` function and
 * returns it typed as a Postgres query client.
 *
 * @throws Error when the value does not have a callable `executeQuery`.
 */
function queryClient(client: unknown): KloPostgresQueryClient {
  const usable =
    typeof client === 'object' &&
    client !== null &&
    'executeQuery' in client &&
    typeof (client as { executeQuery?: unknown }).executeQuery === 'function';
  if (!usable) {
    throw new Error('Historic SQL Postgres PGSS reader requires a query client with executeQuery(sql, params?)');
  }
  return client as KloPostgresQueryClient;
}
/**
 * Runs `sql` through the client and turns a result-level error into an exception.
 *
 * @returns The client's result when it carries no non-empty string `error`.
 * @throws Error carrying the result's `error` text when one is present.
 */
async function execute(client: KloPostgresQueryClient, sql: string, params?: unknown[]): Promise<QueryResultLike> {
  const outcome = await client.executeQuery(sql, params);
  const failure = 'error' in outcome ? outcome.error : undefined;
  if (typeof failure === 'string' && failure.length > 0) {
    throw new Error(failure);
  }
  return outcome;
}
/**
 * Maps each lower-cased header name to its column position.
 * When a name repeats, the last occurrence wins.
 */
function indexes(headers: string[]): Map<string, number> {
  return headers.reduce(
    (positions, header, position) => positions.set(header.toLowerCase(), position),
    new Map<string, number>(),
  );
}
/**
 * Reads the cell for `header` (matched case-insensitively against the
 * lower-cased index) from `row`, or `null` when the header is not indexed.
 */
function value(row: unknown[], headerIndexes: Map<string, number>, header: string): unknown {
  const position = headerIndexes.get(header.toLowerCase());
  if (position === undefined) {
    return null;
  }
  return row[position];
}
/**
 * Stringifies a cell, collapsing `null`, `undefined` and the empty string to `null`.
 */
function nullableString(raw: unknown): string | null {
  if (raw == null) {
    return null;
  }
  const text = String(raw);
  return text === '' ? null : text;
}
/**
 * Like `nullableString`, but a missing or empty value is an error.
 *
 * @param field Column name used in the error message.
 * @throws Error when the cell has no usable text.
 */
function requiredString(raw: unknown, field: string): string {
  const text = nullableString(raw);
  if (text === null || text === '') {
    throw new Error(`Postgres pg_stat_statements row is missing ${field}`);
  }
  return text;
}
/**
 * Coerces a required numeric cell to a finite number.
 *
 * Numbers are taken as-is; other values go through `Number(...)`. Absent values
 * (`null`, `undefined`) and blank strings are rejected explicitly: `Number(null)`
 * and `Number('')` both evaluate to 0, which would let a missing column pass as a
 * legitimate zero for a field that is supposed to be required.
 *
 * @param field Column name used in the error message.
 * @throws Error when the value is absent, blank, or not a finite number.
 */
function requiredFiniteNumber(raw: unknown, field: string): number {
  const absent = raw === null || raw === undefined || (typeof raw === 'string' && raw.trim().length === 0);
  const number = absent ? Number.NaN : typeof raw === 'number' ? raw : Number(raw);
  if (!Number.isFinite(number)) {
    throw new Error(`Postgres pg_stat_statements row has invalid ${field}: ${String(raw)}`);
  }
  return number;
}
/**
 * Coerces an optional cell to an integer (truncating toward zero).
 * Absent values, the empty string and anything non-finite yield `null`.
 */
function nullableInteger(raw: unknown): number | null {
  if (raw == null || raw === '') {
    return null;
  }
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) {
    return null;
  }
  return Math.trunc(parsed);
}
/**
 * Converts an optional timestamp cell to an ISO-8601 string.
 *
 * Accepts `Date` instances and anything `new Date(String(raw))` can parse.
 * Absent values, the empty string and unparseable input yield `null`. An invalid
 * `Date` instance is also mapped to `null`: calling `toISOString()` on one throws
 * `RangeError`, which would make this "nullable" helper fail for Date input while
 * quietly returning `null` for the equivalent bad string.
 */
function nullableIsoTimestamp(raw: unknown): string | null {
  if (raw === null || raw === undefined || raw === '') {
    return null;
  }
  const date = raw instanceof Date ? raw : new Date(String(raw));
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}
/**
 * Returns the first row of a result together with its header index.
 *
 * @param context Short label for the query, used in the error message.
 * @throws Error when the result contains no rows.
 */
function firstRow(result: QueryResultLike, context: string): { row: unknown[]; headers: Map<string, number> } {
  const [row] = result.rows;
  if (!row) {
    throw new Error(`Postgres historic-SQL ${context} query returned no rows`);
  }
  const headers = indexes(result.headers);
  return { row, headers };
}
/**
 * True when the error text says the `pg_stat_statements` relation does not exist.
 * Non-`Error` values are matched against their string form.
 */
function isMissingPgssRelation(error: unknown): boolean {
  let text: string;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }
  return /relation ["']?pg_stat_statements["']? does not exist/i.test(text);
}
/**
 * True when the error text mentions `pg_stat_statements` followed, on the same
 * line, by `shared_preload_libraries`. Non-`Error` values are matched against
 * their string form.
 */
function isPgssPreloadRequired(error: unknown): boolean {
  let text: string;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }
  return /pg_stat_statements.*shared_preload_libraries/i.test(text);
}
/**
 * Builds the Postgres "extension missing" error with the standard remediation.
 *
 * @param cause Underlying failure, preserved on the error.
 * @param message Optional override for the default "not installed" message.
 */
function extensionMissingError(cause: unknown, message?: string): HistoricSqlExtensionMissingError {
  const text = message ?? 'pg_stat_statements extension is not installed in the connection database.';
  return new HistoricSqlExtensionMissingError({
    dialect: 'postgres',
    message: text,
    remediation: POSTGRES_EXTENSION_REMEDIATION,
    cause,
  });
}
/** Builds the Postgres "missing pg_read_all_stats" error with its GRANT remediation. */
function grantsMissingError(): HistoricSqlGrantsMissingError {
  const message = 'Postgres connection role lacks pg_read_all_stats for historic-SQL ingest.';
  return new HistoricSqlGrantsMissingError({
    dialect: 'postgres',
    message,
    remediation: POSTGRES_GRANTS_REMEDIATION,
  });
}
/**
 * Converts one raw snapshot row into a typed `PostgresPgssRow`.
 *
 * Ids and the query text are required strings; `username` and `database` may be
 * null. `calls` and `totalRows` are truncated to integers, while the timing
 * fields keep their fractional part.
 *
 * @throws Error when a required field is missing or a numeric field is invalid.
 */
function mapSnapshotRow(row: unknown[], headerIndexes: Map<string, number>): PostgresPgssRow {
  const cell = (header: string): unknown => value(row, headerIndexes, header);
  const text = (header: string): string => requiredString(cell(header), header);
  const finite = (header: string): number => requiredFiniteNumber(cell(header), header);

  // Fields are evaluated in declaration order, so the first bad field is the one reported.
  return {
    queryid: text('queryid'),
    userid: text('userid'),
    username: nullableString(cell('username')),
    dbid: text('dbid'),
    database: nullableString(cell('database')),
    query: text('query'),
    calls: Math.trunc(finite('calls')),
    totalExecTime: finite('total_exec_time'),
    meanExecTime: finite('mean_exec_time'),
    totalRows: Math.trunc(finite('total_rows')),
  };
}
/**
 * Reads query history from Postgres via the `pg_stat_statements` extension.
 */
export class PostgresPgssQueryHistoryReader implements PostgresPgssReader {
  /**
   * Checks that the connection can be used for historic-SQL ingest.
   *
   * Queries run strictly in this order, stopping at the first failure:
   * server version, extension presence, role grants, tracking mode, max setting.
   *
   * @returns The server version banner plus any non-fatal configuration warnings.
   * @throws HistoricSqlVersionUnsupportedError when `server_version_num` is below 140000.
   * @throws HistoricSqlExtensionMissingError when the extension is absent or not preloaded.
   * @throws HistoricSqlGrantsMissingError when the grant probe does not return `true`.
   */
  async probe(client: unknown): Promise<PostgresPgssProbeResult> {
    const pg = queryClient(client);

    const version = firstRow(await execute(pg, VERSION_SQL), 'version probe');
    const serverVersionNum = requiredFiniteNumber(
      value(version.row, version.headers, 'server_version_num'),
      'server_version_num',
    );
    const pgServerVersion = requiredString(value(version.row, version.headers, 'server_version'), 'server_version');
    if (serverVersionNum < 140000) {
      throw new HistoricSqlVersionUnsupportedError({
        dialect: 'postgres',
        detectedVersion: pgServerVersion,
        minimumVersion: 'PostgreSQL 14',
      });
    }

    try {
      await execute(pg, EXTENSION_PROBE_SQL);
    } catch (error) {
      if (isMissingPgssRelation(error)) {
        throw extensionMissingError(error);
      }
      // Anything that is not a recognised extension problem is rethrown untouched.
      throw isPgssPreloadRequired(error)
        ? extensionMissingError(error, 'pg_stat_statements is installed but not loaded via shared_preload_libraries.')
        : error;
    }

    const grants = firstRow(await execute(pg, GRANTS_PROBE_SQL), 'grant probe');
    if (value(grants.row, grants.headers, 'has_role') !== true) {
      throw grantsMissingError();
    }

    const tracking = firstRow(await execute(pg, TRACKING_PROBE_SQL), 'tracking probe');
    const track = nullableString(value(tracking.row, tracking.headers, 'track'));

    const maxSetting = firstRow(await execute(pg, MAX_SETTING_PROBE_SQL), 'max-setting probe');
    const pgssMax = nullableInteger(value(maxSetting.row, maxSetting.headers, 'max'));

    const trackWarnings: string[] =
      track === 'none'
        ? ['pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config']
        : [];
    const maxWarnings: string[] =
      pgssMax !== null && pgssMax < RECOMMENDED_PGSS_MAX
        ? [
            `pg_stat_statements.max is ${pgssMax}; set it to at least ${RECOMMENDED_PGSS_MAX} to reduce query-template eviction churn`,
          ]
        : [];

    return { pgServerVersion, warnings: [...trackWarnings, ...maxWarnings] };
  }

  /**
   * Reads the current statement snapshot followed by the stats-info row.
   *
   * @param options.minCalls Bound as `$1` (minimum call count) in the snapshot query.
   * @param options.maxTemplates Bound as `$2` (row limit) in the snapshot query.
   * @throws Error when the stats-info query returns no rows or a snapshot row is malformed.
   */
  async readSnapshot(
    client: unknown,
    options: { minCalls: number; maxTemplates: number },
  ): Promise<PostgresPgssSnapshot> {
    const pg = queryClient(client);
    const { minCalls, maxTemplates } = options;

    const snapshot = await execute(pg, SNAPSHOT_SQL, [minCalls, maxTemplates]);
    const snapshotHeaders = indexes(snapshot.headers);

    const stats = firstRow(await execute(pg, STATS_INFO_SQL), 'stats-info');

    return {
      statsResetAt: nullableIsoTimestamp(value(stats.row, stats.headers, 'stats_reset')),
      deallocCount: nullableInteger(value(stats.row, stats.headers, 'dealloc')),
      rows: snapshot.rows.map((row) => mapSnapshotRow(row, snapshotHeaders)),
    };
  }
}

View file

@ -0,0 +1,193 @@
import { describe, expect, it, vi } from 'vitest';
import { HistoricSqlGrantsMissingError } from './errors.js';
import { SnowflakeHistoricSqlQueryHistoryReader } from './snowflake-query-history-reader.js';
/**
 * Shape of a canned query result returned by the fake Snowflake query client below.
 * A set `error` is how a result-level failure is signalled to the reader.
 */
interface FakeQueryResult {
// Column names, positionally aligned with each entry of `rows`.
headers: string[];
rows: unknown[][];
totalRows: number;
error?: string;
}
/**
 * Builds a fake query client whose `executeQuery` mock replays `results` in order.
 * Each call consumes the next queued entry (the passed array is drained in place);
 * running out of entries throws `unexpected query`.
 */
function queryClient(results: FakeQueryResult[]) {
  return {
    executeQuery: vi.fn(async (_query: string) => {
      const upcoming = results.shift();
      if (upcoming) {
        return upcoming;
      }
      throw new Error('unexpected query');
    }),
  };
}
/**
 * Returns the SQL text of the first recorded `executeQuery` call.
 *
 * @throws Error when the mock was never called.
 */
function firstQuery(client: ReturnType<typeof queryClient>): string {
  const [recorded] = client.executeQuery.mock.calls;
  if (recorded === undefined) {
    throw new Error('expected query client to be called');
  }
  const [sql] = recorded;
  return sql;
}
// Behavioural tests for SnowflakeHistoricSqlQueryHistoryReader, driven by a fake
// query client that replays queued results.
describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
// The probe issues exactly the single-row query asserted below.
it('probes SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', async () => {
const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).resolves.toBeUndefined();
expect(client.executeQuery).toHaveBeenCalledWith(
'SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY LIMIT 1',
);
});
// A result carrying `error` (rather than a thrown exception) is also a grants failure.
it('turns probe result errors into HistoricSqlGrantsMissingError', async () => {
const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Object does not exist or not authorized' }]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'snowflake',
remediation: 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;',
});
});
// An exception thrown by the client during the probe is wrapped the same way.
it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => {
const client = {
executeQuery: vi.fn(async () => {
throw new Error('permission denied');
}),
};
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
// Row mapping: the first row is a successful query with string timestamps, the second
// a failed query with a Date start time and null end/runtime/rows. The cursor
// (2026-05-03) replaces the window start as the lower bound in the generated SQL.
it('fetches query-history rows with cursor and maps them into RawQueryRow shape', async () => {
const client = queryClient([
{
headers: [
'QUERY_ID',
'QUERY_TEXT',
'USER_NAME',
'ROLE_NAME',
'WAREHOUSE_NAME',
'DATABASE_NAME',
'SCHEMA_NAME',
'START_TIME',
'END_TIME',
'TOTAL_ELAPSED_TIME',
'ROWS_PRODUCED',
'EXECUTION_STATUS',
'ERROR_CODE',
'ERROR_MESSAGE',
],
rows: [
[
'01a',
"SELECT count(*) FROM ANALYTICS.ORDERS WHERE STATUS = 'paid'",
'ANALYST_A',
'ANALYST_ROLE',
'WH_XS',
'ANALYTICS',
'PUBLIC',
'2026-05-04T10:00:00.000Z',
'2026-05-04T10:00:01.250Z',
1250,
12,
'SUCCESS',
null,
null,
],
[
'01b',
'SELECT * FROM MISSING_TABLE',
'ANALYST_B',
'ANALYST_ROLE',
'WH_XS',
'ANALYTICS',
'PUBLIC',
new Date('2026-05-04T10:05:00.000Z'),
null,
null,
null,
'FAILED_WITH_ERROR',
'002003',
'SQL compilation error',
],
],
totalRows: 2,
},
]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
const rows = [];
for await (const row of reader.fetch(
client,
{
start: new Date('2026-05-01T00:00:00.000Z'),
end: new Date('2026-05-04T12:00:00.000Z'),
},
'2026-05-03T00:00:00.000Z',
)) {
rows.push(row);
}
expect(client.executeQuery).toHaveBeenCalledTimes(1);
const sql = firstQuery(client);
expect(sql).toContain('FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
expect(sql).toContain("START_TIME >= '2026-05-03T00:00:00.000Z'::TIMESTAMP_TZ");
expect(sql).toContain("START_TIME < '2026-05-04T12:00:00.000Z'::TIMESTAMP_TZ");
expect(sql).toContain('ORDER BY START_TIME ASC, QUERY_ID ASC');
expect(sql).toContain('ROWS_PRODUCED');
expect(rows).toEqual([
{
id: '01a',
sql: "SELECT count(*) FROM ANALYTICS.ORDERS WHERE STATUS = 'paid'",
user: 'ANALYST_A',
startedAt: '2026-05-04T10:00:00.000Z',
endedAt: '2026-05-04T10:00:01.250Z',
runtimeMs: 1250,
rowsProduced: 12,
success: true,
errorMessage: null,
},
{
id: '01b',
sql: 'SELECT * FROM MISSING_TABLE',
user: 'ANALYST_B',
startedAt: '2026-05-04T10:05:00.000Z',
endedAt: null,
runtimeMs: null,
rowsProduced: null,
success: false,
errorMessage: '002003: SQL compilation error',
},
]);
});
// Without a cursor the lower bound is the window start.
it('uses the window start when no cursor is available', async () => {
const client = queryClient([{ headers: ['QUERY_ID'], rows: [], totalRows: 0 }]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
for await (const _row of reader.fetch(client, {
start: new Date('2026-02-03T12:00:00.000Z'),
end: new Date('2026-05-04T12:00:00.000Z'),
})) {
throw new Error('empty result should not yield rows');
}
const sql = firstQuery(client);
expect(sql).toContain("START_TIME >= '2026-02-03T12:00:00.000Z'::TIMESTAMP_TZ");
});
// A client without executeQuery fails with the reader's own validation message.
it('throws a clear error when the query client cannot execute SQL', async () => {
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(async () => {
for await (const _row of reader.fetch({}, { start: new Date(), end: new Date() })) {
throw new Error('unreachable');
}
}).rejects.toThrow('Historic SQL Snowflake reader requires a query client with executeQuery(query)');
});
});

View file

@ -0,0 +1,203 @@
import { HistoricSqlGrantsMissingError } from './errors.js';
import type { HistoricSqlQueryHistoryReader, HistoricSqlRawQueryRow, HistoricSqlTimeWindow } from './types.js';
/**
 * Minimal result shape this reader consumes from the query client.
 * A truthy `error` is treated as a failed query by `probe` and `fetch`.
 */
interface QueryResultLike {
// Column names, positionally aligned with each entry of `rows`.
headers: string[];
rows: unknown[][];
totalRows: number;
error?: string;
}
/** The only client capability this reader relies on: running a single SQL string. */
interface QueryClientLike {
executeQuery(query: string): Promise<QueryResultLike>;
}
// Single-row query used by `probe` to check that QUERY_HISTORY can be read.
const PROBE_SQL = 'SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY LIMIT 1';
// Remediation text attached to every HistoricSqlGrantsMissingError built by this module.
const SNOWFLAKE_GRANTS_REMEDIATION =
'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;';
/**
 * Validates that `client` is an object exposing an `executeQuery` function and
 * returns it typed as a query client.
 *
 * @throws Error when the value does not have a callable `executeQuery`.
 */
function queryClient(client: unknown): QueryClientLike {
  const usable =
    typeof client === 'object' &&
    client !== null &&
    'executeQuery' in client &&
    typeof (client as { executeQuery?: unknown }).executeQuery === 'function';
  if (!usable) {
    throw new Error('Historic SQL Snowflake reader requires a query client with executeQuery(query)');
  }
  return client as QueryClientLike;
}
/**
 * Wraps a failure in a Snowflake `HistoricSqlGrantsMissingError`.
 *
 * The detail appended to the message is the cause's own message (for `Error`s),
 * the cause itself (for strings), or a generic sentence for anything else.
 */
function grantsError(cause: unknown): HistoricSqlGrantsMissingError {
  let detail = 'Snowflake role cannot query SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY.';
  if (cause instanceof Error) {
    detail = cause.message;
  } else if (typeof cause === 'string') {
    detail = cause;
  }
  return new HistoricSqlGrantsMissingError({
    dialect: 'snowflake',
    message: `Missing Snowflake audit grants for historic-SQL ingest: ${detail}`,
    remediation: SNOWFLAKE_GRANTS_REMEDIATION,
    cause,
  });
}
/**
 * Renders a timestamp as a quoted ISO-8601 literal cast to `TIMESTAMP_TZ`.
 *
 * String input is parsed with `new Date(...)` first, so only its normalised ISO
 * form ever reaches the SQL text.
 *
 * @throws Error when the value does not represent a valid date.
 */
function timestampLiteral(value: Date | string): string {
  const moment = value instanceof Date ? value : new Date(value);
  const epochMs = moment.getTime();
  if (Number.isNaN(epochMs)) {
    throw new Error(`Invalid Snowflake query-history timestamp: ${String(value)}`);
  }
  const quoted = moment.toISOString().replace(/'/g, "''");
  return `'${quoted}'::TIMESTAMP_TZ`;
}
/**
 * Builds the QUERY_HISTORY select for a time window.
 *
 * The lower bound is the cursor when one is given, otherwise the window start.
 * It is inclusive (`>=`) while the upper bound is exclusive (`<`). Both bounds go
 * through `timestampLiteral`, so an unparseable cursor or window value throws.
 * Rows are ordered by start time, then query id.
 */
function queryHistorySql(window: HistoricSqlTimeWindow, cursor?: string | null): string {
// `??` means only a null/undefined cursor falls back to the window start.
const start = timestampLiteral(cursor ?? window.start);
const end = timestampLiteral(window.end);
return `
SELECT
QUERY_ID,
QUERY_TEXT,
USER_NAME,
ROLE_NAME,
WAREHOUSE_NAME,
DATABASE_NAME,
SCHEMA_NAME,
START_TIME,
END_TIME,
TOTAL_ELAPSED_TIME,
ROWS_PRODUCED,
EXECUTION_STATUS,
ERROR_CODE,
ERROR_MESSAGE
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
WHERE START_TIME >= ${start}
AND START_TIME < ${end}
AND QUERY_TEXT IS NOT NULL
ORDER BY START_TIME ASC, QUERY_ID ASC`.trim();
}
/**
 * Maps each upper-cased header name to its column position.
 * When a name repeats, the last occurrence wins.
 */
function indexByHeader(headers: string[]): Map<string, number> {
  return headers.reduce(
    (positions, header, position) => positions.set(header.toUpperCase(), position),
    new Map<string, number>(),
  );
}
/**
 * Reads the cell for column `name` from `row`, or `null` when the column is not indexed.
 *
 * The header index built by this module stores upper-cased names, so the lookup
 * tries the name exactly as given first and then its upper-cased form. This keeps
 * existing upper-case callers unchanged while making the lookup case-insensitive,
 * in line with how header names are normalised when the index is built.
 */
function value(row: unknown[], indexes: Map<string, number>, name: string): unknown {
  const index = indexes.get(name) ?? indexes.get(name.toUpperCase());
  return index === undefined ? null : row[index];
}
/**
 * Stringifies a cell, collapsing `null`, `undefined` and the empty string to `null`.
 */
function nullableString(raw: unknown): string | null {
  if (raw == null) {
    return null;
  }
  const text = String(raw);
  return text === '' ? null : text;
}
/**
 * Like `nullableString`, but a missing or empty value is an error.
 *
 * @param field Column name used in the error message.
 * @throws Error when the cell has no usable text.
 */
function requiredString(raw: unknown, field: string): string {
  const text = nullableString(raw);
  if (text === null || text === '') {
    throw new Error(`Snowflake QUERY_HISTORY row is missing ${field}`);
  }
  return text;
}
/**
 * Coerces an optional cell to a finite number.
 * Absent values, the empty string and anything non-finite yield `null`.
 */
function nullableNumber(raw: unknown): number | null {
  if (raw == null || raw === '') {
    return null;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Integer variant of `nullableNumber`: truncates toward zero, passing `null` through.
 */
function nullableInteger(raw: unknown): number | null {
  const parsed = nullableNumber(raw);
  if (parsed === null) {
    return null;
  }
  return Math.trunc(parsed);
}
/**
 * Converts a required timestamp cell to an ISO-8601 string.
 *
 * `Date` instances are serialised directly; anything else must stringify to
 * text that parses as a date.
 *
 * @param field Column name used in error messages.
 * @throws Error when the cell is missing or its text is not a valid date.
 */
function isoTimestamp(raw: unknown, field: string): string {
  if (raw instanceof Date) {
    return raw.toISOString();
  }
  const text = requiredString(raw, field);
  const parsed = new Date(text);
  const valid = !Number.isNaN(parsed.getTime());
  if (valid) {
    return parsed.toISOString();
  }
  throw new Error(`Snowflake QUERY_HISTORY row has invalid ${field}: ${text}`);
}
/**
 * Optional variant of `isoTimestamp`: absent values and the empty string give
 * `null`; present values are converted, and errors are labelled `END_TIME`.
 */
function nullableIsoTimestamp(raw: unknown): string | null {
  const absent = raw == null || raw === '';
  return absent ? null : isoTimestamp(raw, 'END_TIME');
}
/**
 * Decides whether a query-history row represents a successful execution.
 *
 * Any non-empty error code or message means failure. Otherwise the row counts
 * as successful when the status is absent or starts with `SUCCESS`
 * (case-insensitive).
 */
function executionSucceeded(status: string | null, errorCode: string | null, errorMessage: string | null): boolean {
  const reportedFailure = Boolean(errorCode) || Boolean(errorMessage);
  if (reportedFailure) {
    return false;
  }
  if (status === null) {
    return true;
  }
  return status.toUpperCase().startsWith('SUCCESS');
}
/**
 * Merges an error code and message into one string.
 *
 * When both are non-empty the result is `code: message`. Otherwise the message
 * is returned unless it is null, in which case the code is returned.
 */
function combinedErrorMessage(errorCode: string | null, errorMessage: string | null): string | null {
  const haveBoth = Boolean(errorCode) && Boolean(errorMessage);
  if (haveBoth) {
    return `${errorCode}: ${errorMessage}`;
  }
  return errorMessage == null ? errorCode : errorMessage;
}
/**
 * Converts one raw QUERY_HISTORY row into a `HistoricSqlRawQueryRow`.
 *
 * `QUERY_ID`, `QUERY_TEXT` and `START_TIME` are required; the remaining fields
 * are nullable. `success` and `errorMessage` are both derived from the status,
 * error code and error message columns.
 *
 * @throws Error when a required column is missing or a timestamp is invalid.
 */
function mapRow(row: unknown[], indexes: Map<string, number>): HistoricSqlRawQueryRow {
  const cell = (column: string): unknown => value(row, indexes, column);

  const errorCode = nullableString(cell('ERROR_CODE'));
  const errorMessage = nullableString(cell('ERROR_MESSAGE'));
  const rowsProduced = nullableInteger(cell('ROWS_PRODUCED'));

  // Fields are evaluated in declaration order, so the first bad field is the one reported.
  return {
    id: requiredString(cell('QUERY_ID'), 'QUERY_ID'),
    sql: requiredString(cell('QUERY_TEXT'), 'QUERY_TEXT'),
    user: nullableString(cell('USER_NAME')),
    startedAt: isoTimestamp(cell('START_TIME'), 'START_TIME'),
    endedAt: nullableIsoTimestamp(cell('END_TIME')),
    runtimeMs: nullableNumber(cell('TOTAL_ELAPSED_TIME')),
    rowsProduced,
    success: executionSucceeded(nullableString(cell('EXECUTION_STATUS')), errorCode, errorMessage),
    errorMessage: combinedErrorMessage(errorCode, errorMessage),
  };
}
/**
 * Reads query history from `SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY`.
 */
export class SnowflakeHistoricSqlQueryHistoryReader implements HistoricSqlQueryHistoryReader {
  /**
   * Verifies that the connection can read QUERY_HISTORY.
   *
   * Every failure on this path — an unusable client, a thrown query error, or a
   * result carrying `error` — is reported as a grants problem.
   *
   * @throws HistoricSqlGrantsMissingError when the probe query cannot be run.
   */
  async probe(client: unknown): Promise<void> {
    let outcome: QueryResultLike;
    try {
      // Client validation happens inside the try so that it is wrapped as well.
      outcome = await queryClient(client).executeQuery(PROBE_SQL);
    } catch (failure) {
      throw grantsError(failure);
    }
    if (outcome.error) {
      throw grantsError(outcome.error);
    }
  }

  /**
   * Runs a single history query for `window` and yields the mapped rows in result order.
   *
   * @param cursor Optional lower bound that replaces `window.start` when provided.
   * @throws Error when the client has no `executeQuery`, or a row is malformed.
   * @throws HistoricSqlGrantsMissingError when the result carries `error`.
   *   NOTE(review): this applies to any result-level error, not just permission
   *   failures — confirm that is the intended classification.
   */
  async *fetch(
    client: unknown,
    window: HistoricSqlTimeWindow,
    cursor?: string | null,
  ): AsyncIterable<HistoricSqlRawQueryRow> {
    const sql = queryHistorySql(window, cursor);
    const outcome = await queryClient(client).executeQuery(sql);
    if (outcome.error) {
      throw grantsError(outcome.error);
    }

    const columns = indexByHeader(outcome.headers);
    // Rows are mapped lazily, one per iteration, so a bad row only fails once it is reached.
    for (const raw of outcome.rows) {
      yield mapRow(raw, columns);
    }
  }
}

View file

@ -0,0 +1,152 @@
import { mkdir, mkdtemp, readdir, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { dirname, join, relative } from 'node:path';
import { describe, expect, it } from 'vitest';
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
import { stagePgStatStatementsTemplates, writePgssBaselineAtomic, type PgssBaseline } from './stage-pgss.js';
import type { HistoricSqlPullConfig, KloPostgresQueryClient, PostgresPgssReader, PostgresPgssRow } from './types.js';
// Directory holding one sub-directory per golden fixture; each contains an input.json.
const FIXTURE_ROOT = join(__dirname, '__fixtures__/postgres');
/**
 * Shape of a golden fixture's `input.json`: the canned reader/analysis inputs for
 * one staging run plus the output that run is expected to produce.
 */
interface GoldenFixture {
name: string;
// NOTE(review): presumably the clock value and connection id passed to the stager — confirm in the test body.
now: string;
connectionId: string;
// Returned verbatim by the fixture reader's probe().
probe: {
pgServerVersion: string;
warnings: string[];
};
// Returned by the fixture reader's readSnapshot(), with rows capped at maxTemplates.
snapshot: {
statsResetAt: string | null;
deallocCount: number | null;
rows: PostgresPgssRow[];
};
pullConfig: HistoricSqlPullConfig & { dialect: 'postgres' };
// Canned SQL-analysis results keyed by the exact SQL text being analysed.
analysisBySql: Record<
string,
{
fingerprint: string;
normalizedSql: string;
tablesTouched: string[];
literalSlots: [];
error?: string;
}
>;
// Baseline written before the run; null means no baseline file is created.
baseline: PgssBaseline | null;
expectedBaseline: PgssBaseline;
// Expected staged files keyed by path relative to the staged dir; entries with a
// `json` key are compared after JSON.parse, all others as exact text.
expectedFiles: Record<string, { json?: unknown; text?: string }>;
}
/** Loads and parses `<FIXTURE_ROOT>/<name>/input.json` as a golden fixture. */
async function readFixture(name: string): Promise<GoldenFixture> {
  const inputPath = join(FIXTURE_ROOT, name, 'input.json');
  const raw = await readFile(inputPath, 'utf-8');
  return JSON.parse(raw) as GoldenFixture;
}
/** Creates a fresh temporary directory whose name starts with `prefix` and returns its path. */
async function tempDir(prefix: string): Promise<string> {
  const template = join(tmpdir(), prefix);
  return mkdtemp(template);
}
/** Query client stub whose `executeQuery` always resolves to an empty result. */
function fakePgClient(): KloPostgresQueryClient {
  return {
    executeQuery: async () => ({ headers: [], rows: [] }),
  };
}
/**
 * Builds a PGSS reader that replays a fixture: `probe` returns the fixture's
 * probe object and `readSnapshot` returns the fixture snapshot with its rows
 * truncated to `options.maxTemplates`.
 */
function fixtureReader(fixture: GoldenFixture): PostgresPgssReader {
  return {
    probe: async () => fixture.probe,
    readSnapshot: async (_client, options) => {
      const { statsResetAt, deallocCount, rows } = fixture.snapshot;
      return {
        statsResetAt,
        deallocCount,
        rows: rows.slice(0, options.maxTemplates),
      };
    },
  };
}
/**
 * Builds a SQL analyzer backed by the fixture's `analysisBySql` table. SQL
 * without an entry yields an empty result carrying a "missing fixture
 * analysis" error instead of throwing.
 */
function fixtureSqlAnalysis(fixture: GoldenFixture): SqlAnalysisPort {
  return {
    analyzeForFingerprint: async (sql) => {
      const canned = fixture.analysisBySql[sql];
      if (canned) {
        return canned;
      }
      return {
        fingerprint: '',
        normalizedSql: '',
        tablesTouched: [],
        literalSlots: [],
        error: `missing fixture analysis for ${sql}`,
      };
    },
  };
}
/** Persists the fixture's starting baseline at `path`; a `null` baseline leaves the file absent. */
async function writeFixtureBaseline(path: string, baseline: PgssBaseline | null): Promise<void> {
  if (baseline) {
    await writePgssBaselineAtomic(path, baseline);
  }
}
/**
 * Recursively lists every non-directory entry below `current`, as paths
 * relative to `root`. Entries appear in `readdir` order, with each
 * sub-directory expanded in place.
 */
async function listFiles(root: string, current = root): Promise<string[]> {
  const collected: string[] = [];
  for (const dirent of await readdir(current, { withFileTypes: true })) {
    const absolute = join(current, dirent.name);
    if (!dirent.isDirectory()) {
      collected.push(relative(root, absolute));
      continue;
    }
    const nested = await listFiles(root, absolute);
    collected.push(...nested);
  }
  return collected;
}
/**
 * Asserts that `stagedDir` contains exactly the expected set of files and
 * that each file matches its golden content. Entries that have a `json` key
 * are compared structurally after parsing; all others are compared as raw text.
 */
async function expectGoldenFiles(stagedDir: string, expectedFiles: GoldenFixture['expectedFiles']): Promise<void> {
  const stagedPaths = (await listFiles(stagedDir)).sort();
  const goldenPaths = Object.keys(expectedFiles).sort();
  expect(stagedPaths).toEqual(goldenPaths);
  for (const relPath of goldenPaths) {
    const golden = expectedFiles[relPath];
    const contents = await readFile(join(stagedDir, relPath), 'utf-8');
    if (!('json' in golden)) {
      expect(contents).toBe(golden.text);
      continue;
    }
    expect(JSON.parse(contents)).toEqual(golden.json);
  }
}
// Golden-file suite: each named fixture directory supplies the inputs and the
// exact staged files / returned baseline the run must reproduce.
describe('stagePgStatStatementsTemplates golden fixtures', () => {
it.each(['first-run', 'normal-delta', 'reset-detected', 'version-change', 'eviction-churn'] as const)(
'matches the committed %s golden output',
async (fixtureName) => {
const fixture = await readFixture(fixtureName);
// Staged output and the baseline cache live in separate sub-directories of one
// temp root, so the baseline file never shows up in the staged-file listing.
const root = await tempDir(`pgss-golden-${fixtureName}-`);
const stagedDir = join(root, 'staged');
const baselinePath = join(root, 'cache', fixture.connectionId, 'pgss-baseline.json');
await mkdir(dirname(baselinePath), { recursive: true });
// No file is written when the fixture's baseline is null.
await writeFixtureBaseline(baselinePath, fixture.baseline);
const result = await stagePgStatStatementsTemplates({
stagedDir,
connectionId: fixture.connectionId,
queryClient: fakePgClient(),
reader: fixtureReader(fixture),
sqlAnalysis: fixtureSqlAnalysis(fixture),
pullConfig: fixture.pullConfig,
baselinePath,
// Pin the clock so timestamps in the staged files are reproducible.
now: new Date(fixture.now),
});
await expectGoldenFiles(stagedDir, fixture.expectedFiles);
expect(result.baseline).toEqual(fixture.expectedBaseline);
},
);
});

View file

@ -0,0 +1,652 @@
import { mkdtemp, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
import {
pgssBaselinePath,
readPgssBaseline,
stagePgStatStatementsTemplates,
writePgssBaselineAtomic,
type PgssBaseline,
} from './stage-pgss.js';
import { historicSqlManifestSchema, historicSqlMetadataSchema, historicSqlUsageSchema } from './types.js';
import type { KloPostgresQueryClient, PostgresPgssReader, PostgresPgssRow } from './types.js';
/** Creates a unique directory under the OS temp dir, named with the given prefix, and resolves to its path. */
async function tempDir(prefix: string): Promise<string> {
  const template = join(tmpdir(), prefix);
  const created = await mkdtemp(template);
  return created;
}
/** Reads `<root>/<relPath>` as UTF-8 and parses it as JSON; the result is cast to `T` without validation. */
async function readJson<T>(root: string, relPath: string): Promise<T> {
  const raw = await readFile(join(root, relPath), 'utf-8');
  return JSON.parse(raw) as T;
}
/** Stub query client: every `executeQuery` call resolves to an empty header/row set. */
function fakePgClient(): KloPostgresQueryClient {
  return {
    executeQuery: async () => ({ headers: [], rows: [] }),
  };
}
/**
 * Builds a PGSS row for tests. `queryid` and `query` are required; every
 * other field falls back to a fixed default and can be overridden.
 */
function row(overrides: Partial<PostgresPgssRow> & Pick<PostgresPgssRow, 'queryid' | 'query'>): PostgresPgssRow {
  const defaults = {
    userid: '11',
    username: 'analyst',
    dbid: '5',
    database: 'warehouse',
    calls: 10,
    totalExecTime: 250,
    meanExecTime: 25,
    totalRows: 20,
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds a spy-wrapped PGSS reader for tests.
 *
 * `probe` reports the given server version and warnings (defaults:
 * `'PostgreSQL 16.4'` and no warnings). `readSnapshot` returns the given rows
 * truncated to `options.maxTemplates`, together with `statsResetAt` and
 * `deallocCount`.
 *
 * `statsResetAt` and `deallocCount` fall back to their defaults only when the
 * field is omitted. An explicit `null` is passed through, so tests can
 * simulate a snapshot that reports no reset timestamp or dealloc counter.
 */
function fakeReader(input: {
  pgServerVersion?: string;
  warnings?: string[];
  statsResetAt?: string | null;
  deallocCount?: number | null;
  rows: PostgresPgssRow[];
}): PostgresPgssReader {
  return {
    probe: vi.fn(async () => ({
      pgServerVersion: input.pgServerVersion ?? 'PostgreSQL 16.4',
      warnings: input.warnings ?? [],
    })),
    readSnapshot: vi.fn(async (_client, options) => ({
      // `??` would also replace an explicit null, which the input type allows.
      statsResetAt: input.statsResetAt === undefined ? '2026-05-08T08:00:00.000Z' : input.statsResetAt,
      deallocCount: input.deallocCount === undefined ? 0 : input.deallocCount,
      rows: input.rows.slice(0, options.maxTemplates),
    })),
  };
}
/**
 * Deterministic analyzer stub keyed on substrings of the SQL text:
 * "broken" yields a parse failure, "customers" the customers fingerprint,
 * and anything else the orders fingerprint.
 */
const sqlAnalysis: SqlAnalysisPort = {
  analyzeForFingerprint: async (sql) => {
    const analyzed = (fingerprint: string, normalizedSql: string, table: string) => ({
      fingerprint,
      normalizedSql,
      tablesTouched: [table],
      literalSlots: [],
    });
    // "broken" is checked first so it wins even if the SQL also mentions customers.
    if (sql.includes('broken')) {
      return {
        fingerprint: '',
        normalizedSql: '',
        tablesTouched: [],
        literalSlots: [],
        error: 'parse failed',
      };
    }
    return sql.includes('customers')
      ? analyzed('fp_customers', 'SELECT count(*) FROM analytics.customers', 'analytics.customers')
      : analyzed('fp_orders', 'SELECT count(*) FROM analytics.orders WHERE status = $1', 'analytics.orders');
  },
};
/** Returns a fresh Postgres pull config for tests; only the per-run template cap is adjustable (default 5000). */
function postgresPullConfig(maxTemplatesPerRun = 5000) {
  const dialect = 'postgres' as const;
  return {
    dialect,
    windowDays: 90,
    lastSuccessfulCursor: null,
    serviceAccountUserPatterns: ['^svc_'],
    redactionPatterns: ['secret'],
    maxTemplatesPerRun,
    minCalls: 5,
  };
}
describe('stagePgStatStatementsTemplates', () => {
// First run (no baseline file on disk): cumulative counters are staged as-is.
// Of the four rows only queryid 101 survives: 102 touches pg_catalog and 103 is
// a BEGIN statement (both hard-skipped), and 104 fails analysis ("broken").
it('stages first-run PGSS templates as degraded aggregate templates and builds a next baseline', async () => {
const stagedDir = await tempDir('pgss-stage-first-');
const baselineRootDir = await tempDir('pgss-baseline-first-');
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
const result = await stagePgStatStatementsTemplates({
stagedDir,
connectionId: 'conn_pg',
queryClient: fakePgClient(),
reader: fakeReader({
warnings: ['pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config'],
deallocCount: 2,
rows: [
row({
queryid: '101',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 10,
totalExecTime: 250,
totalRows: 20,
}),
row({
queryid: '102',
query: 'SELECT * FROM pg_catalog.pg_class',
calls: 50,
totalExecTime: 500,
}),
row({
queryid: '103',
query: 'BEGIN',
calls: 75,
totalExecTime: 75,
}),
row({
queryid: '104',
query: 'SELECT broken FROM analytics.orders',
calls: 8,
totalExecTime: 80,
}),
],
}),
sqlAnalysis,
pullConfig: postgresPullConfig(),
baselinePath,
now: new Date('2026-05-08T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
expect(manifest).toMatchObject({
source: 'historic-sql',
connectionId: 'conn_pg',
dialect: 'postgres',
fetchedAt: '2026-05-08T12:00:00.000Z',
windowEnd: '2026-05-08T12:00:00.000Z',
nextSuccessfulCursor: '2026-05-08T12:00:00.000Z',
templateCount: 1,
capped: false,
degraded: true,
statsResetAt: '2026-05-08T08:00:00.000Z',
baselineFirstRun: true,
pgServerVersion: 'PostgreSQL 16.4',
deallocCount: 2,
});
// Warning order follows the pipeline: probe warnings, dealloc notice,
// baseline/reset detection, then per-template analysis failures.
expect(manifest.warnings).toEqual([
'pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config',
'pgss_dealloc_count:2; pg_stat_statements.max may be too low, causing template eviction churn',
'baseline_first_run:no_previous_pgss_baseline',
'analysis_failed:db5_q104',
]);
expect(manifest.templates).toEqual([
{
id: 'db5_q101',
fingerprint: 'fp_orders',
subClusterId: null,
path: 'templates/db5_q101/page.md',
},
]);
const metadata = historicSqlMetadataSchema.parse(await readJson(stagedDir, 'templates/db5_q101/metadata.json'));
expect(metadata).toMatchObject({
id: 'db5_q101',
title: 'postgres · analytics.orders [db5_q101]',
path: 'templates/db5_q101/page.md',
objectType: 'historic_sql_template',
lastEditedAt: null,
properties: {
fingerprint: 'fp_orders',
sub_cluster_id: null,
dialect: 'postgres',
tables_touched: ['analytics.orders'],
literal_slots: [],
},
});
// 10 executions -> 'mid' (3..49); one user -> 'solo'; mean 25 ms -> 'fast' (< 100).
expect(metadata.properties.triage_signals).toEqual({
executions_bucket: 'mid',
distinct_users_bucket: 'solo',
error_rate_bucket: 'ok',
recency_bucket: 'active',
service_account_only: 'false',
runtime_bucket: 'fast',
});
const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q101/usage.json'));
// mean_runtime_ms = 250 / 10; first_seen equals the run time because there is no baseline entry.
expect(usage).toEqual({
stats: {
executions: 10,
distinct_users: 1,
first_seen: '2026-05-08T12:00:00.000Z',
last_seen: '2026-05-08T12:00:00.000Z',
p50_runtime_ms: null,
p95_runtime_ms: null,
mean_runtime_ms: 25,
error_rate: 0,
rows_produced: 20,
},
literal_slots: [],
samples: [],
});
expect(await readFile(join(stagedDir, 'templates/db5_q101/page.md'), 'utf-8')).toContain(
'SELECT count(*) FROM analytics.orders WHERE status = $1',
);
expect(result.baselinePath).toBe(baselinePath);
expect(result.baseline.templates.db5_q101.perUser['11']).toEqual({
calls: 10,
totalExecTime: 250,
totalRows: 20,
});
// The stage function only returns the next baseline; it must not write it to disk itself.
await expect(readPgssBaseline(baselinePath)).resolves.toBeNull();
});
// A positive dealloc counter in the snapshot must surface both as a manifest
// field and as a human-readable eviction-churn warning.
it('warns when pg_stat_statements reports dealloc churn', async () => {
const root = await tempDir('pgss-churn-');
const stagedDir = join(root, 'staged');
const baselinePath = join(root, 'cache', 'warehouse', 'pgss-baseline.json');
await stagePgStatStatementsTemplates({
stagedDir,
connectionId: 'warehouse',
queryClient: fakePgClient(),
reader: fakeReader({
rows: [
row({
queryid: '901',
query: 'SELECT COUNT(*) FROM public.orders WHERE status = $1',
calls: 20,
totalExecTime: 500,
meanExecTime: 25,
}),
],
deallocCount: 3,
}),
sqlAnalysis,
pullConfig: postgresPullConfig(50),
baselinePath,
now: new Date('2026-05-08T12:00:00.000Z'),
});
// Read the manifest without schema validation; only two fields matter here.
const manifest = await readJson<{ warnings: string[]; deallocCount: number }>(stagedDir, 'manifest.json');
expect(manifest.deallocCount).toBe(3);
expect(manifest.warnings).toContain(
'pgss_dealloc_count:3; pg_stat_statements.max may be too low, causing template eviction churn',
);
});
// Later run with a saved baseline: only the growth since the baseline is staged.
// Template 201: user 11 grew by 2 calls, user 12 is unchanged and contributes nothing.
// Template 202 has no baseline entry, so its full counters count as the delta.
it('uses the saved cumulative baseline to stage only positive deltas on later runs', async () => {
const stagedDir = await tempDir('pgss-stage-delta-');
const baselineRootDir = await tempDir('pgss-baseline-delta-');
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
const baseline: PgssBaseline = {
version: 1,
fetchedAt: '2026-05-08T10:00:00.000Z',
statsResetAt: '2026-05-08T08:00:00.000Z',
pgServerVersion: 'PostgreSQL 16.4',
templates: {
db5_q201: {
firstObservedAt: '2026-05-08T09:00:00.000Z',
perUser: {
'11': { calls: 10, totalExecTime: 100, totalRows: 50 },
'12': { calls: 5, totalExecTime: 50, totalRows: 25 },
},
},
},
};
await writePgssBaselineAtomic(baselinePath, baseline);
await stagePgStatStatementsTemplates({
stagedDir,
connectionId: 'conn_pg',
queryClient: fakePgClient(),
reader: fakeReader({
rows: [
row({
queryid: '201',
userid: '11',
username: 'analyst',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 12,
totalExecTime: 160,
totalRows: 58,
}),
row({
queryid: '201',
userid: '12',
username: 'svc_loader',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 5,
totalExecTime: 50,
totalRows: 25,
}),
row({
queryid: '202',
userid: '13',
username: 'analyst_2',
query: 'SELECT count(*) FROM analytics.customers',
calls: 7,
totalExecTime: 210,
totalRows: 7,
}),
],
}),
sqlAnalysis,
pullConfig: postgresPullConfig(),
baselinePath,
now: new Date('2026-05-08T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
expect(manifest.baselineFirstRun).toBe(false);
// The window starts where the previous baseline was captured.
expect(manifest.windowStart).toBe('2026-05-08T10:00:00.000Z');
expect(manifest.templateCount).toBe(2);
// Ranking is users * log1p(calls): 202 (7 calls) outranks 201 (2 calls), one user each.
expect(manifest.templates.map((template) => template.id)).toEqual(['db5_q202', 'db5_q201']);
const usage201 = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q201/usage.json'));
// calls 12 - 10 = 2, exec time 160 - 100 = 60 (mean 30), rows 58 - 50 = 8;
// first_seen is carried over from the baseline entry.
expect(usage201.stats).toMatchObject({
executions: 2,
distinct_users: 1,
first_seen: '2026-05-08T09:00:00.000Z',
last_seen: '2026-05-08T12:00:00.000Z',
mean_runtime_ms: 30,
rows_produced: 8,
});
const metadata201 = historicSqlMetadataSchema.parse(await readJson(stagedDir, 'templates/db5_q201/metadata.json'));
// svc_loader had no new calls, so the only active user is the human analyst.
expect(metadata201.properties.triage_signals.service_account_only).toBe('false');
const usage202 = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q202/usage.json'));
// 210 / 7 = 30 ms mean; first_seen is the run time because the template is new.
expect(usage202.stats).toMatchObject({
executions: 7,
distinct_users: 1,
first_seen: '2026-05-08T12:00:00.000Z',
mean_runtime_ms: 30,
rows_produced: 7,
});
});
// The same queryid in two databases must not be merged: template ids embed the
// dbid (`db<dbid>_q<queryid>`), so each database keeps its own delta and baseline entry.
it('keeps matching queryid values from different databases as distinct templates and baseline entries', async () => {
const stagedDir = await tempDir('pgss-stage-db-key-');
const baselineRootDir = await tempDir('pgss-baseline-db-key-');
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
await writePgssBaselineAtomic(baselinePath, {
version: 1,
fetchedAt: '2026-05-08T10:00:00.000Z',
statsResetAt: '2026-05-08T08:00:00.000Z',
pgServerVersion: 'PostgreSQL 16.4',
templates: {
db5_q701: {
firstObservedAt: '2026-05-08T09:00:00.000Z',
perUser: {
'11': { calls: 10, totalExecTime: 100, totalRows: 50 },
},
},
db6_q701: {
firstObservedAt: '2026-05-08T09:30:00.000Z',
perUser: {
'11': { calls: 4, totalExecTime: 40, totalRows: 20 },
},
},
},
});
const result = await stagePgStatStatementsTemplates({
stagedDir,
connectionId: 'conn_pg',
queryClient: fakePgClient(),
reader: fakeReader({
rows: [
row({
queryid: '701',
dbid: '5',
database: 'warehouse',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 12,
totalExecTime: 160,
totalRows: 58,
}),
row({
queryid: '701',
dbid: '6',
database: 'app',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 9,
totalExecTime: 130,
totalRows: 35,
}),
],
}),
sqlAnalysis,
pullConfig: postgresPullConfig(),
baselinePath,
now: new Date('2026-05-08T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
expect(manifest.templates.map((template) => template.id).sort()).toEqual(['db5_q701', 'db6_q701']);
const warehouseUsage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q701/usage.json'));
// dbid 5: calls 12 - 10 = 2, rows 58 - 50 = 8.
expect(warehouseUsage.stats).toMatchObject({
executions: 2,
rows_produced: 8,
first_seen: '2026-05-08T09:00:00.000Z',
});
const appUsage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db6_q701/usage.json'));
// dbid 6: calls 9 - 4 = 5, rows 35 - 20 = 15.
expect(appUsage.stats).toMatchObject({
executions: 5,
rows_produced: 15,
first_seen: '2026-05-08T09:30:00.000Z',
});
// The returned baseline stores the new cumulative counters, not the deltas.
expect(result.baseline.templates.db5_q701.perUser['11']).toEqual({
calls: 12,
totalExecTime: 160,
totalRows: 58,
});
expect(result.baseline.templates.db6_q701.perUser['11']).toEqual({
calls: 9,
totalExecTime: 130,
totalRows: 35,
});
});
// Two independent scenarios that both invalidate the saved baseline:
// (1) the snapshot's stats_reset is later than the baseline's, and
// (2) the server's major version differs from the baseline's.
// In both, the run is treated as a first run and counters are staged in full.
it('treats stats_reset advancement and major-version changes as fresh baselines', async () => {
// Scenario 1: stats_reset moved from 08:00 to 11:00.
const resetStagedDir = await tempDir('pgss-stage-reset-');
const resetBaselineRootDir = await tempDir('pgss-baseline-reset-');
const resetBaselinePath = pgssBaselinePath(resetBaselineRootDir, 'conn_pg');
await writePgssBaselineAtomic(resetBaselinePath, {
version: 1,
fetchedAt: '2026-05-08T10:00:00.000Z',
statsResetAt: '2026-05-08T08:00:00.000Z',
pgServerVersion: 'PostgreSQL 16.4',
templates: {
db5_q301: {
firstObservedAt: '2026-05-08T09:00:00.000Z',
perUser: {
'11': { calls: 100, totalExecTime: 1000, totalRows: 500 },
},
},
},
});
await stagePgStatStatementsTemplates({
stagedDir: resetStagedDir,
connectionId: 'conn_pg',
queryClient: fakePgClient(),
reader: fakeReader({
statsResetAt: '2026-05-08T11:00:00.000Z',
rows: [
row({
queryid: '301',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 3,
totalExecTime: 90,
totalRows: 9,
}),
],
}),
sqlAnalysis,
pullConfig: postgresPullConfig(),
baselinePath: resetBaselinePath,
now: new Date('2026-05-08T12:00:00.000Z'),
});
const resetManifest = historicSqlManifestSchema.parse(await readJson(resetStagedDir, 'manifest.json'));
expect(resetManifest.baselineFirstRun).toBe(true);
expect(resetManifest.warnings).toContain(
'baseline_reset:stats_reset advanced from 2026-05-08T08:00:00.000Z to 2026-05-08T11:00:00.000Z',
);
const resetUsage = historicSqlUsageSchema.parse(await readJson(resetStagedDir, 'templates/db5_q301/usage.json'));
// The stale baseline (100 calls) is ignored; all 3 current calls are staged.
expect(resetUsage.stats.executions).toBe(3);
// Scenario 2: baseline captured on PostgreSQL 15.7, server now reports 16.4.
const versionStagedDir = await tempDir('pgss-stage-version-');
const versionBaselineRootDir = await tempDir('pgss-baseline-version-');
const versionBaselinePath = pgssBaselinePath(versionBaselineRootDir, 'conn_pg');
await writePgssBaselineAtomic(versionBaselinePath, {
version: 1,
fetchedAt: '2026-05-08T10:00:00.000Z',
statsResetAt: '2026-05-08T08:00:00.000Z',
pgServerVersion: 'PostgreSQL 15.7',
templates: {
db5_q302: {
firstObservedAt: '2026-05-08T09:00:00.000Z',
perUser: {
'11': { calls: 100, totalExecTime: 1000, totalRows: 500 },
},
},
},
});
await stagePgStatStatementsTemplates({
stagedDir: versionStagedDir,
connectionId: 'conn_pg',
queryClient: fakePgClient(),
reader: fakeReader({
pgServerVersion: 'PostgreSQL 16.4',
rows: [
row({
queryid: '302',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 4,
totalExecTime: 80,
totalRows: 8,
}),
],
}),
sqlAnalysis,
pullConfig: postgresPullConfig(),
baselinePath: versionBaselinePath,
now: new Date('2026-05-08T12:00:00.000Z'),
});
const versionManifest = historicSqlManifestSchema.parse(await readJson(versionStagedDir, 'manifest.json'));
expect(versionManifest.baselineFirstRun).toBe(true);
expect(versionManifest.warnings).toContain('baseline_reset:pg_server_major changed from 15 to 16');
});
// One (template, user) counter went backwards while stats_reset is unchanged.
// Only that pair is re-based to zero; the other user keeps a normal delta and
// the run is not flagged as a first run.
it('handles scoped counter regressions without forcing a global first-run baseline', async () => {
const stagedDir = await tempDir('pgss-stage-scoped-');
const baselineRootDir = await tempDir('pgss-baseline-scoped-');
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
await writePgssBaselineAtomic(baselinePath, {
version: 1,
fetchedAt: '2026-05-08T10:00:00.000Z',
statsResetAt: '2026-05-08T08:00:00.000Z',
pgServerVersion: 'PostgreSQL 16.4',
templates: {
db5_q401: {
firstObservedAt: '2026-05-08T09:00:00.000Z',
perUser: {
'11': { calls: 100, totalExecTime: 1000, totalRows: 500 },
'12': { calls: 50, totalExecTime: 500, totalRows: 250 },
},
},
},
});
await stagePgStatStatementsTemplates({
stagedDir,
connectionId: 'conn_pg',
queryClient: fakePgClient(),
reader: fakeReader({
statsResetAt: '2026-05-08T08:00:00.000Z',
rows: [
row({
queryid: '401',
userid: '11',
username: 'analyst',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 2,
totalExecTime: 30,
totalRows: 6,
}),
row({
queryid: '401',
userid: '12',
username: 'svc_loader',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 55,
totalExecTime: 650,
totalRows: 275,
}),
],
}),
sqlAnalysis,
pullConfig: postgresPullConfig(),
baselinePath,
now: new Date('2026-05-08T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
expect(manifest.baselineFirstRun).toBe(false);
expect(manifest.warnings).toContain('scoped_reset:dbid=5 queryid=401 userid=11');
const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q401/usage.json'));
// user 11 (re-based): 2 calls, 30 ms, 6 rows; user 12: 55 - 50 = 5 calls,
// 650 - 500 = 150 ms, 275 - 250 = 25 rows. Totals: 7 calls, 180 ms (mean 180 / 7), 31 rows.
expect(usage.stats).toMatchObject({
executions: 7,
distinct_users: 2,
mean_runtime_ms: 25.714285714285715,
rows_produced: 31,
});
});
// With a cap of 2 and three analyzable templates (one user each), the two with
// the most calls win: 502 (20 calls) then 503 (10 calls); 501 (2 calls) is dropped.
it('ranks and caps selected PGSS templates after skip and analysis filtering', async () => {
const stagedDir = await tempDir('pgss-stage-cap-');
const baselineRootDir = await tempDir('pgss-baseline-cap-');
const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
await stagePgStatStatementsTemplates({
stagedDir,
connectionId: 'conn_pg',
queryClient: fakePgClient(),
reader: fakeReader({
rows: [
row({
queryid: '501',
username: 'analyst_a',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 2,
totalExecTime: 20,
}),
row({
queryid: '502',
username: 'analyst_b',
query: 'SELECT count(*) FROM analytics.customers',
calls: 20,
totalExecTime: 200,
}),
row({
queryid: '503',
username: 'analyst_c',
query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
calls: 10,
totalExecTime: 100,
}),
],
}),
sqlAnalysis,
// Cap the run at two templates.
pullConfig: postgresPullConfig(2),
baselinePath,
now: new Date('2026-05-08T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
expect(manifest.capped).toBe(true);
expect(manifest.warnings).toContain('templates_truncated: kept 2 of 3 templates');
expect(manifest.templates.map((template) => template.id)).toEqual(['db5_q502', 'db5_q503']);
});
});

View file

@ -0,0 +1,508 @@
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import { z } from 'zod';
import type { SqlAnalysisFingerprintResult, SqlAnalysisPort } from '../../../sql-analysis/index.js';
import {
HISTORIC_SQL_OBJECT_TYPE,
HISTORIC_SQL_SOURCE_KEY,
historicSqlPullConfigSchema,
type HistoricSqlManifest,
type HistoricSqlMetadata,
type HistoricSqlPullConfig,
type HistoricSqlUsage,
type KloPostgresQueryClient,
type PostgresPgssAggregateRow,
type PostgresPgssReader,
type PostgresPgssRow,
} from './types.js';
// Version tag of the on-disk baseline format; the schema accepts only this literal.
const PGSS_BASELINE_VERSION = 1 as const;
// Cumulative counters recorded for one (template, user) pair.
const pgssCounterSchema = z.object({
calls: z.number().int().nonnegative(),
totalExecTime: z.number().nonnegative(),
totalRows: z.number().int().nonnegative(),
});
// Shape of the persisted baseline file: snapshot-level metadata plus, per
// template id, the first observation time and the per-user cumulative counters.
const pgssBaselineSchema = z.object({
version: z.literal(PGSS_BASELINE_VERSION),
fetchedAt: z.string().datetime(),
statsResetAt: z.string().datetime().nullable(),
pgServerVersion: z.string(),
templates: z.record(
z.string(),
z.object({
firstObservedAt: z.string().datetime(),
// Keyed by the row's `userid`.
perUser: z.record(z.string(), pgssCounterSchema),
}),
),
});
/** Validated in-memory form of the persisted pg_stat_statements baseline. */
export type PgssBaseline = z.infer<typeof pgssBaselineSchema>;
/** Inputs of `stagePgStatStatementsTemplates`. */
export interface StagePgStatStatementsTemplatesInput {
// Directory that receives `manifest.json` and the `templates/<id>/` files; created if missing.
stagedDir: string;
connectionId: string;
// Handed unchanged to `reader.probe` and `reader.readSnapshot`.
queryClient: KloPostgresQueryClient;
reader: PostgresPgssReader;
sqlAnalysis: SqlAnalysisPort;
// Must parse as a pull config with dialect `postgres`; other dialects are rejected.
pullConfig: HistoricSqlPullConfig;
// Location of the previous baseline file; a missing file means "first run".
baselinePath: string;
// Clock override; defaults to the current time.
now?: Date;
}
/** Result of `stagePgStatStatementsTemplates`. */
export interface StagePgStatStatementsTemplatesResult {
// Echo of the input `baselinePath`.
baselinePath: string;
// Baseline for the next run. The stage function returns it without writing it to disk.
baseline: PgssBaseline;
}
// Cumulative counters of one (template, user) pair, as stored in the baseline.
interface PgssBaselineCounter {
calls: number;
totalExecTime: number;
totalRows: number;
}
// Working accumulator for one template while per-user rows are folded together.
interface PgssAggregateMutable {
id: string;
queryid: string;
dbid: string;
database: string | null;
query: string;
// Sums of the non-negative per-user deltas against the baseline.
deltaCalls: number;
deltaExecTime: number;
deltaRows: number;
// Usernames that contributed at least one new call.
users: Set<string>;
firstObservedAt: string;
}
// A template aggregate paired with its successful SQL analysis.
interface AnalyzedPgssTemplate {
aggregate: PostgresPgssAggregateRow;
analysis: SqlAnalysisFingerprintResult;
}
// Shared "no previous observation" counter; callers only read it, so it is safe to share.
const ZERO_COUNTER: PgssBaselineCounter = {
calls: 0,
totalExecTime: 0,
totalRows: 0,
};
// Row limit passed to `reader.readSnapshot` as `maxTemplates`; independent of the per-run cap in the pull config.
const PGSS_SNAPSHOT_READ_LIMIT = 5000;
// Statements that start with one of these keywords (after optional whitespace) are never staged.
const PGSS_HARD_SKIP_PREFIX_RE = /^\s*(SHOW|DESCRIBE|DESC|EXPLAIN|USE|SET|BEGIN|COMMIT|ROLLBACK|VACUUM|ANALYZE)\b/i;
// Statements that mention system catalogs or statistics views anywhere in the text are never staged.
const PGSS_HARD_SKIP_TABLE_RE = /\b(INFORMATION_SCHEMA|pg_catalog\.|pg_toast\.|pg_stat_)/i;
/** Builds the template id `db<dbid>_q<queryid>`, keeping equal query ids from different databases apart. */
function pgssTemplateId(row: Pick<PostgresPgssRow, 'dbid' | 'queryid'>): string {
  const { dbid, queryid } = row;
  return `db${dbid}_q${queryid}`;
}
/**
 * Resolves the baseline file path for a connection:
 * `<rootDir>/<connectionId>/pgss-baseline.json`.
 *
 * @param rootDir - Cache root; when omitted, `.klo/cache/historic-sql` under the current working directory is used.
 * @param connectionId - Connection whose baseline is addressed; used as a path segment.
 */
export function pgssBaselinePath(rootDir: string | undefined, connectionId: string): string {
  const defaultRoot = (): string => join(process.cwd(), '.klo/cache/historic-sql');
  const cacheRoot = rootDir ?? defaultRoot();
  return join(cacheRoot, connectionId, 'pgss-baseline.json');
}
/**
 * Loads and validates the baseline file at `path`.
 *
 * @returns The parsed baseline, or `null` when the failure carries the code `ENOENT` (file missing).
 * @throws Any other read, JSON, or schema-validation error, unchanged.
 */
export async function readPgssBaseline(path: string): Promise<PgssBaseline | null> {
  try {
    const raw = await readFile(path, 'utf-8');
    const decoded: unknown = JSON.parse(raw);
    return pgssBaselineSchema.parse(decoded);
  } catch (error) {
    const missing = Boolean(error) && typeof error === 'object' && 'code' in error && error.code === 'ENOENT';
    if (!missing) {
      throw error;
    }
    return null;
  }
}
/**
 * Validates `baseline` and writes it to `path` as pretty-printed JSON with a
 * trailing newline. The content goes to `<path>.tmp` first and is then renamed
 * over `path`, so a partially written file never sits at `path`. Parent
 * directories are created as needed.
 *
 * @throws If the baseline fails schema validation or any file operation fails.
 */
export async function writePgssBaselineAtomic(path: string, baseline: PgssBaseline): Promise<void> {
  const validated = pgssBaselineSchema.parse(baseline);
  const stagingPath = `${path}.tmp`;
  await mkdir(dirname(path), { recursive: true });
  const serialized = `${JSON.stringify(validated, null, 2)}\n`;
  await writeFile(stagingPath, serialized, 'utf-8');
  await rename(stagingPath, path);
}
/**
 * Stages pg_stat_statements templates for one connection.
 *
 * Pipeline: validate the pull config, probe the server, load the previous
 * baseline, read a snapshot, decide whether the baseline is still usable,
 * fold rows into per-template deltas, drop hard-skipped SQL, analyze the
 * rest, rank and cap, then write `templates/<id>/{metadata.json,page.md,usage.json}`
 * and `manifest.json` into `stagedDir`.
 *
 * Warnings are appended in pipeline order (probe, dealloc, reset detection,
 * scoped resets, analysis failures, truncation) and are written to the manifest.
 *
 * @returns The baseline path and the baseline to use on the next run. The
 *   baseline is returned, not written, by this function.
 * @throws If the pull config is invalid or its dialect is not `postgres`, or
 *   if the reader, the baseline read, or any file write fails.
 */
export async function stagePgStatStatementsTemplates(
input: StagePgStatStatementsTemplatesInput,
): Promise<StagePgStatStatementsTemplatesResult> {
const config = historicSqlPullConfigSchema.parse(input.pullConfig);
if (config.dialect !== 'postgres') {
throw new Error(`stagePgStatStatementsTemplates requires dialect postgres, got ${config.dialect}`);
}
const now = input.now ?? new Date();
const fetchedAt = now.toISOString();
const probe = await input.reader.probe(input.queryClient);
// Copy so the reader's own warnings array is never mutated.
const warnings = [...probe.warnings];
const baseline = await readPgssBaseline(input.baselinePath);
const snapshot = await input.reader.readSnapshot(input.queryClient, {
minCalls: config.minCalls,
maxTemplates: PGSS_SNAPSHOT_READ_LIMIT,
});
if (snapshot.deallocCount !== null && snapshot.deallocCount > 0) {
warnings.push(
`pgss_dealloc_count:${snapshot.deallocCount}; pg_stat_statements.max may be too low, causing template eviction churn`,
);
}
const reset = detectBaselineReset({
baseline,
snapshotStatsResetAt: snapshot.statsResetAt,
currentPgServerVersion: probe.pgServerVersion,
});
warnings.push(...reset.warnings);
// `warnings` is passed by reference: aggregation appends scoped_reset entries to it.
const aggregates = aggregatePgssRows({
rows: snapshot.rows,
baseline,
baselineFirstRun: reset.baselineFirstRun,
fetchedAt,
warnings,
}).filter((aggregate) => !shouldSkipPgssSql(aggregate.query));
const analyzed: AnalyzedPgssTemplate[] = [];
// Analyzed one at a time so analysis_failed warnings keep aggregate order.
for (const aggregate of aggregates) {
const analysis = await input.sqlAnalysis.analyzeForFingerprint(aggregate.query, 'postgres');
if (analysis.error || !analysis.fingerprint || !analysis.normalizedSql) {
warnings.push(`analysis_failed:${aggregate.id}`);
continue;
}
analyzed.push({ aggregate, analysis });
}
const selected = selectPgssTemplates(analyzed, config.maxTemplatesPerRun);
if (selected.length < analyzed.length) {
warnings.push(`templates_truncated: kept ${selected.length} of ${analyzed.length} templates`);
}
await mkdir(input.stagedDir, { recursive: true });
const templates: HistoricSqlManifest['templates'] = [];
for (const template of selected) {
const staged = buildPgssStagedTemplate(template, config, now);
const basePath = `templates/${staged.metadata.id}`;
await writeJson(input.stagedDir, `${basePath}/metadata.json`, staged.metadata);
await writeText(input.stagedDir, `${basePath}/page.md`, staged.pageMarkdown);
await writeJson(input.stagedDir, `${basePath}/usage.json`, staged.usage);
templates.push({
id: staged.metadata.id,
fingerprint: staged.metadata.properties.fingerprint,
subClusterId: staged.metadata.properties.sub_cluster_id,
path: staged.metadata.path,
});
}
await writeJson(input.stagedDir, 'manifest.json', {
source: HISTORIC_SQL_SOURCE_KEY,
connectionId: input.connectionId,
dialect: 'postgres',
fetchedAt,
// Falls back from the previous fetch time to the stats reset time to "now".
// NOTE(review): a baseline discarded by reset detection still supplies windowStart here — confirm intended.
windowStart: baseline?.fetchedAt ?? snapshot.statsResetAt ?? fetchedAt,
windowEnd: fetchedAt,
nextSuccessfulCursor: fetchedAt,
templateCount: selected.length,
capped: selected.length < analyzed.length,
warnings,
// Always true for this source: usage stats carry no percentiles, samples, or error rate.
degraded: true,
statsResetAt: snapshot.statsResetAt,
baselineFirstRun: reset.baselineFirstRun,
pgServerVersion: probe.pgServerVersion,
deallocCount: snapshot.deallocCount,
templates,
} satisfies HistoricSqlManifest);
return {
baselinePath: input.baselinePath,
baseline: buildNextBaseline({
// Built from all snapshot rows, not only the selected templates.
rows: snapshot.rows,
fetchedAt,
statsResetAt: snapshot.statsResetAt,
pgServerVersion: probe.pgServerVersion,
// A baseline invalidated by reset detection is not carried forward.
previousBaseline: reset.baselineFirstRun ? null : baseline,
}),
};
}
/**
 * Decides whether the saved baseline can still be subtracted from the
 * current snapshot.
 *
 * The baseline is discarded (`baselineFirstRun: true`) when:
 * - there is no baseline at all;
 * - the snapshot's stats reset time is later than the baseline's; or
 * - the server's major version differs from the baseline's.
 *
 * Reset times are compared as instants, not as strings, so the same moment
 * written with a different precision or offset (`…00.000Z` vs `…00Z`) is not
 * mistaken for a reset. If either value cannot be parsed as a date, the
 * comparison falls back to plain string ordering.
 *
 * @returns The first-run flag and the warnings explaining why the baseline
 *   was discarded (empty when it is kept).
 */
function detectBaselineReset(input: {
  baseline: PgssBaseline | null;
  snapshotStatsResetAt: string | null;
  currentPgServerVersion: string;
}): { baselineFirstRun: boolean; warnings: string[] } {
  const { baseline, snapshotStatsResetAt, currentPgServerVersion } = input;
  if (!baseline) {
    return { baselineFirstRun: true, warnings: ['baseline_first_run:no_previous_pgss_baseline'] };
  }
  const warnings: string[] = [];
  if (baseline.statsResetAt && snapshotStatsResetAt) {
    const previousMs = Date.parse(baseline.statsResetAt);
    const currentMs = Date.parse(snapshotStatsResetAt);
    const comparable = !Number.isNaN(previousMs) && !Number.isNaN(currentMs);
    const advanced = comparable ? previousMs < currentMs : baseline.statsResetAt < snapshotStatsResetAt;
    if (advanced) {
      warnings.push(`baseline_reset:stats_reset advanced from ${baseline.statsResetAt} to ${snapshotStatsResetAt}`);
    }
  }
  const previousMajor = postgresMajor(baseline.pgServerVersion);
  const currentMajor = postgresMajor(currentPgServerVersion);
  // An unparseable version on either side is treated as "no change".
  if (previousMajor && currentMajor && previousMajor !== currentMajor) {
    warnings.push(`baseline_reset:pg_server_major changed from ${previousMajor} to ${currentMajor}`);
  }
  return { baselineFirstRun: warnings.length > 0, warnings };
}
/**
 * Extracts the leading version number from a server version string.
 * Accepts a banner such as `PostgreSQL 16.4 …` (case-insensitive) or a bare
 * version such as `16.4` / `16`; returns `null` when neither form matches.
 */
function postgresMajor(version: string): string | null {
  // The banner form is tried first, then the bare numeric form.
  for (const pattern of [/PostgreSQL\s+(\d+)/i, /^(\d+)(?:\.|$)/]) {
    const major = pattern.exec(version)?.[1];
    if (major != null) {
      return major;
    }
  }
  return null;
}
/**
 * Folds per-user snapshot rows into one delta aggregate per template.
 *
 * For every row the matching baseline counter (if any) is subtracted. Rows
 * whose call count did not change are ignored unless this is a first run.
 * Negative deltas are clamped to zero, and only users with at least one new
 * call are counted. Templates whose summed call delta is not positive are
 * dropped from the result.
 *
 * `input.warnings` is appended to when a single counter regressed.
 */
function aggregatePgssRows(input: {
  rows: PostgresPgssRow[];
  baseline: PgssBaseline | null;
  baselineFirstRun: boolean;
  fetchedAt: string;
  warnings: string[];
}): PostgresPgssAggregateRow[] {
  const { rows, baseline, baselineFirstRun, fetchedAt, warnings } = input;
  const byTemplate = new Map<string, PgssAggregateMutable>();
  for (const row of rows) {
    const templateId = pgssTemplateId(row);
    // On a first run the baseline is ignored entirely.
    const priorTemplate = baselineFirstRun ? undefined : baseline?.templates[templateId];
    const prior = scopedCounterBaseline(row, priorTemplate?.perUser[row.userid], baselineFirstRun, warnings);
    const callsDelta = row.calls - prior.calls;
    if (callsDelta === 0 && !baselineFirstRun) {
      continue;
    }
    let bucket = byTemplate.get(templateId);
    if (bucket == null) {
      // The first contributing row supplies the template's descriptive fields.
      bucket = {
        id: templateId,
        queryid: row.queryid,
        dbid: row.dbid,
        database: row.database,
        query: row.query,
        deltaCalls: 0,
        deltaExecTime: 0,
        deltaRows: 0,
        users: new Set<string>(),
        firstObservedAt: priorTemplate?.firstObservedAt ?? fetchedAt,
      };
      byTemplate.set(templateId, bucket);
    }
    bucket.deltaCalls += Math.max(0, callsDelta);
    bucket.deltaExecTime += Math.max(0, row.totalExecTime - prior.totalExecTime);
    bucket.deltaRows += Math.max(0, row.totalRows - prior.totalRows);
    if (callsDelta > 0) {
      bucket.users.add(row.username ?? 'unknown');
    }
  }
  const aggregated: PostgresPgssAggregateRow[] = [];
  for (const bucket of byTemplate.values()) {
    if (!(bucket.deltaCalls > 0)) {
      continue;
    }
    const { users, ...totals } = bucket;
    aggregated.push({
      id: totals.id,
      queryid: totals.queryid,
      dbid: totals.dbid,
      database: totals.database,
      query: totals.query,
      deltaCalls: totals.deltaCalls,
      deltaExecTime: totals.deltaExecTime,
      deltaRows: totals.deltaRows,
      meanExecTime: totals.deltaExecTime / Math.max(totals.deltaCalls, 1),
      distinctUsersDelta: users.size,
      users: [...users].sort(),
      firstObservedAt: totals.firstObservedAt,
    });
  }
  return aggregated;
}
/**
 * Picks the counter to subtract from a snapshot row.
 *
 * Returns the zero counter when there is no baseline entry or this is a first
 * run. If any baseline counter is larger than the row's current value, the
 * pair is treated as reset: a `scoped_reset` warning is recorded and the zero
 * counter is returned. Otherwise the baseline counter itself is returned.
 */
function scopedCounterBaseline(
  row: PostgresPgssRow,
  baselineCounter: PgssBaselineCounter | undefined,
  baselineFirstRun: boolean,
  warnings: string[],
): PgssBaselineCounter {
  if (baselineFirstRun || !baselineCounter) {
    return ZERO_COUNTER;
  }
  const regressed =
    baselineCounter.calls > row.calls ||
    baselineCounter.totalExecTime > row.totalExecTime ||
    baselineCounter.totalRows > row.totalRows;
  if (!regressed) {
    return baselineCounter;
  }
  warnings.push(`scoped_reset:dbid=${row.dbid} queryid=${row.queryid} userid=${row.userid}`);
  return ZERO_COUNTER;
}
/** True when the SQL starts with a hard-skipped statement keyword or mentions a hard-skipped system relation. */
function shouldSkipPgssSql(sql: string): boolean {
  const skipPatterns = [PGSS_HARD_SKIP_PREFIX_RE, PGSS_HARD_SKIP_TABLE_RE];
  return skipPatterns.some((pattern) => pattern.test(sql));
}
/**
 * Ranks templates and keeps at most `maxTemplatesPerRun` of them.
 *
 * Score is `users.length * log1p(deltaCalls)`, highest first; ties are broken
 * by template id via `localeCompare`. The input array is not modified.
 */
function selectPgssTemplates(templates: AnalyzedPgssTemplate[], maxTemplatesPerRun: number): AnalyzedPgssTemplate[] {
  const scoreOf = ({ aggregate }: AnalyzedPgssTemplate): number =>
    aggregate.users.length * Math.log1p(aggregate.deltaCalls);
  const ranked = templates.map((candidate) => ({ candidate, rank: scoreOf(candidate) }));
  ranked.sort((a, b) => {
    const rankGap = b.rank - a.rank;
    if (rankGap) {
      return rankGap;
    }
    return a.candidate.aggregate.id.localeCompare(b.candidate.aggregate.id);
  });
  const kept: AnalyzedPgssTemplate[] = [];
  for (const { candidate } of ranked.slice(0, maxTemplatesPerRun)) {
    kept.push(candidate);
  }
  return kept;
}
/**
 * Builds the three staged artifacts of one template: metadata, the markdown
 * page, and usage statistics.
 *
 * Only aggregate counters are available for this source, so percentile
 * runtimes are `null`, the error rate is reported as 0, and no literal slots
 * or samples are emitted. `last_seen` is always the run time.
 */
function buildPgssStagedTemplate(
  template: AnalyzedPgssTemplate,
  config: HistoricSqlPullConfig,
  now: Date,
): { metadata: HistoricSqlMetadata; pageMarkdown: string; usage: HistoricSqlUsage } {
  const { aggregate, analysis } = template;
  const id = aggregate.id;
  const sortedTables = [...analysis.tablesTouched].sort();
  // The title names the alphabetically first table, or "query" when none was detected.
  const headlineTable = sortedTables[0] ?? 'query';
  const triageSignals = buildPgssTriageSignals({
    executions: aggregate.deltaCalls,
    distinctUsers: aggregate.distinctUsersDelta,
    firstSeen: aggregate.firstObservedAt,
    lastSeen: now.toISOString(),
    meanRuntimeMs: aggregate.meanExecTime,
    serviceAccountOnly: isServiceAccountOnly(aggregate.users, config.serviceAccountUserPatterns),
    now,
  });
  const metadata: HistoricSqlMetadata = {
    id,
    title: `postgres · ${headlineTable} [${id.slice(0, 12)}]`,
    path: `templates/${id}/page.md`,
    objectType: HISTORIC_SQL_OBJECT_TYPE,
    lastEditedAt: null,
    properties: {
      fingerprint: analysis.fingerprint,
      sub_cluster_id: null,
      dialect: 'postgres',
      tables_touched: sortedTables,
      literal_slots: [],
      triage_signals: triageSignals,
    },
  };
  const pageMarkdown = renderTemplatePage(id, analysis.normalizedSql, sortedTables);
  const usage: HistoricSqlUsage = {
    stats: {
      executions: aggregate.deltaCalls,
      distinct_users: aggregate.distinctUsersDelta,
      first_seen: aggregate.firstObservedAt,
      last_seen: now.toISOString(),
      p50_runtime_ms: null,
      p95_runtime_ms: null,
      mean_runtime_ms: aggregate.meanExecTime,
      error_rate: 0,
      rows_produced: aggregate.deltaRows,
    },
    literal_slots: [],
    samples: [],
  };
  return { metadata, pageMarkdown, usage };
}
/**
 * Turns raw usage numbers into the coarse string buckets stored under `triage_signals`.
 *
 * Buckets: executions `<3` low, `<50` mid, otherwise high; distinct users `<=1` solo,
 * `<=5` team, otherwise broad. `error_rate_bucket` is always `'ok'`. `firstSeen` is
 * accepted but not used.
 */
function buildPgssTriageSignals(input: {
  executions: number;
  distinctUsers: number;
  firstSeen: string;
  lastSeen: string;
  meanRuntimeMs: number;
  serviceAccountOnly: boolean;
  now: Date;
}): Record<string, string> {
  const { executions, distinctUsers, lastSeen, meanRuntimeMs, serviceAccountOnly, now } = input;

  let executionsBucket = 'high';
  if (executions < 3) {
    executionsBucket = 'low';
  } else if (executions < 50) {
    executionsBucket = 'mid';
  }

  let distinctUsersBucket = 'broad';
  if (distinctUsers <= 1) {
    distinctUsersBucket = 'solo';
  } else if (distinctUsers <= 5) {
    distinctUsersBucket = 'team';
  }

  return {
    executions_bucket: executionsBucket,
    distinct_users_bucket: distinctUsersBucket,
    error_rate_bucket: 'ok',
    recency_bucket: recencyBucket(lastSeen, now),
    service_account_only: String(serviceAccountOnly),
    runtime_bucket: runtimeBucket(meanRuntimeMs),
  };
}
/** Buckets a mean runtime in milliseconds: below 100 is `fast`, below 1000 is `moderate`, anything else is `slow`. */
function runtimeBucket(meanRuntimeMs: number): string {
  return meanRuntimeMs < 100 ? 'fast' : meanRuntimeMs < 1000 ? 'moderate' : 'slow';
}
/**
 * Buckets how long ago `lastSeen` was relative to `now`.
 *
 * Age is measured in days and clamped at zero, so timestamps in the future count as
 * `active`. Up to 14 days is `active`, up to 60 days is `warm`, everything else is `cold`.
 */
function recencyBucket(lastSeen: string, now: Date): string {
  const elapsedMs = now.getTime() - new Date(lastSeen).getTime();
  const ageDays = Math.max(0, elapsedMs / 86400000);
  return ageDays <= 14 ? 'active' : ageDays <= 60 ? 'warm' : 'cold';
}
/**
 * Reports whether every user matches at least one of the given regular-expression sources.
 *
 * Returns false when either list is empty. All patterns are compiled with `new RegExp`
 * before any user is checked, so an invalid pattern throws.
 */
function isServiceAccountOnly(users: string[], patterns: string[]): boolean {
  if (users.length === 0 || patterns.length === 0) {
    return false;
  }

  const matchers: RegExp[] = [];
  for (const source of patterns) {
    matchers.push(new RegExp(source));
  }

  for (const user of users) {
    const matchesAny = matchers.some((matcher) => matcher.test(user));
    if (!matchesAny) {
      return false;
    }
  }
  return true;
}
/**
 * Renders the markdown page for a template: an `# id` heading, the normalized SQL in a
 * fenced `sql` block, and a bullet list of touched tables. The result ends with a newline.
 */
function renderTemplatePage(id: string, normalizedSql: string, tablesTouched: string[]): string {
  const lines: string[] = [`# ${id}`, '', '## Normalized SQL', '```sql', normalizedSql, '```', '', '## Tables touched'];
  for (const table of tablesTouched) {
    lines.push(`- ${table}`);
  }
  // The trailing empty entry makes the joined text end with '\n'.
  lines.push('');
  return lines.join('\n');
}
/**
 * Builds the baseline snapshot to persist for the next run from the rows fetched now.
 *
 * Rows are grouped by `pgssTemplateId(row)`. Each template keeps the `firstObservedAt`
 * recorded in the previous baseline when one exists, otherwise `fetchedAt`, and stores
 * the current cumulative counters per `userid`.
 */
function buildNextBaseline(input: {
  rows: PostgresPgssRow[];
  fetchedAt: string;
  statsResetAt: string | null;
  pgServerVersion: string;
  previousBaseline: PgssBaseline | null;
}): PgssBaseline {
  const { rows, fetchedAt, statsResetAt, pgServerVersion, previousBaseline } = input;
  const templates: PgssBaseline['templates'] = {};

  for (const row of rows) {
    const templateId = pgssTemplateId(row);
    let entry = templates[templateId];
    if (entry == null) {
      const carriedOver = previousBaseline?.templates[templateId];
      entry = {
        firstObservedAt: carriedOver?.firstObservedAt ?? fetchedAt,
        perUser: {},
      };
      templates[templateId] = entry;
    }
    // A later row for the same (template, userid) pair overwrites the earlier one.
    entry.perUser[row.userid] = {
      calls: row.calls,
      totalExecTime: row.totalExecTime,
      totalRows: row.totalRows,
    };
  }

  return {
    version: PGSS_BASELINE_VERSION,
    fetchedAt,
    statsResetAt,
    pgServerVersion,
    templates,
  };
}
/** Serializes `value` as 2-space-indented JSON with a trailing newline and writes it via `writeText`. */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const serialized = JSON.stringify(value, null, 2);
  await writeText(root, relPath, `${serialized}\n`);
}
/** Writes `value` as UTF-8 to `root/relPath`, creating missing parent directories first. */
async function writeText(root: string, relPath: string, value: string): Promise<void> {
  const destination = join(root, relPath);
  const parentDir = dirname(destination);
  await mkdir(parentDir, { recursive: true });
  await writeFile(destination, value, 'utf-8');
}

View file

@ -0,0 +1,798 @@
import { mkdtemp, readFile, readdir } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
import { stageHistoricSqlTemplates } from './stage.js';
import {
historicSqlManifestSchema,
historicSqlMetadataSchema,
historicSqlUsageSchema,
type HistoricSqlQueryHistoryReader,
type HistoricSqlRawQueryRow,
} from './types.js';
/** Creates a fresh directory under the OS temp dir with the `historic-sql-stage-` prefix and returns its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-stage-');
  return mkdtemp(prefix);
}
/** Reads `root/relPath` as UTF-8 and returns the parsed JSON, cast (unchecked) to `T`. */
async function readJson<T>(root: string, relPath: string): Promise<T> {
  const raw = await readFile(join(root, relPath), 'utf-8');
  return JSON.parse(raw) as T;
}
/** Test double for the query-history reader: `probe` does nothing and `fetch` replays `rows` in order. */
function fakeReader(rows: HistoricSqlRawQueryRow[]): HistoricSqlQueryHistoryReader {
  async function* replayRows() {
    for (const row of rows) {
      yield row;
    }
  }
  return {
    probe: async () => {},
    fetch: replayRows,
  };
}
/**
 * SQL-analysis stub with two canned results: any SQL containing the text `paid` maps to the
 * `fp_paid_orders` fingerprint, everything else to `fp_refunds`. The slot example values
 * are fixed and do not depend on the literals in the input SQL.
 */
const fakeSqlAnalysis: SqlAnalysisPort = {
  async analyzeForFingerprint(sql) {
    const isPaidOrdersQuery = sql.includes('paid');
    if (!isPaidOrdersQuery) {
      return {
        fingerprint: 'fp_refunds',
        normalizedSql: 'SELECT count(*) FROM analytics.refunds WHERE state = ?',
        tablesTouched: ['analytics.refunds'],
        literalSlots: [{ position: 1, type: 'string', exampleValue: 'complete' }],
      };
    }
    return {
      fingerprint: 'fp_paid_orders',
      normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ? AND created_at >= ?',
      tablesTouched: ['analytics.orders'],
      literalSlots: [
        { position: 1, type: 'string', exampleValue: 'paid' },
        { position: 2, type: 'date', exampleValue: '2026-04-01' },
      ],
    };
  },
};
/**
 * SQL-analysis stub that always reports the `fp_order_status` fingerprint with one string
 * slot whose example value is `refunded` when the SQL contains `'refunded'` and `paid` otherwise.
 */
const categoricalSqlAnalysis: SqlAnalysisPort = {
  async analyzeForFingerprint(sql) {
    let observedStatus = 'paid';
    if (sql.includes("'refunded'")) {
      observedStatus = 'refunded';
    }
    return {
      fingerprint: 'fp_order_status',
      normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?',
      tablesTouched: ['analytics.orders'],
      literalSlots: [{ position: 1, type: 'string', exampleValue: observedStatus }],
    };
  },
};
/**
 * Fixture: six successful executions of the same query shape, three with status `paid`
 * followed by three with status `refunded`, each trio run by analyst-a, -b and -c.
 *
 * Start times advance one minute per row from 10:00, runtimes advance by 10 ms from 100,
 * and `rowsProduced` is 11-13 for the paid rows and 21-23 for the refunded rows.
 */
function categoricalRows(): HistoricSqlRawQueryRow[] {
  const statuses = ['paid', 'refunded'];
  const analysts = ['analyst-a', 'analyst-b', 'analyst-c'];

  return statuses.flatMap((status, statusIndex) =>
    analysts.map((user, analystIndex) => {
      // 0..5 across the whole fixture, in emission order.
      const ordinal = statusIndex * analysts.length + analystIndex;
      return {
        id: `${status}-${analystIndex + 1}`,
        sql: `SELECT count(*) FROM analytics.orders WHERE status = '${status}'`,
        user,
        startedAt: `2026-05-04T10:0${ordinal}:00.000Z`,
        endedAt: null,
        runtimeMs: 100 + ordinal * 10,
        rowsProduced: (statusIndex + 1) * 10 + analystIndex + 1,
        success: true,
        errorMessage: null,
      };
    }),
  );
}
/**
 * SQL-analysis stub that always reports the `fp_diverse_samples` fingerprint and copies the
 * quoted value after `status = ` into the single slot, or `unknown` when there is none.
 */
const diverseSqlAnalysis: SqlAnalysisPort = {
  async analyzeForFingerprint(sql) {
    const statusMatch = /status = '([^']+)'/.exec(sql);
    const value = statusMatch?.[1] ?? 'unknown';
    return {
      fingerprint: 'fp_diverse_samples',
      normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?',
      tablesTouched: ['analytics.orders'],
      literalSlots: [{ position: 1, type: 'string', exampleValue: value }],
    };
  },
};
/**
 * SQL-analysis stub for the slot-classification test.
 *
 * SQL mentioning `stale_orders` yields the fixed `fp_stale_date` result. Anything else
 * yields `fp_classification_matrix` with five slots (region, plan, status, amount,
 * created_at) whose example values are extracted from the SQL text, falling back to
 * `unknown`, `0` and `2026-05-01` when a literal is absent.
 */
const classificationMatrixSqlAnalysis: SqlAnalysisPort = {
  async analyzeForFingerprint(sql) {
    const isStaleOrdersQuery = sql.includes('stale_orders');
    if (isStaleOrdersQuery) {
      return {
        fingerprint: 'fp_stale_date',
        normalizedSql: 'SELECT count(*) FROM analytics.stale_orders WHERE created_at >= ?',
        tablesTouched: ['analytics.stale_orders'],
        literalSlots: [{ position: 1, type: 'date', exampleValue: '2026-04-01' }],
      };
    }

    // Pulls the single-quoted literal that follows `<column> = `.
    const quotedLiteral = (column: string): string => {
      const found = sql.match(new RegExp(`${column} = '([^']+)'`));
      return found?.[1] ?? 'unknown';
    };
    const amountLiteral = sql.match(/amount >= (\d+)/)?.[1] ?? '0';
    const createdAtLiteral = sql.match(/created_at >= '([^']+)'/)?.[1] ?? '2026-05-01';

    return {
      fingerprint: 'fp_classification_matrix',
      normalizedSql:
        'SELECT count(*) FROM analytics.orders WHERE region = ? AND plan = ? AND status = ? AND amount >= ? AND created_at >= ?',
      tablesTouched: ['analytics.orders'],
      literalSlots: [
        { position: 1, type: 'string', exampleValue: quotedLiteral('region') },
        { position: 2, type: 'string', exampleValue: quotedLiteral('plan') },
        { position: 3, type: 'string', exampleValue: quotedLiteral('status') },
        { position: 4, type: 'number', exampleValue: amountLiteral },
        { position: 5, type: 'date', exampleValue: createdAtLiteral },
      ],
    };
  },
};
/**
 * Fixture for the slot-classification test: twenty `analytics.orders` executions followed
 * by one `analytics.stale_orders` execution.
 *
 * Across the twenty rows the region is always `us`, the plan is `enterprise` except for
 * the last row (`self_serve`), the status is `paid` for the first ten and `refunded` for
 * the rest, the amount is unique per row (100-119) and the date advances every five rows.
 */
function classificationMatrixRows(): HistoricSqlRawQueryRow[] {
  const rows: HistoricSqlRawQueryRow[] = [];

  for (let index = 0; index < 20; index += 1) {
    const status = index < 10 ? 'paid' : 'refunded';
    const plan = index === 19 ? 'self_serve' : 'enterprise';
    const amount = 100 + index;
    const dayOfMonth = String(1 + Math.floor(index / 5)).padStart(2, '0');
    const asOf = `2026-05-${dayOfMonth}`;
    const minute = String(index).padStart(2, '0');
    rows.push({
      id: `matrix-${index + 1}`,
      sql: `SELECT count(*) FROM analytics.orders WHERE region = 'us' AND plan = '${plan}' AND status = '${status}' AND amount >= ${amount} AND created_at >= '${asOf}'`,
      user: `analyst-${(index % 4) + 1}`,
      startedAt: `2026-05-04T10:${minute}:00.000Z`,
      endedAt: null,
      runtimeMs: 100 + index,
      rowsProduced: 1,
      success: true,
      errorMessage: null,
    });
  }

  rows.push({
    id: 'stale-date-1',
    sql: "SELECT count(*) FROM analytics.stale_orders WHERE created_at >= '2026-04-01'",
    user: 'analyst-1',
    startedAt: '2026-05-04T11:00:00.000Z',
    endedAt: null,
    runtimeMs: 75,
    rowsProduced: 1,
    success: true,
    errorMessage: null,
  });
  return rows;
}
describe('stageHistoricSqlTemplates', () => {
// Two executions by two different users both resolve to `fp_paid_orders` through
// `fakeSqlAnalysis`; the test checks the manifest, the three files written for the
// template, and that bound-SQL samples are redacted.
it('compresses rows by fingerprint into document-shaped staged templates', async () => {
const stagedDir = await tempDir();
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_1',
queryClient: {},
reader: fakeReader([
{
id: 'q1',
sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-04-01' AND email = 'analyst@example.com'",
user: 'analyst@example.com',
startedAt: '2026-05-04T10:00:00.000Z',
endedAt: '2026-05-04T10:00:01.000Z',
runtimeMs: 100,
rowsProduced: 1,
success: true,
errorMessage: null,
},
{
id: 'q2',
sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-05-01' AND email = 'analyst-2@example.com'",
user: 'analyst-2@example.com',
startedAt: '2026-05-04T11:00:00.000Z',
endedAt: '2026-05-04T11:00:01.000Z',
runtimeMs: 300,
rowsProduced: 1,
success: true,
errorMessage: null,
},
]),
sqlAnalysis: fakeSqlAnalysis,
pullConfig: {
dialect: 'snowflake',
windowDays: 90,
lastSuccessfulCursor: null,
serviceAccountUserPatterns: ['^svc_'],
// Matches e-mail addresses, which appear both in the SQL text and as the user.
redactionPatterns: ['[\\w.+-]+@[\\w-]+\\.[\\w.-]+'],
maxTemplatesPerRun: 5000,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
// The cursor is expected to be the latest `startedAt` among the fetched rows.
expect(manifest).toMatchObject({
source: 'historic-sql',
connectionId: 'conn_1',
dialect: 'snowflake',
nextSuccessfulCursor: '2026-05-04T11:00:00.000Z',
templateCount: 1,
capped: false,
});
const files = (await readdir(join(stagedDir, 'templates', 'fp_paid_orders'))).sort();
expect(files).toEqual(['metadata.json', 'page.md', 'usage.json']);
const metadata = historicSqlMetadataSchema.parse(
await readJson(stagedDir, 'templates/fp_paid_orders/metadata.json'),
);
expect(metadata).toEqual({
id: 'fp_paid_orders',
title: 'snowflake · analytics.orders [fp_pai]',
path: 'templates/fp_paid_orders/page.md',
objectType: 'historic_sql_template',
lastEditedAt: null,
properties: {
fingerprint: 'fp_paid_orders',
sub_cluster_id: null,
dialect: 'snowflake',
tables_touched: ['analytics.orders'],
literal_slots: [
{ position: 1, type: 'string', classification: 'constant' },
{ position: 2, type: 'date', classification: 'runtime' },
],
triage_signals: {
executions_bucket: 'low',
distinct_users_bucket: 'team',
error_rate_bucket: 'ok',
recency_bucket: 'active',
service_account_only: 'false',
slot_summary: '1 constant, 1 runtime',
},
},
});
const page = await readFile(join(stagedDir, 'templates/fp_paid_orders/page.md'), 'utf-8');
expect(page).toContain('## Normalized SQL');
expect(page).toContain('SELECT count(*) FROM analytics.orders WHERE status = ? AND created_at >= ?');
expect(page).toContain('- analytics.orders');
const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_paid_orders/usage.json'));
expect(usage.stats).toMatchObject({
executions: 2,
distinct_users: 2,
first_seen: '2026-05-04T10:00:00.000Z',
last_seen: '2026-05-04T11:00:00.000Z',
p50_runtime_ms: 100,
p95_runtime_ms: 300,
error_rate: 0,
});
// Both rows share the same literal tuple under `fakeSqlAnalysis`, and one sample is expected.
expect(usage.samples).toHaveLength(1);
expect(usage.samples[0].bound_sql).toContain('<redacted>');
expect(usage.samples[0].bound_sql).not.toContain('analyst@example.com');
expect(usage.samples[0].bound_sql).not.toContain('analyst-2@example.com');
});
// One `SHOW TABLES` row plus two real queries with different fingerprints, staged with
// `maxTemplatesPerRun: 1`. The warning text expects only two candidate templates, i.e.
// the SHOW statement never becomes one.
it('skips hard-noise SQL and caps templates deterministically', async () => {
const stagedDir = await tempDir();
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_1',
queryClient: {},
reader: fakeReader([
{
id: 'show-1',
sql: 'SHOW TABLES',
user: 'analyst',
startedAt: '2026-05-04T10:00:00.000Z',
endedAt: null,
runtimeMs: null,
success: true,
errorMessage: null,
},
{
id: 'q3',
sql: "SELECT count(*) FROM analytics.refunds WHERE state = 'complete'",
user: 'analyst',
startedAt: '2026-05-04T11:00:00.000Z',
endedAt: null,
runtimeMs: 50,
success: true,
errorMessage: null,
},
{
id: 'q4',
sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-04-01'",
user: 'analyst',
startedAt: '2026-05-04T11:30:00.000Z',
endedAt: null,
runtimeMs: 40,
success: true,
errorMessage: null,
},
]),
sqlAnalysis: fakeSqlAnalysis,
pullConfig: {
dialect: 'bigquery',
windowDays: 7,
lastSuccessfulCursor: '2026-05-01T00:00:00.000Z',
serviceAccountUserPatterns: [],
redactionPatterns: [],
maxTemplatesPerRun: 1,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
expect(manifest.templateCount).toBe(1);
expect(manifest.capped).toBe(true);
expect(manifest.warnings).toEqual(['templates_truncated: kept 1 of 2 templates']);
// Each surviving candidate has one execution by the same user; the later-started one (q4) is expected to win.
expect(manifest.templates.map((template) => template.id)).toEqual(['fp_paid_orders']);
});
// `categoricalRows()` supplies three `paid` and three `refunded` executions of a single
// fingerprint; the test expects one staged template directory per status value.
it('splits categorical fingerprints into one document directory per dominant value', async () => {
const stagedDir = await tempDir();
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_1',
queryClient: {},
reader: fakeReader(categoricalRows()),
sqlAnalysis: categoricalSqlAnalysis,
pullConfig: {
dialect: 'snowflake',
windowDays: 90,
lastSuccessfulCursor: null,
serviceAccountUserPatterns: [],
redactionPatterns: [],
maxTemplatesPerRun: 5000,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
// Sorted by id here so the comparison does not depend on manifest ordering.
const templates = manifest.templates
.map((template) => ({
id: template.id,
fingerprint: template.fingerprint,
subClusterId: template.subClusterId,
path: template.path,
}))
.sort((left, right) => left.id.localeCompare(right.id));
expect(manifest.templateCount).toBe(2);
expect(templates).toEqual([
{
id: 'fp_order_status__cat_2b2ff2318877',
fingerprint: 'fp_order_status',
subClusterId: 'cat_2b2ff2318877',
path: 'templates/fp_order_status__cat_2b2ff2318877/page.md',
},
{
id: 'fp_order_status__cat_34f037ddcbfa',
fingerprint: 'fp_order_status',
subClusterId: 'cat_34f037ddcbfa',
path: 'templates/fp_order_status__cat_34f037ddcbfa/page.md',
},
]);
// `cat_34f037ddcbfa` is the `paid` variant (see the `top_values` assertion below).
const paidMetadata = historicSqlMetadataSchema.parse(
await readJson(stagedDir, 'templates/fp_order_status__cat_34f037ddcbfa/metadata.json'),
);
expect(paidMetadata).toMatchObject({
id: 'fp_order_status__cat_34f037ddcbfa',
title: 'snowflake · analytics.orders [fp_ord:ddcbfa]',
path: 'templates/fp_order_status__cat_34f037ddcbfa/page.md',
properties: {
fingerprint: 'fp_order_status',
sub_cluster_id: 'cat_34f037ddcbfa',
dialect: 'snowflake',
tables_touched: ['analytics.orders'],
literal_slots: [{ position: 1, type: 'string', classification: 'categorical' }],
},
});
const paidUsage = historicSqlUsageSchema.parse(
await readJson(stagedDir, 'templates/fp_order_status__cat_34f037ddcbfa/usage.json'),
);
// rows_produced 36 = 11 + 12 + 13 from the three paid fixture rows.
expect(paidUsage.stats).toMatchObject({
executions: 3,
distinct_users: 3,
first_seen: '2026-05-04T10:00:00.000Z',
last_seen: '2026-05-04T10:02:00.000Z',
rows_produced: 36,
});
expect(paidUsage.literal_slots).toEqual([{ position: 1, distinct_values: 1, top_values: [['paid', 3]] }]);
const refundedUsage = historicSqlUsageSchema.parse(
await readJson(stagedDir, 'templates/fp_order_status__cat_2b2ff2318877/usage.json'),
);
// rows_produced 66 = 21 + 22 + 23 from the three refunded fixture rows.
expect(refundedUsage.stats).toMatchObject({
executions: 3,
distinct_users: 3,
first_seen: '2026-05-04T10:03:00.000Z',
last_seen: '2026-05-04T10:05:00.000Z',
rows_produced: 66,
});
expect(refundedUsage.literal_slots).toEqual([
{ position: 1, distinct_values: 1, top_values: [['refunded', 3]] },
]);
});
// Uses `classificationMatrixRows()`: in the twenty matrix rows region never varies, plan
// varies once, status splits 10/10, amount is unique per row and the date takes four
// values. A separate single-row `stale_orders` template carries one date slot.
it('classifies literal slots across the spec matrix and stale-date demotion', async () => {
const stagedDir = await tempDir();
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_1',
queryClient: {},
reader: fakeReader(classificationMatrixRows()),
sqlAnalysis: classificationMatrixSqlAnalysis,
pullConfig: {
dialect: 'snowflake',
windowDays: 90,
lastSuccessfulCursor: null,
serviceAccountUserPatterns: [],
redactionPatterns: [],
maxTemplatesPerRun: 5000,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
const matrixTemplates = manifest.templates.filter((template) => template.fingerprint === 'fp_classification_matrix');
// Two sub-clustered variants are expected for the matrix fingerprint.
expect(matrixTemplates).toHaveLength(2);
expect(matrixTemplates.every((template) => template.subClusterId?.startsWith('cat_'))).toBe(true);
const matrixTemplate = matrixTemplates[0];
if (!matrixTemplate) {
throw new Error('expected classification matrix template');
}
// metadata.json sits next to page.md in the template directory.
const matrixMetadata = historicSqlMetadataSchema.parse(
await readJson(stagedDir, matrixTemplate.path.replace('/page.md', '/metadata.json')),
);
expect(matrixMetadata.properties.literal_slots).toMatchInlineSnapshot(`
[
{
"classification": "constant",
"position": 1,
"type": "string",
},
{
"classification": "constant",
"position": 2,
"type": "string",
},
{
"classification": "categorical",
"position": 3,
"type": "string",
},
{
"classification": "runtime",
"position": 4,
"type": "number",
},
{
"classification": "runtime",
"position": 5,
"type": "date",
},
]
`);
// The expected summary counts only constant and runtime slots; the categorical slot is not listed.
expect(matrixMetadata.properties.triage_signals.slot_summary).toBe('2 constant, 2 runtime');
const staleMetadata = historicSqlMetadataSchema.parse(
await readJson(stagedDir, 'templates/fp_stale_date/metadata.json'),
);
expect(staleMetadata.properties.literal_slots).toMatchInlineSnapshot(`
[
{
"classification": "runtime",
"position": 1,
"type": "date",
},
]
`);
expect(staleMetadata.properties.triage_signals.slot_summary).toBe('0 constant, 1 runtime');
});
// Same fixture as the categorical split test but with `maxTemplatesPerRun: 1`: the
// "kept 1 of 2" warning shows the cap is counted against expanded variants, not fingerprints.
it('applies the templates-per-run cap after categorical expansion', async () => {
const stagedDir = await tempDir();
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_1',
queryClient: {},
reader: fakeReader(categoricalRows()),
sqlAnalysis: categoricalSqlAnalysis,
pullConfig: {
dialect: 'snowflake',
windowDays: 90,
lastSuccessfulCursor: null,
serviceAccountUserPatterns: [],
redactionPatterns: [],
maxTemplatesPerRun: 1,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
expect(manifest.templateCount).toBe(1);
expect(manifest.capped).toBe(true);
expect(manifest.warnings).toEqual(['templates_truncated: kept 1 of 2 templates']);
expect(manifest.templates).toHaveLength(1);
// Only the id prefix is asserted; which variant survives is not pinned by this test.
expect(manifest.templates[0].id).toMatch(/^fp_order_status__cat_/);
});
// The single input row has no `rowsProduced` field at all; the staged usage stats and
// sample are expected to omit `rows_produced` rather than carry a placeholder value.
it('omits rows_produced for BigQuery templates when reader rows have no row counts', async () => {
const stagedDir = await tempDir();
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_bq',
queryClient: {},
reader: fakeReader([
{
id: 'bq-1',
sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'",
user: 'analyst-a@example.com',
startedAt: '2026-05-04T10:00:00.000Z',
endedAt: null,
runtimeMs: 100,
success: true,
errorMessage: null,
},
]),
sqlAnalysis: fakeSqlAnalysis,
pullConfig: {
dialect: 'bigquery',
windowDays: 90,
lastSuccessfulCursor: null,
serviceAccountUserPatterns: [],
redactionPatterns: [],
maxTemplatesPerRun: 5000,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_paid_orders/usage.json'));
expect(usage.stats).not.toHaveProperty('rows_produced');
expect(usage.samples[0]).not.toHaveProperty('rows_produced');
});
// Eleven distinct status literals, each executed twice: an earlier failed run (10:xx) and
// a later successful run (11:xx). Five samples are expected, all successful, all with
// different literals, ordered newest first.
it('keeps at most five diverse samples, preferring recent successful representatives per literal tuple', async () => {
const stagedDir = await tempDir();
const statuses = [
'paid',
'refunded',
'pending',
'failed',
'trial',
'cancelled',
'draft',
'returned',
'review',
'held',
'archived',
];
// `index` doubles as the minute component, so later statuses have later start times.
const rows: HistoricSqlRawQueryRow[] = statuses.flatMap((status, index) => [
{
id: `${status}-old`,
sql: `SELECT count(*) FROM analytics.orders WHERE status = '${status}'`,
user: 'analyst-a',
startedAt: `2026-05-04T10:${String(index).padStart(2, '0')}:00.000Z`,
endedAt: null,
runtimeMs: 100,
rowsProduced: 1,
success: false,
errorMessage: 'old failed sample',
},
{
id: `${status}-new`,
sql: `SELECT count(*) FROM analytics.orders WHERE status = '${status}'`,
user: 'analyst-a',
startedAt: `2026-05-04T11:${String(index).padStart(2, '0')}:00.000Z`,
endedAt: null,
runtimeMs: 90,
rowsProduced: 2,
success: true,
errorMessage: null,
},
]);
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_1',
queryClient: {},
reader: fakeReader(rows),
sqlAnalysis: diverseSqlAnalysis,
pullConfig: {
dialect: 'snowflake',
windowDays: 90,
lastSuccessfulCursor: null,
serviceAccountUserPatterns: [],
redactionPatterns: [],
maxTemplatesPerRun: 5000,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_diverse_samples/usage.json'));
expect(usage.samples).toHaveLength(5);
expect(usage.samples.every((sample) => sample.success)).toBe(true);
expect(new Set(usage.samples.map((sample) => sample.bound_sql.match(/status = '([^']+)'/)?.[1])).size).toBe(5);
// Minutes 10..06 correspond to the last five entries of `statuses`.
expect(usage.samples.map((sample) => sample.started_at)).toEqual([
'2026-05-04T11:10:00.000Z',
'2026-05-04T11:09:00.000Z',
'2026-05-04T11:08:00.000Z',
'2026-05-04T11:07:00.000Z',
'2026-05-04T11:06:00.000Z',
]);
});
// Two templates with identical usage (one execution each, same user) that differ only in
// when they ran: 2026-02-04 versus 2026-05-04. With a cap of one, the recent one is expected.
it('uses recency as a tie-breaker when the templates-per-run cap overflows', async () => {
const stagedDir = await tempDir();
// Local stub: the fingerprint is derived from which table name the SQL mentions.
const sqlAnalysis: SqlAnalysisPort = {
async analyzeForFingerprint(sql) {
const table = sql.includes('fresh_orders') ? 'fresh_orders' : 'stale_orders';
return {
fingerprint: `fp_${table}`,
normalizedSql: `SELECT count(*) FROM analytics.${table}`,
tablesTouched: [`analytics.${table}`],
literalSlots: [],
};
},
};
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_1',
queryClient: {},
reader: fakeReader([
{
id: 'stale-1',
sql: 'SELECT count(*) FROM analytics.stale_orders',
user: 'analyst-a',
startedAt: '2026-02-04T10:00:00.000Z',
endedAt: null,
runtimeMs: 100,
rowsProduced: 1,
success: true,
errorMessage: null,
},
{
id: 'fresh-1',
sql: 'SELECT count(*) FROM analytics.fresh_orders',
user: 'analyst-a',
startedAt: '2026-05-04T10:00:00.000Z',
endedAt: null,
runtimeMs: 100,
rowsProduced: 1,
success: true,
errorMessage: null,
},
]),
sqlAnalysis,
pullConfig: {
dialect: 'snowflake',
windowDays: 90,
lastSuccessfulCursor: null,
serviceAccountUserPatterns: [],
redactionPatterns: [],
maxTemplatesPerRun: 1,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
expect(manifest.templates.map((template) => template.id)).toEqual(['fp_fresh_orders']);
});
// The redaction pattern `[` is not a valid regular expression. Staging is expected to
// finish anyway, record a `redaction_skipped:invalid_redaction_pattern` warning, and write
// no bound-SQL samples.
it('does not persist bound SQL samples when redaction patterns are invalid', async () => {
const stagedDir = await tempDir();
await stageHistoricSqlTemplates({
stagedDir,
connectionId: 'conn_1',
queryClient: {},
reader: fakeReader([
{
id: 'q1',
sql: "SELECT * FROM analytics.orders WHERE email = 'analyst@example.com'",
user: 'analyst@example.com',
startedAt: '2026-05-04T10:00:00.000Z',
endedAt: null,
runtimeMs: 100,
rowsProduced: 1,
success: true,
errorMessage: null,
},
]),
sqlAnalysis: {
async analyzeForFingerprint() {
return {
fingerprint: 'fp_redaction',
normalizedSql: 'SELECT * FROM analytics.orders WHERE email = ?',
tablesTouched: ['analytics.orders'],
literalSlots: [{ position: 1, type: 'string', exampleValue: 'analyst@example.com' }],
};
},
},
pullConfig: {
dialect: 'snowflake',
windowDays: 90,
lastSuccessfulCursor: null,
serviceAccountUserPatterns: [],
redactionPatterns: ['['],
maxTemplatesPerRun: 5000,
minCalls: 5,
},
now: new Date('2026-05-04T12:00:00.000Z'),
});
const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json'));
const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_redaction/usage.json'));
expect(manifest.warnings.some((warning) => warning.startsWith('redaction_skipped:invalid_redaction_pattern'))).toBe(
true,
);
expect(usage.samples).toEqual([]);
});
});

View file

@ -0,0 +1,630 @@
import { createHash } from 'node:crypto';
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import type {
SqlAnalysisFingerprintResult,
SqlAnalysisLiteralSlot,
SqlAnalysisLiteralSlotType,
SqlAnalysisPort,
} from '../../../sql-analysis/index.js';
import {
HISTORIC_SQL_OBJECT_TYPE,
HISTORIC_SQL_SOURCE_KEY,
historicSqlPullConfigSchema,
historicSqlRawQueryRowSchema,
type HistoricSqlLiteralSlotClassification,
type HistoricSqlManifest,
type HistoricSqlMetadata,
type HistoricSqlPullConfig,
type HistoricSqlQueryHistoryReader,
type HistoricSqlRawQueryRow,
type HistoricSqlUsage,
} from './types.js';
/** Arguments accepted by `stageHistoricSqlTemplates`. */
interface StageHistoricSqlTemplatesInput {
// Directory that receives `manifest.json` and the `templates/<id>/` files.
stagedDir: string;
// Copied as-is into the manifest.
connectionId: string;
// Opaque handle that is forwarded to `reader.probe` and `reader.fetch`.
queryClient: unknown;
// Source of raw query-history rows.
reader: HistoricSqlQueryHistoryReader;
// Fingerprinting port; called once per row that is not hard-skipped.
sqlAnalysis: SqlAnalysisPort;
// Validated with `historicSqlPullConfigSchema` before use.
pullConfig: HistoricSqlPullConfig;
// Reference time for the fetch window and manifest timestamps; defaults to `new Date()`.
now?: Date;
}
/** One observed literal value for a slot, tied to the row it came from. */
interface SlotObservation {
// The slot's example value after `redactText` has been applied.
value: string;
// `startedAt` of the row in which the value was seen.
rowStartedAt: string;
}
/** Everything recorded about a single literal slot position within a template. */
interface SlotStats {
position: number;
// Taken from the first slot seen at this position.
type: SqlAnalysisLiteralSlotType;
// Redacted value -> number of times it was recorded.
values: Map<string, number>;
// One entry per recorded occurrence, in recording order.
observations: SlotObservation[];
}
/** Per-fingerprint accumulator filled while rows are being read. */
interface TemplateAccumulator {
fingerprint: string;
// Normalized SQL from the first row that produced this fingerprint.
normalizedSql: string;
// Union of the tables reported by every row in the group.
tablesTouched: Set<string>;
// Every accepted row together with its analysis result.
rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>;
// Keyed by slot position.
slotStats: Map<number, SlotStats>;
}
/** A literal slot together with the classification assigned by `classifySlot`. */
interface ClassifiedLiteralSlot {
position: number;
type: SqlAnalysisLiteralSlotType;
classification: HistoricSqlLiteralSlotClassification;
}
/**
 * A stageable template: either a whole fingerprint group or one categorical sub-cluster of it.
 */
interface TemplateVariant {
// The fingerprint itself, or `<fingerprint>__<subClusterId>` for a sub-cluster.
id: string;
fingerprint: string;
// Null when the group has no categorical slots and was not split.
subClusterId: string | null;
normalizedSql: string;
// Shared with the parent group; sub-clusters reference the same Set instance.
tablesTouched: Set<string>;
// Rows belonging to this variant only.
rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>;
// Group-level stats for an unsplit variant, recomputed from `rows` for a sub-cluster.
slotStats: Map<number, SlotStats>;
// Always computed over the whole fingerprint group, even for sub-clusters.
slotClassifications: ClassifiedLiteralSlot[];
}
/**
 * One (slot position, value) pair of the tuple that identifies a categorical sub-cluster.
 * NOTE(review): tuples are built by `categoricalTuple`, which is outside this view; since it
 * receives the redactors, `value` is presumably the redacted literal. Confirm.
 */
interface CategoricalTupleEntry {
position: number;
value: string;
}
/**
 * Result of `compileRedactors`.
 * NOTE(review): `compileRedactors` is outside this view. `samplesAllowed` presumably turns
 * false when a configured pattern fails to compile, since the spec expects no samples in
 * that case. Confirm against the implementation.
 */
interface RedactionPolicy {
// Compiled patterns handed to `redactText`.
redactors: RegExp[];
samplesAllowed: boolean;
}
// Statements whose first keyword (after optional whitespace) is SHOW, DESCRIBE, DESC,
// EXPLAIN, USE or SET, matched case-insensitively.
const HARD_SKIP_PREFIX_RE = /^\s*(SHOW|DESCRIBE|DESC|EXPLAIN|USE|SET)\b/i;
// SQL that mentions INFORMATION_SCHEMA, SNOWFLAKE.ACCOUNT_USAGE, any word starting with
// `pg_`, or a `system.` qualifier, matched case-insensitively anywhere in the text.
// NOTE(review): the `pg_` branch also matches user objects whose names begin with `pg_`.
const HARD_SKIP_TABLE_RE = /\b(INFORMATION_SCHEMA|SNOWFLAKE\.ACCOUNT_USAGE|pg_|system\.)/i;
/**
 * Reads query history, groups it into templates by fingerprint and writes the staged
 * documents plus a manifest under `input.stagedDir`.
 *
 * Steps:
 * 1. Validate `pullConfig` and derive the fetch window. It starts at `lastSuccessfulCursor`
 *    when set, otherwise `windowDays` before `now`, and always ends at `now`.
 * 2. Probe the reader, then stream rows. The latest `startedAt` seen becomes
 *    `nextSuccessfulCursor`; hard-skipped rows still advance it.
 * 3. Fingerprint each remaining row; rows whose analysis fails are dropped with an
 *    `analysis_failed:<id>` warning.
 * 4. Expand groups into categorical variants, apply the per-run cap, and write
 *    `metadata.json`, `page.md` and `usage.json` for each selected template.
 * 5. Write `manifest.json` last.
 *
 * NOTE(review): when the reader yields no rows, `nextSuccessfulCursor` is written as null
 * rather than the incoming cursor; confirm the consumer treats null as "keep previous".
 *
 * @throws Whatever the config/row schemas, reader, analysis port or file system throw.
 */
export async function stageHistoricSqlTemplates(input: StageHistoricSqlTemplatesInput): Promise<void> {
  const { stagedDir, reader, sqlAnalysis, queryClient } = input;
  const config = historicSqlPullConfigSchema.parse(input.pullConfig);
  const now = input.now ?? new Date();

  let windowStart: Date;
  if (config.lastSuccessfulCursor) {
    windowStart = new Date(config.lastSuccessfulCursor);
  } else {
    windowStart = new Date(now.getTime() - config.windowDays * 24 * 60 * 60 * 1000);
  }

  const warnings: string[] = [];
  const redaction = compileRedactors(config.redactionPatterns, warnings);
  const groups = new Map<string, TemplateAccumulator>();
  let nextSuccessfulCursor: string | null = null;

  await reader.probe(queryClient);
  const rowStream = reader.fetch(queryClient, { start: windowStart, end: now }, config.lastSuccessfulCursor);
  for await (const rawRow of rowStream) {
    const row = historicSqlRawQueryRowSchema.parse(rawRow);

    // Track the cursor before any filtering so skipped rows are not re-read next run.
    if (!nextSuccessfulCursor || row.startedAt > nextSuccessfulCursor) {
      nextSuccessfulCursor = row.startedAt;
    }
    if (shouldSkipSql(row.sql)) {
      continue;
    }

    const analysis = await sqlAnalysis.analyzeForFingerprint(row.sql, config.dialect);
    if (analysis.error || !analysis.fingerprint || !analysis.normalizedSql) {
      warnings.push(`analysis_failed:${row.id}`);
      continue;
    }

    let group = groups.get(analysis.fingerprint);
    if (!group) {
      group = {
        fingerprint: analysis.fingerprint,
        normalizedSql: analysis.normalizedSql,
        tablesTouched: new Set<string>(),
        rows: [],
        slotStats: new Map<number, SlotStats>(),
      };
      groups.set(analysis.fingerprint, group);
    }
    for (const table of analysis.tablesTouched) {
      group.tablesTouched.add(table);
    }
    for (const slot of analysis.literalSlots) {
      recordSlot(group.slotStats, slot, redaction.redactors, row.startedAt);
    }
    group.rows.push({ row, analysis });
  }

  const expandedTemplates = expandCategoricalTemplates([...groups.values()], redaction.redactors);
  const selected = selectTemplates(expandedTemplates, config.maxTemplatesPerRun, now);
  const capped = selected.length < expandedTemplates.length;
  if (capped) {
    warnings.push(`templates_truncated: kept ${selected.length} of ${expandedTemplates.length} templates`);
  }

  await mkdir(stagedDir, { recursive: true });
  const templates: HistoricSqlManifest['templates'] = [];
  for (const template of selected) {
    const { metadata, pageMarkdown, usage } = buildStagedTemplate(template, config, redaction, now);
    const basePath = `templates/${metadata.id}`;
    await writeJson(stagedDir, `${basePath}/metadata.json`, metadata);
    await writeText(stagedDir, `${basePath}/page.md`, pageMarkdown);
    await writeJson(stagedDir, `${basePath}/usage.json`, usage);
    templates.push({
      id: metadata.id,
      fingerprint: metadata.properties.fingerprint,
      subClusterId: metadata.properties.sub_cluster_id,
      path: metadata.path,
    });
  }

  // `degraded` through `deallocCount` are written with fixed values on this path.
  const manifest = {
    source: HISTORIC_SQL_SOURCE_KEY,
    connectionId: input.connectionId,
    dialect: config.dialect,
    fetchedAt: now.toISOString(),
    windowStart: windowStart.toISOString(),
    windowEnd: now.toISOString(),
    nextSuccessfulCursor,
    templateCount: selected.length,
    capped,
    warnings,
    degraded: false,
    statsResetAt: null,
    baselineFirstRun: false,
    pgServerVersion: null,
    deallocCount: null,
    templates,
  } satisfies HistoricSqlManifest;
  await writeJson(stagedDir, 'manifest.json', manifest);
}
/** Returns true when `sql` matches `HARD_SKIP_PREFIX_RE` or `HARD_SKIP_TABLE_RE`. */
function shouldSkipSql(sql: string): boolean {
  if (HARD_SKIP_PREFIX_RE.test(sql)) {
    return true;
  }
  return HARD_SKIP_TABLE_RE.test(sql);
}
/**
 * Records one occurrence of a literal slot into `slotStats`, creating the entry for the
 * slot's position on first use.
 *
 * The example value is passed through `redactText` first, so only the redacted form is
 * counted and stored.
 */
function recordSlot(
  slotStats: Map<number, SlotStats>,
  slot: SqlAnalysisLiteralSlot,
  redactors: RegExp[],
  rowStartedAt: string,
): void {
  const persistedValue = redactText(slot.exampleValue, redactors);

  let stats = slotStats.get(slot.position);
  if (!stats) {
    stats = {
      position: slot.position,
      type: slot.type,
      values: new Map<string, number>(),
      observations: [],
    };
    slotStats.set(slot.position, stats);
  }

  const timesSeen = stats.values.get(persistedValue) ?? 0;
  stats.values.set(persistedValue, timesSeen + 1);
  stats.observations.push({ value: persistedValue, rowStartedAt });
}
/** Expands every fingerprint group into its variants and concatenates them in group order. */
function expandCategoricalTemplates(groups: TemplateAccumulator[], redactors: RegExp[]): TemplateVariant[] {
  const variants: TemplateVariant[] = [];
  for (const group of groups) {
    for (const variant of expandTemplateGroup(group, redactors)) {
      variants.push(variant);
    }
  }
  return variants;
}
/**
 * Splits one fingerprint group into template variants.
 *
 * Slots are classified over the whole group. When no slot is categorical the
 * group becomes a single variant with `subClusterId: null`. Otherwise rows are
 * bucketed by their tuple of (redacted) categorical values and each bucket
 * becomes its own variant, identified by `<fingerprint>__<subClusterId>`.
 *
 * @returns Variants sorted by id; an empty list when the group has no rows.
 */
function expandTemplateGroup(group: TemplateAccumulator, redactors: RegExp[]): TemplateVariant[] {
  const chronological = [...group.rows].sort((a, b) => a.row.startedAt.localeCompare(b.row.startedAt));
  const earliest = chronological[0]?.row.startedAt;
  if (!earliest) {
    return [];
  }

  const slotClassifications = classifySlots(group.slotStats, chronological.length, earliest);
  const categoricalPositions: number[] = [];
  for (const slot of slotClassifications) {
    if (slot.classification === 'categorical') {
      categoricalPositions.push(slot.position);
    }
  }
  categoricalPositions.sort((a, b) => a - b);

  if (categoricalPositions.length === 0) {
    const whole: TemplateVariant = {
      id: group.fingerprint,
      fingerprint: group.fingerprint,
      subClusterId: null,
      normalizedSql: group.normalizedSql,
      tablesTouched: group.tablesTouched,
      rows: chronological,
      slotStats: group.slotStats,
      slotClassifications,
    };
    return [whole];
  }

  // Bucket rows by the JSON form of their categorical tuple.
  const buckets = new Map<
    string,
    {
      tuple: CategoricalTupleEntry[];
      rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>;
    }
  >();
  for (const entry of chronological) {
    const tuple = categoricalTuple(entry.analysis.literalSlots, categoricalPositions, redactors);
    const bucketKey = JSON.stringify(tuple);
    const bucket = buckets.get(bucketKey);
    if (bucket) {
      bucket.rows.push(entry);
    } else {
      buckets.set(bucketKey, { tuple, rows: [entry] });
    }
  }

  const variants: TemplateVariant[] = [];
  for (const bucket of buckets.values()) {
    const subClusterId = subClusterIdForTuple(bucket.tuple);
    variants.push({
      id: `${group.fingerprint}__${subClusterId}`,
      fingerprint: group.fingerprint,
      subClusterId,
      normalizedSql: group.normalizedSql,
      tablesTouched: group.tablesTouched,
      rows: bucket.rows,
      // Slot stats are recomputed from the bucket's own rows; classifications stay group-level.
      slotStats: collectSlotStats(bucket.rows, redactors),
      slotClassifications,
    });
  }
  variants.sort((a, b) => a.id.localeCompare(b.id));
  return variants;
}
/**
 * Classifies every recorded literal slot and returns the results ordered by
 * slot position.
 *
 * @param slotStats Per-position statistics to classify.
 * @param executions Number of rows the statistics were collected over.
 * @param firstSeen Earliest row timestamp, forwarded to the per-slot classifier.
 */
function classifySlots(
  slotStats: Map<number, SlotStats>,
  executions: number,
  firstSeen: string,
): ClassifiedLiteralSlot[] {
  const byPosition = Array.from(slotStats.values());
  byPosition.sort((a, b) => a.position - b.position);

  const classified: ClassifiedLiteralSlot[] = [];
  for (const slot of byPosition) {
    classified.push({
      position: slot.position,
      type: slot.type,
      classification: classifySlot(slot, executions, firstSeen),
    });
  }
  return classified;
}
/**
 * Builds fresh per-position slot statistics from the given rows by recording
 * every literal slot of every row.
 */
function collectSlotStats(
  rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>,
  redactors: RegExp[],
): Map<number, SlotStats> {
  const collected = new Map<number, SlotStats>();
  rows.forEach(({ row, analysis }) => {
    analysis.literalSlots.forEach((slot) => {
      recordSlot(collected, slot, redactors, row.startedAt);
    });
  });
  return collected;
}
/**
 * Extracts the redacted values a row holds at the categorical slot positions.
 *
 * The result follows the order of `categoricalPositions`; a position the row
 * has no slot for is reported as `'<missing>'`.
 */
function categoricalTuple(
  literalSlots: SqlAnalysisLiteralSlot[],
  categoricalPositions: number[],
  redactors: RegExp[],
): CategoricalTupleEntry[] {
  const redactedByPosition = new Map<number, string>();
  for (const slot of literalSlots) {
    redactedByPosition.set(slot.position, redactText(slot.exampleValue, redactors));
  }

  const tuple: CategoricalTupleEntry[] = [];
  for (const position of categoricalPositions) {
    const value = redactedByPosition.get(position);
    // Key order (position, value) matters: the tuple is JSON-serialized by callers.
    tuple.push({ position, value: value ?? '<missing>' });
  }
  return tuple;
}
/**
 * Derives a stable sub-cluster id from a categorical tuple: `cat_` followed by
 * the first 12 hex characters of the SHA-256 of the tuple's JSON form.
 */
function subClusterIdForTuple(tuple: CategoricalTupleEntry[]): string {
  const serialized = JSON.stringify(tuple);
  const digest = createHash('sha256').update(serialized).digest('hex');
  return `cat_${digest.slice(0, 12)}`;
}
/**
 * Produces the three staged artifacts for one template variant: the metadata
 * record, the markdown page, and the usage summary.
 *
 * @param template Variant to stage; its rows are expected to be non-empty
 *   (the first/last row timestamps are read unconditionally).
 * @param config Pull configuration (dialect and service-account patterns are used here).
 * @param redaction Redaction policy forwarded to sample selection.
 * @param now Reference time for the recency triage signal.
 */
function buildStagedTemplate(
  template: TemplateVariant,
  config: HistoricSqlPullConfig,
  redaction: RedactionPolicy,
  now: Date,
): { metadata: HistoricSqlMetadata; pageMarkdown: string; usage: HistoricSqlUsage } {
  const chronological = template.rows.map(({ row }) => row);
  chronological.sort((a, b) => a.startedAt.localeCompare(b.startedAt));
  const executions = chronological.length;
  const firstSeen = chronological[0].startedAt;
  const lastSeen = chronological[executions - 1].startedAt;

  // Single pass over the rows for users, failures and runtimes.
  const userNames = new Set<string>();
  const runtimes: number[] = [];
  let errorCount = 0;
  for (const row of chronological) {
    if (row.user) {
      userNames.add(row.user);
    }
    if (!row.success) {
      errorCount += 1;
    }
    if (typeof row.runtimeMs === 'number') {
      runtimes.push(row.runtimeMs);
    }
  }
  runtimes.sort((a, b) => a - b);
  const distinctUsers = userNames.size;
  const errorRate = executions === 0 ? 0 : errorCount / executions;

  const triageSignals = buildTriageSignals({
    executions,
    distinctUsers,
    errorRate,
    lastSeen,
    now,
    serviceAccountOnly: isServiceAccountOnly(chronological, config.serviceAccountUserPatterns),
    slotClassifications: template.slotClassifications.map((slot) => slot.classification),
  });

  const tablesTouched = [...template.tablesTouched].sort();
  // The title falls back to the literal 'query' when no table was detected.
  const titleTable = tablesTouched[0] ?? 'query';
  const id = template.id;
  const rowsProduced = sumRowsProduced(chronological);

  const metadata: HistoricSqlMetadata = {
    id,
    title: buildTemplateTitle(config.dialect, titleTable, template.fingerprint, template.subClusterId),
    path: `templates/${id}/page.md`,
    objectType: HISTORIC_SQL_OBJECT_TYPE,
    lastEditedAt: null,
    properties: {
      fingerprint: template.fingerprint,
      sub_cluster_id: template.subClusterId,
      dialect: config.dialect,
      tables_touched: tablesTouched,
      literal_slots: template.slotClassifications,
      triage_signals: triageSignals,
    },
  };

  const slotUsage = Array.from(template.slotStats.values())
    .sort((a, b) => a.position - b.position)
    .map((slot) => {
      // Most frequent values first; ties broken by value text. Capped at ten.
      const ranked = Array.from(slot.values.entries()).sort(
        (a, b) => b[1] - a[1] || a[0].localeCompare(b[0]),
      );
      return {
        position: slot.position,
        distinct_values: slot.values.size,
        top_values: ranked.slice(0, 10),
      };
    });

  return {
    metadata,
    pageMarkdown: renderTemplatePage(id, template.normalizedSql, tablesTouched),
    usage: {
      stats: {
        executions,
        distinct_users: distinctUsers,
        first_seen: firstSeen,
        last_seen: lastSeen,
        p50_runtime_ms: percentile(runtimes, 0.5),
        p95_runtime_ms: percentile(runtimes, 0.95),
        error_rate: errorRate,
        // rows_produced is omitted entirely when no row reported a numeric count.
        ...(rowsProduced === null ? {} : { rows_produced: rowsProduced }),
      },
      literal_slots: slotUsage,
      samples: selectSamples(template.rows, redaction),
    },
  };
}
/** Literal slot types treated as temporal when checking for moving date/timestamp literals. */
const TEMPORAL_SLOT_TYPES = new Set<SqlAnalysisLiteralSlotType>(['date', 'timestamp']);
/**
 * Reports whether `value` is a parseable literal in a `date` slot that sorts
 * before the day part (first 10 characters) of `firstSeen`.
 *
 * The comparison is a plain string comparison, so it is only meaningful for
 * values written as `YYYY-MM-DD…`.
 */
function isStaleDateConstant(slot: SlotStats, value: string, firstSeen: string): boolean {
  if (slot.type !== 'date') {
    return false;
  }
  if (parseTemporalSlotValue(value) === null) {
    return false;
  }
  const firstSeenDay = firstSeen.slice(0, 10);
  return value < firstSeenDay;
}
/**
 * Reports whether a temporal slot's literal moves in one direction over time.
 *
 * Requires a temporal slot type and at least two distinct values. Every
 * observation must have a parseable row timestamp and literal; otherwise the
 * slot is not considered moving. Observations are ordered by row start time and
 * the literal times must then be monotonic.
 */
function isMovingTemporalSlot(slot: SlotStats): boolean {
  if (!TEMPORAL_SLOT_TYPES.has(slot.type)) {
    return false;
  }
  if (slot.values.size < 2) {
    return false;
  }

  // Pairs of [row start time, literal time], both in epoch milliseconds.
  const timeline: Array<[number, number]> = [];
  for (const { rowStartedAt, value } of slot.observations) {
    const startedAtMs = Date.parse(rowStartedAt);
    const literalMs = parseTemporalSlotValue(value);
    if (Number.isNaN(startedAtMs) || literalMs === null) {
      return false;
    }
    timeline.push([startedAtMs, literalMs]);
  }

  timeline.sort((a, b) => a[0] - b[0]);
  return isMonotonic(timeline.map(([, literalMs]) => literalMs));
}
/**
 * Parses a literal with `Date.parse`.
 *
 * @returns Epoch milliseconds, or `null` when the text is not a parseable date.
 */
function parseTemporalSlotValue(value: string): number | null {
  const epochMs = Date.parse(value);
  if (Number.isNaN(epochMs)) {
    return null;
  }
  return epochMs;
}
/**
 * Reports whether the sequence never changes direction, i.e. it is entirely
 * non-decreasing or entirely non-increasing. Sequences shorter than two
 * elements are not considered monotonic.
 */
function isMonotonic(values: number[]): boolean {
  if (values.length < 2) {
    return false;
  }
  let sawRise = false;
  let sawFall = false;
  for (let i = 1; i < values.length; i += 1) {
    const previous = values[i - 1];
    const current = values[i];
    if (current > previous) {
      sawRise = true;
    }
    if (current < previous) {
      sawFall = true;
    }
  }
  // Monotonic unless the sequence both rose and fell somewhere.
  return !(sawRise && sawFall);
}
/**
 * Classifies one literal slot as `constant`, `categorical` or `runtime`.
 *
 * Rules, in order:
 * 1. `constant` when there is a single distinct value, or the most frequent
 *    value covers at least 95% of executions. Neither applies when that value
 *    is a stale date constant.
 * 2. `runtime` when the slot is a moving temporal literal.
 * 3. `categorical` when there are 2–10 distinct values and each covers at
 *    least 5% of executions.
 * 4. `runtime` otherwise.
 */
function classifySlot(
  slot: SlotStats,
  executions: number,
  firstSeen: string,
): HistoricSqlLiteralSlotClassification {
  const byFrequency = Array.from(slot.values.entries()).sort((a, b) => b[1] - a[1]);
  const distinct = byFrequency.length;
  const top = byFrequency[0];
  const topValue = top === undefined ? '' : top[0];
  const topCount = top === undefined ? 0 : top[1];

  const stale = isStaleDateConstant(slot, topValue, firstSeen);
  if (!stale) {
    const singleValue = distinct === 1;
    const dominated = executions > 0 && topCount / executions >= 0.95;
    if (singleValue || dominated) {
      return 'constant';
    }
  }

  if (isMovingTemporalSlot(slot)) {
    return 'runtime';
  }

  const smallDomain = executions > 0 && distinct >= 2 && distinct <= 10;
  if (smallDomain && byFrequency.every(([, count]) => count / executions >= 0.05)) {
    return 'categorical';
  }
  return 'runtime';
}
/**
 * Buckets a template's usage figures into coarse string signals.
 *
 * - executions: `low` (< 3), `mid` (< 50), `high`
 * - distinct users: `solo` (<= 1), `team` (<= 5), `broad`
 * - error rate: `ok` (<= 1%), `noisy` (<= 10%), `broken`
 * - recency: delegated to `recencyBucket`
 * - slot summary counts only `constant` and `runtime` slots.
 */
function buildTriageSignals(input: {
  executions: number;
  distinctUsers: number;
  errorRate: number;
  lastSeen: string;
  now: Date;
  serviceAccountOnly: boolean;
  slotClassifications: HistoricSqlLiteralSlotClassification[];
}): Record<string, string> {
  let constantCount = 0;
  let runtimeCount = 0;
  for (const classification of input.slotClassifications) {
    if (classification === 'runtime') {
      runtimeCount += 1;
    } else if (classification === 'constant') {
      constantCount += 1;
    }
  }

  let executionsBucket = 'high';
  if (input.executions < 3) {
    executionsBucket = 'low';
  } else if (input.executions < 50) {
    executionsBucket = 'mid';
  }

  let usersBucket = 'broad';
  if (input.distinctUsers <= 1) {
    usersBucket = 'solo';
  } else if (input.distinctUsers <= 5) {
    usersBucket = 'team';
  }

  let errorBucket = 'broken';
  if (input.errorRate <= 0.01) {
    errorBucket = 'ok';
  } else if (input.errorRate <= 0.1) {
    errorBucket = 'noisy';
  }

  return {
    executions_bucket: executionsBucket,
    distinct_users_bucket: usersBucket,
    error_rate_bucket: errorBucket,
    recency_bucket: recencyBucket(input.lastSeen, input.now),
    service_account_only: String(input.serviceAccountOnly),
    slot_summary: `${constantCount} constant, ${runtimeCount} runtime`,
  };
}
/**
 * Buckets the age of `lastSeen` relative to `now`: `active` up to 14 days,
 * `warm` up to 60 days, `cold` beyond that. Timestamps in the future count as
 * age zero.
 */
function recencyBucket(lastSeen: string, now: Date): string {
  const msPerDay = 24 * 60 * 60 * 1000;
  const elapsedMs = now.getTime() - new Date(lastSeen).getTime();
  const ageDays = Math.max(0, elapsedMs / msPerDay);
  return ageDays <= 14 ? 'active' : ageDays <= 60 ? 'warm' : 'cold';
}
/**
 * Reports whether every known user of the rows matches at least one of the
 * service-account patterns.
 *
 * Rows without a user are ignored. Returns `false` when no row has a user or
 * no patterns are configured.
 *
 * @throws SyntaxError when a pattern is not a valid regular expression.
 */
function isServiceAccountOnly(rows: HistoricSqlRawQueryRow[], patterns: string[]): boolean {
  const knownUsers: string[] = [];
  for (const row of rows) {
    if (row.user) {
      knownUsers.push(row.user);
    }
  }
  if (knownUsers.length === 0 || patterns.length === 0) {
    return false;
  }

  // All patterns are compiled up front, so an invalid one always throws.
  const matchers = patterns.map((pattern) => new RegExp(pattern));
  for (const user of knownUsers) {
    const matched = matchers.some((matcher) => matcher.test(user));
    if (!matched) {
      return false;
    }
  }
  return true;
}
/**
 * Builds a template's display title: `<dialect> · <table> [<tag>]`.
 *
 * The tag is the first six characters of the fingerprint, followed by
 * `:<last six characters of the sub-cluster id>` when a sub-cluster id is set.
 */
function buildTemplateTitle(
  dialect: HistoricSqlPullConfig['dialect'],
  firstTable: string,
  fingerprint: string,
  subClusterId: string | null,
): string {
  const shortFingerprint = fingerprint.slice(0, 6);
  const tag = subClusterId ? `${shortFingerprint}:${subClusterId.slice(-6)}` : shortFingerprint;
  return `${dialect} · ${firstTable} [${tag}]`;
}
/**
 * Renders the markdown page for a template: a heading, the normalized SQL in a
 * fenced `sql` block, and a bullet list of touched tables. The page ends with a
 * trailing newline.
 *
 * @param fingerprint Text used verbatim as the page heading.
 */
function renderTemplatePage(fingerprint: string, normalizedSql: string, tablesTouched: string[]): string {
  const header = [`# ${fingerprint}`, '', '## Normalized SQL', '```sql', normalizedSql, '```', '', '## Tables touched'];
  const bullets = tablesTouched.map((table) => `- ${table}`);
  // The final empty element yields the trailing newline after join.
  const lines = header.concat(bullets, ['']);
  return lines.join('\n');
}
/**
 * Picks up to five sample executions for a template.
 *
 * Returns no samples when the redaction policy disallows them. Otherwise one
 * representative row is kept per distinct tuple of literal values, preferring
 * successful rows and then the most recent, and the five most recent
 * representatives are returned with their SQL redacted.
 */
function selectSamples(
  rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>,
  redaction: RedactionPolicy,
): HistoricSqlUsage['samples'] {
  if (!redaction.samplesAllowed) {
    return [];
  }

  // Successful rows first, newest first within each group.
  const byPreference = rows.slice().sort((a, b) => {
    if (a.row.success !== b.row.success) {
      return a.row.success ? -1 : 1;
    }
    return b.row.startedAt.localeCompare(a.row.startedAt);
  });

  const representatives = new Map<string, { row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>();
  for (const candidate of byPreference) {
    const literalKey = candidate.analysis.literalSlots
      .slice()
      .sort((a, b) => a.position - b.position)
      .map((slot) => slot.exampleValue)
      .join('\u001f');
    if (representatives.has(literalKey)) {
      continue;
    }
    representatives.set(literalKey, candidate);
  }

  const newestFirst = Array.from(representatives.values());
  newestFirst.sort((a, b) => b.row.startedAt.localeCompare(a.row.startedAt));

  return newestFirst.slice(0, 5).map(({ row }) => ({
    started_at: row.startedAt,
    user: row.user,
    bound_sql: redactText(row.sql, redaction.redactors),
    // rows_produced is omitted only when the row never carried the field.
    ...(row.rowsProduced === undefined ? {} : { rows_produced: row.rowsProduced ?? null }),
    runtime_ms: row.runtimeMs,
    success: row.success,
  }));
}
/**
 * Keeps the `maxTemplatesPerRun` highest-ranked templates. Equal scores are
 * ordered by template id.
 */
function selectTemplates(templates: TemplateVariant[], maxTemplatesPerRun: number, now: Date): TemplateVariant[] {
  const scored = templates.map((template) => ({ template, score: rankTemplate(template, now) }));
  scored.sort((a, b) => b.score - a.score || a.template.id.localeCompare(b.template.id));

  const selected: TemplateVariant[] = [];
  for (const { template } of scored.slice(0, maxTemplatesPerRun)) {
    selected.push(template);
  }
  return selected;
}
/**
 * Scores a template as `distinct users × ln(1 + executions) × recency weight`,
 * where the recency weight is `1 / (1 + ageDays / 30)` based on the latest row.
 * A template without rows is treated as 365 days old.
 */
function rankTemplate(template: TemplateVariant, now: Date): number {
  const distinctUsers = new Set<string>();
  let latestStartedAt: string | null = null;
  for (const { row } of template.rows) {
    if (row.user) {
      distinctUsers.add(row.user);
    }
    if (latestStartedAt === null || row.startedAt > latestStartedAt) {
      latestStartedAt = row.startedAt;
    }
  }

  let ageDays = 365;
  if (latestStartedAt !== null) {
    ageDays = Math.max(0, (now.getTime() - new Date(latestStartedAt).getTime()) / 86400000);
  }
  const recencyWeight = 1 / (1 + ageDays / 30);
  return distinctUsers.size * Math.log1p(template.rows.length) * recencyWeight;
}
/**
 * Nearest-rank percentile of an ascending-sorted array.
 *
 * @param values Numbers already sorted in ascending order.
 * @param percentileValue Fraction such as 0.5 or 0.95.
 * @returns The selected element, or `null` for an empty array.
 */
function percentile(values: number[], percentileValue: number): number | null {
  const count = values.length;
  if (count === 0) {
    return null;
  }
  const rank = Math.ceil(count * percentileValue) - 1;
  // Clamp into the valid index range.
  const index = Math.min(count - 1, Math.max(0, rank));
  return values[index];
}
/**
 * Sums the numeric `rowsProduced` values of the rows.
 *
 * @returns The total, or `null` when no row carries a numeric value.
 */
function sumRowsProduced(rows: HistoricSqlRawQueryRow[]): number | null {
  let total = 0;
  let sawNumber = false;
  for (const row of rows) {
    if (typeof row.rowsProduced === 'number') {
      total += row.rowsProduced;
      sawNumber = true;
    }
  }
  return sawNumber ? total : null;
}
/**
 * Compiles redaction patterns into global regular expressions.
 *
 * An invalid pattern is skipped, a warning describing it is appended to
 * `warnings`, and samples are disallowed for the resulting policy.
 *
 * @param patterns Regular-expression sources.
 * @param warnings Warning sink; mutated in place.
 */
function compileRedactors(patterns: string[], warnings: string[]): RedactionPolicy {
  const redactors: RegExp[] = [];
  let samplesAllowed = true;
  for (const pattern of patterns) {
    try {
      redactors.push(new RegExp(pattern, 'g'));
    } catch (error) {
      samplesAllowed = false;
      const reason = error instanceof Error ? error.message : String(error);
      warnings.push(`redaction_skipped:invalid_redaction_pattern:${pattern}:${reason}`);
    }
  }
  return { redactors, samplesAllowed };
}
/**
 * Applies each redactor in order, replacing its matches with `<redacted>`.
 */
function redactText(value: string, redactors: RegExp[]): string {
  let redacted = value;
  for (const redactor of redactors) {
    redacted = redacted.replace(redactor, '<redacted>');
  }
  return redacted;
}
/**
 * Writes `value` as pretty-printed JSON (two-space indent, trailing newline)
 * to `relPath` under the staged directory.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const serialized = JSON.stringify(value, null, 2);
  await writeText(stagedDir, relPath, `${serialized}\n`);
}
/**
 * Writes UTF-8 text to `relPath` under the staged directory, creating any
 * missing parent directories first.
 */
async function writeText(stagedDir: string, relPath: string, value: string): Promise<void> {
  const destination = join(stagedDir, relPath);
  const parentDir = dirname(destination);
  await mkdir(parentDir, { recursive: true });
  await writeFile(destination, value, 'utf-8');
}

View file

@ -0,0 +1,201 @@
import { z } from 'zod';
import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
/** Source key of the historic-SQL source; the manifest schema pins its `source` field to this literal. */
export const HISTORIC_SQL_SOURCE_KEY = 'historic-sql' as const;
/** Object type literal required on every historic-SQL metadata record. */
export const HISTORIC_SQL_OBJECT_TYPE = 'historic_sql_template' as const;
/** SQL dialects accepted by the historic-SQL schemas in this module. */
const historicSqlDialectSchema = z.enum(['snowflake', 'bigquery', 'postgres']);
export type HistoricSqlDialect = z.infer<typeof historicSqlDialectSchema>;
/**
 * Configuration for one historic-SQL pull. Every field except `dialect` has a
 * default, so the parsed `HistoricSqlPullConfig` always carries all fields.
 */
export const historicSqlPullConfigSchema = z.object({
dialect: historicSqlDialectSchema,
// Look-back window in whole days (1–365, default 90).
windowDays: z.number().int().min(1).max(365).default(90),
// ISO datetime cursor of the last successful pull, or null when there is none.
lastSuccessfulCursor: z.string().datetime().nullable().default(null),
// Regular-expression sources; not validated as regexes by this schema.
serviceAccountUserPatterns: z.array(z.string()).default([]),
// Regular-expression sources; not validated as regexes by this schema.
redactionPatterns: z.array(z.string()).default([]),
// Upper bound on templates per run (1–5000, default 5000).
maxTemplatesPerRun: z.number().int().min(1).max(5000).default(5000),
// Minimum call count (>= 1, default 5) — presumably the pg_stat_statements threshold; confirm against the Postgres reader.
minCalls: z.number().int().min(1).default(5),
});
export type HistoricSqlPullConfig = z.infer<typeof historicSqlPullConfigSchema>;
/** Time range passed to a query-history reader's `fetch`. */
export interface HistoricSqlTimeWindow {
start: Date;
end: Date;
}
/**
 * One raw query-history row as yielded by a query-history reader.
 * Timestamps are ISO datetime strings.
 */
export const historicSqlRawQueryRowSchema = z.object({
id: z.string().min(1),
sql: z.string().min(1),
user: z.string().nullable().default(null),
startedAt: z.string().datetime(),
endedAt: z.string().datetime().nullable().default(null),
runtimeMs: z.number().nonnegative().nullable().default(null),
// Unlike the other nullable fields this one has no default: it may be absent, null, or a non-negative integer.
rowsProduced: z.number().int().nonnegative().nullable().optional(),
success: z.boolean().default(true),
errorMessage: z.string().nullable().default(null),
});
export type HistoricSqlRawQueryRow = z.infer<typeof historicSqlRawQueryRowSchema>;
/**
 * Reader over a warehouse's query history. The `client` is typed `unknown`
 * here, so each implementation is responsible for narrowing it.
 */
export interface HistoricSqlQueryHistoryReader {
// NOTE(review): presumably rejects when the history source is not accessible — confirm with implementations.
probe(client: unknown): Promise<void>;
// Streams raw rows for the window; `cursor` is optional and may be null.
fetch(
client: unknown,
window: HistoricSqlTimeWindow,
cursor?: string | null,
): AsyncIterable<HistoricSqlRawQueryRow>;
}
/** Minimal query client used by the Postgres reader: runs SQL and returns headers plus row arrays. */
export interface KloPostgresQueryClient {
executeQuery(sql: string, params?: unknown[]): Promise<{ headers: string[]; rows: unknown[][]; totalRows?: number }>;
}
/** Result of probing the Postgres statistics source. */
export interface PostgresPgssProbeResult {
pgServerVersion: string;
warnings: string[];
}
/** One snapshot read from the Postgres statistics source. */
export interface PostgresPgssSnapshot {
statsResetAt: string | null;
deallocCount: number | null;
rows: PostgresPgssRow[];
}
/** Reader that probes a Postgres connection and reads a statistics snapshot from it. */
export interface PostgresPgssReader {
probe(client: KloPostgresQueryClient): Promise<PostgresPgssProbeResult>;
// `minCalls` and `maxTemplates` bound what the snapshot returns; exact filtering is implementation-defined.
readSnapshot(
client: KloPostgresQueryClient,
options: { minCalls: number; maxTemplates: number },
): Promise<PostgresPgssSnapshot>;
}
/**
 * One row of a Postgres statistics snapshot.
 * NOTE(review): field names suggest pg_stat_statements columns joined with role/database names — confirm against the reader.
 */
export interface PostgresPgssRow {
queryid: string;
userid: string;
username: string | null;
dbid: string;
database: string | null;
query: string;
calls: number;
totalExecTime: number;
meanExecTime: number;
totalRows: number;
}
/**
 * Aggregated statistics for one query.
 * NOTE(review): the `delta*` fields presumably hold differences against a stored baseline — confirm with the aggregation code.
 */
export interface PostgresPgssAggregateRow {
id: string;
queryid: string;
dbid: string;
database: string | null;
query: string;
deltaCalls: number;
deltaExecTime: number;
deltaRows: number;
meanExecTime: number;
distinctUsersDelta: number;
users: string[];
firstObservedAt: string;
}
/**
 * Dependencies injected into the historic-SQL source adapter.
 * The Postgres-specific members, the clock and the success hook are optional.
 */
export interface HistoricSqlSourceAdapterDeps {
sqlAnalysis: SqlAnalysisPort;
reader: HistoricSqlQueryHistoryReader;
// Opaque client handed to `reader`; its concrete type depends on the reader.
queryClient: unknown;
postgresReader?: PostgresPgssReader;
postgresQueryClient?: KloPostgresQueryClient;
postgresBaselineRootDir?: string;
// Clock override, e.g. for tests.
now?: () => Date;
// NOTE(review): presumably invoked after a pull completes successfully — confirm with the adapter.
onPullSucceeded?: (ctx: {
connectionId: string;
sourceKey: string;
syncId: string;
trigger: import('../../types.js').IngestTrigger;
completedAt: Date;
stagedDir: string;
nextSuccessfulCursor: string | null;
}) => Promise<void>;
}
/** Classifications a literal slot can receive. */
const historicSqlLiteralSlotClassificationSchema = z.enum(['constant', 'runtime', 'categorical']);
export type HistoricSqlLiteralSlotClassification = z.infer<typeof historicSqlLiteralSlotClassificationSchema>;
/** Shape of a staged template's metadata record. */
export const historicSqlMetadataSchema = z.object({
id: z.string().min(1),
title: z.string().min(1),
path: z.string().min(1),
objectType: z.literal(HISTORIC_SQL_OBJECT_TYPE),
// Always null for this object type.
lastEditedAt: z.null(),
properties: z.object({
fingerprint: z.string().min(1),
// Null when the template is not split into sub-clusters.
sub_cluster_id: z.string().nullable(),
dialect: historicSqlDialectSchema,
tables_touched: z.array(z.string()),
literal_slots: z.array(
z.object({
// Slot positions are 1-based.
position: z.number().int().min(1),
type: z.enum(['string', 'number', 'timestamp', 'date', 'boolean', 'null', 'unknown']),
classification: historicSqlLiteralSlotClassificationSchema,
}),
),
triage_signals: z.record(z.string(), z.string()),
}),
});
export type HistoricSqlMetadata = z.infer<typeof historicSqlMetadataSchema>;
/** Shape of a staged template's usage summary: aggregate stats, per-slot value counts, and sample executions. */
export const historicSqlUsageSchema = z.object({
stats: z.object({
executions: z.number().int().nonnegative(),
distinct_users: z.number().int().nonnegative(),
first_seen: z.string().datetime(),
last_seen: z.string().datetime(),
p50_runtime_ms: z.number().nonnegative().nullable(),
p95_runtime_ms: z.number().nonnegative().nullable(),
mean_runtime_ms: z.number().nonnegative().nullable().optional(),
// Fraction in [0, 1].
error_rate: z.number().min(0).max(1),
rows_produced: z.number().int().nonnegative().nullable().optional(),
}),
literal_slots: z.array(
z.object({
position: z.number().int().min(1),
distinct_values: z.number().int().nonnegative(),
// Pairs of [value, occurrence count].
top_values: z.array(z.tuple([z.string(), z.number().int().nonnegative()])),
}),
),
samples: z.array(
z.object({
started_at: z.string().datetime(),
user: z.string().nullable(),
bound_sql: z.string(),
rows_produced: z.number().int().nonnegative().nullable().optional(),
runtime_ms: z.number().nonnegative().nullable(),
success: z.boolean(),
}),
),
});
export type HistoricSqlUsage = z.infer<typeof historicSqlUsageSchema>;
/** Shape of the `manifest.json` summarizing one staged historic-SQL pull. */
export const historicSqlManifestSchema = z.object({
source: z.literal(HISTORIC_SQL_SOURCE_KEY),
connectionId: z.string().min(1),
dialect: historicSqlDialectSchema,
fetchedAt: z.string().datetime(),
windowStart: z.string().datetime(),
windowEnd: z.string().datetime(),
nextSuccessfulCursor: z.string().datetime().nullable(),
templateCount: z.number().int().nonnegative(),
capped: z.boolean(),
warnings: z.array(z.string()),
// The remaining scalar fields default to false/null when omitted from the input.
degraded: z.boolean().default(false),
statsResetAt: z.string().datetime().nullable().default(null),
baselineFirstRun: z.boolean().default(false),
pgServerVersion: z.string().nullable().default(null),
deallocCount: z.number().int().nonnegative().nullable().default(null),
// One entry per staged template.
templates: z.array(
z.object({
id: z.string().min(1),
fingerprint: z.string().min(1),
subClusterId: z.string().nullable(),
path: z.string().min(1),
}),
),
});
export type HistoricSqlManifest = z.infer<typeof historicSqlManifestSchema>;

View file

@ -0,0 +1,107 @@
import { mkdtemp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import type { KloSchemaSnapshot } from '../../../scan/types.js';
import { chunkLiveDatabaseStagedDir } from './chunk.js';
import { liveDatabaseTablePath, writeLiveDatabaseSnapshot } from './stage.js';
/**
 * Test fixture: a Postgres schema snapshot with two single-column tables,
 * `public.orders` and `public.customers`, and no foreign keys.
 */
function snapshot(): KloSchemaSnapshot {
return {
connectionId: 'conn-1',
driver: 'postgres',
extractedAt: '2026-04-27T00:00:00.000Z',
scope: { schemas: ['public'] },
metadata: {},
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
{
name: 'customers',
catalog: null,
db: 'public',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
],
};
}
describe('chunkLiveDatabaseStagedDir', () => {
// Without a diff set every staged table yields a work unit; the first unit (customers)
// depends on the two global files and lists the orders table file as a peer.
it('emits one work unit per table on the first run', async () => {
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-chunk-'));
await writeLiveDatabaseSnapshot(dir, snapshot());
const result = await chunkLiveDatabaseStagedDir(dir);
expect(result.workUnits.map((wu) => wu.unitKey)).toEqual([
'live-database-public-customers',
'live-database-public-orders',
]);
expect(result.workUnits[0]?.dependencyPaths).toEqual(['connection.json', 'foreign-keys.json']);
expect(result.workUnits[0]?.peerFileIndex).toContain(
liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' }),
);
});
// With a diff set, only the modified table is re-chunked and the deleted table path
// is reported for eviction. The diff marks customers as deleted although its file is still staged.
it('keeps only changed tables during incremental syncs and records table evictions', async () => {
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-diff-'));
await writeLiveDatabaseSnapshot(dir, snapshot());
const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' });
const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' });
const result = await chunkLiveDatabaseStagedDir(dir, {
added: [],
modified: [ordersPath],
deleted: [customersPath],
unchanged: ['connection.json', 'foreign-keys.json'],
});
expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']);
expect(result.eviction?.deletedRawPaths).toEqual([customersPath]);
});
// A change to the global foreign-key file invalidates every table, even though no table file changed.
it('fans out all table work units when the foreign-key index changes', async () => {
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-fk-'));
await writeLiveDatabaseSnapshot(dir, snapshot());
const result = await chunkLiveDatabaseStagedDir(dir, {
added: [],
modified: ['foreign-keys.json'],
deleted: [],
unchanged: [],
});
expect(result.workUnits).toHaveLength(2);
});
});

View file

@ -0,0 +1,58 @@
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
import type { KloSchemaTable } from '../../../scan/types.js';
import { LIVE_DATABASE_FOREIGN_KEYS_FILE, LIVE_DATABASE_META_FILE, readLiveDatabaseTableFiles } from './stage.js';
/**
 * Builds the work-unit key for a table: `live-database-` followed by the
 * slugified catalog, schema and table name joined with `-`.
 *
 * Each part is lower-cased, runs of characters outside `[a-z0-9]` become a
 * single `-`, and leading/trailing dashes are trimmed. Missing or empty parts
 * are dropped; if nothing remains the key is `live-database-table`.
 */
function unitKey(table: KloSchemaTable): string {
  const slugs: string[] = [];
  for (const part of [table.catalog, table.db, table.name]) {
    if (typeof part !== 'string' || part.length === 0) {
      continue;
    }
    const slug = part
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '');
    if (slug) {
      slugs.push(slug);
    }
  }
  const suffix = slugs.join('-') || 'table';
  return `live-database-${suffix}`;
}
/**
 * Dotted display name of a table: the non-empty catalog, schema and table
 * name parts joined with `.`.
 */
function displayName(table: KloSchemaTable): string {
  const segments: Array<string | null | undefined> = [];
  for (const part of [table.catalog, table.db, table.name]) {
    if (part) {
      segments.push(part);
    }
  }
  return segments.join('.');
}
/** Reports whether a staged path is a table file: under `tables/` and ending in `.json`. */
function isTablePath(path: string): boolean {
  if (!path.startsWith('tables/')) {
    return false;
  }
  return path.endsWith('.json');
}
/**
 * Splits a staged live-database snapshot into one work unit per table.
 *
 * Without a diff set every staged table produces a work unit. With a diff set,
 * only tables whose file was added or modified are kept — unless one of the
 * global files (connection metadata or the foreign-key index) was touched, in
 * which case every table is re-emitted. Deleted table files are reported for
 * eviction.
 *
 * @param stagedDir Directory holding the staged snapshot files.
 * @param diffSet Optional change set relative to the previous sync.
 * @returns The work units, plus an `eviction` entry when table files were deleted.
 */
export async function chunkLiveDatabaseStagedDir(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
  const tableFiles = await readLiveDatabaseTableFiles(stagedDir);
  // Sorted once up front: filtering a sorted list keeps it sorted, so each
  // unit's peer index no longer needs its own sort.
  const sortedTablePaths = tableFiles.map((file) => file.path).sort();

  const touched = diffSet ? new Set([...diffSet.added, ...diffSet.modified]) : null;
  const globalTouched =
    touched !== null && (touched.has(LIVE_DATABASE_META_FILE) || touched.has(LIVE_DATABASE_FOREIGN_KEYS_FILE));

  const workUnits: WorkUnit[] = [];
  for (const file of tableFiles) {
    if (touched && !globalTouched && !touched.has(file.path)) {
      continue;
    }
    const label = displayName(file.table);
    const columnCount = file.table.columns.length;
    workUnits.push({
      unitKey: unitKey(file.table),
      displayLabel: `Live database table ${label}`,
      rawFiles: [file.path],
      peerFileIndex: sortedTablePaths.filter((path) => path !== file.path),
      // Fresh array per unit so a consumer mutating one unit's dependencies
      // cannot affect the others.
      dependencyPaths: [LIVE_DATABASE_META_FILE, LIVE_DATABASE_FOREIGN_KEYS_FILE],
      notes: `Database catalog snapshot for ${label} with ${columnCount} column${columnCount === 1 ? '' : 's'}.`,
    });
  }

  const deletedRawPaths = diffSet ? diffSet.deleted.filter(isTablePath).sort() : [];
  return {
    workUnits,
    ...(deletedRawPaths.length > 0 ? { eviction: { deletedRawPaths } } : {}),
  };
}

View file

@ -0,0 +1,224 @@
import { once } from 'node:events';
import { createServer } from 'node:http';
import { describe, expect, it, vi } from 'vitest';
import { createDaemonLiveDatabaseIntrospection } from './daemon-introspection.js';
// Canned daemon payload (snake_case wire format) shared by the tests below:
// two tables, with `orders.customer_id` referencing `customers.id`.
const daemonResponse = {
connection_id: 'warehouse',
extracted_at: '2026-04-28T10:00:00+00:00',
metadata: { driver: 'postgres', schemas: ['public'] },
tables: [
{
catalog: 'warehouse',
db: 'public',
name: 'customers',
comment: null,
columns: [{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: null }],
foreign_keys: [],
},
{
catalog: 'warehouse',
db: 'public',
name: 'orders',
comment: 'Order facts',
columns: [
{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: 'Order id' },
{ name: 'customer_id', type: 'integer', nullable: false, primary_key: false, comment: null },
],
foreign_keys: [
{
from_column: 'customer_id',
to_table: 'customers',
to_column: 'id',
constraint_name: 'orders_customer_id_fkey',
},
],
},
],
};
describe('createDaemonLiveDatabaseIntrospection', () => {
// Process-runner path: checks the full snake_case -> camelCase mapping of the daemon
// response and the exact payload sent to the `database-introspect` subcommand.
it('calls the database-introspect daemon command and maps the snapshot response', async () => {
const runJson = vi.fn(async () => daemonResponse);
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
readonly: true,
},
},
schemas: ['public'],
runJson,
});
await expect(introspection.extractSchema('warehouse')).resolves.toEqual({
connectionId: 'warehouse',
driver: 'postgres',
extractedAt: '2026-04-28T10:00:00+00:00',
scope: { schemas: ['public'] },
metadata: { driver: 'postgres', schemas: ['public'] },
tables: [
{
catalog: 'warehouse',
db: 'public',
name: 'customers',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
{
catalog: 'warehouse',
db: 'public',
name: 'orders',
kind: 'table',
comment: 'Order facts',
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
{
name: 'customer_id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: null,
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fkey',
},
],
},
],
});
expect(runJson).toHaveBeenCalledWith('database-introspect', {
connection_id: 'warehouse',
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
schemas: ['public'],
statement_timeout_ms: 30_000,
connection_timeout_seconds: 5,
});
});
// HTTP path: spins up a local server on an ephemeral port and checks that the request
// goes to `/database/introspect`, that the `postgresql` driver alias is sent as `postgres`,
// and that `schemas` falls back to ['public'] when not configured.
it('calls a running daemon HTTP endpoint when baseUrl is configured', async () => {
const requests: Array<{ url: string | undefined; body: unknown }> = [];
const server = createServer((request, response) => {
const chunks: Buffer[] = [];
request.on('data', (chunk: Buffer) => chunks.push(chunk));
request.on('end', () => {
requests.push({
url: request.url,
body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
});
response.writeHead(200, { 'content-type': 'application/json' });
response.end(JSON.stringify(daemonResponse));
});
});
server.listen(0, '127.0.0.1');
await once(server, 'listening');
try {
const address = server.address();
if (!address || typeof address === 'string') {
throw new Error('expected TCP server address');
}
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgresql',
url: 'postgres://localhost:5432/warehouse',
readonly: true,
},
},
baseUrl: `http://127.0.0.1:${address.port}`,
});
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
connectionId: 'warehouse',
tables: [{ name: 'customers' }, { name: 'orders' }],
});
expect(requests).toEqual([
{
url: '/database/introspect',
body: {
connection_id: 'warehouse',
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
schemas: ['public'],
statement_timeout_ms: 30_000,
connection_timeout_seconds: 5,
},
},
]);
} finally {
// Always release the port, even when an expectation above fails.
server.close();
}
});
// A connection with `readonly: false` must be rejected with a message naming the config key.
it('requires a configured read-only postgres connection with a url', async () => {
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
readonly: false,
},
},
runJson: vi.fn(async () => daemonResponse),
});
await expect(introspection.extractSchema('warehouse')).rejects.toThrow(
'Local live-database ingest requires connections.warehouse.readonly: true.',
);
});
// An unsupported driver must fail validation without the daemon runner ever being invoked.
it('rejects unsupported local connection drivers before calling the daemon', async () => {
const runJson = vi.fn(async () => daemonResponse);
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'snowflake',
url: 'snowflake://example',
readonly: true,
},
},
runJson,
});
await expect(introspection.extractSchema('warehouse')).rejects.toThrow(
'Local live-database ingest cannot run driver "snowflake".',
);
expect(runJson).not.toHaveBeenCalled();
});
});

View file

@ -0,0 +1,256 @@
import { spawn } from 'node:child_process';
import { request as httpRequest } from 'node:http';
import { request as httpsRequest } from 'node:https';
import { URL } from 'node:url';
import type { KloProjectConnectionConfig } from '../../../project/config.js';
import type { KloSchemaColumn, KloSchemaForeignKey, KloSchemaSnapshot, KloSchemaTable } from '../../../scan/types.js';
import { inferKloDimensionType, normalizeKloNativeType } from '../../../scan/type-normalization.js';
import type { LiveDatabaseIntrospectionPort } from './types.js';
/** Daemon subcommands this module can invoke. */
export type KloDaemonDatabaseIntrospectionCommand = 'database-introspect';
/** Runs a daemon subcommand with a JSON payload and resolves with the JSON object it returns. */
export type KloDaemonDatabaseJsonRunner = (
subcommand: KloDaemonDatabaseIntrospectionCommand,
payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;
/** Sends a JSON payload to a daemon HTTP path and resolves with the JSON object it returns. */
export type KloDaemonDatabaseHttpJsonRunner = (
path: string,
payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;
/**
 * Options for building the daemon-backed live-database introspection.
 * Only `connections` is required.
 */
export interface DaemonLiveDatabaseIntrospectionOptions {
// Project connection configs keyed by connection id.
connections: Record<string, KloProjectConnectionConfig>;
schemas?: string[];
statementTimeoutMs?: number;
connectionTimeoutSeconds?: number;
// Process-spawn settings for the daemon executable.
command?: string;
args?: string[];
cwd?: string;
env?: NodeJS.ProcessEnv;
// Base URL of an already-running daemon; NOTE(review): presumably selects the HTTP transport when set — confirm in the factory.
baseUrl?: string;
// Transport overrides, e.g. for tests.
runJson?: KloDaemonDatabaseJsonRunner;
requestJson?: KloDaemonDatabaseHttpJsonRunner;
now?: () => Date;
}
// Default schema list — presumably applied when `options.schemas` is omitted; confirm at the use site.
const DEFAULT_SCHEMAS = ['public'];
/**
 * Parses daemon output and requires it to be a JSON object.
 *
 * @param raw Raw text returned by the daemon.
 * @param subcommand Name used in error messages to identify the failing call.
 * @returns The parsed object.
 * @throws SyntaxError when `raw` is empty or not valid JSON; the message names
 *   the subcommand so the failure can be traced to the daemon call.
 * @throws Error when the JSON is valid but not an object (null, array or primitive).
 */
function parseJsonObject(raw: string, subcommand: string): Record<string, unknown> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    // Keep the SyntaxError type, but add which daemon call produced the bad output.
    const reason = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`klo-daemon ${subcommand} returned invalid JSON: ${reason}`);
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`klo-daemon ${subcommand} returned non-object JSON`);
  }
  return parsed as Record<string, unknown>;
}
/**
 * Builds a JSON runner that executes the daemon as a child process.
 *
 * The subcommand is appended to the configured arguments, the payload is written to the child's
 * stdin as a single JSON line, and stdout is parsed as a JSON object once the process closes.
 *
 * @param options - Executable, leading arguments, and optional cwd/env for the spawned process.
 * @returns A runner that rejects when the process cannot start, exits non-zero, is killed by a
 *   signal, or prints output that is not a JSON object.
 */
function runProcessJson(
  options: Required<Pick<DaemonLiveDatabaseIntrospectionOptions, 'command' | 'args'>> &
    Pick<DaemonLiveDatabaseIntrospectionOptions, 'cwd' | 'env'>,
): KloDaemonDatabaseJsonRunner {
  return async (subcommand, payload) =>
    new Promise((resolve, reject) => {
      const child = spawn(options.command, [...options.args, subcommand], {
        cwd: options.cwd,
        env: { ...process.env, ...options.env },
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      const stdout: Buffer[] = [];
      const stderr: Buffer[] = [];
      child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
      child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
      child.on('error', reject);
      child.stdin.on('error', () => {
        // A child that exits before reading its input makes this write fail (e.g. EPIPE).
        // Without a listener that stream error would be thrown as an uncaught exception;
        // the 'close' handler below reports the actual outcome with stderr attached.
      });
      child.on('close', (code, signal) => {
        const stdoutText = Buffer.concat(stdout).toString('utf8').trim();
        const stderrText = Buffer.concat(stderr).toString('utf8').trim();
        if (code !== 0) {
          // code is null when the process was terminated by a signal.
          const outcome =
            code === null && signal !== null ? `terminated by signal ${signal}` : `exit code ${code}`;
          reject(new Error(`klo-daemon ${subcommand} failed: ${stderrText || outcome}`));
          return;
        }
        try {
          resolve(parseJsonObject(stdoutText, subcommand));
        } catch (error) {
          reject(error);
        }
      });
      child.stdin.end(`${JSON.stringify(payload)}\n`);
    });
}
/** Ensures the base URL ends with a slash so relative paths resolve beneath it. */
function normalizedBaseUrl(baseUrl: string): string {
  if (baseUrl.endsWith('/')) {
    return baseUrl;
  }
  return `${baseUrl}/`;
}
/**
 * Builds an HTTP runner that POSTs JSON payloads to a daemon reachable at `baseUrl`.
 *
 * @param baseUrl - Daemon base URL; http and https are both supported.
 * @returns A runner that rejects on request or response stream errors, on non-2xx status codes,
 *   and on bodies that are not a JSON object.
 */
function postJson(baseUrl: string): KloDaemonDatabaseHttpJsonRunner {
  return async (path, payload) =>
    new Promise((resolve, reject) => {
      // Drop the leading slash so the path resolves relative to the base URL's own path
      // instead of replacing it.
      const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
      const body = JSON.stringify(payload);
      const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
      const request = client(
        target,
        {
          method: 'POST',
          headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            'content-length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const chunks: Buffer[] = [];
          response.on('data', (chunk: Buffer) => chunks.push(chunk));
          // A connection dropped mid-body is reported on the response stream and 'end' never
          // fires; without this listener the promise would never settle.
          response.on('error', reject);
          response.on('end', () => {
            const text = Buffer.concat(chunks).toString('utf8');
            const statusCode = response.statusCode ?? 0;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`klo-daemon HTTP ${path} failed with ${statusCode}: ${text}`));
              return;
            }
            try {
              resolve(parseJsonObject(text, path));
            } catch (error) {
              reject(error);
            }
          });
        },
      );
      request.on('error', reject);
      request.end(body);
    });
}
/** Returns `value` when it is a non-array object, otherwise a fresh empty object. */
function recordValue(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
/** Keeps only the plain-object items of an array; any non-array input yields an empty list. */
function recordArray(value: unknown): Array<Record<string, unknown>> {
  if (!Array.isArray(value)) {
    return [];
  }
  const records: Array<Record<string, unknown>> = [];
  for (const item of value as unknown[]) {
    if (item !== null && typeof item === 'object' && !Array.isArray(item)) {
      records.push(item as Record<string, unknown>);
    }
  }
  return records;
}
/**
 * Returns `value` when it is a non-empty string.
 *
 * @param field - Response field path, used only in the error message.
 * @throws Error when the value is missing, empty, or not a string.
 */
function requiredString(value: unknown, field: string): string {
  if (typeof value === 'string' && value.length > 0) {
    return value;
  }
  throw new Error(`klo-daemon database introspection response is missing string field ${field}`);
}
/** Passes strings through unchanged (including empty ones); everything else becomes null. */
function nullableString(value: unknown): string | null {
  if (typeof value === 'string') {
    return value;
  }
  return null;
}
/** Passes strings through unchanged (including empty ones); everything else becomes undefined. */
function optionalString(value: unknown): string | undefined {
  if (typeof value === 'string') {
    return value;
  }
  return undefined;
}
/** Lower-cases and trims a driver name, folding the 'postgresql' alias into 'postgres'. */
function normalizeDriver(driver: unknown): string {
  const text = String(driver ?? '')
    .trim()
    .toLowerCase();
  if (text === 'postgresql') {
    return 'postgres';
  }
  return text;
}
/**
 * Looks up a connection and enforces the preconditions for local live-database ingest.
 *
 * @throws Error when the driver is not Postgres, the connection is not marked readonly,
 *   or no non-blank url is configured. Checks run in that order.
 */
function requirePostgresConnection(
  connections: Record<string, KloProjectConnectionConfig>,
  connectionId: string,
): KloProjectConnectionConfig & { url: string } {
  const config = connections[connectionId];
  if (normalizeDriver(config?.driver) !== 'postgres') {
    throw new Error(`Local live-database ingest cannot run driver "${config?.driver ?? 'unknown'}".`);
  }
  if (config?.readonly !== true) {
    throw new Error(`Local live-database ingest requires connections.${connectionId}.readonly: true.`);
  }
  if (!(typeof config.url === 'string' && config.url.trim().length > 0)) {
    throw new Error(`Local live-database ingest requires connections.${connectionId}.url.`);
  }
  return config as KloProjectConnectionConfig & { url: string };
}
/**
 * Converts one daemon column record into a schema column.
 * A column is treated as nullable unless the daemon explicitly reports `nullable: false`.
 */
function mapColumn(raw: Record<string, unknown>): KloSchemaColumn {
  // The type is validated before the name, so a record missing both reports the type first.
  const nativeType = requiredString(raw.type, 'tables[].columns[].type');
  const name = requiredString(raw.name, 'tables[].columns[].name');
  const normalizedType = normalizeKloNativeType(nativeType);
  const dimensionType = inferKloDimensionType(nativeType);
  return {
    name,
    nativeType,
    normalizedType,
    dimensionType,
    nullable: raw.nullable !== false,
    primaryKey: raw.primary_key === true,
    comment: nullableString(raw.comment),
  };
}
/**
 * Converts one daemon foreign-key record into a schema foreign key.
 * Target catalog and schema are always emitted as null.
 */
function mapForeignKey(raw: Record<string, unknown>): KloSchemaForeignKey {
  const fromColumn = requiredString(raw.from_column, 'tables[].foreign_keys[].from_column');
  const toTable = requiredString(raw.to_table, 'tables[].foreign_keys[].to_table');
  const toColumn = requiredString(raw.to_column, 'tables[].foreign_keys[].to_column');
  const constraintName = nullableString(raw.constraint_name);
  return { fromColumn, toCatalog: null, toDb: null, toTable, toColumn, constraintName };
}
/**
 * Converts one daemon table record into a schema table.
 * Every entry is emitted with kind 'table' and no row estimate.
 */
function mapTable(raw: Record<string, unknown>): KloSchemaTable {
  const catalog = nullableString(raw.catalog);
  const db = nullableString(raw.db);
  const name = requiredString(raw.name, 'tables[].name');
  const comment = nullableString(raw.comment);
  const columns = recordArray(raw.columns).map(mapColumn);
  const foreignKeys = recordArray(raw.foreign_keys).map(mapForeignKey);
  return { catalog, db, name, kind: 'table', comment, estimatedRows: null, columns, foreignKeys };
}
/**
 * Converts the daemon's introspection response into a schema snapshot.
 *
 * @param raw - Parsed daemon response.
 * @param input - Request-side values; `extractedAt` is used when the response omits
 *   `extracted_at`, and `schemas` becomes the snapshot scope.
 * @throws Error when `connection_id` or any required table/column field is missing.
 */
function mapDaemonSnapshot(
  raw: Record<string, unknown>,
  input: { connectionId: string; extractedAt: string; schemas: string[] },
): KloSchemaSnapshot {
  return {
    // requiredString returns a non-empty string or throws, so a fallback to
    // input.connectionId could never be reached here; the response value is authoritative.
    connectionId: requiredString(raw.connection_id, 'connection_id'),
    driver: 'postgres',
    extractedAt: optionalString(raw.extracted_at) ?? input.extractedAt,
    scope: { schemas: input.schemas },
    metadata: recordValue(raw.metadata),
    tables: recordArray(raw.tables).map(mapTable),
  };
}
/**
 * Creates an introspection port that delegates schema extraction to the klo daemon.
 *
 * An HTTP runner (injected, or derived from `baseUrl`) is used when available;
 * otherwise the daemon is invoked through the process runner.
 */
export function createDaemonLiveDatabaseIntrospection(
  options: DaemonLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const schemas = options.schemas ?? DEFAULT_SCHEMAS;
  const runJson =
    options.runJson ??
    runProcessJson({
      command: options.command ?? 'python',
      args: options.args ?? ['-m', 'klo_daemon'],
      cwd: options.cwd,
      env: options.env,
    });
  let requestJson = options.requestJson;
  if (!requestJson && options.baseUrl) {
    requestJson = postJson(options.baseUrl);
  }
  const clock = options.now ?? (() => new Date());

  const extractSchema = async (connectionId: string): Promise<KloSchemaSnapshot> => {
    // Validate before any daemon call so unsupported connections never reach a runner.
    const connection = requirePostgresConnection(options.connections, connectionId);
    const payload = {
      connection_id: connectionId,
      driver: normalizeDriver(connection.driver),
      url: connection.url,
      schemas,
      statement_timeout_ms: options.statementTimeoutMs ?? 30_000,
      connection_timeout_seconds: options.connectionTimeoutSeconds ?? 5,
    };
    const raw = requestJson
      ? await requestJson('/database/introspect', payload)
      : await runJson('database-introspect', payload);
    const extractedAt = clock().toISOString();
    return mapDaemonSnapshot(raw, { connectionId, extractedAt, schemas });
  };

  return { extractSchema };
}

View file

@ -0,0 +1,136 @@
import { describe, expect, it } from 'vitest';
import type { KloSchemaSnapshot } from '../../../scan/types.js';
import { buildLiveDatabaseTableNaturalKey, kloSchemaSnapshotToExtractedSchema } from './extracted-schema.js';
/** Fixture: a Postgres snapshot with `orders` referencing `customers` through customer_id. */
function snapshot(): KloSchemaSnapshot {
  type Table = KloSchemaSnapshot['tables'][number];
  type Column = Table['columns'][number];

  // Every column in this fixture is a non-nullable integer.
  const integerColumn = (name: string, primaryKey: boolean, comment: string | null): Column => ({
    name,
    nativeType: 'integer',
    normalizedType: 'integer',
    dimensionType: 'number',
    nullable: false,
    primaryKey,
    comment,
  });

  const orders: Table = {
    name: 'orders',
    catalog: null,
    db: 'public',
    kind: 'table',
    comment: 'Orders placed by customers',
    estimatedRows: null,
    columns: [integerColumn('id', true, 'Primary key'), integerColumn('customer_id', false, null)],
    foreignKeys: [
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'public',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fkey',
      },
    ],
  };

  const customers: Table = {
    name: 'customers',
    catalog: null,
    db: 'public',
    kind: 'table',
    comment: null,
    estimatedRows: null,
    columns: [integerColumn('id', true, null)],
    foreignKeys: [],
  };

  return {
    connectionId: 'conn-1',
    driver: 'postgres',
    extractedAt: '2026-04-27T00:00:00.000Z',
    scope: { schemas: ['public'] },
    metadata: { driver: 'postgres' },
    tables: [orders, customers],
  };
}
// Covers the snapshot-to-extracted-schema conversion and the natural-key helper.
describe('kloSchemaSnapshotToExtractedSchema', () => {
// The expected shape renames `comment` to `dbComment` and `nativeType` to `type`, omits the
// snapshot-only fields (kind, estimatedRows, normalizedType, dimensionType), and stamps each
// foreign key with its owning table as `fromTable`.
it('preserves structural table, column, comment, and key metadata', () => {
const extracted = kloSchemaSnapshotToExtractedSchema(snapshot());
expect(extracted.tables).toEqual([
{
name: 'orders',
catalog: null,
db: 'public',
dbComment: 'Orders placed by customers',
columns: [
{
name: 'id',
type: 'integer',
nullable: false,
primaryKey: true,
dbComment: 'Primary key',
},
{
name: 'customer_id',
type: 'integer',
nullable: false,
primaryKey: false,
dbComment: null,
},
],
foreignKeys: [
{
fromTable: 'orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fkey',
},
],
},
{
name: 'customers',
catalog: null,
db: 'public',
dbComment: null,
columns: [
{
name: 'id',
type: 'integer',
nullable: false,
primaryKey: true,
dbComment: null,
},
],
foreignKeys: [],
},
]);
});
// A null catalog contributes an empty segment, so the key starts with the separator.
it('builds the same natural key shape used by schema sync', () => {
expect(buildLiveDatabaseTableNaturalKey({ catalog: null, db: 'public', name: 'orders' })).toBe('|public|orders');
expect(buildLiveDatabaseTableNaturalKey({ catalog: 'warehouse', db: 'analytics', name: 'events' })).toBe(
'warehouse|analytics|events',
);
});
});

View file

@ -0,0 +1,61 @@
import type { KloSchemaSnapshot, KloSchemaTable } from '../../../scan/types.js';
/** A single-column foreign key, expressed with the owning table's name attached. */
export interface LiveDatabaseExtractedForeignKey {
fromTable: string;
fromColumn: string;
toTable: string;
toColumn: string;
// Omitted when the source snapshot carries no constraint name.
constraintName?: string;
}
/** Column metadata carried over from a schema snapshot. */
export interface LiveDatabaseExtractedColumn {
name: string;
// The column's native database type string.
type: string;
nullable: boolean;
primaryKey: boolean;
// Comment stored in the database for this column, or null.
dbComment: string | null;
}
/** Table metadata carried over from a schema snapshot. */
export interface LiveDatabaseExtractedTable {
name: string;
catalog: string | null;
db: string | null;
// Comment stored in the database for this table, or null.
dbComment: string | null;
columns: LiveDatabaseExtractedColumn[];
foreignKeys: LiveDatabaseExtractedForeignKey[];
}
/** The structural subset of a schema snapshot: its tables plus the originating connection id. */
export interface LiveDatabaseExtractedSchema {
connectionId?: string;
tables: LiveDatabaseExtractedTable[];
}
/** Builds the `catalog|db|name` key for a table; a null catalog or db contributes an empty segment. */
export function buildLiveDatabaseTableNaturalKey(table: Pick<KloSchemaTable, 'catalog' | 'db' | 'name'>): string {
  const segments = [table.catalog ?? '', table.db ?? '', table.name];
  return segments.join('|');
}
/**
 * Reduces a schema snapshot to its structural metadata: tables, columns, comments, and keys.
 * Foreign keys gain a `fromTable`, and `constraintName` is included only when it is non-empty.
 */
export function kloSchemaSnapshotToExtractedSchema(snapshot: KloSchemaSnapshot): LiveDatabaseExtractedSchema {
  const toExtractedTable = (table: KloSchemaSnapshot['tables'][number]): LiveDatabaseExtractedTable => {
    const columns = table.columns.map(
      (column): LiveDatabaseExtractedColumn => ({
        name: column.name,
        type: column.nativeType,
        nullable: column.nullable,
        primaryKey: column.primaryKey,
        dbComment: column.comment ?? null,
      }),
    );
    const foreignKeys = table.foreignKeys.map((foreignKey): LiveDatabaseExtractedForeignKey => {
      const extracted: LiveDatabaseExtractedForeignKey = {
        fromTable: table.name,
        fromColumn: foreignKey.fromColumn,
        toTable: foreignKey.toTable,
        toColumn: foreignKey.toColumn,
      };
      if (foreignKey.constraintName) {
        extracted.constraintName = foreignKey.constraintName;
      }
      return extracted;
    });
    return {
      name: table.name,
      catalog: table.catalog ?? null,
      db: table.db ?? null,
      dbComment: table.comment ?? null,
      columns,
      foreignKeys,
    };
  };

  return {
    connectionId: snapshot.connectionId,
    tables: snapshot.tables.map((table) => toExtractedTable(table)),
  };
}

View file

@ -0,0 +1,59 @@
import { mkdtemp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { LiveDatabaseSourceAdapter } from './live-database.adapter.js';
// Exercises LiveDatabaseSourceAdapter against a stubbed introspection port.
describe('LiveDatabaseSourceAdapter', () => {
// Round trip: fetch stages the stubbed snapshot into a temp directory, then detect and chunk
// read that directory back.
it('fetches a schema snapshot through the introspection port', async () => {
const extractSchema = vi.fn().mockResolvedValue({
connectionId: 'conn-1',
driver: 'postgres',
extractedAt: '2026-04-27T00:00:00.000Z',
scope: { schemas: ['public'] },
metadata: {},
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
],
});
const adapter = new LiveDatabaseSourceAdapter({
introspection: { extractSchema },
now: () => new Date('2026-04-27T00:00:00.000Z'),
});
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-adapter-'));
await adapter.fetch(undefined, dir, { connectionId: 'conn-1', sourceKey: 'live-database' });
// The adapter must ask the port for the connection named in the fetch context.
expect(extractSchema).toHaveBeenCalledWith('conn-1');
await expect(adapter.detect(dir)).resolves.toBe(true);
const chunked = await adapter.chunk(dir);
// The single public.orders table is expected to yield exactly one work unit.
expect(chunked.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']);
});
// Pins the adapter's static identity: its source key and the skill it advertises.
it('declares the live database source and skill', () => {
const adapter = new LiveDatabaseSourceAdapter({
introspection: { extractSchema: vi.fn() },
});
expect(adapter.source).toBe('live-database');
expect(adapter.skillNames).toEqual(['live_database_ingest']);
});
});

View file

@ -0,0 +1,28 @@
import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js';
import { chunkLiveDatabaseStagedDir } from './chunk.js';
import { detectLiveDatabaseStagedDir, writeLiveDatabaseSnapshot } from './stage.js';
import type { LiveDatabaseSourceAdapterDeps } from './types.js';
/**
 * Source adapter for live databases: stages a schema snapshot obtained through the injected
 * introspection port, and delegates detection and chunking to the staged-directory helpers.
 */
export class LiveDatabaseSourceAdapter implements SourceAdapter {
  readonly source = 'live-database';
  readonly skillNames = ['live_database_ingest'];

  constructor(private readonly deps: LiveDatabaseSourceAdapterDeps) {}

  /** Reports whether `stagedDir` holds a staged live-database snapshot. */
  detect(stagedDir: string): Promise<boolean> {
    return detectLiveDatabaseStagedDir(stagedDir);
  }

  /**
   * Extracts the schema for the context's connection and writes it into `stagedDir`.
   * The staged snapshot always carries the context's connection id; the injected clock
   * (or the current time) fills in `extractedAt` only when the snapshot lacks one.
   */
  async fetch(_pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const { connectionId } = ctx;
    const extracted = await this.deps.introspection.extractSchema(connectionId);
    const clock = this.deps.now ?? (() => new Date());
    const extractedAt = extracted.extractedAt ?? clock().toISOString();
    await writeLiveDatabaseSnapshot(stagedDir, { ...extracted, connectionId, extractedAt });
  }

  /** Splits a staged snapshot into work units, optionally narrowed by `diffSet`. */
  chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    return chunkLiveDatabaseStagedDir(stagedDir, diffSet);
  }
}

View file

@ -0,0 +1,252 @@
import { describe, expect, it } from 'vitest';
import {
buildLiveDatabaseManifestShards,
type LiveDatabaseManifestExistingDescriptions,
type LiveDatabaseManifestJoinEntry,
type LiveDatabaseManifestShard,
} from './manifest.js';
/** Flattens the shard map into a plain object with keys in locale order, for stable comparisons. */
function shardObject(shards: Map<string, LiveDatabaseManifestShard>): Record<string, LiveDatabaseManifestShard> {
  const ordered = Array.from(shards.entries());
  ordered.sort((left, right) => left[0].localeCompare(right[0]));
  return Object.fromEntries(ordered);
}
// Covers shard construction: description merging, join generation, and shard-key selection.
describe('buildLiveDatabaseManifestShards', () => {
it('builds shard objects with generated joins and preserved external descriptions', () => {
// Existing descriptions hold a `user` entry that must survive and a `db` entry that the
// fresh scan is expected to replace.
const existingDescriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>([
[
'orders',
{
table: { user: 'Pinned analyst description', db: 'Old db description' },
columns: new Map([['id', { user: 'Pinned id description', db: 'Old id description' }]]),
},
],
]);
// Two manual joins: one to a table that is still present, one to a table missing from the
// scan. Only the first is expected in the output.
const preservedJoins = new Map<string, LiveDatabaseManifestJoinEntry[]>([
[
'orders',
[
{
to: 'customers',
on: 'orders.account_id = customers.id',
relationship: 'many_to_one',
source: 'manual',
},
{
to: 'missing_accounts',
on: 'orders.account_id = missing_accounts.id',
relationship: 'many_to_one',
source: 'manual',
},
],
],
]);
const result = buildLiveDatabaseManifestShards({
connectionType: 'POSTGRESQL',
mapColumnType: (nativeType) => nativeType.toLowerCase(),
existingDescriptions,
existingPreservedJoins: preservedJoins,
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
descriptions: { db: 'Fresh db description', ai: 'Generated AI description' },
columns: [
{
name: 'id',
type: 'INTEGER',
pk: true,
nullable: false,
descriptions: { db: 'Fresh id description' },
},
{
name: 'customer_id',
type: 'INTEGER',
},
],
},
{
name: 'customers',
catalog: null,
db: 'public',
columns: [
{
name: 'id',
type: 'INTEGER',
pk: true,
nullable: false,
},
],
},
],
joins: [
{
fromTable: 'orders',
fromColumns: ['customer_id'],
toTable: 'customers',
toColumns: ['id'],
relationship: 'MANY_TO_ONE',
source: 'formal',
},
],
});
expect(result.tablesProcessed).toBe(2);
expect(shardObject(result.shards)).toEqual({
public: {
tables: {
orders: {
table: 'public.orders',
descriptions: {
user: 'Pinned analyst description',
db: 'Fresh db description',
ai: 'Generated AI description',
},
columns: [
{
name: 'id',
type: 'integer',
pk: true,
nullable: false,
descriptions: {
user: 'Pinned id description',
db: 'Fresh id description',
},
},
{
name: 'customer_id',
type: 'integer',
},
],
// Generated joins come first, followed by the preserved manual join.
joins: [
{
to: 'customers',
on: 'orders.customer_id = customers.id',
relationship: 'many_to_one',
source: 'formal',
},
{
to: 'customers',
on: 'orders.account_id = customers.id',
relationship: 'many_to_one',
source: 'manual',
},
],
},
customers: {
table: 'public.customers',
columns: [
{
name: 'id',
type: 'integer',
pk: true,
nullable: false,
},
],
// The reverse direction of the formal join, with the relationship inverted.
joins: [
{
to: 'orders',
on: 'customers.id = orders.customer_id',
relationship: 'one_to_many',
source: 'formal',
},
],
},
},
},
});
});
// Snowflake shards are keyed by `catalog.schema`, and the table ref carries both qualifiers.
it('uses warehouse and schema shard keys for snowflake-style connections', () => {
const result = buildLiveDatabaseManifestShards({
connectionType: 'SNOWFLAKE',
mapColumnType: (nativeType) => nativeType.toLowerCase(),
tables: [
{
name: 'accounts',
catalog: 'ANALYTICS',
db: 'CORE',
columns: [{ name: 'id', type: 'NUMBER' }],
},
],
joins: [],
});
expect(shardObject(result.shards)).toEqual({
'ANALYTICS.CORE': {
tables: {
accounts: {
table: 'ANALYTICS.CORE.accounts',
columns: [{ name: 'id', type: 'number' }],
},
},
},
});
});
// Composite keys: column pairs are matched by position and combined with AND, in both directions.
it('renders ordered multi-column joins in both directions', () => {
const result = buildLiveDatabaseManifestShards({
connectionType: 'POSTGRESQL',
mapColumnType: (nativeType) => nativeType,
tables: [
{
name: 'order_lines',
catalog: null,
db: 'public',
columns: [
{ name: 'order_id', type: 'integer' },
{ name: 'line_number', type: 'integer' },
],
},
{
name: 'order_line_allocations',
catalog: null,
db: 'public',
columns: [
{ name: 'order_id', type: 'integer' },
{ name: 'line_number', type: 'integer' },
],
},
],
joins: [
{
fromTable: 'order_line_allocations',
fromColumns: ['order_id', 'line_number'],
toTable: 'order_lines',
toColumns: ['order_id', 'line_number'],
relationship: 'many_to_one',
source: 'inferred',
},
],
});
expect(shardObject(result.shards)).toMatchObject({
public: {
tables: {
order_line_allocations: {
joins: [
{
to: 'order_lines',
on: 'order_line_allocations.order_id = order_lines.order_id AND order_line_allocations.line_number = order_lines.line_number',
relationship: 'many_to_one',
source: 'inferred',
},
],
},
order_lines: {
joins: [
{
to: 'order_line_allocations',
on: 'order_lines.order_id = order_line_allocations.order_id AND order_lines.line_number = order_line_allocations.line_number',
relationship: 'one_to_many',
source: 'inferred',
},
],
},
},
},
});
});
});

View file

@ -0,0 +1,270 @@
// Upper-case relationship names mapped to their lower-case manifest spelling.
// Names not listed here are passed through unchanged by the join builder.
const RELATIONSHIP_MAP: Record<string, string> = {
MANY_TO_ONE: 'many_to_one',
ONE_TO_MANY: 'one_to_many',
ONE_TO_ONE: 'one_to_one',
};
// Relationship as seen from the other side of a join.
// The join builder falls back to 'one_to_many' for relationships not listed here.
const RELATIONSHIP_INVERSE: Record<string, string> = {
many_to_one: 'one_to_many',
one_to_many: 'many_to_one',
one_to_one: 'one_to_one',
};
// Description keys owned by the scan: existing values under these keys are dropped on merge,
// while every other key is preserved.
const SCAN_MANAGED_DESCRIPTION_KEYS = new Set(['db', 'ai']);
/** A column as written into a manifest shard. */
export interface LiveDatabaseManifestColumn {
name: string;
// Column type after it has been passed through the caller's mapColumnType.
type: string;
// Only set (to true) for primary-key columns.
pk?: boolean;
// Only set (to false) for columns known to be non-nullable.
nullable?: boolean;
// Descriptions keyed by origin, e.g. 'user', 'db', 'ai'.
descriptions?: Record<string, string>;
}
/** A join from the owning table to another table, as written into a manifest shard. */
export interface LiveDatabaseManifestJoinEntry {
// Name of the table being joined to.
to: string;
// Join condition text, e.g. `orders.customer_id = customers.id`.
on: string;
// Relationship from the owning table's point of view, e.g. 'many_to_one'.
relationship: string;
// Where the join came from, e.g. 'formal', 'inferred', or 'manual'.
source: string;
}
/** A table as written into a manifest shard. */
export interface LiveDatabaseManifestTableEntry {
// Dot-separated table reference built from catalog, db, and name (null parts omitted).
table: string;
descriptions?: Record<string, string>;
columns: LiveDatabaseManifestColumn[];
// Omitted when the table has no joins.
joins?: LiveDatabaseManifestJoinEntry[];
}
/** One manifest shard: the tables that share a shard key, indexed by table name. */
export interface LiveDatabaseManifestShard {
tables: Record<string, LiveDatabaseManifestTableEntry>;
}
/** Input description of a scanned table, before it is turned into a manifest entry. */
export interface LiveDatabaseManifestTableData {
name: string;
catalog: string | null;
db: string | null;
// Freshly produced descriptions; merged over any preserved existing ones.
descriptions?: Record<string, string>;
columns: Array<{
name: string;
// Native database type; converted with mapColumnType when the manifest is built.
type: string;
pk?: boolean;
nullable?: boolean;
descriptions?: Record<string, string>;
}>;
}
/** Input description of a relationship between two scanned tables. */
export interface LiveDatabaseManifestJoinData {
fromTable: string;
// Columns on the from side; paired by position with toColumns.
fromColumns: string[];
toTable: string;
// Columns on the to side; must have the same length as fromColumns.
toColumns: string[];
// Relationship from the from-table's point of view; upper- or lower-case spellings are accepted.
relationship: string;
source: 'formal' | 'inferred' | 'manual';
}
/** Descriptions already recorded for a table, used to preserve entries the scan does not manage. */
export interface LiveDatabaseManifestExistingDescriptions {
table?: Record<string, string>;
// Existing descriptions per column, keyed by column name.
columns: Map<string, Record<string, string>>;
}
/** Input for buildLiveDatabaseManifestShards. */
export interface BuildLiveDatabaseManifestShardsInput {
// Connection type name; compared case-insensitively when choosing shard keys.
connectionType: string;
tables: LiveDatabaseManifestTableData[];
joins: LiveDatabaseManifestJoinData[];
// Converts a native column type into the type string stored in the manifest.
mapColumnType: (nativeType: string) => string;
// Previously recorded joins to carry over, keyed by owning table name.
existingPreservedJoins?: Map<string, LiveDatabaseManifestJoinEntry[]>;
// Previously recorded descriptions, keyed by table name.
existingDescriptions?: Map<string, LiveDatabaseManifestExistingDescriptions>;
}
/** Output of buildLiveDatabaseManifestShards. */
export interface BuildLiveDatabaseManifestShardsResult {
// Shards keyed by shard key.
shards: Map<string, LiveDatabaseManifestShard>;
// Number of input tables.
tablesProcessed: number;
}
/**
 * Merges freshly scanned descriptions over existing ones.
 *
 * Existing entries under scan-managed keys are discarded; all other existing entries are kept.
 * Incoming entries then overwrite whatever shares their key.
 *
 * @returns The merged record, or undefined when it would be empty.
 */
function mergeDescriptionsPreservingExternal(
  existing: Record<string, string> | undefined,
  incoming: Record<string, string> | undefined,
): Record<string, string> | undefined {
  if (!existing && !incoming) {
    return undefined;
  }
  const merged: Record<string, string> = {};
  const preserved = Object.entries(existing ?? {}).filter(([key]) => !SCAN_MANAGED_DESCRIPTION_KEYS.has(key));
  for (const [key, value] of preserved) {
    merged[key] = value;
  }
  Object.assign(merged, incoming ?? {});
  if (Object.keys(merged).length === 0) {
    return undefined;
  }
  return merged;
}
/**
 * Chooses the shard key for a table based on the connection type (case-insensitive).
 *
 * - Snowflake / Databricks: `catalog.schema`, defaulting to `default` and `public`.
 * - BigQuery / MySQL / ClickHouse: db, else catalog, else `default`.
 * - Anything else: db, else `public`.
 */
function getShardKey(connectionType: string, catalog: string | null, db: string | null): string {
  const dialect = connectionType.toUpperCase();
  if (dialect === 'SNOWFLAKE' || dialect === 'DATABRICKS') {
    return `${catalog ?? 'default'}.${db ?? 'public'}`;
  }
  if (dialect === 'BIGQUERY' || dialect === 'MYSQL' || dialect === 'CLICKHOUSE') {
    return db ?? catalog ?? 'default';
  }
  return db ?? 'public';
}
/** Joins catalog, db, and name with dots, leaving out a catalog or db that is null or empty. */
function buildTableRef(name: string, catalog: string | null, db: string | null): string {
  const qualifiers = [catalog, db].filter((part): part is string => Boolean(part));
  return [...qualifiers, name].join('.');
}
/**
 * Appends `join` to the table's join list unless an entry with the same target and condition
 * is already present. The table always ends up with a (possibly unchanged) list in the map.
 */
function addJoinOnce(
  joinsByTable: Map<string, LiveDatabaseManifestJoinEntry[]>,
  tableName: string,
  join: LiveDatabaseManifestJoinEntry,
): void {
  const current = joinsByTable.get(tableName) ?? [];
  const isNew = current.every((candidate) => candidate.to !== join.to || candidate.on !== join.on);
  if (isNew) {
    current.push(join);
  }
  joinsByTable.set(tableName, current);
}
/**
 * Renders a join condition that pairs left and right columns by position, combined with AND.
 *
 * @throws Error when the column lists are empty or differ in length, or when a target column
 *   is missing or empty.
 */
function joinCondition(
  leftTable: string,
  leftColumns: readonly string[],
  rightTable: string,
  rightColumns: readonly string[],
): string {
  const width = leftColumns.length;
  if (width === 0 || width !== rightColumns.length) {
    throw new Error(`Invalid relationship join from ${leftTable} to ${rightTable}: column tuple widths differ`);
  }
  const clauses: string[] = [];
  for (let position = 0; position < width; position += 1) {
    const target = rightColumns[position];
    if (!target) {
      throw new Error(`Invalid relationship join from ${leftTable} to ${rightTable}: missing target column`);
    }
    clauses.push(`${leftTable}.${leftColumns[position]} = ${rightTable}.${target}`);
  }
  return clauses.join(' AND ');
}
/**
 * Groups joins by owning table.
 *
 * Each input join is recorded on both tables (the reverse side with the inverted relationship),
 * then preserved joins are appended. Joins touching a table outside `tableNames` are skipped,
 * and duplicates (same target and condition) are recorded once.
 */
function buildJoinsByTable(
  tableNames: Set<string>,
  joins: LiveDatabaseManifestJoinData[],
  preservedJoins: Map<string, LiveDatabaseManifestJoinEntry[]>,
): Map<string, LiveDatabaseManifestJoinEntry[]> {
  const grouped = new Map<string, LiveDatabaseManifestJoinEntry[]>();
  const isKnown = (name: string): boolean => tableNames.has(name);

  for (const { fromTable, fromColumns, toTable, toColumns, relationship: declared, source } of joins) {
    if (!(isKnown(fromTable) && isKnown(toTable))) {
      continue;
    }
    const forward = RELATIONSHIP_MAP[declared] ?? declared;
    addJoinOnce(grouped, fromTable, {
      to: toTable,
      on: joinCondition(fromTable, fromColumns, toTable, toColumns),
      relationship: forward,
      source,
    });
    const backward = RELATIONSHIP_INVERSE[forward] ?? 'one_to_many';
    addJoinOnce(grouped, toTable, {
      to: fromTable,
      on: joinCondition(toTable, toColumns, fromTable, fromColumns),
      relationship: backward,
      source,
    });
  }

  for (const [owner, entries] of preservedJoins) {
    if (!isKnown(owner)) {
      continue;
    }
    for (const entry of entries.filter((candidate) => isKnown(candidate.to))) {
      addJoinOnce(grouped, owner, entry);
    }
  }

  return grouped;
}
/**
 * Builds manifest shards from scanned tables and joins.
 *
 * Tables are grouped by shard key. Existing descriptions not managed by the scan are merged in,
 * and optional fields (pk, nullable, descriptions, joins) appear only when they carry information.
 */
export function buildLiveDatabaseManifestShards(
  input: BuildLiveDatabaseManifestShardsInput,
): BuildLiveDatabaseManifestShardsResult {
  const knownTables = new Set(input.tables.map((table) => table.name));
  const joinsByTable = buildJoinsByTable(knownTables, input.joins, input.existingPreservedJoins ?? new Map());
  const shards = new Map<string, LiveDatabaseManifestShard>();

  for (const table of input.tables) {
    const shardKey = getShardKey(input.connectionType, table.catalog, table.db);
    const prior = input.existingDescriptions?.get(table.name);

    const columns = table.columns.map((column): LiveDatabaseManifestColumn => {
      const type = input.mapColumnType(column.type);
      const descriptions = mergeDescriptionsPreservingExternal(prior?.columns.get(column.name), column.descriptions);
      // Conditional spreads keep the property order: name, type, pk, nullable, descriptions.
      return {
        name: column.name,
        type,
        ...(column.pk ? { pk: true } : {}),
        ...(column.nullable === false ? { nullable: false } : {}),
        ...(descriptions ? { descriptions } : {}),
      };
    });

    const tableRef = buildTableRef(table.name, table.catalog, table.db);
    const tableDescriptions = mergeDescriptionsPreservingExternal(prior?.table, table.descriptions);
    const tableJoins = joinsByTable.get(table.name);
    const entry: LiveDatabaseManifestTableEntry = {
      table: tableRef,
      columns,
      ...(tableDescriptions ? { descriptions: tableDescriptions } : {}),
      ...(tableJoins && tableJoins.length > 0 ? { joins: tableJoins } : {}),
    };

    let shard = shards.get(shardKey);
    if (!shard) {
      shard = { tables: {} };
      shards.set(shardKey, shard);
    }
    shard.tables[table.name] = entry;
  }

  return { shards, tablesProcessed: input.tables.length };
}

View file

@ -0,0 +1,152 @@
import { mkdtemp, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import {
detectLiveDatabaseStagedDir,
LIVE_DATABASE_FOREIGN_KEYS_FILE,
LIVE_DATABASE_META_FILE,
liveDatabaseTablePath,
readLiveDatabaseTableFiles,
writeLiveDatabaseSnapshot,
} from './stage.js';
import type { KloSchemaSnapshot } from '../../../scan/types.js';
/** Fixture: a Postgres snapshot with `orders` (three columns, one foreign key) and `customers`. */
function snapshot(): KloSchemaSnapshot {
  type Table = KloSchemaSnapshot['tables'][number];
  type Column = Table['columns'][number];

  // Every column in this fixture is a non-nullable, uncommented numeric dimension whose
  // normalized type equals its native type.
  const numberColumn = (name: string, nativeType: string, primaryKey: boolean): Column => ({
    name,
    nativeType,
    normalizedType: nativeType,
    dimensionType: 'number',
    nullable: false,
    primaryKey,
    comment: null,
  });

  const orders: Table = {
    name: 'orders',
    catalog: null,
    db: 'public',
    kind: 'table',
    comment: 'Orders placed by customers',
    estimatedRows: 200,
    columns: [
      numberColumn('id', 'integer', true),
      numberColumn('customer_id', 'integer', false),
      numberColumn('total', 'numeric', false),
    ],
    foreignKeys: [
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'public',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: null,
      },
    ],
  };

  const customers: Table = {
    name: 'customers',
    catalog: null,
    db: 'public',
    kind: 'table',
    comment: null,
    estimatedRows: 50,
    columns: [numberColumn('id', 'integer', true)],
    foreignKeys: [],
  };

  return {
    connectionId: 'conn-1',
    driver: 'postgres',
    extractedAt: '2026-04-27T00:00:00.000Z',
    scope: { schemas: ['public'] },
    metadata: { dialect: 'postgres' },
    tables: [orders, customers],
  };
}
// Covers writing a snapshot to a staged directory and reading/detecting it afterwards.
describe('live-database staged snapshot files', () => {
it('writes deterministic metadata, table, and foreign-key files', async () => {
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-stage-'));
await writeLiveDatabaseSnapshot(dir, snapshot());
await expect(readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8')).resolves.toContain('"connectionId": "conn-1"');
await expect(readFile(join(dir, LIVE_DATABASE_FOREIGN_KEYS_FILE), 'utf8')).resolves.toContain(
'"fromTable": "orders"',
);
const connectionJson = await readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8');
expect(connectionJson).toContain('"driver": "postgres"');
expect(connectionJson).toContain('"schemas"');
const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' });
const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' });
// Table paths consist of three encoded, dot-separated parts under tables/.
expect(ordersPath).toMatch(/^tables\/[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.json$/);
await expect(readFile(join(dir, ordersPath), 'utf8')).resolves.toContain('"name": "orders"');
await expect(readFile(join(dir, customersPath), 'utf8')).resolves.toContain('"name": "customers"');
const ordersJson = await readFile(join(dir, ordersPath), 'utf8');
expect(ordersJson).toContain('"kind": "table"');
expect(ordersJson).toContain('"estimatedRows": 200');
expect(ordersJson).toContain('"nativeType": "integer"');
expect(ordersJson).toContain('"normalizedType": "integer"');
// Table files keep the snapshot field names; there is no bare "type" field.
expect(ordersJson).not.toContain('"type": "integer"');
const tableFiles = await readLiveDatabaseTableFiles(dir);
// The fixture lists orders first; reading back is expected to return customers first.
expect(tableFiles.map((file) => file.table.name)).toEqual(['customers', 'orders']);
expect(await detectLiveDatabaseStagedDir(dir)).toBe(true);
});
// Secret-bearing metadata values must be replaced with "<redacted>" while non-sensitive
// siblings stay readable.
it('redacts sensitive snapshot metadata before writing connection metadata', async () => {
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-redacted-stage-'));
await writeLiveDatabaseSnapshot(dir, {
...snapshot(),
metadata: {
dialect: 'postgres',
url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret
serviceAccountJson: {
client_email: 'reader@example.test',
private_key: 'pem-value', // pragma: allowlist secret
},
},
});
const connectionJson = await readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8');
expect(connectionJson).toContain('"dialect": "postgres"');
expect(connectionJson).toContain('"client_email": "reader@example.test"');
expect(connectionJson).toContain('"url": "<redacted>"');
expect(connectionJson).toContain('"private_key": "<redacted>"');
expect(connectionJson).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret
expect(connectionJson).not.toContain('pem-value');
});
// A freshly created, empty directory is not a staged live-database snapshot.
it('returns false for a directory that is missing live database metadata', async () => {
const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-empty-'));
expect(await detectLiveDatabaseStagedDir(dir)).toBe(false);
});
});

View file

@ -0,0 +1,138 @@
import { Buffer } from 'node:buffer';
import type { Dirent } from 'node:fs';
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
import { join, relative } from 'node:path';
import { redactKloSensitiveMetadata } from '../../../core/redaction.js';
import type { KloSchemaSnapshot, KloSchemaTable, KloTableRef } from '../../../scan/types.js';
// File holding the snapshot's connection-level metadata (connection id, driver, scope, ...).
export const LIVE_DATABASE_META_FILE = 'connection.json';
// File holding the flattened foreign-key index for all tables.
export const LIVE_DATABASE_FOREIGN_KEYS_FILE = 'foreign-keys.json';
// Sub-directory that holds one JSON file per table.
const LIVE_DATABASE_TABLES_DIR = 'tables';
/** A staged table file: its path paired with the table it contains. */
interface LiveDatabaseTableFile {
path: string;
table: KloSchemaTable;
}
/** One row of the foreign-key index file: a foreign key plus the table that owns it. */
interface ForeignKeyIndexEntry {
fromTable: string;
// Staged-directory-relative path of the owning table's JSON file.
fromTablePath: string;
fromColumn: string;
toCatalog: string | null;
toDb: string | null;
toTable: string;
toColumn: string;
constraintName: string | null;
}
/**
 * Encodes one table-reference part as base64url so it is safe inside a file name.
 * A null or undefined part is encoded as the placeholder '_'.
 */
function encodePathPart(value: string | null | undefined): string {
  const text = value ?? '_';
  const bytes = Buffer.from(text, 'utf8');
  return bytes.toString('base64url');
}
/** Builds a sort key from catalog, db, and name, separated by NUL characters. */
function tableSortKey(table: KloTableRef): string {
  const parts = [table.catalog ?? '', table.db ?? '', table.name];
  return parts.join('\u0000');
}
/** Returns the staged-directory-relative path of a table's JSON file: `tables/<catalog>.<db>.<name>.json`, each part encoded. */
export function liveDatabaseTablePath(table: KloTableRef): string {
  const encoded = [table.catalog, table.db, table.name].map((part) => encodePathPart(part));
  return `${LIVE_DATABASE_TABLES_DIR}/${encoded.join('.')}.json`;
}
/**
 * Recursively lists the files beneath `dir` as sorted, forward-slash paths relative to `root`.
 * A directory that cannot be read contributes no entries.
 */
async function walkFiles(root: string, dir = root): Promise<string[]> {
  let children: Dirent[];
  try {
    children = await readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }
  const collected: string[] = [];
  for (const child of children) {
    const childPath = join(dir, child.name);
    if (child.isDirectory()) {
      const nested = await walkFiles(root, childPath);
      collected.push(...nested);
      continue;
    }
    if (child.isFile()) {
      // Normalise Windows separators so paths are identical across platforms.
      collected.push(relative(root, childPath).replace(/\\/g, '/'));
    }
  }
  collected.sort();
  return collected;
}
/** Serialises a value as 2-space-indented JSON terminated by a single newline. */
function stableJson(value: unknown): string {
  const body = JSON.stringify(value, null, 2);
  return `${body}\n`;
}
/**
 * Flattens the foreign keys of every table in the snapshot into one ordered list.
 *
 * Entries are ordered by declaring table name, referencing column, referenced table and
 * referenced column. Because the declaring table name alone does not identify a table
 * (two catalogs/dbs may contain a table with the same name), the declaring table's file
 * path is used as a final tie-breaker so the result does not depend on the order of
 * `snapshot.tables`.
 *
 * @param snapshot - Snapshot whose tables' foreign keys are indexed.
 * @returns One entry per foreign key, in deterministic order.
 */
function foreignKeyIndex(snapshot: KloSchemaSnapshot): ForeignKeyIndexEntry[] {
  const entries: ForeignKeyIndexEntry[] = [];
  for (const table of snapshot.tables) {
    // The path is the same for every key of a table, so compute it once.
    const fromTablePath = liveDatabaseTablePath(table);
    for (const fk of table.foreignKeys) {
      entries.push({
        fromTable: table.name,
        fromTablePath,
        fromColumn: fk.fromColumn,
        toCatalog: fk.toCatalog,
        toDb: fk.toDb,
        toTable: fk.toTable,
        toColumn: fk.toColumn,
        constraintName: fk.constraintName,
      });
    }
  }
  entries.sort(
    (a, b) =>
      a.fromTable.localeCompare(b.fromTable) ||
      a.fromColumn.localeCompare(b.fromColumn) ||
      a.toTable.localeCompare(b.toTable) ||
      a.toColumn.localeCompare(b.toColumn) ||
      // Tie-breaker: distinguishes same-named tables living in different catalogs/dbs.
      a.fromTablePath.localeCompare(b.fromTablePath),
  );
  return entries;
}
/**
 * Writes a schema snapshot into a staged directory.
 *
 * Produces the connection metadata file (with sensitive metadata redacted), the
 * foreign-key index file, and one JSON file per table under the tables directory.
 * Files are written one after another, tables in sort-key order.
 *
 * @param stagedDir - Directory to write into; the tables sub-directory is created if needed.
 * @param snapshot - Snapshot to persist.
 */
export async function writeLiveDatabaseSnapshot(stagedDir: string, snapshot: KloSchemaSnapshot): Promise<void> {
  const tablesDir = join(stagedDir, LIVE_DATABASE_TABLES_DIR);
  await mkdir(tablesDir, { recursive: true });

  const orderedTables = Array.from(snapshot.tables);
  orderedTables.sort((left, right) => tableSortKey(left).localeCompare(tableSortKey(right)));

  const connectionMeta = {
    connectionId: snapshot.connectionId,
    driver: snapshot.driver,
    extractedAt: snapshot.extractedAt,
    scope: snapshot.scope,
    metadata: redactKloSensitiveMetadata(snapshot.metadata),
    tableCount: orderedTables.length,
  };
  const metaPath = join(stagedDir, LIVE_DATABASE_META_FILE);
  await writeFile(metaPath, stableJson(connectionMeta));

  const foreignKeysPath = join(stagedDir, LIVE_DATABASE_FOREIGN_KEYS_FILE);
  const foreignKeys = foreignKeyIndex(snapshot);
  await writeFile(foreignKeysPath, stableJson({ foreignKeys }));

  for (const table of orderedTables) {
    const tablePath = join(stagedDir, liveDatabaseTablePath(table));
    await writeFile(tablePath, stableJson(table));
  }
}
/**
 * Reads every `.json` file below the staged directory's tables folder.
 *
 * Files whose parsed content lacks a string `name` or an array `columns` are ignored.
 * A file that is not valid JSON makes the returned promise reject.
 *
 * @param stagedDir - Staged directory to read from.
 * @returns The accepted files with their staged-directory-relative paths, ordered by table sort key.
 */
export async function readLiveDatabaseTableFiles(stagedDir: string): Promise<LiveDatabaseTableFile[]> {
  const tablesRoot = join(stagedDir, LIVE_DATABASE_TABLES_DIR);
  const jsonFiles = (await walkFiles(tablesRoot)).filter((name) => name.endsWith('.json'));

  const tableFiles: LiveDatabaseTableFile[] = [];
  for (const name of jsonFiles) {
    const path = `${LIVE_DATABASE_TABLES_DIR}/${name}`;
    const content = await readFile(join(stagedDir, path), 'utf8');
    const table = JSON.parse(content) as KloSchemaTable;
    // Minimal shape check: skip JSON documents that do not look like a table.
    const looksLikeTable = table && typeof table.name === 'string' && Array.isArray(table.columns);
    if (!looksLikeTable) {
      continue;
    }
    tableFiles.push({ path, table });
  }

  return tableFiles.sort((left, right) => tableSortKey(left.table).localeCompare(tableSortKey(right.table)));
}
/**
 * Reports whether a directory looks like a staged live-database snapshot.
 *
 * The directory qualifies when its metadata file parses to a non-array JSON object and
 * at least one table file can be read. Any error (missing file, invalid JSON, …) yields `false`.
 */
export async function detectLiveDatabaseStagedDir(stagedDir: string): Promise<boolean> {
  try {
    const rawMeta = await readFile(join(stagedDir, LIVE_DATABASE_META_FILE), 'utf8');
    const meta: unknown = JSON.parse(rawMeta);
    const isJsonObject = typeof meta === 'object' && meta !== null && !Array.isArray(meta);
    if (!isJsonObject) {
      return false;
    }
    const tableFiles = await readLiveDatabaseTableFiles(stagedDir);
    return tableFiles.length > 0;
  } catch {
    return false;
  }
}

View file

@ -0,0 +1,428 @@
import { describe, expect, it } from 'vitest';
import { type LiveDatabaseSyncedSchema, planLiveDatabaseStructuralSync } from './structural-sync.js';
/** Creates a generator of predictable ids: `id-1`, `id-2`, … Each factory counts independently. */
function idFactory(): () => string {
  let issued = 0;
  return () => {
    issued += 1;
    return `id-${issued}`;
  };
}
describe('planLiveDatabaseStructuralSync', () => {
// Scenario: the current schema has `orders` (id, total, removed) and `removed_table`;
// the extracted schema has `orders` (id unchanged, total with new type/nullability/comment,
// new created_at) and a brand-new `customers` table. The plan must therefore contain one
// table create, one table delete, column creates/deletes, and one modified column.
it('plans table and column creates, updates, deletes, and metadata invalidation', () => {
const current: LiveDatabaseSyncedSchema = {
connectionId: 'conn-1',
tables: [
{
id: 'tbl-orders',
name: 'orders',
catalog: null,
db: 'public',
enabled: true,
descriptions: { ai: 'Old AI order text', db: 'Old DB order text' },
columns: [
{
id: 'col-order-id',
name: 'id',
type: 'number',
nullable: false,
primaryKey: true,
parentColumnId: null,
descriptions: { db: 'Order id' },
embedding: [1, 2, 3],
sampleValues: null,
cardinality: null,
},
{
id: 'col-order-total',
name: 'total',
type: 'number',
nullable: true,
primaryKey: false,
parentColumnId: null,
descriptions: { ai: 'Old AI total text', db: 'Old total text' },
embedding: [4, 5, 6],
sampleValues: ['10'],
cardinality: 12,
},
{
id: 'col-order-removed',
name: 'removed',
type: 'string',
nullable: true,
primaryKey: false,
parentColumnId: null,
descriptions: {},
embedding: null,
sampleValues: null,
cardinality: null,
},
],
},
{
id: 'tbl-removed',
name: 'removed_table',
catalog: null,
db: 'public',
enabled: true,
descriptions: {},
columns: [
{
id: 'col-removed-id',
name: 'id',
type: 'number',
nullable: false,
primaryKey: true,
parentColumnId: null,
descriptions: {},
embedding: null,
sampleValues: null,
cardinality: null,
},
],
},
],
links: [
{
id: 'inferred-total-link',
fromTableId: 'tbl-orders',
fromColumnId: 'col-order-total',
toTableId: 'tbl-orders',
toColumnId: 'col-order-id',
source: 'inferred',
confidence: 0.7,
relationshipType: 'MANY_TO_ONE',
isPrimaryKeyReference: true,
},
],
};
const plan = planLiveDatabaseStructuralSync({
connectionId: 'conn-1',
current,
extracted: {
connectionId: 'conn-1',
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
dbComment: 'Fresh DB order text',
columns: [
{
name: 'id',
type: 'number',
nullable: false,
primaryKey: true,
dbComment: 'Order id',
},
{
name: 'total',
type: 'string',
nullable: false,
primaryKey: false,
dbComment: 'Fresh total text',
},
{
name: 'created_at',
type: 'time',
nullable: false,
primaryKey: false,
dbComment: 'Creation timestamp',
},
],
foreignKeys: [],
},
{
name: 'customers',
catalog: null,
db: 'public',
dbComment: 'Customer table',
columns: [
{
name: 'id',
type: 'number',
nullable: false,
primaryKey: true,
dbComment: null,
},
],
foreignKeys: [],
},
],
},
idFactory: idFactory(),
});
// columnsDeleted counts the dropped `removed` column plus the single column of the deleted table;
// columnsCreated counts `created_at` plus the single column of the new `customers` table.
expect(plan.stats).toEqual({
tablesCreated: 1,
tablesDeleted: 1,
columnsCreated: 2,
columnsDeleted: 2,
columnsModified: 1,
formalLinksCreated: 0,
formalLinksDeleted: 0,
});
expect(plan.operations.deleteTableIds).toEqual(['tbl-removed']);
// Only columns removed from surviving tables are listed; columns of deleted tables are not.
expect(plan.operations.deleteColumnIds).toEqual(['col-order-removed']);
// Ids are handed out in planning order: updated tables first (id-1 = created_at),
// then the new table (id-2) followed by its columns (id-3).
expect(plan.operations.insertTables).toEqual([
{
id: 'id-2',
connectionId: 'conn-1',
name: 'customers',
catalog: null,
db: 'public',
enabled: true,
},
]);
expect(plan.operations.insertColumns).toEqual([
{
id: 'id-1',
tableId: 'tbl-orders',
name: 'created_at',
parentColumnId: null,
},
{
id: 'id-3',
tableId: 'id-2',
name: 'id',
parentColumnId: null,
},
]);
expect(plan.operations.touchColumnIds).toEqual(['col-order-total']);
expect(plan.operations.invalidateColumnEmbeddingIds).toEqual(['col-order-total']);
// The inferred link touches the column whose type changed, so it is queued for validation.
expect(plan.inferredLinksToValidate).toEqual(['inferred-total-link']);
expect(plan.changes).toEqual({
newTableIds: ['id-2'],
newColumnIds: ['id-1', 'id-3'],
tablesWithStructuralChanges: ['tbl-orders', 'id-2'],
columnsWithTypeChange: ['col-order-total'],
columnsWithDescriptionChange: ['col-order-total'],
tablesWithDescriptionChange: ['tbl-orders'],
});
const orders = plan.schema.tables.find((table) => table.name === 'orders');
// A changed DB comment replaces the `db` description and discards the `ai` description.
expect(orders?.descriptions).toEqual({ db: 'Fresh DB order text' });
expect(orders?.columns.map((column) => column.name)).toEqual(['id', 'total', 'created_at']);
// The modified column keeps its id, sample values and cardinality but loses its embedding.
expect(orders?.columns.find((column) => column.name === 'total')).toMatchObject({
id: 'col-order-total',
type: 'string',
nullable: false,
primaryKey: false,
descriptions: { db: 'Fresh total text' },
embedding: null,
sampleValues: ['10'],
cardinality: 12,
});
});
// Scenario: `orders.customer_id -> customers.id` exists as a formal link and
// `orders.id -> customers.id` as an inferred link. Three plans are computed from that state:
// (1) the foreign key is still extracted, (2) the foreign key disappeared,
// (3) the foreign key is extracted but no formal link exists yet.
it('builds formal links from extracted foreign keys and preserves valid inferred links', () => {
const current: LiveDatabaseSyncedSchema = {
connectionId: 'conn-1',
tables: [
{
id: 'tbl-orders',
name: 'orders',
catalog: null,
db: 'public',
enabled: true,
descriptions: {},
columns: [
{
id: 'col-orders-id',
name: 'id',
type: 'number',
nullable: false,
primaryKey: true,
parentColumnId: null,
descriptions: {},
embedding: null,
sampleValues: null,
cardinality: null,
},
{
id: 'col-orders-customer',
name: 'customer_id',
type: 'number',
nullable: false,
primaryKey: false,
parentColumnId: null,
descriptions: {},
embedding: null,
sampleValues: null,
cardinality: null,
},
],
},
{
id: 'tbl-customers',
name: 'customers',
catalog: null,
db: 'public',
enabled: true,
descriptions: {},
columns: [
{
id: 'col-customers-id',
name: 'id',
type: 'number',
nullable: false,
primaryKey: true,
parentColumnId: null,
descriptions: {},
embedding: null,
sampleValues: null,
cardinality: null,
},
],
},
],
links: [
{
id: 'formal-existing',
fromTableId: 'tbl-orders',
fromColumnId: 'col-orders-customer',
toTableId: 'tbl-customers',
toColumnId: 'col-customers-id',
source: 'formal',
confidence: 1,
relationshipType: 'MANY_TO_ONE',
isPrimaryKeyReference: true,
},
{
id: 'inferred-existing',
fromTableId: 'tbl-orders',
fromColumnId: 'col-orders-id',
toTableId: 'tbl-customers',
toColumnId: 'col-customers-id',
source: 'inferred',
confidence: 0.6,
relationshipType: 'MANY_TO_ONE',
isPrimaryKeyReference: true,
},
],
};
// (1) Foreign key unchanged: nothing is created or deleted and both links survive.
const plan = planLiveDatabaseStructuralSync({
connectionId: 'conn-1',
current,
extracted: {
connectionId: 'conn-1',
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
dbComment: null,
columns: [
{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null },
{ name: 'customer_id', type: 'number', nullable: false, primaryKey: false, dbComment: null },
],
foreignKeys: [
{
fromTable: 'orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
},
],
},
{
name: 'customers',
catalog: null,
db: 'public',
dbComment: null,
columns: [{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }],
foreignKeys: [],
},
],
},
idFactory: idFactory(),
});
expect(plan.stats.formalLinksCreated).toBe(0);
expect(plan.stats.formalLinksDeleted).toBe(0);
// Formal links are listed before preserved inferred links.
expect(plan.schema.links.map((link) => link.id)).toEqual(['formal-existing', 'inferred-existing']);
// (2) Foreign key removed from the database: the formal link is dropped, the inferred one stays.
const planAfterForeignKeyRemoval = planLiveDatabaseStructuralSync({
connectionId: 'conn-1',
current,
extracted: {
connectionId: 'conn-1',
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
dbComment: null,
columns: [
{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null },
{ name: 'customer_id', type: 'number', nullable: false, primaryKey: false, dbComment: null },
],
foreignKeys: [],
},
{
name: 'customers',
catalog: null,
db: 'public',
dbComment: null,
columns: [{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }],
foreignKeys: [],
},
],
},
idFactory: idFactory(),
});
expect(planAfterForeignKeyRemoval.stats.formalLinksDeleted).toBe(1);
expect(planAfterForeignKeyRemoval.schema.links.map((link) => link.id)).toEqual(['inferred-existing']);
// (3) Start with only the inferred link; the extracted foreign key must yield a new formal link.
const planAfterForeignKeyCreation = planLiveDatabaseStructuralSync({
connectionId: 'conn-1',
current: { ...current, links: [current.links[1]] },
extracted: {
connectionId: 'conn-1',
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
dbComment: null,
columns: [
{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null },
{ name: 'customer_id', type: 'number', nullable: false, primaryKey: false, dbComment: null },
],
foreignKeys: [
{
fromTable: 'orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
},
],
},
{
name: 'customers',
catalog: null,
db: 'public',
dbComment: null,
columns: [{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }],
foreignKeys: [],
},
],
},
idFactory: idFactory(),
});
expect(planAfterForeignKeyCreation.stats.formalLinksCreated).toBe(1);
// No tables or columns were created, so the new link receives the first generated id.
expect(planAfterForeignKeyCreation.schema.links[0]).toMatchObject({
id: 'id-1',
fromTableId: 'tbl-orders',
fromColumnId: 'col-orders-customer',
toTableId: 'tbl-customers',
toColumnId: 'col-customers-id',
source: 'formal',
confidence: 1,
relationshipType: 'MANY_TO_ONE',
isPrimaryKeyReference: true,
});
});
});

View file

@ -0,0 +1,525 @@
import type { LiveDatabaseExtractedSchema, LiveDatabaseExtractedTable } from './extracted-schema.js';
import { buildLiveDatabaseTableNaturalKey } from './extracted-schema.js';
/** A column as stored in the synced schema. */
export interface LiveDatabaseSyncedColumn {
/** Stable identifier; kept across syncs for columns matched by name, generated for new columns. */
id: string;
/** Column name; used to match current columns against extracted columns within a table. */
name: string;
/** Column type as reported by extraction; a change invalidates the embedding. */
type: string;
nullable: boolean;
primaryKey: boolean;
/** Parent column id; planning in this file always sets it to null for new columns. */
parentColumnId: string | null;
/** Descriptions keyed by origin; planning writes the `db` key and clears the `ai` key when the DB comment changes. */
descriptions: Record<string, string>;
/** Embedding vector, or null when none is available (new column or type changed). */
embedding: number[] | null;
/** Sample values; null for new columns and carried over unchanged for existing ones. */
sampleValues: string[] | null;
/** Cardinality; null for new columns and carried over unchanged for existing ones. */
cardinality: number | null;
}
/** A table as stored in the synced schema, together with its columns. */
export interface LiveDatabaseSyncedTable {
/** Stable identifier; kept across syncs for matched tables, generated for new tables. */
id: string;
name: string;
catalog: string | null;
db: string | null;
/** Enabled flag; newly planned tables are created with `true`. */
enabled: boolean;
/** Descriptions keyed by origin; planning writes the `db` key and clears the `ai` key when the DB comment changes. */
descriptions: Record<string, string>;
columns: LiveDatabaseSyncedColumn[];
}
/** A directed relationship between two columns of the synced schema. */
export interface LiveDatabaseSyncedLink {
id: string;
fromTableId: string;
fromColumnId: string;
toTableId: string;
toColumnId: string;
/** Origin of the link; `formal` links are the ones reconciled against extracted foreign keys. */
source: 'formal' | 'inferred' | 'manual';
/** Confidence score; formal links created by planning use 1. */
confidence: number;
/** Relationship kind; formal links created by planning use `'MANY_TO_ONE'`. */
relationshipType: string;
/** Formal links created by planning always set this to true. */
isPrimaryKeyReference: boolean;
}
/** The synced schema of one connection: its tables (with columns) and the links between columns. */
export interface LiveDatabaseSyncedSchema {
connectionId: string;
tables: LiveDatabaseSyncedTable[];
links: LiveDatabaseSyncedLink[];
}
/** Ids of the entities affected by a planned sync, grouped by kind of change. */
export interface LiveDatabaseStructuralChanges {
/** Ids generated for tables that did not exist before. */
newTableIds: string[];
/** Ids generated for new columns, on both existing and new tables. */
newColumnIds: string[];
/** New tables, plus existing tables that gained or lost columns or had a column type change. */
tablesWithStructuralChanges: string[];
/** Existing columns whose type differs from the extracted type. */
columnsWithTypeChange: string[];
/** Existing columns whose DB comment differs from the stored `db` description. */
columnsWithDescriptionChange: string[];
/** Existing tables whose DB comment differs from the stored `db` description. */
tablesWithDescriptionChange: string[];
}
/** Counters summarising a planned sync. */
export interface LiveDatabaseStructuralSyncStats {
tablesCreated: number;
tablesDeleted: number;
/** Includes the columns of newly created tables. */
columnsCreated: number;
/** Includes the columns of deleted tables. */
columnsDeleted: number;
/** Existing columns whose type, nullability, primary-key flag or DB comment changed. */
columnsModified: number;
formalLinksCreated: number;
formalLinksDeleted: number;
}
/**
 * Identifier-level operations accumulated while planning a sync.
 * NOTE(review): how these operations are applied to storage is not visible in this file.
 */
export interface LiveDatabaseStructuralSyncOperations {
/** Ids of current tables that are absent from the extracted schema. */
deleteTableIds: string[];
/** Ids of columns removed from tables that still exist; columns of deleted tables are not listed. */
deleteColumnIds: string[];
/** Tables to insert, with freshly generated ids. */
insertTables: Array<{
id: string;
connectionId: string;
name: string;
catalog: string | null;
db: string | null;
enabled: boolean;
}>;
/** Columns to insert, with freshly generated ids, for both existing and new tables. */
insertColumns: Array<{
id: string;
tableId: string;
name: string;
parentColumnId: string | null;
}>;
/** Existing columns whose type, nullability, primary-key flag or DB comment changed. */
touchColumnIds: string[];
/** Existing columns whose type or DB comment changed. */
invalidateColumnEmbeddingIds: string[];
}
/** Result of planning a structural sync. */
export interface LiveDatabaseStructuralSyncPlan {
/** The schema as it should look after the sync. */
schema: LiveDatabaseSyncedSchema;
/** Ids of inferred links that touch a column whose type changed. */
inferredLinksToValidate: string[];
stats: LiveDatabaseStructuralSyncStats;
changes: LiveDatabaseStructuralChanges;
operations: LiveDatabaseStructuralSyncOperations;
}
/** Input of `planLiveDatabaseStructuralSync`. */
export interface PlanLiveDatabaseStructuralSyncInput {
/** Connection id stamped on inserted tables and on the resulting schema. */
connectionId: string;
/** Previously synced schema, or null when nothing has been synced yet. */
current: LiveDatabaseSyncedSchema | null;
/** Schema freshly extracted from the database. */
extracted: LiveDatabaseExtractedSchema;
/** Generates ids for new tables, columns and formal links; called once per new entity. */
idFactory: () => string;
}
/** Outcome of reconciling one existing table with its extracted counterpart. */
interface UpdatedTableResult {
/** The table with its columns and descriptions brought up to date. */
table: LiveDatabaseSyncedTable;
columnsCreated: number;
columnsDeleted: number;
columnsModified: number;
/** Ids generated for columns that are new on this table. */
newColumnIds: string[];
columnsWithTypeChange: string[];
columnsWithDescriptionChange: string[];
/** True when the table's DB comment differs from its stored `db` description. */
tableDescriptionChanged: boolean;
}
/**
 * Returns a copy of `descriptions` brought in line with the current database comment.
 *
 * @param descriptions - Existing descriptions; never mutated.
 * @param dbComment - Comment read from the database; a falsy value removes the `db` entry.
 * @param changed - When true, the `ai` entry is dropped from the copy.
 * @returns The updated copy.
 */
function updateDescription(
  descriptions: Record<string, string>,
  dbComment: string | null | undefined,
  changed: boolean,
): Record<string, string> {
  const result: Record<string, string> = Object.assign({}, descriptions);
  if (!dbComment) {
    delete result.db;
  } else {
    result.db = dbComment;
  }
  if (changed) {
    delete result.ai;
  }
  return result;
}
/** Builds the initial descriptions of a new entity: `{ db: comment }`, or `{}` when the comment is falsy. */
function descriptionFromDbComment(dbComment: string | null | undefined): Record<string, string> {
  if (!dbComment) {
    return {};
  }
  return { db: dbComment };
}
/**
 * Reconciles one existing table with its freshly extracted counterpart.
 *
 * Columns are matched by name. Columns missing from the extraction are scheduled for
 * deletion, new columns receive generated ids, and columns whose type, nullability,
 * primary-key flag or DB comment changed are scheduled to be touched.
 *
 * Comments are compared after normalising "no comment" values: `''`, `null` and `undefined`
 * are all treated as absent. An empty comment is never stored as a `db` description, so
 * without normalisation it would be reported as a change on every sync, repeatedly
 * invalidating embeddings and discarding `ai` descriptions.
 *
 * @param args.currentTable - Table as currently stored.
 * @param args.extractedTable - Table as extracted from the database.
 * @param args.currentLinks - All current links; used to find inferred links touching type-changed columns.
 * @param args.inferredLinksToValidate - Accumulator (mutated) of inferred link ids needing validation.
 * @param args.operations - Accumulator (mutated) of planned operations.
 * @param args.idFactory - Generates ids for new columns.
 * @returns The updated table together with per-table counters and changed-id lists.
 */
function planUpdatedTable(args: {
  currentTable: LiveDatabaseSyncedTable;
  extractedTable: LiveDatabaseExtractedTable;
  currentLinks: LiveDatabaseSyncedLink[];
  inferredLinksToValidate: string[];
  operations: LiveDatabaseStructuralSyncOperations;
  idFactory: () => string;
}): UpdatedTableResult {
  const { currentTable, extractedTable, currentLinks, inferredLinksToValidate, operations, idFactory } = args;
  // Empty, null and undefined comments all mean "no comment".
  const normalizeComment = (comment: string | null | undefined): string | null => comment || null;

  let columnsCreated = 0;
  let columnsDeleted = 0;
  let columnsModified = 0;
  const newColumnIds: string[] = [];
  const columnsWithTypeChange: string[] = [];
  const columnsWithDescriptionChange: string[] = [];
  const updatedColumns: LiveDatabaseSyncedColumn[] = [];
  const tableDescriptionChanged =
    normalizeComment(currentTable.descriptions.db) !== normalizeComment(extractedTable.dbComment);
  const currentColumnsByName = new Map(currentTable.columns.map((column) => [column.name, column]));
  const extractedColumnsByName = new Map(extractedTable.columns.map((column) => [column.name, column]));

  // Columns that disappeared from the database.
  for (const [name, currentColumn] of currentColumnsByName) {
    if (!extractedColumnsByName.has(name)) {
      operations.deleteColumnIds.push(currentColumn.id);
      columnsDeleted++;
    }
  }

  for (const [name, extractedColumn] of extractedColumnsByName) {
    const currentColumn = currentColumnsByName.get(name);
    if (!currentColumn) {
      // Column is new on this table.
      const columnId = idFactory();
      operations.insertColumns.push({
        id: columnId,
        tableId: currentTable.id,
        name: extractedColumn.name,
        parentColumnId: null,
      });
      columnsCreated++;
      newColumnIds.push(columnId);
      updatedColumns.push({
        id: columnId,
        name: extractedColumn.name,
        type: extractedColumn.type,
        nullable: extractedColumn.nullable,
        primaryKey: extractedColumn.primaryKey,
        descriptions: descriptionFromDbComment(extractedColumn.dbComment),
        parentColumnId: null,
        embedding: null,
        sampleValues: null,
        cardinality: null,
      });
      continue;
    }

    const typeChanged = currentColumn.type !== extractedColumn.type;
    const nullableChanged = currentColumn.nullable !== extractedColumn.nullable;
    const primaryKeyChanged = currentColumn.primaryKey !== extractedColumn.primaryKey;
    const dbDescriptionChanged =
      normalizeComment(currentColumn.descriptions.db) !== normalizeComment(extractedColumn.dbComment);

    if (typeChanged || nullableChanged || primaryKeyChanged || dbDescriptionChanged) {
      operations.touchColumnIds.push(currentColumn.id);
      columnsModified++;
      if (typeChanged || dbDescriptionChanged) {
        operations.invalidateColumnEmbeddingIds.push(currentColumn.id);
      }
      if (typeChanged) {
        columnsWithTypeChange.push(currentColumn.id);
        // Inferred links on a retyped column may no longer hold; queue each one once.
        const affectedLinks = currentLinks.filter(
          (link) =>
            link.source === 'inferred' &&
            (link.fromColumnId === currentColumn.id || link.toColumnId === currentColumn.id),
        );
        for (const link of affectedLinks) {
          if (!inferredLinksToValidate.includes(link.id)) {
            inferredLinksToValidate.push(link.id);
          }
        }
      }
      if (dbDescriptionChanged) {
        columnsWithDescriptionChange.push(currentColumn.id);
      }
    }

    updatedColumns.push({
      ...currentColumn,
      type: extractedColumn.type,
      nullable: extractedColumn.nullable,
      primaryKey: extractedColumn.primaryKey,
      descriptions: updateDescription(currentColumn.descriptions, extractedColumn.dbComment, dbDescriptionChanged),
      // NOTE(review): a description-only change is listed in invalidateColumnEmbeddingIds above,
      // yet the embedding is kept here unless the type changed — confirm this is intended.
      embedding: typeChanged ? null : currentColumn.embedding,
    });
  }

  return {
    table: {
      ...currentTable,
      descriptions: updateDescription(currentTable.descriptions, extractedTable.dbComment, tableDescriptionChanged),
      columns: updatedColumns,
    },
    columnsCreated,
    columnsDeleted,
    columnsModified,
    newColumnIds,
    columnsWithTypeChange,
    columnsWithDescriptionChange,
    tableDescriptionChanged,
  };
}
/**
 * Plans the creation of a table that exists in the database but not yet in the synced schema.
 *
 * One id is generated for the table first, then one per column in extraction order. The
 * matching insert operations are appended to `args.operations`.
 *
 * @returns The new table (enabled, with descriptions derived from DB comments) as it will look once synced.
 */
function planCreatedTable(args: {
  connectionId: string;
  extractedTable: LiveDatabaseExtractedTable;
  operations: LiveDatabaseStructuralSyncOperations;
  idFactory: () => string;
}): LiveDatabaseSyncedTable {
  const source = args.extractedTable;
  const newTableId = args.idFactory();
  args.operations.insertTables.push({
    id: newTableId,
    connectionId: args.connectionId,
    name: source.name,
    catalog: source.catalog,
    db: source.db,
    enabled: true,
  });

  const newColumns: LiveDatabaseSyncedColumn[] = [];
  for (const sourceColumn of source.columns) {
    const newColumnId = args.idFactory();
    args.operations.insertColumns.push({
      id: newColumnId,
      tableId: newTableId,
      name: sourceColumn.name,
      parentColumnId: null,
    });
    newColumns.push({
      id: newColumnId,
      name: sourceColumn.name,
      type: sourceColumn.type,
      nullable: sourceColumn.nullable,
      primaryKey: sourceColumn.primaryKey,
      descriptions: descriptionFromDbComment(sourceColumn.dbComment),
      parentColumnId: null,
      embedding: null,
      sampleValues: null,
      cardinality: null,
    });
  }

  return {
    id: newTableId,
    name: source.name,
    catalog: source.catalog,
    db: source.db,
    enabled: true,
    descriptions: descriptionFromDbComment(source.dbComment),
    columns: newColumns,
  };
}
/**
 * Reconciles formal links with the foreign keys found in the extracted schema.
 *
 * Foreign keys are resolved to column ids of the synced tables; keys whose tables or
 * columns cannot be resolved are ignored. The referenced table is looked up in the same
 * catalog/db as the declaring table. Links are identified by their (from column, to column)
 * pair. Several extracted foreign keys that resolve to the same pair are collapsed into
 * one, so duplicates never create duplicate links or inflate the `created` count.
 *
 * @param args.extracted - Freshly extracted schema providing the foreign keys.
 * @param args.tables - Synced tables (updated and created) used to resolve column ids.
 * @param args.tableNaturalKeyToId - Lookup from a table's natural key to its id.
 * @param args.currentLinks - All current links; only those with source `formal` are considered.
 * @param args.idFactory - Generates ids for newly created links.
 * @returns The surviving formal links followed by the new ones, plus created/deleted counts.
 */
function syncFormalLinks(args: {
  extracted: LiveDatabaseExtractedSchema;
  tables: LiveDatabaseSyncedTable[];
  tableNaturalKeyToId: Map<string, string>;
  currentLinks: LiveDatabaseSyncedLink[];
  idFactory: () => string;
}): { links: LiveDatabaseSyncedLink[]; created: number; deleted: number } {
  const { extracted, tables, tableNaturalKeyToId, currentLinks, idFactory } = args;

  type LinkEndpoints = { fromTableId: string; fromColumnId: string; toTableId: string; toColumnId: string };
  const linkKey = (link: { fromColumnId: string; toColumnId: string }): string =>
    `${link.fromColumnId}->${link.toColumnId}`;

  const columnKeyToId = new Map<string, string>();
  for (const table of tables) {
    const tableKey = buildLiveDatabaseTableNaturalKey(table);
    for (const column of table.columns) {
      columnKeyToId.set(`${tableKey}.${column.name}`, column.id);
    }
  }

  // Keyed by link key so repeated foreign keys for the same column pair collapse into one;
  // Map iteration keeps first-seen order.
  const extractedLinksByKey = new Map<string, LinkEndpoints>();
  for (const table of extracted.tables) {
    const fromTableKey = buildLiveDatabaseTableNaturalKey(table);
    const fromTableId = tableNaturalKeyToId.get(fromTableKey);
    if (!fromTableId) {
      continue;
    }
    for (const foreignKey of table.foreignKeys) {
      const toTableKey = buildLiveDatabaseTableNaturalKey({
        catalog: table.catalog,
        db: table.db,
        name: foreignKey.toTable,
      });
      const toTableId = tableNaturalKeyToId.get(toTableKey);
      if (!toTableId) {
        continue;
      }
      const fromColumnId = columnKeyToId.get(`${fromTableKey}.${foreignKey.fromColumn}`);
      const toColumnId = columnKeyToId.get(`${toTableKey}.${foreignKey.toColumn}`);
      if (!fromColumnId || !toColumnId) {
        continue;
      }
      const endpoints: LinkEndpoints = { fromTableId, fromColumnId, toTableId, toColumnId };
      const key = linkKey(endpoints);
      if (!extractedLinksByKey.has(key)) {
        extractedLinksByKey.set(key, endpoints);
      }
    }
  }

  const currentFormalLinks = currentLinks.filter((link) => link.source === 'formal');
  const currentLinkKeys = new Set(currentFormalLinks.map(linkKey));

  // A formal link survives only while its foreign key is still present in the database.
  const preservedFormalLinks = currentFormalLinks.filter((link) => extractedLinksByKey.has(linkKey(link)));
  const deleted = currentFormalLinks.length - preservedFormalLinks.length;

  const linksToCreate = [...extractedLinksByKey.values()].filter((link) => !currentLinkKeys.has(linkKey(link)));
  const newLinks: LiveDatabaseSyncedLink[] = linksToCreate.map((endpoints) => ({
    id: idFactory(),
    fromTableId: endpoints.fromTableId,
    fromColumnId: endpoints.fromColumnId,
    toTableId: endpoints.toTableId,
    toColumnId: endpoints.toColumnId,
    source: 'formal' as const,
    confidence: 1,
    relationshipType: 'MANY_TO_ONE',
    isPrimaryKeyReference: true,
  }));

  return {
    links: [...preservedFormalLinks, ...newLinks],
    created: newLinks.length,
    deleted,
  };
}
/**
 * Plans how to bring the synced schema in line with a freshly extracted schema.
 *
 * Tables are matched by natural key. Tables missing from the extraction are deleted,
 * matched tables are reconciled column by column, and unmatched extracted tables are
 * created. Ids are generated in this order: new columns of existing tables, then each new
 * table followed by its columns, then new formal links.
 *
 * Inferred links are carried over only when both endpoints still exist: links touching a
 * deleted table or a column removed from a surviving table are dropped, so the resulting
 * schema never contains a link to a column that is no longer part of it.
 *
 * NOTE(review): links with source `manual` are not carried into the resulting schema —
 * confirm that this is intended.
 *
 * @param input - Current schema (or null), extracted schema, connection id and id generator.
 * @returns The target schema plus the operations, statistics and change lists describing the sync.
 */
export function planLiveDatabaseStructuralSync(
  input: PlanLiveDatabaseStructuralSyncInput,
): LiveDatabaseStructuralSyncPlan {
  const operations: LiveDatabaseStructuralSyncOperations = {
    deleteTableIds: [],
    deleteColumnIds: [],
    insertTables: [],
    insertColumns: [],
    touchColumnIds: [],
    invalidateColumnEmbeddingIds: [],
  };
  const stats: LiveDatabaseStructuralSyncStats = {
    tablesCreated: 0,
    tablesDeleted: 0,
    columnsCreated: 0,
    columnsDeleted: 0,
    columnsModified: 0,
    formalLinksCreated: 0,
    formalLinksDeleted: 0,
  };
  const changes: LiveDatabaseStructuralChanges = {
    newTableIds: [],
    newColumnIds: [],
    tablesWithStructuralChanges: [],
    columnsWithTypeChange: [],
    columnsWithDescriptionChange: [],
    tablesWithDescriptionChange: [],
  };
  const inferredLinksToValidate: string[] = [];
  const currentLinks = input.current?.links ?? [];

  // Index both sides by natural key so tables can be matched.
  const currentTablesByKey = new Map<string, LiveDatabaseSyncedTable>();
  const extractedTablesByKey = new Map<string, LiveDatabaseExtractedTable>();
  if (input.current) {
    for (const table of input.current.tables) {
      currentTablesByKey.set(buildLiveDatabaseTableNaturalKey(table), table);
    }
  }
  for (const table of input.extracted.tables) {
    extractedTablesByKey.set(buildLiveDatabaseTableNaturalKey(table), table);
  }

  // Classify every table as deleted, updated or created.
  const tablesToDelete: LiveDatabaseSyncedTable[] = [];
  const tablesToUpdate: Array<{
    current: LiveDatabaseSyncedTable;
    extracted: LiveDatabaseExtractedTable;
  }> = [];
  const tablesToCreate: LiveDatabaseExtractedTable[] = [];
  for (const [key, table] of currentTablesByKey) {
    const extractedTable = extractedTablesByKey.get(key);
    if (!extractedTable) {
      tablesToDelete.push(table);
    } else {
      tablesToUpdate.push({ current: table, extracted: extractedTable });
    }
  }
  for (const [key, table] of extractedTablesByKey) {
    if (!currentTablesByKey.has(key)) {
      tablesToCreate.push(table);
    }
  }

  for (const table of tablesToDelete) {
    operations.deleteTableIds.push(table.id);
    stats.tablesDeleted++;
    // Columns of a deleted table are counted but not listed individually in deleteColumnIds.
    stats.columnsDeleted += table.columns.length;
  }

  const updatedTables: LiveDatabaseSyncedTable[] = [];
  for (const { current, extracted } of tablesToUpdate) {
    const result = planUpdatedTable({
      currentTable: current,
      extractedTable: extracted,
      currentLinks,
      inferredLinksToValidate,
      operations,
      idFactory: input.idFactory,
    });
    updatedTables.push(result.table);
    stats.columnsCreated += result.columnsCreated;
    stats.columnsDeleted += result.columnsDeleted;
    stats.columnsModified += result.columnsModified;
    changes.newColumnIds.push(...result.newColumnIds);
    changes.columnsWithTypeChange.push(...result.columnsWithTypeChange);
    changes.columnsWithDescriptionChange.push(...result.columnsWithDescriptionChange);
    if (result.tableDescriptionChanged) {
      changes.tablesWithDescriptionChange.push(current.id);
    }
    if (result.columnsCreated > 0 || result.columnsDeleted > 0 || result.columnsWithTypeChange.length > 0) {
      changes.tablesWithStructuralChanges.push(current.id);
    }
  }

  const createdTables: LiveDatabaseSyncedTable[] = [];
  for (const extractedTable of tablesToCreate) {
    const table = planCreatedTable({
      connectionId: input.connectionId,
      extractedTable,
      operations,
      idFactory: input.idFactory,
    });
    createdTables.push(table);
    stats.tablesCreated++;
    stats.columnsCreated += table.columns.length;
    changes.newTableIds.push(table.id);
    changes.newColumnIds.push(...table.columns.map((column) => column.id));
    changes.tablesWithStructuralChanges.push(table.id);
  }

  const allTables = [...updatedTables, ...createdTables];
  const tableNaturalKeyToId = new Map<string, string>();
  for (const table of allTables) {
    tableNaturalKeyToId.set(buildLiveDatabaseTableNaturalKey(table), table.id);
  }

  const formalLinkResult = syncFormalLinks({
    extracted: input.extracted,
    tables: allTables,
    tableNaturalKeyToId,
    currentLinks,
    idFactory: input.idFactory,
  });
  stats.formalLinksCreated = formalLinkResult.created;
  stats.formalLinksDeleted = formalLinkResult.deleted;

  // Keep inferred links only while both of their endpoints still exist.
  const deletedTableIds = new Set(tablesToDelete.map((table) => table.id));
  const deletedColumnIds = new Set(operations.deleteColumnIds);
  const preservedInferredLinks = currentLinks.filter(
    (link) =>
      link.source === 'inferred' &&
      !deletedTableIds.has(link.fromTableId) &&
      !deletedTableIds.has(link.toTableId) &&
      !deletedColumnIds.has(link.fromColumnId) &&
      !deletedColumnIds.has(link.toColumnId),
  );

  return {
    schema: {
      connectionId: input.connectionId,
      tables: allTables,
      links: [...formalLinkResult.links, ...preservedInferredLinks],
    },
    inferredLinksToValidate,
    stats,
    changes,
    operations,
  };
}

View file

@ -0,0 +1,10 @@
import type { KloSchemaSnapshot } from '../../../scan/types.js';
/** Port through which a schema snapshot is obtained for a connection. */
export interface LiveDatabaseIntrospectionPort {
/**
 * Extracts the schema of the given connection.
 *
 * @param connectionId - Identifier of the connection to introspect.
 * @returns A promise resolving to the extracted schema snapshot.
 */
extractSchema(connectionId: string): Promise<KloSchemaSnapshot>;
}
/** Dependencies handed to the live-database source adapter. */
export interface LiveDatabaseSourceAdapterDeps {
/** Provider of schema snapshots. */
introspection: LiveDatabaseIntrospectionPort;
/** Optional clock. NOTE(review): presumably injected to make timestamps deterministic in tests — confirm at the call site. */
now?: () => Date;
}

View file

@ -0,0 +1,154 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { chunkLookerStagedDir } from './chunk.js';
import { writeLookerEvidenceDocuments } from './evidence-documents.js';
/**
 * Test helper: writes `value` as pretty-printed JSON (2-space indent, trailing newline)
 * to `relPath` below `stagedDir`, creating parent directories as needed.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const target = join(stagedDir, relPath);
  const parentDir = join(target, '..');
  const payload = `${JSON.stringify(value, null, 2)}\n`;
  await mkdir(parentDir, { recursive: true });
  await writeFile(target, payload, 'utf-8');
}
/**
 * Test helper: populates `stagedDir` with a minimal staged Looker project — one model with
 * one explore, one dashboard, one Look, a folder, a user and usage/schedule/favorite
 * signals — and then generates the evidence documents for it.
 *
 * Files are written sequentially in the order listed below.
 */
async function writeSmallFixture(stagedDir: string): Promise<void> {
  const fixtureFiles: Array<[string, unknown]> = [
    [
      'sync-config.json',
      {
        lookerConnectionId: '11111111-1111-4111-8111-111111111111',
        fetchedAt: '2026-04-30T12:30:00.000Z',
      },
    ],
    [
      'lookml_models.json',
      {
        models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
      },
    ],
    [
      'explores/b2b/sales_pipeline.json',
      {
        modelName: 'b2b',
        exploreName: 'sales_pipeline',
        label: 'Sales Pipeline',
        description: null,
        fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
        joins: [],
      },
    ],
    [
      'dashboards/10.json',
      {
        lookerId: '10',
        title: 'Sales Pipeline',
        description: null,
        folderId: '7',
        ownerId: '3',
        updatedAt: '2026-04-30T12:00:00.000Z',
        tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
      },
    ],
    [
      'looks/20.json',
      {
        lookerId: '20',
        title: 'Open Pipeline',
        description: null,
        folderId: '7',
        ownerId: '3',
        updatedAt: '2026-04-30T12:00:00.000Z',
        query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
      },
    ],
    [
      'folders/tree.json',
      {
        folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }],
      },
    ],
    ['users/3.json', { id: '3', displayName: 'Ada Lovelace', email: null }],
    ['signals/dashboard_usage.json', [{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8 }]],
    ['signals/look_usage.json', [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5 }]],
    [
      'signals/scheduled_plans.json',
      [{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 }],
    ],
    ['signals/favorites.json', [{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }]],
  ];

  for (const [relPath, value] of fixtureFiles) {
    await writeJson(stagedDir, relPath, value);
  }
  await writeLookerEvidenceDocuments(stagedDir);
}
// Exercises chunkLookerStagedDir against a small staged Looker project that is rebuilt in a
// fresh temp directory before every test and removed afterwards.
describe('chunkLookerStagedDir', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-chunk-'));
await writeSmallFixture(stagedDir);
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
// Without a diff set, every explore, dashboard and Look becomes a work unit (WU).
it('emits one WU per explore, dashboard, and Look with readable dependencies', async () => {
const result = await chunkLookerStagedDir(stagedDir);
expect(result.reconcileNotes).toEqual([
expect.stringContaining('emit_artifact_resolution with actionType="subsumed"'),
]);
expect(result.workUnits.map((wu) => wu.unitKey).sort()).toEqual([
'looker-dashboard-10',
'looker-explore-b2b-sales_pipeline',
'looker-look-20',
]);
const dashboard = result.workUnits.find((wu) => wu.unitKey === 'looker-dashboard-10');
// Raw files: the staged entity file plus its generated evidence documents.
expect(dashboard?.rawFiles).toEqual([
'dashboards/10.json',
'evidence/dashboards/10/metadata.json',
'evidence/dashboards/10/page.md',
]);
expect(dashboard?.notes).toContain('context_candidate_write');
expect(dashboard?.notes).not.toContain('wiki_write');
// Dependencies: the explore queried by the tile, the folder tree, the owner and the signal files.
expect(dashboard?.dependencyPaths.sort()).toEqual([
'explores/b2b/sales_pipeline.json',
'folders/tree.json',
'signals/dashboard_usage.json',
'signals/favorites.json',
'signals/scheduled_plans.json',
'users/3.json',
]);
const explore = result.workUnits.find((wu) => wu.unitKey === 'looker-explore-b2b-sales_pipeline');
expect(explore?.rawFiles).toEqual([
'explores/b2b/sales_pipeline.json',
'evidence/explores/b2b/sales_pipeline/metadata.json',
'evidence/explores/b2b/sales_pipeline/page.md',
]);
expect(explore?.dependencyPaths).toEqual(['lookml_models.json']);
});
// Only the explore file is modified, yet the dashboard and Look that depend on it must still be emitted.
it('keeps downstream dashboard and Look WUs when an explore dependency changes', async () => {
const result = await chunkLookerStagedDir(stagedDir, {
added: [],
modified: ['explores/b2b/sales_pipeline.json'],
deleted: [],
unchanged: [
'dashboards/10.json',
'looks/20.json',
'lookml_models.json',
'folders/tree.json',
'users/3.json',
'signals/dashboard_usage.json',
'signals/look_usage.json',
'signals/scheduled_plans.json',
'signals/favorites.json',
],
});
expect(result.workUnits.map((wu) => wu.unitKey).sort()).toEqual([
'looker-dashboard-10',
'looker-explore-b2b-sales_pipeline',
'looker-look-20',
]);
expect(result.workUnits.find((wu) => wu.unitKey === 'looker-dashboard-10')?.rawFiles).toEqual([
'dashboards/10.json',
'evidence/dashboards/10/metadata.json',
'evidence/dashboards/10/page.md',
]);
});
// A deleted entity file is reported through the eviction unit.
it('returns an EvictionUnit for deleted runtime entity raw paths', async () => {
const result = await chunkLookerStagedDir(stagedDir, {
added: [],
modified: [],
deleted: ['looks/20.json'],
unchanged: ['dashboards/10.json', 'explores/b2b/sales_pipeline.json'],
});
expect(result.eviction).toEqual({ deletedRawPaths: ['looks/20.json'] });
});
});

View file

@ -0,0 +1,198 @@
import { readdir, readFile } from 'node:fs/promises';
import { join, relative } from 'node:path';
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
import { buildLookerReconcileNotes } from './reconcile.js';
import {
STAGED_FILES,
type StagedDashboardFile,
type StagedLookerQuery,
type StagedLookFile,
stagedDashboardFileSchema,
stagedExploreFileSchema,
stagedLookFileSchema,
} from './types.js';
/**
 * In-memory snapshot of a staged Looker directory, as produced by `loadProject`.
 */
interface LoadedLookerProject {
// Every file under the staged dir: relative, forward-slash separated, sorted (see `walk`).
allPaths: string[];
// Parsed `dashboards/<id>.json` files keyed by their staged relative path.
dashboardsByPath: Map<string, StagedDashboardFile>;
// Parsed `looks/<id>.json` files keyed by their staged relative path.
looksByPath: Map<string, StagedLookFile>;
// Explore paths rebuilt from each parsed explore's modelName/exploreName, de-duplicated and sorted.
explorePaths: string[];
}
/**
 * Recursively lists the regular files beneath `root`.
 *
 * @param root Directory to scan.
 * @returns File paths relative to `root`, with backslashes converted to forward slashes,
 *   in default `Array.prototype.sort` order.
 */
async function walk(root: string): Promise<string[]> {
  const dirents = await readdir(root, { withFileTypes: true, recursive: true });
  const files: string[] = [];
  for (const dirent of dirents) {
    if (!dirent.isFile()) {
      continue;
    }
    const absolute = join(dirent.parentPath, dirent.name);
    files.push(relative(root, absolute).replace(/\\/g, '/'));
  }
  return files.sort();
}
/**
 * Reads a staged Looker directory into memory.
 *
 * Dashboard, Look and explore JSON files are parsed and schema-validated one at a time, in
 * path order; every other file is only recorded in `allPaths`.
 *
 * @param stagedDir Root of the staged directory.
 * @returns The file listing plus parsed dashboards/looks and the derived explore paths.
 */
async function loadProject(stagedDir: string): Promise<LoadedLookerProject> {
  const readJson = async (path: string): Promise<unknown> =>
    JSON.parse(await readFile(join(stagedDir, path), 'utf-8'));

  const allPaths = await walk(stagedDir);
  const dashboardsByPath = new Map<string, StagedDashboardFile>();
  const looksByPath = new Map<string, StagedLookFile>();
  const exploreSet = new Set<string>();

  for (const path of allPaths) {
    if (/^dashboards\/[^/]+\.json$/.test(path)) {
      dashboardsByPath.set(path, stagedDashboardFileSchema.parse(await readJson(path)));
    } else if (/^looks\/[^/]+\.json$/.test(path)) {
      looksByPath.set(path, stagedLookFileSchema.parse(await readJson(path)));
    } else if (/^explores\/[^/]+\/[^/]+\.json$/.test(path)) {
      // The explore path is rebuilt from the file's content, not taken from the file name.
      const { modelName, exploreName } = stagedExploreFileSchema.parse(await readJson(path));
      exploreSet.add(explorePath(modelName, exploreName));
    }
  }

  return { allPaths, dashboardsByPath, looksByPath, explorePaths: [...exploreSet].sort() };
}
/**
 * Chunks a staged Looker directory into WorkUnits.
 *
 * @param stagedDir Root of the staged directory.
 * @param diffSet Optional change set. When given, only units that read an added/modified
 *   path are kept, and any deleted paths are reported (sorted) as the eviction.
 * @returns Work units, the eviction (if any), and reconcile notes whenever there is at least
 *   one work unit or an eviction.
 */
export async function chunkLookerStagedDir(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
  const firstRunUnits = emitFirstRunWorkUnits(await loadProject(stagedDir));
  const result = diffSet ? applyDiffSet(firstRunUnits, diffSet) : { workUnits: firstRunUnits };

  let eviction = result.eviction;
  if (diffSet && diffSet.deleted.length > 0) {
    eviction = { deletedRawPaths: [...diffSet.deleted].sort() };
  }

  const hasWork = result.workUnits.length > 0 || Boolean(eviction);
  return {
    ...result,
    eviction,
    reconcileNotes: hasWork ? buildLookerReconcileNotes() : [],
  };
}
/**
 * Builds the full, diff-agnostic list of WorkUnits for a loaded project: one per explore,
 * one per dashboard and one per Look.
 *
 * Dependencies are only recorded for files that actually exist in the staged directory.
 *
 * @param project Loaded staged project.
 * @returns All units, ordered by `unitKey` (localeCompare).
 */
function emitFirstRunWorkUnits(project: LoadedLookerProject): WorkUnit[] {
  const byPath = <T>([left]: [string, T], [right]: [string, T]): number => left.localeCompare(right);
  const units: WorkUnit[] = [];

  // Explores: raw file + evidence, depending only on the LookML model listing.
  for (const path of project.explorePaths) {
    const match = /^explores\/([^/]+)\/([^/]+)\.json$/.exec(path);
    if (match === null) {
      continue;
    }
    const [, modelName, exploreName] = match;
    units.push(
      buildUnit(project, {
        unitKey: `looker-explore-${modelName}-${exploreName}`,
        displayLabel: `Looker explore ${modelName}.${exploreName}`,
        rawFiles: [path, ...evidencePathsForExplore(project, modelName, exploreName)],
        dependencyPaths: existingPaths(project, [STAGED_FILES.lookmlModels]),
        notes: `Write API-derived SL source looker__${modelName}__${exploreName} and durable domain knowledge for this Looker explore.`,
      }),
    );
  }

  // Dashboards: depend on folder/signal files, the owner's user file, and each tile's explore.
  for (const [path, dashboard] of [...project.dashboardsByPath].sort(byPath)) {
    const deps = new Set<string>();
    const sharedFiles = [
      STAGED_FILES.foldersTree,
      STAGED_FILES.signals.dashboardUsage,
      STAGED_FILES.signals.scheduledPlans,
      STAGED_FILES.signals.favorites,
    ];
    for (const shared of sharedFiles) {
      addIfPresent(project, deps, shared);
    }
    if (dashboard.ownerId) {
      addIfPresent(project, deps, `users/${dashboard.ownerId}.json`);
    }
    dashboard.tiles.forEach((tile) => addExploreDependency(project, deps, tile.query));

    units.push(
      buildUnit(project, {
        unitKey: `looker-dashboard-${dashboard.lookerId}`,
        displayLabel: `Looker dashboard "${dashboard.title}"`,
        rawFiles: [path, ...evidencePathsForDashboard(project, dashboard.lookerId)],
        dependencyPaths: [...deps].sort(),
        notes:
          'Extract generalizable metric, segment, and domain knowledge from this dashboard. Treat usage, owner, and folder data as prioritization/provenance context only. Use context_evidence_search/context_evidence_read and context_candidate_write for wiki-bound knowledge; do not write wiki pages directly from this WorkUnit.',
      }),
    );
  }

  // Looks: same shape as dashboards, but with Look usage signals and a single query.
  for (const [path, look] of [...project.looksByPath].sort(byPath)) {
    const deps = new Set<string>();
    const sharedFiles = [
      STAGED_FILES.foldersTree,
      STAGED_FILES.signals.lookUsage,
      STAGED_FILES.signals.scheduledPlans,
      STAGED_FILES.signals.favorites,
    ];
    for (const shared of sharedFiles) {
      addIfPresent(project, deps, shared);
    }
    if (look.ownerId) {
      addIfPresent(project, deps, `users/${look.ownerId}.json`);
    }
    addExploreDependency(project, deps, look.query);

    units.push(
      buildUnit(project, {
        unitKey: `looker-look-${look.lookerId}`,
        displayLabel: `Looker Look "${look.title}"`,
        rawFiles: [path, ...evidencePathsForLook(project, look.lookerId)],
        dependencyPaths: [...deps].sort(),
        notes:
          'Extract generalizable metric, segment, and domain knowledge from this Look. Treat usage, owner, and folder data as prioritization/provenance context only. Use context_evidence_search/context_evidence_read and context_candidate_write for wiki-bound knowledge; do not write wiki pages directly from this WorkUnit.',
      }),
    );
  }

  return units.sort((a, b) => a.unitKey.localeCompare(b.unitKey));
}
/**
 * Completes a WorkUnit by attaching its `peerFileIndex`: every staged path that is neither
 * one of the unit's raw files nor one of its dependencies, sorted.
 *
 * @param project Loaded staged project supplying the full path listing.
 * @param input The unit's identifying fields, raw files, dependencies and notes.
 */
function buildUnit(
  project: LoadedLookerProject,
  input: Pick<WorkUnit, 'unitKey' | 'displayLabel' | 'rawFiles' | 'dependencyPaths' | 'notes'>,
): WorkUnit {
  const claimed = new Set<string>(input.rawFiles);
  for (const dependency of input.dependencyPaths) {
    claimed.add(dependency);
  }
  const peerFileIndex = project.allPaths.filter((candidate) => !claimed.has(candidate));
  peerFileIndex.sort();
  return { ...input, peerFileIndex };
}
/**
 * Keeps only the units that read at least one added or modified path, either as a raw file
 * or as a dependency. Deleted and unchanged paths are not consulted here.
 *
 * @param firstRunUnits Units produced for the full staged directory.
 * @param diffSet Change set to filter by.
 */
function applyDiffSet(firstRunUnits: WorkUnit[], diffSet: DiffSet): ChunkResult {
  const touched = new Set<string>([...diffSet.added, ...diffSet.modified]);
  const isTouched = (path: string): boolean => touched.has(path);
  const isAffected = (unit: WorkUnit): boolean =>
    unit.rawFiles.some(isTouched) || unit.dependencyPaths.some(isTouched);
  return { workUnits: firstRunUnits.filter(isAffected) };
}
/** Adds `path` to `deps` only when that file exists in the staged directory. */
function addIfPresent(project: LoadedLookerProject, deps: Set<string>, path: string): void {
  if (!project.allPaths.includes(path)) {
    return;
  }
  deps.add(path);
}
/**
 * Records the explore behind `query` (its model/view pair) as a dependency, provided the
 * query exists and the explore file is staged.
 */
function addExploreDependency(project: LoadedLookerProject, deps: Set<string>, query: StagedLookerQuery | null): void {
  if (query) {
    addIfPresent(project, deps, explorePath(query.model, query.view));
  }
}
/** Returns the staged evidence files (metadata.json, page.md) that exist for an explore. */
function evidencePathsForExplore(project: LoadedLookerProject, modelName: string, exploreName: string): string[] {
  const base = `evidence/explores/${modelName}/${exploreName}`;
  return existingPaths(project, [`${base}/metadata.json`, `${base}/page.md`]);
}
/** Returns the staged evidence files (metadata.json, page.md) that exist for a dashboard. */
function evidencePathsForDashboard(project: LoadedLookerProject, dashboardId: string): string[] {
  const base = `evidence/dashboards/${dashboardId}`;
  return existingPaths(project, [`${base}/metadata.json`, `${base}/page.md`]);
}
/** Returns the staged evidence files (metadata.json, page.md) that exist for a Look. */
function evidencePathsForLook(project: LoadedLookerProject, lookId: string): string[] {
  const base = `evidence/looks/${lookId}`;
  return existingPaths(project, [`${base}/metadata.json`, `${base}/page.md`]);
}
/** Filters `paths` down to those present in the staged directory, preserving input order. */
function existingPaths(project: LoadedLookerProject, paths: string[]): string[] {
  const present: string[] = [];
  for (const candidate of paths) {
    if (project.allPaths.includes(candidate)) {
      present.push(candidate);
    }
  }
  return present;
}
/** Builds the staged relative path of an explore file: `explores/<model>/<explore>.json`. */
function explorePath(modelName: string, exploreName: string): string {
  return ['explores', modelName, `${exploreName}.json`].join('/');
}

View file

@ -0,0 +1,14 @@
import { readFile } from 'node:fs/promises';
import { describe, expect, it } from 'vitest';
// Guards the module boundary by scanning client.ts as text for forbidden references.
describe('LookerClient boundary', () => {
  it('does not import server or NestJS modules', async () => {
    const clientSourceUrl = new URL('./client.ts', import.meta.url);
    const source = await readFile(clientSourceUrl, 'utf-8');

    const forbiddenReferences = [
      /@nestjs\/common/,
      /DataSourceClient/,
      /\.\.\/interfaces/,
      /\.\.\/types/,
      /server\/src/,
    ];
    for (const pattern of forbiddenReferences) {
      expect(source).not.toMatch(pattern);
    }
  });
});

View file

@ -0,0 +1,455 @@
import { describe, expect, it, vi } from 'vitest';
import { LookerClient, type LookerSdkPort } from './client.js';
// Key of the client-secret connection param, used below as a computed property name.
// NOTE(review): presumably kept in one constant so the secret-scanner pragma is needed in fewer places — confirm.
const clientSecretParam = 'client_secret'; // pragma: allowlist secret
/** Returns a fresh set of dummy Looker connection params for constructing a client under test. */
function params(): Record<string, unknown> {
  const connection: Record<string, unknown> = {
    base_url: 'https://example.looker.com',
    client_id: 'id',
  };
  connection[clientSecretParam] = 'credential'; // pragma: allowlist secret
  return connection;
}
/**
 * Builds a fake `LookerSdkPort` whose every method is a `vi.fn()` resolving to a small,
 * consistent fixture (one dashboard, one Look, one folder, one user, one group, one
 * connection, one model with one explore).
 *
 * @param overrides Port methods to replace; spread last so they win over the defaults.
 */
function sdk(overrides: Partial<LookerSdkPort> = {}): LookerSdkPort {
  const dashboardFixture = {
    id: '10',
    title: 'Revenue Dashboard',
    description: 'Revenue concepts',
    folder_id: '20',
    user_id: '1',
    updated_at: '2026-04-30T00:00:00.000Z',
    dashboard_elements: [
      {
        id: '99',
        title: 'ARR',
        look_id: null,
        query: {
          id: 'q1',
          model: 'b2b',
          view: 'sales_pipeline',
          fields: ['opportunities.arr', 'opportunities.stage'],
          filters: { 'opportunities.stage': 'open' },
          sorts: ['opportunities.arr desc'],
          limit: '500',
        },
      },
    ],
  };

  const lookFixture = {
    id: '30',
    title: 'Open Pipeline ARR',
    description: 'ARR for open opportunities',
    folder_id: '20',
    user_id: '1',
    updated_at: '2026-04-30T00:00:00.000Z',
    query: {
      id: 'q2',
      model: 'b2b',
      view: 'sales_pipeline',
      fields: ['opportunities.arr'],
      filters: { 'opportunities.stage': 'open' },
    },
  };

  const connectionFixture = {
    name: 'b2b_sandbox_bq',
    host: 'warehouse.example.com',
    database: 'analytics',
    schema: 'public',
    dialect_name: 'bigquery_standard_sql',
  };

  // LookML substitution strings are assembled with '+' exactly as in the expectations.
  const exploreFixture = {
    name: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: 'Opportunity pipeline',
    sql_table_name: 'proj.dataset.opportunities AS opportunities',
    connection_name: 'b2b_sandbox_bq',
    view_name: 'opportunities',
    fields: {
      dimensions: [{ name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '$' + '{TABLE}.stage' }],
      measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '$' + '{TABLE}.arr' }],
    },
    joins: [
      {
        name: 'accounts',
        type: 'left_outer',
        relationship: 'many_to_one',
        sql_table_name: 'proj.dataset.accounts',
        sql_on: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
        from: null,
      },
    ],
  };

  return {
    me: vi.fn().mockResolvedValue({ id: '1', display_name: 'API User', email: 'api@example.com' }),
    search_dashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
    dashboard: vi.fn().mockResolvedValue(dashboardFixture),
    search_looks: vi.fn().mockResolvedValue([{ id: '30' }]),
    search_scheduled_plans: vi.fn().mockResolvedValue([]),
    look: vi.fn().mockResolvedValue(lookFixture),
    all_folders: vi.fn().mockResolvedValue([{ id: '20', name: 'Executive', parent_id: null }]),
    all_users: vi.fn().mockResolvedValue([{ id: '1', display_name: 'API User', email: 'api@example.com' }]),
    all_groups: vi.fn().mockResolvedValue([{ id: '2', name: 'Finance' }]),
    all_connections: vi.fn().mockResolvedValue([connectionFixture]),
    all_lookml_models: vi
      .fn()
      .mockResolvedValue([
        { name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] },
      ]),
    lookml_model_explore: vi.fn().mockResolvedValue(exploreFixture),
    run_inline_query: vi.fn().mockResolvedValue('[]'),
    logout: vi.fn().mockResolvedValue(undefined),
    ...overrides,
  };
}
describe('LookerClient', () => {
// testConnection() should surface the `me` record as success metadata.
it('validates credentials with me()', async () => {
  const client = new LookerClient(params(), { sdkFactory: () => sdk() });

  const outcome = await client.testConnection();

  expect(outcome).toEqual({
    success: true,
    metadata: { userId: '1', displayName: 'API User', email: 'api@example.com' },
  });
});
// Runs each read-side mapper against the default fake SDK and pins both the staged DTO shape
// and the exact field lists requested from the SDK. Users and groups are named in the title,
// so they are asserted here as well.
it('maps dashboards, looks, folders, models, explores, users, and groups to staged DTOs', async () => {
  const fakeSdk = sdk();
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });

  // The default search fixtures carry no updated_at, hence the null cursors.
  await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);
  await expect(client.getDashboard('10')).resolves.toMatchObject({
    lookerId: '10',
    title: 'Revenue Dashboard',
    tiles: [{ id: '99', query: { model: 'b2b', view: 'sales_pipeline' } }],
  });
  await expect(client.listLooks()).resolves.toEqual([{ id: '30', updatedAt: null }]);
  await expect(client.getLook('30')).resolves.toMatchObject({
    lookerId: '30',
    title: 'Open Pipeline ARR',
    query: { model: 'b2b', view: 'sales_pipeline' },
  });
  await expect(client.listFolders()).resolves.toEqual({
    folders: [{ id: '20', name: 'Executive', parentId: null, path: ['Executive'] }],
  });
  await expect(client.listUsers()).resolves.toEqual([
    { id: '1', displayName: 'API User', email: 'api@example.com' },
  ]);
  await expect(client.listGroups()).resolves.toEqual([{ id: '2', name: 'Finance' }]);
  await expect(client.listLookmlModels()).resolves.toEqual({
    models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
  });
  await expect(client.listLookerConnections()).resolves.toEqual([
    {
      name: 'b2b_sandbox_bq',
      host: 'warehouse.example.com',
      database: 'analytics',
      schema: 'public',
      dialect: 'bigquery_standard_sql',
    },
  ]);
  await expect(client.getExplore('b2b', 'sales_pipeline')).resolves.toMatchObject({
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
    connectionName: 'b2b_sandbox_bq',
    viewName: 'opportunities',
    fields: { dimensions: [{ name: 'opportunities.stage' }], measures: [{ name: 'opportunities.arr' }] },
    joins: [
      {
        name: 'accounts',
        rawSqlTableName: 'proj.dataset.accounts',
        sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
        from: null,
        targetTable: null,
      },
    ],
    targetWarehouseConnectionId: null,
    targetTable: null,
  });

  // Field lists are part of the contract with the Looker API; pin them verbatim.
  expect(fakeSdk.dashboard).toHaveBeenCalledWith(
    '10',
    'id,title,description,folder_id,user_id,updated_at,dashboard_elements(id,title,look_id,query(id,model,view,fields,filters,sorts,limit,dynamic_fields))',
  );
  expect(fakeSdk.look).toHaveBeenCalledWith(
    '30',
    'id,title,description,folder_id,user_id,updated_at,query(id,model,view,fields,filters,sorts,limit,dynamic_fields)',
  );
  expect(fakeSdk.lookml_model_explore).toHaveBeenCalledWith(
    'b2b',
    'sales_pipeline',
    'name,label,description,sql_table_name,connection_name,view_name,fields,joins(name,type,relationship,sql_table_name,sql_on,from)',
  );
  expect(fakeSdk.all_connections).toHaveBeenCalledWith('name,host,database,schema,dialect_name');
  expect(fakeSdk.all_users).toHaveBeenCalledWith('id,display_name,email');
  expect(fakeSdk.all_groups).toHaveBeenCalledWith('id,name');
});
// A rejected system__activity query must degrade to empty usage lists, not fail getSignals().
it('returns empty usage signals when system activity access fails', async () => {
  const buildSdk = (): LookerSdkPort =>
    sdk({
      run_inline_query: vi.fn().mockRejectedValue(new Error('access denied')),
      search_dashboards: vi.fn().mockResolvedValue([{ id: '10', favorite_count: 4 }]),
      search_looks: vi.fn().mockResolvedValue([{ id: '30', favorite_count: 2 }]),
      search_scheduled_plans: vi.fn().mockResolvedValue([]),
    });
  const client = new LookerClient(params(), { sdkFactory: buildSdk });

  const signals = await client.getSignals();

  // Favorites come from the search endpoints, so they survive the usage failure.
  expect(signals).toEqual({
    dashboardUsage: [],
    lookUsage: [],
    scheduledPlans: [],
    favorites: [
      { contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
      { contentId: '30', contentType: 'look', favoriteCount: 2 },
    ],
  });
});
// A full first page (500 rows) must trigger a second request at offset 500.
it('paginates dashboard and Look searches', async () => {
  const fullPage = (firstId: number): Array<{ id: string }> =>
    Array.from({ length: 500 }, (_, index) => ({ id: String(index + firstId) }));

  const fakeSdk = sdk({
    search_dashboards: vi
      .fn()
      .mockResolvedValueOnce(fullPage(1))
      .mockResolvedValueOnce([{ id: '501' }]),
    search_looks: vi
      .fn()
      .mockResolvedValueOnce(fullPage(1001))
      .mockResolvedValueOnce([{ id: '1501' }]),
  });
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });

  await expect(client.listDashboards()).resolves.toHaveLength(501);
  await expect(client.listLooks()).resolves.toHaveLength(501);

  const firstPageRequest = expect.objectContaining({
    deleted: false,
    fields: 'id,updated_at',
    limit: 500,
    offset: 0,
    sorts: 'id',
  });
  const secondPageRequest = expect.objectContaining({
    limit: 500,
    offset: 500,
  });

  for (const search of [fakeSdk.search_dashboards, fakeSdk.search_looks]) {
    expect(search).toHaveBeenNthCalledWith(1, firstPageRequest);
    expect(search).toHaveBeenNthCalledWith(2, secondPageRequest);
  }
});
// When the listing rows carry updated_at, it is passed through as the updatedAt cursor.
it('returns updatedAt cursors from dashboard and Look listing rows', async () => {
  const dashboardUpdatedAt = '2026-04-30T12:00:00.000Z';
  const lookUpdatedAt = '2026-04-30T11:00:00.000Z';
  const fakeSdk = sdk({
    search_dashboards: vi.fn().mockResolvedValue([{ id: '10', updated_at: dashboardUpdatedAt }]),
    search_looks: vi.fn().mockResolvedValue([{ id: '30', updated_at: lookUpdatedAt }]),
  });
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });

  expect(await client.listDashboards()).toEqual([{ id: '10', updatedAt: dashboardUpdatedAt }]);
  expect(await client.listLooks()).toEqual([{ id: '30', updatedAt: lookUpdatedAt }]);
});
// cleanup() must call logout exactly once on an SDK that has been used.
it('logs out the SDK session during cleanup', async () => {
  const fakeSdk = sdk();
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });

  // Make one call first so the client has an SDK instance to log out of.
  await client.testConnection();
  await client.cleanup();

  expect(fakeSdk.logout).toHaveBeenCalledTimes(1);
});
// End-to-end check of getSignals(): raw system__activity rows, scheduled plans and favorite
// counts go in; per-content aggregates come out.
it('aggregates usage, scheduled-plan, and favorite signals', async () => {
// Order matters: the first resolved value answers the dashboard usage query, the second the
// Look usage query. Note the second dashboard row reports its count as the string '2'.
const runInlineQuery = vi
.fn()
.mockResolvedValueOnce(
JSON.stringify([
{
'dashboard.id': '10',
'history.query_run_count': 3,
'history.created_date': '2026-04-30',
'user.id': 'user-1',
},
{
'dashboard.id': '10',
'history.query_run_count': '2',
'history.created_date': '2026-04-29',
'user.id': 'user-2',
},
]),
)
.mockResolvedValueOnce(
JSON.stringify([
{
'look.id': '30',
'history.query_run_count': 7,
'history.created_date': '2026-04-28',
'user.id': 'user-1',
},
]),
);
const fakeSdk = sdk({
run_inline_query: runInlineQuery,
search_dashboards: vi.fn().mockResolvedValueOnce([{ id: '10', favorite_count: 4 }]),
search_looks: vi.fn().mockResolvedValueOnce([{ id: '30', favorite_count: 2 }]),
search_scheduled_plans: vi.fn().mockResolvedValueOnce([
{
id: 'sp-dashboard',
dashboard_id: '10',
look_id: null,
enabled: true,
scheduled_plan_destination: [{ id: 'dest-1' }, { id: 'dest-2' }],
},
{
id: 'sp-look',
dashboard_id: null,
look_id: '30',
enabled: true,
scheduled_plan_destination: [{ id: 'dest-3' }],
},
]),
});
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
// Expected: counts summed per content id (3 + '2' = 5), users de-duplicated, latest date
// kept, and recipientCount equal to the number of plan destinations.
await expect(client.getSignals()).resolves.toEqual({
dashboardUsage: [
{
contentId: '10',
queryCount30d: 5,
uniqueUsers30d: 2,
lastRunAt: '2026-04-30',
topUsers: ['user-1', 'user-2'],
},
],
lookUsage: [
{
contentId: '30',
queryCount30d: 7,
uniqueUsers30d: 1,
lastRunAt: '2026-04-28',
topUsers: ['user-1'],
},
],
scheduledPlans: [
{
contentId: '10',
contentType: 'dashboard',
isScheduled: true,
scheduleCount: 1,
recipientCount: 2,
},
{
contentId: '30',
contentType: 'look',
isScheduled: true,
scheduleCount: 1,
recipientCount: 1,
},
],
favorites: [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
{ contentId: '30', contentType: 'look', favoriteCount: 2 },
],
});
// The first inline query is the dashboard usage query against system__activity/history.
expect(runInlineQuery).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
result_format: 'json',
body: expect.objectContaining({
model: 'system__activity',
view: 'history',
fields: ['dashboard.id', 'history.query_run_count', 'history.created_date', 'user.id'],
}),
}),
);
// Scheduled plans are fetched for all users, first page at offset 0.
expect(fakeSdk.search_scheduled_plans).toHaveBeenCalledWith(
expect.objectContaining({
all_users: true,
fields: 'id,dashboard_id,look_id,enabled,scheduled_plan_destination',
limit: 500,
offset: 0,
sorts: 'id',
}),
);
});
// A 429 carrying `retry-after: 2` should sleep 2000 ms and then retry the same call.
it('retries a 429 response once using Retry-After seconds', async () => {
  const sleep = vi.fn().mockResolvedValue(undefined);
  const rateLimitError = Object.assign(new Error('rate limited'), {
    statusCode: 429,
    headers: { 'retry-after': '2' },
  });
  const searchDashboards = vi
    .fn()
    .mockRejectedValueOnce(rateLimitError)
    .mockResolvedValueOnce([{ id: '10' }]);
  const fakeSdk = sdk({ search_dashboards: searchDashboards });
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk, sleep });

  const dashboards = await client.listDashboards();

  expect(dashboards).toEqual([{ id: '10', updatedAt: null }]);
  expect(sleep).toHaveBeenCalledWith(2000);
  expect(fakeSdk.search_dashboards).toHaveBeenCalledTimes(2);
});
// Any status other than 429 must propagate immediately: no sleep, no second attempt.
it('does not retry non-429 errors', async () => {
  const sleep = vi.fn().mockResolvedValue(undefined);
  const serverError = Object.assign(new Error('broken dashboard'), { statusCode: 500 });
  const fakeSdk = sdk({ dashboard: vi.fn().mockRejectedValue(serverError) });
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk, sleep });

  await expect(client.getDashboard('10')).rejects.toThrow('broken dashboard');

  expect(sleep).not.toHaveBeenCalled();
  expect(fakeSdk.dashboard).toHaveBeenCalledTimes(1);
});
// Regression guard for real SDK construction (no sdkFactory injected).
// NOTE(review): this constructs the real SDK against the example URL, so it appears to depend
// on the network failing rather than succeeding — confirm this is acceptable in CI.
it('initializes the real @looker/sdk-node SDK with inline credentials without throwing', async () => {
  const client = new LookerClient(params());

  const outcome = await client.testConnection();

  // With no injected factory the SDK is built through InlineLookerSettings. Earlier this
  // failed with "Missing required configuration values like base_url": the parent
  // NodeSettingsIniFile constructor validated config before the override could provide
  // credentials. An auth or network failure against the bogus example URL is expected and
  // fine; a config-validation error during SDK init is not.
  expect(outcome.success).toBe(false);
  expect(outcome.error).toBeDefined();
  expect(outcome.error).not.toMatch(/Missing required configuration values/i);

  await client.cleanup();
});
// NOTE(review): the assertions below only rule out a config-validation failure; they do not
// directly observe the normalized URL — consider asserting on the normalizer itself.
it('strips trailing /api/4.0 from base_url so the SDK does not double-prefix it', async () => {
  const suffixedParams = {
    base_url: 'https://example.looker.com/api/4.0',
    client_id: 'id',
    [clientSecretParam]: 'credential', // pragma: allowlist secret
  };
  const clientWithSuffix = new LookerClient(suffixedParams);

  const outcome = await clientWithSuffix.testConnection();

  expect(outcome.success).toBe(false);
  // A double-prefixed base_url would make the SDK call /api/4.0/api/4.0/login. Whether the
  // URL was normalized (transport-level network failure) or not (404/HTML response), the
  // failure must not be a config-validation throw.
  expect(outcome.error).not.toMatch(/Missing required configuration values/i);

  await clientWithSuffix.cleanup();
});
});

View file

@ -0,0 +1,732 @@
import type {
IRequestRunInlineQuery,
IRequestSearchDashboards,
IRequestSearchLooks,
IRequestSearchScheduledPlans,
} from '@looker/sdk';
import type { IApiSection, IApiSettings } from '@looker/sdk-rtl';
import { LookerNodeSDK, NodeSettings } from '@looker/sdk-node';
import type { LookerRuntimeClient } from './fetch.js';
import type {
StagedDashboardFile,
StagedExploreFile,
StagedFoldersTreeFile,
StagedGroupFile,
StagedLookerQuery,
StagedLookerSignalsFile,
StagedLookFile,
StagedLookmlModelsFile,
StagedUserFile,
} from './types.js';
/** Loosely typed SDK payload: property values stay `unknown` until a helper narrows them. */
type LookerRecord = Record<string, unknown>;
/** Outcome of `LookerClient.testConnection()`. */
export interface TestConnectionResult {
// True when the `me` call succeeded.
success: boolean;
// Failure message; set when `success` is false.
error?: string;
// On success: `userId`, `displayName` and `email` of the authenticated API user.
metadata?: Record<string, unknown>;
}
/**
 * Credentials needed to open a Looker API session. Extends `Record<string, unknown>`, so
 * additional keys are permitted by the type.
 */
export interface LookerConnectionParams extends Record<string, unknown> {
// Looker instance URL; passed through normalizeBaseUrl before reaching the SDK.
base_url: string;
// API client id.
client_id: string;
// API client secret.
client_secret: string;
}
/** One connection as returned by `LookerClient.listLookerConnections()`. */
export interface LookerWarehouseConnectionInfo {
// Connection name.
name: string;
// Remaining fields are null when absent from the API response.
host: string | null;
database: string | null;
schema: string | null;
// Taken from the API's `dialect_name`, falling back to `dialect`.
dialect: string | null;
}
// Page size sent as `limit` on the paged dashboard, Look and scheduled-plan searches.
const LOOKER_PAGE_SIZE = 500;
// `fields` selector passed to sdk.dashboard(): dashboard attributes plus each element's query.
const LOOKER_DASHBOARD_FIELDS =
'id,title,description,folder_id,user_id,updated_at,dashboard_elements(id,title,look_id,query(id,model,view,fields,filters,sorts,limit,dynamic_fields))';
// `fields` selector passed to sdk.look(): Look attributes plus its query.
const LOOKER_LOOK_FIELDS =
'id,title,description,folder_id,user_id,updated_at,query(id,model,view,fields,filters,sorts,limit,dynamic_fields)';
// `fields` selector passed to sdk.lookml_model_explore(): explore attributes, fields and joins.
const LOOKER_EXPLORE_FIELDS =
'name,label,description,sql_table_name,connection_name,view_name,fields,joins(name,type,relationship,sql_table_name,sql_on,from)';
/**
 * The slice of the Looker SDK that `LookerClient` uses, with responses flattened to plain
 * records. `createLookerSdkPort` adapts the real SDK to this shape; a substitute can be
 * supplied through `LookerClientDeps.sdkFactory`.
 */
export interface LookerSdkPort {
// Current API user.
me(fields?: string): Promise<LookerRecord>;
// Search endpoints take a request record (filters, fields, limit, offset, sorts).
search_dashboards(request?: LookerRecord): Promise<LookerRecord[]>;
dashboard(id: string, fields?: string): Promise<LookerRecord>;
search_looks(request?: LookerRecord): Promise<LookerRecord[]>;
search_scheduled_plans(request?: LookerRecord): Promise<LookerRecord[]>;
look(id: string, fields?: string): Promise<LookerRecord>;
// Unpaged listings; `fields` is a comma-separated field selector.
all_folders(fields?: string): Promise<LookerRecord[]>;
all_users(fields?: string): Promise<LookerRecord[]>;
all_groups(fields?: string): Promise<LookerRecord[]>;
all_connections(fields?: string): Promise<LookerRecord[]>;
all_lookml_models(fields?: string): Promise<LookerRecord[]>;
lookml_model_explore(modelName: string, exploreName: string, fields?: string): Promise<LookerRecord>;
// Resolves to the raw result string (the client requests JSON and parses it itself).
run_inline_query(request: IRequestRunInlineQuery): Promise<string>;
// Ends the SDK session.
logout(): Promise<void>;
}
/** Minimal logging surface the client writes to; `debug` is optional. */
export interface LookerClientLogger {
log(message: string): void;
warn(message: string): void;
error(message: string): void;
debug?(message: string): void;
}
/** Optional collaborators that can be injected into `LookerClient`. */
export interface LookerClientDeps {
// Builds the SDK port from the parsed connection params.
// NOTE(review): presumably defaults to createLookerSdkPort when omitted — the lookup is outside this excerpt.
sdkFactory?: (params: LookerConnectionParams) => LookerSdkPort;
// Delay function taking milliseconds.
// NOTE(review): presumably used to wait between rate-limit retries — the retry helper is outside this excerpt.
sleep?: (ms: number) => Promise<void>;
// Logger; the constructor falls back to the console-backed default when omitted.
logger?: LookerClientLogger;
}
/** Console-backed logger used when no logger is injected; each level maps to its console method. */
const defaultLogger: LookerClientLogger = {
  log(message) {
    console.log(message);
  },
  warn(message) {
    console.warn(message);
  },
  error(message) {
    console.error(message);
  },
  debug(message) {
    console.debug(message);
  },
};
/**
 * SDK settings sourced from in-memory connection params rather than an ini file.
 *
 * The same section is handed to the parent constructor and returned from `readConfig`, so
 * the credentials are available both at construction time and on later reads.
 */
class InlineLookerSettings extends NodeSettings {
  /** Builds the settings section (normalized base URL, credentials, SSL and timeout) from params. */
  private static sectionFor(params: LookerConnectionParams): IApiSection {
    return {
      base_url: normalizeBaseUrl(params.base_url),
      client_id: params.client_id,
      client_secret: params.client_secret, // pragma: allowlist secret
      verify_ssl: 'true',
      timeout: '120',
    };
  }

  constructor(private readonly params: LookerConnectionParams) {
    super('', InlineLookerSettings.sectionFor(params) as unknown as IApiSettings);
  }

  /** Ignores the requested section name and always returns the inline settings. */
  override readConfig(_section?: string): IApiSection {
    return InlineLookerSettings.sectionFor(this.params);
  }
}
/**
 * Adapts the real Looker 4.0 Node SDK to `LookerSdkPort`.
 *
 * Every call is unwrapped with `sdk.ok(...)` and, except for `run_inline_query` and `logout`,
 * converted with `toRecord` / `toRecordArray`.
 *
 * @param params Connection params used to build the inline SDK settings.
 */
function createLookerSdkPort(params: LookerConnectionParams): LookerSdkPort {
  const sdk = LookerNodeSDK.init40(new InlineLookerSettings(params));

  const port: LookerSdkPort = {
    me(fields) {
      return sdk.ok(sdk.me(fields)).then(toRecord);
    },
    search_dashboards(request) {
      const query = (request ?? {}) as IRequestSearchDashboards;
      return sdk.ok(sdk.search_dashboards(query)).then(toRecordArray);
    },
    dashboard(id, fields) {
      return sdk.ok(sdk.dashboard(id, fields)).then(toRecord);
    },
    search_looks(request) {
      const query = (request ?? {}) as IRequestSearchLooks;
      return sdk.ok(sdk.search_looks(query)).then(toRecordArray);
    },
    search_scheduled_plans(request) {
      const query = (request ?? {}) as IRequestSearchScheduledPlans;
      return sdk.ok(sdk.search_scheduled_plans(query)).then(toRecordArray);
    },
    look(id, fields) {
      return sdk.ok(sdk.look(id, fields)).then(toRecord);
    },
    all_folders(fields) {
      return sdk.ok(sdk.all_folders(fields)).then(toRecordArray);
    },
    all_users(fields) {
      return sdk.ok(sdk.all_users({ fields })).then(toRecordArray);
    },
    all_groups(fields) {
      return sdk.ok(sdk.all_groups({ fields })).then(toRecordArray);
    },
    all_connections(fields) {
      return sdk.ok(sdk.all_connections(fields)).then(toRecordArray);
    },
    all_lookml_models(fields) {
      return sdk.ok(sdk.all_lookml_models({ fields })).then(toRecordArray);
    },
    lookml_model_explore(modelName, exploreName, fields) {
      const request = { lookml_model_name: modelName, explore_name: exploreName, fields };
      return sdk.ok(sdk.lookml_model_explore(request)).then(toRecord);
    },
    run_inline_query(request) {
      return sdk.ok(sdk.run_inline_query(request));
    },
    async logout() {
      await sdk.authSession.logout();
    },
  };
  return port;
}
export class LookerClient implements LookerRuntimeClient {
// Logger used by this client; the injected one, or the console-backed default.
private readonly logger: LookerClientLogger;
// Connection params as returned by parseLookerConnectionParams.
private readonly params: LookerConnectionParams;
// SDK port held by this client; starts as null and is reset to null by cleanup().
private sdkInstance: LookerSdkPort | null = null;
/**
 * @param connectionParams Raw connection params; converted with parseLookerConnectionParams.
 * @param deps Optional injected collaborators (SDK factory, sleep, logger).
 */
constructor(
connectionParams: Record<string, unknown>,
private readonly deps: LookerClientDeps = {},
) {
this.logger = deps.logger ?? defaultLogger;
this.params = parseLookerConnectionParams(connectionParams);
}
/** Data source kind reported by this client; always the literal 'LOOKER'. */
get dataSourceType(): string {
return 'LOOKER';
}
/**
 * Verifies the credentials by fetching the current API user.
 *
 * @returns `success: true` with the user's id, display name and email, or `success: false`
 *   with the error message. Never rejects: every failure is converted into a result.
 */
async testConnection(): Promise<TestConnectionResult> {
  try {
    const {
      id,
      display_name: displayName,
      email,
    } = await this.withRateLimitRetry(() => this.sdk().me('id,display_name,email'));
    const metadata = {
      userId: stringValue(id),
      displayName: nullableString(displayName),
      email: nullableString(email),
    };
    return { success: true, metadata };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { success: false, error: message };
  }
}
/**
 * Lists all non-deleted dashboards, page by page, ordered by id.
 *
 * @returns One `{ id, updatedAt }` reference per row, as produced by `entityRef`.
 */
async listDashboards(): Promise<Array<{ id: string; updatedAt: string | null }>> {
  const fetchPage = (offset: number) =>
    this.sdk().search_dashboards({
      deleted: false,
      fields: 'id,updated_at',
      limit: LOOKER_PAGE_SIZE,
      offset,
      sorts: 'id',
    });
  const rows = await this.collectPaged(fetchPage);
  return rows.flatMap(entityRef);
}
/**
 * Fetches one dashboard and maps it, including its elements (as tiles), to the staged shape.
 *
 * @param id Looker dashboard id.
 */
async getDashboard(id: string): Promise<StagedDashboardFile> {
  const raw = await this.withRateLimitRetry(() => this.sdk().dashboard(id, LOOKER_DASHBOARD_FIELDS));
  const elements = arrayValue(raw.dashboard_elements);
  return {
    lookerId: stringValue(raw.id),
    title: stringValue(raw.title),
    description: nullableString(raw.description),
    folderId: nullableString(raw.folder_id),
    ownerId: nullableString(raw.user_id),
    updatedAt: nullableString(raw.updated_at),
    tiles: elements.map((element) => {
      return {
        id: stringValue(element.id),
        title: nullableString(element.title),
        lookId: nullableString(element.look_id),
        query: queryValue(element.query),
      };
    }),
  };
}
/**
 * Lists all non-deleted Looks, page by page, ordered by id.
 *
 * @returns One `{ id, updatedAt }` reference per row, as produced by `entityRef`.
 */
async listLooks(): Promise<Array<{ id: string; updatedAt: string | null }>> {
  const fetchPage = (offset: number) =>
    this.sdk().search_looks({
      deleted: false,
      fields: 'id,updated_at',
      limit: LOOKER_PAGE_SIZE,
      offset,
      sorts: 'id',
    });
  const rows = await this.collectPaged(fetchPage);
  return rows.flatMap(entityRef);
}
/**
 * Fetches one Look and maps it, with its query, to the staged shape.
 *
 * @param id Looker Look id.
 */
async getLook(id: string): Promise<StagedLookFile> {
  const raw = await this.withRateLimitRetry(() => this.sdk().look(id, LOOKER_LOOK_FIELDS));
  const staged: StagedLookFile = {
    lookerId: stringValue(raw.id),
    title: stringValue(raw.title),
    description: nullableString(raw.description),
    folderId: nullableString(raw.folder_id),
    ownerId: nullableString(raw.user_id),
    updatedAt: nullableString(raw.updated_at),
    query: queryValue(raw.query),
  };
  return staged;
}
/**
 * Lists every folder together with its path, computed by `folderPath` from an id index of
 * all folders.
 */
async listFolders(): Promise<StagedFoldersTreeFile> {
  const rows = await this.withRateLimitRetry(() => this.sdk().all_folders('id,name,parent_id'));
  // Index by id first so folderPath can look up parents.
  const byId = new Map<string, LookerRecord>(
    rows.map((row): [string, LookerRecord] => [stringValue(row.id), row]),
  );
  const folders = rows.map((row) => ({
    id: stringValue(row.id),
    name: stringValue(row.name),
    parentId: nullableString(row.parent_id),
    path: folderPath(row, byId),
  }));
  return { folders };
}
/** Lists all users as staged records (id, display name, email). */
async listUsers(): Promise<StagedUserFile[]> {
  const rows = await this.withRateLimitRetry(() => this.sdk().all_users('id,display_name,email'));
  const staged: StagedUserFile[] = [];
  for (const row of rows) {
    staged.push({
      id: stringValue(row.id),
      displayName: nullableString(row.display_name),
      email: nullableString(row.email),
    });
  }
  return staged;
}
/** Lists all groups as staged records (id, name). */
async listGroups(): Promise<StagedGroupFile[]> {
  const rows = await this.withRateLimitRetry(() => this.sdk().all_groups('id,name'));
  const staged: StagedGroupFile[] = [];
  for (const row of rows) {
    staged.push({ id: stringValue(row.id), name: stringValue(row.name) });
  }
  return staged;
}
/** Lists all LookML models, each with the name and label of its explores. */
async listLookmlModels(): Promise<StagedLookmlModelsFile> {
  const rows = await this.withRateLimitRetry(() => this.sdk().all_lookml_models('name,label,explores'));
  const models = rows.map((row) => {
    return {
      name: stringValue(row.name),
      label: nullableString(row.label),
      explores: arrayValue(row.explores).map((entry) => {
        return { name: stringValue(entry.name), label: nullableString(entry.label) };
      }),
    };
  });
  return { models };
}
/**
 * Lists the connections configured in Looker.
 *
 * @returns One entry per connection; `dialect` uses `dialect_name`, else `dialect`.
 */
async listLookerConnections(): Promise<LookerWarehouseConnectionInfo[]> {
  const requestedFields = 'name,host,database,schema,dialect_name';
  const rows = await this.withRateLimitRetry(() => this.sdk().all_connections(requestedFields));
  const infos: LookerWarehouseConnectionInfo[] = [];
  for (const row of rows) {
    infos.push({
      name: stringValue(row.name),
      host: nullableString(row.host),
      database: nullableString(row.database),
      schema: nullableString(row.schema),
      dialect: nullableString(row.dialect_name ?? row.dialect),
    });
  }
  return infos;
}
/**
 * Fetches one explore and maps it to the staged shape.
 *
 * `modelName` is echoed from the argument while `exploreName` comes from the response.
 * Snake_case response keys are preferred, with camelCase accepted as a fallback. The
 * `targetTable` and `targetWarehouseConnectionId` fields are always returned as null here.
 *
 * @param modelName LookML model name.
 * @param exploreName Explore name within that model.
 */
async getExplore(modelName: string, exploreName: string): Promise<StagedExploreFile> {
  const raw = await this.withRateLimitRetry(() =>
    this.sdk().lookml_model_explore(modelName, exploreName, LOOKER_EXPLORE_FIELDS),
  );
  const fieldGroups = recordValue(raw.fields);

  const toStagedJoin = (joined: LookerRecord) => ({
    name: stringValue(joined.name),
    type: nullableString(joined.type),
    relationship: nullableString(joined.relationship),
    rawSqlTableName: nullableString(joined.sql_table_name ?? joined.sqlTableName),
    sqlOn: nullableString(joined.sql_on ?? joined.sqlOn),
    from: nullableString(joined.from),
    targetTable: null,
  });

  return {
    modelName,
    exploreName: stringValue(raw.name),
    label: nullableString(raw.label),
    description: nullableString(raw.description),
    rawSqlTableName: nullableString(raw.sql_table_name ?? raw.sqlTableName),
    connectionName: nullableString(raw.connection_name ?? raw.connectionName),
    viewName: nullableString(raw.view_name ?? raw.viewName),
    fields: {
      dimensions: arrayValue(fieldGroups.dimensions).map(stagedField),
      measures: arrayValue(fieldGroups.measures).map(stagedField),
    },
    joins: arrayValue(raw.joins).map((joined) => toStagedJoin(joined)),
    targetWarehouseConnectionId: null,
    targetTable: null,
  };
}
/**
 * Collects the four prioritisation signal sets: dashboard usage, Look usage,
 * scheduled plans and favorites.
 *
 * All four loaders are started before any is awaited. A loader that fails is
 * logged through warnAndReturnEmpty and contributes an empty list, so one
 * failing source does not reject the whole call.
 */
async getSignals(): Promise<StagedLookerSignalsFile> {
  const dashboardUsageTask = this.getUsageSignals('dashboard').catch((error) =>
    this.warnAndReturnEmpty('Looker system__activity dashboard usage unavailable', error),
  );
  const lookUsageTask = this.getUsageSignals('look').catch((error) =>
    this.warnAndReturnEmpty('Looker system__activity Look usage unavailable', error),
  );
  const scheduledPlansTask = this.getScheduledPlanSignals().catch((error) =>
    this.warnAndReturnEmpty('Looker scheduled-plan signals unavailable', error),
  );
  const favoritesTask = this.getFavoriteSignals().catch((error) =>
    this.warnAndReturnEmpty('Looker favorite signals unavailable', error),
  );
  const [dashboardUsage, lookUsage, scheduledPlans, favorites] = await Promise.all([
    dashboardUsageTask,
    lookUsageTask,
    scheduledPlansTask,
    favoritesTask,
  ]);
  return { dashboardUsage, lookUsage, scheduledPlans, favorites };
}
/**
 * Logs out the cached SDK instance, if one was created, and then forgets it.
 * Does nothing when no SDK instance exists. If logout rejects, the cached
 * instance is left in place and the rejection propagates.
 */
async cleanup(): Promise<void> {
  const active = this.sdkInstance;
  if (active) {
    await active.logout();
    this.sdkInstance = null;
  }
}
/**
 * Runs an inline query against the `system__activity` model's `history` view
 * and aggregates the rows per dashboard or Look.
 *
 * The query covers the last 30 days, excludes rows whose content id is NULL,
 * sorts by run count descending and is limited to 5000 rows.
 *
 * @param contentType Which content id column to query and aggregate on.
 */
private async getUsageSignals(contentType: 'dashboard' | 'look'): Promise<StagedLookerSignalsFile['dashboardUsage']> {
  const idFieldByType = { dashboard: 'dashboard.id', look: 'look.id' } as const;
  const idField = idFieldByType[contentType];
  const runHistoryQuery = () =>
    this.sdk().run_inline_query({
      result_format: 'json',
      body: {
        model: 'system__activity',
        view: 'history',
        fields: [idField, 'history.query_run_count', 'history.created_date', 'user.id'],
        filters: {
          'history.created_date': '30 days',
          [idField]: '-NULL',
        },
        sorts: ['history.query_run_count desc'],
        limit: '5000',
      },
    });
  const rawJson = await this.withRateLimitRetry(runHistoryQuery);
  const rows = parseJsonRows(rawJson);
  return aggregateUsageRows(rows, idField);
}
/**
 * Summarises scheduled plans per dashboard or Look.
 *
 * Pages through `search_scheduled_plans` for all users, groups the plans by
 * `<contentType>:<contentId>` and counts schedules and destinations for every
 * plan whose `enabled` flag is not explicitly `false`. Content that ends up
 * with no counted schedule is omitted from the result.
 *
 * @returns Signals sorted with compareContentSignals.
 */
private async getScheduledPlanSignals(): Promise<StagedLookerSignalsFile['scheduledPlans']> {
  const plans = await this.collectPaged((offset) =>
    this.sdk().search_scheduled_plans({
      all_users: true,
      fields: 'id,dashboard_id,look_id,enabled,scheduled_plan_destination',
      limit: LOOKER_PAGE_SIZE,
      offset,
      sorts: 'id',
    }),
  );
  const byContent = new Map<
    string,
    {
      contentId: string;
      contentType: 'dashboard' | 'look';
      isScheduled: boolean;
      scheduleCount: number;
      recipientCount: number;
    }
  >();
  for (const plan of plans) {
    const dashboardId = nullableString(plan.dashboard_id);
    const lookId = nullableString(plan.look_id);
    // Choose the type and the id from the same branch. Deriving the id with `??`
    // while the type used truthiness paired an empty dashboard id with the
    // 'look' type and then dropped the plan because the id was blank.
    let contentType: 'dashboard' | 'look';
    let contentId: string;
    if (dashboardId) {
      contentType = 'dashboard';
      contentId = dashboardId;
    } else if (lookId) {
      contentType = 'look';
      contentId = lookId;
    } else {
      // The plan targets neither a dashboard nor a Look; nothing to attribute it to.
      continue;
    }
    const key = `${contentType}:${contentId}`;
    const current =
      byContent.get(key) ??
      ({
        contentId,
        contentType,
        isScheduled: false,
        scheduleCount: 0,
        recipientCount: 0,
      } satisfies StagedLookerSignalsFile['scheduledPlans'][number]);
    // Only an explicit `enabled: false` excludes a plan; a missing flag counts as enabled.
    if (plan.enabled !== false) {
      current.isScheduled = true;
      current.scheduleCount += 1;
      current.recipientCount += arrayValue(plan.scheduled_plan_destination).length;
    }
    byContent.set(key, current);
  }
  return [...byContent.values()].filter((signal) => signal.scheduleCount > 0).sort(compareContentSignals);
}
/**
 * Collects favorite counts for all non-deleted dashboards and Looks.
 *
 * Dashboards are paged first, then Looks; both searches request only `id` and
 * `favorite_count`, sorted by id. Rows without an id are skipped by
 * favoriteSignal.
 *
 * @returns Favorite signals sorted with compareContentSignals.
 */
private async getFavoriteSignals(): Promise<StagedLookerSignalsFile['favorites']> {
  const pageRequest = (offset: number) => ({
    deleted: false,
    fields: 'id,favorite_count',
    limit: LOOKER_PAGE_SIZE,
    offset,
    sorts: 'id',
  });
  const dashboards = await this.collectPaged((offset) => this.sdk().search_dashboards(pageRequest(offset)));
  const looks = await this.collectPaged((offset) => this.sdk().search_looks(pageRequest(offset)));
  const dashboardSignals = dashboards.flatMap((dashboard) => favoriteSignal(dashboard, 'dashboard'));
  const lookSignals = looks.flatMap((look) => favoriteSignal(look, 'look'));
  return [...dashboardSignals, ...lookSignals].sort(compareContentSignals);
}
/**
 * Logs a warning that an optional prioritisation input could not be loaded and
 * returns an empty list to use in its place.
 *
 * @param message Description of the input that is unavailable.
 * @param error The failure; its message is appended to the warning.
 */
private warnAndReturnEmpty(message: string, error: unknown): never[] {
  const reason = errorMessage(error);
  this.logger.warn(`${message}; continuing without that prioritization input: ${reason}`);
  return [];
}
/**
 * Loads every page of an offset-paged listing and concatenates the rows.
 *
 * Pages are requested at offsets 0, LOOKER_PAGE_SIZE, 2 * LOOKER_PAGE_SIZE, …
 * through withRateLimitRetry. Paging stops after the first page that holds
 * fewer than LOOKER_PAGE_SIZE rows.
 *
 * @param loadPage Fetches the page that starts at the given offset.
 */
private async collectPaged(loadPage: (offset: number) => Promise<LookerRecord[]>): Promise<LookerRecord[]> {
  const collected: LookerRecord[] = [];
  let offset = 0;
  let batch: LookerRecord[];
  do {
    // Freeze the offset for this request so a retry re-reads the same page.
    const pageOffset = offset;
    batch = await this.withRateLimitRetry(() => loadPage(pageOffset));
    collected.push(...batch);
    offset += LOOKER_PAGE_SIZE;
  } while (batch.length >= LOOKER_PAGE_SIZE);
  return collected;
}
/**
 * Runs `load` and retries it exactly once when the failure carries HTTP
 * status 429.
 *
 * Before the retry it waits for the delay derived from the error by
 * retryAfterMs, using `deps.sleep` when provided and the module-level `sleep`
 * otherwise. Any other failure, and a failure of the retry itself, propagates.
 */
private async withRateLimitRetry<T>(load: () => Promise<T>): Promise<T> {
  let failure: unknown;
  try {
    return await load();
  } catch (error) {
    failure = error;
  }
  if (lookerStatusCode(failure) !== 429) {
    throw failure;
  }
  const wait = this.deps.sleep ?? sleep;
  await wait(retryAfterMs(failure));
  return load();
}
/**
 * Returns the cached SDK port, creating it on first use.
 * The injected `deps.sdkFactory` is preferred; createLookerSdkPort is the fallback.
 */
private sdk(): LookerSdkPort {
  if (this.sdkInstance) {
    return this.sdkInstance;
  }
  const created = this.deps.sdkFactory?.(this.params) ?? createLookerSdkPort(this.params);
  this.sdkInstance = created;
  return created;
}
}
/**
 * Validates raw connection settings and extracts the Looker credentials.
 *
 * `base_url`, `client_id` and `client_secret` must each be a string with at
 * least one non-whitespace character. They are checked in that order and the
 * first failure throws. The values are returned untrimmed.
 *
 * @throws Error naming the first missing or blank field.
 */
function parseLookerConnectionParams(raw: Record<string, unknown>): LookerConnectionParams {
  const hasText = (candidate: unknown): candidate is string =>
    typeof candidate === 'string' && candidate.trim() !== '';
  const { base_url: url, client_id: id, client_secret: credential } = raw; // pragma: allowlist secret
  if (!hasText(url)) {
    throw new Error('Looker base_url is required');
  }
  if (!hasText(id)) {
    throw new Error('Looker client_id is required');
  }
  if (!hasText(credential)) {
    throw new Error('Looker client_secret is required'); // pragma: allowlist secret
  }
  return { base_url: url, client_id: id, client_secret: credential }; // pragma: allowlist secret
}
/** Reinterprets an arbitrary object as a LookerRecord; the value itself is returned unchanged. */
function toRecord(value: object): LookerRecord {
  const record = value as LookerRecord;
  return record;
}
/** Reinterprets every element of an object array as a LookerRecord, returning a new array of the same elements. */
function toRecordArray(values: object[]): LookerRecord[] {
  return values.map((value) => value as LookerRecord);
}
/**
 * Normalises a Looker base URL: trims surrounding whitespace, removes trailing
 * slashes, then removes a trailing `/api/4.0` or `/api/3.1` segment.
 */
function normalizeBaseUrl(baseUrl: string): string {
  const trimmed = baseUrl.trim();
  const withoutTrailingSlashes = trimmed.replace(/\/+$/, '');
  return withoutTrailingSlashes.replace(/\/api\/(4\.0|3\.1)$/, '');
}
/**
 * Builds a zero-or-one element list holding the row's id and `updated_at`.
 * Rows whose id is null or undefined produce an empty list, which lets callers
 * use this with `flatMap`.
 */
function entityRef(row: LookerRecord): Array<{ id: string; updatedAt: string | null }> {
  const { id } = row;
  if (id == null) {
    return [];
  }
  const updatedAt = nullableString(row.updated_at);
  return [{ id: String(id), updatedAt }];
}
/**
 * Converts a raw Looker query object into the staged query shape.
 *
 * Returns null unless `value` is an object whose `model` and `view` are both
 * strings. `limit` is kept only when it is a string or a number. The
 * warehouse-resolution fields are always returned as null here.
 */
function queryValue(value: unknown): StagedLookerQuery | null {
  if (typeof value !== 'object' || value === null) {
    return null;
  }
  const record = value as LookerRecord;
  const { model, view, limit } = record;
  if (typeof model !== 'string' || typeof view !== 'string') {
    return null;
  }
  const keepsLimit = typeof limit === 'string' || typeof limit === 'number';
  return {
    id: nullableString(record.id) ?? undefined,
    model,
    view,
    fields: stringArray(record.fields),
    filters: recordValue(record.filters),
    sorts: stringArray(record.sorts),
    limit: keepsLimit ? limit : null,
    dynamicFields: nullableString(record.dynamic_fields ?? record.dynamicFields),
    targetWarehouseConnectionId: null,
    targetTable: null,
  };
}
/**
 * Parses a JSON string and returns its object elements when the top level is
 * an array; any other top-level value yields an empty list.
 *
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function parseJsonRows(raw: string): LookerRecord[] {
  const parsed: unknown = JSON.parse(raw);
  if (!Array.isArray(parsed)) {
    return [];
  }
  return parsed.filter((row): row is LookerRecord => typeof row === 'object' && row !== null);
}
/**
 * Aggregates `system__activity` history rows into one usage signal per content id.
 *
 * For every content id this sums `history.query_run_count`, tracks the set of
 * user ids seen, and keeps the greatest `history.created_date` string as
 * `lastRunAt`. `topUsers` lists up to five user ids ranked by the run count
 * accumulated for that user on that content. Ties keep the plain string
 * ordering of the ids, so the result stays deterministic.
 *
 * @param rows Parsed query rows; rows without a content id are ignored.
 * @param idField Column that carries the content id.
 * @returns Signals sorted by content id.
 */
function aggregateUsageRows(
  rows: LookerRecord[],
  idField: 'dashboard.id' | 'look.id',
): StagedLookerSignalsFile['dashboardUsage'] {
  const byContent = new Map<
    string,
    {
      contentId: string;
      queryCount30d: number;
      lastRunAt: string | null;
      runsByUser: Map<string, number>;
    }
  >();
  for (const row of rows) {
    const contentId = nullableString(row[idField]);
    if (!contentId) {
      continue;
    }
    let current = byContent.get(contentId);
    if (!current) {
      current = { contentId, queryCount30d: 0, lastRunAt: null, runsByUser: new Map<string, number>() };
      byContent.set(contentId, current);
    }
    const runCount = numberValue(row['history.query_run_count']);
    current.queryCount30d += runCount;
    const userId = nullableString(row['user.id']);
    if (userId) {
      // Track runs per user so "top" users can be ranked by activity instead of by id.
      current.runsByUser.set(userId, (current.runsByUser.get(userId) ?? 0) + runCount);
    }
    const lastRunAt = nullableString(row['history.created_date']);
    // Plain string comparison — assumes the date strings sort chronologically (TODO confirm format).
    if (lastRunAt && (!current.lastRunAt || lastRunAt > current.lastRunAt)) {
      current.lastRunAt = lastRunAt;
    }
  }
  return [...byContent.values()]
    .map((signal) => ({
      contentId: signal.contentId,
      queryCount30d: signal.queryCount30d,
      uniqueUsers30d: signal.runsByUser.size,
      lastRunAt: signal.lastRunAt,
      topUsers: [...signal.runsByUser.entries()]
        .sort(([idA, runsA], [idB, runsB]) => runsB - runsA || (idA < idB ? -1 : idA > idB ? 1 : 0))
        .slice(0, 5)
        .map(([userId]) => userId),
    }))
    .sort((a, b) => a.contentId.localeCompare(b.contentId));
}
/**
 * Builds a zero-or-one element list with the favorite count of a dashboard or Look.
 * Rows without a usable id yield an empty list; the count is read through numberValue.
 */
function favoriteSignal(row: LookerRecord, contentType: 'dashboard' | 'look'): StagedLookerSignalsFile['favorites'] {
  const contentId = nullableString(row.id);
  if (contentId) {
    const favoriteCount = numberValue(row.favorite_count);
    return [{ contentId, contentType, favoriteCount }];
  }
  return [];
}
/**
 * Sort comparator for content signals: compares the `<contentType>:<contentId>`
 * keys with `localeCompare`. A missing content type counts as an empty string.
 */
function compareContentSignals(
  a: { contentType?: string; contentId: string },
  b: { contentType?: string; contentId: string },
): number {
  const sortKey = (signal: { contentType?: string; contentId: string }): string =>
    `${signal.contentType ?? ''}:${signal.contentId}`;
  const left = sortKey(a);
  const right = sortKey(b);
  return left.localeCompare(right);
}
/**
 * Coerces a value to a finite number, falling back to 0.
 *
 * Finite numbers pass through. Non-blank strings are converted with `Number`
 * and kept when the result is finite. Everything else becomes 0.
 */
function numberValue(value: unknown): number {
  switch (typeof value) {
    case 'number':
      return Number.isFinite(value) ? value : 0;
    case 'string': {
      if (value.trim() === '') {
        return 0;
      }
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : 0;
    }
    default:
      return 0;
  }
}
/** Returns the message of an Error instance, or the string form of any other thrown value. */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/** Resolves after roughly `ms` milliseconds, using `setTimeout`. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Extracts an HTTP status code from an error-like value.
 *
 * Checks `statusCode`, then `status`, on the value itself. A number is returned
 * as-is; a non-blank string is returned when it converts to a finite number.
 * When neither yields a usable code, the nested `response` object (if any) is
 * searched the same way.
 *
 * @returns The status code, or null when none can be found.
 */
function lookerStatusCode(error: unknown): number | null {
  if (!error || typeof error !== 'object') {
    return null;
  }
  const record = error as Record<string, unknown>;
  const direct = record.statusCode ?? record.status;
  if (typeof direct === 'number') {
    return direct;
  }
  // A blank string would convert to 0, so it is treated as "no status".
  if (typeof direct === 'string' && direct.trim() !== '') {
    const parsed = Number(direct);
    if (Number.isFinite(parsed)) {
      return parsed;
    }
    // A textual status such as 'error' says nothing about the HTTP code;
    // keep looking on the nested response instead of giving up.
  }
  const response = record.response;
  // Skip a response that points back at the error itself to avoid endless recursion.
  if (response && typeof response === 'object' && response !== error) {
    return lookerStatusCode(response);
  }
  return null;
}
/**
 * Derives how long to wait, in milliseconds, before retrying a failed request.
 *
 * The Retry-After value found by retryAfterHeader is read first as a number of
 * seconds and otherwise as a date parsed with `Date.parse`. The result is never
 * negative. A missing or unparseable header gives 1000 ms.
 */
function retryAfterMs(error: unknown): number {
  const fallbackMs = 1000;
  const header = retryAfterHeader(error);
  if (!header) {
    return fallbackMs;
  }
  const asSeconds = Number(header);
  if (Number.isFinite(asSeconds)) {
    return Math.max(0, asSeconds * 1000);
  }
  const retryAt = Date.parse(header);
  if (!Number.isFinite(retryAt)) {
    return fallbackMs;
  }
  return Math.max(0, retryAt - Date.now());
}
/**
 * Finds the Retry-After header value on an error-like object.
 *
 * Headers are taken from `error.headers`, falling back to
 * `error.response.headers`. Header containers that expose a `get` function are
 * queried through it. Plain objects are searched by name, ignoring case, with
 * the exact lowercase key checked first.
 *
 * @returns The header value when it is a string, otherwise null.
 */
function retryAfterHeader(error: unknown): string | null {
  if (!error || typeof error !== 'object') {
    return null;
  }
  const record = error as Record<string, unknown>;
  const response = record.response;
  const responseRecord = response && typeof response === 'object' ? (response as Record<string, unknown>) : null;
  const headers = record.headers ?? responseRecord?.headers;
  if (!headers || typeof headers !== 'object') {
    return null;
  }
  const getter = (headers as { get?: unknown }).get;
  if (typeof getter === 'function') {
    const value: unknown = getter.call(headers, 'retry-after');
    return typeof value === 'string' ? value : null;
  }
  const headerRecord = headers as Record<string, unknown>;
  const exact = headerRecord['retry-after'];
  if (typeof exact === 'string') {
    return exact;
  }
  // Header names are case-insensitive, so accept any spelling (Retry-After, RETRY-AFTER, …).
  for (const [name, value] of Object.entries(headerRecord)) {
    if (name.toLowerCase() === 'retry-after' && typeof value === 'string') {
      return value;
    }
  }
  return null;
}
/**
 * Normalises a raw explore field (dimension or measure) into the staged field shape.
 * `name` is always a string; the remaining properties are strings or null.
 */
function stagedField(value: LookerRecord) {
  const { name, label, type, sql, description } = value;
  return {
    name: stringValue(name),
    label: nullableString(label),
    type: nullableString(type),
    sql: nullableString(sql),
    description: nullableString(description),
  };
}
/**
 * Builds the list of folder names from the outermost known ancestor down to `folder`.
 *
 * Parents are resolved through `parent_id` in `byId`. The walk ends when a
 * folder has no parent id, when the parent is not in `byId`, or when a folder
 * id repeats, which guards against cyclic parent links.
 */
function folderPath(folder: LookerRecord, byId: Map<string, LookerRecord>): string[] {
  const namesLeafFirst: string[] = [];
  const visited = new Set<string>();
  for (let node: LookerRecord | undefined = folder; node; ) {
    const id = stringValue(node.id);
    if (visited.has(id)) {
      break;
    }
    visited.add(id);
    namesLeafFirst.push(stringValue(node.name));
    const parentId = nullableString(node.parent_id);
    node = parentId ? byId.get(parentId) : undefined;
  }
  // Names were collected from the folder upwards; flip them to read root-first.
  return namesLeafFirst.reverse();
}
/** Returns the non-null object elements of `value` when it is an array, otherwise an empty list. */
function arrayValue(value: unknown): LookerRecord[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter((item): item is LookerRecord => typeof item === 'object' && item !== null);
}
/** Returns a shallow copy of `value` when it is a non-array object, otherwise an empty object. */
function recordValue(value: unknown): Record<string, unknown> {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  const copy = { ...(value as Record<string, unknown>) };
  return copy;
}
/** Returns the string elements of `value` when it is an array, otherwise an empty list. */
function stringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const strings: string[] = [];
  for (const item of value) {
    if (typeof item === 'string') {
      strings.push(item);
    }
  }
  return strings;
}
/** Converts a value to a string; null and undefined become the empty string. */
function stringValue(value: unknown): string {
  return value == null ? '' : String(value);
}
/** Converts a value to a string; null and undefined are returned as null. */
function nullableString(value: unknown): string | null {
  return value == null ? null : String(value);
}

View file

@ -0,0 +1,44 @@
import { describe, expect, it, vi } from 'vitest';
import { createDaemonLookerTableIdentifierParser } from './daemon-table-identifier-parser.js';
// Unit tests for the daemon-backed table identifier parser. The transport is
// always injected through `requestJson`, so no HTTP request is made.
describe('createDaemonLookerTableIdentifierParser', () => {
it('posts parse items to the daemon endpoint', async () => {
// Stub transport that answers with a canned daemon payload keyed by item key.
const requestJson = vi.fn(async () => ({
results: {
orders: {
ok: true,
catalog: null,
schema: 'public',
name: 'orders',
canonical_table: 'public.orders',
},
},
}));
const parser = createDaemonLookerTableIdentifierParser({
baseUrl: 'http://127.0.0.1:8765',
requestJson,
});
// The parser must hand back the `results` object of the response unchanged.
await expect(parser.parse([{ key: 'orders', sql_table_name: 'public.orders', dialect: 'postgres' }])).resolves.toEqual({
orders: {
ok: true,
catalog: null,
schema: 'public',
name: 'orders',
canonical_table: 'public.orders',
},
});
// The items are sent, wrapped in `{ items }`, to the parse endpoint path.
expect(requestJson).toHaveBeenCalledWith('/sql/parse-table-identifier', {
items: [{ key: 'orders', sql_table_name: 'public.orders', dialect: 'postgres' }],
});
});
it('rejects non-object daemon responses', async () => {
// `results: null` is not a plain object, so parse() must reject.
const parser = createDaemonLookerTableIdentifierParser({
baseUrl: 'http://127.0.0.1:8765',
requestJson: async () => ({ results: null }),
});
await expect(parser.parse([])).rejects.toThrow('klo-daemon table identifier parser returned invalid results');
});
});

View file

@ -0,0 +1,81 @@
import { request as httpRequest } from 'node:http';
import { request as httpsRequest } from 'node:https';
import { URL } from 'node:url';
import type {
LookerParsedIdentifier,
LookerTableIdentifierParseItem,
LookerTableIdentifierParser,
} from './mapping.js';
/**
 * Transport used to call the daemon: sends `payload` to `path` and resolves
 * with the JSON object of the response.
 */
export type KloDaemonTableIdentifierHttpJsonRunner = (
path: string,
payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;
/** Options for createDaemonLookerTableIdentifierParser. */
export interface DaemonLookerTableIdentifierParserOptions {
/** Base URL of the daemon; used by the built-in HTTP transport. */
baseUrl: string;
/** Optional transport override; when omitted, the built-in HTTP POST transport is used. */
requestJson?: KloDaemonTableIdentifierHttpJsonRunner;
}
/**
 * Creates a table identifier parser that delegates to the daemon's
 * `/sql/parse-table-identifier` endpoint.
 *
 * The transport is `options.requestJson` when given, otherwise an HTTP POST
 * transport bound to `options.baseUrl`.
 *
 * @returns A parser whose `parse` resolves with the `results` object of the
 * response and rejects when `results` is missing, not an object, or an array.
 */
export function createDaemonLookerTableIdentifierParser(
  options: DaemonLookerTableIdentifierParserOptions,
): LookerTableIdentifierParser {
  const send = options.requestJson ?? postJson(options.baseUrl);
  const parse = async (items: LookerTableIdentifierParseItem[]): Promise<Record<string, LookerParsedIdentifier>> => {
    const { results } = await send('/sql/parse-table-identifier', { items });
    const isPlainObject = Boolean(results) && typeof results === 'object' && !Array.isArray(results);
    if (!isPlainObject) {
      throw new Error('klo-daemon table identifier parser returned invalid results');
    }
    return results as Record<string, LookerParsedIdentifier>;
  };
  return { parse };
}
/** Ensures the base URL ends with a slash so relative paths resolve beneath it. */
function normalizedBaseUrl(baseUrl: string): string {
  if (baseUrl.endsWith('/')) {
    return baseUrl;
  }
  return `${baseUrl}/`;
}
/**
 * Builds the default daemon transport: an HTTP(S) POST of a JSON payload.
 *
 * The request path is resolved relative to `baseUrl`. The returned runner
 * resolves with the parsed body when the status is 2xx and the body is a JSON
 * object. It rejects on a non-2xx status (the message includes the status and
 * the body text), on a body that is not a JSON object, on invalid JSON, and on
 * request or response stream errors.
 *
 * @param baseUrl Base URL of the daemon; its protocol selects http or https.
 */
function postJson(baseUrl: string): KloDaemonTableIdentifierHttpJsonRunner {
  return async (path, payload) =>
    new Promise((resolve, reject) => {
      // Drop the leading slash so the path resolves under the base URL's own path.
      const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
      const body = JSON.stringify(payload);
      const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
      const request = client(
        target,
        {
          method: 'POST',
          headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            'content-length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const chunks: Buffer[] = [];
          response.on('data', (chunk: Buffer) => chunks.push(chunk));
          // A connection dropped mid-body surfaces as an 'error' event on the
          // response. Without a listener that event is unhandled and the
          // promise never settles.
          response.on('error', reject);
          response.on('end', () => {
            const text = Buffer.concat(chunks).toString('utf8');
            const statusCode = response.statusCode ?? 0;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`klo-daemon HTTP ${path} failed with ${statusCode}: ${text}`));
              return;
            }
            try {
              const parsed = JSON.parse(text) as unknown;
              if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
                reject(new Error(`klo-daemon HTTP ${path} returned non-object JSON`));
                return;
              }
              resolve(parsed as Record<string, unknown>);
            } catch (error) {
              reject(error);
            }
          });
        },
      );
      request.on('error', reject);
      request.end(body);
    });
}

View file

@ -0,0 +1,47 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { detectLookerStagedDir } from './detect.js';
/** Test helper: writes `body` to `relPath` under `stagedDir`, creating parent directories first. */
async function touch(stagedDir: string, relPath: string, body = '{}\n'): Promise<void> {
  const filePath = join(stagedDir, relPath);
  const parentDir = join(filePath, '..');
  await mkdir(parentDir, { recursive: true });
  await writeFile(filePath, body, 'utf-8');
}
// Tests for detectLookerStagedDir. Each test gets a fresh temporary staged
// directory that is removed afterwards.
describe('detectLookerStagedDir', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-detect-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('returns true when sync-config.json and at least one runtime entity are present', async () => {
await touch(stagedDir, 'sync-config.json');
await touch(stagedDir, 'explores/b2b/sales_pipeline.json');
expect(await detectLookerStagedDir(stagedDir)).toBe(true);
});
it('returns true for dashboard-only staged dirs', async () => {
await touch(stagedDir, 'sync-config.json');
await touch(stagedDir, 'dashboards/10.json');
expect(await detectLookerStagedDir(stagedDir)).toBe(true);
});
it('returns false without sync-config.json', async () => {
// An entity file alone is not enough; the sync config is mandatory.
await touch(stagedDir, 'looks/20.json');
expect(await detectLookerStagedDir(stagedDir)).toBe(false);
});
it('returns false when only control files are present', async () => {
// The model listing and signal files do not count as runtime entities.
await touch(stagedDir, 'sync-config.json');
await touch(stagedDir, 'lookml_models.json');
await touch(stagedDir, 'signals/dashboard_usage.json', '[]\n');
expect(await detectLookerStagedDir(stagedDir)).toBe(false);
});
});

View file

@ -0,0 +1,28 @@
import { readdir, stat } from 'node:fs/promises';
import { join, relative } from 'node:path';
import { STAGED_FILES } from './types.js';
/**
 * Matches staged entity files by their forward-slash relative path:
 * `explores/<dir>/<file>.json`, `dashboards/<file>.json` or `looks/<file>.json`.
 */
const LOOKER_ENTITY_FILE_RE = /^(explores\/[^/]+\/[^/]+|dashboards\/[^/]+|looks\/[^/]+)\.json$/;
/**
 * Lists every file under `root`, recursively, as a sorted list of paths
 * relative to `root` with forward slashes as separators.
 */
async function walk(root: string): Promise<string[]> {
  const dirents = await readdir(root, { withFileTypes: true, recursive: true });
  const files: string[] = [];
  for (const dirent of dirents) {
    if (!dirent.isFile()) {
      continue;
    }
    const absolutePath = join(dirent.parentPath, dirent.name);
    // Normalise backslashes so the result uses forward slashes only.
    files.push(relative(root, absolutePath).replace(/\\/g, '/'));
  }
  files.sort();
  return files;
}
/**
 * Reports whether `stagedDir` looks like a staged Looker export.
 *
 * The sync config file must exist and at least one file path must match
 * LOOKER_ENTITY_FILE_RE. Any failure while checking the sync config or walking
 * the directory yields `false` instead of an error.
 */
export async function detectLookerStagedDir(stagedDir: string): Promise<boolean> {
  let syncConfigPresent = true;
  try {
    await stat(join(stagedDir, STAGED_FILES.syncConfig));
  } catch {
    syncConfigPresent = false;
  }
  if (!syncConfigPresent) {
    return false;
  }
  try {
    for (const path of await walk(stagedDir)) {
      if (LOOKER_ENTITY_FILE_RE.test(path)) {
        return true;
      }
    }
    return false;
  } catch {
    return false;
  }
}

View file

@ -0,0 +1,188 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { dirname, join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { getLookerTriageSignals, writeLookerEvidenceDocuments } from './evidence-documents.js';
/** Test helper: writes `value` as pretty-printed JSON plus a trailing newline to `relPath` under `root`. */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const filePath = join(root, relPath);
  const parentDir = dirname(filePath);
  await mkdir(parentDir, { recursive: true });
  const payload = `${JSON.stringify(value, null, 2)}\n`;
  await writeFile(filePath, payload, 'utf-8');
}
/** Test helper: reads and parses the JSON file at `relPath` under `root`; the result is not validated. */
async function readJson<T>(root: string, relPath: string): Promise<T> {
  const text = await readFile(join(root, relPath), 'utf-8');
  return JSON.parse(text) as T;
}
// Tests for the Looker evidence writer and the triage-signal lookup. Every test
// starts from a temporary staged directory seeded with one explore, one
// dashboard, one Look and the four signal files.
describe('Looker evidence documents', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-evidence-docs-'));
// Staged explore b2b.sales_pipeline with one dimension, one measure and one join.
await writeJson(stagedDir, 'explores/b2b/sales_pipeline.json', {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: 'Pipeline analysis explore.',
fields: {
dimensions: [
{ name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '${TABLE}.stage', description: null },
],
measures: [
{
name: 'opportunities.arr',
label: 'ARR',
type: 'sum',
sql: '${TABLE}.arr',
description: 'Annual recurring revenue.',
},
],
},
joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }],
});
// Staged dashboard 10 with a single tile that carries an inline query.
await writeJson(stagedDir, 'dashboards/10.json', {
lookerId: '10',
title: 'Sales Pipeline Overview',
description: 'Executive dashboard for open pipeline ARR.',
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T10:00:00.000Z',
tiles: [
{
id: '100',
title: 'Open Pipeline ARR',
lookId: null,
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr', 'opportunities.stage'],
filters: { 'opportunities.stage': 'open' },
sorts: ['opportunities.arr desc'],
limit: '500',
},
},
],
});
// Staged Look 20 querying the same explore.
await writeJson(stagedDir, 'looks/20.json', {
lookerId: '20',
title: 'Active Opportunity Pipeline',
description: 'Saved Look for active opportunity pipeline review.',
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T11:00:00.000Z',
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr'],
filters: { 'opportunities.stage': 'open' },
sorts: [],
limit: '500',
},
});
// Signal files: usage exists for both items; schedules and favorites only for dashboard 10.
await writeJson(stagedDir, 'signals/dashboard_usage.json', [
{
contentId: '10',
queryCount30d: 80,
uniqueUsers30d: 12,
lastRunAt: '2026-04-30T09:00:00.000Z',
topUsers: ['3'],
},
]);
await writeJson(stagedDir, 'signals/look_usage.json', [
{
contentId: '20',
queryCount30d: 2,
uniqueUsers30d: 1,
lastRunAt: '2026-04-29T09:00:00.000Z',
topUsers: ['3'],
},
]);
await writeJson(stagedDir, 'signals/scheduled_plans.json', [
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 2, recipientCount: 5 },
]);
await writeJson(stagedDir, 'signals/favorites.json', [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
]);
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('writes indexable metadata and markdown for explores, dashboards, and Looks', async () => {
await writeLookerEvidenceDocuments(stagedDir);
// Each entity gets a metadata.json under evidence/<kind>/<id>/.
await expect(readJson(stagedDir, 'evidence/explores/b2b/sales_pipeline/metadata.json')).resolves.toMatchObject({
objectType: 'looker_explore',
id: 'looker:explore:b2b.sales_pipeline',
title: 'Sales Pipeline',
path: 'Looker / Explores / b2b.sales_pipeline',
properties: {
rawPath: 'explores/b2b/sales_pipeline.json',
modelName: 'b2b',
exploreName: 'sales_pipeline',
},
});
await expect(readJson(stagedDir, 'evidence/dashboards/10/metadata.json')).resolves.toMatchObject({
objectType: 'looker_dashboard',
id: 'looker:dashboard:10',
title: 'Sales Pipeline Overview',
path: 'Looker / Dashboards / Sales Pipeline Overview',
lastEditedAt: '2026-04-30T10:00:00.000Z',
properties: {
rawPath: 'dashboards/10.json',
lookerId: '10',
},
});
await expect(readJson(stagedDir, 'evidence/looks/20/metadata.json')).resolves.toMatchObject({
objectType: 'looker_look',
id: 'looker:look:20',
title: 'Active Opportunity Pipeline',
path: 'Looker / Looks / Active Opportunity Pipeline',
properties: {
rawPath: 'looks/20.json',
lookerId: '20',
},
});
// The dashboard page must describe the tile query...
const dashboardMarkdown = await readFile(join(stagedDir, 'evidence/dashboards/10/page.md'), 'utf-8');
expect(dashboardMarkdown).toContain('# Sales Pipeline Overview');
expect(dashboardMarkdown).toContain('Executive dashboard for open pipeline ARR.');
expect(dashboardMarkdown).toContain('## Tile: Open Pipeline ARR');
expect(dashboardMarkdown).toContain('- model: b2b');
expect(dashboardMarkdown).toContain('- explore: sales_pipeline');
expect(dashboardMarkdown).toContain('- opportunities.stage = open');
// ...but must not contain usage, schedule, favorite or owner details.
expect(dashboardMarkdown).not.toContain('80');
expect(dashboardMarkdown).not.toContain('queryCount30d');
expect(dashboardMarkdown).not.toContain('recipient');
expect(dashboardMarkdown).not.toContain('favorite');
expect(dashboardMarkdown).not.toContain('owner');
});
it('returns usage-aware triage signals without exposing usage as document prose', async () => {
await writeLookerEvidenceDocuments(stagedDir);
// Dashboard 10 has usage, a schedule and favorites; all hints are strings.
await expect(getLookerTriageSignals(stagedDir, 'looker:dashboard:10')).resolves.toEqual({
objectType: 'looker_dashboard',
propertyHints: {
contentType: 'dashboard',
queryCount30d: '80',
uniqueUsers30d: '12',
isScheduled: 'true',
favoriteCount: '4',
},
lastEditedAt: '2026-04-30T10:00:00.000Z',
});
// Look 20 has usage only, so the schedule and favorite hints fall back to defaults.
await expect(getLookerTriageSignals(stagedDir, 'looker:look:20')).resolves.toEqual({
objectType: 'looker_look',
propertyHints: {
contentType: 'look',
queryCount30d: '2',
uniqueUsers30d: '1',
isScheduled: 'false',
favoriteCount: '0',
},
lastEditedAt: '2026-04-30T11:00:00.000Z',
});
});
});

View file

@ -0,0 +1,378 @@
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, join, relative } from 'node:path';
import type { TriageSignals } from '../../types.js';
import {
STAGED_FILES,
type StagedDashboardFile,
type StagedExploreFile,
type StagedLookerSignalsFile,
type StagedLookFile,
stagedDashboardFileSchema,
stagedExploreFileSchema,
stagedLookerSignalsFileSchema,
stagedLookFileSchema,
} from './types.js';
/** Generic JSON object; used here as the type of evidence metadata. */
type JsonObject = Record<string, unknown>;
/** One rendered evidence document, ready to be written beneath the staged directory. */
interface EvidenceDocument {
/** Directory, relative to the staged dir, that receives `metadata.json` and `page.md`. */
relDir: string;
/** Contents of `metadata.json`. */
metadata: JsonObject;
/** Contents of `page.md`. */
markdown: string;
}
/**
 * Renders an evidence document for every staged explore, dashboard and Look
 * and writes them beneath the evidence root of `stagedDir`.
 *
 * All staged files are read and validated against their schema before
 * anything is written. Each document becomes a `metadata.json` and a `page.md`
 * in its own directory. Finally a `signals-summary.json` with the number of
 * entries per signal list is written to the evidence root.
 *
 * @param stagedDir Root of the staged Looker export.
 */
export async function writeLookerEvidenceDocuments(stagedDir: string): Promise<void> {
  const relPaths = await walkJson(stagedDir);
  const signals = await readSignals(stagedDir);

  const explorePattern = /^explores\/[^/]+\/[^/]+\.json$/;
  const dashboardPattern = /^dashboards\/[^/]+\.json$/;
  const lookPattern = /^looks\/[^/]+\.json$/;

  const documents: EvidenceDocument[] = [];
  for (const relPath of relPaths) {
    if (explorePattern.test(relPath)) {
      const explore = await readJson(stagedDir, relPath, stagedExploreFileSchema);
      documents.push(renderExploreEvidence(relPath, explore));
    } else if (dashboardPattern.test(relPath)) {
      const dashboard = await readJson(stagedDir, relPath, stagedDashboardFileSchema);
      documents.push(renderDashboardEvidence(relPath, dashboard));
    } else if (lookPattern.test(relPath)) {
      const look = await readJson(stagedDir, relPath, stagedLookFileSchema);
      documents.push(renderLookEvidence(relPath, look));
    }
  }

  for (const { relDir, metadata, markdown } of documents) {
    await writeJson(stagedDir, join(relDir, 'metadata.json'), metadata);
    await writeText(stagedDir, join(relDir, 'page.md'), markdown);
  }

  const summary = {
    dashboardUsageCount: signals.dashboardUsage.length,
    lookUsageCount: signals.lookUsage.length,
    scheduledPlanCount: signals.scheduledPlans.length,
    favoriteCount: signals.favorites.length,
  };
  await writeJson(stagedDir, join(STAGED_FILES.evidenceRoot, 'signals-summary.json'), summary);
}
/**
 * Returns the triage signals for one Looker evidence object.
 *
 * - `looker:dashboard:<id>` and `looker:look:<id>`: usage, schedule and
 *   favorite hints, all as strings, with 0 / false defaults when a signal is
 *   missing. `lastEditedAt` comes from the staged file's `updatedAt`, falling
 *   back to the last usage timestamp.
 * - `looker:explore:<model>.<explore>`: only the model and explore names.
 * - Any other id: just the generic `looker_runtime` object type.
 *
 * @param stagedDir Root of the staged Looker export.
 * @param externalId Evidence id as written into `metadata.json`.
 */
export async function getLookerTriageSignals(stagedDir: string, externalId: string): Promise<TriageSignals> {
  const signals = await readSignals(stagedDir);

  // Builds the hint block shared by dashboards and Looks.
  const usageHints = (
    contentType: 'dashboard' | 'look',
    contentId: string,
    usage:
      | StagedLookerSignalsFile['dashboardUsage'][number]
      | StagedLookerSignalsFile['lookUsage'][number]
      | undefined,
  ) => {
    const schedule = signals.scheduledPlans.find(
      (item) => item.contentType === contentType && item.contentId === contentId,
    );
    const favorite = signals.favorites.find(
      (item) => item.contentType === contentType && item.contentId === contentId,
    );
    return {
      contentType,
      queryCount30d: String(usage?.queryCount30d ?? 0),
      uniqueUsers30d: String(usage?.uniqueUsers30d ?? 0),
      isScheduled: String(schedule?.isScheduled ?? false),
      favoriteCount: String(favorite?.favoriteCount ?? 0),
    };
  };

  const dashboardId = /^looker:dashboard:(.+)$/.exec(externalId)?.[1];
  if (dashboardId) {
    const stagedPath = `dashboards/${safePathSegment(dashboardId)}.json`;
    const dashboard = await readOptionalJson(stagedDir, stagedPath, stagedDashboardFileSchema);
    const usage = signals.dashboardUsage.find((item) => item.contentId === dashboardId);
    return {
      objectType: 'looker_dashboard',
      lastEditedAt: dashboard?.updatedAt ?? usage?.lastRunAt ?? undefined,
      propertyHints: usageHints('dashboard', dashboardId, usage),
    };
  }

  const lookId = /^looker:look:(.+)$/.exec(externalId)?.[1];
  if (lookId) {
    const stagedPath = `looks/${safePathSegment(lookId)}.json`;
    const look = await readOptionalJson(stagedDir, stagedPath, stagedLookFileSchema);
    const usage = signals.lookUsage.find((item) => item.contentId === lookId);
    return {
      objectType: 'looker_look',
      lastEditedAt: look?.updatedAt ?? usage?.lastRunAt ?? undefined,
      propertyHints: usageHints('look', lookId, usage),
    };
  }

  const exploreMatch = /^looker:explore:([^.]+)\.(.+)$/.exec(externalId);
  if (exploreMatch) {
    const [, modelName, exploreName] = exploreMatch;
    return {
      objectType: 'looker_explore',
      propertyHints: {
        contentType: 'explore',
        modelName,
        exploreName,
      },
    };
  }

  return { objectType: 'looker_runtime' };
}
/**
 * Renders the evidence document for a staged explore.
 *
 * The title is the explore label, or `<model>.<explore>` when there is none.
 * The markdown lists the description, the model and explore names, the
 * dimensions, the measures and the joins.
 *
 * @param rawPath Path of the staged source file, stored in the metadata properties.
 * @param explore Validated staged explore.
 */
function renderExploreEvidence(rawPath: string, explore: StagedExploreFile): EvidenceDocument {
  const { modelName, exploreName } = explore;
  const qualifiedName = `${modelName}.${exploreName}`;
  const title = explore.label ?? qualifiedName;

  const joinLines =
    explore.joins.length === 0
      ? ['- none']
      : explore.joins.map((item) => `- ${item.name}${item.relationship ? ` (${item.relationship})` : ''}`);

  const lines = [`# ${title}`, '', explore.description || '', ''];
  lines.push('## Explore', '', `- model: ${modelName}`, `- explore: ${exploreName}`, '');
  lines.push('## Dimensions', '', ...fieldLines(explore.fields.dimensions), '');
  lines.push('## Measures', '', ...fieldLines(explore.fields.measures), '');
  lines.push('## Joins', '', ...joinLines);

  const relDir = join(
    STAGED_FILES.evidenceRoot,
    'explores',
    safePathSegment(modelName),
    safePathSegment(exploreName),
  );

  return {
    relDir,
    metadata: {
      objectType: 'looker_explore',
      id: `looker:explore:${qualifiedName}`,
      title,
      path: `Looker / Explores / ${qualifiedName}`,
      url: null,
      parentId: null,
      databaseId: null,
      dataSourceId: null,
      lastEditedAt: null,
      lastEditedBy: null,
      properties: {
        rawPath,
        modelName,
        exploreName,
      },
    },
    markdown: normalizeMarkdown(lines),
  };
}
/**
 * Renders the evidence document for a staged dashboard.
 *
 * The markdown holds the title, the description and one section per tile with
 * the tile's inline query, or a note when no query was captured.
 *
 * @param rawPath Path of the staged source file, stored in the metadata properties.
 * @param dashboard Validated staged dashboard.
 */
function renderDashboardEvidence(rawPath: string, dashboard: StagedDashboardFile): EvidenceDocument {
  const { lookerId, title } = dashboard;
  const lines = [`# ${title}`, '', dashboard.description ?? '', '', '## Dashboard Queries', ''];
  for (const tile of dashboard.tiles) {
    // A tile without a title is labelled with its id.
    lines.push(`## Tile: ${tile.title ?? tile.id}`, '');
    lines.push(...(tile.query ? queryLines(tile.query) : ['- no inline query captured']));
    lines.push('');
  }
  return {
    relDir: join(STAGED_FILES.evidenceRoot, 'dashboards', safePathSegment(lookerId)),
    metadata: {
      objectType: 'looker_dashboard',
      id: `looker:dashboard:${lookerId}`,
      title,
      path: `Looker / Dashboards / ${title}`,
      url: null,
      parentId: dashboard.folderId,
      databaseId: null,
      dataSourceId: null,
      lastEditedAt: dashboard.updatedAt,
      lastEditedBy: null,
      properties: {
        rawPath,
        lookerId,
      },
    },
    markdown: normalizeMarkdown(lines),
  };
}
/**
 * Renders the evidence document for a staged Look.
 *
 * The markdown holds the title, the description and the Look's query, or a
 * note when no query was captured.
 *
 * @param rawPath Path of the staged source file, stored in the metadata properties.
 * @param look Validated staged Look.
 */
function renderLookEvidence(rawPath: string, look: StagedLookFile): EvidenceDocument {
  const { lookerId, title } = look;
  const queryBlock = look.query ? queryLines(look.query) : ['- no query captured'];
  const lines = [`# ${title}`, '', look.description ?? '', '', '## Look Query', ''].concat(queryBlock);
  return {
    relDir: join(STAGED_FILES.evidenceRoot, 'looks', safePathSegment(lookerId)),
    metadata: {
      objectType: 'looker_look',
      id: `looker:look:${lookerId}`,
      title,
      path: `Looker / Looks / ${title}`,
      url: null,
      parentId: look.folderId,
      databaseId: null,
      dataSourceId: null,
      lastEditedAt: look.updatedAt,
      lastEditedBy: null,
      properties: {
        rawPath,
        lookerId,
      },
    },
    markdown: normalizeMarkdown(lines),
  };
}
/**
 * Formats explore fields as markdown bullets.
 *
 * Each bullet joins the name and the `label`, `type` and `description` parts
 * with `'; '`; empty or null parts are left out. The `sql` property is not
 * rendered. An empty field list gives a single `- none` bullet.
 */
function fieldLines(
  fields: Array<{
    name: string;
    label: string | null;
    type: string | null;
    sql: string | null;
    description: string | null;
  }>,
): string[] {
  if (fields.length === 0) {
    return ['- none'];
  }
  const bullets: string[] = [];
  for (const { name, label, type, description } of fields) {
    const parts: string[] = [];
    if (name) {
      parts.push(name);
    }
    if (label) {
      parts.push(`label: ${label}`);
    }
    if (type) {
      parts.push(`type: ${type}`);
    }
    if (description) {
      parts.push(`description: ${description}`);
    }
    bullets.push(`- ${parts.join('; ')}`);
  }
  return bullets;
}
/**
 * Formats a staged query as markdown: the model and explore bullets, then a
 * `### Fields` list and a `### Filters` list. A missing query gives a single
 * `- no query captured` bullet.
 */
function queryLines(query: StagedDashboardFile['tiles'][number]['query']): string[] {
  if (!query) {
    return ['- no query captured'];
  }
  const { model, view, fields, filters } = query;
  const fieldBullets = fields.length === 0 ? ['- none'] : fields.map((field) => `- ${field}`);
  const header = [`- model: ${model}`, `- explore: ${view}`, ''];
  const fieldSection = ['### Fields', '', ...fieldBullets, ''];
  const filterSection = ['### Filters', '', ...filterLines(filters)];
  return header.concat(fieldSection, filterSection);
}
/**
 * Formats query filters as `- <field> = <value>` markdown bullets.
 *
 * Null and undefined values are skipped, as are values whose text form is
 * blank. Non-array object values are rendered as JSON so they stay readable
 * instead of collapsing to `[object Object]`. Every other value uses its plain
 * string form. When no filter remains, a single `- none` bullet is returned.
 */
function filterLines(filters: Record<string, unknown>): string[] {
  const bullets: string[] = [];
  for (const [field, value] of Object.entries(filters)) {
    if (value === null || value === undefined) {
      continue;
    }
    // Arrays keep their comma-joined string form; only plain objects need JSON.
    const text = typeof value === 'object' && !Array.isArray(value) ? JSON.stringify(value) : String(value);
    if (text.trim() === '') {
      continue;
    }
    bullets.push(`- ${field} = ${text}`);
  }
  return bullets.length === 0 ? ['- none'] : bullets;
}
/**
 * Loads the four staged signal files in parallel and validates the combined
 * object against the signals schema. A missing file counts as an empty list.
 */
async function readSignals(stagedDir: string): Promise<StagedLookerSignalsFile> {
  const sources = STAGED_FILES.signals;
  const load = (relPath: string): Promise<unknown[]> => readOptionalArray(stagedDir, relPath);
  const [dashboardUsage, lookUsage, scheduledPlans, favorites] = await Promise.all([
    load(sources.dashboardUsage),
    load(sources.lookUsage),
    load(sources.scheduledPlans),
    load(sources.favorites),
  ]);
  const candidate = { dashboardUsage, lookUsage, scheduledPlans, favorites };
  return stagedLookerSignalsFileSchema.parse(candidate);
}
/**
 * Reads a staged JSON file that is expected to hold an array.
 *
 * @param stagedDir - Root of the staged bundle.
 * @param relPath - File path relative to `stagedDir`.
 * @returns The parsed array; `[]` when the file does not exist (ENOENT) or
 *   when the parsed JSON is not an array.
 * @throws Any other read error, and JSON syntax errors.
 */
async function readOptionalArray(stagedDir: string, relPath: string): Promise<unknown[]> {
  let text: string;
  try {
    text = await readFile(join(stagedDir, relPath), 'utf-8');
  } catch (cause) {
    const fileMissing = typeof cause === 'object' && cause !== null && 'code' in cause && cause.code === 'ENOENT';
    if (!fileMissing) {
      throw cause;
    }
    return [];
  }

  const decoded: unknown = JSON.parse(text);
  if (Array.isArray(decoded)) {
    return decoded;
  }
  return [];
}
/**
 * Like `readJson`, but treats a missing file as "no value".
 *
 * @param stagedDir - Root of the staged bundle.
 * @param relPath - File path relative to `stagedDir`.
 * @param schema - Validator applied to the parsed JSON.
 * @returns The validated value, or `null` when the read fails with ENOENT.
 * @throws Any other error raised while reading, parsing, or validating.
 */
async function readOptionalJson<T>(
  stagedDir: string,
  relPath: string,
  schema: { parse(value: unknown): T },
): Promise<T | null> {
  try {
    const value = await readJson(stagedDir, relPath, schema);
    return value;
  } catch (cause) {
    const fileMissing = typeof cause === 'object' && cause !== null && 'code' in cause && cause.code === 'ENOENT';
    if (!fileMissing) {
      throw cause;
    }
    return null;
  }
}
/**
 * Reads a staged JSON file and validates it with the given schema.
 *
 * @param stagedDir - Root of the staged bundle.
 * @param relPath - File path relative to `stagedDir`.
 * @param schema - Validator applied to the parsed JSON.
 * @returns Whatever `schema.parse` returns for the file's content.
 * @throws Read errors, JSON syntax errors, and anything `schema.parse` throws.
 */
async function readJson<T>(stagedDir: string, relPath: string, schema: { parse(value: unknown): T }): Promise<T> {
  const absolutePath = join(stagedDir, relPath);
  const text = await readFile(absolutePath, 'utf-8');
  const decoded: unknown = JSON.parse(text);
  return schema.parse(decoded);
}
/**
 * Serialises `value` as two-space-indented JSON with a trailing newline and
 * writes it via `writeText`.
 *
 * @param stagedDir - Root of the staged bundle.
 * @param relPath - Destination path relative to `stagedDir`.
 * @param value - Value to serialise.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const serialized = JSON.stringify(value, null, 2);
  await writeText(stagedDir, relPath, `${serialized}\n`);
}
/**
 * Writes `body` as UTF-8 to `<stagedDir>/<relPath>`, creating any missing
 * parent directories first.
 *
 * @param stagedDir - Root of the staged bundle.
 * @param relPath - Destination path relative to `stagedDir`.
 * @param body - Text to write.
 */
async function writeText(stagedDir: string, relPath: string, body: string): Promise<void> {
  const destination = join(stagedDir, relPath);
  const parentDir = dirname(destination);
  await mkdir(parentDir, { recursive: true });
  await writeFile(destination, body, 'utf-8');
}
/**
 * Recursively lists every `.json` file under `dir`.
 *
 * @param root - Directory that returned paths are made relative to.
 * @param dir - Directory to scan; defaults to `root`.
 * @returns Sorted paths relative to `root`, using `/` as the separator.
 */
async function walkJson(root: string, dir = root): Promise<string[]> {
  const found: string[] = [];
  const dirents = await readdir(dir, { withFileTypes: true });

  for (const dirent of dirents) {
    const fullPath = join(dir, dirent.name);
    if (dirent.isDirectory()) {
      const nested = await walkJson(root, fullPath);
      found.push(...nested);
    } else if (dirent.isFile() && dirent.name.endsWith('.json')) {
      // Normalise Windows separators so results are platform-independent.
      found.push(relative(root, fullPath).replace(/\\/g, '/'));
    }
  }

  found.sort();
  return found;
}
/**
 * Guards a value before it is used as a single path segment.
 *
 * Only non-empty strings made of ASCII letters, digits, `_` and `-` pass.
 *
 * @param value - Candidate path segment.
 * @returns `value` unchanged when it is safe.
 * @throws Error when `value` is empty or contains any other character.
 */
function safePathSegment(value: string): string {
  const isSafe = /^[a-zA-Z0-9_-]+$/.test(value);
  if (isSafe) {
    return value;
  }
  throw new Error(`Unsafe Looker evidence path segment: ${value}`);
}
/**
 * Joins markdown lines into a document.
 *
 * Runs of consecutive empty lines are collapsed to a single empty line, the
 * result is trimmed, and exactly one trailing newline is appended.
 *
 * @param lines - Markdown lines without line terminators.
 * @returns The normalised markdown text.
 */
function normalizeMarkdown(lines: string[]): string {
  const kept: string[] = [];
  let previous: string | undefined;
  for (const line of lines) {
    // Drop an empty line only when the line before it (in the input) was empty too.
    const repeatedBlank = line === '' && previous === '';
    if (!repeatedBlank) {
      kept.push(line);
    }
    previous = line;
  }
  const text = kept.join('\n').trim();
  return `${text}\n`;
}

View file

@ -0,0 +1,74 @@
import { describe, expect, it, vi } from 'vitest';
import type { FetchContext } from '../../types.js';
import type { LookerSdkPort } from './client.js';
import {
DefaultLookerClientFactory,
DefaultLookerConnectionClientFactory,
type LookerCredentialResolver,
} from './factory.js';
import type { LookerRuntimeClient } from './fetch.js';
import type { LookerPullConfig } from './types.js';
/**
 * Builds a stub `LookerSdkPort` in which every method is a vitest mock.
 *
 * `me` resolves to a fixed API user, `search_dashboards` to one dashboard with
 * id '10', `run_inline_query` to the string '[]', and the remaining listing
 * calls to empty arrays. `dashboard`, `look` and `lookml_model_explore` are
 * bare mocks with no configured return value.
 */
function sdk(): LookerSdkPort {
return {
me: vi.fn().mockResolvedValue({ id: '1', display_name: 'API User', email: 'api@example.com' }),
search_dashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
dashboard: vi.fn(),
search_looks: vi.fn().mockResolvedValue([]),
search_scheduled_plans: vi.fn().mockResolvedValue([]),
look: vi.fn(),
all_folders: vi.fn().mockResolvedValue([]),
all_users: vi.fn().mockResolvedValue([]),
all_groups: vi.fn().mockResolvedValue([]),
all_connections: vi.fn().mockResolvedValue([]),
all_lookml_models: vi.fn().mockResolvedValue([]),
lookml_model_explore: vi.fn(),
run_inline_query: vi.fn().mockResolvedValue('[]'),
logout: vi.fn().mockResolvedValue(undefined),
};
}
// Verifies that the connection-level factory asks the resolver for credentials
// by connection id and returns a client backed by the injected SDK.
describe('DefaultLookerConnectionClientFactory', () => {
it('resolves credentials by Looker connection id and creates a KLO Looker client', async () => {
const fakeSdk = sdk();
// Stub resolver returning fixed connection parameters for any id.
const resolver: LookerCredentialResolver = {
resolve: vi.fn().mockResolvedValue({
base_url: 'https://example.looker.com',
client_id: 'id',
client_secret: 'credential', // pragma: allowlist secret
}),
};
// `sdkFactory` is injected so the client uses the fake SDK.
const factory = new DefaultLookerConnectionClientFactory(resolver, { sdkFactory: () => fakeSdk });
const client = await factory.createClient('prod-looker');
// The fake SDK returns [{ id: '10' }]; the client is expected to report it with `updatedAt: null`.
await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);
expect(resolver.resolve).toHaveBeenCalledWith('prod-looker');
});
});
// Verifies which connection id the pull-level factory forwards to the inner
// connection factory: the configured one when present, otherwise the context's.
describe('DefaultLookerClientFactory', () => {
const ctx: FetchContext = { connectionId: 'ctx-looker', sourceKey: 'looker' };
it('uses pullConfig.lookerConnectionId when present', async () => {
// Only identity matters here, so a minimal object is cast to the client type.
const runtimeClient = { listDashboards: vi.fn() } as unknown as LookerRuntimeClient;
const inner = { createClient: vi.fn().mockResolvedValue(runtimeClient) };
const factory = new DefaultLookerClientFactory(inner);
const config = { lookerConnectionId: 'prod-looker' } as LookerPullConfig;
await expect(factory.createClient(config, ctx)).resolves.toBe(runtimeClient);
expect(inner.createClient).toHaveBeenCalledWith('prod-looker');
});
it('falls back to ctx.connectionId when pullConfig.lookerConnectionId is absent', async () => {
const runtimeClient = { listDashboards: vi.fn() } as unknown as LookerRuntimeClient;
const inner = { createClient: vi.fn().mockResolvedValue(runtimeClient) };
const factory = new DefaultLookerClientFactory(inner);
// Empty config: no lookerConnectionId, so the context id is expected.
const config = {} as LookerPullConfig;
await expect(factory.createClient(config, ctx)).resolves.toBe(runtimeClient);
expect(inner.createClient).toHaveBeenCalledWith('ctx-looker');
});
});

View file

@ -0,0 +1,32 @@
import type { FetchContext } from '../../types.js';
import { LookerClient, type LookerClientDeps, type LookerConnectionParams } from './client.js';
import type { LookerClientFactory, LookerRuntimeClient } from './fetch.js';
import type { LookerPullConfig } from './types.js';
/** Resolves the connection parameters used to construct a Looker client. */
export interface LookerCredentialResolver {
/** Returns the connection parameters for the given Looker connection id. */
resolve(lookerConnectionId: string): Promise<LookerConnectionParams>;
}
/** Creates runtime Looker clients keyed by Looker connection id. */
export interface LookerConnectionClientFactory {
/** Creates a runtime client for the given Looker connection id. */
createClient(lookerConnectionId: string): Promise<LookerRuntimeClient>;
}
/**
 * Default `LookerConnectionClientFactory`: resolves connection parameters
 * through a `LookerCredentialResolver` and wraps them in a `LookerClient`.
 */
export class DefaultLookerConnectionClientFactory implements LookerConnectionClientFactory {
  /**
   * @param resolver - Source of connection parameters per connection id.
   * @param deps - Optional dependencies forwarded to every `LookerClient`.
   */
  constructor(
    private readonly resolver: LookerCredentialResolver,
    private readonly deps: LookerClientDeps = {},
  ) {}

  /**
   * Resolves the parameters for `lookerConnectionId` and builds a client.
   *
   * @throws Whatever the resolver rejects with.
   */
  async createClient(lookerConnectionId: string): Promise<LookerRuntimeClient> {
    const { resolver, deps } = this;
    const connectionParams = await resolver.resolve(lookerConnectionId);
    const client = new LookerClient(connectionParams, deps);
    return client;
  }
}
/**
 * Default `LookerClientFactory`: picks the Looker connection id for a pull and
 * delegates client creation to a `LookerConnectionClientFactory`.
 */
export class DefaultLookerClientFactory implements LookerClientFactory {
  /** @param inner - Factory that creates clients by connection id. */
  constructor(private readonly inner: LookerConnectionClientFactory) {}

  /**
   * Creates a client for `config.lookerConnectionId`, falling back to
   * `ctx.connectionId` when the config value is null or undefined.
   */
  async createClient(config: LookerPullConfig, ctx: FetchContext): Promise<LookerRuntimeClient> {
    const { lookerConnectionId } = config;
    const effectiveConnectionId = lookerConnectionId ?? ctx.connectionId;
    return this.inner.createClient(effectiveConnectionId);
  }
}

View file

@ -0,0 +1,77 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { readLookerFetchReport, writeLookerFetchReport } from './fetch-report.js';
// Exercises readLookerFetchReport / writeLookerFetchReport against a real
// temporary directory.
describe('Looker staged fetch report', () => {
let stagedDir: string;
// Each test gets its own fresh staged directory, removed afterwards.
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-fetch-report-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('returns null when a staged bundle has no fetch report', async () => {
// Nothing has been written to the directory, so the report file is absent.
await expect(readLookerFetchReport(stagedDir)).resolves.toBeNull();
});
it('round-trips partial fetch issues', async () => {
// One skipped entity (error, retryable) and one warning (not retryable).
await writeLookerFetchReport(stagedDir, {
status: 'partial',
retryRecommended: true,
skipped: [
{
rawPath: 'dashboards/10.json',
entityType: 'dashboard',
entityId: '10',
severity: 'error',
statusCode: 429,
message: 'Looker API rate limit remained after retry',
retryRecommended: true,
},
],
warnings: [
{
rawPath: 'signals/dashboard_usage.json',
entityType: 'signals',
entityId: null,
severity: 'warning',
statusCode: 403,
message: 'system__activity unavailable',
retryRecommended: false,
},
],
});
// Reading back must yield exactly what was written.
await expect(readLookerFetchReport(stagedDir)).resolves.toEqual({
status: 'partial',
retryRecommended: true,
skipped: [
{
rawPath: 'dashboards/10.json',
entityType: 'dashboard',
entityId: '10',
severity: 'error',
statusCode: 429,
message: 'Looker API rate limit remained after retry',
retryRecommended: true,
},
],
warnings: [
{
rawPath: 'signals/dashboard_usage.json',
entityType: 'signals',
entityId: null,
severity: 'warning',
statusCode: 403,
message: 'system__activity unavailable',
retryRecommended: false,
},
],
});
});
});

View file

@ -0,0 +1,22 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import { STAGED_FILES, type StagedLookerFetchReport, stagedLookerFetchReportSchema } from './types.js';
/**
 * Reads and validates the fetch report of a staged Looker bundle.
 *
 * @param stagedDir - Root of the staged bundle.
 * @returns The validated report, or `null` when the report file does not exist.
 * @throws Any read error other than ENOENT, JSON syntax errors, and schema
 *   validation errors.
 */
export async function readLookerFetchReport(stagedDir: string): Promise<StagedLookerFetchReport | null> {
  const reportPath = join(stagedDir, STAGED_FILES.fetchReport);

  let contents: string;
  try {
    contents = await readFile(reportPath, 'utf-8');
  } catch (cause) {
    const fileMissing = typeof cause === 'object' && cause !== null && 'code' in cause && cause.code === 'ENOENT';
    if (!fileMissing) {
      throw cause;
    }
    return null;
  }

  const decoded: unknown = JSON.parse(contents);
  return stagedLookerFetchReportSchema.parse(decoded);
}
/**
 * Validates a fetch report and writes it into a staged Looker bundle as
 * two-space-indented JSON with a trailing newline.
 *
 * Validation happens before any filesystem access, so an invalid report
 * leaves the directory untouched.
 *
 * @param stagedDir - Root of the staged bundle.
 * @param report - Report to validate and persist.
 * @throws Schema validation errors and filesystem errors.
 */
export async function writeLookerFetchReport(stagedDir: string, report: StagedLookerFetchReport): Promise<void> {
  const validated = stagedLookerFetchReportSchema.parse(report);
  const reportPath = join(stagedDir, STAGED_FILES.fetchReport);
  const serialized = JSON.stringify(validated, null, 2);

  await mkdir(dirname(reportPath), { recursive: true });
  await writeFile(reportPath, `${serialized}\n`, 'utf-8');
}

View file

@ -0,0 +1,645 @@
import { mkdtemp, readdir, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { chunkLookerStagedDir } from './chunk.js';
import { fetchLookerRuntimeBundle, type LookerRuntimeClient } from './fetch.js';
// Fixed UUID-shaped id used as both the Looker connection id and the fetch context connection id in these tests.
const connectionId = '11111111-1111-4111-8111-111111111111';
/**
 * Builds a fully mocked `LookerRuntimeClient` describing a tiny instance:
 * one dashboard ('10'), one look ('20'), one folder, one user, one group, one
 * LookML model ('b2b') with one explore ('sales_pipeline'), and one set of
 * signals. Individual tests override methods via `vi.mocked(...)`.
 */
function makeClient(): LookerRuntimeClient {
return {
listDashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
getDashboard: vi.fn().mockResolvedValue({
lookerId: '10',
title: 'Sales Pipeline',
description: 'Pipeline health',
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T12:00:00.000Z',
tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
}),
listLooks: vi.fn().mockResolvedValue([{ id: '20' }]),
getLook: vi.fn().mockResolvedValue({
lookerId: '20',
title: 'Open Pipeline',
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T12:00:00.000Z',
query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
}),
listFolders: vi
.fn()
.mockResolvedValue({ folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }] }),
listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: null }]),
listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Sales' }]),
listLookmlModels: vi.fn().mockResolvedValue({
models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
}),
getExplore: vi.fn().mockResolvedValue({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
joins: [],
}),
// Usage, schedule and favourite signals for dashboard '10' and look '20'.
getSignals: vi.fn().mockResolvedValue({
dashboardUsage: [{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] }],
lookUsage: [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] }],
scheduledPlans: [
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
],
favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }],
}),
cleanup: vi.fn().mockResolvedValue(undefined),
};
}
describe('fetchLookerRuntimeBundle', () => {
// Each test stages into its own fresh temporary directory, which is removed afterwards.
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-fetch-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
// Full pull (no "updated since" cursors): every entity kind from the stub
// client must be staged, along with sync config, scope, and signal files.
it('writes dashboards, looks, folders, users, groups, models, explores, signals, and sync config', async () => {
const client = makeClient();
await fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId, instanceBaseUrl: 'https://example.looker.com' },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
expect(await readdir(join(stagedDir, 'dashboards'))).toEqual(['10.json']);
expect(await readdir(join(stagedDir, 'looks'))).toEqual(['20.json']);
expect(await readdir(join(stagedDir, 'users'))).toEqual(['3.json']);
expect(await readdir(join(stagedDir, 'groups'))).toEqual(['4.json']);
expect(await readdir(join(stagedDir, 'explores/b2b'))).toEqual(['sales_pipeline.json']);
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
// `fetchedAt` comes from the injected `now`; the list refs carry no `updatedAt`, and all cursors are expected to be null.
expect(syncConfig).toEqual({
lookerConnectionId: connectionId,
fetchedAt: '2026-04-30T12:30:00.000Z',
instanceBaseUrl: 'https://example.looker.com',
previousCursors: {
dashboardsLastSyncedAt: null,
looksLastSyncedAt: null,
},
nextCursors: {
dashboardsLastSyncedAt: null,
looksLastSyncedAt: null,
},
});
const scope = JSON.parse(await readFile(join(stagedDir, 'looker-scope.json'), 'utf-8'));
// In a full pull the known and fetched raw paths coincide.
expect(scope).toEqual({
mode: 'full',
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
fetchedRawPaths: ['dashboards/10.json', 'looks/20.json'],
});
const dashboardUsage = JSON.parse(await readFile(join(stagedDir, 'signals/dashboard_usage.json'), 'utf-8'));
expect(dashboardUsage).toEqual([
{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] },
]);
const lookUsage = JSON.parse(await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8'));
const scheduledPlans = JSON.parse(await readFile(join(stagedDir, 'signals/scheduled_plans.json'), 'utf-8'));
const favorites = JSON.parse(await readFile(join(stagedDir, 'signals/favorites.json'), 'utf-8'));
expect(lookUsage).toEqual([
{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] },
]);
expect(scheduledPlans).toEqual([
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
]);
expect(favorites).toEqual([{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }]);
});
// Incremental pull: dashboard '10' and look '20' have `updatedAt` equal to
// their cursors and are expected to be skipped; dashboard '11' is newer and
// look '21' has a null `updatedAt`, so both are expected to be fetched.
it('stages only changed Dashboard and Look entity bodies during incremental pulls', async () => {
const client = makeClient();
vi.mocked(client.listDashboards).mockResolvedValue([
{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' },
{ id: '11', updatedAt: '2026-04-30T12:10:00.000Z' },
]);
vi.mocked(client.getDashboard).mockImplementation(async (id: string) => ({
lookerId: id,
title: `Dashboard ${id}`,
description: null,
folderId: '7',
ownerId: '3',
updatedAt: id === '11' ? '2026-04-30T12:10:00.000Z' : '2026-04-30T12:00:00.000Z',
tiles: [],
}));
vi.mocked(client.listLooks).mockResolvedValue([
{ id: '20', updatedAt: '2026-04-30T11:00:00.000Z' },
{ id: '21', updatedAt: null },
]);
vi.mocked(client.getLook).mockImplementation(async (id: string) => ({
lookerId: id,
title: `Look ${id}`,
description: null,
folderId: '7',
ownerId: '3',
updatedAt: id === '21' ? null : '2026-04-30T11:00:00.000Z',
query: null,
}));
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
dashboardUpdatedSince: '2026-04-30T12:00:00.000Z',
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
expect(client.getDashboard).toHaveBeenCalledTimes(1);
expect(client.getDashboard).toHaveBeenCalledWith('11');
expect(client.getLook).toHaveBeenCalledTimes(1);
expect(client.getLook).toHaveBeenCalledWith('21');
await expect(readdir(join(stagedDir, 'dashboards'))).resolves.toEqual(['11.json']);
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['21.json']);
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
// Previous cursors echo the pull config's "updated since" values.
expect(syncConfig.previousCursors).toEqual({
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
});
// The dashboards cursor is expected to advance to 12:10; the looks cursor is expected to stay at 11:00.
expect(syncConfig.nextCursors).toEqual({
dashboardsLastSyncedAt: '2026-04-30T12:10:00.000Z',
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
});
const scope = JSON.parse(await readFile(join(stagedDir, 'looker-scope.json'), 'utf-8'));
// All listed entities are "known"; only the re-fetched ones are "fetched".
expect(scope).toEqual({
mode: 'incremental',
knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json', 'looks/21.json'],
fetchedRawPaths: ['dashboards/11.json', 'looks/21.json'],
});
});
// `getSignals` is optional on the runtime client; without it the look-usage signal file must still exist and hold an empty array.
it('falls back to empty signal files when the client has no signal support', async () => {
const client = makeClient();
delete client.getSignals;
await fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
expect(JSON.parse(await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8'))).toEqual([]);
});
// Two explores: 'sales_pipeline' uses a mapped Looker connection and gets its
// warehouse target stamped from the pull config; 'marketing' uses an unmapped
// connection and must produce a warning in the fetch report.
it('stamps explore warehouse targets from pull config and reports unmapped Looker connections', async () => {
const client = makeClient();
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
vi.mocked(client.listLookmlModels).mockResolvedValue({
models: [
{
name: 'b2b',
label: 'B2B',
explores: [
{ name: 'sales_pipeline', label: 'Sales Pipeline' },
{ name: 'marketing', label: 'Marketing' },
],
},
],
});
vi.mocked(client.getExplore).mockImplementation(async (_modelName: string, exploreName: string) => {
if (exploreName === 'marketing') {
// Connection name with no entry in `connectionMappings` below.
return {
modelName: 'b2b',
exploreName: 'marketing',
label: 'Marketing',
description: null,
rawSqlTableName: 'proj.dataset.marketing',
connectionName: 'missing_mapping',
viewName: 'marketing',
fields: {
dimensions: [{ name: 'marketing.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'marketing.spend', label: null, type: null, sql: null, description: null }],
},
joins: [],
targetWarehouseConnectionId: null,
targetTable: null,
};
}
return {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
},
joins: [
{
name: 'accounts',
type: 'left_outer',
relationship: 'many_to_one',
rawSqlTableName: 'proj.dataset.accounts',
sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
from: null,
targetTable: null,
},
],
targetWarehouseConnectionId: null,
targetTable: null,
};
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
// Keys are '<model>.<explore>' for the explore and '<model>.<explore>.<join>' for a join.
parsedTargetTables: {
'b2b.sales_pipeline': {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
'b2b.sales_pipeline.accounts': {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'accounts',
canonicalTable: 'proj.dataset.accounts',
},
},
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
const salesPipeline = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8'));
expect(salesPipeline).toMatchObject({
connectionName: 'b2b_sandbox_bq',
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
joins: [
{
name: 'accounts',
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'accounts',
canonicalTable: 'proj.dataset.accounts',
},
},
],
});
const marketing = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/marketing.json'), 'utf-8'));
expect(marketing).toMatchObject({
connectionName: 'missing_mapping',
targetWarehouseConnectionId: null,
targetTable: {
ok: false,
reason: 'no_connection_mapping',
},
});
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
// The unmapped connection makes the run 'partial' without skipping any entity.
expect(report.status).toBe('partial');
expect(report.skipped).toEqual([]);
expect(report.warnings).toEqual([
{
rawPath: 'looker_connection_mappings/missing_mapping',
entityType: 'looker_connection_mapping',
entityId: 'missing_mapping',
severity: 'warning',
statusCode: null,
message: 'Looker connection missing_mapping is not mapped to a warehouse connection; 1 explore will be wiki-only.',
retryRecommended: false,
kind: 'unmapped_looker_connection',
details: {
lookerConnectionName: 'missing_mapping',
affectedExplores: ['b2b.marketing'],
},
},
]);
});
// The connection is mapped, but the pull config marks the explore's target
// table as unparseable; this must surface as a non-retryable warning.
it('reports parsed target table failures without retrying the Looker fetch', async () => {
const client = makeClient();
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
vi.mocked(client.getExplore).mockResolvedValue({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: '$' + '{derived.SQL_TABLE_NAME}',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
},
joins: [],
targetWarehouseConnectionId: null,
targetTable: null,
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: {
'b2b.sales_pipeline': {
ok: false,
reason: 'looker_template_unresolved',
detail: 'Looker template markers cannot be resolved before parsing.',
},
},
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
const explore = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8'));
// The warehouse connection is still stamped even though the table failed to parse.
expect(explore).toMatchObject({
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: false,
reason: 'looker_template_unresolved',
},
});
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
expect(report).toMatchObject({
status: 'partial',
retryRecommended: false,
skipped: [],
warnings: [
{
rawPath: 'looker_connection_mappings/b2b_sandbox_bq',
entityType: 'looker_connection_mapping',
entityId: 'b2b_sandbox_bq',
severity: 'warning',
statusCode: null,
message:
'Looker explore b2b.sales_pipeline has sql_table_name that cannot be mapped to a physical warehouse table: looker_template_unresolved.',
retryRecommended: false,
kind: 'looker_template_unresolved',
details: {
lookerConnectionName: 'b2b_sandbox_bq',
rawSqlTableName: '$' + '{derived.SQL_TABLE_NAME}',
reason: 'looker_template_unresolved',
},
},
],
});
});
// The default dashboard tile and look both query b2b/sales_pipeline; the
// explore's stamped warehouse target must appear on each staged query.
it('propagates parent explore warehouse targets onto Dashboard tile and Look queries', async () => {
const client = makeClient();
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
vi.mocked(client.getExplore).mockResolvedValue({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
},
joins: [],
targetWarehouseConnectionId: null,
targetTable: null,
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: {
'b2b.sales_pipeline': {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
},
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
const dashboard = JSON.parse(await readFile(join(stagedDir, 'dashboards/10.json'), 'utf-8'));
expect(dashboard.tiles[0].query).toMatchObject({
model: 'b2b',
view: 'sales_pipeline',
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
});
const look = JSON.parse(await readFile(join(stagedDir, 'looks/20.json'), 'utf-8'));
expect(look.query).toMatchObject({
model: 'b2b',
view: 'sales_pipeline',
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
});
});
// Fetching dashboard '11' fails with a 429. The failure must be recorded as a
// skipped entity, the dashboards cursor must stay at its previous value, and
// the unaffected looks cursor must still advance.
it('records skipped detail entities and keeps cursors pinned for affected entity types', async () => {
const client = makeClient();
vi.mocked(client.listDashboards).mockResolvedValue([
{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' },
{ id: '11', updatedAt: '2026-04-30T12:10:00.000Z' },
]);
vi.mocked(client.getDashboard).mockImplementation(async (id: string) => {
if (id === '11') {
// Simulated API error carrying an HTTP status code.
const error = new Error('Looker API rate limit remained after retry');
Object.assign(error, { statusCode: 429 });
throw error;
}
return {
lookerId: id,
title: `Dashboard ${id}`,
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T12:00:00.000Z',
tiles: [],
};
});
vi.mocked(client.listLooks).mockResolvedValue([{ id: '20', updatedAt: '2026-04-30T11:15:00.000Z' }]);
vi.mocked(client.getLook).mockResolvedValue({
lookerId: '20',
title: 'Look 20',
description: null,
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T11:15:00.000Z',
query: null,
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
dashboardUpdatedSince: '2026-04-30T12:00:00.000Z',
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
// No dashboard body is expected to be staged, so the directory is expected not to exist.
await expect(readdir(join(stagedDir, 'dashboards'))).rejects.toMatchObject({ code: 'ENOENT' });
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['20.json']);
const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8'));
expect(syncConfig.nextCursors).toEqual({
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
looksLastSyncedAt: '2026-04-30T11:15:00.000Z',
});
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
expect(report).toEqual({
status: 'partial',
retryRecommended: true,
skipped: [
{
rawPath: 'dashboards/11.json',
entityType: 'dashboard',
entityId: '11',
severity: 'error',
statusCode: 429,
message: 'Looker API rate limit remained after retry',
retryRecommended: true,
},
],
warnings: [],
});
});
// A 403 on the LookML model listing must not abort the fetch: dashboards and
// looks are still staged, an empty models file is written, no explore is
// requested, and the report carries a warning while remaining 'success'.
it('continues without explore bootstrap when LookML model listing is denied', async () => {
const client = makeClient();
const error = new Error('LookML model access denied');
Object.assign(error, { statusCode: 403 });
vi.mocked(client.listLookmlModels).mockRejectedValue(error);
await fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
await expect(readdir(join(stagedDir, 'dashboards'))).resolves.toEqual(['10.json']);
await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['20.json']);
await expect(readFile(join(stagedDir, 'lookml_models.json'), 'utf-8')).resolves.toBe('{\n "models": []\n}\n');
await expect(readdir(join(stagedDir, 'explores'))).rejects.toMatchObject({ code: 'ENOENT' });
expect(client.getExplore).not.toHaveBeenCalled();
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
expect(report).toEqual({
status: 'success',
retryRecommended: false,
skipped: [],
warnings: [
{
rawPath: 'lookml_models.json',
entityType: 'lookml_models',
entityId: null,
severity: 'warning',
statusCode: 403,
message: 'LookML model access denied',
retryRecommended: false,
},
],
});
// The staged bundle must still chunk into dashboard and look work units, with no dependency on the missing explore file.
const chunked = await chunkLookerStagedDir(stagedDir);
expect(chunked.workUnits.map((wu) => wu.unitKey).sort()).toEqual(['looker-dashboard-10', 'looker-look-20']);
expect(chunked.workUnits.flatMap((wu) => wu.dependencyPaths)).not.toContain('explores/b2b/sales_pipeline.json');
});
// `cleanup` must be called exactly once when the fetch completes normally.
it('cleans up the Looker client after a successful fetch', async () => {
const client = makeClient();
await fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
expect(client.cleanup).toHaveBeenCalledTimes(1);
});
// `cleanup` must also be called exactly once when the fetch rejects; here the very first listing call fails.
it('cleans up the Looker client when fetch throws', async () => {
const client = makeClient();
vi.mocked(client.listDashboards).mockRejectedValue(new Error('Looker API unavailable'));
await expect(
fetchLookerRuntimeBundle({
pullConfig: { lookerConnectionId: connectionId },
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
}),
).rejects.toThrow('Looker API unavailable');
expect(client.cleanup).toHaveBeenCalledTimes(1);
});
});

View file

@ -0,0 +1,555 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import type { FetchContext } from '../../types.js';
import { writeLookerEvidenceDocuments } from './evidence-documents.js';
import { writeLookerFetchReport } from './fetch-report.js';
import {
type LookerPullConfig,
type ParsedTargetTable,
parseLookerPullConfig,
STAGED_FILES,
type StagedDashboardFile,
type StagedExploreFile,
type StagedFoldersTreeFile,
type StagedGroupFile,
type StagedLookerFetchIssue,
type StagedLookerFetchReport,
type StagedLookerQuery,
type StagedLookerSignalsFile,
type StagedLookFile,
type StagedLookmlModelsFile,
type StagedUserFile,
stagedDashboardFileSchema,
stagedExploreFileSchema,
stagedFoldersTreeFileSchema,
stagedGroupFileSchema,
stagedLookerScopeFileSchema,
stagedLookerSignalsFileSchema,
stagedLookFileSchema,
stagedLookmlModelsFileSchema,
stagedSyncConfigSchema,
stagedUserFileSchema,
} from './types.js';
/** Lightweight reference to a Looker dashboard or look, as returned by the list calls. */
export interface LookerEntityRef {
/** Looker id of the entity; used to build its staged raw path. */
id: string;
/** Last-update timestamp, if known (presumably an ISO-8601 string — confirm against the client). */
updatedAt?: string | null;
}
/**
 * Looker operations required by `fetchLookerRuntimeBundle`.
 *
 * `getSignals` and `cleanup` are optional; implementations may omit them.
 */
export interface LookerRuntimeClient {
/** Lists references to the instance's dashboards. */
listDashboards(): Promise<LookerEntityRef[]>;
/** Fetches one dashboard by id. */
getDashboard(id: string): Promise<StagedDashboardFile>;
/** Lists references to the instance's looks. */
listLooks(): Promise<LookerEntityRef[]>;
/** Fetches one look by id. */
getLook(id: string): Promise<StagedLookFile>;
/** Fetches the folder tree. */
listFolders(): Promise<StagedFoldersTreeFile>;
/** Lists users. */
listUsers(): Promise<StagedUserFile[]>;
/** Lists groups. */
listGroups(): Promise<StagedGroupFile[]>;
/** Lists LookML models. */
listLookmlModels(): Promise<StagedLookmlModelsFile>;
/** Fetches one explore of a LookML model. */
getExplore(modelName: string, exploreName: string): Promise<StagedExploreFile>;
/** Optional: fetches the signals file (usage, scheduled plans, favorites). */
getSignals?(): Promise<StagedLookerSignalsFile>;
/** Optional: releases any resources held by the client. */
cleanup?(): Promise<void>;
}
/** Creates the runtime client used for one Looker pull. */
export interface LookerClientFactory {
/** Returns a client for the parsed pull config and fetch context, either directly or as a promise. */
createClient(config: LookerPullConfig, ctx: FetchContext): Promise<LookerRuntimeClient> | LookerRuntimeClient;
}
/** Warehouse target of an explore: the warehouse connection id and the parsed table, either of which may be null. */
interface ExploreTargetSummary {
targetWarehouseConnectionId: string | null;
targetTable: ParsedTargetTable | null;
}
/** Result of stamping an explore: the re-validated explore file plus the target summary extracted from it. */
interface StampedExploreResult {
explore: StagedExploreFile;
targetSummary: ExploreTargetSummary;
}
/** A validated value held in memory together with the staged-dir-relative path it will be written to. */
interface StagedJsonFile<T> {
/** Path relative to the staged directory. */
rawPath: string;
value: T;
}
/** The `reason` values carried by the failed (`ok: false`) variant of `ParsedTargetTable`. */
type ParsedTargetTableFailureReason = Extract<ParsedTargetTable, { ok: false }>['reason'];
/** Inputs of `fetchLookerRuntimeBundle`. */
interface FetchLookerRuntimeBundleParams {
/** Untrusted pull configuration; validated with `parseLookerPullConfig`. */
pullConfig: unknown;
/** Directory that receives all staged JSON files. */
stagedDir: string;
/** Fetch context; its `connectionId` is the fallback when the config has no `lookerConnectionId`. */
ctx: FetchContext;
clientFactory: LookerClientFactory;
/** Clock used for the `fetchedAt` stamp; defaults to `new Date()`. */
now?: () => Date;
}
/**
 * Fetches a Looker bundle through the runtime client and stages it as JSON files under
 * `params.stagedDir`.
 *
 * Order of work: list dashboards and looks and fetch those selected by the previous cursors;
 * write the sync-config and scope files; stage folders, users and groups; stage LookML models
 * and their explores (stamped with warehouse targets); write the fetched dashboards and looks
 * with their queries stamped from the explore targets; stage usage signals; write the evidence
 * documents and the fetch report.
 *
 * Error handling: a failure while fetching a single dashboard, look or explore is recorded in
 * the report's `skipped` list; a failure of `listLookmlModels` or `getSignals` is recorded as a
 * warning and replaced by an empty value. Every other error (list calls, folders/users/groups,
 * path-segment validation outside the try blocks, file writes) propagates to the caller.
 * `client.cleanup` is always invoked, on success and on failure.
 */
export async function fetchLookerRuntimeBundle(params: FetchLookerRuntimeBundleParams): Promise<void> {
const config = parseLookerPullConfig(params.pullConfig);
const connectionId = config.lookerConnectionId ?? params.ctx.connectionId;
const client = await params.clientFactory.createClient(config, params.ctx);
try {
const now = params.now ?? (() => new Date());
const skipped: StagedLookerFetchIssue[] = [];
const warnings: StagedLookerFetchIssue[] = [];
let dashboardFetchHadSkips = false;
let lookFetchHadSkips = false;
// Dashboards and looks are held in memory and written only after the explores are known,
// because their queries are stamped with the explore warehouse targets.
const fetchedDashboards: Array<StagedJsonFile<StagedDashboardFile>> = [];
const fetchedLooks: Array<StagedJsonFile<StagedLookFile>> = [];
const previousCursors = {
dashboardsLastSyncedAt: config.dashboardUpdatedSince ?? null,
looksLastSyncedAt: config.lookUpdatedSince ?? null,
};
const dashboards = await client.listDashboards();
// Paths of every dashboard that currently exists, fetched or not. An id that fails
// safePathSegment throws here, outside any try block, and aborts the whole fetch.
const dashboardRawPaths = dashboards.map((dashboardRef) => `dashboards/${safePathSegment(dashboardRef.id)}.json`);
const dashboardsToFetch = dashboards.filter((dashboardRef) =>
shouldFetchEntity(dashboardRef, previousCursors.dashboardsLastSyncedAt),
);
const fetchedRawPaths: string[] = [];
for (const dashboardRef of dashboardsToFetch) {
const rawPath = `dashboards/${safePathSegment(dashboardRef.id)}.json`;
try {
const dashboard = stagedDashboardFileSchema.parse(await client.getDashboard(dashboardRef.id));
// The staged path is derived from the id in the fetched payload, not from the list reference.
const dashboardRawPath = `dashboards/${safePathSegment(dashboard.lookerId)}.json`;
fetchedRawPaths.push(dashboardRawPath);
fetchedDashboards.push({ rawPath: dashboardRawPath, value: dashboard });
} catch (error) {
dashboardFetchHadSkips = true;
skipped.push(issueForFetchError({ rawPath, entityType: 'dashboard', entityId: dashboardRef.id, error }));
}
}
const looks = await client.listLooks();
const lookRawPaths = looks.map((lookRef) => `looks/${safePathSegment(lookRef.id)}.json`);
const looksToFetch = looks.filter((lookRef) => shouldFetchEntity(lookRef, previousCursors.looksLastSyncedAt));
for (const lookRef of looksToFetch) {
const rawPath = `looks/${safePathSegment(lookRef.id)}.json`;
try {
const look = stagedLookFileSchema.parse(await client.getLook(lookRef.id));
const lookRawPath = `looks/${safePathSegment(look.lookerId)}.json`;
fetchedRawPaths.push(lookRawPath);
fetchedLooks.push({ rawPath: lookRawPath, value: look });
} catch (error) {
lookFetchHadSkips = true;
skipped.push(issueForFetchError({ rawPath, entityType: 'look', entityId: lookRef.id, error }));
}
}
// A cursor only advances when every entity of its kind was fetched; after any skip the
// previous cursor is kept so the skipped entities are selected again on the next run.
const nextCursors = {
dashboardsLastSyncedAt: dashboardFetchHadSkips
? previousCursors.dashboardsLastSyncedAt
: maxUpdatedAt(dashboards, previousCursors.dashboardsLastSyncedAt),
looksLastSyncedAt: lookFetchHadSkips
? previousCursors.looksLastSyncedAt
: maxUpdatedAt(looks, previousCursors.looksLastSyncedAt),
};
// The run counts as incremental as soon as either previous cursor is set.
const fetchMode =
previousCursors.dashboardsLastSyncedAt || previousCursors.looksLastSyncedAt ? 'incremental' : 'full';
await writeJson(
params.stagedDir,
STAGED_FILES.syncConfig,
stagedSyncConfigSchema.parse({
lookerConnectionId: connectionId,
fetchedAt: now().toISOString(),
...(config.instanceBaseUrl ? { instanceBaseUrl: config.instanceBaseUrl } : {}),
previousCursors,
nextCursors,
}),
);
await writeJson(
params.stagedDir,
STAGED_FILES.scope,
stagedLookerScopeFileSchema.parse({
mode: fetchMode,
knownCurrentRawPaths: [...dashboardRawPaths, ...lookRawPaths].sort(),
fetchedRawPaths: fetchedRawPaths.sort(),
}),
);
const folders = stagedFoldersTreeFileSchema.parse(await client.listFolders());
await writeJson(params.stagedDir, STAGED_FILES.foldersTree, folders);
const users = await client.listUsers();
for (const rawUser of users) {
const user = stagedUserFileSchema.parse(rawUser);
await writeJson(params.stagedDir, `users/${safePathSegment(user.id)}.json`, user);
}
const groups = await client.listGroups();
for (const rawGroup of groups) {
const group = stagedGroupFileSchema.parse(rawGroup);
await writeJson(params.stagedDir, `groups/${safePathSegment(group.id)}.json`, group);
}
// Failing to list LookML models is downgraded to a warning; the run continues with no models.
let models: StagedLookmlModelsFile;
try {
models = stagedLookmlModelsFileSchema.parse(await client.listLookmlModels());
} catch (error) {
warnings.push(
issueForFetchError({
rawPath: STAGED_FILES.lookmlModels,
entityType: 'lookml_models',
entityId: null,
error,
severity: 'warning',
}),
);
models = stagedLookmlModelsFileSchema.parse({ models: [] });
}
await writeJson(params.stagedDir, STAGED_FILES.lookmlModels, models);
const exploreTargetsByKey = new Map<string, ExploreTargetSummary>();
const stagedExplores: StagedExploreFile[] = [];
for (const model of models.models) {
for (const exploreRef of model.explores) {
const rawPath = `explores/${safePathSegment(model.name)}/${safePathSegment(exploreRef.name)}.json`;
try {
const result = stampExploreWarehouseTarget(await client.getExplore(model.name, exploreRef.name), config);
stagedExplores.push(result.explore);
exploreTargetsByKey.set(exploreKey(result.explore.modelName, result.explore.exploreName), result.targetSummary);
await writeJson(
params.stagedDir,
`explores/${safePathSegment(result.explore.modelName)}/${safePathSegment(result.explore.exploreName)}.json`,
result.explore,
);
} catch (error) {
skipped.push(
issueForFetchError({
rawPath,
entityType: 'explore',
entityId: `${model.name}.${exploreRef.name}`,
error,
}),
);
}
}
}
warnings.push(...warehouseTargetWarnings(stagedExplores));
// Explore targets are complete now, so the buffered dashboards and looks can be stamped and written.
for (const dashboard of fetchedDashboards) {
await writeJson(params.stagedDir, dashboard.rawPath, stampDashboardQueries(dashboard.value, exploreTargetsByKey));
}
for (const look of fetchedLooks) {
await writeJson(params.stagedDir, look.rawPath, stampLookQuery(look.value, exploreTargetsByKey));
}
// Signals are best-effort: a failure becomes a warning and the schema's value for `{}` is staged instead.
let signals: StagedLookerSignalsFile;
try {
signals = stagedLookerSignalsFileSchema.parse(client.getSignals ? await client.getSignals() : {});
} catch (error) {
warnings.push(
issueForFetchError({
rawPath: STAGED_FILES.signals.dashboardUsage,
entityType: 'signals',
entityId: null,
error,
}),
);
signals = stagedLookerSignalsFileSchema.parse({});
}
await writeJson(params.stagedDir, STAGED_FILES.signals.dashboardUsage, signals.dashboardUsage);
await writeJson(params.stagedDir, STAGED_FILES.signals.lookUsage, signals.lookUsage);
await writeJson(params.stagedDir, STAGED_FILES.signals.scheduledPlans, signals.scheduledPlans);
await writeJson(params.stagedDir, STAGED_FILES.signals.favorites, signals.favorites);
await writeLookerEvidenceDocuments(params.stagedDir);
await writeLookerFetchReport(params.stagedDir, buildFetchReport(skipped, warnings));
} finally {
// Runs on success and on failure.
await client.cleanup?.();
}
}
/**
 * Writes `value` as pretty-printed JSON (2-space indent, trailing newline, UTF-8) to
 * `relPath` inside `stagedDir`, creating missing parent directories first.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const targetPath = join(stagedDir, relPath);
  const parentDir = dirname(targetPath);
  await mkdir(parentDir, { recursive: true });
  const payload = `${JSON.stringify(value, null, 2)}\n`;
  await writeFile(targetPath, payload, 'utf-8');
}
/**
 * Returns `value` unchanged when it is a non-empty run of ASCII letters, digits, `_` or `-`.
 *
 * @throws Error when `value` is empty or contains any other character, so that ids can never
 *   introduce path separators or dots into a staged file path.
 */
function safePathSegment(value: string): string {
  const isSafe = /^[a-zA-Z0-9_-]+$/.test(value);
  if (isSafe) {
    return value;
  }
  throw new Error(`Unsafe Looker staged path segment: ${value}`);
}
/**
 * Decides whether an entity must be fetched in this run.
 *
 * An entity is fetched when there is no cursor (full sync), when it carries no `updatedAt`,
 * when either timestamp cannot be parsed, or when it was updated strictly after the cursor.
 *
 * @param ref - Entity reference from a list call.
 * @param updatedSince - Cursor from the previous sync, or null for a full sync.
 * @returns true when the entity should be fetched.
 */
function shouldFetchEntity(ref: LookerEntityRef, updatedSince: string | null): boolean {
  if (!updatedSince || !ref.updatedAt) {
    return true;
  }
  const updatedAtMs = Date.parse(ref.updatedAt);
  const cursorMs = Date.parse(updatedSince);
  // Comparisons against NaN are always false, which would silently skip the entity on every
  // incremental run. Treat an unparseable timestamp like a missing one and fetch.
  if (Number.isNaN(updatedAtMs) || Number.isNaN(cursorMs)) {
    return true;
  }
  return updatedAtMs > cursorMs;
}
/**
 * Computes the next sync cursor: the latest parseable timestamp among `fallback` and the
 * `updatedAt` values of `refs`, normalized to an ISO-8601 UTC string.
 *
 * Missing and unparseable timestamps are ignored so that a single malformed value cannot
 * reset the cursor.
 *
 * @param refs - Entity references from a list call.
 * @param fallback - Previous cursor, or null when there is none.
 * @returns The normalized maximum, or null when no parseable timestamp exists.
 */
function maxUpdatedAt(refs: LookerEntityRef[], fallback: string | null): string | null {
  // Track the maximum as epoch milliseconds; comparing raw strings through Date.parse lets a
  // NaN candidate win the first comparison and then block every later one.
  let maxMs: number | null = null;
  const consider = (candidate: string | null | undefined): void => {
    if (!candidate) {
      return;
    }
    const ms = Date.parse(candidate);
    if (Number.isNaN(ms)) {
      return;
    }
    if (maxMs === null || ms > maxMs) {
      maxMs = ms;
    }
  };
  consider(fallback);
  for (const ref of refs) {
    consider(ref.updatedAt);
  }
  return maxMs === null ? null : new Date(maxMs).toISOString();
}
/**
 * Validates a raw explore payload and stamps it with its warehouse target.
 *
 * The explore's Looker connection name is mapped to a warehouse connection id, a target table
 * is resolved for the explore itself and for every join that has a raw `sql_table_name`
 * (joins without one get `targetTable: null`), and the stamped object is validated again.
 *
 * @returns The stamped explore plus the target summary read back from it.
 */
function stampExploreWarehouseTarget(rawExplore: unknown, config: LookerPullConfig): StampedExploreResult {
  const base = stagedExploreFileSchema.parse(rawExplore);
  const baseKey = exploreKey(base.modelName, base.exploreName);
  const warehouseConnectionId = connectionMappingFor(base.connectionName, config);

  const exploreTarget = targetTableFor({
    key: baseKey,
    rawSqlTableName: base.rawSqlTableName,
    targetWarehouseConnectionId: warehouseConnectionId,
    config,
    entityLabel: `Looker explore ${baseKey}`,
  });

  const stampedJoins = base.joins.map((joinEntry) => {
    const joinKey = `${baseKey}.${joinEntry.name}`;
    const joinTarget = joinEntry.rawSqlTableName
      ? targetTableFor({
          key: joinKey,
          rawSqlTableName: joinEntry.rawSqlTableName,
          targetWarehouseConnectionId: warehouseConnectionId,
          config,
          entityLabel: `Looker join ${joinKey}`,
        })
      : null;
    return { ...joinEntry, targetTable: joinTarget };
  });

  const explore = stagedExploreFileSchema.parse({
    ...base,
    targetWarehouseConnectionId: warehouseConnectionId,
    targetTable: exploreTarget,
    joins: stampedJoins,
  });

  // The summary is read from the validated explore, not from the locals above.
  const { targetWarehouseConnectionId, targetTable } = explore;
  return {
    explore,
    targetSummary: { targetWarehouseConnectionId, targetTable },
  };
}
/**
 * Looks up the warehouse connection id mapped to a Looker connection name.
 *
 * @returns The mapped id, or null when the name is null/empty or has no mapping.
 */
function connectionMappingFor(connectionName: string | null, config: LookerPullConfig): string | null {
  return connectionName ? (config.connectionMappings[connectionName] ?? null) : null;
}
/**
 * Resolves the target table for an explore or join.
 *
 * - No warehouse connection: null when there is no raw table name either, otherwise a
 *   `no_connection_mapping` failure.
 * - Warehouse connection present: the pre-parsed entry from `config.parsedTargetTables[key]`
 *   when one exists; otherwise null when there is no raw table name, or a `parse_error`
 *   failure when a raw table name exists without a parsed entry.
 */
function targetTableFor(input: {
  key: string;
  rawSqlTableName: string | null;
  targetWarehouseConnectionId: string | null;
  config: LookerPullConfig;
  entityLabel: string;
}): ParsedTargetTable | null {
  const { key, rawSqlTableName, targetWarehouseConnectionId, config, entityLabel } = input;

  if (!targetWarehouseConnectionId) {
    return rawSqlTableName
      ? {
          ok: false,
          reason: 'no_connection_mapping',
          detail: `${entityLabel} has no mapped warehouse connection.`,
        }
      : null;
  }

  const preParsed = config.parsedTargetTables[key];
  if (preParsed) {
    return preParsed;
  }

  return rawSqlTableName
    ? {
        ok: false,
        reason: 'parse_error',
        detail: `${entityLabel} has raw sql_table_name but no parsedTargetTables entry for key ${key}.`,
      }
    : null;
}
/** Builds the `<model>.<explore>` key used to index explore targets and parsed target tables. */
function exploreKey(modelName: string, exploreName: string): string {
  return [modelName, exploreName].join('.');
}
/**
 * Copies the warehouse target of the explore a query runs against onto the query.
 *
 * The explore is looked up by `query.model` and `query.view`. A null query stays null, and a
 * query whose explore has no recorded target is returned as the same object.
 */
function stampQueryWarehouseTarget(
  query: StagedLookerQuery | null,
  exploreTargetsByKey: Map<string, ExploreTargetSummary>,
): StagedLookerQuery | null {
  if (!query) {
    return null;
  }
  const summary = exploreTargetsByKey.get(exploreKey(query.model, query.view));
  if (summary) {
    const { targetWarehouseConnectionId, targetTable } = summary;
    return { ...query, targetWarehouseConnectionId, targetTable };
  }
  return query;
}
/**
 * Returns a validated copy of `dashboard` in which every tile's query has been stamped with
 * the warehouse target of its explore.
 */
function stampDashboardQueries(
  dashboard: StagedDashboardFile,
  exploreTargetsByKey: Map<string, ExploreTargetSummary>,
): StagedDashboardFile {
  const stampedTiles = dashboard.tiles.map((tile) => {
    const query = stampQueryWarehouseTarget(tile.query, exploreTargetsByKey);
    return { ...tile, query };
  });
  return stagedDashboardFileSchema.parse({ ...dashboard, tiles: stampedTiles });
}
/** Returns a validated copy of `look` whose query has been stamped with the warehouse target of its explore. */
function stampLookQuery(look: StagedLookFile, exploreTargetsByKey: Map<string, ExploreTargetSummary>): StagedLookFile {
  const query = stampQueryWarehouseTarget(look.query, exploreTargetsByKey);
  return stagedLookFileSchema.parse({ ...look, query });
}
/**
 * Builds report warnings for explores whose target table could not be resolved.
 *
 * Explores with no target table or a successful one produce nothing. Failures other than
 * `no_connection_mapping` yield one warning per explore, in input order. Explores failing
 * with `no_connection_mapping` are grouped by Looker connection name and yield one warning
 * per connection, appended afterwards in name order with the affected explores sorted.
 * Only the explore's own target table is inspected; join targets are not.
 */
function warehouseTargetWarnings(explores: StagedExploreFile[]): StagedLookerFetchIssue[] {
  const exploresByUnmappedConnection = new Map<string, string[]>();
  const issues: StagedLookerFetchIssue[] = [];

  for (const explore of explores) {
    const failure = explore.targetTable;
    if (failure && !failure.ok) {
      const sourceKey = exploreKey(explore.modelName, explore.exploreName);
      const lookerConnectionName = explore.connectionName ?? 'missing_connection_name';

      if (failure.reason === 'no_connection_mapping') {
        const bucket = exploresByUnmappedConnection.get(lookerConnectionName);
        if (bucket) {
          bucket.push(sourceKey);
        } else {
          exploresByUnmappedConnection.set(lookerConnectionName, [sourceKey]);
        }
      } else {
        issues.push({
          rawPath: `looker_connection_mappings/${safeWarningPathSegment(lookerConnectionName)}`,
          entityType: 'looker_connection_mapping',
          entityId: explore.connectionName,
          severity: 'warning',
          statusCode: null,
          message: `Looker explore ${sourceKey} has sql_table_name that cannot be mapped to a physical warehouse table: ${failure.reason}.`,
          retryRecommended: false,
          kind: warningKindForReason(failure.reason),
          details: {
            lookerConnectionName,
            rawSqlTableName: explore.rawSqlTableName,
            reason: failure.reason,
          },
        });
      }
    }
  }

  const unmappedNames = [...exploresByUnmappedConnection.keys()].sort((left, right) => left.localeCompare(right));
  for (const lookerConnectionName of unmappedNames) {
    const affectedExplores = [...(exploresByUnmappedConnection.get(lookerConnectionName) ?? [])].sort();
    const count = affectedExplores.length;
    issues.push({
      rawPath: `looker_connection_mappings/${safeWarningPathSegment(lookerConnectionName)}`,
      entityType: 'looker_connection_mapping',
      entityId: lookerConnectionName === 'missing_connection_name' ? null : lookerConnectionName,
      severity: 'warning',
      statusCode: null,
      message: `Looker connection ${lookerConnectionName} is not mapped to a warehouse connection; ${count} explore${count === 1 ? '' : 's'} will be wiki-only.`,
      retryRecommended: false,
      kind: 'unmapped_looker_connection',
      details: {
        lookerConnectionName,
        affectedExplores,
      },
    });
  }

  return issues;
}
/**
 * Maps a target-table failure reason to the warning kind used in the fetch report.
 * `looker_template_unresolved` and `derived_table_not_supported` map to themselves; every
 * other reason is reported as `unparseable_sql_table_name`.
 */
function warningKindForReason(reason: ParsedTargetTableFailureReason): StagedLookerFetchIssue['kind'] {
  switch (reason) {
    case 'looker_template_unresolved':
      return 'looker_template_unresolved';
    case 'derived_table_not_supported':
      return 'derived_table_not_supported';
    default:
      return 'unparseable_sql_table_name';
  }
}
/**
 * Makes a string safe for use inside a warning `rawPath` by collapsing every run of
 * characters other than ASCII letters, digits, `_` and `-` into a single `_`.
 * Unlike `safePathSegment`, this never throws.
 */
function safeWarningPathSegment(value: string): string {
  const unsafeRun = /[^a-zA-Z0-9_-]+/g;
  return value.replace(unsafeRun, '_');
}
/**
 * Converts a caught error into a fetch-report issue.
 *
 * Severity is the explicit `input.severity` when given; otherwise `warning` for the
 * `signals` entity type and `error` for everything else. A retry is recommended only when
 * the extracted status code is 429.
 */
function issueForFetchError(input: {
  rawPath: string;
  entityType: StagedLookerFetchIssue['entityType'];
  entityId: string | null;
  error: unknown;
  severity?: StagedLookerFetchIssue['severity'];
}): StagedLookerFetchIssue {
  const { rawPath, entityType, entityId, error } = input;
  const statusCode = errorStatusCode(error);
  const defaultSeverity = entityType === 'signals' ? 'warning' : 'error';
  const severity = input.severity ?? defaultSeverity;
  const message = errorMessage(error);
  const retryRecommended = statusCode === 429;
  return { rawPath, entityType, entityId, severity, statusCode, message, retryRecommended };
}
/** Returns `error.message` for `Error` instances and the `String(...)` conversion for anything else. */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
 * Extracts an HTTP-like status code from an error-shaped value.
 *
 * Reads `statusCode`, falling back to `status`. A number is returned as is; a string is
 * converted with `Number` and returned only when finite. When neither property holds a
 * number or string, the lookup recurses into `response` if that is an object.
 *
 * @returns The status code, or null when none can be found.
 */
function errorStatusCode(error: unknown): number | null {
  if (typeof error !== 'object' || error === null) {
    return null;
  }
  const record = error as Record<string, unknown>;
  const candidate = record.statusCode ?? record.status;
  switch (typeof candidate) {
    case 'number':
      return candidate;
    case 'string': {
      const numeric = Number(candidate);
      return Number.isFinite(numeric) ? numeric : null;
    }
    default:
      break;
  }
  const nested = record.response;
  return nested && typeof nested === 'object' ? errorStatusCode(nested) : null;
}
/**
 * Assembles the fetch report from the collected issues.
 *
 * The status is `partial` when anything was skipped or when any warning concerns a Looker
 * connection mapping, otherwise `success`. A retry is recommended when any skipped issue or
 * warning recommends one.
 */
function buildFetchReport(
  skipped: StagedLookerFetchIssue[],
  warnings: StagedLookerFetchIssue[],
): StagedLookerFetchReport {
  const wantsRetry = (issue: StagedLookerFetchIssue): boolean => issue.retryRecommended;
  const retryRecommended = skipped.some(wantsRetry) || warnings.some(wantsRetry);
  const isPartial =
    skipped.length > 0 || warnings.some((issue) => issue.entityType === 'looker_connection_mapping');
  return {
    status: isPartial ? 'partial' : 'success',
    retryRecommended,
    skipped,
    warnings,
  };
}

View file

@ -0,0 +1,67 @@
import type { KloLocalProject, KloProjectConnectionConfig } from '../../../project/index.js';
import {
DefaultLookerClientFactory,
DefaultLookerConnectionClientFactory,
type LookerCredentialResolver,
} from './factory.js';
import { LookerSourceAdapter } from './looker.adapter.js';
/** Returns the trimmed string when `value` is a string with non-whitespace content, otherwise null. */
function stringField(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
/**
 * Resolves a reference of the form `env:NAME` to the trimmed value of environment variable
 * `NAME`.
 *
 * @returns The value, or null when `ref` lacks the `env:` prefix or the variable is unset or blank.
 */
function resolveEnvReference(ref: string, env: NodeJS.ProcessEnv): string | null {
  const prefix = 'env:';
  if (ref.startsWith(prefix)) {
    const variableName = ref.slice(prefix.length);
    return stringField(env[variableName]);
  }
  return null;
}
/**
 * Extracts Looker API credentials from a local project connection entry.
 *
 * Snake-case and camel-case spellings are both accepted, and string values are trimmed.
 * The secret is taken from the first source that yields a value: `client_secret`,
 * `clientSecret`, then the environment variable referenced by `client_secret_ref` or
 * `clientSecretRef` (format `env:NAME`).
 *
 * @param connectionId - Connection id, used only in error messages.
 * @param connection - Connection entry from the project config; must have driver `looker` (case-insensitive).
 * @param env - Environment used to resolve `env:` references; defaults to `process.env`.
 * @returns The credentials as `{ base_url, client_id, client_secret }`.
 * @throws Error when the connection is missing, is not a Looker connection, or lacks a base
 *   URL, client id, or resolvable client secret.
 */
export function lookerCredentialsFromLocalConnection(
  connectionId: string,
  connection: KloProjectConnectionConfig | undefined,
  env: NodeJS.ProcessEnv = process.env,
) {
  if (!connection || String(connection.driver).toLowerCase() !== 'looker') {
    throw new Error(`Connection "${connectionId}" is not a Looker connection`);
  }
  const baseUrl = stringField(connection.base_url) ?? stringField(connection.baseUrl) ?? stringField(connection.url);
  const clientId = stringField(connection.client_id) ?? stringField(connection.clientId);
  // Resolve the trimmed reference: a whitespace-padded "env:NAME" passes the stringField check
  // but would fail the prefix match (or name the wrong variable) if resolved untrimmed.
  const snakeSecretRef = stringField(connection.client_secret_ref);
  const camelSecretRef = stringField(connection.clientSecretRef);
  const clientSecret =
    stringField(connection.client_secret) ??
    stringField(connection.clientSecret) ??
    (snakeSecretRef ? resolveEnvReference(snakeSecretRef, env) : null) ??
    (camelSecretRef ? resolveEnvReference(camelSecretRef, env) : null);
  if (!baseUrl) {
    throw new Error(`Connection "${connectionId}" is missing Looker base_url`);
  }
  if (!clientId) {
    throw new Error(`Connection "${connectionId}" is missing Looker client_id`);
  }
  if (!clientSecret) {
    throw new Error(`Connection "${connectionId}" is missing Looker client_secret or client_secret_ref`);
  }
  return { base_url: baseUrl, client_id: clientId, client_secret: clientSecret };
}
/**
 * Creates a credential resolver backed by the local project configuration.
 *
 * `resolve` looks the connection up in `project.config.connections` and converts it with
 * `lookerCredentialsFromLocalConnection`; validation failures surface as a rejected promise.
 */
export function createLocalLookerCredentialResolver(
  project: KloLocalProject,
  env: NodeJS.ProcessEnv = process.env,
): LookerCredentialResolver {
  const resolver: LookerCredentialResolver = {
    resolve: async (lookerConnectionId) => {
      const connection = project.config.connections[lookerConnectionId];
      return lookerCredentialsFromLocalConnection(lookerConnectionId, connection, env);
    },
  };
  return resolver;
}
/**
 * Builds a `LookerSourceAdapter` whose client factory obtains credentials from the local
 * project configuration (see `createLocalLookerCredentialResolver`).
 */
export function createLocalLookerSourceAdapter(
  project: KloLocalProject,
  env: NodeJS.ProcessEnv = process.env,
): LookerSourceAdapter {
  const credentialResolver = createLocalLookerCredentialResolver(project, env);
  const clientFactory = new DefaultLookerClientFactory(new DefaultLookerConnectionClientFactory(credentialResolver));
  return new LookerSourceAdapter({ clientFactory });
}

View file

@ -0,0 +1,116 @@
import { mkdtemp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { LocalLookerRuntimeStore } from './local-runtime-store.js';
// Specs for LocalLookerRuntimeStore. Each test gets its own SQLite file in a fresh temp directory.
describe('LocalLookerRuntimeStore', () => {
// Creates a store on a new temp database with a fixed clock.
// NOTE(review): the temp directory is not removed and the database is not closed afterwards.
async function store() {
const dir = await mkdtemp(join(tmpdir(), 'klo-looker-store-'));
return new LocalLookerRuntimeStore({
dbPath: join(dir, 'db.sqlite'),
now: () => new Date('2026-05-05T12:00:00.000Z'),
});
}
// Cursors round-trip, and readMappings returns the mapping without its `source` column.
it('stores cursors and connection mappings', async () => {
const local = await store();
await local.setCursors('prod-looker', {
dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
looksLastSyncedAt: null,
});
await local.upsertConnectionMapping({
lookerConnectionId: 'prod-looker',
lookerConnectionName: 'bq_reporting',
kloConnectionId: 'prod-warehouse',
source: 'cli',
});
await expect(local.readCursors('prod-looker')).resolves.toEqual({
dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
looksLastSyncedAt: null,
});
await expect(local.readMappings('prod-looker')).resolves.toEqual([
{
lookerConnectionName: 'bq_reporting',
kloConnectionId: 'prod-warehouse',
lookerHost: null,
lookerDatabase: null,
lookerDialect: null,
},
]);
});
// A refresh fills in host/database/dialect and keeps the existing target; the row's source becomes 'refresh'.
it('refreshes discovered metadata without dropping local targets', async () => {
const local = await store();
await local.upsertConnectionMapping({
lookerConnectionId: 'prod-looker',
lookerConnectionName: 'bq_reporting',
kloConnectionId: 'prod-warehouse',
source: 'cli',
});
await local.refreshDiscoveredConnections({
lookerConnectionId: 'prod-looker',
discovered: [
{
name: 'bq_reporting',
host: 'bigquery.googleapis.com',
database: 'analytics',
schema: null,
dialect: 'bigquery_standard_sql',
},
],
});
await expect(local.listConnectionMappings('prod-looker')).resolves.toEqual([
{
lookerConnectionName: 'bq_reporting',
kloConnectionId: 'prod-warehouse',
lookerHost: 'bigquery.googleapis.com',
lookerDatabase: 'analytics',
lookerDialect: 'bigquery_standard_sql',
source: 'refresh',
},
]);
});
// YAML bootstrap claims the unmapped refresh row ('analytics') but leaves the CLI-set row ('manual') untouched.
it('applies yaml mapping intent while preserving refresh metadata and cli overrides', async () => {
const local = await store();
await local.refreshDiscoveredConnections({
lookerConnectionId: 'prod-looker',
discovered: [{ name: 'analytics', host: 'looker-db.test', database: 'warehouse', schema: null, dialect: 'postgres' }],
});
await local.upsertConnectionMapping({
lookerConnectionId: 'prod-looker',
lookerConnectionName: 'manual',
kloConnectionId: 'cli-warehouse',
source: 'cli',
});
await local.applyYamlBootstrap({
lookerConnectionId: 'prod-looker',
mappings: [
{ lookerConnectionName: 'analytics', kloConnectionId: 'yaml-warehouse' },
{ lookerConnectionName: 'manual', kloConnectionId: 'yaml-warehouse' },
],
});
await expect(local.listConnectionMappings('prod-looker')).resolves.toMatchObject([
{
lookerConnectionName: 'analytics',
kloConnectionId: 'yaml-warehouse',
lookerHost: 'looker-db.test',
lookerDatabase: 'warehouse',
lookerDialect: 'postgres',
source: 'klo.yaml',
},
{
lookerConnectionName: 'manual',
kloConnectionId: 'cli-warehouse',
source: 'cli',
},
]);
});
});

View file

@ -0,0 +1,280 @@
import { mkdirSync } from 'node:fs';
import { dirname } from 'node:path';
import Database from 'better-sqlite3';
import type { LookerWarehouseConnectionInfo } from './client.js';
import type { LookerConnectionMapping } from './mapping.js';
import type { LookerRuntimeCursors } from './types.js';
/** Origin recorded in a mapping row's `source` column: project YAML bootstrap, a CLI upsert, or a discovery refresh. */
export type LocalLookerMappingSource = 'klo.yaml' | 'cli' | 'refresh';
/** Constructor options of `LocalLookerRuntimeStore`. */
interface LocalLookerRuntimeStoreOptions {
/** Path of the SQLite database file; missing parent directories are created. */
dbPath: string;
/** Clock used for `updated_at` stamps; defaults to `new Date()`. */
now?: () => Date;
}
/** A connection mapping as returned by `listConnectionMappings`: the mapping plus the source that last wrote the row. */
export interface LocalLookerConnectionMappingListRow extends LookerConnectionMapping {
source: LocalLookerMappingSource;
}
/** Input of `upsertConnectionMapping`. */
export interface UpsertLocalLookerConnectionMappingInput {
lookerConnectionId: string;
/** Name of the connection inside Looker; forms the row key together with `lookerConnectionId`. */
lookerConnectionName: string;
/** Target connection id to store, or null to store no target. */
kloConnectionId: string | null;
source: LocalLookerMappingSource;
}
/** Input of `applyYamlBootstrap`: the mappings declared for one Looker connection. */
interface ApplyLocalLookerYamlBootstrapInput {
lookerConnectionId: string;
mappings: Array<{
lookerConnectionName: string;
kloConnectionId: string | null;
}>;
}
/** Input of `refreshDiscoveredConnections`: warehouse connections discovered for one Looker connection. */
export interface RefreshLocalLookerDiscoveredConnectionsInput {
lookerConnectionId: string;
discovered: LookerWarehouseConnectionInfo[];
}
/** Input of `clearConnectionMappings`. */
export interface ClearLocalLookerMappingsInput {
lookerConnectionId: string;
/** When given, only this mapping is deleted; when omitted, all mappings of the Looker connection are deleted. */
lookerConnectionName?: string;
}
/** Read-only view of the per-connection Looker state: connection mappings and sync cursors. */
export interface LookerSourceStateReader {
readMappings(lookerConnectionId: string): Promise<LookerConnectionMapping[]>;
readCursors(lookerConnectionId: string): Promise<LookerRuntimeCursors>;
}
/**
 * SQLite-backed store for local Looker runtime state: the sync cursors of each Looker
 * connection and the mappings from Looker connection names to target connections.
 *
 * Every method is declared `async` but performs its database work synchronously through
 * better-sqlite3 before the returned promise settles.
 */
export class LocalLookerRuntimeStore implements LookerSourceStateReader {
private readonly db: Database.Database;
private readonly now: () => Date;
/**
 * Opens (creating if necessary) the database at `options.dbPath`, creates missing parent
 * directories, switches the journal to WAL, enables foreign keys and creates both tables
 * when they do not exist yet.
 */
constructor(options: LocalLookerRuntimeStoreOptions) {
mkdirSync(dirname(options.dbPath), { recursive: true });
this.db = new Database(options.dbPath);
this.db.pragma('journal_mode = WAL');
this.db.pragma('foreign_keys = ON');
this.now = options.now ?? (() => new Date());
this.db.exec(`
CREATE TABLE IF NOT EXISTS local_looker_runtime_config (
looker_connection_id TEXT PRIMARY KEY,
dashboards_last_synced_at TEXT,
looks_last_synced_at TEXT,
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS local_looker_connection_mappings (
looker_connection_id TEXT NOT NULL,
looker_connection_name TEXT NOT NULL,
klo_connection_id TEXT,
looker_host TEXT,
looker_database TEXT,
looker_dialect TEXT,
source TEXT NOT NULL,
updated_at TEXT NOT NULL,
PRIMARY KEY (looker_connection_id, looker_connection_name)
);
`);
}
/**
 * Applies the mappings declared in the project YAML, inside one transaction.
 *
 * For each mapping: a missing row is inserted with source `klo.yaml`; an existing row is
 * updated only when it came from a refresh and has no target yet. Rows with any other source,
 * or that already have a target, are left untouched.
 */
async applyYamlBootstrap(input: ApplyLocalLookerYamlBootstrapInput): Promise<void> {
const timestamp = this.now().toISOString();
const apply = this.db.transaction(() => {
const existing = this.db.prepare(`
SELECT klo_connection_id, source
FROM local_looker_connection_mappings
WHERE looker_connection_id = ? AND looker_connection_name = ?
`);
const insert = this.db.prepare(`
INSERT INTO local_looker_connection_mappings (
looker_connection_id,
looker_connection_name,
klo_connection_id,
looker_host,
looker_database,
looker_dialect,
source,
updated_at
)
VALUES (?, ?, ?, NULL, NULL, NULL, 'klo.yaml', ?)
`);
// The WHERE clause repeats the guard checked in the loop below.
const updateRefreshRow = this.db.prepare(`
UPDATE local_looker_connection_mappings
SET klo_connection_id = ?,
source = 'klo.yaml',
updated_at = ?
WHERE looker_connection_id = ?
AND looker_connection_name = ?
AND source = 'refresh'
AND klo_connection_id IS NULL
`);
for (const mapping of input.mappings) {
const row = existing.get(input.lookerConnectionId, mapping.lookerConnectionName) as
| { klo_connection_id: string | null; source: LocalLookerMappingSource }
| undefined;
if (!row) {
insert.run(input.lookerConnectionId, mapping.lookerConnectionName, mapping.kloConnectionId, timestamp);
continue;
}
if (row.source === 'refresh' && row.klo_connection_id === null) {
updateRefreshRow.run(mapping.kloConnectionId, timestamp, input.lookerConnectionId, mapping.lookerConnectionName);
}
}
});
apply();
}
/** Returns the stored sync cursors of a Looker connection; both are null when no row exists. */
async readCursors(lookerConnectionId: string): Promise<LookerRuntimeCursors> {
const row = this.db
.prepare(
`
SELECT dashboards_last_synced_at, looks_last_synced_at
FROM local_looker_runtime_config
WHERE looker_connection_id = ?
`,
)
.get(lookerConnectionId) as { dashboards_last_synced_at: string | null; looks_last_synced_at: string | null } | undefined;
return {
dashboardsLastSyncedAt: row?.dashboards_last_synced_at ?? null,
looksLastSyncedAt: row?.looks_last_synced_at ?? null,
};
}
/** Inserts or overwrites both sync cursors of a Looker connection and stamps `updated_at`. */
async setCursors(lookerConnectionId: string, cursors: LookerRuntimeCursors): Promise<void> {
this.db
.prepare(
`
INSERT INTO local_looker_runtime_config (
looker_connection_id,
dashboards_last_synced_at,
looks_last_synced_at,
updated_at
)
VALUES (?, ?, ?, ?)
ON CONFLICT(looker_connection_id) DO UPDATE SET
dashboards_last_synced_at = excluded.dashboards_last_synced_at,
looks_last_synced_at = excluded.looks_last_synced_at,
updated_at = excluded.updated_at
`,
)
.run(lookerConnectionId, cursors.dashboardsLastSyncedAt, cursors.looksLastSyncedAt, this.now().toISOString());
}
/** Returns the mappings of a Looker connection without the `source` field, ordered by connection name. */
async readMappings(lookerConnectionId: string): Promise<LookerConnectionMapping[]> {
return (await this.listConnectionMappings(lookerConnectionId)).map(({ source: _source, ...mapping }) => mapping);
}
/** Returns the mappings of a Looker connection including their `source`, ordered by connection name. */
async listConnectionMappings(lookerConnectionId: string): Promise<LocalLookerConnectionMappingListRow[]> {
const rows = this.db
.prepare(
`
SELECT
looker_connection_name,
klo_connection_id,
looker_host,
looker_database,
looker_dialect,
source
FROM local_looker_connection_mappings
WHERE looker_connection_id = ?
ORDER BY looker_connection_name
`,
)
.all(lookerConnectionId) as Array<{
looker_connection_name: string;
klo_connection_id: string | null;
looker_host: string | null;
looker_database: string | null;
looker_dialect: string | null;
source: LocalLookerMappingSource;
}>;
return rows.map((row) => ({
lookerConnectionName: row.looker_connection_name,
kloConnectionId: row.klo_connection_id,
lookerHost: row.looker_host,
lookerDatabase: row.looker_database,
lookerDialect: row.looker_dialect,
source: row.source,
}));
}
/**
 * Inserts a mapping, or on conflict overwrites its target and source.
 * Host, database and dialect of an existing row are kept; a new row starts with them NULL.
 */
async upsertConnectionMapping(input: UpsertLocalLookerConnectionMappingInput): Promise<void> {
this.db
.prepare(
`
INSERT INTO local_looker_connection_mappings (
looker_connection_id,
looker_connection_name,
klo_connection_id,
looker_host,
looker_database,
looker_dialect,
source,
updated_at
)
VALUES (?, ?, ?, NULL, NULL, NULL, ?, ?)
ON CONFLICT(looker_connection_id, looker_connection_name) DO UPDATE SET
klo_connection_id = excluded.klo_connection_id,
source = excluded.source,
updated_at = excluded.updated_at
`,
)
.run(input.lookerConnectionId, input.lookerConnectionName, input.kloConnectionId, input.source, this.now().toISOString());
}
/**
 * Records discovered warehouse connections, inside one transaction.
 *
 * A new row is inserted without a target and with source `refresh`. For an existing row the
 * host, database, dialect, source (set to `refresh`) and `updated_at` are overwritten while
 * the target is kept. The discovered `schema` is not stored.
 */
async refreshDiscoveredConnections(input: RefreshLocalLookerDiscoveredConnectionsInput): Promise<void> {
const timestamp = this.now().toISOString();
const update = this.db.transaction(() => {
const upsert = this.db.prepare(`
INSERT INTO local_looker_connection_mappings (
looker_connection_id,
looker_connection_name,
klo_connection_id,
looker_host,
looker_database,
looker_dialect,
source,
updated_at
)
VALUES (?, ?, NULL, ?, ?, ?, 'refresh', ?)
ON CONFLICT(looker_connection_id, looker_connection_name) DO UPDATE SET
looker_host = excluded.looker_host,
looker_database = excluded.looker_database,
looker_dialect = excluded.looker_dialect,
source = excluded.source,
updated_at = excluded.updated_at
`);
for (const connection of input.discovered) {
upsert.run(
input.lookerConnectionId,
connection.name,
connection.host,
connection.database,
connection.dialect,
timestamp,
);
}
});
update();
}
/**
 * Deletes one mapping when `input.lookerConnectionName` is provided, otherwise every mapping
 * of the Looker connection.
 */
async clearConnectionMappings(input: ClearLocalLookerMappingsInput): Promise<void> {
// Compare against undefined rather than testing truthiness: an empty-string name must not
// fall through to the branch that deletes every mapping.
if (input.lookerConnectionName !== undefined) {
this.db
.prepare(
`
DELETE FROM local_looker_connection_mappings
WHERE looker_connection_id = ? AND looker_connection_name = ?
`,
)
.run(input.lookerConnectionId, input.lookerConnectionName);
return;
}
this.db.prepare('DELETE FROM local_looker_connection_mappings WHERE looker_connection_id = ?').run(input.lookerConnectionId);
}
}

View file

@ -0,0 +1,125 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { LookerRuntimeClient } from './fetch.js';
import { LookerSourceAdapter } from './looker.adapter.js';
// Fixed UUID-shaped connection id shared by the specs below.
const connectionId = '11111111-1111-4111-8111-111111111111';
/**
 * Builds a mocked runtime client: no dashboards, looks, folders, users or groups, and a
 * single LookML model `b2b` with one explore `sales_pipeline`.
 */
function makeClient(): LookerRuntimeClient {
  const modelsFixture = {
    models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
  };
  const exploreFixture = {
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    fields: { dimensions: [], measures: [] },
    joins: [],
  };
  const emptyList = () => vi.fn().mockResolvedValue([]);

  return {
    listDashboards: emptyList(),
    getDashboard: vi.fn(),
    listLooks: emptyList(),
    getLook: vi.fn(),
    listFolders: vi.fn().mockResolvedValue({ folders: [] }),
    listUsers: emptyList(),
    listGroups: emptyList(),
    listLookmlModels: vi.fn().mockResolvedValue(modelsFixture),
    getExplore: vi.fn().mockResolvedValue(exploreFixture),
  };
}
// Specs for LookerSourceAdapter. Each test runs against a fresh temp staged directory that is removed afterwards.
describe('LookerSourceAdapter', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-adapter-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('exposes source="looker" and skillNames=["looker_ingest"]', () => {
const adapter = new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } });
expect(adapter.source).toBe('looker');
expect(adapter.skillNames).toEqual(['looker_ingest']);
});
it('enables context evidence indexing and delegates triage signals', async () => {
const adapter = new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } });
expect(adapter.evidenceIndexing).toBe('documents');
expect(adapter.triageSupported).toBe(true);
// Called against an empty staged dir; only the object type derived from the key is asserted.
await expect(adapter.getTriageSignals?.(stagedDir, 'looker:dashboard:10')).resolves.toMatchObject({
objectType: 'looker_dashboard',
});
});
// End-to-end through the adapter: fetch stages the mocked explore, detect recognises the dir, chunk yields one work unit.
it('fetches, detects, and chunks a runtime bundle through the composed adapter', async () => {
const adapter = new LookerSourceAdapter({
clientFactory: { createClient: vi.fn().mockResolvedValue(makeClient()) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
await mkdir(stagedDir, { recursive: true });
await adapter.fetch({ lookerConnectionId: connectionId }, stagedDir, { connectionId, sourceKey: 'looker' });
expect(await adapter.detect(stagedDir)).toBe(true);
expect(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8')).toContain('sales_pipeline');
const result = await adapter.chunk(stagedDir);
expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['looker-explore-b2b-sales_pipeline']);
});
// The adapter forwards the pull-succeeded context unchanged to the injected callback.
it('passes pull success notifications to the server callback', async () => {
const onPullSucceeded = vi.fn().mockResolvedValue(undefined);
const adapter = new LookerSourceAdapter({
clientFactory: { createClient: () => makeClient() },
onPullSucceeded,
});
const completedAt = new Date('2026-04-30T12:00:00.000Z');
await adapter.onPullSucceeded({
connectionId,
sourceKey: 'looker',
syncId: 'sync-1',
trigger: 'scheduled_pull',
completedAt,
stagedDir: '/tmp/staged',
});
expect(onPullSucceeded).toHaveBeenCalledWith({
connectionId,
sourceKey: 'looker',
syncId: 'sync-1',
trigger: 'scheduled_pull',
completedAt,
stagedDir: '/tmp/staged',
});
});
// In incremental mode a path that is known but was not fetched (10) is out of scope; a fetched
// path (11) and a path absent from the known list (12) are in scope.
it('describes incremental fetch scope from the staged scope file', async () => {
await mkdir(join(stagedDir, 'dashboards'), { recursive: true });
await writeFile(
join(stagedDir, 'looker-scope.json'),
JSON.stringify(
{
mode: 'incremental',
knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json'],
fetchedRawPaths: ['dashboards/11.json'],
},
null,
2,
),
);
const adapter = new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } });
const scope = await adapter.describeScope(stagedDir);
expect(scope.isPathInScope('dashboards/10.json')).toBe(false);
expect(scope.isPathInScope('dashboards/11.json')).toBe(true);
expect(scope.isPathInScope('dashboards/12.json')).toBe(true);
});
});

View file

@ -0,0 +1,70 @@
import type { ChunkResult, DiffSet, FetchContext, IngestTrigger, ScopeDescriptor, SourceAdapter } from '../../types.js';
import { chunkLookerStagedDir } from './chunk.js';
import { detectLookerStagedDir } from './detect.js';
import { getLookerTriageSignals } from './evidence-documents.js';
import { fetchLookerRuntimeBundle, type LookerClientFactory } from './fetch.js';
import { readLookerFetchReport } from './fetch-report.js';
import { describeLookerScope } from './scope.js';
import { listLookerTargetConnectionIds } from './target-connections.js';
/** Argument accepted by `LookerSourceAdapter.onPullSucceeded` and forwarded to the injected callback. */
interface LookerPullSucceededContext {
connectionId: string;
sourceKey: string;
syncId: string;
trigger: IngestTrigger;
completedAt: Date;
stagedDir: string;
}
/** Dependencies injected into `LookerSourceAdapter`. */
export interface LookerSourceAdapterDeps {
/** Passed through to `fetchLookerRuntimeBundle` on every fetch. */
clientFactory: LookerClientFactory;
/** Optional clock, also passed through to `fetchLookerRuntimeBundle`. */
now?: () => Date;
/** Optional callback invoked by `LookerSourceAdapter.onPullSucceeded`. */
onPullSucceeded?: (ctx: LookerPullSucceededContext) => Promise<void>;
}
/**
 * `SourceAdapter` implementation for the `looker` source.
 *
 * Each method hands its arguments to the matching helper imported from the sibling modules;
 * the only state held by the class is the injected dependency bundle.
 */
export class LookerSourceAdapter implements SourceAdapter {
  readonly source = 'looker';
  readonly skillNames: string[] = ['looker_ingest'];
  readonly evidenceIndexing = 'documents' as const;
  readonly triageSupported = true;

  constructor(private readonly deps: LookerSourceAdapterDeps) {}

  /** Delegates to `detectLookerStagedDir`. */
  detect(stagedDir: string): Promise<boolean> {
    return detectLookerStagedDir(stagedDir);
  }

  /** Delegates to `fetchLookerRuntimeBundle`, adding the injected client factory and clock. */
  fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const { clientFactory, now } = this.deps;
    return fetchLookerRuntimeBundle({ pullConfig, stagedDir, ctx, clientFactory, now });
  }

  /** Delegates to `chunkLookerStagedDir`. */
  chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    return chunkLookerStagedDir(stagedDir, diffSet);
  }

  /** Delegates to `readLookerFetchReport`. */
  readFetchReport(stagedDir: string) {
    return readLookerFetchReport(stagedDir);
  }

  /** Delegates to `listLookerTargetConnectionIds`. */
  listTargetConnectionIds(stagedDir: string): Promise<string[]> {
    return listLookerTargetConnectionIds(stagedDir);
  }

  /** Delegates to `getLookerTriageSignals`. */
  getTriageSignals(stagedDir: string, externalId: string) {
    return getLookerTriageSignals(stagedDir, externalId);
  }

  /** Delegates to `describeLookerScope`. */
  describeScope(stagedDir: string): Promise<ScopeDescriptor> {
    return describeLookerScope(stagedDir);
  }

  /** Awaits the injected pull-success callback when one was supplied; otherwise does nothing. */
  async onPullSucceeded(ctx: LookerPullSucceededContext): Promise<void> {
    if (this.deps.onPullSucceeded == null) {
      return;
    }
    await this.deps.onPullSucceeded(ctx);
  }
}

View file

@ -0,0 +1,384 @@
import { describe, expect, it, vi } from 'vitest';
import type { StagedExploreFile, StagedLookmlModelsFile } from './types.js';
import {
buildLookerPullConfigFromInputs,
collectExploreParseItems,
computeLookerMappingDrift,
discoverLookerConnections,
lookerDialectToConnectionType,
projectParsedIdentifier,
refreshLookerMappingPlaceholders,
sqlglotDialectForConnectionType,
suggestKloConnectionForLookerConnection,
validateLookerMappings,
validateLookerWarehouseTarget,
} from './mapping.js';
// Shared fixture: two discovered Looker connections, a BigQuery one without a schema and a
// Postgres one whose host carries an explicit port.
const liveConnections = [
{
name: 'b2b_sandbox_bq',
host: 'warehouse.example.com',
database: 'analytics',
schema: null,
dialect: 'bigquery_standard_sql',
},
{
name: 'pg_runtime',
host: 'pg.internal:5432',
database: 'app',
schema: 'public',
dialect: 'postgres',
},
];
// Shared fixture: an explore on the `b2b_sandbox_bq` connection with an aliased base table and
// one join; the target fields are left null.
const mappedExplore: StagedExploreFile = {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: 'proj.analytics.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: { dimensions: [], measures: [] },
joins: [
{
name: 'accounts',
type: 'left_outer',
relationship: 'many_to_one',
rawSqlTableName: 'proj.analytics.accounts',
sqlOn: null,
from: null,
targetTable: null,
},
],
targetWarehouseConnectionId: null,
targetTable: null,
};
// Shared fixture: a single model listing the one explore described by `mappedExplore`.
const models: StagedLookmlModelsFile = {
models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
};
// discoverLookerConnections: must call the client exactly once and return its result.
describe('discoverLookerConnections', () => {
it('delegates to the runtime client connection discovery method', async () => {
const client = { listLookerConnections: vi.fn().mockResolvedValue(liveConnections) };
await expect(discoverLookerConnections(client)).resolves.toEqual(liveConnections);
expect(client.listLookerConnections).toHaveBeenCalledTimes(1);
});
});
// Lookup helpers: Looker dialect -> connection type, connection type -> sqlglot dialect, and the
// target validation built on top of the latter.
describe('looker dialect and target validation helpers', () => {
it('maps Looker dialect names to KLO connection types', () => {
expect(lookerDialectToConnectionType('bigquery_standard_sql')).toBe('BIGQUERY');
expect(lookerDialectToConnectionType('postgres')).toBe('POSTGRESQL');
expect(lookerDialectToConnectionType('mssql')).toBe('SQLSERVER');
expect(lookerDialectToConnectionType('unknown')).toBeNull();
});
it('maps supported warehouse connection types to sqlglot dialects', () => {
expect(sqlglotDialectForConnectionType('BIGQUERY')).toBe('bigquery');
expect(sqlglotDialectForConnectionType('POSTGRESQL')).toBe('postgres');
// A Looker connection itself has no sqlglot dialect.
expect(sqlglotDialectForConnectionType('LOOKER')).toBeNull();
});
it('returns a structured failure for unsupported Looker warehouse targets', () => {
expect(validateLookerWarehouseTarget('LOOKER')).toEqual({
ok: false,
reason: 'Connection type LOOKER cannot be used as a Looker warehouse mapping target',
});
});
});
// suggestKloConnectionForLookerConnection: a suggestion is only made for a single unambiguous match.
describe('suggestKloConnectionForLookerConnection', () => {
it('returns the single deterministic target with matching type, host, and database', () => {
expect(
suggestKloConnectionForLookerConnection({
lookerConnection: liveConnections[1],
candidateConnections: [
{
id: 'wrong-type',
connection_type: 'MYSQL',
connection_params: { host: 'pg.internal', database: 'app' },
},
{
// Upper-case host/database and a missing port must still match the live connection.
id: 'pg-target',
connection_type: 'POSTGRESQL',
connection_params: { host: 'PG.INTERNAL', database: 'APP' },
},
],
}),
).toBe('pg-target');
});
it('returns null when more than one target matches', () => {
expect(
suggestKloConnectionForLookerConnection({
lookerConnection: liveConnections[1],
candidateConnections: [
{
id: 'first',
connection_type: 'POSTGRESQL',
connection_params: { host: 'pg.internal', database: 'app' },
},
{
id: 'second',
connection_type: 'POSTGRESQL',
connection_params: { host: 'pg.internal:5432', database: 'APP' },
},
],
}),
).toBeNull();
});
});
// refreshLookerMappingPlaceholders: stored targets survive, live metadata overwrites the stored
// host/database/dialect, and unseen live connections get a placeholder with a null target.
describe('refreshLookerMappingPlaceholders', () => {
it('adds newly discovered placeholders and refreshes live metadata without dropping saved targets', () => {
expect(
refreshLookerMappingPlaceholders({
stored: [
{
lookerConnectionName: 'b2b_sandbox_bq',
kloConnectionId: 'warehouse',
lookerHost: null,
lookerDatabase: null,
lookerDialect: null,
},
],
live: liveConnections,
}),
).toEqual({
changed: true,
mappings: [
{
lookerConnectionName: 'b2b_sandbox_bq',
kloConnectionId: 'warehouse',
lookerHost: 'warehouse.example.com',
lookerDatabase: 'analytics',
lookerDialect: 'bigquery_standard_sql',
},
{
lookerConnectionName: 'pg_runtime',
kloConnectionId: null,
lookerHost: 'pg.internal:5432',
lookerDatabase: 'app',
lookerDialect: 'postgres',
},
],
});
});
});
// Drift classification of stored mappings against discovery, plus validation of mapped target ids.
describe('computeLookerMappingDrift and validateLookerMappings', () => {
it('reports unmapped live connections, stale stored mappings, and in-sync mappings', () => {
expect(
computeLookerMappingDrift({
storedMappings: [
{
lookerConnectionName: 'b2b_sandbox_bq',
kloConnectionId: 'warehouse',
lookerHost: null,
lookerDatabase: null,
lookerDialect: null,
},
{
// Not present in `liveConnections`, so it must be reported as stale.
lookerConnectionName: 'stale_runtime',
kloConnectionId: 'warehouse',
lookerHost: null,
lookerDatabase: null,
lookerDialect: null,
},
],
discovered: liveConnections,
}),
).toEqual({
unmappedDiscovered: [liveConnections[1]],
staleMappings: [{ lookerConnectionName: 'stale_runtime', reason: 'looker_connection_not_found' }],
inSync: [{ lookerConnectionName: 'b2b_sandbox_bq', kloConnectionId: 'warehouse' }],
});
});
it('validates missing and unsupported target connection ids', () => {
expect(
validateLookerMappings({
mappings: [
{
lookerConnectionName: 'b2b_sandbox_bq',
kloConnectionId: 'missing',
lookerHost: null,
lookerDatabase: null,
lookerDialect: null,
},
{
lookerConnectionName: 'pg_runtime',
kloConnectionId: 'looker-target',
lookerHost: null,
lookerDatabase: null,
lookerDialect: null,
},
],
knownKloConnectionIds: new Set(['looker-target']),
knownConnectionTypes: new Map([['looker-target', 'LOOKER']]),
}),
).toEqual({
ok: false,
errors: [
{ key: 'b2b_sandbox_bq', reason: 'KLO connection missing does not exist' },
{
key: 'pg_runtime',
reason: 'Connection type LOOKER cannot be used as a Looker warehouse mapping target',
},
],
});
});
});
// Parser input collection for an explore, and projection of parser rows into the stored shape.
describe('collectExploreParseItems and projectParsedIdentifier', () => {
it('collects base explore and join parser inputs for mapped explores', () => {
expect(
collectExploreParseItems({
explore: mappedExplore,
connectionMappings: { b2b_sandbox_bq: 'warehouse' },
targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
}),
).toEqual({
parsedTargetTables: {},
parseItems: [
{
key: 'b2b.sales_pipeline',
sql_table_name: 'proj.analytics.opportunities AS opportunities',
dialect: 'bigquery',
},
{
// Join items are keyed as <model>.<explore>.<join name>.
key: 'b2b.sales_pipeline.accounts',
sql_table_name: 'proj.analytics.accounts',
dialect: 'bigquery',
},
],
});
});
it('projects successful and failed parser rows into KLO parsed target tables', () => {
expect(
projectParsedIdentifier({
ok: true,
catalog: 'proj',
schema: 'analytics',
name: 'accounts',
canonical_table: 'proj.analytics.accounts',
}),
).toEqual({
ok: true,
catalog: 'proj',
schema: 'analytics',
name: 'accounts',
canonicalTable: 'proj.analytics.accounts',
});
expect(projectParsedIdentifier({ ok: false, reason: 'derived_table_not_supported' })).toEqual({
ok: false,
reason: 'derived_table_not_supported',
});
});
});
// buildLookerPullConfigFromInputs: full assembly with a working parser, and degradation to
// per-item parse_error entries when the parser rejects.
describe('buildLookerPullConfigFromInputs', () => {
it('builds the hosted-equivalent Looker pull config from caller-loaded inputs', async () => {
const parser = {
parse: vi.fn().mockResolvedValue({
'b2b.sales_pipeline': {
ok: true,
catalog: 'proj',
schema: 'analytics',
name: 'opportunities',
canonical_table: 'proj.analytics.opportunities',
},
'b2b.sales_pipeline.accounts': {
ok: true,
catalog: 'proj',
schema: 'analytics',
name: 'accounts',
canonical_table: 'proj.analytics.accounts',
},
}),
};
const client = {
listLookmlModels: vi.fn().mockResolvedValue(models),
getExplore: vi.fn().mockResolvedValue(mappedExplore),
};
await expect(
buildLookerPullConfigFromInputs({
lookerConnectionId: 'prod-looker',
cursors: {
dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
looksLastSyncedAt: null,
},
refreshedMappings: [
{
lookerConnectionName: 'b2b_sandbox_bq',
kloConnectionId: 'warehouse',
lookerHost: 'warehouse.example.com',
lookerDatabase: 'analytics',
lookerDialect: 'bigquery_standard_sql',
},
],
targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
client,
parser,
}),
).resolves.toEqual({
lookerConnectionId: 'prod-looker',
// Cursor fields are renamed on the way into the pull config.
dashboardUpdatedSince: '2026-05-01T00:00:00.000Z',
lookUpdatedSince: null,
connectionMappings: { b2b_sandbox_bq: 'warehouse' },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: {
'b2b.sales_pipeline': {
ok: true,
catalog: 'proj',
schema: 'analytics',
name: 'opportunities',
canonicalTable: 'proj.analytics.opportunities',
},
'b2b.sales_pipeline.accounts': {
ok: true,
catalog: 'proj',
schema: 'analytics',
name: 'accounts',
canonicalTable: 'proj.analytics.accounts',
},
},
});
});
it('marks parser failures as parse_error without blocking pull-config construction', async () => {
// The parser rejects; the config must still be built.
const parser = { parse: vi.fn().mockRejectedValue(new Error('python unavailable')) };
const client = {
listLookmlModels: vi.fn().mockResolvedValue(models),
getExplore: vi.fn().mockResolvedValue(mappedExplore),
};
const config = await buildLookerPullConfigFromInputs({
lookerConnectionId: 'prod-looker',
cursors: { dashboardsLastSyncedAt: null, looksLastSyncedAt: null },
refreshedMappings: [
{
lookerConnectionName: 'b2b_sandbox_bq',
kloConnectionId: 'warehouse',
lookerHost: null,
lookerDatabase: null,
lookerDialect: null,
},
],
targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
client,
parser,
});
expect(config.parsedTargetTables).toMatchObject({
'b2b.sales_pipeline': { ok: false, reason: 'parse_error' },
'b2b.sales_pipeline.accounts': { ok: false, reason: 'parse_error' },
});
});
});

View file

@ -0,0 +1,442 @@
import type { LookerWarehouseConnectionInfo } from './client.js';
import type {
LookerPullConfig,
LookerRuntimeCursors,
ParsedTargetTable,
StagedExploreFile,
StagedLookmlModelsFile,
} from './types.js';
/**
 * Lower-case Looker dialect name -> connection type identifier.
 * Several dialect spellings share one connection type (e.g. `mssql`, `tsql`, `sqlserver`).
 */
export const LOOKER_DIALECT_TO_CONNECTION_TYPE = {
bigquery: 'BIGQUERY',
bigquery_standard_sql: 'BIGQUERY',
snowflake: 'SNOWFLAKE',
postgres: 'POSTGRESQL',
postgresql: 'POSTGRESQL',
mysql: 'MYSQL',
sqlite: 'SQLITE',
sqlserver: 'SQLSERVER',
mssql: 'SQLSERVER',
tsql: 'SQLSERVER',
clickhouse: 'CLICKHOUSE',
} as const;
/** Union of every connection type that appears as a value in `LOOKER_DIALECT_TO_CONNECTION_TYPE`. */
export type LookerWarehouseTargetConnectionType =
(typeof LOOKER_DIALECT_TO_CONNECTION_TYPE)[keyof typeof LOOKER_DIALECT_TO_CONNECTION_TYPE];
/** One stored mapping row: a Looker connection name and the KLO connection it targets, if any. */
export interface LookerConnectionMapping {
lookerConnectionName: string;
/** Null while the row is only a placeholder without a chosen target. */
kloConnectionId: string | null;
/** Host, database and dialect are overwritten from live discovery by `refreshLookerMappingPlaceholders`. */
lookerHost: string | null;
lookerDatabase: string | null;
lookerDialect: string | null;
}
/** A connection that a Looker connection may be mapped to. */
export interface LookerTargetConnection {
id: string;
connection_type: string;
/** Untyped parameter bag; host/database are read from it by the `extractWarehouse*` helpers. */
connection_params?: Record<string, unknown> | null;
}
/** Candidate shape accepted by `suggestKloConnectionForLookerConnection`; adds no members. */
export interface LookerMappingCandidateConnection extends LookerTargetConnection {}
/** Result of `computeLookerMappingDrift`. */
export interface LookerMappingDrift {
/** Discovered connections with no stored mapping that has a target id. */
unmappedDiscovered: LookerWarehouseConnectionInfo[];
/** Stored mappings whose Looker connection name was not discovered. */
staleMappings: Array<{ lookerConnectionName: string; reason: 'looker_connection_not_found' }>;
/** Stored mappings that were discovered and have a target id. */
inSync: Array<{ lookerConnectionName: string; kloConnectionId: string }>;
}
/** Result of `validateLookerMappings`; each error's `key` is the Looker connection name of the failing mapping. */
export type LookerMappingValidationResult =
| { ok: true }
| { ok: false; errors: Array<{ key: string; reason: string }> };
/** One input row for `LookerTableIdentifierParser.parse`. */
export interface LookerTableIdentifierParseItem {
/** `<model>.<explore>` for a base table, `<model>.<explore>.<join>` for a join. */
key: string;
sql_table_name: string;
/** sqlglot dialect name resolved from the target connection type. */
dialect: string;
}
/** The `reason` member of the failure variant of `ParsedTargetTable`. */
type ParsedTargetTableFailureReason = Extract<ParsedTargetTable, { ok: false }>['reason'];
/** One parser response row (snake_case); converted by `projectParsedIdentifier`. */
export interface LookerParsedIdentifier {
ok: boolean;
catalog?: string | null;
schema?: string | null;
name?: string | null;
canonical_table?: string | null;
reason?: ParsedTargetTableFailureReason | null;
detail?: string | null;
}
/** Parser abstraction: takes parse items and resolves to rows that are looked up by item key. */
export interface LookerTableIdentifierParser {
parse(items: LookerTableIdentifierParseItem[]): Promise<Record<string, LookerParsedIdentifier>>;
}
/** The client methods this module relies on; functions here take `Pick`s of this interface. */
export interface LookerMappingClient {
listLookerConnections(): Promise<LookerWarehouseConnectionInfo[]>;
listLookmlModels(): Promise<StagedLookmlModelsFile>;
getExplore(modelName: string, exploreName: string): Promise<StagedExploreFile>;
}
// Connection type -> sqlglot dialect name. A connection type with no entry here is rejected by
// `validateLookerWarehouseTarget`.
const SQLGLOT_DIALECT_BY_CONNECTION_TYPE: Partial<Record<LookerWarehouseTargetConnectionType, string>> = {
BIGQUERY: 'bigquery',
SNOWFLAKE: 'snowflake',
POSTGRESQL: 'postgres',
MYSQL: 'mysql',
SQLITE: 'sqlite',
SQLSERVER: 'tsql',
CLICKHOUSE: 'clickhouse',
};
/**
 * Lists the warehouse connections known to Looker.
 *
 * @param client Any object exposing `listLookerConnections`.
 * @returns Whatever the client resolves to, unmodified.
 */
export async function discoverLookerConnections(
  client: Pick<LookerMappingClient, 'listLookerConnections'>,
): Promise<LookerWarehouseConnectionInfo[]> {
  const connections = await client.listLookerConnections();
  return connections;
}
/**
 * Resolves a Looker dialect name to a connection type.
 *
 * @param dialect Dialect as reported by Looker; matched case-insensitively.
 * @returns The mapped connection type, or null for a missing, empty or unknown dialect.
 */
export function lookerDialectToConnectionType(dialect: string | null): LookerWarehouseTargetConnectionType | null {
  if (!dialect) {
    return null;
  }
  const key = dialect.toLowerCase();
  // Own-property check: a bare index lookup on the object literal would also resolve members
  // inherited from Object.prototype (e.g. 'constructor', '__proto__') and return a non-null,
  // non-string value for them.
  if (!Object.prototype.hasOwnProperty.call(LOOKER_DIALECT_TO_CONNECTION_TYPE, key)) {
    return null;
  }
  return LOOKER_DIALECT_TO_CONNECTION_TYPE[key as keyof typeof LOOKER_DIALECT_TO_CONNECTION_TYPE];
}
/**
 * Resolves a connection type to its sqlglot dialect name.
 *
 * @param connectionType Connection type identifier (e.g. `BIGQUERY`); matched exactly.
 * @returns The sqlglot dialect, or null when the connection type has no entry.
 */
export function sqlglotDialectForConnectionType(connectionType: string): string | null {
  // Own-property check: without it, names inherited from Object.prototype (such as 'constructor')
  // would resolve to a truthy non-string and be accepted as a valid warehouse target.
  if (!Object.prototype.hasOwnProperty.call(SQLGLOT_DIALECT_BY_CONNECTION_TYPE, connectionType)) {
    return null;
  }
  return SQLGLOT_DIALECT_BY_CONNECTION_TYPE[connectionType as LookerWarehouseTargetConnectionType] ?? null;
}
/**
 * Checks whether a connection type may be used as the target of a Looker connection mapping.
 * A type is accepted exactly when `sqlglotDialectForConnectionType` yields a dialect for it.
 *
 * @returns `{ ok: true }`, or `{ ok: false, reason }` naming the rejected connection type.
 */
export function validateLookerWarehouseTarget(connectionType: string): { ok: true } | { ok: false; reason: string } {
  if (sqlglotDialectForConnectionType(connectionType)) {
    return { ok: true };
  }
  const reason = `Connection type ${connectionType} cannot be used as a Looker warehouse mapping target`;
  return { ok: false, reason };
}
/**
 * Reads the host-like value out of a connection's parameter bag.
 *
 * POSTGRESQL, SQLSERVER, MYSQL and CLICKHOUSE use the `host` parameter; SNOWFLAKE uses `account`.
 * Every other connection type yields null.
 *
 * @param params Parameter bag; anything that is not a plain record is treated as empty.
 * @returns The string value, or null when absent, not a string, or the type is not handled.
 */
export function extractWarehouseHost(params: unknown, connectionType: string): string | null {
  const bag = isRecord(params) ? params : {};
  let hostKey: string | null = null;
  if (connectionType === 'SNOWFLAKE') {
    hostKey = 'account';
  } else if (['POSTGRESQL', 'SQLSERVER', 'MYSQL', 'CLICKHOUSE'].includes(connectionType)) {
    hostKey = 'host';
  }
  return hostKey === null ? null : readString(bag, hostKey);
}
/**
 * Reads the database-like value out of a connection's parameter bag.
 *
 * POSTGRESQL, SQLSERVER, MYSQL, CLICKHOUSE and SNOWFLAKE use the `database` parameter; BIGQUERY
 * uses `dataset_id`. Every other connection type yields null.
 *
 * @param params Parameter bag; anything that is not a plain record is treated as empty.
 * @returns The string value, or null when absent, not a string, or the type is not handled.
 */
export function extractWarehouseDatabase(params: unknown, connectionType: string): string | null {
  const bag = isRecord(params) ? params : {};
  let databaseKey: string | null = null;
  if (connectionType === 'BIGQUERY') {
    databaseKey = 'dataset_id';
  } else if (['POSTGRESQL', 'SQLSERVER', 'MYSQL', 'CLICKHOUSE', 'SNOWFLAKE'].includes(connectionType)) {
    databaseKey = 'database';
  }
  return databaseKey === null ? null : readString(bag, databaseKey);
}
/**
 * Normalizes a host for comparison: lower-cases it and removes a trailing `:<port>`.
 *
 * A bare (unbracketed) IPv6 literal is only lower-cased. Its final group can look like a port
 * (`2001:db8::1`), and stripping it would make distinct addresses compare equal. Bracketed
 * literals such as `[::1]:5432` still lose their port.
 *
 * @param value Host string, or null.
 * @returns The normalized host, or null for null/empty input.
 */
export function normalizeHost(value: string | null): string | null {
  if (!value) {
    return null;
  }
  const lowered = value.toLowerCase();
  const firstColon = lowered.indexOf(':');
  // More than one ':' without brackets cannot be host:port, so treat it as an IPv6 address.
  const isBareIpv6 = !lowered.startsWith('[') && firstColon !== -1 && firstColon !== lowered.lastIndexOf(':');
  return isBareIpv6 ? lowered : lowered.replace(/:\d+$/, '');
}
/**
 * Lower-cases a name for case-insensitive comparison.
 *
 * @returns The lower-cased name, or null for null/empty input.
 */
export function normalizeName(value: string | null): string | null {
  if (!value) {
    return null;
  }
  return value.toLowerCase();
}
/**
 * Suggests the id of the one candidate connection that matches a Looker connection.
 *
 * A candidate matches when its connection type equals the type derived from the Looker dialect
 * and its normalized host and database equal the Looker connection's normalized host and database.
 *
 * @returns The candidate id when exactly one candidate matches; null when the Looker connection
 *   lacks a usable dialect, host or database, or when zero or several candidates match.
 */
export function suggestKloConnectionForLookerConnection(args: {
  lookerConnection: LookerWarehouseConnectionInfo;
  candidateConnections: LookerMappingCandidateConnection[];
}): string | null {
  const { lookerConnection, candidateConnections } = args;
  const expectedType = lookerDialectToConnectionType(lookerConnection.dialect);
  const hasIdentity = Boolean(lookerConnection.host && lookerConnection.database && lookerConnection.dialect);
  if (!expectedType || !hasIdentity) {
    return null;
  }

  // The Looker side is the same for every candidate, so normalize it once.
  const wantedHost = normalizeHost(lookerConnection.host);
  const wantedDatabase = normalizeName(lookerConnection.database);

  const matches = candidateConnections.filter(({ connection_type, connection_params }) => {
    if (connection_type !== expectedType) {
      return false;
    }
    if (normalizeHost(extractWarehouseHost(connection_params, connection_type)) !== wantedHost) {
      return false;
    }
    return normalizeName(extractWarehouseDatabase(connection_params, connection_type)) === wantedDatabase;
  });

  if (matches.length !== 1) {
    return null;
  }
  return matches[0].id;
}
/**
 * Compares stored mappings against the connections discovered in Looker.
 *
 * - `unmappedDiscovered`: discovered connections with no stored mapping carrying a target id.
 * - `staleMappings`: stored mappings whose connection name was not discovered.
 * - `inSync`: stored mappings that were discovered and carry a target id.
 *
 * A stored mapping that was discovered but has no target id appears in neither
 * `staleMappings` nor `inSync`.
 */
export function computeLookerMappingDrift(args: {
  storedMappings: LookerConnectionMapping[];
  discovered: LookerWarehouseConnectionInfo[];
}): LookerMappingDrift {
  const { storedMappings, discovered } = args;
  const discoveredNames = new Set(discovered.map((connection) => connection.name));
  const storedByName = new Map(storedMappings.map((mapping) => [mapping.lookerConnectionName, mapping]));

  const unmappedDiscovered = discovered.filter((connection) => !storedByName.get(connection.name)?.kloConnectionId);

  const staleMappings: LookerMappingDrift['staleMappings'] = [];
  const inSync: LookerMappingDrift['inSync'] = [];
  for (const { lookerConnectionName, kloConnectionId } of storedMappings) {
    if (!discoveredNames.has(lookerConnectionName)) {
      staleMappings.push({ lookerConnectionName, reason: 'looker_connection_not_found' });
    } else if (kloConnectionId) {
      inSync.push({ lookerConnectionName, kloConnectionId });
    }
  }

  return { unmappedDiscovered, staleMappings, inSync };
}
/**
 * Validates the target of every mapping that has one.
 *
 * Mappings without a target id are skipped. A target id missing from `knownKloConnectionIds`
 * is reported as non-existent; otherwise its connection type (or `'unknown'` when the type map
 * has no entry) is checked with `validateLookerWarehouseTarget`.
 *
 * @returns `{ ok: true }` when nothing failed, else `{ ok: false, errors }` in mapping order.
 */
export function validateLookerMappings(args: {
  mappings: LookerConnectionMapping[];
  knownKloConnectionIds: Set<string>;
  knownConnectionTypes: ReadonlyMap<string, string>;
}): LookerMappingValidationResult {
  const errors: Array<{ key: string; reason: string }> = [];

  for (const { lookerConnectionName: key, kloConnectionId } of args.mappings) {
    if (!kloConnectionId) {
      continue;
    }
    if (!args.knownKloConnectionIds.has(kloConnectionId)) {
      errors.push({ key, reason: `KLO connection ${kloConnectionId} does not exist` });
      continue;
    }
    const verdict = validateLookerWarehouseTarget(args.knownConnectionTypes.get(kloConnectionId) ?? 'unknown');
    if (!verdict.ok) {
      errors.push({ key, reason: verdict.reason });
    }
  }

  if (errors.length > 0) {
    return { ok: false, errors };
  }
  return { ok: true };
}
/**
 * Merges live Looker connection metadata into the stored mappings.
 *
 * For every live connection: if no stored mapping exists, a placeholder with a null target is
 * appended; if one exists, its host/database/dialect are replaced by the live values while the
 * target id is kept. Stored mappings with no live counterpart are returned untouched.
 *
 * @returns The merged mappings (stored order first, new placeholders after) and whether anything
 *   was added or changed.
 */
export function refreshLookerMappingPlaceholders(args: {
  stored: LookerConnectionMapping[];
  live: LookerWarehouseConnectionInfo[];
}): { mappings: LookerConnectionMapping[]; changed: boolean } {
  const merged = new Map(args.stored.map((mapping) => [mapping.lookerConnectionName, mapping]));
  let changed = false;

  for (const { name, host, database, dialect } of args.live) {
    const previous = merged.get(name);
    const upToDate =
      previous !== undefined &&
      previous.lookerHost === host &&
      previous.lookerDatabase === database &&
      previous.lookerDialect === dialect;
    if (upToDate) {
      continue;
    }
    const base: Pick<LookerConnectionMapping, 'lookerConnectionName' | 'kloConnectionId'> = previous ?? {
      lookerConnectionName: name,
      kloConnectionId: null,
    };
    // Re-setting an existing key keeps its position in the Map's iteration order.
    merged.set(name, { ...base, lookerHost: host, lookerDatabase: database, lookerDialect: dialect });
    changed = true;
  }

  return { mappings: Array.from(merged.values()), changed };
}
/**
 * Builds the parser inputs for one explore.
 *
 * - If the explore has no connection name, or that connection is not mapped, both outputs are empty.
 * - If the mapped target has no sqlglot dialect (or is missing from `targetConnections`), the
 *   explore key is recorded in `parsedTargetTables` as `unsupported_dialect` and nothing is queued.
 * - Otherwise one item is queued for the base table and one per join, skipping entries without a
 *   raw `sql_table_name`.
 *
 * Keys are `<model>.<explore>` for the base table and `<model>.<explore>.<join>` for joins.
 */
export function collectExploreParseItems(args: {
  explore: StagedExploreFile;
  connectionMappings: Record<string, string>;
  targetConnections: ReadonlyMap<string, Pick<LookerTargetConnection, 'id' | 'connection_type'>>;
}): { parsedTargetTables: Record<string, ParsedTargetTable>; parseItems: LookerTableIdentifierParseItem[] } {
  const { explore, connectionMappings, targetConnections } = args;
  const parsedTargetTables: Record<string, ParsedTargetTable> = {};
  const parseItems: LookerTableIdentifierParseItem[] = [];

  const connectionName = explore.connectionName;
  const targetId = connectionName ? connectionMappings[connectionName] : undefined;
  if (!connectionName || !targetId) {
    return { parsedTargetTables, parseItems };
  }

  const target = targetConnections.get(targetId);
  const dialect = target ? sqlglotDialectForConnectionType(target.connection_type) : null;
  const exploreKey = `${explore.modelName}.${explore.exploreName}`;
  if (!dialect) {
    parsedTargetTables[exploreKey] = {
      ok: false,
      reason: 'unsupported_dialect',
      detail: `Connection type ${target?.connection_type ?? 'unknown'} does not map to a supported sqlglot dialect.`,
    };
    return { parsedTargetTables, parseItems };
  }

  // Base table first, then joins in declaration order.
  const candidates = [
    { key: exploreKey, sql: explore.rawSqlTableName },
    ...explore.joins.map((join) => ({ key: `${exploreKey}.${join.name}`, sql: join.rawSqlTableName })),
  ];
  for (const { key, sql } of candidates) {
    if (sql) {
      parseItems.push({ key, sql_table_name: sql, dialect });
    }
  }
  return { parsedTargetTables, parseItems };
}
/**
 * Converts one parser response row into the `ParsedTargetTable` shape.
 *
 * - Missing row: `parse_error`, noting that the parser response lacked the key.
 * - `ok` row with both `name` and `canonical_table`: success; absent catalog/schema become null.
 * - Anything else: failure with the parser's `reason` (default `parse_error`). The parser's own
 *   `detail` is kept when it sent one. When it sent neither reason nor detail, a fallback detail
 *   is chosen according to whether the row claimed success or failure.
 *
 * @param row Parser row for one key, or undefined when the key was absent from the response.
 */
export function projectParsedIdentifier(row: LookerParsedIdentifier | undefined): ParsedTargetTable {
  if (!row) {
    return { ok: false, reason: 'parse_error', detail: 'Python parser response was missing this key.' };
  }
  if (row.ok && row.name && row.canonical_table) {
    return {
      ok: true,
      catalog: row.catalog ?? null,
      schema: row.schema ?? null,
      name: row.name,
      canonicalTable: row.canonical_table,
    };
  }

  // A row with an explicit reason needs no synthetic explanation.
  let fallbackDetail: string | undefined;
  if (!row.reason) {
    fallbackDetail = row.ok
      ? 'Python parser returned an invalid success row without name or canonical_table.'
      : 'Python parser returned a failure row without a reason.';
  }
  return {
    ok: false,
    reason: row.reason ?? 'parse_error',
    // Keep the parser's explanation instead of discarding it.
    detail: row.detail ?? fallbackDetail,
  };
}
/**
 * Assembles a `LookerPullConfig` from inputs the caller has already loaded.
 *
 * Only mappings that have a target id, whose target is present in `targetConnections`, and whose
 * connection type passes `validateLookerWarehouseTarget` contribute to `connectionMappings` and
 * `connectionTypes`. Table identifiers are then resolved through `parseExploreTargets`, and the
 * runtime cursors are copied into the `dashboardUpdatedSince` / `lookUpdatedSince` fields.
 */
export async function buildLookerPullConfigFromInputs(args: {
  lookerConnectionId: string;
  cursors: LookerRuntimeCursors;
  refreshedMappings: LookerConnectionMapping[];
  targetConnections: ReadonlyMap<string, Pick<LookerTargetConnection, 'id' | 'connection_type'>>;
  client: Pick<LookerMappingClient, 'listLookmlModels' | 'getExplore'>;
  parser: LookerTableIdentifierParser;
}): Promise<LookerPullConfig> {
  const connectionMappings: Record<string, string> = {};
  const connectionTypes: Record<string, LookerWarehouseTargetConnectionType> = {};

  for (const { lookerConnectionName, kloConnectionId } of args.refreshedMappings) {
    const target = kloConnectionId ? args.targetConnections.get(kloConnectionId) : undefined;
    if (kloConnectionId && target && validateLookerWarehouseTarget(target.connection_type).ok) {
      connectionMappings[lookerConnectionName] = kloConnectionId;
      // Validation passed, so the connection type is one of the supported target types.
      connectionTypes[lookerConnectionName] = target.connection_type as LookerWarehouseTargetConnectionType;
    }
  }

  const parsedTargetTables = await parseExploreTargets({
    client: args.client,
    connectionMappings,
    targetConnections: args.targetConnections,
    parser: args.parser,
  });

  const { dashboardsLastSyncedAt, looksLastSyncedAt } = args.cursors;
  return {
    lookerConnectionId: args.lookerConnectionId,
    dashboardUpdatedSince: dashboardsLastSyncedAt,
    lookUpdatedSince: looksLastSyncedAt,
    connectionMappings,
    connectionTypes,
    parsedTargetTables,
  };
}
/**
 * Resolves table identifiers for every explore of every model.
 *
 * Best-effort by design:
 * - if the model listing fails, an empty result is returned;
 * - an explore that cannot be fetched is skipped;
 * - if the parser call fails, every queued item is recorded as `parse_error`.
 *
 * Explores are fetched one at a time, in listing order, and the parser is called once with all
 * collected items (and not at all when nothing was collected).
 */
async function parseExploreTargets(args: {
  client: Pick<LookerMappingClient, 'listLookmlModels' | 'getExplore'>;
  connectionMappings: Record<string, string>;
  targetConnections: ReadonlyMap<string, Pick<LookerTargetConnection, 'id' | 'connection_type'>>;
  parser: LookerTableIdentifierParser;
}): Promise<Record<string, ParsedTargetTable>> {
  const { client, connectionMappings, targetConnections, parser } = args;
  const outcome: Record<string, ParsedTargetTable> = {};
  const queued: LookerTableIdentifierParseItem[] = [];

  let listing: StagedLookmlModelsFile;
  try {
    listing = await client.listLookmlModels();
  } catch {
    return outcome;
  }

  for (const model of listing.models) {
    for (const { name: exploreName } of model.explores) {
      let explore: StagedExploreFile;
      try {
        explore = await client.getExplore(model.name, exploreName);
      } catch {
        continue;
      }
      const { parsedTargetTables, parseItems } = collectExploreParseItems({
        explore,
        connectionMappings,
        targetConnections,
      });
      Object.assign(outcome, parsedTargetTables);
      queued.push(...parseItems);
    }
  }

  if (queued.length === 0) {
    return outcome;
  }

  let rows: Record<string, LookerParsedIdentifier> | undefined;
  let parserFailed = false;
  try {
    rows = await parser.parse(queued);
  } catch {
    parserFailed = true;
  }

  for (const { key } of queued) {
    outcome[key] = parserFailed
      ? {
          ok: false,
          reason: 'parse_error',
          detail: 'Python parse-table-identifier failed during Looker pull-config projection.',
        }
      : projectParsedIdentifier((rows as Record<string, LookerParsedIdentifier>)[key]);
  }
  return outcome;
}
/** Type guard: true for any non-null, non-array value whose `typeof` is `'object'`. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
/** Returns `record[key]` when it is a string (including the empty string); otherwise null. */
function readString(record: Record<string, unknown>, key: string): string | null {
  const candidate = record[key];
  if (typeof candidate !== 'string') {
    return null;
  }
  return candidate;
}

View file

@ -0,0 +1,25 @@
import { describe, expect, it } from 'vitest';
import { buildLookerReconcileNotes, lookerRuntimeSourceToFileAdapterSource } from './reconcile.js';
// lookerRuntimeSourceToFileAdapterSource: strips the `looker__` prefix from well-formed names and
// rejects everything else.
describe('lookerRuntimeSourceToFileAdapterSource', () => {
it('maps API-derived Looker source names to file-adapter source names', () => {
expect(lookerRuntimeSourceToFileAdapterSource('looker__b2b__sales_pipeline')).toBe('b2b__sales_pipeline');
expect(lookerRuntimeSourceToFileAdapterSource('looker__finance__orders')).toBe('finance__orders');
});
it('ignores non-Looker and malformed source names', () => {
// No `looker__` prefix.
expect(lookerRuntimeSourceToFileAdapterSource('b2b__sales_pipeline')).toBeNull();
// Prefix present but only one segment after it.
expect(lookerRuntimeSourceToFileAdapterSource('looker__missing_explore')).toBeNull();
});
});
// buildLookerReconcileNotes: pins the exact wording of the single reconcile note.
describe('buildLookerReconcileNotes', () => {
it('instructs reconciliation to record subsumed provenance', () => {
expect(buildLookerReconcileNotes()).toEqual([
[
'Looker runtime API-derived SL sources use looker__<model>__<explore>.',
'If the unprefixed file-adapter source <model>__<explore> exists, prefer it in wiki sl_refs, delete or avoid the API-derived source, and call emit_artifact_resolution with actionType="subsumed" for the API raw explore path.',
].join(' '),
]);
});
});

View file

@ -0,0 +1,21 @@
/**
 * Maps a `looker__<model>__<explore>` source name to the unprefixed `<model>__<explore>` name.
 *
 * @returns The name without the `looker__` prefix, or null when the prefix is missing, fewer than
 *   two `__`-separated segments follow it, or any segment is empty.
 */
export function lookerRuntimeSourceToFileAdapterSource(sourceName: string): string | null {
  const prefix = 'looker__';
  if (!sourceName.startsWith(prefix)) {
    return null;
  }
  const remainder = sourceName.slice(prefix.length);
  const segments = remainder.split('__');
  const wellFormed = segments.length >= 2 && segments.every((segment) => segment.length > 0);
  // Joining the segments back with '__' reproduces `remainder`, so it is returned directly.
  return wellFormed ? remainder : null;
}
/**
 * Returns the reconcile notes for Looker sources.
 *
 * @returns A one-element array: the naming convention sentence and the instruction sentence,
 *   joined by a single space.
 */
export function buildLookerReconcileNotes(): string[] {
  const namingConvention = 'Looker runtime API-derived SL sources use looker__<model>__<explore>.';
  const instruction =
    'If the unprefixed file-adapter source <model>__<explore> exists, prefer it in wiki sl_refs, delete or avoid the API-derived source, and call emit_artifact_resolution with actionType="subsumed" for the API raw explore path.';
  return [`${namingConvention} ${instruction}`];
}

View file

@ -0,0 +1,101 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { describeLookerScope, hashLookerScope, isPathInLookerScope } from './scope.js';
// Test helper: writes `value` as 2-space-indented JSON with a trailing newline to
// <stagedDir>/<relPath>, creating parent directories as needed.
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const target = join(stagedDir, relPath);
  const parentDir = join(target, '..');
  await mkdir(parentDir, { recursive: true });
  const body = JSON.stringify(value, null, 2) + '\n';
  await writeFile(target, body, 'utf-8');
}
// Scope semantics: in incremental mode an entity path is out of scope only when it is listed as
// known-current but was not fetched; in full mode every path is in scope.
describe('Looker runtime fetch scope', () => {
let stagedDir: string;
// Each test gets a fresh temp directory that is removed afterwards.
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-scope-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('keeps omitted known-current entity files out of the deletion baseline', () => {
const scope = {
mode: 'incremental' as const,
knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json'],
fetchedRawPaths: ['dashboards/11.json'],
};
expect(isPathInLookerScope('dashboards/10.json', scope)).toBe(false);
expect(isPathInLookerScope('looks/20.json', scope)).toBe(false);
expect(isPathInLookerScope('dashboards/11.json', scope)).toBe(true);
expect(isPathInLookerScope('looks/21.json', scope)).toBe(true);
// Paths that are not dashboard/look entity files are always in scope.
expect(isPathInLookerScope('signals/dashboard_usage.json', scope)).toBe(true);
expect(isPathInLookerScope('explores/b2b/sales_pipeline.json', scope)).toBe(true);
});
it('keeps omitted unchanged evidence documents out of incremental delete scope', () => {
const scope = {
mode: 'incremental' as const,
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
fetchedRawPaths: ['dashboards/10.json'],
};
// Evidence documents follow the scope decision of the entity they belong to.
expect(isPathInLookerScope('evidence/dashboards/10/page.md', scope)).toBe(true);
expect(isPathInLookerScope('evidence/dashboards/10/metadata.json', scope)).toBe(true);
expect(isPathInLookerScope('evidence/looks/20/page.md', scope)).toBe(false);
expect(isPathInLookerScope('evidence/looks/20/metadata.json', scope)).toBe(false);
});
it('treats full scope as all raw paths in scope', () => {
const scope = {
mode: 'full' as const,
knownCurrentRawPaths: ['dashboards/10.json'],
fetchedRawPaths: ['dashboards/10.json'],
};
expect(isPathInLookerScope('dashboards/10.json', scope)).toBe(true);
expect(isPathInLookerScope('dashboards/99.json', scope)).toBe(true);
expect(isPathInLookerScope('looks/20.json', scope)).toBe(true);
});
it('hashes scope order-insensitively', () => {
const a = hashLookerScope({
mode: 'incremental',
knownCurrentRawPaths: ['looks/20.json', 'dashboards/10.json'],
fetchedRawPaths: ['dashboards/10.json'],
});
const b = hashLookerScope({
mode: 'incremental',
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
fetchedRawPaths: ['dashboards/10.json'],
});
expect(a).toBe(b);
// 64 lower-case hex characters.
expect(a).toMatch(/^[0-9a-f]{64}$/);
});
it('reads staged scope and returns a SourceAdapter ScopeDescriptor', async () => {
await writeJson(stagedDir, 'looker-scope.json', {
mode: 'incremental',
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
fetchedRawPaths: ['dashboards/10.json'],
});
const descriptor = await describeLookerScope(stagedDir);
expect(descriptor.fingerprint).toMatch(/^[0-9a-f]{64}$/);
expect(descriptor.isPathInScope('dashboards/10.json')).toBe(true);
expect(descriptor.isPathInScope('looks/20.json')).toBe(false);
expect(descriptor.isPathInScope('looks/99.json')).toBe(true);
});
it('falls back to full scope when old fixtures do not have a scope file', async () => {
// No scope file is written here, so the ENOENT fallback is exercised.
const descriptor = await describeLookerScope(stagedDir);
expect(descriptor.isPathInScope('dashboards/10.json')).toBe(true);
expect(descriptor.isPathInScope('looks/20.json')).toBe(true);
});
});

View file

@ -0,0 +1,63 @@
import { createHash } from 'node:crypto';
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';
import type { ScopeDescriptor } from '../../types.js';
import { STAGED_FILES, type StagedLookerScopeFile, stagedLookerScopeFileSchema } from './types.js';
// Matches a raw dashboard or look entity file directly under its kind folder,
// e.g. `dashboards/10.json` or `looks/20.json`.
const LOOKER_ENTITY_PATH_RE = /^(dashboards|looks)\/[^/]+\.json$/;
// Matches per-entity evidence files (`metadata.json` or `page.md`) under
// `evidence/<dashboards|looks>/<id>/`. Group 1 is the entity kind, group 2 the id segment.
const LOOKER_EVIDENCE_ENTITY_PATH_RE = /^evidence\/(dashboards|looks)\/([^/]+)\/(?:metadata\.json|page\.md)$/;
/**
 * Builds the scope descriptor for a staged Looker pull.
 *
 * Reads the staged scope via `readLookerScope`, then exposes its hash as the
 * fingerprint and a predicate that answers whether a raw path is in scope.
 *
 * @param stagedDir Directory holding the staged Looker files.
 * @returns Descriptor with `fingerprint` and `isPathInScope`.
 */
export async function describeLookerScope(stagedDir: string): Promise<ScopeDescriptor> {
  const stagedScope = await readLookerScope(stagedDir);
  const fingerprint = hashLookerScope(stagedScope);
  const isPathInScope = (rawPath: string): boolean => isPathInLookerScope(rawPath, stagedScope);
  return { fingerprint, isPathInScope };
}
/**
 * Reads and validates the staged scope file inside `stagedDir`.
 *
 * A missing file (`ENOENT`) is not an error: it yields a `full` scope with empty
 * path lists. Any other failure (unreadable file, invalid JSON, schema mismatch)
 * is rethrown unchanged.
 *
 * @param stagedDir Directory holding the staged Looker files.
 * @returns The parsed scope, or the full-scope fallback when the file is absent.
 */
export async function readLookerScope(stagedDir: string): Promise<StagedLookerScopeFile> {
  const scopePath = join(stagedDir, STAGED_FILES.scope);
  try {
    const contents = await readFile(scopePath, 'utf-8');
    return stagedLookerScopeFileSchema.parse(JSON.parse(contents));
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return { mode: 'full', knownCurrentRawPaths: [], fetchedRawPaths: [] };
  }
}
/**
 * Computes a SHA-256 hex fingerprint of a scope.
 *
 * Both path lists are sorted on a copy before hashing, so the result does not
 * depend on list order and the input arrays are left untouched.
 *
 * @param scope Scope to fingerprint.
 * @returns 64-character lowercase hex digest.
 */
export function hashLookerScope(scope: StagedLookerScopeFile): string {
  const sortedCopy = (paths: readonly string[]): string[] => Array.from(paths).sort();
  const payload = {
    mode: scope.mode,
    knownCurrentRawPaths: sortedCopy(scope.knownCurrentRawPaths),
    fetchedRawPaths: sortedCopy(scope.fetchedRawPaths),
  };
  const hasher = createHash('sha256');
  hasher.update(JSON.stringify(payload));
  return hasher.digest('hex');
}
/**
 * Decides whether a raw path belongs to the given scope.
 *
 * - `full` mode: everything is in scope.
 * - Otherwise, paths that do not resolve to a dashboard/look entity are in scope.
 * - Entity paths are in scope when they were fetched, or when they are not listed
 *   among the known-current paths.
 *
 * @param rawPath Raw path to test (entity file or per-entity evidence file).
 * @param scope Staged scope to test against.
 */
export function isPathInLookerScope(rawPath: string, scope: StagedLookerScopeFile): boolean {
  if (scope.mode === 'full') {
    return true;
  }
  const entityPath = scopedEntityRawPath(rawPath);
  if (!entityPath) {
    return true;
  }
  const wasFetched = scope.fetchedRawPaths.includes(entityPath);
  const isKnownCurrent = scope.knownCurrentRawPaths.includes(entityPath);
  return wasFetched || !isKnownCurrent;
}
/**
 * Maps a raw path to the entity path used for scope decisions.
 *
 * Entity files map to themselves; per-entity evidence files map to
 * `<kind>/<id>.json`; anything else yields `null`.
 */
function scopedEntityRawPath(rawPath: string): string | null {
  if (LOOKER_ENTITY_PATH_RE.test(rawPath)) {
    return rawPath;
  }
  const evidenceMatch = LOOKER_EVIDENCE_ENTITY_PATH_RE.exec(rawPath);
  if (evidenceMatch === null) {
    return null;
  }
  const [, kind, id] = evidenceMatch;
  return `${kind}/${id}.json`;
}

View file

@ -0,0 +1,86 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { listLookerTargetConnectionIds } from './target-connections.js';
// Tests for listLookerTargetConnectionIds against a throwaway staged directory on disk.
describe('listLookerTargetConnectionIds', () => {
let stagedDir: string;
// Each test gets a fresh temp directory, removed afterwards.
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-targets-'));
});
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('collects unique target warehouse IDs from explores, dashboard queries, and Look queries', async () => {
await mkdir(join(stagedDir, 'explores', 'b2b'), { recursive: true });
await mkdir(join(stagedDir, 'dashboards'), { recursive: true });
await mkdir(join(stagedDir, 'looks'), { recursive: true });
// Explore pointing at warehouse 2222….
await writeFile(
join(stagedDir, 'explores', 'b2b', 'sales_pipeline.json'),
JSON.stringify({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: null,
description: null,
fields: { dimensions: [], measures: [] },
joins: [],
targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222',
}),
);
// Dashboard whose single tile query points at warehouse 3333….
await writeFile(
join(stagedDir, 'dashboards', '1.json'),
JSON.stringify({
lookerId: '1',
title: 'Pipeline',
description: null,
folderId: null,
ownerId: null,
updatedAt: null,
tiles: [
{
id: '11',
title: 'ARR',
lookId: null,
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: [],
filters: {},
sorts: [],
targetWarehouseConnectionId: '33333333-3333-4333-8333-333333333333',
},
},
],
}),
);
// Look pointing at warehouse 2222… again, so the result must be de-duplicated.
await writeFile(
join(stagedDir, 'looks', '2.json'),
JSON.stringify({
lookerId: '2',
title: 'Customers',
description: null,
folderId: null,
ownerId: null,
updatedAt: null,
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: [],
filters: {},
sorts: [],
targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222',
},
}),
);
// Expect each id once, in sorted order.
await expect(listLookerTargetConnectionIds(stagedDir)).resolves.toEqual([
'22222222-2222-4222-8222-222222222222',
'33333333-3333-4333-8333-333333333333',
]);
});
});

View file

@ -0,0 +1,41 @@
import { readdir, readFile } from 'node:fs/promises';
import { join, relative } from 'node:path';
import { stagedDashboardFileSchema, stagedExploreFileSchema, stagedLookFileSchema } from './types.js';
/**
 * Recursively lists every regular file under `root`.
 *
 * @param root Directory to traverse.
 * @returns Paths relative to `root`, with backslashes converted to forward
 *   slashes, in default string sort order.
 */
async function walk(root: string): Promise<string[]> {
  const dirents = await readdir(root, { withFileTypes: true, recursive: true });
  const files: string[] = [];
  for (const dirent of dirents) {
    if (!dirent.isFile()) {
      continue;
    }
    const absolutePath = join(dirent.parentPath, dirent.name);
    // Normalize separators so callers can match paths with '/'-based patterns on any platform.
    files.push(relative(root, absolutePath).replace(/\\/g, '/'));
  }
  return files.sort();
}
/**
 * Adds `value` to `targets` unless it is null, undefined, or an empty string.
 */
function addTarget(targets: Set<string>, value: string | null | undefined): void {
  if (!value) {
    return;
  }
  targets.add(value);
}
/**
 * Collects the distinct target warehouse connection ids referenced by staged
 * Looker content.
 *
 * Sources scanned (paths relative to `stagedDir`):
 * - `explores/<model>/<explore>.json`: the explore's own target id
 * - `dashboards/<id>.json`: the target id of every tile query
 * - `looks/<id>.json`: the target id of the look's query
 *
 * Files are read one at a time and validated with the matching staged schema;
 * a read, JSON, or schema failure rejects the returned promise.
 *
 * @param stagedDir Directory holding the staged Looker files.
 * @returns Unique, non-empty connection ids in sorted order.
 */
export async function listLookerTargetConnectionIds(stagedDir: string): Promise<string[]> {
  const collected = new Set<string>();
  const loadJson = async (relativePath: string): Promise<unknown> =>
    JSON.parse(await readFile(join(stagedDir, relativePath), 'utf-8'));

  for (const relativePath of await walk(stagedDir)) {
    if (/^explores\/[^/]+\/[^/]+\.json$/.test(relativePath)) {
      const explore = stagedExploreFileSchema.parse(await loadJson(relativePath));
      addTarget(collected, explore.targetWarehouseConnectionId);
    } else if (/^dashboards\/[^/]+\.json$/.test(relativePath)) {
      const dashboard = stagedDashboardFileSchema.parse(await loadJson(relativePath));
      for (const tile of dashboard.tiles) {
        addTarget(collected, tile.query?.targetWarehouseConnectionId);
      }
    } else if (/^looks\/[^/]+\.json$/.test(relativePath)) {
      const look = stagedLookFileSchema.parse(await loadJson(relativePath));
      addTarget(collected, look.query?.targetWarehouseConnectionId);
    }
  }
  return Array.from(collected).sort();
}

View file

@ -0,0 +1,243 @@
import { describe, expect, it } from 'vitest';
import type { ToolOutput } from '../../../../tools/index.js';
import { buildLookerSlProposal, createLookerQueryToSlTool, type LookerSlProposal } from './looker-query-to-sl.tool.js';
// Unit tests for buildLookerSlProposal: field classification, segment derivation,
// triage/decision selection, and target-table status reporting.
describe('buildLookerSlProposal', () => {
// High usage plus a measure-like field (`arr`) yields the 'full' lane and a 'measure_added' decision.
it('suggests a measure and segment for an aggregated filtered Looker query', () => {
const proposal = buildLookerSlProposal({
contentTitle: 'Open Pipeline ARR',
contentType: 'look',
usage: { queryCount30d: 42, uniqueUsers30d: 7 },
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr', 'opportunities.stage'],
filters: { 'opportunities.stage': 'open' },
sorts: ['opportunities.arr desc'],
limit: '500',
},
});
expect(proposal.sourceName).toBe('looker__b2b__sales_pipeline');
expect(proposal.triageLane).toBe('full');
expect(proposal.decision).toBe('measure_added');
expect(proposal.measures).toEqual([
{
name: 'arr',
lookerField: 'opportunities.arr',
expr: 'sum(opportunities.arr)',
description: 'Suggested from Looker look "Open Pipeline ARR"; verify against explore field SQL before writing.',
},
]);
expect(proposal.dimensions).toEqual([{ name: 'stage', lookerField: 'opportunities.stage' }]);
expect(proposal.segments).toEqual([
{
name: 'open_pipeline_arr',
filters: { 'opportunities.stage': 'open' },
suggestedPredicate: "opportunities.stage = 'open'",
description: 'Reusable filter candidate from Looker look "Open Pipeline ARR".',
},
]);
expect(proposal.notes).toContain(
'Usage signals can raise priority, but query counts, users, owners, and folders must not be written as wiki narrative.',
);
});
// No usage data, no measure-like fields, and no filters: 'light' lane, 'wiki_only' decision.
it('keeps simple saved views as wiki-only candidates', () => {
const proposal = buildLookerSlProposal({
contentTitle: 'Accounts By Region',
query: {
model: 'b2b',
view: 'accounts',
fields: ['accounts.region', 'accounts.segment'],
filters: {},
},
});
expect(proposal.sourceName).toBe('looker__b2b__accounts');
expect(proposal.triageLane).toBe('light');
expect(proposal.decision).toBe('wiki_only');
expect(proposal.measures).toEqual([]);
expect(proposal.dimensions).toEqual([
{ name: 'region', lookerField: 'accounts.region' },
{ name: 'segment', lookerField: 'accounts.segment' },
]);
expect(proposal.segments).toEqual([]);
});
// Filters without measures become 'source_created' only when usage is high; a leading '-' negates a filter value.
it('promotes high-usage filter-only queries as derived-source candidates', () => {
const proposal = buildLookerSlProposal({
contentTitle: 'Active Customers',
usage: { queryCount30d: 15, uniqueUsers30d: 4 },
query: {
model: 'b2b',
view: 'customers',
fields: ['customers.id', 'customers.name'],
filters: { 'customers.status': 'active', 'customers.is_test': '-yes' },
},
});
expect(proposal.sourceName).toBe('looker__b2b__customers');
expect(proposal.decision).toBe('source_created');
expect(proposal.segments).toEqual([
{
name: 'active_customers',
filters: { 'customers.status': 'active', 'customers.is_test': '-yes' },
suggestedPredicate: "customers.status = 'active' AND customers.is_test != 'yes'",
description: 'Reusable filter candidate from Looker look "Active Customers".',
},
]);
});
// A parsed target table plus a connection id gives status 'mapped' and permits standalone source writes.
it('surfaces mapped warehouse target metadata for direct SL writes', () => {
const proposal = buildLookerSlProposal({
contentTitle: 'Open Pipeline ARR',
contentType: 'dashboard_tile',
usage: { queryCount30d: 42, uniqueUsers30d: 7 },
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr', 'opportunities.stage'],
filters: { 'opportunities.stage': 'open' },
targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222',
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
},
});
expect(proposal.sourceName).toBe('looker__b2b__sales_pipeline');
expect(proposal.targetStatus).toBe('mapped');
expect(proposal.targetWarehouseConnectionId).toBe('22222222-2222-4222-8222-222222222222');
expect(proposal.sourceTable).toBe('proj.dataset.opportunities');
expect(proposal.canWriteStandaloneSource).toBe(true);
expect(proposal.targetTable).toEqual({
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
});
expect(proposal.notes).toContain(
'targetTable.ok is true: write or edit SL on targetWarehouseConnectionId using targetTable.canonicalTable as source.table.',
);
});
// Failed target tables: reason 'no_connection_mapping' maps to 'unmapped', any other reason to 'unparseable'.
it('surfaces unmapped and unparseable target reasons for wiki-only fallback', () => {
const unmapped = buildLookerSlProposal({
contentTitle: 'Revenue Trend',
query: {
model: 'b2b',
view: 'revenue',
fields: ['revenue.arr'],
filters: {},
targetWarehouseConnectionId: null,
targetTable: {
ok: false,
reason: 'no_connection_mapping',
},
},
});
expect(unmapped.targetStatus).toBe('unmapped');
expect(unmapped.targetWarehouseConnectionId).toBeNull();
expect(unmapped.sourceTable).toBeNull();
expect(unmapped.canWriteStandaloneSource).toBe(false);
expect(unmapped.notes).toContain(
'targetTable.ok is false (no_connection_mapping): keep this query wiki-only and pass the reason through emit_unmapped_fallback.',
);
const unparseable = buildLookerSlProposal({
contentTitle: 'Templated Source',
query: {
model: 'b2b',
view: 'templated',
fields: ['templated.count'],
filters: {},
targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222',
targetTable: {
ok: false,
reason: 'looker_template_unresolved',
detail: 'The sql_table_name contains ${derived.SQL_TABLE_NAME}.',
},
},
});
expect(unparseable.targetStatus).toBe('unparseable');
expect(unparseable.targetWarehouseConnectionId).toBe('22222222-2222-4222-8222-222222222222');
expect(unparseable.sourceTable).toBeNull();
expect(unparseable.canWriteStandaloneSource).toBe(false);
expect(unparseable.notes).toContain(
'targetTable.ok is false (looker_template_unresolved): keep this query wiki-only and pass the reason through emit_unmapped_fallback.',
);
});
});
// Tests for the tool wrapper: execute() must return both the markdown rendering and the structured proposal.
describe('createLookerQueryToSlTool', () => {
it('returns markdown plus the structured proposal', async () => {
const lookerQueryToSl = createLookerQueryToSlTool();
// Guard keeps the execute call below type-safe if the tool were ever created without an executor.
if (!lookerQueryToSl.execute) {
throw new Error('looker_query_to_sl tool must be executable');
}
const output = (await lookerQueryToSl.execute(
{
contentTitle: 'Revenue Trend',
contentType: 'dashboard_tile',
query: {
model: 'finance',
view: 'orders',
fields: ['orders.total_revenue', 'orders.created_month'],
filters: { 'orders.status': 'paid' },
sorts: [],
targetWarehouseConnectionId: null,
targetTable: null,
},
},
{ toolCallId: 'call-1', messages: [] } as never,
)) as ToolOutput<LookerSlProposal>;
expect(output.markdown).toContain('Looker query SL proposal');
expect(output.markdown).toContain('looker__finance__orders');
expect(output.structured.sourceName).toBe('looker__finance__orders');
expect(output.structured.measures[0]?.name).toBe('total_revenue');
});
// With a mapped target, the markdown header lines must spell out status, connection id, and canonical table.
it('prints target connection and canonical table in markdown output', async () => {
const lookerQueryToSl = createLookerQueryToSlTool();
if (!lookerQueryToSl.execute) {
throw new Error('looker_query_to_sl tool must be executable');
}
const output = (await lookerQueryToSl.execute(
{
contentTitle: 'Revenue Trend',
contentType: 'dashboard_tile',
query: {
model: 'finance',
view: 'orders',
fields: ['orders.total_revenue', 'orders.created_month'],
filters: { 'orders.status': 'paid' },
sorts: [],
targetWarehouseConnectionId: '33333333-3333-4333-8333-333333333333',
targetTable: {
ok: true,
catalog: 'proj',
schema: 'finance',
name: 'orders',
canonicalTable: 'proj.finance.orders',
},
},
},
{ toolCallId: 'call-1', messages: [] } as never,
)) as ToolOutput<LookerSlProposal>;
expect(output.markdown).toContain('- targetStatus: mapped');
expect(output.markdown).toContain('- targetWarehouseConnectionId: 33333333-3333-4333-8333-333333333333');
expect(output.markdown).toContain('- sourceTable: proj.finance.orders');
expect(output.structured.canWriteStandaloneSource).toBe(true);
});
});

View file

@ -0,0 +1,305 @@
import { tool } from 'ai';
import { z } from 'zod';
import type { ToolOutput } from '../../../../tools/index.js';
import { type ParsedTargetTable, stagedLookerQuerySchema } from '../types.js';
// 30-day usage counters for a piece of Looker content; both are non-negative integers defaulting to 0.
const lookerUsageInputSchema = z.object({
queryCount30d: z.number().int().nonnegative().default(0),
uniqueUsers30d: z.number().int().nonnegative().default(0),
});
/**
 * Input accepted by the looker_query_to_sl tool: one staged Looker query plus
 * optional title, content type (defaults to `'look'`), and usage counters.
 */
export const lookerQueryToSlInputSchema = z.object({
query: stagedLookerQuerySchema,
contentTitle: z.string().min(1).optional(),
contentType: z.enum(['look', 'dashboard_tile']).default('look'),
usage: lookerUsageInputSchema.optional(),
});
/** Caller-facing input shape (before schema defaults are applied). */
export type LookerQueryToSlInput = z.input<typeof lookerQueryToSlInputSchema>;
// How the query's warehouse target resolved; computed by targetStatus().
type LookerTargetStatus = 'mapped' | 'unmapped' | 'unparseable' | 'missing_target_table';
/** A Looker field paired with the semantic-layer name proposed for it. */
export interface LookerSlFieldProposal {
/** Semantic-layer name derived from the last dot-separated segment of the Looker field. */
name: string;
/** The Looker field reference exactly as listed in the query's `fields`. */
lookerField: string;
}
/** A field proposed as a measure, with a suggested aggregate expression. */
export interface LookerSlMeasureProposal extends LookerSlFieldProposal {
/** Suggested aggregate over the Looker field, e.g. `sum(opportunities.arr)`. */
expr: string;
/** Human-readable provenance note naming the originating Looker content. */
description: string;
}
/** A reusable filter candidate built from the query's non-empty filters. */
export interface LookerSlSegmentProposal {
/** Semantic-layer name derived from the content title, or from the filter field names when no title is given. */
name: string;
/** The query's filters with empty values removed. */
filters: Record<string, unknown>;
/** Per-filter predicates joined with ` AND `. */
suggestedPredicate: string;
/** Human-readable provenance note naming the originating Looker content. */
description: string;
}
/** Advisory proposal produced by buildLookerSlProposal for one staged Looker query. */
export interface LookerSlProposal {
/** `looker__<model>__<view>`, with both parts normalized to semantic-layer names. */
sourceName: string;
/** Connection id staged on the query, or null when absent. */
targetWarehouseConnectionId: string | null;
/** Target table staged on the query, or null when absent. */
targetTable: ParsedTargetTable | null;
targetStatus: LookerTargetStatus;
/** `targetTable.canonicalTable` when the target table parsed successfully, otherwise null. */
sourceTable: string | null;
/** True only when `targetStatus` is `'mapped'`. */
canWriteStandaloneSource: boolean;
/** `'skip'` for explicitly zero usage, `'full'` for high usage, otherwise `'light'`. */
triageLane: 'skip' | 'light' | 'full';
/** `'measure_added'` if any measure was proposed; `'source_created'` for high-usage filter-only queries; else `'wiki_only'`. */
decision: 'wiki_only' | 'measure_added' | 'source_created';
dimensions: LookerSlFieldProposal[];
measures: LookerSlMeasureProposal[];
segments: LookerSlSegmentProposal[];
/** Guidance lines rendered under "Notes" in the markdown output. */
notes: string[];
}
// Case-insensitive whole-word keywords that mark a field name as measure-like.
// Callers replace underscores with spaces first so `\b` sees each name part as a separate word.
const MEASURE_FIELD_RE =
/\b(count|sum|total|revenue|arr|mrr|amount|avg|average|rate|ratio|percent|pct|margin|profit|value|score)\b/i;
/**
 * Classifies how a query's warehouse target resolved.
 *
 * - `'unmapped'`: target table failed with reason `no_connection_mapping`
 * - `'unparseable'`: target table failed for any other reason
 * - `'mapped'`: target table parsed successfully and a connection id is present
 * - `'missing_target_table'`: everything else (no target table, or a parsed
 *   table without a connection id)
 */
function targetStatus(
  targetWarehouseConnectionId: string | null,
  targetTable: ParsedTargetTable | null,
): LookerTargetStatus {
  if (targetTable?.ok === false) {
    return targetTable.reason === 'no_connection_mapping' ? 'unmapped' : 'unparseable';
  }
  const hasUsableTarget = targetTable?.ok === true && Boolean(targetWarehouseConnectionId);
  return hasUsableTarget ? 'mapped' : 'missing_target_table';
}
/**
 * Returns the guidance notes that correspond to the target status.
 *
 * A mapped target gets write instructions; a failed target table gets a
 * wiki-only note that embeds the failure reason; anything else gets a note that
 * no target table was staged.
 */
function targetNotes(status: LookerTargetStatus, targetTable: ParsedTargetTable | null): string[] {
  if (status !== 'mapped') {
    if (targetTable?.ok === false) {
      const wikiOnlyNote = `targetTable.ok is false (${targetTable.reason}): keep this query wiki-only and pass the reason through emit_unmapped_fallback.`;
      return [wikiOnlyNote];
    }
    return [
      'No targetTable was staged for this query; read the parent explore dependency before attempting any SL write.',
    ];
  }
  return [
    'targetTable.ok is true: write or edit SL on targetWarehouseConnectionId using targetTable.canonicalTable as source.table.',
    'Use targetTable.catalog, targetTable.schema, and targetTable.name only for source_tables preflight matching.',
    'Never use rawSqlTableName as source.table; it may contain aliases, templates, or derived-table SQL.',
  ];
}
/**
 * Chooses the semantic-layer name for a segment.
 *
 * Prefers the content title. Titles are free text, so one without any ASCII
 * letters or digits normalizes to nothing and `toSlName` throws; in that case
 * (and when no title is given) the name is built from the filter field names.
 */
function deriveSegmentName(contentTitle: string | undefined, filters: Record<string, unknown>): string {
  const fromFilterFields = (): string => toSlName(Object.keys(filters).map(fieldLeaf).join('_'));
  if (contentTitle === undefined) {
    return fromFilterFields();
  }
  try {
    return toSlName(contentTitle);
  } catch {
    // The title had no usable characters; a filter-derived name keeps the proposal usable.
    return fromFilterFields();
  }
}

/**
 * Builds an advisory semantic-layer proposal from one staged Looker query.
 *
 * Steps:
 * 1. Validate `raw` against `lookerQueryToSlInputSchema` (applies defaults).
 * 2. Derive the source name `looker__<model>__<view>` and the target status.
 * 3. Pick a triage lane from usage: explicit zero usage is `'skip'`, high usage
 *    is `'full'`, everything else (including missing usage) is `'light'`.
 * 4. Split query fields into measures (name matches a measure keyword) and dimensions.
 * 5. Turn the non-empty filters into at most one segment.
 * 6. Decide: `'measure_added'` if any measure exists, `'source_created'` for
 *    high-usage filter-only queries, otherwise `'wiki_only'`.
 *
 * @param raw Tool input; see `lookerQueryToSlInputSchema`.
 * @returns The proposal, including guidance notes for the caller.
 * @throws If `raw` fails schema validation, or if the model, view, or a field
 *   name cannot be normalized to a semantic-layer name.
 */
export function buildLookerSlProposal(raw: LookerQueryToSlInput): LookerSlProposal {
  const input = lookerQueryToSlInputSchema.parse(raw);
  const sourceName = `looker__${toSlName(input.query.model)}__${toSlName(input.query.view)}`;
  const usage = input.usage;
  const targetWarehouseConnectionId = input.query.targetWarehouseConnectionId ?? null;
  const targetTable = input.query.targetTable ?? null;
  const status = targetStatus(targetWarehouseConnectionId, targetTable);
  const sourceTable = targetTable?.ok === true ? targetTable.canonicalTable : null;
  const canWriteStandaloneSource = status === 'mapped';
  const triageLane =
    usage && usage.queryCount30d === 0 && usage.uniqueUsers30d === 0 ? 'skip' : isHighUsage(usage) ? 'full' : 'light';

  const dimensions: LookerSlFieldProposal[] = [];
  const measures: LookerSlMeasureProposal[] = [];
  for (const field of input.query.fields) {
    const proposal = { name: toSlName(fieldLeaf(field)), lookerField: field };
    if (isMeasureLikeField(field)) {
      measures.push({
        ...proposal,
        expr: suggestedMeasureExpr(field),
        description: `Suggested from Looker ${contentLabel(input)}; verify against explore field SQL before writing.`,
      });
    } else {
      dimensions.push(proposal);
    }
  }

  const filters = nonEmptyFilters(input.query.filters);
  const segments: LookerSlSegmentProposal[] =
    Object.keys(filters).length === 0
      ? []
      : [
          {
            name: deriveSegmentName(input.contentTitle, filters),
            filters,
            suggestedPredicate: Object.entries(filters)
              .map(([field, value]) => filterValueToPredicate(field, value))
              .join(' AND '),
            description: `Reusable filter candidate from Looker ${contentLabel(input)}.`,
          },
        ];

  const decision =
    measures.length > 0 ? 'measure_added' : segments.length > 0 && isHighUsage(usage) ? 'source_created' : 'wiki_only';

  const notes = [
    ...targetNotes(status, targetTable),
    'Treat this as a proposal, not an instruction to write SL blindly.',
    'Verify field SQL, source shape, and existing SL overlap with sl_discover/sl_read_source before sl_write_source or sl_edit_source.',
    'Usage signals can raise priority, but query counts, users, owners, and folders must not be written as wiki narrative.',
  ];
  if (triageLane === 'skip') {
    notes.push('Zero recent usage is a skip signal unless the raw content clearly defines durable business semantics.');
  }

  return {
    sourceName,
    targetWarehouseConnectionId,
    targetTable,
    targetStatus: status,
    sourceTable,
    canWriteStandaloneSource,
    triageLane,
    decision,
    dimensions,
    measures,
    segments,
    notes,
  };
}
/**
 * Creates the tool that turns one staged Looker query into an SL proposal.
 *
 * `execute` returns the proposal both as markdown and as structured data;
 * `toModelOutput` forwards only the markdown text to the model, falling back to
 * `String(output)` when the output has no `markdown` property.
 */
export function createLookerQueryToSlTool() {
  return tool({
    description:
      'Given one staged Looker query JSON, return a conservative proposal for SL measures, dimensions, reusable filters, and triage priority. The proposal is advisory; verify with SL tools before writing.',
    inputSchema: lookerQueryToSlInputSchema,
    execute: async (input): Promise<ToolOutput<LookerSlProposal>> => {
      const proposal = buildLookerSlProposal(input);
      const markdown = formatLookerSlProposal(proposal);
      return { markdown, structured: proposal };
    },
    toModelOutput: ({ output }) => {
      let text: string;
      if (output && typeof output === 'object' && 'markdown' in output) {
        text = String((output as { markdown: unknown }).markdown);
      } else {
        text = String(output);
      }
      return { type: 'content', value: [{ type: 'text', text }] };
    },
  });
}
/**
 * Renders a proposal as markdown: a summary bullet list followed by Measures,
 * Dimensions, Segments, and Notes sections.
 *
 * Empty measure/dimension/segment lists render as `- (none)`; an empty notes
 * list renders nothing under its heading. Missing connection id or source table
 * render as `(none)`.
 */
export function formatLookerSlProposal(proposal: LookerSlProposal): string {
  const bulletList = <T>(items: readonly T[], render: (item: T) => string): string[] =>
    items.length === 0 ? ['- (none)'] : items.map((item) => `- ${render(item)}`);

  const out: string[] = [
    '## Looker query SL proposal',
    '',
    `- sourceName: ${proposal.sourceName}`,
    `- targetStatus: ${proposal.targetStatus}`,
    `- targetWarehouseConnectionId: ${proposal.targetWarehouseConnectionId ?? '(none)'}`,
    `- sourceTable: ${proposal.sourceTable ?? '(none)'}`,
    `- canWriteStandaloneSource: ${proposal.canWriteStandaloneSource}`,
    `- triageLane: ${proposal.triageLane}`,
    `- decision: ${proposal.decision}`,
  ];

  out.push('', '### Measures');
  out.push(...bulletList(proposal.measures, (measure) => `${measure.name}: ${measure.expr} (${measure.lookerField})`));

  out.push('', '### Dimensions');
  out.push(...bulletList(proposal.dimensions, (dimension) => `${dimension.name}: ${dimension.lookerField}`));

  out.push('', '### Segments');
  out.push(...bulletList(proposal.segments, (segment) => `${segment.name}: ${segment.suggestedPredicate}`));

  out.push('', '### Notes');
  for (const note of proposal.notes) {
    out.push(`- ${note}`);
  }
  return out.join('\n');
}
/**
 * True when usage is present and either counter reaches its threshold
 * (at least 10 queries or at least 3 unique users in 30 days).
 */
function isHighUsage(usage: z.infer<typeof lookerUsageInputSchema> | undefined): boolean {
  if (!usage) {
    return false;
  }
  const manyQueries = usage.queryCount30d >= 10;
  const manyUsers = usage.uniqueUsers30d >= 3;
  return manyQueries || manyUsers;
}
/**
 * True when the last segment of the Looker field name contains a measure keyword.
 * Underscores are turned into spaces first so each name part is matched as a whole word.
 */
function isMeasureLikeField(field: string): boolean {
  const spacedLeaf = fieldLeaf(field).replace(/_/g, ' ');
  return MEASURE_FIELD_RE.test(spacedLeaf);
}
/**
 * Suggests an aggregate expression for a measure-like field.
 *
 * The aggregate is chosen from keywords in the field's last name segment:
 * `count` for count-style names, `avg` for averages/rates/ratios/percentages/
 * margins/scores, and `sum` otherwise. The full Looker field is the argument.
 */
function suggestedMeasureExpr(field: string): string {
  const words = fieldLeaf(field).replace(/_/g, ' ');
  let aggregate = 'sum';
  if (/\b(count|count_distinct)\b/i.test(words)) {
    aggregate = 'count';
  } else if (/\b(avg|average|rate|ratio|percent|pct|margin|score)\b/i.test(words)) {
    aggregate = 'avg';
  }
  return `${aggregate}(${field})`;
}
/**
 * Returns the text after the last `.` in a Looker field reference.
 * If that text is empty (no dot-free tail, e.g. `"view."`), the whole input is returned.
 */
function fieldLeaf(field: string): string {
  const tail = field.slice(field.lastIndexOf('.') + 1);
  return tail || field;
}
/**
 * Returns a copy of `filters` without entries whose value is null, undefined,
 * a blank string, or an empty array. All other values (including `0`, `false`,
 * and objects) are kept.
 */
function nonEmptyFilters(filters: Record<string, unknown>): Record<string, unknown> {
  const hasContent = (value: unknown): boolean => {
    if (value === null || value === undefined) {
      return false;
    }
    if (typeof value === 'string') {
      return value.trim().length > 0;
    }
    return Array.isArray(value) ? value.length > 0 : true;
  };

  const kept: Array<[string, unknown]> = [];
  for (const [field, value] of Object.entries(filters)) {
    if (hasContent(value)) {
      kept.push([field, value]);
    }
  }
  return Object.fromEntries(kept);
}
/**
 * Translates one Looker filter value into a suggested SQL predicate.
 *
 * Handled forms:
 * - array → `IN (...)`
 * - number / boolean → `= <value>`
 * - comma-separated string without quotes → `IN (...)`, or `NOT IN (...)` when
 *   every term is negated with a leading `-`
 * - `-value` → `!= 'value'`; `-pat%` → `NOT LIKE 'pat%'`
 * - value containing `%` → `LIKE`
 * - anything else → `= 'value'`
 *
 * This is a heuristic for proposals, not a full Looker filter-expression parser:
 * lists that mix negated and plain terms, or that contain wildcards, are emitted
 * as a plain `IN`/`NOT IN` over the literal terms.
 *
 * @param field Looker field reference, emitted verbatim on the left-hand side.
 * @param value Raw filter value from the staged query.
 */
function filterValueToPredicate(field: string, value: unknown): string {
  if (Array.isArray(value)) {
    return `${field} IN (${value.map((item) => sqlLiteral(item)).join(', ')})`;
  }
  if (typeof value === 'number' || typeof value === 'boolean') {
    return `${field} = ${String(value)}`;
  }

  const raw = String(value).trim();
  // A lone "-" is treated as a literal value, not as a negation marker.
  const isNegated = (term: string): boolean => term.startsWith('-') && term.length > 1;
  const stripNegation = (term: string): string => term.slice(1).trim();

  if (raw.includes(',') && !raw.includes('"') && !raw.includes("'")) {
    const terms = raw.split(',').map((part) => part.trim());
    if (terms.every(isNegated)) {
      // "-a,-b" excludes both values, so it must not become IN ('-a', '-b').
      return `${field} NOT IN (${terms.map((term) => sqlLiteral(stripNegation(term))).join(', ')})`;
    }
    return `${field} IN (${terms.map((term) => sqlLiteral(term)).join(', ')})`;
  }
  if (isNegated(raw)) {
    const negated = stripNegation(raw);
    // A negated wildcard pattern needs NOT LIKE; `!=` would compare against the literal '%'.
    return negated.includes('%')
      ? `${field} NOT LIKE ${sqlLiteral(negated)}`
      : `${field} != ${sqlLiteral(negated)}`;
  }
  if (raw.includes('%')) {
    return `${field} LIKE ${sqlLiteral(raw)}`;
  }
  return `${field} = ${sqlLiteral(raw)}`;
}

/**
 * Renders a value as a SQL literal: numbers and booleans verbatim, everything
 * else as a single-quoted string with embedded single quotes doubled.
 */
function sqlLiteral(value: unknown): string {
  if (typeof value === 'number' || typeof value === 'boolean') {
    return String(value);
  }
  return `'${String(value).replace(/'/g, "''")}'`;
}
/**
 * Human-readable label for the originating content: `dashboard tile` or `look`,
 * followed by the quoted title when one is present.
 */
function contentLabel(input: z.infer<typeof lookerQueryToSlInputSchema>): string {
  let label = 'look';
  if (input.contentType === 'dashboard_tile') {
    label = 'dashboard tile';
  }
  if (!input.contentTitle) {
    return label;
  }
  return `${label} "${input.contentTitle}"`;
}
/**
 * Normalizes free text into a semantic-layer identifier.
 *
 * camelCase boundaries become underscores, the text is lower-cased, every run
 * of characters outside `[a-z0-9]` becomes a single underscore, and leading or
 * trailing underscores are dropped. A result starting with a digit is prefixed
 * with `n_`.
 *
 * @throws Error when nothing remains after normalization.
 */
function toSlName(value: string): string {
  const snakeCased = value
    .trim()
    .replace(/([a-z0-9])([A-Z])/g, '$1_$2')
    .toLowerCase();
  const slug = snakeCased
    .replace(/[^a-z0-9]+/g, '_')
    .replace(/^_+|_+$/g, '')
    .replace(/_+/g, '_');
  if (slug.length === 0) {
    throw new Error('Cannot derive semantic-layer name from empty Looker value');
  }
  const startsWithDigit = /^[0-9]/.test(slug);
  return startsWithDigit ? `n_${slug}` : slug;
}

View file

@ -0,0 +1,329 @@
import { describe, expect, it } from 'vitest';
import {
lookerPullConfigSchema,
parseLookerPullConfig,
parsedTargetTableSchema,
stagedDashboardFileSchema,
stagedExploreFileSchema,
stagedLookerFetchIssueSchema,
stagedLookerQuerySchema,
stagedLookerScopeFileSchema,
stagedLookerSignalsFileSchema,
stagedLookFileSchema,
stagedSyncConfigSchema,
} from './types.js';
// Schema-level tests for the Looker staged runtime types: defaults, id normalization,
// target-table fields, fetch-issue shapes, and connection-id validation.
describe('Looker staged runtime schemas', () => {
// The pull config fills the three mapping records with empty objects when omitted.
it('parses pull config and staged sync config', () => {
expect(
lookerPullConfigSchema.parse({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
instanceBaseUrl: 'https://example.looker.com',
}),
).toEqual({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
instanceBaseUrl: 'https://example.looker.com',
connectionMappings: {},
connectionTypes: {},
parsedTargetTables: {},
});
expect(
stagedSyncConfigSchema.parse({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
fetchedAt: '2026-04-30T12:00:00.000Z',
instanceBaseUrl: 'https://example.looker.com',
}),
).toMatchObject({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
instanceBaseUrl: 'https://example.looker.com',
});
});
// Incremental cursors and the scope manifest round-trip unchanged; null cursors are accepted.
it('parses incremental pull cursors and scope manifests', () => {
expect(
parseLookerPullConfig({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
dashboardUpdatedSince: '2026-04-30T10:00:00.000Z',
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
}),
).toEqual({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
dashboardUpdatedSince: '2026-04-30T10:00:00.000Z',
lookUpdatedSince: '2026-04-30T11:00:00.000Z',
connectionMappings: {},
connectionTypes: {},
parsedTargetTables: {},
});
expect(
stagedLookerScopeFileSchema.parse({
mode: 'incremental',
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
fetchedRawPaths: ['dashboards/10.json'],
}),
).toEqual({
mode: 'incremental',
knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
fetchedRawPaths: ['dashboards/10.json'],
});
expect(
stagedSyncConfigSchema.parse({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
fetchedAt: '2026-04-30T12:30:00.000Z',
previousCursors: {
dashboardsLastSyncedAt: null,
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
},
nextCursors: {
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
},
}).nextCursors,
).toEqual({
dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
});
});
// Numeric ids supplied by Looker are coerced to strings at every level, including tiles.
it('normalizes numeric Looker ids to strings', () => {
const dashboard = stagedDashboardFileSchema.parse({
lookerId: 10,
title: 'Sales Pipeline',
description: null,
folderId: 7,
ownerId: 3,
updatedAt: '2026-04-30T12:00:00.000Z',
tiles: [{ id: 100, title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
});
expect(dashboard.lookerId).toBe('10');
expect(dashboard.folderId).toBe('7');
expect(dashboard.ownerId).toBe('3');
expect(dashboard.tiles[0].id).toBe('100');
});
// An empty signals file parses, with dashboardUsage defaulting to an empty list.
it('parses explores, looks, and signal files with defaults', () => {
expect(
stagedExploreFileSchema.parse({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
fields: {
dimensions: [{ name: 'opportunities.id', label: 'Opportunity ID', type: 'number', sql: '${TABLE}.id' }],
measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '${TABLE}.arr' }],
},
joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }],
}),
).toMatchObject({
modelName: 'b2b',
exploreName: 'sales_pipeline',
fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
});
expect(
stagedLookFileSchema.parse({
lookerId: '20',
title: 'Open Pipeline',
description: null,
folderId: null,
ownerId: null,
updatedAt: null,
query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
}),
).toMatchObject({ lookerId: '20', query: { fields: ['opportunities.arr'] } });
expect(stagedLookerSignalsFileSchema.parse({}).dashboardUsage).toEqual([]);
});
// Warehouse mapping data survives parsing both in the pull config and on staged explores and joins.
it('parses warehouse SL mapping pull config and staged target table fields', () => {
const targetConnectionId = '22222222-2222-4222-8222-222222222222';
const parsedTargetTable = {
ok: true as const,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
};
expect(parsedTargetTableSchema.parse(parsedTargetTable)).toEqual(parsedTargetTable);
expect(
parseLookerPullConfig({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
connectionMappings: { b2b_sandbox_bq: targetConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable },
}),
).toEqual({
lookerConnectionId: '11111111-1111-4111-8111-111111111111',
connectionMappings: { b2b_sandbox_bq: targetConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable },
});
expect(
stagedExploreFileSchema.parse({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.id', label: 'Opportunity ID', type: 'number', sql: '${TABLE}.id' }],
measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '${TABLE}.arr' }],
},
joins: [
{
name: 'accounts',
type: 'left_outer',
relationship: 'many_to_one',
rawSqlTableName: 'proj.dataset.accounts',
sqlOn: '${opportunities.account_id} = ${accounts.id}',
from: null,
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'accounts',
canonicalTable: 'proj.dataset.accounts',
},
},
],
targetWarehouseConnectionId: targetConnectionId,
targetTable: parsedTargetTable,
}),
).toMatchObject({
modelName: 'b2b',
exploreName: 'sales_pipeline',
connectionName: 'b2b_sandbox_bq',
targetWarehouseConnectionId: targetConnectionId,
targetTable: parsedTargetTable,
joins: [{ name: 'accounts', targetTable: { ok: true, name: 'accounts' } }],
});
});
// Fetch issues may carry a `kind` and free-form `details` describing a mapping problem.
it('parses structured Looker mapping fetch warnings', () => {
expect(
stagedLookerFetchIssueSchema.parse({
rawPath: 'looker_connection_mappings/b2b_sandbox_bq',
entityType: 'looker_connection_mapping',
entityId: 'b2b_sandbox_bq',
severity: 'warning',
statusCode: null,
message: 'Looker connection b2b_sandbox_bq is not mapped to a warehouse connection.',
retryRecommended: false,
kind: 'unmapped_looker_connection',
details: {
lookerConnectionName: 'b2b_sandbox_bq',
affectedExplores: ['b2b.sales_pipeline'],
},
}),
).toMatchObject({
entityType: 'looker_connection_mapping',
kind: 'unmapped_looker_connection',
details: {
lookerConnectionName: 'b2b_sandbox_bq',
affectedExplores: ['b2b.sales_pipeline'],
},
});
});
// toEqual (not toMatchObject): the schema must not add fields to a plain model-listing warning.
it('parses LookML model listing warnings in fetch reports', () => {
expect(
stagedLookerFetchIssueSchema.parse({
rawPath: 'lookml_models.json',
entityType: 'lookml_models',
entityId: null,
severity: 'warning',
statusCode: 403,
message: 'LookML model access denied',
retryRecommended: false,
}),
).toEqual({
rawPath: 'lookml_models.json',
entityType: 'lookml_models',
entityId: null,
severity: 'warning',
statusCode: 403,
message: 'LookML model access denied',
retryRecommended: false,
});
});
// Connection ids are not required to be UUIDs; slug-style ids are valid everywhere they appear.
it('accepts slug-shaped connection ids inside KLO Looker runtime schemas', () => {
const parsedTargetTable = {
ok: true as const,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
};
expect(
parseLookerPullConfig({
lookerConnectionId: 'prod-looker',
connectionMappings: { b2b_sandbox_bq: 'prod-warehouse' },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable },
}),
).toMatchObject({
lookerConnectionId: 'prod-looker',
connectionMappings: { b2b_sandbox_bq: 'prod-warehouse' },
});
expect(
stagedSyncConfigSchema.parse({
lookerConnectionId: 'prod-looker',
fetchedAt: '2026-04-30T12:00:00.000Z',
}),
).toMatchObject({
lookerConnectionId: 'prod-looker',
});
expect(
stagedLookerQuerySchema.parse({
model: 'b2b',
view: 'sales_pipeline',
targetWarehouseConnectionId: 'prod-warehouse',
targetTable: parsedTargetTable,
}),
).toMatchObject({
targetWarehouseConnectionId: 'prod-warehouse',
targetTable: parsedTargetTable,
});
expect(
stagedExploreFileSchema.parse({
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
fields: { dimensions: [], measures: [] },
targetWarehouseConnectionId: 'prod-warehouse',
targetTable: parsedTargetTable,
}),
).toMatchObject({
targetWarehouseConnectionId: 'prod-warehouse',
targetTable: parsedTargetTable,
});
});
// Ids containing dots or slashes are rejected, both as the Looker connection id and as a mapping target.
it('rejects unsafe KLO Looker connection ids', () => {
expect(() =>
parseLookerPullConfig({
lookerConnectionId: '../prod-looker',
}),
).toThrow();
expect(() =>
parseLookerPullConfig({
connectionMappings: { b2b_sandbox_bq: 'prod/warehouse' },
}),
).toThrow();
});
});

View file

@ -0,0 +1,255 @@
import { z } from 'zod';
import { connectionTypeSchema } from '../../../connections/connection-type.js';
import { parsedTargetTableSchema } from '../../parsed-target-table.js';
// Looker ids are accepted as strings or integer numbers and normalized to strings.
const lookerIdSchema = z.union([z.string(), z.number().int()]).transform(String);
// A Looker id or an explicit null; `.default(null)` supplies null when the value is absent.
const nullableLookerIdSchema = z.union([lookerIdSchema, z.null()]).default(null);
// Connection ids: non-empty and limited to ASCII letters, digits, `_` and `-`,
// so path-like values such as `../x` or `a/b` do not validate.
export const lookerConnectionIdSchema = z.string().min(1).regex(/^[A-Za-z0-9_-]+$/);
// Re-export of the target-table schema and type imported above.
export { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js';
// Sync cursors for dashboards and looks: each is an ISO datetime string or null,
// defaulting to null when omitted.
export const lookerRuntimeCursorsSchema = z.object({
dashboardsLastSyncedAt: z.iso.datetime().nullable().default(null),
looksLastSyncedAt: z.iso.datetime().nullable().default(null),
});
export type LookerRuntimeCursors = z.infer<typeof lookerRuntimeCursorsSchema>;
// Pull configuration. Every field is optional or defaulted, so an empty object validates.
export const lookerPullConfigSchema = z.object({
lookerConnectionId: lookerConnectionIdSchema.optional(),
instanceBaseUrl: z.url().optional(),
// Optional ISO datetime (or null) lower bounds for dashboards / looks.
dashboardUpdatedSince: z.iso.datetime().nullable().optional(),
lookUpdatedSince: z.iso.datetime().nullable().optional(),
// Record values must be valid connection ids; keys are free-form strings
// (presumably Looker connection names — confirm against the caller).
connectionMappings: z.record(z.string(), lookerConnectionIdSchema).default({}),
connectionTypes: z.record(z.string(), connectionTypeSchema).default({}),
parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}),
});
export type LookerPullConfig = z.infer<typeof lookerPullConfigSchema>;
/**
 * Validate a raw pull-config payload against `lookerPullConfigSchema`.
 *
 * A `null` or `undefined` payload is replaced by an empty object before parsing.
 *
 * @param raw untyped configuration value
 * @returns the parsed configuration
 * @throws whatever `lookerPullConfigSchema.parse` throws for invalid input
 */
export function parseLookerPullConfig(raw: unknown): LookerPullConfig {
  const candidate = raw ?? {};
  return lookerPullConfigSchema.parse(candidate);
}
// Staged sync config: the connection id and fetch timestamp are required;
// both cursor objects default to "never synced" (all-null) values.
export const stagedSyncConfigSchema = z.object({
lookerConnectionId: lookerConnectionIdSchema,
fetchedAt: z.iso.datetime(),
instanceBaseUrl: z.url().optional(),
previousCursors: lookerRuntimeCursorsSchema.default({
dashboardsLastSyncedAt: null,
looksLastSyncedAt: null,
}),
nextCursors: lookerRuntimeCursorsSchema.default({
dashboardsLastSyncedAt: null,
looksLastSyncedAt: null,
}),
});
// Staged Looker query. Only `model` and `view` are required; list/record fields
// default to empty and the warehouse target fields default to null.
export const stagedLookerQuerySchema = z.object({
id: lookerIdSchema.optional(),
model: z.string(),
view: z.string(),
fields: z.array(z.string()).default([]),
filters: z.record(z.string(), z.unknown()).default({}),
sorts: z.array(z.string()).default([]),
// Accepted as either a string or a number, and may be null or absent.
limit: z.union([z.string(), z.number()]).optional().nullable(),
dynamicFields: z.string().optional().nullable(),
targetWarehouseConnectionId: lookerConnectionIdSchema.nullable().default(null),
targetTable: parsedTargetTableSchema.nullable().default(null),
});
export type StagedLookerQuery = z.infer<typeof stagedLookerQuerySchema>;
// One dashboard tile: an id plus optional title, look reference and embedded query
// (each of the optional parts defaults to null).
const stagedDashboardTileSchema = z.object({
id: lookerIdSchema,
title: z.string().nullable().default(null),
lookId: nullableLookerIdSchema,
query: stagedLookerQuerySchema.nullable().default(null),
});
// Staged dashboard file. `description` and `updatedAt` must be present but may be null;
// `tiles` defaults to an empty list.
export const stagedDashboardFileSchema = z.object({
lookerId: lookerIdSchema,
title: z.string(),
description: z.string().nullable(),
folderId: nullableLookerIdSchema,
ownerId: nullableLookerIdSchema,
// Plain string here, not validated as an ISO datetime.
updatedAt: z.string().nullable(),
tiles: z.array(stagedDashboardTileSchema).default([]),
});
export type StagedDashboardFile = z.infer<typeof stagedDashboardFileSchema>;
// Staged look file: same metadata fields as a staged dashboard, with a single
// optional query (defaults to null) instead of tiles.
export const stagedLookFileSchema = z.object({
lookerId: lookerIdSchema,
title: z.string(),
description: z.string().nullable(),
folderId: nullableLookerIdSchema,
ownerId: nullableLookerIdSchema,
// Plain string here, not validated as an ISO datetime.
updatedAt: z.string().nullable(),
query: stagedLookerQuerySchema.nullable().default(null),
});
export type StagedLookFile = z.infer<typeof stagedLookFileSchema>;
// One folder entry; `parentId` defaults to null and `path` to an empty list.
// NOTE(review): `path` looks like the list of ancestor folder names — confirm with the producer.
const stagedFolderSchema = z.object({
id: lookerIdSchema,
name: z.string(),
parentId: nullableLookerIdSchema,
path: z.array(z.string()).default([]),
});
// Staged folders file: a required list of folder entries.
export const stagedFoldersTreeFileSchema = z.object({
folders: z.array(stagedFolderSchema),
});
export type StagedFoldersTreeFile = z.infer<typeof stagedFoldersTreeFileSchema>;
// Staged user: `displayName` must be present (nullable); `email` defaults to null.
export const stagedUserFileSchema = z.object({
id: lookerIdSchema,
displayName: z.string().nullable(),
email: z.string().nullable().default(null),
});
export type StagedUserFile = z.infer<typeof stagedUserFileSchema>;
// Staged group: id and name only.
export const stagedGroupFileSchema = z.object({
id: lookerIdSchema,
name: z.string(),
});
export type StagedGroupFile = z.infer<typeof stagedGroupFileSchema>;
// One LookML model with its explores; labels default to null when omitted.
const stagedLookmlModelSchema = z.object({
name: z.string(),
label: z.string().nullable().default(null),
explores: z.array(z.object({ name: z.string(), label: z.string().nullable().default(null) })),
});
// Staged LookML models file: a required list of models.
export const stagedLookmlModelsFileSchema = z.object({
models: z.array(stagedLookmlModelSchema),
});
export type StagedLookmlModelsFile = z.infer<typeof stagedLookmlModelsFileSchema>;
// A dimension or measure entry: only `name` is required, the rest default to null.
const stagedLookerFieldSchema = z.object({
name: z.string(),
label: z.string().nullable().default(null),
type: z.string().nullable().default(null),
sql: z.string().nullable().default(null),
description: z.string().nullable().default(null),
});
// A join inside an explore: only `name` is required, the rest default to null.
const stagedLookerJoinSchema = z.object({
name: z.string(),
type: z.string().nullable().default(null),
relationship: z.string().nullable().default(null),
rawSqlTableName: z.string().nullable().default(null),
sqlOn: z.string().nullable().default(null),
from: z.string().nullable().default(null),
targetTable: parsedTargetTableSchema.nullable().default(null),
});
// Staged explore file. `modelName`, `exploreName` and the `fields` object are required;
// the lists inside `fields`, `joins`, and the warehouse target fields are defaulted.
export const stagedExploreFileSchema = z.object({
modelName: z.string(),
exploreName: z.string(),
label: z.string().nullable().default(null),
description: z.string().nullable().default(null),
rawSqlTableName: z.string().nullable().default(null),
// Free-form string, not validated with lookerConnectionIdSchema.
connectionName: z.string().nullable().default(null),
viewName: z.string().nullable().default(null),
fields: z.object({
dimensions: z.array(stagedLookerFieldSchema).default([]),
measures: z.array(stagedLookerFieldSchema).default([]),
}),
joins: z.array(stagedLookerJoinSchema).default([]),
targetWarehouseConnectionId: lookerConnectionIdSchema.nullable().default(null),
targetTable: parsedTargetTableSchema.nullable().default(null),
});
export type StagedExploreFile = z.infer<typeof stagedExploreFileSchema>;
// Usage signal for one piece of content. Counters are non-negative integers defaulting to 0.
// NOTE(review): the `30d` suffix suggests a 30-day window — confirm with the producer.
const stagedUsageSignalSchema = z.object({
contentId: lookerIdSchema,
queryCount30d: z.number().int().nonnegative().default(0),
uniqueUsers30d: z.number().int().nonnegative().default(0),
lastRunAt: z.string().nullable().default(null),
topUsers: z.array(lookerIdSchema).default([]),
});
// Scheduled-plan signal for a dashboard or look.
const stagedScheduledPlanSignalSchema = z.object({
contentId: lookerIdSchema,
contentType: z.enum(['dashboard', 'look']),
isScheduled: z.boolean(),
scheduleCount: z.number().int().nonnegative().default(0),
recipientCount: z.number().int().nonnegative().default(0),
});
// Favorite-count signal for a dashboard or look.
const stagedFavoriteSignalSchema = z.object({
contentId: lookerIdSchema,
contentType: z.enum(['dashboard', 'look']),
favoriteCount: z.number().int().nonnegative().default(0),
});
// Aggregate signals schema; every list defaults to empty, so `{}` validates.
export const stagedLookerSignalsFileSchema = z.object({
dashboardUsage: z.array(stagedUsageSignalSchema).default([]),
lookUsage: z.array(stagedUsageSignalSchema).default([]),
scheduledPlans: z.array(stagedScheduledPlanSignalSchema).default([]),
favorites: z.array(stagedFavoriteSignalSchema).default([]),
});
export type StagedLookerSignalsFile = z.infer<typeof stagedLookerSignalsFileSchema>;
// Scope file: a required sync mode plus two raw-path lists that default to empty.
export const stagedLookerScopeFileSchema = z.object({
mode: z.enum(['full', 'incremental']),
knownCurrentRawPaths: z.array(z.string()).default([]),
fetchedRawPaths: z.array(z.string()).default([]),
});
export type StagedLookerScopeFile = z.infer<typeof stagedLookerScopeFileSchema>;
// Closed set of machine-readable issue kinds that a fetch issue may carry.
const stagedLookerFetchIssueKindSchema = z.enum([
'unmapped_looker_connection',
'unparseable_sql_table_name',
'looker_template_unresolved',
'derived_table_not_supported',
'lookml_connection_mismatch',
]);
// A single fetch issue. `rawPath` and `message` must be non-empty; `entityId` and
// `statusCode` default to null, `retryRecommended` to false; `kind` and `details` are optional.
export const stagedLookerFetchIssueSchema = z.object({
rawPath: z.string().min(1),
entityType: z.enum(['dashboard', 'look', 'explore', 'signals', 'lookml_models', 'looker_connection_mapping']),
entityId: z.string().nullable().default(null),
severity: z.enum(['warning', 'error']),
statusCode: z.number().int().nullable().default(null),
message: z.string().min(1),
retryRecommended: z.boolean().default(false),
kind: stagedLookerFetchIssueKindSchema.optional(),
details: z.record(z.string(), z.unknown()).optional(),
});
export type StagedLookerFetchIssue = z.infer<typeof stagedLookerFetchIssueSchema>;
// Fetch report: overall status plus lists of skipped items and warnings (both default to empty).
export const stagedLookerFetchReportSchema = z.object({
status: z.enum(['success', 'partial']),
retryRecommended: z.boolean().default(false),
skipped: z.array(stagedLookerFetchIssueSchema).default([]),
warnings: z.array(stagedLookerFetchIssueSchema).default([]),
});
export type StagedLookerFetchReport = z.infer<typeof stagedLookerFetchReportSchema>;
// File and directory names used for staged Looker artifacts. `as const` keeps each
// value as a readonly string-literal type.
// NOTE(review): paths are presumably relative to the staged sync directory — confirm with callers.
export const STAGED_FILES = {
syncConfig: 'sync-config.json',
scope: 'looker-scope.json',
fetchReport: 'looker-fetch-report.json',
evidenceRoot: 'evidence',
lookmlModels: 'lookml_models.json',
foldersTree: 'folders/tree.json',
signals: {
dashboardUsage: 'signals/dashboard_usage.json',
lookUsage: 'signals/look_usage.json',
scheduledPlans: 'signals/scheduled_plans.json',
favorites: 'signals/favorites.json',
},
} as const;

View file

@ -0,0 +1,230 @@
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { chunkLookmlProject } from './chunk.js';
import { type ParsedLookmlProject, parseLookmlStagedDir } from './parse.js';
const FIXTURE_ROOT = join(__dirname, '../../../../test/fixtures/lookml');
// First-run behaviour: no diffSet is passed, so every WorkUnit for the project is emitted.
describe('chunkLookmlProject — first run', () => {
  it('single-model bundle → 1 WU with model + all views in rawFiles', async () => {
    const stagedDir = join(FIXTURE_ROOT, 'single-model');
    const project = await parseLookmlStagedDir(stagedDir);
    const result = chunkLookmlProject(project);
    expect(result.workUnits).toHaveLength(1);
    const wu = result.workUnits[0];
    expect(wu.unitKey).toBe('lookml-orders');
    // Sort a copy: Array.prototype.sort works in place and would mutate the unit under test.
    expect([...wu.rawFiles].sort()).toEqual([
      'orders.model.lkml',
      'views/customers.view.lkml',
      'views/orders.view.lkml',
    ]);
    expect(wu.peerFileIndex).toEqual([]);
    expect(wu.dependencyPaths).toEqual([]);
    expect(result.eviction).toBeUndefined();
  });

  it('multi-model bundle → 1 WU per model; shared view owned by lex-first model; others see it in dependencyPaths + peerFileIndex is pathless-index', async () => {
    const stagedDir = join(FIXTURE_ROOT, 'multi-model');
    const project = await parseLookmlStagedDir(stagedDir);
    const result = chunkLookmlProject(project);
    expect(result.workUnits).toHaveLength(2);
    const marketing = result.workUnits.find((wu) => wu.unitKey === 'lookml-marketing');
    const orders = result.workUnits.find((wu) => wu.unitKey === 'lookml-orders');
    expect(marketing).toBeDefined();
    expect(orders).toBeDefined();
    // Narrow for the type checker; the expectations above already report a missing unit.
    if (!marketing || !orders) {
      throw new Error('expected marketing and orders work units');
    }
    // marketing sorts before orders → marketing owns shared_dims
    expect(marketing.rawFiles).toContain('views/shared_dims.view.lkml');
    expect(marketing.rawFiles).toContain('views/campaigns.view.lkml');
    expect(marketing.rawFiles).toContain('marketing.model.lkml');
    expect(marketing.rawFiles).not.toContain('views/orders.view.lkml');
    expect(marketing.dependencyPaths).toEqual([]);
    // orders does NOT own shared_dims — it's in dependencyPaths (read-only upstream).
    expect(orders.rawFiles).not.toContain('views/shared_dims.view.lkml');
    expect(orders.dependencyPaths).toEqual(['views/shared_dims.view.lkml']);
    expect(orders.rawFiles).toContain('views/orders.view.lkml');
    expect(orders.rawFiles).toContain('orders.model.lkml');
    // Each WU's peerFileIndex lists the OTHER model's files (paths-only index).
    expect(orders.peerFileIndex).toContain('marketing.model.lkml');
    expect(orders.peerFileIndex).toContain('views/campaigns.view.lkml');
    // Dependency paths should not be duplicated into peerFileIndex.
    expect(orders.peerFileIndex).not.toContain('views/shared_dims.view.lkml');
  });

  it('extends-chain fixture: single WU contains base + orders + orders_ext; chain order visible via graph', async () => {
    const stagedDir = join(FIXTURE_ROOT, 'extends-chain');
    const project = await parseLookmlStagedDir(stagedDir);
    const result = chunkLookmlProject(project);
    // One model ("orders") includes views/*.view.lkml — so all three views land in its WU.
    expect(result.workUnits).toHaveLength(1);
    const wu = result.workUnits[0];
    expect(wu.unitKey).toBe('lookml-orders');
    // Sort a copy so the assertion does not reorder the WorkUnit's own array.
    expect([...wu.rawFiles].sort()).toEqual([
      'orders.model.lkml',
      'views/base.view.lkml',
      'views/orders.view.lkml',
      'views/orders_ext.view.lkml',
    ]);
    expect(wu.dependencyPaths).toEqual([]); // all ancestors already in rawFiles on first run
    expect(wu.notes).toMatch(/orders/);
  });

  it('is deterministic: two calls on the same project return structurally identical WorkUnits', async () => {
    const stagedDir = join(FIXTURE_ROOT, 'multi-model');
    const project = await parseLookmlStagedDir(stagedDir);
    const r1 = chunkLookmlProject(project);
    const r2 = chunkLookmlProject(project);
    expect(r1.workUnits).toEqual(r2.workUnits);
  });

  it('unitKey is model-name-derived (stable across parse+chunk cycles and across re-syncs)', async () => {
    const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'multi-model'));
    const { workUnits } = chunkLookmlProject(project);
    // `map` already returns a fresh array, so sorting it in place is safe here.
    expect(workUnits.map((wu) => wu.unitKey).sort()).toEqual(['lookml-marketing', 'lookml-orders']);
  });

  it('marks mismatched model WorkUnits as SL-disallowed and keeps wiki ingest enabled', () => {
    // In-memory project: no fixture needed to exercise the mismatch flagging.
    const project: ParsedLookmlProject = {
      models: [
        {
          path: 'b2b.model.lkml',
          name: 'b2b',
          includes: ['views/orders.view.lkml'],
          explores: ['orders'],
          connectionName: 'wrong_connection',
        },
      ],
      views: [{ path: 'views/orders.view.lkml', name: 'orders', extendsFrom: [], rawSqlTableName: 'public.orders' }],
      dashboards: [],
      allPaths: ['b2b.model.lkml', 'views/orders.view.lkml'],
    };
    const result = chunkLookmlProject(project, { mismatchedModelNames: new Set(['b2b']) });
    const wu = result.workUnits[0];
    expect(wu.unitKey).toBe('lookml-b2b');
    expect(wu.rawFiles).toEqual(['b2b.model.lkml', 'views/orders.view.lkml']);
    expect(wu.slDisallowed).toBe(true);
    expect(wu.slDisallowedReason).toBe('lookml_connection_mismatch');
    expect(wu.notes).toContain('[LOOKML SL WRITES DISALLOWED]');
    expect(wu.notes).toContain('reason: lookml_connection_mismatch');
    expect(wu.notes).toContain('Do not call sl_write_source or sl_edit_source for this WorkUnit.');
  });
});
// Re-sync behaviour: a diffSet is supplied, so only WorkUnits touching added/modified
// files are emitted and deletions are reported as an eviction.
describe('chunkLookmlProject — re-sync', () => {
  it("modified file in one model only emits that model's WU", async () => {
    const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'multi-model'));
    const diffSet = {
      added: [],
      modified: ['views/campaigns.view.lkml'],
      deleted: [],
      unchanged: [
        'marketing.model.lkml',
        'orders.model.lkml',
        'views/orders.view.lkml',
        'views/shared_dims.view.lkml',
      ],
    };
    const { workUnits } = chunkLookmlProject(project, { diffSet });
    expect(workUnits).toHaveLength(1);
    const [onlyUnit] = workUnits;
    expect(onlyUnit.unitKey).toBe('lookml-marketing');
  });

  it("added file under a model emits that model's WU with the new path in rawFiles", async () => {
    const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'single-model'));
    const diffSet = {
      added: ['views/customers.view.lkml'],
      modified: [],
      deleted: [],
      unchanged: ['orders.model.lkml', 'views/orders.view.lkml'],
    };
    const { workUnits } = chunkLookmlProject(project, { diffSet });
    expect(workUnits).toHaveLength(1);
    const [onlyUnit] = workUnits;
    expect(onlyUnit.rawFiles).toContain('views/customers.view.lkml');
  });

  it('widens dependencyPaths with transitive extends ancestors on re-sync', async () => {
    const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'extends-chain'));
    // Only orders_ext changes; base and orders are its upstream ancestors. The single
    // model's WU already carries all three views in rawFiles, so nothing needs to be
    // added to dependencyPaths. Widening only has a visible effect when an ancestor
    // lives outside rawFiles, which cannot happen for this monolithic WU, so this
    // case pins the baseline invariant.
    const diffSet = {
      added: [],
      modified: ['views/orders_ext.view.lkml'],
      deleted: [],
      unchanged: ['orders.model.lkml', 'views/base.view.lkml', 'views/orders.view.lkml'],
    };
    const { workUnits } = chunkLookmlProject(project, { diffSet });
    expect(workUnits).toHaveLength(1);
    const [onlyUnit] = workUnits;
    expect(onlyUnit.rawFiles).toContain('views/orders_ext.view.lkml');
    // Ancestors already in rawFiles → not duplicated into dependencyPaths.
    expect(onlyUnit.dependencyPaths).toEqual([]);
  });

  it('widens dependencyPaths when an ancestor is OUTSIDE the WU (synthesized cross-model case)', () => {
    // Hand-built project with two models: "a" owns base.view.lkml, and "b" owns
    // derived.view.lkml, which extends base. Touching only derived.view.lkml should
    // leave b's WU with base.view.lkml in dependencyPaths, since base is not one of
    // b's rawFiles. With the current emit rules base.view.lkml is already a
    // dependency because model b lists it under `include:`, so this confirms the
    // widening step is idempotent.
    const project: ParsedLookmlProject = {
      models: [
        { path: 'a.model.lkml', name: 'a', includes: ['views/base.view.lkml'], explores: [], connectionName: null },
        {
          path: 'b.model.lkml',
          name: 'b',
          includes: ['views/base.view.lkml', 'views/derived.view.lkml'],
          explores: [],
          connectionName: null,
        },
      ],
      views: [
        { path: 'views/base.view.lkml', name: 'base', extendsFrom: [], rawSqlTableName: null },
        { path: 'views/derived.view.lkml', name: 'derived', extendsFrom: ['base'], rawSqlTableName: null },
      ],
      dashboards: [],
      allPaths: ['a.model.lkml', 'b.model.lkml', 'views/base.view.lkml', 'views/derived.view.lkml'],
    };
    const diffSet = {
      added: [],
      modified: ['views/derived.view.lkml'],
      deleted: [],
      unchanged: ['a.model.lkml', 'b.model.lkml', 'views/base.view.lkml'],
    };
    const { workUnits } = chunkLookmlProject(project, { diffSet });
    const unitB = workUnits.find((wu) => wu.unitKey === 'lookml-b');
    expect(unitB).toBeDefined();
    // Narrow for the type checker; the expectation above already reports a missing unit.
    if (!unitB) {
      throw new Error('expected lookml-b work unit');
    }
    expect(unitB.dependencyPaths).toContain('views/base.view.lkml');
  });

  it('passes through diffSet.deleted as an EvictionUnit', async () => {
    const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'single-model'));
    const diffSet = {
      added: [],
      modified: [],
      deleted: ['views/zombie.view.lkml'],
      unchanged: ['orders.model.lkml', 'views/customers.view.lkml', 'views/orders.view.lkml'],
    };
    const { workUnits, eviction } = chunkLookmlProject(project, { diffSet });
    expect(eviction).toEqual({ deletedRawPaths: ['views/zombie.view.lkml'] });
    // No WU emitted because no current files are touched.
    expect(workUnits).toEqual([]);
  });
});

View file

@ -0,0 +1,159 @@
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
import { buildLookmlGraph, type LookmlGraph } from './graph.js';
import type { ParsedLookmlProject } from './parse.js';
/** Optional inputs for `chunkLookmlProject`. */
interface ChunkOptions {
// When present, the result is filtered to WorkUnits touching added/modified files
// and deletions are reported as an eviction (re-sync mode).
diffSet?: DiffSet;
// Names of models whose WorkUnits are flagged SL-disallowed
// (reason `lookml_connection_mismatch`) and get a notes banner.
mismatchedModelNames?: Set<string>;
}
/**
 * Prefix a WorkUnit's notes with the "SL writes disallowed" banner.
 *
 * The banner is a bracketed block naming the reason and the model, followed by a
 * blank line and then the original notes.
 *
 * @param modelName     model the banner refers to
 * @param existingNotes notes to keep after the banner
 * @returns banner, blank line and `existingNotes`, newline-separated
 */
function lookmlSlDisallowedNotes(modelName: string, existingNotes: string): string {
  const opening = '[LOOKML SL WRITES DISALLOWED]';
  const closing = '[/LOOKML SL WRITES DISALLOWED]';
  const directives = [
    'reason: lookml_connection_mismatch',
    `model: ${modelName}`,
    'Do not call sl_write_source or sl_edit_source for this WorkUnit.',
    'Continue wiki extraction and context candidates from the raw LookML files.',
  ];
  const banner = [opening, ...directives, closing].join('\n');
  // One empty line separates the banner from the pre-existing notes.
  return `${banner}\n\n${existingNotes}`;
}
/**
 * Split a parsed LookML project into WorkUnits.
 *
 * Without `opts.diffSet` (first run) the full set is returned: one unit per model,
 * a `lookml-orphans` unit when some views have no owning model, and one
 * `lookml-dashboard-<name>` unit per dashboard.
 *
 * With `opts.diffSet` (re-sync) only units whose rawFiles contain at least one
 * added or modified path are kept. Each kept unit's dependencyPaths is widened with
 * the files of its views' `extends` ancestors that are not already in its rawFiles,
 * and `diffSet.deleted` is reported as a single eviction entry.
 *
 * @param project parsed LookML project
 * @param opts    optional diff set and mismatched-model names
 * @returns the WorkUnits, plus an eviction entry on re-sync when files were deleted
 */
export function chunkLookmlProject(project: ParsedLookmlProject, opts: ChunkOptions = {}): ChunkResult {
  const graph = buildLookmlGraph(project);
  const workUnits = emitFirstRunWorkUnits(project, graph, opts);
  const { diffSet } = opts;
  return diffSet ? applyDiffSet(workUnits, project, graph, diffSet) : { workUnits };
}
/**
 * Build the complete ("first run") list of WorkUnits for a parsed LookML project.
 *
 * Emission order:
 *   1. one `lookml-<model>` unit per model, ordered by model name;
 *   2. a single `lookml-orphans` unit when at least one view path has no owner in the graph;
 *   3. one `lookml-dashboard-<name>` unit per dashboard, ordered by dashboard name.
 *
 * @param project parsed project supplying models, views and dashboards
 * @param graph   lookups used here: `viewsIncludedByModel` and `ownerByViewPath`
 * @param opts    only `mismatchedModelNames` is read; models listed there get the
 *                SL-disallowed flag, reason and notes banner
 * @returns the WorkUnits in the order described above
 */
function emitFirstRunWorkUnits(project: ParsedLookmlProject, graph: LookmlGraph, opts: ChunkOptions): WorkUnit[] {
  const allModelPaths = [...new Set(project.models.map((m) => m.path))].sort();
  const allDashboardPaths = [...new Set(project.dashboards.map((d) => d.path))].sort();
  // Dedupe: a .view.lkml with multiple `view:` blocks produces multiple ParsedLookmlView
  // entries sharing one path.
  const allViewPaths = [...new Set(project.views.map((v) => v.path))].sort();
  // Same paths as a Set, so membership checks below are constant-time instead of
  // an Array.includes scan per candidate path.
  const knownViewPaths = new Set(allViewPaths);

  const workUnits: WorkUnit[] = [];

  // Per-model WU, sorted by model name for determinism.
  const sortedModels = [...project.models].sort((a, b) => a.name.localeCompare(b.name));
  for (const model of sortedModels) {
    // Drop included paths that do not correspond to a parsed view file.
    const includedViewPaths = (graph.viewsIncludedByModel.get(model.name) ?? []).filter((p) =>
      knownViewPaths.has(p),
    );
    // Views the model includes and which this model ALSO owns (first-includer-wins).
    const ownedViewPaths = includedViewPaths.filter((p) => graph.ownerByViewPath.get(p) === model.name);
    // Views the model includes but that another model owns. These land in
    // dependencyPaths so this WU's agent can READ them, but the "canonical write"
    // for those views happens in the owner's WU.
    const nonOwnedDepViewPaths = includedViewPaths.filter((p) => graph.ownerByViewPath.get(p) !== model.name).sort();
    const rawFiles = [model.path, ...ownedViewPaths].sort();

    // Peer index: every other model, every view that is neither a raw file nor a
    // dependency of this unit, and every dashboard.
    const excludedFromPeers = new Set([...rawFiles, ...nonOwnedDepViewPaths]);
    const peerFileIndex = [
      ...allModelPaths.filter((p) => p !== model.path),
      ...allViewPaths.filter((p) => !excludedFromPeers.has(p)),
      ...allDashboardPaths,
    ].sort();

    const isMismatched = opts.mismatchedModelNames?.has(model.name) ?? false;
    const notes =
      model.explores.length > 0
        ? `LookML model "${model.name}" (explores: ${model.explores.join(', ')})`
        : `LookML model "${model.name}"`;

    workUnits.push({
      unitKey: `lookml-${model.name}`,
      displayLabel: `LookML model "${model.name}"`,
      rawFiles,
      peerFileIndex,
      dependencyPaths: nonOwnedDepViewPaths,
      notes: isMismatched ? lookmlSlDisallowedNotes(model.name, notes) : notes,
      slDisallowed: isMismatched ? true : undefined,
      slDisallowedReason: isMismatched ? 'lookml_connection_mismatch' : undefined,
    });
  }

  // Orphan view WU — views that no model includes. Skip entirely if none.
  const orphanViewPaths = allViewPaths.filter((p) => !graph.ownerByViewPath.has(p)).sort();
  if (orphanViewPaths.length > 0) {
    workUnits.push({
      unitKey: 'lookml-orphans',
      displayLabel: 'LookML orphan views',
      rawFiles: orphanViewPaths,
      peerFileIndex: [...allModelPaths, ...allDashboardPaths].sort(),
      dependencyPaths: [],
      notes: 'Views not referenced by any .model.lkml (orphaned)',
    });
  }

  // One WU per dashboard file.
  for (const dashboard of [...project.dashboards].sort((a, b) => a.name.localeCompare(b.name))) {
    workUnits.push({
      unitKey: `lookml-dashboard-${dashboard.name}`,
      displayLabel: `LookML dashboard "${dashboard.name}"`,
      rawFiles: [dashboard.path],
      peerFileIndex: [...allModelPaths, ...allViewPaths].sort(),
      dependencyPaths: [],
      notes: `LookML dashboard "${dashboard.name}"`,
    });
  }

  return workUnits;
}
/**
 * Narrow first-run WorkUnits to the ones affected by a re-sync diff.
 *
 * A unit is kept when at least one of its rawFiles is in `diffSet.added` or
 * `diffSet.modified`. For each kept unit, dependencyPaths is extended with the file
 * paths of every `extends` ancestor (as recorded in the graph) of every view in its
 * rawFiles, skipping paths the unit already holds as rawFiles; the result is
 * de-duplicated and sorted. Input units are not mutated.
 *
 * @param firstRunUnits units produced for the whole project
 * @param _project      unused; retained so the signature stays stable
 * @param graph         lookups used here: `viewNamesByPath`, `extendsAncestorsByViewName`,
 *                      `pathsByViewName`
 * @param diffSet       added / modified / deleted paths for this re-sync
 * @returns kept units plus `eviction` (sorted deleted paths), which is `undefined`
 *          when nothing was deleted
 */
function applyDiffSet(
  firstRunUnits: WorkUnit[],
  _project: ParsedLookmlProject,
  graph: LookmlGraph,
  diffSet: DiffSet,
): ChunkResult {
  const touched = new Set([...diffSet.added, ...diffSet.modified]);
  const keptUnits: WorkUnit[] = [];

  for (const wu of firstRunUnits) {
    if (!wu.rawFiles.some((p) => touched.has(p))) {
      continue;
    }

    // Hoisted Set so the innermost loop does a constant-time lookup rather than
    // rescanning rawFiles for every ancestor path.
    const ownFiles = new Set(wu.rawFiles);
    // Start from the unit's existing dependencies; the Set also removes duplicates.
    const widenedDeps = new Set(wu.dependencyPaths);

    // rawFiles entry → view names in that file → extends ancestors → ancestor file paths.
    for (const rawPath of wu.rawFiles) {
      for (const viewName of graph.viewNamesByPath.get(rawPath) ?? []) {
        for (const ancestorName of graph.extendsAncestorsByViewName.get(viewName) ?? []) {
          for (const ancestorPath of graph.pathsByViewName.get(ancestorName) ?? []) {
            if (!ownFiles.has(ancestorPath)) {
              widenedDeps.add(ancestorPath);
            }
          }
        }
      }
    }

    keptUnits.push({
      ...wu,
      dependencyPaths: [...widenedDeps].sort(),
    });
  }

  // Sort a copy so the caller's diffSet.deleted keeps its original order.
  const eviction = diffSet.deleted.length > 0 ? { deletedRawPaths: [...diffSet.deleted].sort() } : undefined;
  return { workUnits: keptUnits, eviction };
}

Some files were not shown because too many files have changed in this diff Show more