Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1 @@
export * from './semantic-layer-compute.js';

View file

@ -0,0 +1,339 @@
import { once } from 'node:events';
import { createServer } from 'node:http';
import { describe, expect, it, vi } from 'vitest';
import { createHttpSemanticLayerComputePort, createPythonSemanticLayerComputePort } from './semantic-layer-compute.js';
const source = {
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'number' }],
joins: [],
measures: [{ name: 'order_count', expr: 'count(*)' }],
};
const sourceGenerationInput = {
tables: [
{
name: 'orders',
db: 'public',
comment: 'Orders table',
columns: [
{ name: 'id', type: 'integer', primaryKey: true, nullable: false, comment: 'Order ID' },
{ name: 'customer_id', type: 'integer' },
{ name: 'amount', type: 'decimal', comment: 'Order amount' },
],
},
{
name: 'customers',
db: 'public',
columns: [
{ name: 'id', type: 'integer', primaryKey: true },
{ name: 'email', type: 'varchar' },
],
},
],
links: [
{
fromTable: 'orders',
fromColumn: 'customer_id',
toTable: 'customers',
toColumn: 'id',
relationshipType: 'MANY_TO_ONE',
},
],
dialect: 'postgres',
};
const sourceGenerationDaemonPayload = {
tables: [
{
name: 'orders',
db: 'public',
comment: 'Orders table',
columns: [
{ name: 'id', type: 'integer', primary_key: true, nullable: false, comment: 'Order ID' },
{ name: 'customer_id', type: 'integer' },
{ name: 'amount', type: 'decimal', comment: 'Order amount' },
],
},
{
name: 'customers',
db: 'public',
columns: [
{ name: 'id', type: 'integer', primary_key: true },
{ name: 'email', type: 'varchar' },
],
},
],
links: [
{
from_table: 'orders',
from_column: 'customer_id',
to_table: 'customers',
to_column: 'id',
relationship_type: 'MANY_TO_ONE',
},
],
dialect: 'postgres',
};
const sourceGenerationDaemonResponse = {
source_count: 2,
sources: [
{
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'number' }],
joins: [
{
to: 'customers',
on: 'customer_id = customers.id',
relationship: 'many_to_one',
},
],
measures: [{ name: 'record_count', expr: 'count(id)' }],
},
],
};
describe('createPythonSemanticLayerComputePort', () => {
it('calls the semantic-query stdio command', async () => {
const runJson = vi.fn(async () => ({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
}));
const port = createPythonSemanticLayerComputePort({ runJson });
await expect(
port.query({
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
}),
).resolves.toEqual({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
});
expect(runJson).toHaveBeenCalledWith('semantic-query', {
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
});
});
it('calls the semantic-validate stdio command', async () => {
const runJson = vi.fn(async () => ({
valid: true,
errors: [],
warnings: [],
per_source_warnings: {},
}));
const port = createPythonSemanticLayerComputePort({ runJson });
await expect(
port.validateSources({
sources: [source],
dialect: 'postgres',
recentlyTouched: ['orders'],
}),
).resolves.toEqual({
valid: true,
errors: [],
warnings: [],
perSourceWarnings: {},
});
expect(runJson).toHaveBeenCalledWith('semantic-validate', {
sources: [source],
dialect: 'postgres',
recently_touched: ['orders'],
});
});
it('calls the semantic-generate-sources stdio command', async () => {
const runJson = vi.fn(async () => sourceGenerationDaemonResponse);
const port = createPythonSemanticLayerComputePort({ runJson });
await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
sourceCount: 2,
sources: sourceGenerationDaemonResponse.sources,
});
expect(runJson).toHaveBeenCalledWith('semantic-generate-sources', sourceGenerationDaemonPayload);
});
});
describe('createHttpSemanticLayerComputePort', () => {
it('calls semantic query and validate HTTP endpoints through an injected runner', async () => {
const requestJson = vi.fn(async (path: string) => {
if (path === '/semantic-layer/query') {
return {
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
};
}
return {
valid: true,
errors: [],
warnings: [],
per_source_warnings: {},
};
});
const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson });
await expect(
port.query({
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
}),
).resolves.toEqual({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
});
await expect(
port.validateSources({
sources: [source],
dialect: 'postgres',
recentlyTouched: ['orders'],
}),
).resolves.toEqual({
valid: true,
errors: [],
warnings: [],
perSourceWarnings: {},
});
expect(requestJson).toHaveBeenNthCalledWith(1, '/semantic-layer/query', {
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
});
expect(requestJson).toHaveBeenNthCalledWith(2, '/semantic-layer/validate', {
sources: [source],
dialect: 'postgres',
recently_touched: ['orders'],
});
});
it('calls the semantic source-generation HTTP endpoint through an injected runner', async () => {
const requestJson = vi.fn(async () => sourceGenerationDaemonResponse);
const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson });
await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
sourceCount: 2,
sources: sourceGenerationDaemonResponse.sources,
});
expect(requestJson).toHaveBeenCalledWith('/semantic-layer/generate-sources', sourceGenerationDaemonPayload);
});
it('posts JSON to a running HTTP daemon endpoint', async () => {
const requests: Array<{ url: string | undefined; body: unknown }> = [];
const server = createServer((request, response) => {
const chunks: Buffer[] = [];
request.on('data', (chunk: Buffer) => chunks.push(chunk));
request.on('end', () => {
requests.push({
url: request.url,
body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
});
response.writeHead(200, { 'content-type': 'application/json' });
response.end(
JSON.stringify({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: { sources_used: ['orders'] },
}),
);
});
});
server.listen(0, '127.0.0.1');
await once(server, 'listening');
try {
const address = server.address();
if (!address || typeof address === 'string') {
throw new Error('expected TCP server address');
}
const port = createHttpSemanticLayerComputePort({ baseUrl: `http://127.0.0.1:${address.port}` });
await expect(
port.query({
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
}),
).resolves.toMatchObject({
sql: 'select count(*) from public.orders',
dialect: 'postgres',
});
expect(requests).toEqual([
{
url: '/semantic-layer/query',
body: {
sources: [source],
dialect: 'postgres',
query: { measures: ['orders.order_count'], dimensions: [] },
},
},
]);
} finally {
server.close();
}
});
it('posts source-generation JSON to a running HTTP daemon endpoint', async () => {
const requests: Array<{ url: string | undefined; body: unknown }> = [];
const server = createServer((request, response) => {
const chunks: Buffer[] = [];
request.on('data', (chunk: Buffer) => chunks.push(chunk));
request.on('end', () => {
requests.push({
url: request.url,
body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
});
response.writeHead(200, { 'content-type': 'application/json' });
response.end(JSON.stringify(sourceGenerationDaemonResponse));
});
});
server.listen(0, '127.0.0.1');
await once(server, 'listening');
try {
const address = server.address();
if (!address || typeof address === 'string') {
throw new Error('expected TCP server address');
}
const port = createHttpSemanticLayerComputePort({ baseUrl: `http://127.0.0.1:${address.port}` });
await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
sourceCount: 2,
sources: sourceGenerationDaemonResponse.sources,
});
expect(requests).toEqual([
{
url: '/semantic-layer/generate-sources',
body: sourceGenerationDaemonPayload,
},
]);
} finally {
server.close();
}
});
});

View file

@ -0,0 +1,304 @@
import { request as httpRequest } from 'node:http';
import { request as httpsRequest } from 'node:https';
import { URL } from 'node:url';
import { spawn } from 'node:child_process';
import type { SemanticLayerQueryInput, SemanticLayerSource } from '../sl/index.js';
export interface KloSemanticLayerComputeQueryResult {
sql: string;
dialect: string;
columns: Array<Record<string, unknown>>;
plan: Record<string, unknown>;
}
export interface KloSemanticLayerComputeValidationResult {
valid: boolean;
errors: string[];
warnings: string[];
perSourceWarnings: Record<string, string[]>;
}
export interface KloSemanticLayerSourceGenerationColumnInput {
name: string;
type: string;
primaryKey?: boolean;
nullable?: boolean;
comment?: string | null;
}
export interface KloSemanticLayerSourceGenerationTableInput {
name: string;
catalog?: string | null;
db?: string | null;
comment?: string | null;
columns: KloSemanticLayerSourceGenerationColumnInput[];
}
export interface KloSemanticLayerSourceGenerationLinkInput {
fromTable: string;
fromColumn: string;
toTable: string;
toColumn: string;
relationshipType: string;
}
export interface KloSemanticLayerSourceGenerationInput {
tables: KloSemanticLayerSourceGenerationTableInput[];
links: KloSemanticLayerSourceGenerationLinkInput[];
dialect?: string;
}
export interface KloSemanticLayerSourceGenerationResult {
sources: Array<Record<string, unknown>>;
sourceCount: number;
}
export interface KloSemanticLayerComputePort {
query(input: {
sources: Array<Record<string, unknown> | SemanticLayerSource>;
query: SemanticLayerQueryInput;
dialect: string;
}): Promise<KloSemanticLayerComputeQueryResult>;
validateSources(input: {
sources: Array<Record<string, unknown> | SemanticLayerSource>;
dialect: string;
recentlyTouched?: string[];
}): Promise<KloSemanticLayerComputeValidationResult>;
generateSources(input: KloSemanticLayerSourceGenerationInput): Promise<KloSemanticLayerSourceGenerationResult>;
}
export type KloDaemonCommand = 'semantic-query' | 'semantic-validate' | 'semantic-generate-sources';
export type KloDaemonJsonRunner = (
subcommand: KloDaemonCommand,
payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;
export type KloDaemonHttpJsonRunner = (path: string, payload: Record<string, unknown>) => Promise<Record<string, unknown>>;
export interface PythonSemanticLayerComputeOptions {
command?: string;
args?: string[];
cwd?: string;
env?: NodeJS.ProcessEnv;
runJson?: KloDaemonJsonRunner;
}
export interface HttpSemanticLayerComputeOptions {
baseUrl: string;
requestJson?: KloDaemonHttpJsonRunner;
}
function parseJsonObject(raw: string, subcommand: string): Record<string, unknown> {
const parsed = JSON.parse(raw) as unknown;
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
throw new Error(`klo-daemon ${subcommand} returned non-object JSON`);
}
return parsed as Record<string, unknown>;
}
function runProcessJson(
options: Required<Pick<PythonSemanticLayerComputeOptions, 'command' | 'args'>> &
Pick<PythonSemanticLayerComputeOptions, 'cwd' | 'env'>,
): KloDaemonJsonRunner {
return async (subcommand: KloDaemonCommand, payload: Record<string, unknown>): Promise<Record<string, unknown>> =>
new Promise((resolve, reject) => {
const child = spawn(options.command, [...options.args, subcommand], {
cwd: options.cwd,
env: { ...process.env, ...options.env },
stdio: ['pipe', 'pipe', 'pipe'],
});
const stdout: Buffer[] = [];
const stderr: Buffer[] = [];
child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
child.on('error', reject);
child.on('close', (code) => {
const stdoutText = Buffer.concat(stdout).toString('utf8').trim();
const stderrText = Buffer.concat(stderr).toString('utf8').trim();
if (code !== 0) {
reject(new Error(`klo-daemon ${subcommand} failed: ${stderrText || `exit code ${code}`}`));
return;
}
try {
resolve(parseJsonObject(stdoutText, subcommand));
} catch (error) {
reject(error);
}
});
child.stdin.end(`${JSON.stringify(payload)}\n`);
});
}
function normalizedBaseUrl(baseUrl: string): string {
return baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`;
}
function postJson(baseUrl: string): KloDaemonHttpJsonRunner {
return async (path, payload) =>
new Promise((resolve, reject) => {
const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
const body = JSON.stringify(payload);
const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
const request = client(
target,
{
method: 'POST',
headers: {
accept: 'application/json',
'content-type': 'application/json',
'content-length': Buffer.byteLength(body),
},
},
(response) => {
const chunks: Buffer[] = [];
response.on('data', (chunk: Buffer) => chunks.push(chunk));
response.on('end', () => {
const text = Buffer.concat(chunks).toString('utf8');
const statusCode = response.statusCode ?? 0;
if (statusCode < 200 || statusCode >= 300) {
reject(new Error(`klo-daemon HTTP ${path} failed with ${statusCode}: ${text}`));
return;
}
try {
resolve(parseJsonObject(text, path));
} catch (error) {
reject(error);
}
});
},
);
request.on('error', reject);
request.end(body);
});
}
function stringArray(value: unknown): string[] {
return Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : [];
}
function recordValue(value: unknown): Record<string, unknown> {
return value && typeof value === 'object' && !Array.isArray(value) ? (value as Record<string, unknown>) : {};
}
function recordArray(value: unknown): Array<Record<string, unknown>> {
return Array.isArray(value)
? value.filter(
(item): item is Record<string, unknown> => item !== null && typeof item === 'object' && !Array.isArray(item),
)
: [];
}
function sourceGenerationPayload(input: KloSemanticLayerSourceGenerationInput): Record<string, unknown> {
return {
tables: input.tables.map((table) => ({
name: table.name,
...(table.catalog !== undefined ? { catalog: table.catalog } : {}),
...(table.db !== undefined ? { db: table.db } : {}),
...(table.comment !== undefined ? { comment: table.comment } : {}),
columns: table.columns.map((column) => ({
name: column.name,
type: column.type,
...(column.primaryKey !== undefined ? { primary_key: column.primaryKey } : {}),
...(column.nullable !== undefined ? { nullable: column.nullable } : {}),
...(column.comment !== undefined ? { comment: column.comment } : {}),
})),
})),
links: input.links.map((link) => ({
from_table: link.fromTable,
from_column: link.fromColumn,
to_table: link.toTable,
to_column: link.toColumn,
relationship_type: link.relationshipType,
})),
dialect: input.dialect ?? 'postgres',
};
}
function sourceGenerationResult(raw: Record<string, unknown>): KloSemanticLayerSourceGenerationResult {
return {
sources: recordArray(raw.sources),
sourceCount: typeof raw.source_count === 'number' ? raw.source_count : recordArray(raw.sources).length,
};
}
export function createPythonSemanticLayerComputePort(
options: PythonSemanticLayerComputeOptions = {},
): KloSemanticLayerComputePort {
const command = options.command ?? 'python';
const args = options.args ?? ['-m', 'klo_daemon'];
const runJson = options.runJson ?? runProcessJson({ command, args, cwd: options.cwd, env: options.env });
return {
async query(input) {
const raw = await runJson('semantic-query', {
sources: input.sources,
dialect: input.dialect,
query: input.query,
});
return {
sql: typeof raw.sql === 'string' ? raw.sql : '',
dialect: typeof raw.dialect === 'string' ? raw.dialect : input.dialect,
columns: recordArray(raw.columns),
plan: recordValue(raw.plan),
};
},
async validateSources(input) {
const raw = await runJson('semantic-validate', {
sources: input.sources,
dialect: input.dialect,
recently_touched: input.recentlyTouched,
});
return {
valid: raw.valid === true,
errors: stringArray(raw.errors),
warnings: stringArray(raw.warnings),
perSourceWarnings: recordValue(raw.per_source_warnings) as Record<string, string[]>,
};
},
async generateSources(input) {
const raw = await runJson('semantic-generate-sources', sourceGenerationPayload(input));
return sourceGenerationResult(raw);
},
};
}
export function createHttpSemanticLayerComputePort(
options: HttpSemanticLayerComputeOptions,
): KloSemanticLayerComputePort {
const requestJson = options.requestJson ?? postJson(options.baseUrl);
return {
async query(input) {
const raw = await requestJson('/semantic-layer/query', {
sources: input.sources,
dialect: input.dialect,
query: input.query,
});
return {
sql: typeof raw.sql === 'string' ? raw.sql : '',
dialect: typeof raw.dialect === 'string' ? raw.dialect : input.dialect,
columns: recordArray(raw.columns),
plan: recordValue(raw.plan),
};
},
async validateSources(input) {
const raw = await requestJson('/semantic-layer/validate', {
sources: input.sources,
dialect: input.dialect,
recently_touched: input.recentlyTouched,
});
return {
valid: raw.valid === true,
errors: stringArray(raw.errors),
warnings: stringArray(raw.warnings),
perSourceWarnings: recordValue(raw.per_source_warnings) as Record<string, string[]>,
};
},
async generateSources(input) {
const raw = await requestJson('/semantic-layer/generate-sources', sourceGenerationPayload(input));
return sourceGenerationResult(raw);
},
};
}