mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
feat(context): add warehouse verification tools (#46)
* feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
This commit is contained in:
parent
bcb0d2f8f7
commit
c22248dabf
89 changed files with 7818 additions and 191 deletions
|
|
@ -64,7 +64,7 @@ export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
|
|||
tableRef: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe('The fully-qualified table or source reference that triggered the fallback (e.g. "orbit_analytics.customer"). Used to generate canonical detail text.'),
|
||||
.describe('The fully-qualified table or source reference that triggered the fallback (e.g. "<schema>.<table>"). Used to generate canonical detail text.'),
|
||||
clarification: z
|
||||
.string()
|
||||
.optional()
|
||||
|
|
|
|||
|
|
@ -36,6 +36,28 @@ describe('tool transcript summaries', () => {
|
|||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('treats a suggested flat wiki key retry as recovery for an invalid nested key', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'historic-sql/top-accounts-by-contract-arr' },
|
||||
output: { structured: { success: false, key: 'historic-sql/top-accounts-by-contract-arr' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'historic-sql-top-accounts-by-contract-arr' },
|
||||
output: { structured: { success: true, key: 'historic-sql-top-accounts-by-contract-arr' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('counts unrecovered wiki_remove structured failures as fatal transcript errors', () => {
|
||||
const summary = createMutableToolTranscriptSummary('reconcile', '/tmp/reconcile.jsonl');
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { ToolCallLogEntry } from './tool-call-logger.js';
|
||||
import { isFlatWikiKey, suggestFlatWikiKey } from '../../wiki/keys.js';
|
||||
|
||||
export interface MutableToolTranscriptSummary {
|
||||
unitKey: string;
|
||||
|
|
@ -112,7 +113,10 @@ function structuredSuccess(output: unknown): boolean | null {
|
|||
|
||||
/**
 * Builds the transcript target key for a wiki tool call.
 *
 * The key found under the structured output is preferred; the key from the tool input is the
 * fallback. Keys that are not already flat are passed through `suggestFlatWikiKey`, so a failed
 * call against a nested key and its retry against the suggested flat key map to the same target.
 *
 * @param entry - Logged tool call whose input/output are inspected for a `key`.
 * @returns `wiki:<flat key>`, or `null` when neither the output nor the input carries a key.
 */
function wikiTargetKey(entry: ToolCallLogEntry): string | null {
  const key = stringField(recordField(entry.output, 'structured'), 'key') ?? stringField(entry.input, 'key');
  if (!key) {
    return null;
  }
  // Normalize to the flat form; returning the raw key here would keep nested and flat keys apart.
  return `wiki:${isFlatWikiKey(key) ? key : suggestFlatWikiKey(key)}`;
}
|
||||
|
||||
function slTargetKey(entry: ToolCallLogEntry): string | null {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,97 @@
|
|||
import { tool, type ToolExecuteFunction, type ToolExecutionOptions, type ToolSet } from 'ai';
|
||||
import { z } from 'zod';
|
||||
|
||||
const verificationLedgerInputSchema = z.object({
|
||||
summary: z.string().min(1).max(2000),
|
||||
verifiedIdentifiers: z.array(z.string().min(1)).max(100).default([]),
|
||||
unverifiedIdentifiers: z.array(z.string().min(1)).max(100).default([]),
|
||||
notes: z.string().max(2000).optional(),
|
||||
});
|
||||
|
||||
export interface VerificationLedgerEntry {
|
||||
summary: string;
|
||||
verifiedIdentifiers: string[];
|
||||
unverifiedIdentifiers: string[];
|
||||
notes?: string;
|
||||
}
|
||||
|
||||
export interface VerificationLedgerState {
|
||||
entries: VerificationLedgerEntry[];
|
||||
}
|
||||
|
||||
const WRITE_TOOL_NAMES = new Set([
|
||||
'wiki_write',
|
||||
'wiki_remove',
|
||||
'sl_write_source',
|
||||
'sl_edit_source',
|
||||
'emit_unmapped_fallback',
|
||||
]);
|
||||
|
||||
export const VERIFICATION_LEDGER_PROMPT = `<pre_write_verification>
|
||||
Before any write-capable tool call (wiki_write, wiki_remove, sl_write_source, sl_edit_source, emit_unmapped_fallback), call record_verification_ledger.
|
||||
The ledger is a model-authored checkpoint, not a deterministic parser gate. Summarize the verification protocol from the loaded skill, list identifiers verified with discover_data/entity_details/sql_execution, and list anything intentionally left unverified. If the write contains no warehouse identifiers, say that explicitly.
|
||||
If a write tool returns verification_ledger_required, complete the ledger and retry the write.
|
||||
</pre_write_verification>`;
|
||||
|
||||
/** Creates a new ledger state that starts with no recorded entries. */
export function createVerificationLedgerState(): VerificationLedgerState {
  const entries: VerificationLedgerEntry[] = [];
  return { entries };
}
|
||||
|
||||
/**
 * Returns a copy of `tools` in which every write-capable tool is gated on the verification ledger,
 * plus the `record_verification_ledger` tool that fills the ledger.
 *
 * A tool is gated when its name is in `WRITE_TOOL_NAMES` and it has an `execute` function; all
 * other tools are passed through untouched. While `state.entries` is empty a gated tool answers
 * with the `verification_ledger_required` payload instead of running.
 *
 * @param tools - Tool set to wrap.
 * @param state - Shared ledger state read by the gates and appended to by the ledger tool.
 */
export function withVerificationLedger(tools: ToolSet, state: VerificationLedgerState): ToolSet {
  const guarded: ToolSet = {};

  Object.entries(tools).forEach(([toolName, candidate]) => {
    const needsGate = WRITE_TOOL_NAMES.has(toolName) && typeof candidate.execute === 'function';
    if (!needsGate) {
      guarded[toolName] = candidate;
      return;
    }

    const run = candidate.execute as ToolExecuteFunction<unknown, unknown>;
    const gate: ToolExecuteFunction<unknown, unknown> = async (input: unknown, opts: ToolExecutionOptions) =>
      state.entries.length > 0 ? run(input, opts) : verificationRequiredOutput(toolName);

    guarded[toolName] = { ...candidate, execute: gate };
  });

  guarded.record_verification_ledger = createRecordVerificationLedgerTool(state);
  return guarded;
}
|
||||
|
||||
/**
 * Builds the `record_verification_ledger` tool.
 *
 * Each call validates the input against `verificationLedgerInputSchema`, appends the parsed entry
 * to `state.entries`, and echoes the summary and both identifier lists back as markdown.
 */
function createRecordVerificationLedgerTool(state: VerificationLedgerState) {
  // Empty identifier lists are rendered as "(none)" rather than an empty string.
  const listOrNone = (identifiers: string[]): string => (identifiers.length ? identifiers.join(', ') : '(none)');

  return tool({
    description:
      'Record the pre-write verification ledger required by loaded ingest skills. Call this before wiki/SL/fallback writes to state what was verified, which tool calls support it, and what remains intentionally unverified.',
    inputSchema: verificationLedgerInputSchema,
    execute: async (input) => {
      const entry = verificationLedgerInputSchema.parse(input);
      state.entries.push(entry);

      const lines = [
        `Verification ledger recorded. Summary: ${entry.summary}`,
        `Verified identifiers: ${listOrNone(entry.verifiedIdentifiers)}`,
        `Unverified identifiers: ${listOrNone(entry.unverifiedIdentifiers)}`,
      ];
      return {
        markdown: lines.join('\n'),
        structured: { success: true, entry },
      };
    },
  });
}
|
||||
|
||||
/**
 * Builds the refusal payload returned by a gated write tool when no ledger entry exists yet.
 *
 * @param toolName - Name of the write tool that was blocked; echoed in the markdown and payload.
 * @returns Markdown instructions plus a structured `verification_ledger_required` failure.
 */
function verificationRequiredOutput(toolName: string) {
  const sentences = [
    `Pre-write verification required before calling ${toolName}. `,
    'Call record_verification_ledger first. In the ledger, summarize the loaded skill protocol you followed, ',
    'list identifiers verified via discover_data/entity_details/sql_execution, and list any identifiers intentionally left unverified. ',
    'If the write contains no warehouse identifiers, say that explicitly in the ledger summary.',
  ];
  const structured = {
    success: false,
    reason: 'verification_ledger_required',
    toolName,
  };
  return { markdown: sentences.join(''), structured };
}
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { BaseTool, ToolContext } from '../../../tools/index.js';
|
||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||
import type { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
describe('DiscoverDataTool', () => {
|
||||
const wikiSearchTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
|
||||
const slDiscoverTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
|
||||
const catalog = { searchByName: vi.fn() } as unknown as WarehouseCatalogService & {
|
||||
searchByName: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
const context: ToolContext = {
|
||||
sourceId: 'ingest',
|
||||
messageId: 'm1',
|
||||
userId: 'system',
|
||||
session: { allowedConnectionNames: new Set(['warehouse']) } as any,
|
||||
};
|
||||
const tool = new DiscoverDataTool({
|
||||
wikiSearchTool,
|
||||
slDiscoverTool,
|
||||
catalogFactory: () => catalog,
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
wikiSearchTool.call.mockReset();
|
||||
slDiscoverTool.call.mockReset();
|
||||
catalog.searchByName.mockReset();
|
||||
wikiSearchTool.call.mockResolvedValue({
|
||||
markdown: '- orders wiki',
|
||||
structured: { totalFound: 1, results: [{ key: 'orders' }] },
|
||||
});
|
||||
slDiscoverTool.call.mockResolvedValue({
|
||||
markdown: '- orders source',
|
||||
structured: { totalSources: 1, sources: [{ sourceName: 'orders' }] },
|
||||
});
|
||||
catalog.searchByName.mockResolvedValue([
|
||||
{
|
||||
kind: 'table',
|
||||
connectionName: 'warehouse',
|
||||
ref: { catalog: null, db: 'public', name: 'orders' },
|
||||
display: 'public.orders',
|
||||
matchedOn: 'name',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => {
|
||||
const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context);
|
||||
|
||||
expect(result.markdown).toContain('## Wiki Pages');
|
||||
expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content');
|
||||
expect(result.markdown).toContain('## Semantic Layer Sources');
|
||||
expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML');
|
||||
expect(result.markdown).toContain('## Raw Warehouse Schema');
|
||||
expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`');
|
||||
expect(result.structured.raw?.hits).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('includes connectionName on raw schema hits so entity_details can follow up', async () => {
|
||||
const multiConnectionContext: ToolContext = {
|
||||
...context,
|
||||
session: { allowedConnectionNames: new Set(['warehouse', 'analytics']) } as any,
|
||||
};
|
||||
catalog.searchByName.mockImplementation(async (connectionName: string, query: string) => [
|
||||
{
|
||||
kind: 'table',
|
||||
connectionName,
|
||||
ref: { catalog: null, db: 'public', name: `${connectionName}_${query}` },
|
||||
display: `public.${connectionName}_${query}`,
|
||||
matchedOn: 'name',
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await tool.call({ query: 'orders', limit: 10 }, multiConnectionContext);
|
||||
|
||||
expect(catalog.searchByName).toHaveBeenCalledWith('analytics', 'orders', 10);
|
||||
expect(catalog.searchByName).toHaveBeenCalledWith('warehouse', 'orders', 10);
|
||||
expect(result.markdown).toContain('connectionName=analytics');
|
||||
expect(result.markdown).toContain('connectionName=warehouse');
|
||||
expect(result.markdown).toContain(
|
||||
'entity_details({connectionName: "analytics", targets: [{display: "public.analytics_orders"}]})',
|
||||
);
|
||||
expect(result.structured.raw?.hits.map((hit) => hit.connectionName)).toEqual(['analytics', 'warehouse']);
|
||||
});
|
||||
|
||||
it('refuses explicit out-of-scope connection names', async () => {
|
||||
const result = await tool.call({ query: 'orders', connectionName: 'billing' }, context);
|
||||
|
||||
expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
|
||||
expect(result.structured).toEqual({ wiki: null, sl: null, raw: null });
|
||||
expect(wikiSearchTool.call).not.toHaveBeenCalled();
|
||||
expect(slDiscoverTool.call).not.toHaveBeenCalled();
|
||||
expect(catalog.searchByName).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('delegates sourceName inspect mode to sl_discover only', async () => {
|
||||
slDiscoverTool.call.mockResolvedValueOnce({
|
||||
markdown: 'source detail',
|
||||
structured: { sourceName: 'orders' },
|
||||
});
|
||||
|
||||
const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context);
|
||||
|
||||
expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context);
|
||||
expect(wikiSearchTool.call).not.toHaveBeenCalled();
|
||||
expect(catalog.searchByName).not.toHaveBeenCalled();
|
||||
expect(result.markdown).toContain('source detail');
|
||||
});
|
||||
|
||||
it('returns the empty-state message when all sections are empty', async () => {
|
||||
wikiSearchTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalFound: 0, results: [] } });
|
||||
slDiscoverTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } });
|
||||
catalog.searchByName.mockResolvedValueOnce([]);
|
||||
|
||||
const result = await tool.call({ query: 'customer source', connectionName: 'warehouse' }, context);
|
||||
|
||||
expect(result.markdown).toContain('No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
import { z } from 'zod';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
|
||||
import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';
|
||||
|
||||
const discoverDataInputSchema = z.object({
|
||||
query: z.string().optional(),
|
||||
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
|
||||
limit: z.number().int().positive().max(50).optional().default(10),
|
||||
sourceName: z.string().optional(),
|
||||
});
|
||||
|
||||
type DiscoverDataInput = z.input<typeof discoverDataInputSchema>;
|
||||
|
||||
export interface DiscoverDataStructured {
|
||||
wiki: unknown | null;
|
||||
sl: unknown | null;
|
||||
raw: { hits: RawSchemaHit[] } | null;
|
||||
}
|
||||
|
||||
interface DiscoverDataDeps {
|
||||
wikiSearchTool: BaseTool;
|
||||
slDiscoverTool: BaseTool;
|
||||
catalogFactory: (context: ToolContext) => WarehouseCatalogService;
|
||||
}
|
||||
|
||||
/** Reads a numeric `totalFound` from an arbitrary structured payload; anything else counts as 0. */
function totalFound(structured: unknown): number {
  if (typeof structured !== 'object' || structured === null) {
    return 0;
  }
  if (!('totalFound' in structured)) {
    return 0;
  }
  const count = structured.totalFound;
  return typeof count === 'number' ? count : 0;
}
|
||||
|
||||
/** Reads a numeric `totalSources` from an arbitrary structured payload; anything else counts as 0. */
function totalSources(structured: unknown): number {
  const isRecord = typeof structured === 'object' && structured !== null;
  if (!isRecord || !('totalSources' in structured)) {
    return 0;
  }
  const count = structured.totalSources;
  if (typeof count !== 'number') {
    return 0;
  }
  return count;
}
|
||||
|
||||
/** Returns the connection names the session permits, or `null` when the session declares none. */
function allowedConnectionNames(context: ToolContext): ReadonlySet<string> | null {
  const names = context.session?.allowedConnectionNames;
  return names ?? null;
}
|
||||
|
||||
/**
 * Discovery tool that sends one lookup to the wiki search tool, the semantic-layer discover tool,
 * and the raw warehouse catalog, then merges whatever matched into one markdown report with a
 * follow-up hint per section.
 */
export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
  readonly name = 'discover_data';

  constructor(private readonly deps: DiscoverDataDeps) {
    super();
  }

  get description(): string {
    return 'Discover existing wiki pages, semantic layer sources, and raw warehouse schema hits before writing ingest output.';
  }

  get inputSchema() {
    return discoverDataInputSchema;
  }

  /**
   * Runs the discovery lookup.
   *
   * - An explicit `connectionName` outside the session's allowed set is refused before any
   *   dependency is called.
   * - When `sourceName` is given, only the semantic-layer tool is consulted (inspect mode).
   * - Otherwise wiki (only for a non-empty query), semantic layer, and raw catalog are queried
   *   in that order; the raw catalog is searched per connection and the combined hits are cut
   *   to `limit`.
   *
   * @param input - Query, optional connection scope, result limit, or a source to inspect.
   * @param context - Tool context; its session may restrict the usable connection names.
   * @returns Markdown report plus the structured payload of each section that produced hits.
   */
  async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
    const deps = this.deps;
    const scope = allowedConnectionNames(context);
    const requested = input.connectionName;

    if (requested && scope && !scope.has(requested)) {
      return {
        markdown: `Connection "${requested}" is not available to this ingest stage.`,
        structured: { wiki: null, sl: null, raw: null },
      };
    }

    // Inspect mode: a named source short-circuits the wiki and raw-catalog lookups.
    if (input.sourceName) {
      const inspected = await deps.slDiscoverTool.call(
        { sourceName: input.sourceName, connectionId: requested },
        context,
      );
      return { markdown: inspected.markdown, structured: { wiki: null, sl: inspected.structured, raw: null } };
    }

    const query = (input.query ?? '').trim();
    const limit = input.limit ?? 10;
    const sections: string[] = [];
    const structured: DiscoverDataStructured = { wiki: null, sl: null, raw: null };

    // The wiki is only searched when there is an actual query string.
    if (query !== '') {
      const wikiHits = await deps.wikiSearchTool.call({ query, limit }, context);
      if (totalFound(wikiHits.structured) > 0) {
        sections.push('## Wiki Pages', '> use `wiki_read(blockKey)` for full content', wikiHits.markdown, '');
        structured.wiki = wikiHits.structured;
      }
    }

    const slHits = await deps.slDiscoverTool.call({ query: query || undefined, connectionId: requested }, context);
    if (totalSources(slHits.structured) > 0) {
      sections.push(
        '## Semantic Layer Sources',
        '> use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details',
        slHits.markdown,
        '',
      );
      structured.sl = slHits.structured;
    }

    const catalog = deps.catalogFactory(context);
    // Without an explicit connection, every allowed connection is searched in sorted order.
    const connectionNames = requested ? [requested] : Array.from(scope ?? []).sort();
    const rawHits: RawSchemaHit[] = [];
    for (const connectionName of connectionNames) {
      const hits = await catalog.searchByName(connectionName, query, limit);
      rawHits.push(...hits);
    }

    if (rawHits.length > 0) {
      const shown = rawHits.slice(0, limit);
      const bullets = shown.map(
        (hit) =>
          `- ${hit.kind}: ${hit.display} [connectionName=${hit.connectionName}] (matched on ${hit.matchedOn}) - ` +
          `follow up with \`entity_details({connectionName: "${hit.connectionName}", targets: [{display: "${hit.display}"}]})\``,
      );
      sections.push(
        '## Raw Warehouse Schema',
        '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values',
        bullets.join('\n'),
      );
      structured.raw = { hits: shown };
    }

    if (sections.length === 0) {
      return {
        markdown: `No matches for "${query}" across wiki, semantic layer, or raw warehouse schema. Try broader terms; this concept may not exist yet.`,
        structured,
      };
    }

    return { markdown: sections.join('\n'), structured };
  }
}
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../../../project/index.js';
|
||||
import type { ToolContext } from '../../../tools/index.js';
|
||||
import { EntityDetailsTool } from './entity-details.tool.js';
|
||||
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
describe('EntityDetailsTool', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
let tool: EntityDetailsTool;
|
||||
let context: ToolContext;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-entity-details-'));
|
||||
project = await initKtxProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' });
|
||||
await seedLiveDatabaseScan();
|
||||
tool = new EntityDetailsTool(() => new WarehouseCatalogService({ fileStore: project.fileStore }));
|
||||
context = {
|
||||
sourceId: 'ingest',
|
||||
messageId: 'm1',
|
||||
userId: 'system',
|
||||
session: {
|
||||
allowedConnectionNames: new Set(['warehouse']),
|
||||
} as any,
|
||||
};
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-1') {
|
||||
const root = `raw-sources/${connectionName}/live-database/${syncId}`;
|
||||
await project.fileStore.writeFile(
|
||||
`${root}/connection.json`,
|
||||
JSON.stringify({ connectionId: connectionName, driver: 'postgres', extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed connection',
|
||||
);
|
||||
await project.fileStore.writeFile(
|
||||
`${root}/tables/orders.json`,
|
||||
JSON.stringify(
|
||||
{
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
name: 'orders',
|
||||
kind: 'table',
|
||||
comment: 'Customer orders',
|
||||
estimatedRows: 12,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'Order id',
|
||||
},
|
||||
{
|
||||
name: 'status',
|
||||
nativeType: 'text',
|
||||
normalizedType: 'text',
|
||||
dimensionType: 'string',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
comment: 'Order status',
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed orders',
|
||||
);
|
||||
await project.fileStore.writeFile(
|
||||
`${root}/enrichment/relationship-profile.json`,
|
||||
JSON.stringify(
|
||||
{
|
||||
connectionId: connectionName,
|
||||
driver: 'postgres',
|
||||
tables: [{ table: { catalog: null, db: 'public', name: 'orders' }, rowCount: 12 }],
|
||||
columns: {
|
||||
'orders.status': {
|
||||
table: { catalog: null, db: 'public', name: 'orders' },
|
||||
column: 'status',
|
||||
rowCount: 12,
|
||||
nullCount: 0,
|
||||
distinctCount: 2,
|
||||
nullRate: 0,
|
||||
sampleValues: ['paid', 'refunded'],
|
||||
},
|
||||
},
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed profile',
|
||||
);
|
||||
}
|
||||
|
||||
it('returns scoped table detail for a display target', async () => {
|
||||
const result = await tool.call({ connectionName: 'warehouse', targets: [{ display: 'public.orders' }] }, context);
|
||||
|
||||
expect(result.markdown).toContain('### public.orders');
|
||||
expect(result.markdown).toContain('- status (text, nullable=false)');
|
||||
expect(result.markdown).toContain('sample: ["paid","refunded"]');
|
||||
expect(result.structured.scanAvailable).toBe(true);
|
||||
expect(result.structured.resolved).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('resolves display targets that include a column name', async () => {
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] },
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('### public.orders');
|
||||
expect(result.markdown).toContain('- status (text, nullable=false)');
|
||||
expect(result.markdown).not.toContain('- id (integer');
|
||||
expect(result.structured.resolved).toHaveLength(1);
|
||||
expect(result.structured.resolved[0]?.columns.map((column) => column.name)).toEqual(['status']);
|
||||
});
|
||||
|
||||
it('reports missing explicit columns instead of returning an empty column list', async () => {
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier');
|
||||
expect(result.markdown).toContain('Available columns: id, status');
|
||||
expect(result.structured.resolved).toHaveLength(0);
|
||||
expect(result.structured.missing).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('reports missing structured table targets in model-visible markdown', async () => {
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionName: 'warehouse',
|
||||
targets: [{ catalog: null, db: 'public', name: 'orderz' }],
|
||||
},
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('Not found in scan: public.orderz');
|
||||
expect(result.markdown).toContain('Closest matches: orders');
|
||||
expect(result.structured.resolved).toHaveLength(0);
|
||||
expect(result.structured.missing).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('reports missing structured column targets in model-visible markdown', async () => {
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionName: 'warehouse',
|
||||
targets: [{ catalog: null, db: 'public', name: 'orders', column: 'plan_tier' }],
|
||||
},
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier');
|
||||
expect(result.markdown).toContain('Available columns: id, status');
|
||||
expect(result.structured.resolved).toHaveLength(0);
|
||||
expect(result.structured.missing).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('returns a no-scan state distinct from not found', async () => {
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'empty', targets: [{ display: 'public.orders' }] },
|
||||
{ ...context, session: { ...context.session!, allowedConnectionNames: new Set(['empty']) } },
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('No live-database scan available for connection "empty"; run `ktx scan` first.');
|
||||
expect(result.structured.scanAvailable).toBe(false);
|
||||
});
|
||||
|
||||
it('refuses out-of-scope connections', async () => {
|
||||
const result = await tool.call({ connectionName: 'billing', targets: [{ display: 'public.orders' }] }, context);
|
||||
|
||||
expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
|
||||
expect(result.structured.scanAvailable).toBe(false);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
import { z } from 'zod';
|
||||
import type { KtxTableRef } from '../../../scan/types.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
|
||||
import { WarehouseCatalogService, type TableDetail } from './warehouse-catalog.service.js';
|
||||
|
||||
const targetSchema = z.union([
|
||||
z.object({ display: z.string().min(1) }),
|
||||
z.object({
|
||||
catalog: z.string().nullable(),
|
||||
db: z.string().nullable(),
|
||||
name: z.string().min(1),
|
||||
column: z.string().optional(),
|
||||
}),
|
||||
]);
|
||||
|
||||
const entityDetailsInputSchema = z.object({
|
||||
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
|
||||
targets: z.array(targetSchema).min(1).max(50),
|
||||
});
|
||||
|
||||
type EntityDetailsInput = z.infer<typeof entityDetailsInputSchema>;
|
||||
type EntityDetailsTarget = EntityDetailsInput['targets'][number];
|
||||
|
||||
export interface EntityDetailsStructured {
|
||||
resolved: TableDetail[];
|
||||
missing: Array<{ target: unknown; candidates: KtxTableRef[] }>;
|
||||
scanAvailable: boolean;
|
||||
}
|
||||
|
||||
/** Looks up the session's allowed connection names; yields `null` when none are declared. */
function allowedConnectionNames(context: ToolContext): ReadonlySet<string> | null {
  const { session } = context;
  return session?.allowedConnectionNames ?? null;
}
|
||||
|
||||
/**
 * Renders a target as a dotted label.
 *
 * Display targets are returned verbatim; structured targets join their non-empty
 * catalog/db/name/column parts with `.`.
 */
function targetLabel(target: EntityDetailsTarget): string {
  if (!('display' in target)) {
    const segments: string[] = [];
    for (const segment of [target.catalog, target.db, target.name, target.column]) {
      if (segment) {
        segments.push(segment);
      }
    }
    return segments.join('.');
  }
  return target.display;
}
|
||||
|
||||
/**
 * Appends the "not found" lines for a target to `parts`: the target label, followed by the names
 * of the candidate tables when there are any.
 */
function appendMissingTargetMarkdown(parts: string[], target: EntityDetailsTarget, candidates: KtxTableRef[]): void {
  parts.push(`Not found in scan: ${targetLabel(target)}`);
  if (candidates.length === 0) {
    return;
  }
  const names = candidates.map(({ name }) => name);
  parts.push(`Closest matches: ${names.join(', ')}`);
}
|
||||
|
||||
/**
 * Resolves one requested target against the catalog.
 *
 * Display targets are delegated entirely to `catalog.resolveDisplayTarget`. Structured targets
 * are taken as given (catalog/db/name/column copied through) and the catalog is consulted only
 * to obtain candidate tables for a later "not found" message.
 */
async function resolveTarget(
  catalog: WarehouseCatalogService,
  connectionName: string,
  target: EntityDetailsTarget,
): Promise<{ resolved: (KtxTableRef & { column?: string }) | null; candidates: KtxTableRef[] }> {
  if (!('display' in target)) {
    const { catalog: targetCatalog, db, name, column } = target;
    const { candidates } = await catalog.resolveDisplayTarget(connectionName, targetLabel(target));
    return {
      resolved: { catalog: targetCatalog, db, name, column },
      candidates,
    };
  }

  return catalog.resolveDisplayTarget(connectionName, target.display);
}
|
||||
|
||||
/** Formats up to ten sample values as a ` - sample: [...]` suffix; empty input gives an empty string. */
function sampleText(values: string[]): string {
  if (values.length === 0) {
    return '';
  }
  const preview = values.slice(0, 10);
  return ` - sample: ${JSON.stringify(preview)}`;
}
|
||||
|
||||
/**
 * Appends the markdown section for one table to `parts`: heading, type line, optional
 * description, and one bullet per column.
 *
 * @param parts - Output buffer that receives the markdown lines.
 * @param detail - Table detail to render.
 * @param columnName - When given, only the matching column is listed. An exact name match is
 *   preferred; if there is none, the match falls back to case-insensitive comparison so the
 *   listing agrees with the case-insensitive lookup done by `findColumn`.
 */
function appendTableMarkdown(parts: string[], detail: TableDetail, columnName?: string): void {
  let columns = detail.columns;
  if (columnName) {
    const exact = detail.columns.filter((column) => column.name === columnName);
    if (exact.length > 0) {
      columns = exact;
    } else {
      // No exact hit: accept a case-only difference instead of rendering an empty column list.
      const folded = columnName.toLowerCase();
      columns = detail.columns.filter((column) => column.name.toLowerCase() === folded);
    }
  }

  parts.push(`### ${detail.display}`);
  // The count always reflects the whole table, even when the listing is narrowed to one column.
  parts.push(`Type: ${detail.kind} | Native columns: ${detail.columns.length}`);

  // Use the same truthiness rule for the guard and the printed value, so an empty description
  // falls through to the comment rather than printing a blank "Description: " line.
  const description = detail.description || detail.comment;
  if (description) {
    parts.push(`Description: ${description}`);
  }

  parts.push('', 'Columns:');
  for (const column of columns) {
    const pk = column.primaryKey ? ', PK' : '';
    parts.push(`- ${column.name} (${column.nativeType}, nullable=${column.nullable}${pk})${sampleText(column.sampleValues)}`);
  }
  parts.push('');
}
|
||||
|
||||
/** Finds the first column whose name equals `columnName` ignoring case, or `null` if none does. */
function findColumn(detail: TableDetail, columnName: string): TableDetail['columns'][number] | null {
  const wanted = columnName.toLowerCase();
  for (const column of detail.columns) {
    if (column.name.toLowerCase() === wanted) {
      return column;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Tool that checks requested tables and columns against the latest live-database scan
 * of a connection and reports which targets resolved and which are missing.
 */
export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema> {
  readonly name = 'entity_details';

  /**
   * @param catalogFactory Builds the catalog service used for a given tool context.
   */
  constructor(private readonly catalogFactory: (context: ToolContext) => WarehouseCatalogService) {
    super();
  }

  get description(): string {
    return 'Verify warehouse tables and columns from the latest live-database scan before writing them into wiki or semantic-layer output.';
  }

  get inputSchema() {
    return entityDetailsInputSchema;
  }

  /**
   * Resolves every requested target against the scan catalog.
   *
   * Returns `scanAvailable: false` with empty lists when the connection is not allowed
   * for this context or has no scan; otherwise returns resolved table details and the
   * missing targets with their candidate tables, plus a markdown rendering of both.
   */
  async call(input: EntityDetailsInput, context: ToolContext): Promise<ToolOutput<EntityDetailsStructured>> {
    const { connectionName, targets } = input;
    const unavailable = (markdown: string): ToolOutput<EntityDetailsStructured> => ({
      markdown,
      structured: { resolved: [], missing: [], scanAvailable: false },
    });

    const permitted = allowedConnectionNames(context);
    if (permitted && !permitted.has(connectionName)) {
      return unavailable(`Connection "${connectionName}" is not available to this ingest stage.`);
    }

    const catalog = this.catalogFactory(context);
    if (!(await catalog.hasScan(connectionName))) {
      return unavailable(
        `No live-database scan available for connection "${connectionName}"; run \`ktx scan\` first.`,
      );
    }

    const parts: string[] = [];
    const resolved: TableDetail[] = [];
    const missing: EntityDetailsStructured['missing'] = [];

    for (const target of targets) {
      const { resolved: ref, candidates } = await resolveTarget(catalog, connectionName, target);
      // A target is missing when it cannot be resolved or the resolved table is not in the scan.
      const detail = ref ? await catalog.getTable({ connectionName, ...ref }) : null;
      if (!ref || !detail) {
        missing.push({ target, candidates });
        appendMissingTargetMarkdown(parts, target, candidates);
        continue;
      }

      const requestedColumn = ref.column;
      if (!requestedColumn) {
        // Table-level target: report the whole table.
        resolved.push(detail);
        appendTableMarkdown(parts, detail);
        continue;
      }

      const column = findColumn(detail, requestedColumn);
      if (column) {
        // Column-level target: narrow the reported detail to the requested column.
        const scopedDetail = { ...detail, columns: [column] };
        resolved.push(scopedDetail);
        appendTableMarkdown(parts, scopedDetail, column.name);
        continue;
      }

      // The table exists but the column does not: offer the table itself as the candidate.
      missing.push({
        target,
        candidates: [{ catalog: detail.catalog, db: detail.db, name: detail.name }],
      });
      parts.push(`Column not found in scan: ${detail.display}.${requestedColumn}`);
      parts.push(`Available columns: ${detail.columns.map((candidate) => candidate.name).join(', ')}`);
    }

    return {
      markdown: parts.join('\n').trim(),
      structured: { resolved, missing, scanAvailable: true },
    };
  }
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
import type { KtxFileStorePort } from '../../../core/index.js';
|
||||
import type { SlConnectionCatalogPort } from '../../../sl/index.js';
|
||||
import type { BaseTool, ToolContext } from '../../../tools/index.js';
|
||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||
import { EntityDetailsTool } from './entity-details.tool.js';
|
||||
import { SqlExecutionTool } from './sql-execution.tool.js';
|
||||
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
export { DiscoverDataTool } from './discover-data.tool.js';
|
||||
export { EntityDetailsTool } from './entity-details.tool.js';
|
||||
export { SqlExecutionTool } from './sql-execution.tool.js';
|
||||
export { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
export type { RawSchemaHit, TableDetail, WarehouseColumnDetail } from './warehouse-catalog.service.js';
|
||||
|
||||
/**
 * Assembles the warehouse verification tool set: entity details, SQL execution and
 * data discovery.
 *
 * @param deps.connections Connection catalog handed to the SQL execution tool.
 * @param deps.fallbackFileStore File store used when the tool context has no session config service.
 * @param deps.wikiSearchTool Wiki search tool forwarded to the discovery tool.
 * @param deps.slDiscoverTool Semantic-layer discovery tool forwarded to the discovery tool.
 * @returns The three tools, in the order entity details, SQL execution, discover data.
 */
export function createWarehouseVerificationTools(deps: {
  connections: SlConnectionCatalogPort;
  fallbackFileStore: KtxFileStorePort;
  wikiSearchTool: BaseTool;
  slDiscoverTool: BaseTool;
}): BaseTool[] {
  // A new catalog service is created per invocation, backed by the session's file store when present.
  function catalogFactory(context: ToolContext): WarehouseCatalogService {
    const fileStore = context.session?.configService ?? deps.fallbackFileStore;
    return new WarehouseCatalogService({ fileStore });
  }

  const entityDetails = new EntityDetailsTool(catalogFactory);
  const sqlExecution = new SqlExecutionTool(deps.connections);
  const discoverData = new DiscoverDataTool({
    wikiSearchTool: deps.wikiSearchTool,
    slDiscoverTool: deps.slDiscoverTool,
    catalogFactory,
  });
  return [entityDetails, sqlExecution, discoverData];
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { SlConnectionCatalogPort } from '../../../sl/index.js';
|
||||
import type { ToolContext } from '../../../tools/index.js';
|
||||
import { SqlExecutionTool } from './sql-execution.tool.js';
|
||||
|
||||
describe('SqlExecutionTool', () => {
  // Only executeQuery is stubbed; the cast supplies the rest of the port's type.
  const connectionCatalog = {
    executeQuery: vi.fn(),
  } as unknown as SlConnectionCatalogPort & { executeQuery: ReturnType<typeof vi.fn> };
  const sqlTool = new SqlExecutionTool(connectionCatalog);
  // Context whose session allows only the "warehouse" connection.
  const ingestContext: ToolContext = {
    sourceId: 'ingest',
    messageId: 'm1',
    userId: 'system',
    session: { allowedConnectionNames: new Set(['warehouse']) } as any,
  };
  const mutatingStatements = ['insert into x values (1)', 'drop table x', 'vacuum'];

  it('wraps read-only SQL with a capped row limit', async () => {
    connectionCatalog.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });

    const input = { connectionName: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 };
    const result = await sqlTool.call(input, ingestContext);

    // The probe is wrapped in an outer select so the row limit always applies.
    expect(connectionCatalog.executeQuery).toHaveBeenCalledWith(
      'warehouse',
      'select * from (select status from public.orders) as ktx_query_result limit 5',
    );
    expect(result.markdown).toContain('| status |');
    expect(result.structured.wrappedSql).toContain('limit 5');
  });

  it.each(mutatingStatements)('rejects mutating SQL: %s', async (sql) => {
    connectionCatalog.executeQuery.mockClear();

    const result = await sqlTool.call({ connectionName: 'warehouse', sql }, ingestContext);

    expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
    expect(connectionCatalog.executeQuery).not.toHaveBeenCalled();
  });

  it('surfaces connector errors verbatim', async () => {
    const connectorError = new Error('relation "orbit_analytics.customer" does not exist');
    connectionCatalog.executeQuery.mockRejectedValue(connectorError);

    const input = { connectionName: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 };
    const result = await sqlTool.call(input, ingestContext);

    expect(result.markdown).toContain('relation "orbit_analytics.customer" does not exist');
    expect(result.structured.error).toContain('relation "orbit_analytics.customer" does not exist');
  });
});
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
import { z } from 'zod';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../../connections/index.js';
|
||||
import type { SlConnectionCatalogPort } from '../../../sl/index.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
|
||||
|
||||
/**
 * Input contract for the `sql_execution` tool.
 *
 * - `connectionName`: starts with a letter or digit, followed by letters, digits, `_` or `-`.
 * - `sql`: non-empty statement text.
 * - `rowLimit`: positive integer no greater than 1000; defaults to 100 when omitted.
 */
const sqlExecutionInputSchema = z.object({
  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
  sql: z.string().min(1),
  rowLimit: z
    .number()
    .int()
    .positive()
    .max(1000)
    .optional()
    .default(100),
});

/** Pre-parse input shape of the schema, so `rowLimit` may be absent here. */
type SqlExecutionInput = z.input<typeof sqlExecutionInputSchema>;
|
||||
|
||||
/** Structured payload returned by the `sql_execution` tool alongside its markdown. */
export interface SqlExecutionStructured {
  /** Result column headers; empty on failure. */
  headers: string[];
  /** Result rows, one array of cell values per row; empty on failure. */
  rows: unknown[][];
  /** Total rows reported by the connector, or the number of returned rows when unreported. */
  rowCount: number;
  /** True when `rowCount` exceeds the number of rows actually returned. */
  truncated: boolean;
  /** The validated statement, or the raw input when validation did not succeed. */
  sql: string;
  /** The row-limited statement sent to the connection; empty when nothing was executed. */
  wrappedSql: string;
  /** Failure reason; absent on success. */
  error?: string;
}
|
||||
|
||||
/**
 * Converts one value into text that is safe inside a single markdown table cell.
 *
 * - `null`/`undefined` become an empty cell.
 * - Plain objects (which `String()` would render as "[object Object]") are JSON-encoded.
 * - Pipes are escaped and line breaks collapsed so the value cannot break the row layout.
 */
function markdownCell(value: unknown): string {
  let text = String(value ?? '');
  if (typeof value === 'object' && value !== null && text === '[object Object]') {
    try {
      text = JSON.stringify(value) ?? text;
    } catch {
      // Non-serializable (e.g. circular or bigint members): keep the String() rendering.
    }
  }
  return text.replace(/\r?\n|\r/g, ' ').replace(/\|/g, '\\|');
}

/**
 * Renders query results as a markdown table capped at 20 visible rows.
 *
 * @param headers Column headers; when empty, the rows are returned as JSON (or a
 *   "no rows" notice) because no table can be drawn.
 * @param rows Result rows.
 * @param totalRows Total row count, used for the "... +N more rows" trailer.
 * @returns Markdown text for the result.
 */
function markdownTable(headers: string[], rows: unknown[][], totalRows: number): string {
  if (headers.length === 0) {
    return rows.length === 0 ? 'Query returned no rows.' : JSON.stringify(rows.slice(0, 20));
  }
  const visible = rows.slice(0, 20);
  const lines = [
    `| ${headers.map(markdownCell).join(' | ')} |`,
    `| ${headers.map(() => '---').join(' | ')} |`,
    ...visible.map((row) => `| ${row.map(markdownCell).join(' | ')} |`),
  ];
  if (totalRows > visible.length) {
    lines.push(`... +${totalRows - visible.length} more rows`);
  }
  return lines.join('\n');
}
|
||||
|
||||
/**
 * Tool that runs a single read-only SQL probe against a connection and returns the
 * result as markdown plus a structured payload. Failures are returned as output
 * rather than thrown.
 */
export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
  readonly name = 'sql_execution';

  /**
   * @param connections Connection catalog used to execute the wrapped query.
   */
  constructor(private readonly connections: SlConnectionCatalogPort) {
    super();
  }

  get description(): string {
    return 'Run a single read-only SELECT or WITH probe against an allowed warehouse connection and return a capped markdown table or the warehouse error.';
  }

  get inputSchema() {
    return sqlExecutionInputSchema;
  }

  /**
   * Validates and row-limits the statement, then executes it.
   *
   * Three failure paths each yield an empty result with `error` set: the connection is
   * not in the session's allowed set, the statement fails read-only validation or
   * limiting, or the connection rejects the query.
   */
  async call(input: SqlExecutionInput, context: ToolContext): Promise<ToolOutput<SqlExecutionStructured>> {
    const { connectionName, sql: requestedSql } = input;
    const describeError = (error: unknown): string => (error instanceof Error ? error.message : String(error));
    const failure = (
      markdown: string,
      error: string,
      sql: string,
      wrappedSql: string,
    ): ToolOutput<SqlExecutionStructured> => ({
      markdown,
      structured: { headers: [], rows: [], rowCount: 0, truncated: false, sql, wrappedSql, error },
    });

    const permitted = context.session?.allowedConnectionNames;
    if (permitted && !permitted.has(connectionName)) {
      return failure(
        `Connection "${connectionName}" is not available to this ingest stage.`,
        'connection_not_allowed',
        requestedSql,
        '',
      );
    }

    let sql: string;
    let wrappedSql: string;
    try {
      sql = assertReadOnlySql(requestedSql);
      wrappedSql = limitSqlForExecution(sql, input.rowLimit);
    } catch (error) {
      // Validation failures report the caller's original text, since nothing was executed.
      const message = describeError(error);
      return failure(message, message, requestedSql, '');
    }

    try {
      const result = await this.connections.executeQuery(connectionName, wrappedSql);
      const headers = result.headers ?? [];
      const rows = result.rows ?? [];
      const rowCount = result.totalRows ?? rows.length;
      const truncated = rowCount > rows.length;
      return {
        markdown: markdownTable(headers, rows, rowCount),
        structured: { headers, rows, rowCount, truncated, sql, wrappedSql },
      };
    } catch (error) {
      const message = describeError(error);
      return failure(`SQL execution failed: ${message}`, message, sql, wrappedSql);
    }
  }
}
|
||||
|
|
@ -0,0 +1,196 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../../../project/index.js';
|
||||
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
describe('WarehouseCatalogService', () => {
  let tempDir: string;
  let project: KtxLocalProject;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-warehouse-catalog-'));
    project = await initKtxProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' });
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  /** Creates a fresh service over the project's file store. */
  const newCatalog = () => new WarehouseCatalogService({ fileStore: project.fileStore });

  /** Writes a pretty-printed JSON fixture through the project's file store. */
  const writeJson = (path: string, payload: unknown, message: string) =>
    project.fileStore.writeFile(path, JSON.stringify(payload, null, 2), 'ktx', 'ktx@example.com', message);

  /**
   * Seeds one live-database scan containing a single `orders` table plus a relationship
   * profile carrying sample values for `orders.status`. BigQuery scans get a catalog,
   * SQLite scans get no db; every other driver uses db `public` with no catalog.
   */
  async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-2', driver = 'postgres') {
    const root = `raw-sources/${connectionName}/live-database/${syncId}`;
    const tableRef = {
      catalog: driver === 'bigquery' ? 'analytics' : null,
      db: driver === 'sqlite' ? null : 'public',
      name: 'orders',
    };

    const connectionArtifact = { connectionId: connectionName, driver, extractedAt: '2026-05-12T00:00:00.000Z' };
    await writeJson(`${root}/connection.json`, connectionArtifact, 'seed connection');

    const ordersTable = {
      ...tableRef,
      kind: 'table',
      comment: 'Customer orders',
      estimatedRows: 12,
      columns: [
        {
          name: 'id',
          nativeType: 'integer',
          normalizedType: 'integer',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: 'Order id',
        },
        {
          name: 'status',
          nativeType: 'text',
          normalizedType: 'text',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: 'Order status',
        },
      ],
      foreignKeys: [],
    };
    await writeJson(`${root}/tables/orders.json`, ordersTable, 'seed orders');

    const relationshipProfile = {
      connectionId: connectionName,
      driver,
      sqlAvailable: true,
      queryCount: 3,
      tables: [{ table: { ...tableRef }, rowCount: 12 }],
      columns: {
        'orders.status': {
          table: { ...tableRef },
          column: 'status',
          nativeType: 'text',
          normalizedType: 'text',
          rowCount: 12,
          nullCount: 0,
          distinctCount: 2,
          uniquenessRatio: 0.1667,
          nullRate: 0,
          sampleValues: ['paid', 'refunded'],
          minTextLength: 4,
          maxTextLength: 8,
        },
      },
      warnings: [],
    };
    await writeJson(`${root}/enrichment/relationship-profile.json`, relationshipProfile, 'seed profile');
  }

  it('finds the latest sync and merges table schema with relationship profile values', async () => {
    await seedLiveDatabaseScan('warehouse', 'sync-1');
    await seedLiveDatabaseScan('warehouse', 'sync-2');
    const catalog = newCatalog();

    await expect(catalog.getLatestSyncId('warehouse')).resolves.toBe('sync-2');
    const detail = await catalog.getTable({ connectionName: 'warehouse', catalog: null, db: 'public', name: 'orders' });

    expect(detail).toMatchObject({
      connectionName: 'warehouse',
      display: 'public.orders',
      rowCount: 12,
      columns: [
        { name: 'id', nativeType: 'integer', primaryKey: true },
        { name: 'status', nativeType: 'text', sampleValues: ['paid', 'refunded'], distinctCount: 2 },
      ],
    });
  });

  it('returns scanAvailable=false when no live-database scan exists', async () => {
    const catalog = newCatalog();
    const missingTable = catalog.getTable({ connectionName: 'missing', catalog: null, db: 'public', name: 'orders' });
    await expect(missingTable).resolves.toBeNull();
    await expect(catalog.hasScan('missing')).resolves.toBe(false);
  });

  it('resolves postgres display strings and returns closest candidates for missing tables', async () => {
    await seedLiveDatabaseScan();
    const catalog = newCatalog();

    await expect(catalog.resolveDisplay('warehouse', 'public.orders')).resolves.toMatchObject({
      resolved: { catalog: null, db: 'public', name: 'orders' },
      candidates: [],
      dialect: 'postgres',
    });
    // A misspelled table name resolves to nothing but still suggests the real table.
    await expect(catalog.resolveDisplay('warehouse', 'public.orderz')).resolves.toMatchObject({
      resolved: null,
      candidates: [{ name: 'orders' }],
    });
  });

  it('treats two-part BigQuery identifiers as ambiguous instead of guessing', async () => {
    await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery');
    const catalog = newCatalog();

    await expect(catalog.resolveDisplay('warehouse', 'public.orders')).resolves.toMatchObject({
      resolved: null,
      dialect: 'bigquery',
    });
  });

  it('resolves postgres column display strings without treating the column as a table', async () => {
    await seedLiveDatabaseScan();
    const catalog = newCatalog();

    await expect(catalog.resolveDisplayTarget('warehouse', 'public.orders.status')).resolves.toMatchObject({
      resolved: { catalog: null, db: 'public', name: 'orders', column: 'status' },
      candidates: [],
      dialect: 'postgres',
    });
  });

  it('resolves BigQuery column display strings with four parts', async () => {
    await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery');
    const catalog = newCatalog();

    await expect(catalog.resolveDisplayTarget('warehouse', 'analytics.public.orders.status')).resolves.toMatchObject({
      resolved: { catalog: 'analytics', db: 'public', name: 'orders', column: 'status' },
      candidates: [],
      dialect: 'bigquery',
    });
  });

  it('searches table names, column names, comments, and descriptions', async () => {
    await seedLiveDatabaseScan();
    const catalog = newCatalog();

    const statusColumnHit = expect.objectContaining({
      kind: 'column',
      ref: expect.objectContaining({ db: 'public', name: 'orders', column: 'status' }),
      matchedOn: 'name',
    });
    await expect(catalog.searchByName('warehouse', 'status', 10)).resolves.toEqual(
      expect.arrayContaining([statusColumnHit]),
    );
  });
});
|
||||
|
|
@ -0,0 +1,452 @@
|
|||
import { getDialectForDriver } from '../../../connections/index.js';
|
||||
import type { KtxFileStorePort } from '../../../core/index.js';
|
||||
import type {
|
||||
KtxConnectionDriver,
|
||||
KtxSchemaColumn,
|
||||
KtxSchemaForeignKey,
|
||||
KtxSchemaTable,
|
||||
KtxTableRef,
|
||||
} from '../../../scan/types.js';
|
||||
|
||||
/** Drivers a scan artifact may declare: the known connection drivers plus the `sqlite3` spelling. */
type CatalogDriver = 'sqlite3' | KtxConnectionDriver;
|
||||
|
||||
/** Dependencies required to construct a `WarehouseCatalogService`. */
export interface WarehouseCatalogServiceDeps {
  /** File store from which scan artifacts are listed and read. */
  fileStore: KtxFileStorePort;
}
|
||||
|
||||
/**
 * Scanned column schema enriched with relationship-profile statistics.
 * Profile-derived numbers are `null` and `sampleValues` is empty when the profile
 * has no entry for the column.
 */
export interface WarehouseColumnDetail extends KtxSchemaColumn {
  /** Keyed description texts; an empty object when the scan provides none. */
  descriptions: Record<string, string>;
  /** Profiled row count. */
  rowCount: number | null;
  /** Profiled count of null values. */
  nullCount: number | null;
  /** Profiled count of distinct values. */
  distinctCount: number | null;
  /** Profiled null rate. */
  nullRate: number | null;
  /** Profiled sample values, each converted to a string. */
  sampleValues: string[];
}
|
||||
|
||||
/** A scanned table merged with its relationship-profile data, as returned by `getTable`. */
export interface TableDetail {
  /** Connection the table was scanned from. */
  connectionName: string;
  /** Catalog part of the table reference, when the scan recorded one. */
  catalog: string | null;
  /** Db part of the table reference, when the scan recorded one. */
  db: string | null;
  /** Table name. */
  name: string;
  /** Driver-specific display string for the table (see `formatDisplay`). */
  display: string;
  /** Table kind as recorded by the scan (e.g. 'table'). */
  kind: string;
  /** Native table comment from the scan. */
  comment: string | null;
  /** Table description, falling back to the first non-empty keyed description. */
  description: string | null;
  /** Profiled row count, falling back to the scan's estimated rows. */
  rowCount: number | null;
  /** Columns with their profile statistics. */
  columns: WarehouseColumnDetail[];
  /** Foreign keys as recorded by the scan. */
  foreignKeys: KtxSchemaForeignKey[];
}
|
||||
|
||||
/**
 * One raw-schema search hit, discriminated by `kind`.
 * Table hits may match on the db; column hits additionally carry the column name
 * in `ref`. `matchedOn` names the field that contained the query.
 */
export type RawSchemaHit =
  | {
      kind: 'table';
      connectionName: string;
      ref: KtxTableRef;
      /** Driver-specific display string of the table. */
      display: string;
      matchedOn: 'name' | 'db' | 'comment' | 'description';
    }
  | {
      kind: 'column';
      connectionName: string;
      ref: KtxTableRef & { column: string };
      /** Table display string followed by `.column`. */
      display: string;
      matchedOn: 'name' | 'comment' | 'description';
    };
|
||||
|
||||
/** Outcome of resolving a display string to a table or a table column. */
export interface DisplayTargetResolution {
  /** The matched table reference, with `column` set for column targets; `null` when unresolved. */
  resolved: (KtxTableRef & { column?: string }) | null;
  /** Closest table suggestions offered when resolution fails. */
  candidates: KtxTableRef[];
  /** Dialect type derived from the scan's driver, or 'unknown' when no scan exists. */
  dialect: string;
}
|
||||
|
||||
/** The fields this service reads from a scan's `connection.json`. */
interface ConnectionArtifact {
  /** Driver recorded by the scan; may be absent. */
  driver?: CatalogDriver;
}
|
||||
|
||||
/** Per-column entry of the relationship profile; every field is optional in the artifact. */
interface RelationshipProfileColumn {
  /** Table the profiled column belongs to. */
  table?: KtxTableRef;
  /** Name of the profiled column. */
  column?: string;
  rowCount?: number;
  nullCount?: number;
  distinctCount?: number;
  nullRate?: number;
  /** Raw sample values; converted to strings when merged into column details. */
  sampleValues?: unknown[];
}
|
||||
|
||||
/** The fields this service reads from a scan's relationship profile artifact. */
interface RelationshipProfileArtifact {
  driver?: CatalogDriver;
  /** Per-table profile entries carrying row counts. */
  tables?: { table?: KtxTableRef; rowCount?: number }[];
  /** Per-column profile entries keyed by a profile-defined string. */
  columns?: Record<string, RelationshipProfileColumn>;
}
|
||||
|
||||
/** In-memory view of the latest live-database scan for one connection. */
interface ConnectionCatalog {
  connectionName: string;
  /** Identifier of the scan the catalog was loaded from. */
  syncId: string;
  /** Driver used for display formatting and dialect lookup. */
  driver: CatalogDriver;
  /** Table schemas read from the scan. */
  tables: KtxSchemaTable[];
  /** Relationship profile for the scan, or `null` when none is loaded. */
  profile: RelationshipProfileArtifact | null;
}
|
||||
|
||||
/**
 * Scanned table schema widened with the optional description fields that scan
 * artifacts may carry on the table and on each of its columns.
 */
type TableWithDescriptions = KtxSchemaTable & {
  description?: string | null;
  descriptions?: Record<string, string>;
  columns: (KtxSchemaColumn & { description?: string | null; descriptions?: Record<string, string> })[];
};
|
||||
|
||||
/**
 * Lower-cases an identifier for case-insensitive comparison.
 *
 * @returns The lower-cased value, or an empty string for `null`/`undefined`.
 */
function normalize(value: string | null | undefined): string {
  if (value === null || value === undefined) {
    return '';
  }
  return value.toLowerCase();
}
|
||||
|
||||
/**
 * Compares two table references case-insensitively on catalog, db and name.
 * A missing part (`null`/`undefined`) compares equal to an empty one.
 */
function refsEqual(left: KtxTableRef, right: KtxTableRef): boolean {
  const parts = ['catalog', 'db', 'name'] as const;
  return parts.every((part) => normalize(left[part]) === normalize(right[part]));
}
|
||||
|
||||
/**
 * Builds a lower-cased `catalog.db.name` key for a table reference.
 * Missing parts contribute an empty segment, so the key always has three segments.
 */
function refKey(ref: KtxTableRef): string {
  const { catalog, db, name } = ref;
  return `${normalize(catalog)}.${normalize(db)}.${normalize(name)}`;
}
|
||||
|
||||
/** Builds a lower-cased key for a column: the table's `refKey` followed by `.column`. */
function columnKey(ref: KtxTableRef, column: string): string {
  return [refKey(ref), normalize(column)].join('.');
}
|
||||
|
||||
/**
 * Parses JSON text and types the result as `T`.
 * The shape is asserted, not validated; malformed JSON makes `JSON.parse` throw.
 */
function readJson<T>(content: string): T {
  const parsed: unknown = JSON.parse(content);
  return parsed as T;
}
|
||||
|
||||
/**
 * Trims one identifier segment and removes a single leading quote character
 * (`"`, `'`, backtick or `[`) and a single trailing one (`"`, `'`, backtick or `]`).
 */
function cleanIdentifierPart(part: string): string {
  const trimmed = part.trim();
  return trimmed.replace(/^["'`\[]|["'`\]]$/g, '');
}
|
||||
|
||||
/**
 * Splits a dotted display string into cleaned identifier segments.
 * Segments that are empty after cleaning are dropped.
 */
function splitDisplay(display: string): string[] {
  const segments: string[] = [];
  for (const raw of display.trim().split('.')) {
    const cleaned = cleanIdentifierPart(raw);
    if (cleaned) {
      segments.push(cleaned);
    }
  }
  return segments;
}
|
||||
|
||||
/**
 * Renders a table reference as a display string.
 * SQLite drivers show the bare table name; every other driver joins the non-empty
 * catalog, db and name parts with dots.
 */
function formatDisplay(driver: CatalogDriver, table: KtxTableRef): string {
  const isSqlite = driver === 'sqlite' || driver === 'sqlite3';
  if (isSqlite) {
    return table.name;
  }
  const qualified: string[] = [];
  for (const part of [table.catalog, table.db, table.name]) {
    if (part) {
      qualified.push(part);
    }
  }
  return qualified.join('.');
}
|
||||
|
||||
/**
 * Parses a display string into a table reference according to the driver's naming depth.
 *
 * - SQLite drivers accept exactly one segment (the table name).
 * - BigQuery, Snowflake and SQL Server accept exactly three segments (catalog.db.name).
 * - Every other driver accepts one (name), two (db.name) or three (catalog.db.name) segments.
 *
 * @returns The parsed reference, or `null` when the segment count does not fit the driver.
 */
function parseDisplay(driver: CatalogDriver, display: string): KtxTableRef | null {
  const parts = splitDisplay(display);
  const bareName = (): KtxTableRef | null =>
    parts.length === 1 ? { catalog: null, db: null, name: parts[0]! } : null;
  const fullyQualified = (): KtxTableRef => ({ catalog: parts[0]!, db: parts[1]!, name: parts[2]! });

  switch (driver) {
    case 'sqlite':
    case 'sqlite3':
      return bareName();
    case 'bigquery':
    case 'snowflake':
    case 'sqlserver':
      return parts.length === 3 ? fullyQualified() : null;
    default:
      break;
  }

  switch (parts.length) {
    case 2:
      return { catalog: null, db: parts[0]!, name: parts[1]! };
    case 3:
      return fullyQualified();
    default:
      return bareName();
  }
}
|
||||
|
||||
/**
 * Number of dotted segments expected in a table display string for the driver:
 * 1 for SQLite, 3 for BigQuery/Snowflake/SQL Server, 2 for everything else.
 */
function expectedDisplayPartCount(driver: CatalogDriver): number {
  switch (driver) {
    case 'sqlite':
    case 'sqlite3':
      return 1;
    case 'bigquery':
    case 'snowflake':
    case 'sqlserver':
      return 3;
    default:
      return 2;
  }
}
|
||||
|
||||
/**
 * Parses a display string of the form `<table display>.<column>`.
 * The string must have exactly one more segment than the driver's expected table
 * depth; the final segment is the column and the rest is parsed as the table.
 *
 * @returns The table reference with `column`, or `null` when the shape does not fit.
 */
function parseColumnDisplay(driver: CatalogDriver, display: string): (KtxTableRef & { column: string }) | null {
  const parts = splitDisplay(display);
  if (parts.length !== expectedDisplayPartCount(driver) + 1) {
    return null;
  }
  const column = parts[parts.length - 1];
  if (!column) {
    return null;
  }
  const tableDisplay = parts.slice(0, -1).join('.');
  const tableRef = parseDisplay(driver, tableDisplay);
  if (!tableRef) {
    return null;
  }
  return { ...tableRef, column };
}
|
||||
|
||||
/**
 * Suggests the tables whose names are closest to the last segment of `display`.
 *
 * Scoring per table name (case-insensitive): 100 for an exact match, 80 when either
 * name contains the other, otherwise the fraction of positions holding the same
 * character. Zero-score tables are dropped; ties are ordered by table name.
 *
 * @param tables Tables to rank.
 * @param display Display string the caller could not resolve.
 * @param limit Maximum number of suggestions (default 5).
 */
function bestCandidates(tables: KtxSchemaTable[], display: string, limit = 5): KtxTableRef[] {
  const needle = normalize(splitDisplay(display).at(-1) ?? display);

  const scoreName = (name: string): number => {
    if (name === needle) {
      return 100;
    }
    if (name.includes(needle) || needle.includes(name)) {
      return 80;
    }
    // Count positions where both names hold the same character.
    const samePosition = [...name].filter((char, index) => needle[index] === char).length;
    return samePosition / Math.max(name.length, needle.length, 1);
  };

  const ranked: { table: KtxSchemaTable; score: number }[] = [];
  for (const table of tables) {
    const score = scoreName(normalize(table.name));
    if (score > 0) {
      ranked.push({ table, score });
    }
  }
  ranked.sort((left, right) => right.score - left.score || left.table.name.localeCompare(right.table.name));

  return ranked.slice(0, limit).map(({ table }) => ({ catalog: table.catalog, db: table.db, name: table.name }));
}
|
||||
|
||||
/**
 * Returns the first description whose text is not blank, in the object's value order.
 *
 * @returns That description unmodified, or `null` when there is none.
 */
function firstDescription(descriptions: Record<string, string> | undefined): string | null {
  if (!descriptions) {
    return null;
  }
  for (const text of Object.values(descriptions)) {
    if (text.trim().length > 0) {
      return text;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Reports which table field contains the query, checked case-insensitively in the
 * order name, db, comment, description.
 *
 * The description check covers `table.description` and every entry of
 * `table.descriptions`, so a match is not lost when several description texts exist.
 *
 * @returns The first matching field, or `null` when the query is empty or nothing matches.
 */
function matchedOnTable(table: TableWithDescriptions, query: string): RawSchemaHit['matchedOn'] | null {
  const q = normalize(query);
  if (!q) {
    return null;
  }
  if (normalize(table.name).includes(q)) {
    return 'name';
  }
  if (normalize(table.db).includes(q)) {
    return 'db';
  }
  if (normalize(table.comment).includes(q)) {
    return 'comment';
  }
  const descriptionTexts = [table.description, ...Object.values(table.descriptions ?? {})];
  if (descriptionTexts.some((text) => normalize(text).includes(q))) {
    return 'description';
  }
  return null;
}
|
||||
|
||||
/**
 * Reports which column field contains the query, checked case-insensitively in the
 * order name, comment, description.
 *
 * The description check covers `column.description` and every entry of
 * `column.descriptions`, so a match is not lost when several description texts exist.
 *
 * @returns The first matching field, or `null` when the query is empty or nothing matches.
 */
function matchedOnColumn(
  column: KtxSchemaColumn & { description?: string | null; descriptions?: Record<string, string> },
  query: string,
): 'name' | 'comment' | 'description' | null {
  const q = normalize(query);
  if (!q) {
    return null;
  }
  if (normalize(column.name).includes(q)) {
    return 'name';
  }
  if (normalize(column.comment).includes(q)) {
    return 'comment';
  }
  const descriptionTexts = [column.description, ...Object.values(column.descriptions ?? {})];
  if (descriptionTexts.some((text) => normalize(text).includes(q))) {
    return 'description';
  }
  return null;
}
|
||||
|
||||
export class WarehouseCatalogService {
|
||||
private readonly catalogs = new Map<string, Promise<ConnectionCatalog | null>>();
|
||||
|
||||
constructor(private readonly deps: WarehouseCatalogServiceDeps) {}
|
||||
|
||||
async hasScan(connectionName: string): Promise<boolean> {
|
||||
return (await this.loadCatalog(connectionName)) !== null;
|
||||
}
|
||||
|
||||
async getLatestSyncId(connectionName: string): Promise<string | null> {
|
||||
return (await this.loadCatalog(connectionName))?.syncId ?? null;
|
||||
}
|
||||
|
||||
async listTables(connectionName: string): Promise<KtxTableRef[]> {
|
||||
const catalog = await this.loadCatalog(connectionName);
|
||||
return catalog?.tables.map((table) => ({ catalog: table.catalog, db: table.db, name: table.name })) ?? [];
|
||||
}
|
||||
|
||||
async getTable(ref: { connectionName: string } & KtxTableRef): Promise<TableDetail | null> {
|
||||
const catalog = await this.loadCatalog(ref.connectionName);
|
||||
if (!catalog) {
|
||||
return null;
|
||||
}
|
||||
const table = catalog.tables.find((candidate) => refsEqual(candidate, ref)) as TableWithDescriptions | undefined;
|
||||
if (!table) {
|
||||
return null;
|
||||
}
|
||||
const profileTables = catalog.profile?.tables ?? [];
|
||||
const profileTable = profileTables.find((candidate) => candidate.table && refsEqual(candidate.table, table));
|
||||
const profileColumns = catalog.profile?.columns ?? {};
|
||||
|
||||
return {
|
||||
connectionName: ref.connectionName,
|
||||
catalog: table.catalog,
|
||||
db: table.db,
|
||||
name: table.name,
|
||||
display: formatDisplay(catalog.driver, table),
|
||||
kind: table.kind,
|
||||
comment: table.comment,
|
||||
description: table.description ?? firstDescription(table.descriptions),
|
||||
rowCount: profileTable?.rowCount ?? table.estimatedRows ?? null,
|
||||
columns: table.columns.map((rawColumn) => {
|
||||
const column = rawColumn as KtxSchemaColumn & {
|
||||
description?: string | null;
|
||||
descriptions?: Record<string, string>;
|
||||
};
|
||||
const profileColumn =
|
||||
profileColumns[columnKey(table, column.name)] ??
|
||||
Object.entries(profileColumns).find(
|
||||
([key, value]) =>
|
||||
normalize(key) === `${normalize(table.name)}.${normalize(column.name)}` ||
|
||||
(value.table && refsEqual(value.table, table) && normalize(value.column) === normalize(column.name)),
|
||||
)?.[1];
|
||||
return {
|
||||
...column,
|
||||
descriptions: column.descriptions ?? {},
|
||||
rowCount: profileColumn?.rowCount ?? null,
|
||||
nullCount: profileColumn?.nullCount ?? null,
|
||||
distinctCount: profileColumn?.distinctCount ?? null,
|
||||
nullRate: profileColumn?.nullRate ?? null,
|
||||
sampleValues: (profileColumn?.sampleValues ?? []).map((value) => String(value)),
|
||||
};
|
||||
}),
|
||||
foreignKeys: table.foreignKeys,
|
||||
};
|
||||
}
|
||||
|
||||
async resolveDisplay(
|
||||
connectionName: string,
|
||||
display: string,
|
||||
): Promise<{
|
||||
resolved: KtxTableRef | null;
|
||||
candidates: KtxTableRef[];
|
||||
dialect: string;
|
||||
}> {
|
||||
const catalog = await this.loadCatalog(connectionName);
|
||||
if (!catalog) {
|
||||
return { resolved: null, candidates: [], dialect: 'unknown' };
|
||||
}
|
||||
const dialect = getDialectForDriver(catalog.driver).type;
|
||||
const parsed = parseDisplay(catalog.driver, display);
|
||||
if (!parsed) {
|
||||
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
|
||||
}
|
||||
const table = catalog.tables.find((candidate) => refsEqual(candidate, parsed));
|
||||
if (!table) {
|
||||
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
|
||||
}
|
||||
return { resolved: { catalog: table.catalog, db: table.db, name: table.name }, candidates: [], dialect };
|
||||
}
|
||||
|
||||
async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> {
|
||||
const catalog = await this.loadCatalog(connectionName);
|
||||
if (!catalog) {
|
||||
return { resolved: null, candidates: [], dialect: 'unknown' };
|
||||
}
|
||||
|
||||
const dialect = getDialectForDriver(catalog.driver).type;
|
||||
const tableResolution = await this.resolveDisplay(connectionName, display);
|
||||
if (tableResolution.resolved) {
|
||||
return tableResolution;
|
||||
}
|
||||
|
||||
const parsedColumn = parseColumnDisplay(catalog.driver, display);
|
||||
if (!parsedColumn) {
|
||||
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
|
||||
}
|
||||
|
||||
const table = catalog.tables.find((candidate) => refsEqual(candidate, parsedColumn));
|
||||
if (!table) {
|
||||
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
|
||||
}
|
||||
|
||||
return {
|
||||
resolved: {
|
||||
catalog: table.catalog,
|
||||
db: table.db,
|
||||
name: table.name,
|
||||
column: parsedColumn.column,
|
||||
},
|
||||
candidates: [],
|
||||
dialect,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Searches the connection's catalog for tables and columns matching `query`.
 *
 * Hits are produced in catalog order: for each table, the table hit (if any)
 * comes first, followed by hits for its matching columns. Scanning stops as
 * soon as `limit` hits have been collected, so a small limit does not pay for
 * a walk over the whole catalog.
 *
 * @param connectionName - Connection whose catalog is searched.
 * @param query - Text passed to the table and column matchers.
 * @param limit - Maximum number of hits to return; values below zero are
 *   treated as zero.
 * @returns At most `limit` hits, or an empty array when no catalog is available.
 */
async searchByName(connectionName: string, query: string, limit: number): Promise<RawSchemaHit[]> {
  const catalog = await this.loadCatalog(connectionName);
  if (!catalog) {
    return [];
  }

  const cap = Math.max(0, limit);
  const hits: RawSchemaHit[] = [];

  for (const table of catalog.tables as TableWithDescriptions[]) {
    // Everything past the cap would be discarded by the final slice anyway.
    if (hits.length >= cap) {
      break;
    }

    const tableMatch = matchedOnTable(table, query);
    if (tableMatch) {
      hits.push({
        kind: 'table',
        connectionName,
        ref: { catalog: table.catalog, db: table.db, name: table.name },
        display: formatDisplay(catalog.driver, table),
        matchedOn: tableMatch,
      });
    }

    for (const column of table.columns) {
      if (hits.length >= cap) {
        break;
      }
      const columnMatch = matchedOnColumn(column, query);
      if (!columnMatch) {
        continue;
      }
      hits.push({
        kind: 'column',
        connectionName,
        ref: { catalog: table.catalog, db: table.db, name: table.name, column: column.name },
        display: `${formatDisplay(catalog.driver, table)}.${column.name}`,
        matchedOn: columnMatch,
      });
    }
  }

  // The slice is kept so fractional or NaN limits behave exactly as before.
  return hits.slice(0, cap);
}
|
||||
|
||||
/**
 * Returns the catalog for a connection, reading it at most once per
 * successful load.
 *
 * The in-flight promise itself is cached, so concurrent callers share a single
 * read. A successful result (including `null` for "no catalog") stays cached.
 * A rejected read is evicted so that a transient failure is not replayed to
 * every later caller; the next call retries the read.
 *
 * @param connectionName - Connection whose catalog is requested.
 * @returns The cached or newly started catalog read.
 */
private loadCatalog(connectionName: string): Promise<ConnectionCatalog | null> {
  const cached = this.catalogs.get(connectionName);
  if (cached) {
    return cached;
  }

  const pending = this.readCatalog(connectionName);
  this.catalogs.set(connectionName, pending);

  // Drop the entry on failure, but only if it is still the one we stored.
  // Callers still observe the rejection through `pending` itself.
  void pending.catch(() => {
    if (this.catalogs.get(connectionName) === pending) {
      this.catalogs.delete(connectionName);
    }
  });

  return pending;
}
|
||||
|
||||
/**
 * Builds the catalog for a connection from the files stored under
 * `raw-sources/<connectionName>/live-database`.
 *
 * The `connection.json` whose path sorts last is taken as the current scan.
 * Its directory supplies the sync id (last path segment), the table
 * definitions (`tables/*.json`, read in sorted path order) and, if readable,
 * the relationship profile.
 *
 * @param connectionName - Connection whose stored scan is read.
 * @returns The assembled catalog, or `null` when no `connection.json` exists
 *   under the connection's root.
 */
private async readCatalog(connectionName: string): Promise<ConnectionCatalog | null> {
  const connectionSuffix = '/connection.json';
  const store = this.deps.fileStore;

  const scans = await store.listFiles(`raw-sources/${connectionName}/live-database`);
  const connectionPaths = scans.files.filter((path) => path.endsWith(connectionSuffix)).sort();
  const newestConnectionPath = connectionPaths.at(-1);
  if (!newestConnectionPath) {
    return null;
  }

  // Strip the file name to get the scan directory; its last segment is the sync id.
  const scanRoot = newestConnectionPath.slice(0, -connectionSuffix.length);
  const syncId = scanRoot.slice(scanRoot.lastIndexOf('/') + 1);

  const connectionFile = await store.readFile(newestConnectionPath);
  const connection = readJson<ConnectionArtifact>(connectionFile.content);

  const tableListing = await store.listFiles(`${scanRoot}/tables`);
  const tablePaths = tableListing.files.filter((path) => path.endsWith('.json')).sort();
  const tables: KtxSchemaTable[] = [];
  for (const path of tablePaths) {
    const tableFile = await store.readFile(path);
    tables.push(readJson<KtxSchemaTable>(tableFile.content));
  }

  // The relationship profile is optional: any failure to read or parse it
  // leaves the catalog without one.
  let profile: RelationshipProfileArtifact | null;
  try {
    const profileFile = await store.readFile(`${scanRoot}/enrichment/relationship-profile.json`);
    profile = readJson<RelationshipProfileArtifact>(profileFile.content);
  } catch {
    profile = null;
  }

  // Driver preference: connection artifact, then profile, then 'postgres'.
  const driver = connection.driver ?? profile?.driver ?? 'postgres';

  return { connectionName, syncId, driver, tables, profile };
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue