mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
Merge origin/main into npx-ktx-python-daemon
This commit is contained in:
commit
88a65bbdc7
148 changed files with 14743 additions and 3508 deletions
|
|
@ -120,10 +120,12 @@
|
|||
"scripts": {
|
||||
"build": "tsc -p tsconfig.json",
|
||||
"relationships:benchmarks": "pnpm --silent run build && node scripts/relationship-benchmark-report.mjs",
|
||||
"relationships:benchmarks:test": "KTX_RUN_RELATIONSHIP_BENCHMARKS=1 vitest run src/scan/relationship-benchmarks.test.ts",
|
||||
"search:pglite-spike": "node scripts/pglite-hybrid-search-spike.mjs",
|
||||
"search:pglite-owner-prototype": "node scripts/pglite-owner-process-prototype.mjs",
|
||||
"search:pglite-sl-prototype": "node scripts/pglite-sl-search-prototype.mjs",
|
||||
"test": "vitest run",
|
||||
"test": "vitest run --exclude src/scan/relationship-benchmarks.test.ts --exclude src/scan/local-scan.test.ts --exclude src/mcp/local-project-ports.test.ts --exclude src/ingest/local-stage-ingest.test.ts --exclude src/sl/pglite-sl-search-prototype.test.ts --exclude src/core/git.service.test.ts --exclude src/ingest/local-adapters.test.ts --exclude src/ingest/local-bundle-ingest.test.ts --exclude src/ingest/local-metabase-ingest.test.ts --exclude src/sl/local-sl.test.ts --exclude src/search/pglite-owner-process.test.ts --exclude src/scan/local-enrichment-artifacts.test.ts --exclude src/search/pglite-spike.test.ts --exclude src/wiki/local-knowledge.test.ts --exclude src/sl/local-query.test.ts --exclude src/scan/relationship-review-decisions.test.ts --exclude src/scan/relationship-profiling.test.ts",
|
||||
"test:slow": "vitest run src/scan/local-scan.test.ts src/mcp/local-project-ports.test.ts src/ingest/local-stage-ingest.test.ts src/sl/pglite-sl-search-prototype.test.ts src/core/git.service.test.ts src/ingest/local-adapters.test.ts src/ingest/local-bundle-ingest.test.ts src/ingest/local-metabase-ingest.test.ts src/sl/local-sl.test.ts src/search/pglite-owner-process.test.ts src/scan/local-enrichment-artifacts.test.ts src/search/pglite-spike.test.ts src/wiki/local-knowledge.test.ts src/sl/local-query.test.ts src/scan/relationship-review-decisions.test.ts src/scan/relationship-profiling.test.ts --testTimeout 30000",
|
||||
"type-check": "tsc -p tsconfig.json --noEmit"
|
||||
},
|
||||
"dependencies": {
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ export const connectionTypeSchema = z.enum([
|
|||
'METABASE',
|
||||
'LOOKER',
|
||||
'NOTION',
|
||||
'POSTHOG',
|
||||
'MYSQL',
|
||||
'CLICKHOUSE',
|
||||
'PLAIN',
|
||||
|
|
|
|||
|
|
@ -256,6 +256,31 @@ describe('GitService', () => {
|
|||
await service.removeWorktree(wtDir).catch(() => undefined);
|
||||
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
|
||||
});
|
||||
|
||||
it('serializes concurrent commits from scoped services targeting the same worktree', async () => {
|
||||
const { commitHash } = await writeAndCommit('seed.md', 'seed');
|
||||
const parent = await realpath(join(tempDir, '..'));
|
||||
const wtDir = join(parent, `wt-${Date.now()}-fw-concurrent`);
|
||||
await service.addWorktree(wtDir, 'session/concurrent', commitHash);
|
||||
|
||||
const first = service.forWorktree(wtDir);
|
||||
const second = service.forWorktree(wtDir);
|
||||
await writeFile(join(wtDir, 'a.md'), 'a\n', 'utf-8');
|
||||
await writeFile(join(wtDir, 'b.md'), 'b\n', 'utf-8');
|
||||
|
||||
const [a, b] = await Promise.all([
|
||||
first.commitFile('a.md', 'add a', 'System User', 'system@example.com'),
|
||||
second.commitFile('b.md', 'add b', 'System User', 'system@example.com'),
|
||||
]);
|
||||
|
||||
expect(a.commitHash).toMatch(/^[0-9a-f]{40}$/);
|
||||
expect(b.commitHash).toMatch(/^[0-9a-f]{40}$/);
|
||||
await expect(first.getFileAtCommit('a.md', a.commitHash)).resolves.toBe('a\n');
|
||||
await expect(second.getFileAtCommit('b.md', b.commitHash)).resolves.toBe('b\n');
|
||||
|
||||
await service.removeWorktree(wtDir).catch(() => undefined);
|
||||
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
|
||||
});
|
||||
});
|
||||
|
||||
describe('squashMergeIntoMain', () => {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ export type SquashMergeResult =
|
|||
| { ok: false; conflict: true; conflictPaths: string[] };
|
||||
|
||||
export class GitService {
|
||||
private static readonly mutationQueues = new Map<string, Promise<void>>();
|
||||
|
||||
private readonly logger: KtxLogger;
|
||||
private git!: SimpleGit;
|
||||
private configDir: string;
|
||||
|
|
@ -92,6 +94,15 @@ export class GitService {
|
|||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
return this.withMutationQueue(() => this.commitFileUnlocked(filePath, commitMessage, author, authorEmail));
|
||||
}
|
||||
|
||||
private async commitFileUnlocked(
|
||||
filePath: string,
|
||||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
try {
|
||||
// Stage the file
|
||||
|
|
@ -166,6 +177,15 @@ export class GitService {
|
|||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
return this.withMutationQueue(() => this.commitFilesUnlocked(filePaths, commitMessage, author, authorEmail));
|
||||
}
|
||||
|
||||
private async commitFilesUnlocked(
|
||||
filePaths: string[],
|
||||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
try {
|
||||
for (const filePath of filePaths) {
|
||||
|
|
@ -231,6 +251,10 @@ export class GitService {
|
|||
if (filePaths.length === 0) {
|
||||
return;
|
||||
}
|
||||
return this.withMutationQueue(() => this.checkoutFilesUnlocked(filePaths));
|
||||
}
|
||||
|
||||
private async checkoutFilesUnlocked(filePaths: string[]): Promise<void> {
|
||||
try {
|
||||
await this.git.checkout(['--', ...filePaths]);
|
||||
} catch (error) {
|
||||
|
|
@ -292,6 +316,10 @@ export class GitService {
|
|||
if (!trimmed) {
|
||||
return;
|
||||
}
|
||||
return this.withMutationQueue(() => this.addNoteUnlocked(commitHash, trimmed));
|
||||
}
|
||||
|
||||
private async addNoteUnlocked(commitHash: string, trimmed: string): Promise<void> {
|
||||
try {
|
||||
await this.git.raw(['notes', 'add', '-f', '-m', trimmed, commitHash]);
|
||||
} catch (error) {
|
||||
|
|
@ -343,6 +371,15 @@ export class GitService {
|
|||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
return this.withMutationQueue(() => this.deleteFileUnlocked(filePath, commitMessage, author, authorEmail));
|
||||
}
|
||||
|
||||
private async deleteFileUnlocked(
|
||||
filePath: string,
|
||||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
try {
|
||||
// Remove the file from git
|
||||
|
|
@ -485,6 +522,13 @@ export class GitService {
|
|||
async squashTo(
|
||||
preHead: string,
|
||||
options: { message: string; author: string; authorEmail: string; expectedAuthor?: string },
|
||||
): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> {
|
||||
return this.withMutationQueue(() => this.squashToUnlocked(preHead, options));
|
||||
}
|
||||
|
||||
private async squashToUnlocked(
|
||||
preHead: string,
|
||||
options: { message: string; author: string; authorEmail: string; expectedAuthor?: string },
|
||||
): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> {
|
||||
const { message, author, authorEmail } = options;
|
||||
const expectedAuthor = options.expectedAuthor ?? author;
|
||||
|
|
@ -560,6 +604,15 @@ export class GitService {
|
|||
author: string,
|
||||
authorEmail: string,
|
||||
commitMessage: string,
|
||||
): Promise<SquashMergeResult> {
|
||||
return this.withMutationQueue(() => this.squashMergeIntoMainUnlocked(branch, author, authorEmail, commitMessage));
|
||||
}
|
||||
|
||||
private async squashMergeIntoMainUnlocked(
|
||||
branch: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
commitMessage: string,
|
||||
): Promise<SquashMergeResult> {
|
||||
// Diff of HEAD..branch (two dots) lists commits/files reachable from `branch` that
|
||||
// aren't on HEAD — i.e. exactly what the squash would apply. Three dots (HEAD...branch)
|
||||
|
|
@ -615,7 +668,7 @@ export class GitService {
|
|||
* range, which can pause the sequencer on conflicts.
|
||||
*/
|
||||
async resetHardTo(targetSha: string): Promise<void> {
|
||||
await this.git.raw(['reset', '--hard', targetSha]);
|
||||
await this.withMutationQueue(() => this.git.raw(['reset', '--hard', targetSha]));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -667,6 +720,10 @@ export class GitService {
|
|||
* Used by the memory agent to isolate per-session writes from interactive saves on main.
|
||||
*/
|
||||
async addWorktree(path: string, branch: string, startSha: string): Promise<void> {
|
||||
await this.withMutationQueue(() => this.addWorktreeUnlocked(path, branch, startSha));
|
||||
}
|
||||
|
||||
private async addWorktreeUnlocked(path: string, branch: string, startSha: string): Promise<void> {
|
||||
try {
|
||||
await this.git.raw(['worktree', 'add', '-b', branch, path, startSha]);
|
||||
} catch (error) {
|
||||
|
|
@ -679,6 +736,10 @@ export class GitService {
|
|||
* worktrees are ktx-internal — a clean working tree is not required.
|
||||
*/
|
||||
async removeWorktree(path: string): Promise<void> {
|
||||
await this.withMutationQueue(() => this.removeWorktreeUnlocked(path));
|
||||
}
|
||||
|
||||
private async removeWorktreeUnlocked(path: string): Promise<void> {
|
||||
try {
|
||||
await this.git.raw(['worktree', 'remove', '--force', path]);
|
||||
} catch (error) {
|
||||
|
|
@ -724,7 +785,7 @@ export class GitService {
|
|||
}
|
||||
|
||||
async deleteBranch(branch: string, force = false): Promise<void> {
|
||||
await this.git.raw(['branch', force ? '-D' : '-d', branch]);
|
||||
await this.withMutationQueue(() => this.git.raw(['branch', force ? '-D' : '-d', branch]));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -745,6 +806,15 @@ export class GitService {
|
|||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
return this.withMutationQueue(() => this.deleteDirectoryUnlocked(directoryPath, commitMessage, author, authorEmail));
|
||||
}
|
||||
|
||||
private async deleteDirectoryUnlocked(
|
||||
directoryPath: string,
|
||||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
try {
|
||||
// Remove the directory recursively from git
|
||||
|
|
@ -795,6 +865,17 @@ export class GitService {
|
|||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
return this.withMutationQueue(() =>
|
||||
this.deleteDirectoriesUnlocked(directoryPaths, commitMessage, author, authorEmail),
|
||||
);
|
||||
}
|
||||
|
||||
private async deleteDirectoriesUnlocked(
|
||||
directoryPaths: string[],
|
||||
commitMessage: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
): Promise<GitCommitInfo> {
|
||||
if (directoryPaths.length === 0) {
|
||||
return {
|
||||
|
|
@ -852,4 +933,27 @@ export class GitService {
|
|||
created: true,
|
||||
};
|
||||
}
|
||||
|
||||
private async withMutationQueue<T>(operation: () => Promise<T>): Promise<T> {
|
||||
const key = this.configDir;
|
||||
const previous = GitService.mutationQueues.get(key) ?? Promise.resolve();
|
||||
let release: () => void = () => {};
|
||||
const current = previous.catch(() => undefined).then(
|
||||
() =>
|
||||
new Promise<void>((resolve) => {
|
||||
release = resolve;
|
||||
}),
|
||||
);
|
||||
GitService.mutationQueues.set(key, current);
|
||||
|
||||
await previous.catch(() => undefined);
|
||||
try {
|
||||
return await operation();
|
||||
} finally {
|
||||
release();
|
||||
if (GitService.mutationQueues.get(key) === current) {
|
||||
GitService.mutationQueues.delete(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -284,6 +284,18 @@ describe('chunkMetabaseStagedDir — syncMode enum coverage', () => {
|
|||
expect(allRawFiles).not.toContain('cards/200.json');
|
||||
});
|
||||
|
||||
it('ONLY with no selections includes every matching card for old generated configs', async () => {
|
||||
await writeInline(dir, 'sync-config.json', {
|
||||
...BASE_SYNC,
|
||||
syncMode: 'ONLY',
|
||||
selections: [],
|
||||
});
|
||||
const result = await chunkMetabaseStagedDir(dir);
|
||||
const allRawFiles = result.workUnits.flatMap((wu) => wu.rawFiles);
|
||||
expect(allRawFiles).toContain('cards/100.json');
|
||||
expect(allRawFiles).toContain('cards/200.json');
|
||||
});
|
||||
|
||||
it('EXCEPT excludes cards in selected collections; includes the rest', async () => {
|
||||
await writeInline(dir, 'sync-config.json', {
|
||||
...BASE_SYNC,
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ function cardMatchesSyncConfig(card: StagedCardFile, config: StagedSyncConfig):
|
|||
if (card.archived) {
|
||||
return false;
|
||||
}
|
||||
if (config.syncMode === 'ALL') {
|
||||
if (config.syncMode === 'ALL' || (config.syncMode === 'ONLY' && config.selections.length === 0)) {
|
||||
return true;
|
||||
}
|
||||
const selectedCollections = new Set(
|
||||
|
|
|
|||
|
|
@ -327,6 +327,40 @@ describe('MetabaseClient.getResolvedSql', () => {
|
|||
expect(result?.resolvedSql).toBe('SELECT * FROM (SELECT a, b FROM base) t ');
|
||||
});
|
||||
|
||||
it('inlines native-query snippets before checking for remaining variables', async () => {
|
||||
const requestSpy = vi.fn().mockResolvedValue([
|
||||
{
|
||||
id: 1,
|
||||
name: 'account_join',
|
||||
content: 'LEFT JOIN accounts a ON a.account_id = mart.account_id',
|
||||
},
|
||||
]);
|
||||
const requestWithCustomRetrySpy = vi.fn();
|
||||
const client = makeClient((client) => {
|
||||
Reflect.set(client, 'request', requestSpy);
|
||||
Reflect.set(client, 'requestWithCustomRetry', requestWithCustomRetrySpy);
|
||||
});
|
||||
const card = nativeCard('SELECT a.account_name FROM mart {{snippet: account_join}}', {
|
||||
'snippet: account_join': {
|
||||
id: 'snippet-tag',
|
||||
name: 'snippet: account_join',
|
||||
type: 'snippet',
|
||||
'snippet-name': 'account_join',
|
||||
'snippet-id': 1,
|
||||
},
|
||||
});
|
||||
|
||||
const result = await client.getResolvedSql(card);
|
||||
|
||||
expect(requestSpy).toHaveBeenCalledWith('GET', '/api/native-query-snippet');
|
||||
expect(requestWithCustomRetrySpy).not.toHaveBeenCalled();
|
||||
expect(result?.resolutionStatus).toBe('resolved');
|
||||
expect(result?.resolvedSql).toBe(
|
||||
'SELECT a.account_name FROM mart LEFT JOIN accounts a ON a.account_id = mart.account_id',
|
||||
);
|
||||
expect(result?.resolvedSql).not.toContain('{{snippet:');
|
||||
});
|
||||
|
||||
it('uses /api/dataset/native for naked variables and prepends a warning comment', async () => {
|
||||
const requestSpy = vi.fn().mockResolvedValue({ query: "SELECT * WHERE id = 'placeholder' AND n = 1" });
|
||||
const client = makeClient((client) => {
|
||||
|
|
|
|||
|
|
@ -39,6 +39,13 @@ interface TemplateTagInfo {
|
|||
dummyValue: string | null;
|
||||
}
|
||||
|
||||
interface NativeQuerySnippet {
|
||||
id: number;
|
||||
name: string;
|
||||
content: string;
|
||||
archived?: boolean | null;
|
||||
}
|
||||
|
||||
interface CreateCardParams {
|
||||
name: string;
|
||||
databaseId: number;
|
||||
|
|
@ -100,6 +107,43 @@ function collectRemainingPlaceholderNames(sql: string): Set<string> {
|
|||
return names;
|
||||
}
|
||||
|
||||
function collectRemainingSnippetNames(sql: string): Set<string> {
|
||||
const names = new Set<string>();
|
||||
for (const match of sql.matchAll(/\{\{\s*snippet:\s*([^}]+?)\s*\}\}/gi)) {
|
||||
names.add(match[1].trim());
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
function normalizeSnippetName(name: string | null | undefined): string {
|
||||
return (name ?? '').replace(/^snippet:\s*/i, '').trim().toLowerCase();
|
||||
}
|
||||
|
||||
function parseNativeQuerySnippets(value: unknown): NativeQuerySnippet[] {
|
||||
const rawItems = Array.isArray(value)
|
||||
? value
|
||||
: typeof value === 'object' && value !== null && Array.isArray((value as { data?: unknown }).data)
|
||||
? (value as { data: unknown[] }).data
|
||||
: [];
|
||||
const snippets: NativeQuerySnippet[] = [];
|
||||
for (const item of rawItems) {
|
||||
if (typeof item !== 'object' || item === null || Array.isArray(item)) {
|
||||
continue;
|
||||
}
|
||||
const rec = item as Record<string, unknown>;
|
||||
if (typeof rec.id !== 'number' || typeof rec.name !== 'string' || typeof rec.content !== 'string') {
|
||||
continue;
|
||||
}
|
||||
snippets.push({
|
||||
id: rec.id,
|
||||
name: rec.name,
|
||||
content: rec.content,
|
||||
...(typeof rec.archived === 'boolean' ? { archived: rec.archived } : {}),
|
||||
});
|
||||
}
|
||||
return snippets;
|
||||
}
|
||||
|
||||
function injectNativeSql(datasetQuery: MetabaseDatasetQuery, sql: string): MetabaseDatasetQuery {
|
||||
if (datasetQuery?.stages?.[0]?.native !== undefined) {
|
||||
const stages = [...(datasetQuery.stages ?? [])];
|
||||
|
|
@ -148,6 +192,7 @@ export class MetabaseClient implements MetabaseRuntimeClient {
|
|||
private readonly logger: MetabaseClientLogger;
|
||||
private readonly baseUrl: string;
|
||||
private readonly config: MetabaseClientConfig;
|
||||
private snippetCache: Promise<NativeQuerySnippet[]> | null = null;
|
||||
|
||||
constructor(
|
||||
runtime: MetabaseClientRuntimeConfig,
|
||||
|
|
@ -261,6 +306,63 @@ export class MetabaseClient implements MetabaseRuntimeClient {
|
|||
return this.request<MetabaseCardSummary[]>('GET', '/api/card/?f=all');
|
||||
}
|
||||
|
||||
private getNativeQuerySnippets(): Promise<NativeQuerySnippet[]> {
|
||||
this.snippetCache ??= this.request<unknown>('GET', '/api/native-query-snippet').then(parseNativeQuerySnippets);
|
||||
return this.snippetCache;
|
||||
}
|
||||
|
||||
private async inlineNativeQuerySnippets(
|
||||
sql: string,
|
||||
templateTags: MetabaseTemplateTag[],
|
||||
cardId: number,
|
||||
): Promise<{ sql: string; unresolved: string[] }> {
|
||||
const names = collectRemainingSnippetNames(sql);
|
||||
if (names.size === 0) {
|
||||
return { sql, unresolved: [] };
|
||||
}
|
||||
|
||||
let snippets: NativeQuerySnippet[];
|
||||
try {
|
||||
snippets = await this.getNativeQuerySnippets();
|
||||
} catch (error) {
|
||||
this.logger.warn(
|
||||
`[metabase] failed to load native query snippets for card ${cardId}; leaving snippet placeholders unresolved: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
return { sql, unresolved: [...names] };
|
||||
}
|
||||
|
||||
const snippetsById = new Map<number, NativeQuerySnippet>();
|
||||
const snippetsByName = new Map<string, NativeQuerySnippet>();
|
||||
for (const snippet of snippets) {
|
||||
if (snippet.archived === true) {
|
||||
continue;
|
||||
}
|
||||
snippetsById.set(snippet.id, snippet);
|
||||
snippetsByName.set(normalizeSnippetName(snippet.name), snippet);
|
||||
}
|
||||
|
||||
const snippetTags = templateTags.filter((tag) => tag.type === 'snippet');
|
||||
const unresolved = new Set<string>();
|
||||
const inlinedSql = sql.replace(/\{\{\s*snippet:\s*([^}]+?)\s*\}\}/gi, (match, rawName: string) => {
|
||||
const normalizedName = normalizeSnippetName(rawName);
|
||||
const tag = snippetTags.find(
|
||||
(candidate) =>
|
||||
normalizeSnippetName(candidate['snippet-name']) === normalizedName ||
|
||||
normalizeSnippetName(candidate.name) === normalizedName,
|
||||
);
|
||||
const snippet =
|
||||
(typeof tag?.['snippet-id'] === 'number' ? snippetsById.get(tag['snippet-id']) : undefined) ??
|
||||
snippetsByName.get(normalizedName);
|
||||
if (!snippet) {
|
||||
unresolved.add(rawName.trim());
|
||||
return match;
|
||||
}
|
||||
return snippet.content;
|
||||
});
|
||||
|
||||
return { sql: inlinedSql, unresolved: [...unresolved] };
|
||||
}
|
||||
|
||||
async convertMbqlToNative(datasetQuery: MetabaseDatasetQuery): Promise<MetabaseNativeQueryResult> {
|
||||
return this.request<MetabaseNativeQueryResult>('POST', '/api/dataset/native', {
|
||||
...datasetQuery,
|
||||
|
|
@ -351,7 +453,18 @@ export class MetabaseClient implements MetabaseRuntimeClient {
|
|||
// silently filter rows out — see incident with auction_seller_bidder_pair_suspicion).
|
||||
let processedSql = stripOptionalClauses(nativeQuery);
|
||||
|
||||
// Step 2: inline {{#CARD_ID}} card references locally. Recursively strip optional
|
||||
// Step 2: inline native-query snippets. Metabase's substitution endpoint does not
|
||||
// always expand {{snippet: name}} for fetched card SQL, but the snippets API does.
|
||||
const snippetResult = await this.inlineNativeQuerySnippets(processedSql, templateTagEntries, card.id);
|
||||
processedSql = snippetResult.sql;
|
||||
if (snippetResult.unresolved.length > 0) {
|
||||
this.logger.warn(
|
||||
`[metabase] card ${card.id} has unresolved SQL snippets: ${snippetResult.unresolved.join(', ')}`,
|
||||
);
|
||||
return { resolvedSql: processedSql, templateTags, resolutionStatus: 'fallback' };
|
||||
}
|
||||
|
||||
// Step 3: inline {{#CARD_ID}} card references locally. Recursively strip optional
|
||||
// clauses in referenced cards too — the same reasoning applies all the way down.
|
||||
try {
|
||||
processedSql = await expandCardReferences(processedSql, {
|
||||
|
|
@ -361,7 +474,17 @@ export class MetabaseClient implements MetabaseRuntimeClient {
|
|||
if (!referencedNative) {
|
||||
throw new Error(`referenced card ${id} has no native query`);
|
||||
}
|
||||
return { native_query: stripOptionalClauses(referencedNative) };
|
||||
const referencedSnippetResult = await this.inlineNativeQuerySnippets(
|
||||
stripOptionalClauses(referencedNative),
|
||||
Object.values(this.getTemplateTags(referenced)),
|
||||
referenced.id,
|
||||
);
|
||||
if (referencedSnippetResult.unresolved.length > 0) {
|
||||
throw new Error(
|
||||
`referenced card ${id} has unresolved SQL snippets: ${referencedSnippetResult.unresolved.join(', ')}`,
|
||||
);
|
||||
}
|
||||
return { native_query: referencedSnippetResult.sql };
|
||||
},
|
||||
});
|
||||
} catch (err) {
|
||||
|
|
@ -372,7 +495,7 @@ export class MetabaseClient implements MetabaseRuntimeClient {
|
|||
throw err;
|
||||
}
|
||||
|
||||
// Step 3: collect template tags that still appear in the SQL after strip + inline.
|
||||
// Step 4: collect template tags that still appear in the SQL after strip + inline.
|
||||
// Anything bracketed-only is gone now; anything card-referenced is inlined.
|
||||
const remainingNames = collectRemainingPlaceholderNames(processedSql);
|
||||
const remainingTags = templateTagEntries.filter((tag) => tag.type !== 'snippet' && remainingNames.has(tag.name));
|
||||
|
|
@ -381,7 +504,7 @@ export class MetabaseClient implements MetabaseRuntimeClient {
|
|||
return { resolvedSql: processedSql, templateTags, resolutionStatus: 'resolved' };
|
||||
}
|
||||
|
||||
// Step 4: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's
|
||||
// Step 5: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's
|
||||
// substitution endpoint. Only required because we can't translate dimension-tag
|
||||
// bindings to warehouse columns ourselves. Prepend a SQL comment listing every
|
||||
// dummy substitution so downstream consumers (the metabase_ingest LLM) know which
|
||||
|
|
|
|||
|
|
@ -57,13 +57,9 @@ describe('computeFetchScope', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('returns empty explicit scope for ONLY with no selections', () => {
|
||||
it('treats generated ONLY with no selections as all', () => {
|
||||
const scope = computeFetchScope({ ...BASE_CONFIG, syncMode: 'ONLY', selections: [] });
|
||||
expect(scope).toEqual({
|
||||
kind: 'explicit',
|
||||
includeCardIds: new Set(),
|
||||
includeCollectionIds: new Set(),
|
||||
});
|
||||
expect(scope).toEqual({ kind: 'all' });
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ export type FetchScope =
|
|||
* union the fetcher switches on. Pure function; no I/O, no side effects.
|
||||
*/
|
||||
export function computeFetchScope(syncConfig: StagedSyncConfig): FetchScope {
|
||||
if (syncConfig.syncMode === 'ALL') {
|
||||
if (syncConfig.syncMode === 'ALL' || (syncConfig.syncMode === 'ONLY' && syncConfig.selections.length === 0)) {
|
||||
return { kind: 'all' };
|
||||
}
|
||||
const cardIds = new Set<number>();
|
||||
|
|
|
|||
|
|
@ -1,8 +1,21 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxProjectConnectionConfig } from '../../../project/index.js';
|
||||
import { metabaseRuntimeConfigFromLocalConnection } from './local-metabase.adapter.js';
|
||||
|
||||
describe('metabaseRuntimeConfigFromLocalConnection', () => {
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-runtime-'));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('resolves api_url and env-backed api_key_ref from a flat ktx.yaml connection', () => {
|
||||
const connection: KtxProjectConnectionConfig = {
|
||||
driver: 'metabase',
|
||||
|
|
@ -20,6 +33,21 @@ describe('metabaseRuntimeConfigFromLocalConnection', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('resolves file-backed api_key_ref from pasted setup secrets', async () => {
|
||||
const keyPath = join(tempDir, 'metabase-main-api-key');
|
||||
await writeFile(keyPath, 'mb_file_key\n', 'utf-8'); // pragma: allowlist secret
|
||||
const connection: KtxProjectConnectionConfig = {
|
||||
driver: 'metabase',
|
||||
api_url: 'https://metabase.example.com',
|
||||
api_key_ref: `file:${keyPath}`,
|
||||
};
|
||||
|
||||
expect(metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toEqual({
|
||||
apiUrl: 'https://metabase.example.com',
|
||||
apiKey: 'mb_file_key', // pragma: allowlist secret
|
||||
});
|
||||
});
|
||||
|
||||
it('accepts url as the local api URL alias', () => {
|
||||
const connection: KtxProjectConnectionConfig = {
|
||||
driver: 'metabase',
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import type { KtxLocalProject, KtxProjectConnectionConfig } from '../../../project/index.js';
|
||||
import { ktxLocalStateDbPath } from '../../../project/index.js';
|
||||
import { resolveKtxConfigReference } from '../../../core/config-reference.js';
|
||||
import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory } from './client.js';
|
||||
import {
|
||||
IngestMetabaseClientFactory,
|
||||
|
|
@ -13,14 +14,6 @@ function stringField(value: unknown): string | null {
|
|||
return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null;
|
||||
}
|
||||
|
||||
function resolveEnvReference(ref: string, env: NodeJS.ProcessEnv): string | null {
|
||||
if (!ref.startsWith('env:')) {
|
||||
return null;
|
||||
}
|
||||
const name = ref.slice('env:'.length);
|
||||
return stringField(env[name]);
|
||||
}
|
||||
|
||||
function hasNetworkProxy(connection: KtxProjectConnectionConfig): boolean {
|
||||
return connection.networkProxy != null || connection.network_proxy != null;
|
||||
}
|
||||
|
|
@ -42,7 +35,7 @@ export function metabaseRuntimeConfigFromLocalConnection(
|
|||
const apiUrl = stringField(connection.api_url) ?? stringField(connection.apiUrl) ?? stringField(connection.url);
|
||||
const literalApiKey = stringField(connection.api_key) ?? stringField(connection.apiKey);
|
||||
const apiKeyRef = stringField(connection.api_key_ref) ?? stringField(connection.apiKeyRef);
|
||||
const apiKey = literalApiKey ?? (apiKeyRef ? resolveEnvReference(apiKeyRef, env) : null);
|
||||
const apiKey = literalApiKey ?? (apiKeyRef ? resolveKtxConfigReference(apiKeyRef, env) : null);
|
||||
|
||||
if (!apiUrl) {
|
||||
throw new Error(`Connection "${connectionId}" is missing metabase api_url`);
|
||||
|
|
|
|||
|
|
@ -79,6 +79,21 @@ function countMemoryFlowActions(actions: MemoryAction[], target: MemoryAction['t
|
|||
return actions.filter((action) => action.target === target).length;
|
||||
}
|
||||
|
||||
function isStructuredToolFailure(output: unknown): boolean {
|
||||
if (!output || typeof output !== 'object') {
|
||||
return false;
|
||||
}
|
||||
const structured = (output as { structured?: unknown }).structured;
|
||||
return !!structured && typeof structured === 'object' && (structured as { success?: unknown }).success === false;
|
||||
}
|
||||
|
||||
function isFailedToolCall(entry: ToolCallLogEntry): boolean {
|
||||
if (entry.error) {
|
||||
return true;
|
||||
}
|
||||
return (entry.toolName === 'sl_write_source' || entry.toolName === 'wiki_write') && isStructuredToolFailure(entry.output);
|
||||
}
|
||||
|
||||
function reportIdFromCreateResult(result: unknown): string | undefined {
|
||||
if (!result || typeof result !== 'object' || !('id' in result)) {
|
||||
return undefined;
|
||||
|
|
@ -344,7 +359,7 @@ export class IngestBundleRunner {
|
|||
toolNames: new Set<string>(),
|
||||
} satisfies MutableToolTranscriptSummary);
|
||||
current.toolCallCount += 1;
|
||||
current.errorCount += entry.error ? 1 : 0;
|
||||
current.errorCount += isFailedToolCall(entry) ? 1 : 0;
|
||||
current.toolNames.add(entry.toolName);
|
||||
transcriptSummaries.set(entry.wuKey, current);
|
||||
};
|
||||
|
|
@ -712,6 +727,7 @@ export class IngestBundleRunner {
|
|||
sourceKey: job.sourceKey,
|
||||
connectionId: job.connectionId,
|
||||
jobId: job.jobId,
|
||||
toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.errorCount ?? 0,
|
||||
onStepFinish: ({ stepIndex, stepBudget }) => {
|
||||
memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import Database from 'better-sqlite3';
|
||||
import { AgentRunnerService } from '../agent/index.js';
|
||||
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
|
||||
import { makeLocalGitRepo } from '../test/make-local-git-repo.js';
|
||||
|
|
@ -57,6 +58,34 @@ class LookerSlWritingAgentRunner extends AgentRunnerService {
|
|||
}
|
||||
}
|
||||
|
||||
class WikiWritingAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
|
||||
const wikiWrite = params.toolSet.wiki_write;
|
||||
if (!wikiWrite?.execute) {
|
||||
throw new Error('wiki_write tool was not available to the WorkUnit');
|
||||
}
|
||||
const result = await wikiWrite.execute(
|
||||
{
|
||||
key: 'orders_context',
|
||||
summary: 'Orders source context',
|
||||
content: 'Orders are purchase records used for revenue analysis.',
|
||||
tags: ['orders'],
|
||||
},
|
||||
{ toolCallId: 'wiki-write' },
|
||||
);
|
||||
if (!result.structured.success) {
|
||||
throw new Error(result.markdown);
|
||||
}
|
||||
}
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
function makeLookerRuntimeClient() {
|
||||
const lookerModels = {
|
||||
models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }],
|
||||
|
|
@ -252,6 +281,33 @@ describe('canonical local ingest', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('indexes wiki pages written by local ingest into the SQLite knowledge tables', async () => {
|
||||
const sourceDir = join(tempDir, 'source');
|
||||
await mkdir(join(sourceDir, 'orders'), { recursive: true });
|
||||
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
|
||||
const agentRunner = new WikiWritingAgentRunner();
|
||||
|
||||
const result = await runLocalIngest({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
adapter: 'fake',
|
||||
connectionId: 'warehouse',
|
||||
sourceDir,
|
||||
jobId: 'wiki-local-1',
|
||||
agentRunner,
|
||||
});
|
||||
|
||||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true });
|
||||
try {
|
||||
expect(db.prepare('SELECT key, summary FROM knowledge_pages ORDER BY key').all()).toEqual([
|
||||
{ key: 'orders_context', summary: 'Orders source context' },
|
||||
]);
|
||||
} finally {
|
||||
db.close();
|
||||
}
|
||||
});
|
||||
|
||||
it('rejects direct Metabase scheduled pulls before requiring a local ingest LLM provider', async () => {
|
||||
const projectDir = join(tempDir, 'metabase-project');
|
||||
await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
|
|
|||
|
|
@ -56,6 +56,8 @@ import {
|
|||
type KnowledgeIndexPort,
|
||||
KnowledgeWikiService,
|
||||
searchLocalKnowledgePages,
|
||||
SqliteKnowledgeIndex,
|
||||
type SqliteKnowledgeIndexPage,
|
||||
WikiListTagsTool,
|
||||
WikiReadTool,
|
||||
WikiRemoveTool,
|
||||
|
|
@ -257,6 +259,17 @@ function parseWiki(raw: string): { summary: string; content: string } {
|
|||
};
|
||||
}
|
||||
|
||||
function parseWikiTags(raw: string): string[] {
|
||||
const match = raw.match(/^---\n([\s\S]*?)\n---\n?/);
|
||||
if (!match) {
|
||||
return [];
|
||||
}
|
||||
const frontmatter = (YAML.parse(match[1]) ?? {}) as Record<string, unknown>;
|
||||
return Array.isArray(frontmatter.tags)
|
||||
? frontmatter.tags.filter((tag): tag is string => typeof tag === 'string')
|
||||
: [];
|
||||
}
|
||||
|
||||
function scoreText(text: string, query: string): number {
|
||||
const normalized = query.toLowerCase().trim();
|
||||
if (!normalized) {
|
||||
|
|
@ -271,21 +284,49 @@ function scoreText(text: string, query: string): number {
|
|||
}
|
||||
|
||||
class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
||||
constructor(private readonly project: KtxLocalProject) {}
|
||||
private readonly sqlite: SqliteKnowledgeIndex;
|
||||
|
||||
async upsertPage(): Promise<void> {}
|
||||
|
||||
async applyDiffTransactional(): Promise<void> {}
|
||||
|
||||
async getExistingSearchTexts(): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>> {
|
||||
return new Map();
|
||||
constructor(private readonly project: KtxLocalProject) {
|
||||
this.sqlite = new SqliteKnowledgeIndex({ dbPath: ktxLocalStateDbPath(project) });
|
||||
}
|
||||
|
||||
async deleteStale(): Promise<void> {}
|
||||
async upsertPage(): Promise<void> {
|
||||
await this.syncAllPagesFromDisk();
|
||||
}
|
||||
|
||||
async deleteByScope(): Promise<void> {}
|
||||
async applyDiffTransactional(): Promise<void> {
|
||||
await this.syncAllPagesFromDisk();
|
||||
}
|
||||
|
||||
async deleteByKey(): Promise<void> {}
|
||||
async getExistingSearchTexts(
|
||||
scope: string,
|
||||
scopeId: string | null,
|
||||
): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>> {
|
||||
const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${scopeId}/`;
|
||||
const result = new Map<string, { searchText: string; hasEmbedding: boolean }>();
|
||||
for (const [path, page] of this.sqlite.getExistingPages()) {
|
||||
if (!path.startsWith(prefix)) {
|
||||
continue;
|
||||
}
|
||||
result.set(path.slice(prefix.length).replace(/\.md$/, ''), {
|
||||
searchText: page.searchText,
|
||||
hasEmbedding: page.embedding !== null,
|
||||
});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
async deleteStale(): Promise<void> {
|
||||
await this.syncAllPagesFromDisk();
|
||||
}
|
||||
|
||||
async deleteByScope(): Promise<void> {
|
||||
await this.syncAllPagesFromDisk();
|
||||
}
|
||||
|
||||
async deleteByKey(): Promise<void> {
|
||||
await this.syncAllPagesFromDisk();
|
||||
}
|
||||
|
||||
async findPageByKey(scope: string, scopeId: string | null, pageKey: string) {
|
||||
const path = scope === 'GLOBAL' ? `knowledge/global/${pageKey}.md` : `knowledge/user/${scopeId}/${pageKey}.md`;
|
||||
|
|
@ -344,6 +385,41 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
.sort((left, right) => right.rrfScore - left.rrfScore || left.pageKey.localeCompare(right.pageKey))
|
||||
.slice(0, limit);
|
||||
}
|
||||
|
||||
private async syncAllPagesFromDisk(): Promise<void> {
|
||||
const listed = await this.project.fileStore.listFiles('knowledge', true);
|
||||
const pages: SqliteKnowledgeIndexPage[] = [];
|
||||
for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) {
|
||||
const parsedPath = parseKnowledgeIndexPath(file);
|
||||
if (!parsedPath) {
|
||||
continue;
|
||||
}
|
||||
const path = `knowledge/${file}`;
|
||||
const raw = await this.project.fileStore.readFile(path);
|
||||
const parsed = parseWiki(raw.content);
|
||||
pages.push({
|
||||
path,
|
||||
key: parsedPath.pageKey,
|
||||
scope: parsedPath.scope,
|
||||
summary: parsed.summary,
|
||||
content: parsed.content,
|
||||
tags: parseWikiTags(raw.content),
|
||||
embedding: null,
|
||||
});
|
||||
}
|
||||
this.sqlite.sync(pages);
|
||||
}
|
||||
}
|
||||
|
||||
function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; pageKey: string } | null {
|
||||
const segments = file.split('/');
|
||||
if (segments.length === 2 && segments[0] === 'global') {
|
||||
return { scope: 'GLOBAL', pageKey: segments[1].replace(/\.md$/, '') };
|
||||
}
|
||||
if (segments.length === 3 && segments[0] === 'user') {
|
||||
return { scope: 'USER', pageKey: segments[2].replace(/\.md$/, '') };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
class NoopKnowledgeEventPort implements KnowledgeEventPort {
|
||||
|
|
|
|||
|
|
@ -106,6 +106,21 @@ describe('Stage 3 — executeWorkUnit', () => {
|
|||
expect(deps.resetHardTo).toHaveBeenCalledWith('pre');
|
||||
});
|
||||
|
||||
it('tool failures reset to the pre-WU SHA and mark WU failed even when the loop ends naturally', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
|
||||
deps.agentRunner.runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' });
|
||||
deps.toolFailureCount = vi.fn().mockReturnValue(2);
|
||||
|
||||
const outcome = await executeWorkUnit(deps, makeWu());
|
||||
|
||||
expect(outcome.status).toBe('failed');
|
||||
expect(outcome.reason).toContain('2 tool call(s) failed');
|
||||
expect(outcome.actions).toEqual([]);
|
||||
expect(outcome.touchedSlSources).toEqual([]);
|
||||
expect(deps.resetHardTo).toHaveBeenCalledWith('pre');
|
||||
});
|
||||
|
||||
it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ export interface WorkUnitExecutionDeps {
|
|||
connectionId: string;
|
||||
jobId: string;
|
||||
onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void;
|
||||
toolFailureCount?: (unitKey: string) => number;
|
||||
}
|
||||
|
||||
export interface WorkUnitOutcome {
|
||||
|
|
@ -128,6 +129,11 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit)
|
|||
return failWithReset(runResult.error?.message ?? 'agent loop errored');
|
||||
}
|
||||
|
||||
const toolFailureCount = deps.toolFailureCount?.(wu.unitKey) ?? 0;
|
||||
if (toolFailureCount > 0) {
|
||||
return failWithReset(`${toolFailureCount} tool call(s) failed during WorkUnit ${wu.unitKey}`);
|
||||
}
|
||||
|
||||
const touched = listTouchedSlSources(deps.captureSession.touchedSlSources);
|
||||
if (touched.length > 0) {
|
||||
const validation = await deps.validateTouchedSources(touched);
|
||||
|
|
|
|||
|
|
@ -116,8 +116,7 @@ function normalizeScanDriver(driver: string | undefined): KtxConnectionDriver {
|
|||
normalized === 'clickhouse' ||
|
||||
normalized === 'sqlserver' ||
|
||||
normalized === 'bigquery' ||
|
||||
normalized === 'snowflake' ||
|
||||
normalized === 'posthog'
|
||||
normalized === 'snowflake'
|
||||
) {
|
||||
return normalized === 'sqlite3' ? 'sqlite' : normalized;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ interface BuiltMocks {
|
|||
appSettings: any;
|
||||
llmProvider: any;
|
||||
prompt: any;
|
||||
posthog: any;
|
||||
eventTracker: any;
|
||||
telemetry: any;
|
||||
skillsRegistry: any;
|
||||
wikiService: any;
|
||||
|
|
@ -64,7 +64,7 @@ const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
|
|||
},
|
||||
llmProvider: { getModel: vi.fn().mockReturnValue({}) },
|
||||
prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') },
|
||||
posthog: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) },
|
||||
eventTracker: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) },
|
||||
telemetry: {
|
||||
isEnabled: () => false,
|
||||
appSettingsService: { settings: { telemetry: { recordInputs: false, recordOutputs: false } } },
|
||||
|
|
@ -177,7 +177,7 @@ const buildService = (mocks: BuiltMocks): MemoryAgentService =>
|
|||
slValidator: mocks.slValidator,
|
||||
toolsetFactory: mocks.toolsetFactory,
|
||||
telemetry: {
|
||||
trackMemoryIngestion: mocks.posthog.trackEvent,
|
||||
trackMemoryIngestion: mocks.eventTracker.trackEvent,
|
||||
},
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ describe('KTX local project runtime', () => {
|
|||
const gitignore = await readFile(join(projectDir, '.ktx/.gitignore'), 'utf-8');
|
||||
expect(gitignore).toContain('cache/');
|
||||
expect(gitignore).toContain('db.sqlite');
|
||||
expect(gitignore).toContain('db.sqlite-*');
|
||||
expect(gitignore).toContain('ingest-transcripts/');
|
||||
expect(gitignore).toContain('secrets/');
|
||||
expect(gitignore).toContain('setup/');
|
||||
expect(gitignore).toContain('agents/');
|
||||
|
|
|
|||
|
|
@ -35,7 +35,10 @@ export interface InitKtxProjectResult extends KtxLocalProject {
|
|||
}
|
||||
|
||||
const TRACKED_SCAFFOLD_FILES: Array<{ path: string; content: string }> = [
|
||||
{ path: '.ktx/.gitignore', content: 'cache/\ndb.sqlite\nsecrets/\nsetup/\nagents/\n' },
|
||||
{
|
||||
path: '.ktx/.gitignore',
|
||||
content: 'cache/\ndb.sqlite\ndb.sqlite-*\ningest-transcripts/\nsecrets/\nsetup/\nagents/\n',
|
||||
},
|
||||
{ path: '.ktx/prompts/.gitkeep', content: '' },
|
||||
{ path: '.ktx/skills/.gitkeep', content: '' },
|
||||
{ path: 'knowledge/global/.gitkeep', content: '' },
|
||||
|
|
|
|||
|
|
@ -67,10 +67,10 @@ describe('KTX setup config helpers', () => {
|
|||
|
||||
it('merges setup-local gitignore entries without removing existing lines', () => {
|
||||
expect(mergeKtxSetupGitignoreEntries('cache/\ndb.sqlite\n')).toBe(
|
||||
['cache/', 'db.sqlite', 'secrets/', 'setup/', 'agents/', ''].join('\n'),
|
||||
['cache/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'secrets/', 'setup/', 'agents/', ''].join('\n'),
|
||||
);
|
||||
expect(mergeKtxSetupGitignoreEntries('cache/\nsecrets/\n')).toBe(
|
||||
['cache/', 'secrets/', 'setup/', 'agents/', ''].join('\n'),
|
||||
['cache/', 'secrets/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'setup/', 'agents/', ''].join('\n'),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,7 +4,15 @@ export const KTX_SETUP_STEPS = ['project', 'llm', 'embeddings', 'databases', 'so
|
|||
|
||||
export type KtxSetupStep = (typeof KTX_SETUP_STEPS)[number];
|
||||
|
||||
const SETUP_GITIGNORE_ENTRIES = ['secrets/', 'setup/', 'agents/'] as const;
|
||||
const SETUP_GITIGNORE_ENTRIES = [
|
||||
'cache/',
|
||||
'db.sqlite',
|
||||
'db.sqlite-*',
|
||||
'ingest-transcripts/',
|
||||
'secrets/',
|
||||
'setup/',
|
||||
'agents/',
|
||||
] as const;
|
||||
|
||||
export function markKtxSetupStepComplete(config: KtxProjectConfig, step: KtxSetupStep): KtxProjectConfig {
|
||||
const databaseConnectionIds = config.setup?.database_connection_ids ?? [];
|
||||
|
|
|
|||
|
|
@ -103,13 +103,12 @@ function normalizeDriver(driver: string | undefined): KtxConnectionDriver {
|
|||
normalized === 'clickhouse' ||
|
||||
normalized === 'sqlserver' ||
|
||||
normalized === 'bigquery' ||
|
||||
normalized === 'snowflake' ||
|
||||
normalized === 'posthog'
|
||||
normalized === 'snowflake'
|
||||
) {
|
||||
return normalized === 'sqlite3' ? 'sqlite' : normalized;
|
||||
}
|
||||
throw new Error(
|
||||
`Standalone ktx scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake/posthog in this phase, received "${driver ?? 'unknown'}"`,
|
||||
`Standalone ktx scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake in this phase, received "${driver ?? 'unknown'}"`,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -53,6 +53,12 @@ const CHECKED_IN_FIXTURE_ORIGINS = {
|
|||
semantic_embedding_aliases_no_declared_constraints: 'synthetic',
|
||||
} as const;
|
||||
|
||||
function runAdHocRelationshipBenchmarks(): boolean {
|
||||
return process.env.KTX_RUN_RELATIONSHIP_BENCHMARKS === '1';
|
||||
}
|
||||
|
||||
const adHocRelationshipBenchmarkIt = runAdHocRelationshipBenchmarks() ? it : it.skip;
|
||||
|
||||
function snapshot(): KtxSchemaSnapshot {
|
||||
return {
|
||||
connectionId: 'warehouse',
|
||||
|
|
@ -644,7 +650,7 @@ describe('relationship benchmarks', () => {
|
|||
expect(fixture.expected.expectedLinks).toHaveLength(1900);
|
||||
});
|
||||
|
||||
it('runs the scale stress fixture inside the benchmark validation budget', async () => {
|
||||
adHocRelationshipBenchmarkIt('runs the scale stress fixture inside the benchmark validation budget', async () => {
|
||||
const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url);
|
||||
const fixture = await loadKtxRelationshipBenchmarkFixture(
|
||||
join(fixtureRoot.pathname, 'scale_stress_no_declared_constraints'),
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import type {
|
|||
} from './enrichment-types.js';
|
||||
import { localCandidateTables } from './relationship-locality.js';
|
||||
import {
|
||||
type KtxRelationshipNormalizedName,
|
||||
normalizeKtxRelationshipName,
|
||||
pluralizeKtxRelationshipToken,
|
||||
singularizeKtxRelationshipToken,
|
||||
|
|
@ -97,9 +98,22 @@ const REFERENCE_SUFFIXES: Array<{ suffix: string; reason: string }> = [
|
|||
{ suffix: '_uuid', reason: 'foreign_key_uuid_suffix' },
|
||||
];
|
||||
const RELATIONSHIP_KEY_TARGET_SUFFIXES = ['_id', '_key', '_code', '_uuid'] as const;
|
||||
const tableAliasesCache = new WeakMap<KtxEnrichedTable, Set<string>>();
|
||||
const parentTableNameAliasesCache = new WeakMap<KtxEnrichedTable, Set<string>>();
|
||||
const normalizedColumnNameCache = new WeakMap<KtxEnrichedColumn, KtxRelationshipNormalizedName>();
|
||||
|
||||
function normalizedColumnName(column: KtxEnrichedColumn): KtxRelationshipNormalizedName {
|
||||
const cached = normalizedColumnNameCache.get(column);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
const normalized = normalizeKtxRelationshipName(column.name);
|
||||
normalizedColumnNameCache.set(column, normalized);
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function isRelationshipKeyShapedTarget(column: KtxEnrichedColumn): boolean {
|
||||
const normalized = normalizeKtxRelationshipName(column.name);
|
||||
const normalized = normalizedColumnName(column);
|
||||
return (
|
||||
normalized.tokens.length >= 2 &&
|
||||
RELATIONSHIP_KEY_TARGET_SUFFIXES.some((suffix) => normalized.normalized.endsWith(suffix))
|
||||
|
|
@ -107,8 +121,8 @@ function isRelationshipKeyShapedTarget(column: KtxEnrichedColumn): boolean {
|
|||
}
|
||||
|
||||
function columnSuffixMatchesTarget(input: { fromColumn: KtxEnrichedColumn; toColumn: KtxEnrichedColumn }): boolean {
|
||||
const source = normalizeKtxRelationshipName(input.fromColumn.name).normalized;
|
||||
const target = normalizeKtxRelationshipName(input.toColumn.name).normalized;
|
||||
const source = normalizedColumnName(input.fromColumn).normalized;
|
||||
const target = normalizedColumnName(input.toColumn).normalized;
|
||||
return source !== target && target.length > 0 && source.endsWith(`_${target}`);
|
||||
}
|
||||
|
||||
|
|
@ -160,7 +174,7 @@ function hasUsableEmbedding(column: KtxEnrichedColumn): boolean {
|
|||
}
|
||||
|
||||
function sourceColumnReference(column: KtxEnrichedColumn): KtxRelationshipSourceColumnReference | null {
|
||||
const normalized = normalizeKtxRelationshipName(column.name);
|
||||
const normalized = normalizedColumnName(column);
|
||||
if (SELF_REFERENCE_NAMES.has(normalized.normalized)) {
|
||||
return { base: normalized.normalized.replace(/_id$/u, ''), reason: 'foreign_key_suffix' };
|
||||
}
|
||||
|
|
@ -192,6 +206,11 @@ function addNormalizedTableAlias(aliases: Set<string>, name: string): void {
|
|||
}
|
||||
|
||||
function tableAliases(table: KtxEnrichedTable): Set<string> {
|
||||
const cached = tableAliasesCache.get(table);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
|
||||
const normalized = normalizeKtxRelationshipName(table.ref.name);
|
||||
const aliases = new Set([normalized.normalized, normalized.singular, normalized.plural]);
|
||||
if (normalized.tokens.length > 1) {
|
||||
|
|
@ -203,6 +222,7 @@ function tableAliases(table: KtxEnrichedTable): Set<string> {
|
|||
aliases.add(pluralizeKtxRelationshipToken(singularLastToken));
|
||||
}
|
||||
}
|
||||
tableAliasesCache.set(table, aliases);
|
||||
return aliases;
|
||||
}
|
||||
|
||||
|
|
@ -212,13 +232,19 @@ function finalTableNamePart(table: KtxEnrichedTable): string {
|
|||
}
|
||||
|
||||
function parentTableNameAliases(table: KtxEnrichedTable): Set<string> {
|
||||
const aliases = tableAliases(table);
|
||||
const cached = parentTableNameAliasesCache.get(table);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
|
||||
const aliases = new Set(tableAliases(table));
|
||||
addNormalizedTableAlias(aliases, finalTableNamePart(table));
|
||||
parentTableNameAliasesCache.set(table, aliases);
|
||||
return aliases;
|
||||
}
|
||||
|
||||
function targetKeyScore(table: KtxEnrichedTable, column: KtxEnrichedColumn): number {
|
||||
const columnName = normalizeKtxRelationshipName(column.name).normalized;
|
||||
const columnName = normalizedColumnName(column).normalized;
|
||||
const tableKeyBases = parentTableNameAliases(table);
|
||||
if (column.primaryKey) {
|
||||
return 1;
|
||||
|
|
@ -338,7 +364,7 @@ function candidateParentTables(input: {
|
|||
maxParentTables,
|
||||
}).map((item) => item.table);
|
||||
|
||||
const normalizedColumn = normalizeKtxRelationshipName(input.fromColumn.name).normalized;
|
||||
const normalizedColumn = normalizedColumnName(input.fromColumn).normalized;
|
||||
if (!SELF_REFERENCE_NAMES.has(normalizedColumn) || ranked.some((table) => table.id === input.fromTable.id)) {
|
||||
return ranked;
|
||||
}
|
||||
|
|
@ -364,7 +390,7 @@ function targetKeyEvidence(
|
|||
return { score: 0, reasons: [] };
|
||||
}
|
||||
|
||||
const columnName = normalizeKtxRelationshipName(column.name).normalized;
|
||||
const columnName = normalizedColumnName(column).normalized;
|
||||
if (columnName === 'code' || columnName.endsWith('_code') || columnName === 'key' || columnName.endsWith('_key')) {
|
||||
return { score: 0.86, reasons: ['profile_unique_target'] };
|
||||
}
|
||||
|
|
@ -500,7 +526,7 @@ function createCandidate(input: {
|
|||
evidence: {
|
||||
sourceColumnBase: input.sourceBase,
|
||||
targetTableBase: input.targetBase,
|
||||
targetColumnBase: normalizeKtxRelationshipName(input.toColumn.name).normalized,
|
||||
targetColumnBase: normalizedColumnName(input.toColumn).normalized,
|
||||
targetKeyScore: input.targetKeyScore,
|
||||
nameScore: input.nameScore,
|
||||
reasons: input.reasons,
|
||||
|
|
@ -553,7 +579,7 @@ function generateKtxEmbeddingRelationshipCandidates(
|
|||
continue;
|
||||
}
|
||||
|
||||
const sourceBase = normalizeKtxRelationshipName(fromColumn.name).normalized;
|
||||
const sourceBase = normalizedColumnName(fromColumn).normalized;
|
||||
const targetBase = normalizeKtxRelationshipName(toTable.ref.name).singular;
|
||||
const reasons = ['embedding_similarity', ...keyEvidence.reasons];
|
||||
const candidate = createCandidate({
|
||||
|
|
@ -620,7 +646,7 @@ export function generateKtxRelationshipDiscoveryCandidates(
|
|||
const sameTable = fromTable.id === toTable.id;
|
||||
const nameMatchesTarget = strictAliases.has(sourceBase);
|
||||
const parentTableNameMatcher = !sameTable && !nameMatchesTarget && parentAliases.has(sourceBase);
|
||||
const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizeKtxRelationshipName(fromColumn.name).normalized);
|
||||
const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizedColumnName(fromColumn).normalized);
|
||||
const strictTableMatcher = (!sameTable && nameMatchesTarget) || selfReference;
|
||||
|
||||
for (const toColumn of toTable.columns) {
|
||||
|
|
@ -675,7 +701,7 @@ export function generateKtxRelationshipDiscoveryCandidates(
|
|||
if (
|
||||
!suffixMatcher &&
|
||||
!parentTableNameMatcher &&
|
||||
normalizeKtxRelationshipName(fromColumn.name).normalized === normalizeKtxRelationshipName(toColumn.name).normalized
|
||||
normalizedColumnName(fromColumn).normalized === normalizedColumnName(toColumn).normalized
|
||||
) {
|
||||
reasons.push('exact_column_name');
|
||||
nameScore = Math.max(nameScore, 0.9);
|
||||
|
|
|
|||
|
|
@ -18,20 +18,28 @@ export interface LocalKtxRelationshipCandidateTablesInput {
|
|||
|
||||
const DEFAULT_MAX_PARENT_TABLES = 20;
|
||||
const RELATIONSHIP_SUFFIX_TOKENS = new Set(['id', 'ids', 'key', 'keys', 'code', 'codes', 'uuid', 'uuids']);
|
||||
const normalizedTokenVariantsCache = new Map<string, string[]>();
|
||||
|
||||
function roundedScore(value: number): number {
|
||||
return Number(Math.max(0, Math.min(1, value)).toFixed(3));
|
||||
}
|
||||
|
||||
function normalizedTokenVariants(name: string): string[] {
|
||||
const cached = normalizedTokenVariantsCache.get(name);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
|
||||
const normalized = normalizeKtxRelationshipName(name);
|
||||
return Array.from(
|
||||
const variants = Array.from(
|
||||
new Set([
|
||||
...normalized.tokens,
|
||||
...tokenizeKtxRelationshipName(normalized.singular),
|
||||
...tokenizeKtxRelationshipName(normalized.plural),
|
||||
]),
|
||||
).filter(Boolean);
|
||||
normalizedTokenVariantsCache.set(name, variants);
|
||||
return variants;
|
||||
}
|
||||
|
||||
function childColumnLocalityTokens(column: KtxEnrichedColumn): string[] {
|
||||
|
|
@ -91,24 +99,29 @@ function parentEmbeddingScore(childColumn: KtxEnrichedColumn, parentTable: KtxEn
|
|||
}
|
||||
|
||||
function tableTokenScore(input: {
|
||||
childTable: KtxEnrichedTable;
|
||||
childColumn: KtxEnrichedColumn;
|
||||
childTableId: string;
|
||||
childTableTokens: readonly string[];
|
||||
childColumnTokens: readonly string[];
|
||||
parentTable: KtxEnrichedTable;
|
||||
}): number {
|
||||
const childTableTokens = normalizedTokenVariants(input.childTable.ref.name);
|
||||
const childColumnTokens = childColumnLocalityTokens(input.childColumn);
|
||||
const parentTokens = normalizedTokenVariants(input.parentTable.ref.name);
|
||||
const columnOnlyScore = jaccard(childColumnTokens, parentTokens);
|
||||
if (input.parentTable.id === input.childTable.id) {
|
||||
const columnOnlyScore = jaccard(input.childColumnTokens, parentTokens);
|
||||
if (parentTokens.length === 0) {
|
||||
return 0;
|
||||
}
|
||||
if (input.parentTable.id === input.childTableId) {
|
||||
return columnOnlyScore;
|
||||
}
|
||||
const columnAndTableScore = jaccard(uniqueTokens([...childTableTokens, ...childColumnTokens]), parentTokens);
|
||||
const columnAndTableScore = jaccard(uniqueTokens([...input.childTableTokens, ...input.childColumnTokens]), parentTokens);
|
||||
return Math.max(columnOnlyScore, columnAndTableScore * 0.6);
|
||||
}
|
||||
|
||||
function localityScore(input: {
|
||||
childTable: KtxEnrichedTable;
|
||||
childTableId: string;
|
||||
childTableTokens: readonly string[];
|
||||
childColumn: KtxEnrichedColumn;
|
||||
childColumnTokens: readonly string[];
|
||||
parentTable: KtxEnrichedTable;
|
||||
}): Omit<KtxRelationshipLocalityCandidateTable, 'table'> {
|
||||
const tokenScore = roundedScore(tableTokenScore(input));
|
||||
|
|
@ -143,12 +156,18 @@ export function localCandidateTables(
|
|||
return [];
|
||||
}
|
||||
|
||||
const childTableTokens = normalizedTokenVariants(input.childTable.ref.name);
|
||||
const childColumnTokens = childColumnLocalityTokens(input.childColumn);
|
||||
|
||||
return input.parentTables
|
||||
.map((table) => ({
|
||||
table,
|
||||
...localityScore({
|
||||
childTable: input.childTable,
|
||||
childTableId: input.childTable.id,
|
||||
childTableTokens,
|
||||
childColumn: input.childColumn,
|
||||
childColumnTokens,
|
||||
parentTable: table,
|
||||
}),
|
||||
}))
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ const SAMPLE_VALUE_DELIMITER = '\u001f';
|
|||
type QuoteStyle = 'double' | 'backtick' | 'bracket';
|
||||
|
||||
function quoteStyle(driver: KtxConnectionDriver): QuoteStyle {
|
||||
if (driver === 'mysql' || driver === 'clickhouse' || driver === 'posthog') {
|
||||
if (driver === 'mysql' || driver === 'clickhouse') {
|
||||
return 'backtick';
|
||||
}
|
||||
if (driver === 'sqlserver') {
|
||||
|
|
@ -93,7 +93,7 @@ export function quoteKtxRelationshipIdentifier(driver: KtxConnectionDriver, iden
|
|||
|
||||
export function formatKtxRelationshipTableRef(driver: KtxConnectionDriver, table: KtxTableRef): string {
|
||||
const parts =
|
||||
driver === 'sqlite' || driver === 'posthog'
|
||||
driver === 'sqlite'
|
||||
? [table.name]
|
||||
: [table.catalog, table.db, table.name].filter((value): value is string => Boolean(value));
|
||||
return parts.map((part) => quoteKtxRelationshipIdentifier(driver, part)).join('.');
|
||||
|
|
@ -109,7 +109,7 @@ function textLengthExpression(driver: KtxConnectionDriver, columnSql: string): s
|
|||
if (driver === 'bigquery') {
|
||||
return `LENGTH(CAST(${columnSql} AS STRING))`;
|
||||
}
|
||||
if (driver === 'clickhouse' || driver === 'posthog') {
|
||||
if (driver === 'clickhouse') {
|
||||
return `length(toString(${columnSql}))`;
|
||||
}
|
||||
return `LENGTH(CAST(${columnSql} AS TEXT))`;
|
||||
|
|
@ -223,7 +223,7 @@ function sampleAggregateSql(driver: KtxConnectionDriver, innerSql: string): stri
|
|||
if (driver === 'sqlserver') {
|
||||
return `(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`;
|
||||
}
|
||||
if (driver === 'clickhouse' || driver === 'posthog') {
|
||||
if (driver === 'clickhouse') {
|
||||
return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`;
|
||||
}
|
||||
return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`;
|
||||
|
|
|
|||
|
|
@ -150,14 +150,14 @@ describe('KTX scan contract types', () => {
|
|||
};
|
||||
|
||||
const connector: KtxScanConnector = {
|
||||
id: 'posthog:product',
|
||||
driver: 'posthog',
|
||||
id: 'clickhouse:product',
|
||||
driver: 'clickhouse',
|
||||
capabilities: createKtxConnectorCapabilities({ eventStreamDiscovery: true }),
|
||||
eventStreamDiscovery: discovery,
|
||||
async introspect() {
|
||||
return {
|
||||
connectionId: 'product',
|
||||
driver: 'posthog',
|
||||
driver: 'clickhouse',
|
||||
extractedAt: '2026-04-29T00:00:00.000Z',
|
||||
scope: { catalogs: ['157881'] },
|
||||
metadata: {},
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ export type KtxConnectionDriver =
|
|||
| 'sqlserver'
|
||||
| 'bigquery'
|
||||
| 'snowflake'
|
||||
| 'posthog'
|
||||
| 'mysql'
|
||||
| 'clickhouse';
|
||||
|
||||
|
|
|
|||
136
packages/context/src/sl/description-normalization.ts
Normal file
136
packages/context/src/sl/description-normalization.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
type DescriptionMap = Record<string, string>;
|
||||
|
||||
interface NormalizeDescriptionOptions {
|
||||
fillMissing?: boolean;
|
||||
}
|
||||
|
||||
function cleanText(value: unknown): string | null {
|
||||
return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null;
|
||||
}
|
||||
|
||||
function cleanDescriptionMap(value: unknown): DescriptionMap {
|
||||
const result: DescriptionMap = {};
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
return result;
|
||||
}
|
||||
for (const [key, text] of Object.entries(value)) {
|
||||
const cleaned = cleanText(text);
|
||||
if (cleaned) {
|
||||
result[key] = cleaned;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function hasDescriptions(descriptions: DescriptionMap): boolean {
|
||||
return Object.keys(descriptions).length > 0;
|
||||
}
|
||||
|
||||
function withDescriptionMap(record: Record<string, unknown>, fallback: string | null): Record<string, unknown> {
|
||||
const descriptions = cleanDescriptionMap(record.descriptions);
|
||||
const flatDescription = cleanText(record.description);
|
||||
if (flatDescription && !descriptions.user) {
|
||||
descriptions.user = flatDescription;
|
||||
}
|
||||
if (!hasDescriptions(descriptions) && fallback) {
|
||||
descriptions.ktx = fallback;
|
||||
}
|
||||
|
||||
const next = { ...record };
|
||||
delete next.description;
|
||||
if (hasDescriptions(descriptions)) {
|
||||
next.descriptions = descriptions;
|
||||
} else {
|
||||
delete next.descriptions;
|
||||
}
|
||||
return next;
|
||||
}
|
||||
|
||||
function humanizeIdentifier(value: string): string {
|
||||
return value
|
||||
.replace(/([a-z0-9])([A-Z])/g, '$1 $2')
|
||||
.replace(/[_-]+/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
}
|
||||
|
||||
function formatCount(count: number, singular: string, plural = `${singular}s`): string | null {
|
||||
if (count <= 0) {
|
||||
return null;
|
||||
}
|
||||
return `${count} ${count === 1 ? singular : plural}`;
|
||||
}
|
||||
|
||||
function sourceFallback(source: Record<string, unknown>, sourceName: string): string {
|
||||
const table = cleanText(source.table);
|
||||
const sql = cleanText(source.sql);
|
||||
if (table) {
|
||||
return `Semantic-layer source for ${sourceName} backed by ${table}.`;
|
||||
}
|
||||
if (sql) {
|
||||
return `Semantic-layer source for ${sourceName} backed by curated SQL.`;
|
||||
}
|
||||
|
||||
const counts = [
|
||||
formatCount(Array.isArray(source.measures) ? source.measures.length : 0, 'measure'),
|
||||
formatCount(Array.isArray(source.segments) ? source.segments.length : 0, 'segment'),
|
||||
formatCount(Array.isArray(source.columns) ? source.columns.length : 0, 'computed column'),
|
||||
].filter((item): item is string => Boolean(item));
|
||||
return counts.length > 0
|
||||
? `Semantic-layer overlay for ${sourceName} defining ${counts.join(', ')}.`
|
||||
: `Semantic-layer overlay for ${sourceName}.`;
|
||||
}
|
||||
|
||||
function columnFallback(column: Record<string, unknown>, sourceName: string): string {
|
||||
const columnName = cleanText(column.name) ?? 'column';
|
||||
const label = humanizeIdentifier(columnName) || columnName;
|
||||
const expr = cleanText(column.expr);
|
||||
|
||||
if (expr) {
|
||||
return `Computed ${label} value for ${sourceName}.`;
|
||||
}
|
||||
|
||||
if (columnName.toLowerCase() === 'id') {
|
||||
return `Identifier column for ${sourceName}.`;
|
||||
}
|
||||
|
||||
const idMatch = columnName.match(/^(.+)_id$/i);
|
||||
if (idMatch) {
|
||||
const entity = humanizeIdentifier(idMatch[1] ?? '');
|
||||
return entity ? `Identifier for the related ${entity} on ${sourceName}.` : `Identifier column for ${sourceName}.`;
|
||||
}
|
||||
|
||||
if (/(^|_)(date|time|timestamp|created_at|updated_at|week_start|month_start)($|_)/i.test(columnName)) {
|
||||
return `Date or time value for ${label} on ${sourceName}.`;
|
||||
}
|
||||
|
||||
return `Column ${label} from ${sourceName}.`;
|
||||
}
|
||||
|
||||
export function normalizeSemanticLayerDescriptions<T extends object>(
|
||||
source: T,
|
||||
options: NormalizeDescriptionOptions = {},
|
||||
): T {
|
||||
const sourceRecord = source as Record<string, unknown>;
|
||||
const sourceName = cleanText(sourceRecord.name) ?? 'source';
|
||||
const normalized = withDescriptionMap(
|
||||
sourceRecord,
|
||||
options.fillMissing ? sourceFallback(sourceRecord, sourceName) : null,
|
||||
);
|
||||
|
||||
if (Array.isArray(sourceRecord.columns)) {
|
||||
normalized.columns = sourceRecord.columns.map((column) => {
|
||||
if (!column || typeof column !== 'object' || Array.isArray(column)) {
|
||||
return column;
|
||||
}
|
||||
const columnRecord = column as Record<string, unknown>;
|
||||
return withDescriptionMap(
|
||||
columnRecord,
|
||||
options.fillMissing ? columnFallback(columnRecord, sourceName) : null,
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
return normalized as T;
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ import type { KtxEmbeddingPort, KtxFileWriteResult } from '../core/index.js';
|
|||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js';
|
||||
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
import { composeOverlay, type ManifestTableEntry, projectManifestEntry } from './semantic-layer.service.js';
|
||||
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
|
||||
|
|
@ -180,14 +181,14 @@ function manifestTables(value: Record<string, unknown>): Record<string, Manifest
|
|||
|
||||
function parsedStandaloneSource(parsed: Record<string, unknown>, name: string): SemanticLayerSource {
|
||||
const source = parsed as Partial<SemanticLayerSource>;
|
||||
return {
|
||||
return normalizeSemanticLayerDescriptions({
|
||||
...source,
|
||||
name,
|
||||
grain: Array.isArray(parsed.grain) ? (parsed.grain.filter((item) => typeof item === 'string') as string[]) : [],
|
||||
columns: Array.isArray(parsed.columns) ? (parsed.columns as SemanticLayerSource['columns']) : [],
|
||||
joins: Array.isArray(parsed.joins) ? (parsed.joins as SemanticLayerSource['joins']) : [],
|
||||
measures: Array.isArray(parsed.measures) ? (parsed.measures as SemanticLayerSource['measures']) : [],
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
export async function loadLocalSlSourceRecords(
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { z } from 'zod';
|
||||
|
||||
// Literal vocabularies — kept in lockstep with the Python Pydantic model at
|
||||
// python-service/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole /
|
||||
// python/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole /
|
||||
// ColumnVisibility / JoinDeclaration). If these diverge, YAMLs can pass
|
||||
// TypeScript validation at ingest time but fail Python loading at query time.
|
||||
const columnTypeValues = ['string', 'number', 'time', 'boolean'] as const;
|
||||
|
|
@ -23,6 +23,8 @@ const segmentDefinitionSchema = z.object({
|
|||
description: z.string().optional(),
|
||||
});
|
||||
|
||||
const descriptionsSchema = z.record(z.string(), z.string().min(1));
|
||||
|
||||
const defaultTimeDimensionDbtSchema = z.object({
|
||||
dbt: z.string().optional(),
|
||||
});
|
||||
|
|
@ -77,6 +79,7 @@ const sourceColumnSchema = z.object({
|
|||
role: z.enum(columnRoleValues).optional(),
|
||||
visibility: z.enum(columnVisibilityValues).optional(),
|
||||
description: z.string().optional(),
|
||||
descriptions: descriptionsSchema.optional(),
|
||||
expr: z.string().optional(),
|
||||
constraints: sourceKeyedColumnConstraintsSchema.optional(),
|
||||
enum_values: sourceKeyedStringArraySchema.optional(),
|
||||
|
|
@ -91,6 +94,7 @@ const overlayColumnSchema = z
|
|||
role: z.enum(columnRoleValues).optional(),
|
||||
visibility: z.enum(columnVisibilityValues).optional(),
|
||||
description: z.string().optional(),
|
||||
descriptions: descriptionsSchema.optional(),
|
||||
expr: z.string().optional(),
|
||||
})
|
||||
.refine((col) => !col.type || col.expr, {
|
||||
|
|
@ -102,6 +106,7 @@ export const sourceDefinitionSchema = z
|
|||
.object({
|
||||
name: z.string().min(1),
|
||||
description: z.string().optional(),
|
||||
descriptions: descriptionsSchema.optional(),
|
||||
// Accepted for documentation parity with the Python spec; behavior is driven
|
||||
// by the `table` / `sql` fields, not by this discriminator.
|
||||
source_type: z.enum(['table', 'sql']).optional(),
|
||||
|
|
|
|||
|
|
@ -257,12 +257,14 @@ describe('sourceDefinitionSchema', () => {
|
|||
it('preserves dbt structural metadata fields used by manifest-backed SL readers', () => {
|
||||
const result = sourceDefinitionSchema.safeParse({
|
||||
name: 'orders',
|
||||
descriptions: { dbt: 'Order facts from dbt.' },
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [
|
||||
{
|
||||
name: 'status',
|
||||
type: 'string',
|
||||
descriptions: { dbt: 'Order lifecycle status.' },
|
||||
constraints: { dbt: { not_null: true, unique: true } },
|
||||
enum_values: { dbt: ['placed', 'shipped'] },
|
||||
tests: {
|
||||
|
|
@ -282,7 +284,9 @@ describe('sourceDefinitionSchema', () => {
|
|||
if (!result.success) {
|
||||
return;
|
||||
}
|
||||
expect(result.data.descriptions).toEqual({ dbt: 'Order facts from dbt.' });
|
||||
expect(result.data.columns[0]).toMatchObject({
|
||||
descriptions: { dbt: 'Order lifecycle status.' },
|
||||
constraints: { dbt: { not_null: true, unique: true } },
|
||||
enum_values: { dbt: ['placed', 'shipped'] },
|
||||
tests: {
|
||||
|
|
@ -528,6 +532,31 @@ describe('loadAllSources — standalone enrichment via inherits_columns_from', (
|
|||
const aav = sources.find((s) => s.name === 'aav_consignments');
|
||||
expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]);
|
||||
});
|
||||
|
||||
it('normalizes legacy flat source and column descriptions when loading standalone files', async () => {
|
||||
const standalonePath = 'semantic-layer/conn-1/orders.yaml';
|
||||
configService.listFiles.mockResolvedValue({ files: [standalonePath] });
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'name: orders',
|
||||
'description: Finance orders used for invoice reconciliation.',
|
||||
'table: public.orders',
|
||||
'grain: [id]',
|
||||
'columns:',
|
||||
' - name: id',
|
||||
' type: string',
|
||||
' description: Stable order identifier.',
|
||||
].join('\n'),
|
||||
});
|
||||
|
||||
const sources = await service.loadAllSources('conn-1');
|
||||
|
||||
expect(sources[0]).toMatchObject({
|
||||
name: 'orders',
|
||||
descriptions: { user: 'Finance orders used for invoice reconciliation.' },
|
||||
columns: [{ name: 'id', type: 'string', descriptions: { user: 'Stable order identifier.' } }],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('validateWithProposedSource', () => {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import YAML from 'yaml';
|
|||
import type { KtxFileStorePort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { isOverlaySource, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput, SemanticLayerSource } from './types.js';
|
||||
|
||||
|
|
@ -101,6 +102,7 @@ export class SemanticLayerService {
|
|||
const warnings: string[] = [];
|
||||
|
||||
if (!options?.skipValidation) {
|
||||
source = normalizeSemanticLayerDescriptions(source);
|
||||
const sourceData: Record<string, unknown> = { ...source };
|
||||
|
||||
if ((sourceData.table || sourceData.sql) && (await this.isManifestBacked(connectionId, source.name))) {
|
||||
|
|
@ -129,7 +131,8 @@ export class SemanticLayerService {
|
|||
}
|
||||
|
||||
const path = this.sourcePath(connectionId, source.name);
|
||||
const content = YAML.stringify(source, { indent: 2, lineWidth: 0 });
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source);
|
||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0 });
|
||||
const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
|
||||
const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
|
||||
skipLock: options?.skipLock,
|
||||
|
|
@ -199,14 +202,14 @@ export class SemanticLayerService {
|
|||
if (sources.has(name)) {
|
||||
this.logger.warn(`Standalone source '${name}' in ${filePath} overrides manifest entry of the same name`);
|
||||
}
|
||||
let standalone: SemanticLayerSource = {
|
||||
let standalone: SemanticLayerSource = normalizeSemanticLayerDescriptions({
|
||||
...(data as Partial<SemanticLayerSource>),
|
||||
name,
|
||||
grain: Array.isArray(data.grain) ? (data.grain as string[]) : [],
|
||||
columns: Array.isArray(data.columns) ? (data.columns as SemanticLayerSource['columns']) : [],
|
||||
joins: Array.isArray(data.joins) ? (data.joins as SemanticLayerSource['joins']) : [],
|
||||
measures: Array.isArray(data.measures) ? (data.measures as SemanticLayerSource['measures']) : [],
|
||||
};
|
||||
});
|
||||
// If the source declares `inherits_columns_from`, fill any blank
|
||||
// type/descriptions/role from the matching manifest entry. Lets the
|
||||
// agent write `columns: [{name: FOO}]` without redeclaring known fields.
|
||||
|
|
@ -1005,7 +1008,8 @@ const COMPOSE_KNOWN_KEYS = new Set([
|
|||
]);
|
||||
|
||||
export function composeOverlay(base: SemanticLayerSource, overlay: Record<string, unknown>): SemanticLayerSource {
|
||||
const unknownKeys = Object.keys(overlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k));
|
||||
const normalizedOverlay = normalizeSemanticLayerDescriptions(overlay);
|
||||
const unknownKeys = Object.keys(normalizedOverlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k));
|
||||
if (unknownKeys.length > 0) {
|
||||
throw new Error(
|
||||
`composeOverlay: overlay for '${base.name}' has unhandled keys [${unknownKeys.join(', ')}]. ` +
|
||||
|
|
@ -1015,50 +1019,47 @@ export function composeOverlay(base: SemanticLayerSource, overlay: Record<string
|
|||
|
||||
const result = { ...base };
|
||||
|
||||
if (overlay.description) {
|
||||
result.descriptions = { ...(result.descriptions ?? {}), user: overlay.description as string };
|
||||
}
|
||||
|
||||
// Descriptions (plural) merge keyed by source (e.g. `dbt`, `ai`, `db`). Overlay keys
|
||||
// win over matching base keys but unrelated base keys are preserved.
|
||||
if (overlay.descriptions) {
|
||||
if (normalizedOverlay.descriptions) {
|
||||
result.descriptions = {
|
||||
...(result.descriptions ?? {}),
|
||||
...(overlay.descriptions as Record<string, string>),
|
||||
...(normalizedOverlay.descriptions as Record<string, string>),
|
||||
};
|
||||
}
|
||||
|
||||
// Filter out excluded columns
|
||||
const excluded = new Set((overlay.exclude_columns as string[] | undefined) ?? []);
|
||||
const excluded = new Set((normalizedOverlay.exclude_columns as string[] | undefined) ?? []);
|
||||
let columns = result.columns.filter((c) => !excluded.has(c.name));
|
||||
|
||||
// Append overlay computed columns
|
||||
const overlayColumns = (overlay.columns as SemanticLayerSource['columns'] | undefined) ?? [];
|
||||
const overlayColumns = (normalizedOverlay.columns as SemanticLayerSource['columns'] | undefined) ?? [];
|
||||
columns = [...columns, ...overlayColumns];
|
||||
result.columns = columns;
|
||||
|
||||
// Measures from overlay only
|
||||
result.measures = (overlay.measures as SemanticLayerSource['measures'] | undefined) ?? [];
|
||||
result.measures = (normalizedOverlay.measures as SemanticLayerSource['measures'] | undefined) ?? [];
|
||||
|
||||
// Segments: overlay-replaces semantics. Manifest tables don't carry segments today;
|
||||
// if that changes, add a union branch here.
|
||||
if (overlay.segments !== undefined) {
|
||||
result.segments = overlay.segments as SemanticLayerSource['segments'];
|
||||
if (normalizedOverlay.segments !== undefined) {
|
||||
result.segments = normalizedOverlay.segments as SemanticLayerSource['segments'];
|
||||
}
|
||||
|
||||
// Override grain
|
||||
if (overlay.grain) {
|
||||
result.grain = overlay.grain as string[];
|
||||
if (normalizedOverlay.grain) {
|
||||
result.grain = normalizedOverlay.grain as string[];
|
||||
}
|
||||
|
||||
if (overlay.default_time_dimension !== undefined) {
|
||||
result.default_time_dimension = overlay.default_time_dimension as SemanticLayerSource['default_time_dimension'];
|
||||
if (normalizedOverlay.default_time_dimension !== undefined) {
|
||||
result.default_time_dimension =
|
||||
normalizedOverlay.default_time_dimension as SemanticLayerSource['default_time_dimension'];
|
||||
}
|
||||
|
||||
// Union + dedupe joins, apply suppressions
|
||||
const disabled = new Set(((overlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs));
|
||||
const disabled = new Set(((normalizedOverlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs));
|
||||
const manifestJoins = result.joins.filter((j) => !disabled.has(normalizeWs(j.on)));
|
||||
const overlayJoins = (overlay.joins as SemanticLayerSource['joins'] | undefined) ?? [];
|
||||
const overlayJoins = (normalizedOverlay.joins as SemanticLayerSource['joins'] | undefined) ?? [];
|
||||
const existingKeys = new Set(manifestJoins.map((j) => `${j.to}::${normalizeWs(j.on)}`));
|
||||
const newJoins = overlayJoins.filter((j) => !existingKeys.has(`${j.to}::${normalizeWs(j.on)}`));
|
||||
result.joins = [...manifestJoins, ...newJoins];
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import type { SlSourcesIndexPort } from './ports.js';
|
||||
import type { SemanticLayerSource } from './types.js';
|
||||
|
||||
|
|
@ -8,6 +9,7 @@ export function buildSemanticLayerSourceSearchText(
|
|||
source: SemanticLayerSource,
|
||||
priority: string[] = DEFAULT_PRIORITY,
|
||||
): string {
|
||||
source = normalizeSemanticLayerDescriptions(source);
|
||||
const config = { priority };
|
||||
const parts: string[] = [source.name.replace(/_/g, ' ')];
|
||||
|
||||
|
|
|
|||
|
|
@ -127,6 +127,39 @@ describe('SlEditSourceTool — session gating', () => {
|
|||
);
|
||||
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('fills missing descriptions when an ingest session edits a source', async () => {
|
||||
const { tool } = makeTool();
|
||||
const session = makeSession({
|
||||
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'dbt' },
|
||||
});
|
||||
const context: ToolContext = { ...baseContext, session };
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: session.connectionId,
|
||||
sourceName: 'orders',
|
||||
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
|
||||
} as any,
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(true);
|
||||
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
expect.objectContaining({
|
||||
descriptions: { ktx: expect.stringContaining('orders') },
|
||||
columns: [
|
||||
expect.objectContaining({
|
||||
descriptions: { ktx: expect.stringContaining('Identifier') },
|
||||
}),
|
||||
],
|
||||
}),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('SlEditSourceTool — manifest-backed source without overlay', () => {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import YAML from 'yaml';
|
|||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
|
||||
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
import {
|
||||
BaseSemanticLayerTool,
|
||||
|
|
@ -147,6 +148,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
} catch (e) {
|
||||
return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
|
||||
}
|
||||
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
|
||||
// Re-serialize and write
|
||||
const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 });
|
||||
|
|
|
|||
|
|
@ -175,6 +175,89 @@ describe('SlWriteSourceTool — session gating', () => {
|
|||
);
|
||||
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('normalizes flat source and column descriptions before writing', async () => {
|
||||
const { tool, semanticLayerService } = makeTool();
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: '11111111-1111-1111-1111-111111111111',
|
||||
sourceName: 'orders',
|
||||
source: {
|
||||
name: 'orders',
|
||||
description: 'Finance orders used for invoice reconciliation.',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'string', description: 'Stable order identifier.' }],
|
||||
measures: [],
|
||||
joins: [],
|
||||
} as any,
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(true);
|
||||
expect(semanticLayerService.writeSource).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
expect.objectContaining({
|
||||
descriptions: { user: 'Finance orders used for invoice reconciliation.' },
|
||||
columns: [expect.objectContaining({ descriptions: { user: 'Stable order identifier.' } })],
|
||||
}),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
|
||||
it('fills missing descriptions for ingest-written overlays and columns', async () => {
|
||||
const session = makeSession({
|
||||
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'metabase' },
|
||||
semanticLayerService: {
|
||||
loadSource: vi.fn().mockResolvedValue(null),
|
||||
loadAllSources: vi.fn().mockResolvedValue([]),
|
||||
validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
|
||||
writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
|
||||
deleteSource: vi.fn().mockResolvedValue(undefined),
|
||||
listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']),
|
||||
isManifestBacked: vi.fn().mockResolvedValue(false),
|
||||
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
|
||||
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
|
||||
} as any,
|
||||
});
|
||||
const { tool } = makeTool();
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: session.connectionId,
|
||||
sourceName: 'mart_account_segments',
|
||||
source: {
|
||||
name: 'mart_account_segments',
|
||||
columns: [{ name: 'is_large_contract', type: 'boolean', expr: 'contract_arr_cents >= 20000000' }],
|
||||
measures: [{ name: 'account_count', expr: 'count(account_id)' }],
|
||||
} as any,
|
||||
} as any,
|
||||
{ ...baseContext, session },
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(true);
|
||||
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
expect.objectContaining({
|
||||
descriptions: {
|
||||
ktx: expect.stringContaining('mart_account_segments'),
|
||||
},
|
||||
columns: [
|
||||
expect.objectContaining({
|
||||
descriptions: {
|
||||
ktx: expect.stringContaining('is large contract'),
|
||||
},
|
||||
}),
|
||||
],
|
||||
}),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('SlWriteSourceTool — disconnected-components warning in markdown', () => {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import {
|
|||
type SemanticLayerStructured,
|
||||
sourceDefinitionSchema,
|
||||
} from './base-semantic-layer.tool.js';
|
||||
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
|
||||
import { slToolConnectionIdSchema } from './connection-id-schema.js';
|
||||
|
||||
const sourceInputSchema = z.union([sourceDefinitionSchema, sourceOverlaySchema]);
|
||||
|
|
@ -154,14 +155,16 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
semanticLayerService: SemanticLayerService,
|
||||
skipIndex: boolean,
|
||||
): Promise<ToolOutput<SemanticLayerStructured>> {
|
||||
const isOverlay = !('table' in source && source.table) && !('sql' in source && source.sql);
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
const isOverlay =
|
||||
!('table' in normalizedSource && normalizedSource.table) && !('sql' in normalizedSource && normalizedSource.sql);
|
||||
|
||||
const existing = await this.readSourceYamlFromService(semanticLayerService, connectionId, sourceName);
|
||||
const commitMessage = existing
|
||||
? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}`
|
||||
: `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`;
|
||||
|
||||
const yamlContent = YAML.stringify(source);
|
||||
const yamlContent = YAML.stringify(normalizedSource);
|
||||
|
||||
const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent);
|
||||
if (orphanError) {
|
||||
|
|
@ -172,7 +175,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
return this.buildOutput(false, [shadowError], sourceName, { yaml: yamlContent });
|
||||
}
|
||||
|
||||
const validatedSource = source as SemanticLayerSource;
|
||||
const validatedSource = normalizedSource as SemanticLayerSource;
|
||||
const validationResult = await semanticLayerService.validateWithProposedSource(connectionId, validatedSource);
|
||||
const validationErrors = validationResult.errors;
|
||||
const validationWarnings = [...validationResult.warnings];
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest';
|
|||
import { createHttpSqlAnalysisPort } from './http-sql-analysis-port.js';
|
||||
|
||||
describe('createHttpSqlAnalysisPort', () => {
|
||||
it('calls the python-service fingerprint endpoint and maps snake_case response fields', async () => {
|
||||
it('calls the SQL-analysis fingerprint endpoint and maps snake_case response fields', async () => {
|
||||
const requestJson = vi.fn(async () => ({
|
||||
fingerprint: 'fingerprint-template',
|
||||
normalized_sql: 'SELECT * FROM analytics.orders WHERE status = ?',
|
||||
|
|
@ -26,7 +26,7 @@ describe('createHttpSqlAnalysisPort', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('preserves python-service parse errors in the mapped result', async () => {
|
||||
it('preserves SQL-analysis parse errors in the mapped result', async () => {
|
||||
const requestJson = vi.fn(async () => ({
|
||||
fingerprint: '',
|
||||
normalized_sql: '',
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ export abstract class BaseTool<TInput extends ZodType = ZodType> {
|
|||
}
|
||||
}
|
||||
},
|
||||
// Send only markdown to LLM - frontend still receives full { markdown, structured } via stream
|
||||
// Send only markdown to the LLM; tool callers still receive the structured output.
|
||||
toModelOutput: ({ output }) => {
|
||||
if (output && typeof output === 'object' && 'markdown' in output) {
|
||||
return { type: 'content', value: [{ type: 'text', text: output.markdown as string }] };
|
||||
|
|
|
|||
|
|
@ -37,6 +37,42 @@ describe('WikiWriteTool', () => {
|
|||
expect(result.markdown).toMatch(/created/i);
|
||||
});
|
||||
|
||||
it('normalizes accidentally escaped markdown newlines before writing', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
|
||||
await tool.call(
|
||||
{
|
||||
key: 'large-contract-requesters',
|
||||
summary: 'Cross-schema Metabase query',
|
||||
content:
|
||||
'# Large Contract Requesters\\n\\n**Source card:** Metabase #110\\n\\n## SQL\\n\\n```sql\\nselect * from orbit_analytics.mart_account_segments\\n```\\n',
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(wikiService.writePage.mock.calls[0][4]).toBe(
|
||||
'# Large Contract Requesters\n\n**Source card:** Metabase #110\n\n## SQL\n\n```sql\nselect * from orbit_analytics.mart_account_segments\n```\n',
|
||||
);
|
||||
expect(wikiService.syncSinglePage.mock.calls[0][4]).toBe(
|
||||
'# Large Contract Requesters\n\n**Source card:** Metabase #110\n\n## SQL\n\n```sql\nselect * from orbit_analytics.mart_account_segments\n```\n',
|
||||
);
|
||||
});
|
||||
|
||||
it('preserves intentional escaped newline examples in inline code', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
|
||||
await tool.call(
|
||||
{
|
||||
key: 'newline-token',
|
||||
summary: 'Escaped newline token',
|
||||
content: 'Use `\\n\\n` when documenting the literal separator.',
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(wikiService.writePage.mock.calls[0][4]).toBe('Use `\\n\\n` when documenting the literal separator.');
|
||||
});
|
||||
|
||||
it('skips syncSinglePage when session is worktree-scoped', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
const session: ToolSession = {
|
||||
|
|
|
|||
|
|
@ -47,6 +47,22 @@ interface WikiWriteStructured {
|
|||
action?: 'created' | 'updated';
|
||||
}
|
||||
|
||||
function looksLikeEscapedMarkdown(content: string): boolean {
|
||||
const withoutInlineCode = content.replace(/`[^`]*`/g, '');
|
||||
return /\\n\\n|(?:^|\\n)#{1,6}\s|\\n[-*]\s|\\n\d+\.\s|\\n```|\\n\|/.test(withoutInlineCode);
|
||||
}
|
||||
|
||||
function normalizeAccidentalEscapedMarkdownNewlines(content: string): string {
|
||||
const escapedBreaks = content.match(/\\[rn]/g)?.length ?? 0;
|
||||
if (escapedBreaks < 2) return content;
|
||||
|
||||
const actualBreaks = content.match(/\r?\n/g)?.length ?? 0;
|
||||
if (actualBreaks > 0 && escapedBreaks <= actualBreaks * 4) return content;
|
||||
if (!looksLikeEscapedMarkdown(content)) return content;
|
||||
|
||||
return content.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n').replace(/\\r/g, '\n');
|
||||
}
|
||||
|
||||
export class WikiWriteTool extends BaseTool<typeof wikiWriteInputSchema> {
|
||||
readonly name = 'wiki_write';
|
||||
|
||||
|
|
@ -125,7 +141,7 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to
|
|||
};
|
||||
|
||||
if (input.content) {
|
||||
finalContent = input.content;
|
||||
finalContent = normalizeAccidentalEscapedMarkdownNewlines(input.content);
|
||||
} else {
|
||||
const editResult = applySqlEdits(existing?.content ?? '', input.replacements ?? []);
|
||||
if (!editResult.success) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue