mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
Improve schema setup and Notion ingest UX (#14)
* Improve schema setup and Notion ingest UX * Handle Postgres network scan failures * WIP: save local changes before main merge * Refine setup prompt choices * Tighten ingest reconciliation guidance * Commit setup config updates * Canonicalize unmapped fallback details * Count reconciliation actions in reports * Harden semantic layer source validation * Return wiki content after edits * Validate SL sources against manifests * Validate wiki refs before writes * Simplify CLI next steps * Clarify agent setup summary * Surface dbt target SL sources * Recover SL write fallbacks * Preserve failed context build metadata * Track raw paths for ingest actions * test(cli): update seeded demo expectations * fix(ingest): scope fallback recovery checks * fix(sl): tighten source validation guards * fix(wiki): ignore empty embedding vectors * Improve Notion ingest UX * Enforce flat wiki keys * test(context): update wiki key assertion --------- Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
This commit is contained in:
parent
866d33e71a
commit
60457e9407
116 changed files with 4177 additions and 610 deletions
|
|
@ -33,14 +33,6 @@ repos:
|
|||
name: ruff format (python)
|
||||
files: ^python/
|
||||
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: ktx-package-checks
|
||||
name: ktx package checks
|
||||
entry: node scripts/precommit-check.mjs
|
||||
language: system
|
||||
files: ^(packages/|scripts/|python/|package\.json$|pnpm-lock\.yaml$|pnpm-workspace\.yaml$|release-policy\.json$|tsconfig\.base\.json$|pyproject\.toml$|uv\.lock$|uv\.toml$)
|
||||
|
||||
- repo: https://github.com/Yelp/detect-secrets
|
||||
rev: v1.5.0
|
||||
hooks:
|
||||
|
|
|
|||
|
|
@ -185,8 +185,8 @@ describe('runKtxAgent', () => {
|
|||
search: vi.fn(async () => ({
|
||||
results: [
|
||||
{
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL' as const,
|
||||
summary: 'Revenue metric definition',
|
||||
score: 0.02459016393442623,
|
||||
|
|
@ -207,8 +207,8 @@ describe('runKtxAgent', () => {
|
|||
expect(JSON.parse(io.stdout())).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
matchReasons: ['lexical', 'token'],
|
||||
}),
|
||||
],
|
||||
|
|
|
|||
|
|
@ -355,6 +355,53 @@ describe('runKtxConnectionNotion', () => {
|
|||
expect(io.stdout()).toContain('rootPageIds: 1');
|
||||
});
|
||||
|
||||
it('uses inline Notion auth_token for interactive discovery', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
await writeProjectConfig(projectDir, {
|
||||
...initialized.config,
|
||||
connections: {
|
||||
'notion-main': {
|
||||
driver: 'notion',
|
||||
auth_token: 'ntn_inline_token',
|
||||
crawl_mode: 'selected_roots',
|
||||
root_page_ids: [PAGE_IDS.engineering],
|
||||
root_database_ids: [],
|
||||
root_data_source_ids: [],
|
||||
max_pages_per_run: 12,
|
||||
max_knowledge_creates_per_run: 2,
|
||||
max_knowledge_updates_per_run: 7,
|
||||
last_successful_cursor: null,
|
||||
},
|
||||
},
|
||||
});
|
||||
const api = fakeNotionApi([notionPage(PAGE_IDS.engineering, 'Engineering')]);
|
||||
const createNotionApi = vi.fn((authToken: string) => {
|
||||
expect(authToken).toBe('ntn_inline_token');
|
||||
return api;
|
||||
});
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxConnectionNotion(
|
||||
{
|
||||
command: 'pick',
|
||||
projectDir,
|
||||
connectionId: 'notion-main',
|
||||
mode: 'interactive',
|
||||
},
|
||||
io.io,
|
||||
{
|
||||
createNotionApi,
|
||||
renderPicker: vi.fn(async (): Promise<PickerRenderResult> => ({ kind: 'quit' })),
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(createNotionApi).toHaveBeenCalledOnce();
|
||||
expect(io.stdout()).toContain('No changes saved.');
|
||||
});
|
||||
|
||||
it('passes partial-discovery warnings into the TUI banner state', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { parseNotionConnectionConfig, resolveNotionAuthToken } from '@ktx/context/connections';
|
||||
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from '@ktx/context/connections';
|
||||
import { type NotionApi, type NotionBotInfo, NotionClient } from '@ktx/context/ingest';
|
||||
import {
|
||||
type KtxLocalProject,
|
||||
|
|
@ -223,7 +223,7 @@ export async function runKtxConnectionNotion(
|
|||
const project = await loadProject({ projectDir: args.projectDir });
|
||||
const rawConnection = notionConnection(project, args.connectionId);
|
||||
const notion = parseNotionConnectionConfig(rawConnection);
|
||||
const authToken = await resolveNotionAuthToken(notion.auth_token_ref, { env: deps.env });
|
||||
const authToken = await resolveNotionConnectionAuthToken(notion, { env: deps.env });
|
||||
const api = deps.createNotionApi ? deps.createNotionApi(authToken) : new NotionClient(authToken);
|
||||
const discovery = await discoverNotionPickerPages(api);
|
||||
const tree = buildPickerTree(discovery.pages);
|
||||
|
|
|
|||
|
|
@ -267,10 +267,11 @@ describe('renderContextBuildView', () => {
|
|||
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
|
||||
]);
|
||||
state.primarySources[0].status = 'failed';
|
||||
state.primarySources[0].failureText = 'KTX lost its connection to PostgreSQL while scanning warehouse.';
|
||||
|
||||
const output = renderContextBuildView(state, { styled: false });
|
||||
expect(output).toContain('✗');
|
||||
expect(output).toContain('failed');
|
||||
expect(output).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.');
|
||||
});
|
||||
|
||||
it('omits empty groups', () => {
|
||||
|
|
@ -327,8 +328,19 @@ describe('createRepainter', () => {
|
|||
repainter.paint('hello');
|
||||
repainter.paint('bye');
|
||||
|
||||
expect(io.stdout()).toContain('\rbye');
|
||||
expect(io.stdout()).not.toContain('\u001b[1A\rbye');
|
||||
expect(io.stdout()).toContain('bye');
|
||||
expect(io.stdout()).not.toMatch(/\[\d+A/);
|
||||
});
|
||||
|
||||
it('does not undershoot cursor-up when a line is exactly the terminal width', () => {
|
||||
const io = makeIo({ isTTY: true, columns: 10 });
|
||||
const repainter = createRepainter(io.io);
|
||||
|
||||
repainter.paint('0123456789\nsecond\n');
|
||||
repainter.paint('0123456789\nsecond\n');
|
||||
|
||||
const cursorMoves = [...io.stdout().matchAll(/\[(\d+)A/g)].map((m) => Number(m[1]));
|
||||
expect(cursorMoves).toEqual([2]);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -373,6 +385,52 @@ describe('runContextBuild', () => {
|
|||
expect(result).toEqual({ exitCode: 1, detached: false });
|
||||
});
|
||||
|
||||
it('renders a friendly network failure when target output contains a network error code', async () => {
|
||||
const io = makeIo();
|
||||
const project = projectWithConnections({
|
||||
warehouse: { driver: 'postgres' },
|
||||
});
|
||||
const executeTarget = vi.fn(async (target, _args, targetIo) => {
|
||||
targetIo.stderr.write('Error: read EADDRNOTAVAIL\n');
|
||||
return failedResult(target.connectionId, target.driver, target.operation);
|
||||
});
|
||||
|
||||
const result = await runContextBuild(
|
||||
project,
|
||||
{ projectDir: '/tmp/project', inputMode: 'disabled' },
|
||||
io.io,
|
||||
{ executeTarget, now: () => 1000 },
|
||||
);
|
||||
|
||||
expect(result).toEqual({ exitCode: 1, detached: false });
|
||||
expect(io.stdout()).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.');
|
||||
expect(io.stdout()).toContain('network address unavailable (EADDRNOTAVAIL)');
|
||||
expect(io.stdout()).toContain('Retry: ktx setup --project-dir /tmp/project');
|
||||
expect(io.stdout()).not.toContain('BoundPool');
|
||||
});
|
||||
|
||||
it('renders a friendly network failure when target execution throws', async () => {
|
||||
const io = makeIo();
|
||||
const project = projectWithConnections({
|
||||
warehouse: { driver: 'postgres' },
|
||||
});
|
||||
const error = Object.assign(new Error('read ECONNRESET'), { code: 'ECONNRESET' });
|
||||
const executeTarget = vi.fn(async () => {
|
||||
throw error;
|
||||
});
|
||||
|
||||
const result = await runContextBuild(
|
||||
project,
|
||||
{ projectDir: '/tmp/project', inputMode: 'disabled' },
|
||||
io.io,
|
||||
{ executeTarget, now: () => 1000 },
|
||||
);
|
||||
|
||||
expect(result).toEqual({ exitCode: 1, detached: false });
|
||||
expect(io.stdout()).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.');
|
||||
expect(io.stdout()).toContain('connection reset (ECONNRESET)');
|
||||
});
|
||||
|
||||
it('renders final view for non-TTY output', async () => {
|
||||
const io = makeIo();
|
||||
const project = projectWithConnections({
|
||||
|
|
@ -529,6 +587,36 @@ describe('runContextBuild', () => {
|
|||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('returns report IDs parsed from failed source-ingest target output', async () => {
|
||||
const io = makeIo();
|
||||
const project = projectWithConnections({
|
||||
warehouse: { driver: 'postgres' },
|
||||
dbt_main: { driver: 'dbt' },
|
||||
});
|
||||
const executeTarget = vi.fn(async (target, _args, targetIo) => {
|
||||
if (target.operation === 'scan') {
|
||||
return successResult(target.connectionId, target.driver, target.operation);
|
||||
}
|
||||
|
||||
targetIo.stdout.write('Report: report-dbt-failed\n');
|
||||
targetIo.stdout.write('Work units: 3\n');
|
||||
return failedResult(target.connectionId, target.driver, target.operation);
|
||||
});
|
||||
|
||||
const result = await runContextBuild(
|
||||
project,
|
||||
{ projectDir: '/tmp/project', inputMode: 'disabled' },
|
||||
io.io,
|
||||
{ executeTarget, now: () => 1000 },
|
||||
);
|
||||
|
||||
expect(result).toMatchObject({
|
||||
exitCode: 1,
|
||||
detached: false,
|
||||
reportIds: ['report-dbt-failed'],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('viewStateFromSourceProgress', () => {
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ export interface ContextBuildTargetState {
|
|||
status: 'queued' | 'running' | 'done' | 'failed';
|
||||
detailLine: string | null;
|
||||
summaryText: string | null;
|
||||
failureText: string | null;
|
||||
startedAt: number | null;
|
||||
elapsedMs: number;
|
||||
}
|
||||
|
|
@ -133,7 +134,8 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean): string
|
|||
return parts.join(' · ');
|
||||
}
|
||||
if (target.status === 'failed') {
|
||||
return styled ? red('failed') : 'failed';
|
||||
const failureText = target.failureText ?? 'failed';
|
||||
return styled ? red(failureText) : failureText;
|
||||
}
|
||||
if (target.status === 'running') {
|
||||
const percent = extractPercent(target.detailLine);
|
||||
|
|
@ -327,6 +329,7 @@ export function viewStateFromSourceProgress(
|
|||
status: s.status,
|
||||
detailLine: null,
|
||||
summaryText: s.summaryText ?? null,
|
||||
failureText: null,
|
||||
startedAt: s.startedAtMs ?? null,
|
||||
elapsedMs: s.status === 'running' && s.startedAtMs ? now - s.startedAtMs : (s.elapsedMs ?? 0),
|
||||
});
|
||||
|
|
@ -378,7 +381,8 @@ export function createRepainter(io: KtxCliIo) {
|
|||
}
|
||||
io.stdout.write('\r');
|
||||
}
|
||||
io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`));
|
||||
io.stdout.write(`${ESC}[2K`);
|
||||
io.stdout.write(content.replaceAll('\n', `\n${ESC}[2K`));
|
||||
io.stdout.write(`${ESC}[J`);
|
||||
hasPainted = true;
|
||||
lastCursorUpRows = cursorUpRowsAfterWrite(content);
|
||||
|
|
@ -440,7 +444,83 @@ export function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void)
|
|||
// --- Orchestration ---
|
||||
|
||||
function makeTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetState {
|
||||
return { target, status: 'queued', detailLine: null, summaryText: null, startedAt: null, elapsedMs: 0 };
|
||||
return {
|
||||
target,
|
||||
status: 'queued',
|
||||
detailLine: null,
|
||||
summaryText: null,
|
||||
failureText: null,
|
||||
startedAt: null,
|
||||
elapsedMs: 0,
|
||||
};
|
||||
}
|
||||
|
||||
const NETWORK_ERROR_REASONS: Record<string, string> = {
|
||||
EADDRNOTAVAIL: 'network address unavailable',
|
||||
ECONNRESET: 'connection reset',
|
||||
ECONNREFUSED: 'connection refused',
|
||||
ENETUNREACH: 'network unreachable',
|
||||
ENOTFOUND: 'host not found',
|
||||
ETIMEDOUT: 'connection timed out',
|
||||
EHOSTUNREACH: 'host unreachable',
|
||||
};
|
||||
|
||||
function unknownErrorMessage(error: unknown): string {
|
||||
return error instanceof Error ? error.message : String(error);
|
||||
}
|
||||
|
||||
function networkErrorCodeFromText(text: string): string | null {
|
||||
for (const code of Object.keys(NETWORK_ERROR_REASONS)) {
|
||||
if (new RegExp(`\\b${code}\\b`).test(text)) {
|
||||
return code;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function networkErrorCode(error: unknown, capturedOutput = ''): string | null {
|
||||
const directCode = typeof (error as { code?: unknown })?.code === 'string'
|
||||
? (error as { code: string }).code
|
||||
: null;
|
||||
if (directCode && NETWORK_ERROR_REASONS[directCode]) {
|
||||
return directCode;
|
||||
}
|
||||
return networkErrorCodeFromText(`${unknownErrorMessage(error)}\n${capturedOutput}`);
|
||||
}
|
||||
|
||||
function friendlyDriverName(driver: string): string {
|
||||
const normalized = driver.toLowerCase();
|
||||
if (normalized === 'postgres' || normalized === 'postgresql') return 'PostgreSQL';
|
||||
if (normalized === 'mysql') return 'MySQL';
|
||||
if (normalized === 'sqlserver') return 'SQL Server';
|
||||
if (normalized === 'bigquery') return 'BigQuery';
|
||||
if (normalized === 'snowflake') return 'Snowflake';
|
||||
if (normalized === 'clickhouse') return 'ClickHouse';
|
||||
if (normalized === 'sqlite') return 'SQLite';
|
||||
return driver || 'the source';
|
||||
}
|
||||
|
||||
function failedStepDetail(result: KtxPublicIngestTargetResult): string | null {
|
||||
return result.steps.find((step) => step.status === 'failed')?.detail ?? null;
|
||||
}
|
||||
|
||||
function failureTextForTarget(input: {
|
||||
target: KtxPublicIngestPlanTarget;
|
||||
projectDir: string;
|
||||
capturedOutput?: string;
|
||||
error?: unknown;
|
||||
fallback?: string | null;
|
||||
}): string {
|
||||
const code = networkErrorCode(input.error, input.capturedOutput);
|
||||
if (code) {
|
||||
const operation = input.target.operation === 'scan' ? 'scanning' : 'ingesting';
|
||||
return [
|
||||
`KTX lost its connection to ${friendlyDriverName(input.target.driver)} while ${operation} ${input.target.connectionId}.`,
|
||||
`Reason: ${NETWORK_ERROR_REASONS[code]} (${code}).`,
|
||||
`Retry: ${resumeCommand(input.projectDir)}`,
|
||||
].join(' ');
|
||||
}
|
||||
return input.fallback ?? `${input.target.connectionId} failed.`;
|
||||
}
|
||||
|
||||
export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuildViewState {
|
||||
|
|
@ -493,6 +573,7 @@ export async function runContextBuild(
|
|||
const artifactPaths = new Set<string>();
|
||||
|
||||
let detached = false;
|
||||
let exiting = false;
|
||||
let cleanupKeystroke: (() => void) | null = null;
|
||||
|
||||
if (isTTY || deps.setupKeystroke) {
|
||||
|
|
@ -502,6 +583,7 @@ export async function runContextBuild(
|
|||
};
|
||||
cleanupKeystroke = (deps.setupKeystroke ?? defaultSetupKeystroke)(
|
||||
() => {
|
||||
detached = true;
|
||||
cleanup();
|
||||
deps.onDetach?.();
|
||||
const bg = spawnBackgroundBuild(args.projectDir);
|
||||
|
|
@ -509,12 +591,14 @@ export async function runContextBuild(
|
|||
if (bg) io.stdout.write(`Log: ${bg.logPath}\n`);
|
||||
io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`);
|
||||
io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`);
|
||||
exiting = true;
|
||||
process.exit(0);
|
||||
},
|
||||
() => {
|
||||
cleanup();
|
||||
io.stdout.write('\n\nContext build stopped. Nothing is running in the background.\n');
|
||||
io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`);
|
||||
exiting = true;
|
||||
process.exit(130);
|
||||
},
|
||||
);
|
||||
|
|
@ -548,21 +632,38 @@ export async function runContextBuild(
|
|||
false,
|
||||
);
|
||||
|
||||
const result = await execTarget(targetState.target, runArgs, capture.io, {});
|
||||
let result: KtxPublicIngestTargetResult | null = null;
|
||||
let thrownError: unknown = null;
|
||||
try {
|
||||
result = await execTarget(targetState.target, runArgs, capture.io, {});
|
||||
} catch (error) {
|
||||
if (exiting) {
|
||||
throw error;
|
||||
}
|
||||
thrownError = error;
|
||||
}
|
||||
|
||||
targetState.elapsedMs = nowFn() - (targetState.startedAt ?? nowFn());
|
||||
const failed = result.steps.some((s) => s.status === 'failed');
|
||||
const failed = thrownError !== null || result?.steps.some((s) => s.status === 'failed') === true;
|
||||
targetState.status = failed ? 'failed' : 'done';
|
||||
targetState.detailLine = null;
|
||||
const capturedOutput = capture.captured();
|
||||
const metadata = collectOutputMetadata(capturedOutput, targetState.target.operation);
|
||||
for (const reportId of metadata.reportIds) reportIds.add(reportId);
|
||||
for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath);
|
||||
if (!failed) {
|
||||
const capturedOutput = capture.captured();
|
||||
const metadata = collectOutputMetadata(capturedOutput, targetState.target.operation);
|
||||
for (const reportId of metadata.reportIds) reportIds.add(reportId);
|
||||
for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath);
|
||||
targetState.summaryText =
|
||||
targetState.target.operation === 'scan'
|
||||
? parseScanSummary(capturedOutput)
|
||||
: parseIngestSummary(capturedOutput);
|
||||
} else {
|
||||
targetState.failureText = failureTextForTarget({
|
||||
target: targetState.target,
|
||||
projectDir: args.projectDir,
|
||||
capturedOutput,
|
||||
error: thrownError,
|
||||
fallback: result ? failedStepDetail(result) : null,
|
||||
});
|
||||
}
|
||||
if (failed) hasFailure = true;
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import { join } from 'node:path';
|
|||
import { afterEach, describe, expect, it } from 'vitest';
|
||||
import { runDemoSeeded } from './demo-seeded.js';
|
||||
import { formatSeededInspect, inspectSeededProject } from './demo-seeded-inspect.js';
|
||||
import { KTX_NEXT_STEP_COMMANDS } from './next-steps.js';
|
||||
import { KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js';
|
||||
|
||||
describe('seeded demo inspect contract', () => {
|
||||
const projectDir = join(tmpdir(), `ktx-demo-seeded-inspect-${process.pid}`);
|
||||
|
|
@ -59,7 +59,7 @@ describe('seeded demo inspect contract', () => {
|
|||
reports: { primaryPath: 'reports/seeded-demo-report.json', fileCount: 1 },
|
||||
replays: { primaryPath: 'replays/replay.memory-flow.v1.json', latestPath: 'replays/latest.memory-flow.v1.json' },
|
||||
},
|
||||
nextCommands: KTX_NEXT_STEP_COMMANDS,
|
||||
nextCommands: KTX_NEXT_STEP_DIRECT_COMMANDS,
|
||||
});
|
||||
|
||||
expect(inspect.generatedOutputs.replays.fileCount).toBeGreaterThanOrEqual(3);
|
||||
|
|
@ -91,10 +91,7 @@ describe('seeded demo inspect contract', () => {
|
|||
expect(output).toContain('Latest replay: seeded (packaged, prebuilt)');
|
||||
expect(output).toContain(' $ ktx agent tools --json');
|
||||
expect(output).toContain(' $ ktx agent context --json');
|
||||
expect(output).toContain(' $ ktx serve --mcp stdio --user-id local');
|
||||
expect(output.indexOf('ktx agent tools --json')).toBeLessThan(
|
||||
output.indexOf('ktx serve --mcp stdio --user-id local'),
|
||||
);
|
||||
expect(output).not.toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(output).not.toContain('ktx ask');
|
||||
expect(output).not.toContain('deterministic mode');
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import { join, resolve } from 'node:path';
|
|||
import type { MemoryFlowReplayInput } from '@ktx/context/ingest/memory-flow';
|
||||
import { loadPackagedDemoReplay } from './demo-assets.js';
|
||||
import { DEMO_LATEST_REPLAY_FILE, loadLatestDemoReplay } from './demo-replay-store.js';
|
||||
import { KTX_NEXT_STEP_COMMANDS, KTX_NEXT_STEP_COMMAND_WIDTH } from './next-steps.js';
|
||||
import { KTX_NEXT_STEP_COMMAND_WIDTH, KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js';
|
||||
|
||||
type SeededInspectReadiness = 'missing' | 'ready' | 'corrupt';
|
||||
|
||||
|
|
@ -178,7 +178,7 @@ function sourceBundleFromManifest(manifest: DemoSeededManifest): SeededInspectSu
|
|||
}
|
||||
|
||||
function nextCommands(): SeededInspectSummary['nextCommands'] {
|
||||
return [...KTX_NEXT_STEP_COMMANDS];
|
||||
return [...KTX_NEXT_STEP_DIRECT_COMMANDS];
|
||||
}
|
||||
|
||||
function modeMetadataFromReplay(replay: MemoryFlowReplayInput | null): SeededInspectSummary['modeMetadata'] {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import { DEMO_FULL_JOB_ID, defaultDemoProjectDir, ensureDemoProject } from './de
|
|||
import type { DemoFullResult } from './demo-full.js';
|
||||
import { createTestDemoPromptAdapter } from './demo-interaction.js';
|
||||
import type { renderMemoryFlowTui } from './memory-flow-tui.js';
|
||||
import { KTX_NEXT_STEP_COMMANDS } from './next-steps.js';
|
||||
import { KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js';
|
||||
import { resetVizFallbackWarningsForTest } from './viz-fallback.js';
|
||||
|
||||
const SEEDED_DEMO_SEMANTIC_SOURCE_COUNT = 46;
|
||||
|
|
@ -208,7 +208,7 @@ describe('runKtxDemo', () => {
|
|||
expect(io.stdout()).toContain('Connection: orbit_demo');
|
||||
expect(io.stdout()).toContain('ktx sl list');
|
||||
expect(io.stdout()).toContain('ktx wiki list');
|
||||
expect(io.stdout()).toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(io.stdout()).not.toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(io.stdout()).not.toContain('KTX memory flow');
|
||||
expect(io.stderr()).toContain(
|
||||
'Visualization requested but stdin raw mode is unavailable; printing plain output.',
|
||||
|
|
@ -226,7 +226,7 @@ describe('runKtxDemo', () => {
|
|||
expect(testIo.stdout()).toContain('Connection: orbit_demo');
|
||||
expect(testIo.stdout()).toContain('ktx sl list');
|
||||
expect(testIo.stdout()).toContain('ktx wiki list');
|
||||
expect(testIo.stdout()).toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(testIo.stdout()).not.toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(testIo.stdout()).not.toContain('KTX memory flow');
|
||||
expect(testIo.stderr()).toContain(
|
||||
'Visualization requested but stdout is not an interactive terminal; printing plain output.',
|
||||
|
|
@ -287,7 +287,7 @@ describe('runKtxDemo', () => {
|
|||
expect(io.stdout()).toContain('LLM calls: none');
|
||||
expect(io.stdout()).toContain('Your KTX project files are at:');
|
||||
expect(io.stdout()).toContain(join(tmpdir(), 'ktx-demo-'));
|
||||
expect(io.stdout()).toContain('ktx serve --mcp stdio');
|
||||
expect(io.stdout()).not.toContain('ktx serve --mcp stdio');
|
||||
expect(io.stdout()).not.toContain(['ktx', 'mcp'].join(' '));
|
||||
expect(io.stdout()).not.toContain('deterministic');
|
||||
});
|
||||
|
|
@ -356,7 +356,7 @@ describe('runKtxDemo', () => {
|
|||
generatedContext: 'prebuilt from bundled assets',
|
||||
llmCalls: 'none',
|
||||
},
|
||||
nextCommands: KTX_NEXT_STEP_COMMANDS,
|
||||
nextCommands: KTX_NEXT_STEP_DIRECT_COMMANDS,
|
||||
});
|
||||
expect(parsed.generatedOutputs.replays.fileCount).toBeGreaterThanOrEqual(3);
|
||||
expect(jsonIo.stderr()).toBe('');
|
||||
|
|
@ -423,7 +423,7 @@ describe('runKtxDemo', () => {
|
|||
expect(testIo.stdout()).toContain('KTX finished ingesting your data');
|
||||
expect(testIo.stdout()).toContain('ktx sl list');
|
||||
expect(testIo.stdout()).toContain('ktx wiki list');
|
||||
expect(testIo.stdout()).toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(testIo.stdout()).not.toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(testIo.stdout()).not.toContain(['ktx', 'ask'].join(' '));
|
||||
expect(testIo.stdout()).not.toContain(['ktx', 'mcp'].join(' '));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1171,7 +1171,7 @@ describe('runKtxCli', () => {
|
|||
projectDir: tempDir,
|
||||
inputMode: 'disabled',
|
||||
cliVersion: '0.0.0-private',
|
||||
anthropicApiKeyEnv: 'ANTHROPIC_API_KEY',
|
||||
anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret
|
||||
anthropicModel: 'claude-sonnet-4-6',
|
||||
skipLlm: false,
|
||||
}),
|
||||
|
|
@ -1232,7 +1232,7 @@ describe('runKtxCli', () => {
|
|||
inputMode: 'disabled',
|
||||
skipLlm: true,
|
||||
embeddingBackend: 'openai',
|
||||
embeddingApiKeyEnv: 'OPENAI_API_KEY',
|
||||
embeddingApiKeyEnv: 'OPENAI_API_KEY', // pragma: allowlist secret
|
||||
skipEmbeddings: false,
|
||||
}),
|
||||
setupIo.io,
|
||||
|
|
@ -1333,7 +1333,7 @@ describe('runKtxCli', () => {
|
|||
source: 'metabase',
|
||||
sourceConnectionId: 'prod_metabase',
|
||||
sourceUrl: 'https://metabase.example.com',
|
||||
sourceApiKeyRef: 'env:METABASE_API_KEY',
|
||||
sourceApiKeyRef: 'env:METABASE_API_KEY', // pragma: allowlist secret
|
||||
sourceWarehouseConnectionId: 'warehouse',
|
||||
metabaseDatabaseId: 1,
|
||||
}),
|
||||
|
|
@ -1759,8 +1759,8 @@ describe('runKtxCli', () => {
|
|||
{
|
||||
results: [
|
||||
{
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
score: 0.02459016393442623,
|
||||
|
|
@ -1786,8 +1786,8 @@ describe('runKtxCli', () => {
|
|||
expect(JSON.parse(io.stdout())).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
matchReasons: ['lexical', 'token'],
|
||||
}),
|
||||
],
|
||||
|
|
|
|||
|
|
@ -186,6 +186,91 @@ describe('runKtxIngest viz and replay', () => {
|
|||
expect(io.stdout()).toContain('Connection: warehouse');
|
||||
});
|
||||
|
||||
it('prints live viz final summaries as errors when the report has failed work units', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
|
||||
const liveSession = {
|
||||
update: vi.fn(),
|
||||
close: vi.fn(),
|
||||
isClosed: vi.fn(() => false),
|
||||
};
|
||||
const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession);
|
||||
const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
|
||||
input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'notion', trigger: 'manual_resync', fileCount: 37 });
|
||||
input.memoryFlow?.update({
|
||||
syncId: 'sync-notion',
|
||||
plannedWorkUnits: [
|
||||
{
|
||||
unitKey: 'notion-cluster-1',
|
||||
rawFiles: ['pages/a.md'],
|
||||
peerFileCount: 0,
|
||||
dependencyCount: 0,
|
||||
},
|
||||
],
|
||||
});
|
||||
input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
|
||||
input.memoryFlow?.emit({
|
||||
type: 'work_unit_finished',
|
||||
unitKey: 'notion-cluster-1',
|
||||
status: 'failed',
|
||||
reason: 'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)"}',
|
||||
});
|
||||
input.memoryFlow?.emit({ type: 'report_created', runId: 'live-failed' });
|
||||
input.memoryFlow?.finish('done');
|
||||
|
||||
const failedWorkUnit = {
|
||||
...localFakeBundleReport('live-failed').body.workUnits[0],
|
||||
unitKey: 'notion-cluster-1',
|
||||
rawFiles: ['pages/a.md'],
|
||||
status: 'failed' as const,
|
||||
reason: 'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)"}',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
};
|
||||
const report = localFakeBundleReport('live-failed', {
|
||||
id: 'report-live-failed',
|
||||
runId: 'run-live-failed',
|
||||
connectionId: input.connectionId,
|
||||
sourceKey: input.adapter,
|
||||
body: {
|
||||
workUnits: [failedWorkUnit],
|
||||
failedWorkUnits: [failedWorkUnit.unitKey],
|
||||
},
|
||||
});
|
||||
return {
|
||||
result: {
|
||||
jobId: 'live-failed',
|
||||
runId: report.runId,
|
||||
syncId: report.body.syncId,
|
||||
diffSummary: report.body.diffSummary,
|
||||
workUnitCount: report.body.workUnits.length,
|
||||
failedWorkUnits: report.body.failedWorkUnits,
|
||||
artifactsWritten: report.body.provenanceRows.length,
|
||||
commitSha: report.body.commitSha,
|
||||
},
|
||||
report,
|
||||
};
|
||||
});
|
||||
|
||||
await expect(
|
||||
runKtxIngest(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir,
|
||||
connectionId: 'notion-main',
|
||||
adapter: 'notion',
|
||||
outputMode: 'viz',
|
||||
},
|
||||
io.io,
|
||||
{ runLocalIngest: runLocal, startLiveMemoryFlow },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(io.stdout()).toContain('Memory-flow summary: error');
|
||||
expect(io.stdout()).toContain('Notion authorization expired');
|
||||
});
|
||||
|
||||
it('falls back to text live rendering when the TUI live session is unavailable', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
|
|||
|
|
@ -437,6 +437,21 @@ function initialRunMemoryFlowInput(
|
|||
};
|
||||
}
|
||||
|
||||
function finalRunMemoryFlowInput(snapshot: MemoryFlowReplayInput, report: IngestReportSnapshot): MemoryFlowReplayInput {
|
||||
const status = reportStatus(report);
|
||||
return {
|
||||
...snapshot,
|
||||
runId: report.runId,
|
||||
connectionId: report.connectionId,
|
||||
adapter: report.sourceKey,
|
||||
status,
|
||||
syncId: report.body.syncId,
|
||||
reportId: report.id,
|
||||
reportPath: report.id,
|
||||
errors: status === 'error' ? report.body.failedWorkUnits : snapshot.errors,
|
||||
};
|
||||
}
|
||||
|
||||
function managedDaemonOptionsForIngestRun(
|
||||
args: Extract<KtxIngestArgs, { command: 'run' }>,
|
||||
io: KtxIngestIo,
|
||||
|
|
@ -592,7 +607,7 @@ export async function runKtxIngest(
|
|||
...(memoryFlow ? { memoryFlow } : {}),
|
||||
});
|
||||
if (shouldUseLiveViz && memoryFlow) {
|
||||
latestMemoryFlowSnapshot = memoryFlow.snapshot();
|
||||
latestMemoryFlowSnapshot = finalRunMemoryFlowInput(memoryFlow.snapshot(), result.report);
|
||||
liveTui?.close();
|
||||
liveTui = null;
|
||||
io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot));
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ describe('runKtxKnowledge', () => {
|
|||
{
|
||||
command: 'write',
|
||||
projectDir,
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
userId: 'local',
|
||||
summary: 'Revenue',
|
||||
|
|
@ -73,24 +73,53 @@ describe('runKtxKnowledge', () => {
|
|||
writeIo.io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
expect(writeIo.stdout()).toContain('Wrote knowledge/global/metrics/revenue.md');
|
||||
expect(writeIo.stdout()).toContain('Wrote knowledge/global/metrics-revenue.md');
|
||||
|
||||
const readIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge({ command: 'read', projectDir, key: 'metrics/revenue', userId: 'local' }, readIo.io),
|
||||
runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local' }, readIo.io),
|
||||
).resolves.toBe(0);
|
||||
expect(readIo.stdout()).toContain('# metrics/revenue');
|
||||
expect(readIo.stdout()).toContain('# metrics-revenue');
|
||||
expect(readIo.stdout()).toContain('Revenue is paid order value.');
|
||||
|
||||
const listIo = makeIo();
|
||||
await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local' }, listIo.io)).resolves.toBe(0);
|
||||
expect(listIo.stdout()).toContain('GLOBAL\tmetrics/revenue\tRevenue');
|
||||
expect(listIo.stdout()).toContain('GLOBAL\tmetrics-revenue\tRevenue');
|
||||
|
||||
const searchIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge({ command: 'search', projectDir, query: 'paid order', userId: 'local' }, searchIo.io),
|
||||
).resolves.toBe(0);
|
||||
expect(searchIo.stdout()).toContain('metrics/revenue');
|
||||
expect(searchIo.stdout()).toContain('metrics-revenue');
|
||||
});
|
||||
|
||||
it('rejects slash-delimited write keys with a flat-key suggestion', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
||||
const writeIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge(
|
||||
{
|
||||
command: 'write',
|
||||
projectDir,
|
||||
key: 'orbit/company-overview',
|
||||
scope: 'GLOBAL',
|
||||
userId: 'local',
|
||||
summary: 'Orbit',
|
||||
content: 'Orbit overview.',
|
||||
tags: [],
|
||||
refs: [],
|
||||
slRefs: [],
|
||||
},
|
||||
writeIo.io,
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(writeIo.stderr()).toContain(
|
||||
'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".',
|
||||
);
|
||||
expect(writeIo.stdout()).toBe('');
|
||||
});
|
||||
|
||||
it('explains empty search results for a project without wiki pages', async () => {
|
||||
|
|
@ -116,7 +145,7 @@ describe('runKtxKnowledge', () => {
|
|||
{
|
||||
command: 'write',
|
||||
projectDir,
|
||||
key: 'historic-sql/active-contract-arr-open-tickets',
|
||||
key: 'active-contract-arr-open-tickets',
|
||||
scope: 'GLOBAL',
|
||||
userId: 'local',
|
||||
summary: 'Active Contract ARR Ranked by Open Support Ticket Count',
|
||||
|
|
@ -138,7 +167,7 @@ describe('runKtxKnowledge', () => {
|
|||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(searchIo.stdout()).toContain('historic-sql/active-contract-arr-open-tickets');
|
||||
expect(searchIo.stdout()).toContain('active-contract-arr-open-tickets');
|
||||
expect(searchIo.stderr()).toBe('');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ describe('MemoryFlowTuiApp', () => {
|
|||
expect(frame).toContain('KTX finished ingesting your data');
|
||||
expect(frame).toContain('ktx sl list');
|
||||
expect(frame).toContain('ktx wiki list');
|
||||
expect(frame).toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(frame).not.toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(frame).not.toContain(['ktx', 'ask'].join(' '));
|
||||
expect(frame).not.toContain(['ktx', 'mcp'].join(' '));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -66,11 +66,10 @@ describe('KTX demo next steps', () => {
|
|||
const rendered = formatNextStepLines().join('\n');
|
||||
|
||||
expect(rendered).toContain('KTX context is ready for agents.');
|
||||
expect(rendered).toContain('Preferred route: CLI + Skills');
|
||||
expect(rendered).toContain('no MCP server is required');
|
||||
expect(rendered).toContain('Direct CLI checks:');
|
||||
expect(rendered).toContain('Optional MCP:');
|
||||
expect(rendered).not.toContain('Ask your agent to use KTX');
|
||||
expect(rendered).toContain('ask a data question');
|
||||
expect(rendered).toContain('Verify with:');
|
||||
expect(rendered).not.toContain('Preferred route');
|
||||
expect(rendered).not.toContain('Optional MCP:');
|
||||
});
|
||||
|
||||
it('does not advertise removed Commander migration commands', () => {
|
||||
|
|
@ -80,7 +79,6 @@ describe('KTX demo next steps', () => {
|
|||
expect(rendered).toContain('ktx agent context --json');
|
||||
expect(rendered).toContain('ktx sl list');
|
||||
expect(rendered).toContain('ktx wiki list');
|
||||
expect(rendered).toContain('ktx serve --mcp stdio --user-id local');
|
||||
|
||||
for (const removed of [
|
||||
command('ktx', 'ask'),
|
||||
|
|
@ -91,6 +89,7 @@ describe('KTX demo next steps', () => {
|
|||
command('dev', 'knowledge'),
|
||||
command('ktx', 'ingest', 'run'),
|
||||
command('ktx', 'ingest', 'replay'),
|
||||
command('ktx', 'serve', '--mcp', 'stdio', '--user-id', 'local'),
|
||||
]) {
|
||||
expect(rendered).not.toContain(removed);
|
||||
}
|
||||
|
|
@ -123,7 +122,7 @@ describe('KTX demo next steps', () => {
|
|||
|
||||
expect(rendered).toContain('KTX context is ready for agents.');
|
||||
expect(rendered).toContain('ktx agent context --json');
|
||||
expect(rendered).toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(rendered).not.toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(rendered).not.toContain('Build KTX context next.');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -58,12 +58,9 @@ function commandLines(commands: ReadonlyArray<{ command: string; description: st
|
|||
|
||||
export function formatNextStepLines(indent = ' '): string[] {
|
||||
return [
|
||||
`${indent}KTX context is ready for agents.`,
|
||||
`${indent}Preferred route: CLI + Skills; installed rules call the pinned local CLI directly, so no MCP server is required.`,
|
||||
`${indent}Direct CLI checks:`,
|
||||
`${indent}KTX context is ready for agents. Open your coding agent in this directory and ask a data question.`,
|
||||
`${indent}Verify with:`,
|
||||
...commandLines(KTX_NEXT_STEP_DIRECT_COMMANDS, indent),
|
||||
`${indent}Optional MCP:`,
|
||||
...commandLines(KTX_NEXT_STEP_MCP_COMMANDS, indent),
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -239,11 +239,11 @@ describe('setup agents', () => {
|
|||
const output = io.stdout();
|
||||
expect(output).toContain('Agent integration complete');
|
||||
expect(output).toContain('Claude Code');
|
||||
expect(output).toContain('+ Skill installed');
|
||||
expect(output).toContain('+ Skill installed — teaches your agent which KTX commands to run');
|
||||
expect(output).toContain('.claude/skills/ktx/SKILL.md');
|
||||
expect(output).toContain('+ Rule installed');
|
||||
expect(output).toContain('+ Rule installed — tells your agent when to use KTX');
|
||||
expect(output).toContain('.claude/rules/ktx.md');
|
||||
expect(output).toContain('+ MCP config added');
|
||||
expect(output).toContain('+ MCP config added — lets your agent talk to KTX over MCP');
|
||||
expect(output).toContain('.mcp.json');
|
||||
});
|
||||
|
||||
|
|
@ -258,9 +258,9 @@ describe('setup agents', () => {
|
|||
);
|
||||
|
||||
expect(summary).toContain('Cursor');
|
||||
expect(summary).toContain('+ Rule installed');
|
||||
expect(summary).toContain('+ Rule installed — tells your agent when to use KTX');
|
||||
expect(summary).toContain('.cursor/rules/ktx.mdc');
|
||||
expect(summary).toContain('+ MCP config added');
|
||||
expect(summary).toContain('+ MCP config added — lets your agent talk to KTX over MCP');
|
||||
expect(summary).toContain('.cursor/mcp.json');
|
||||
expect(summary).not.toContain(tempDir);
|
||||
});
|
||||
|
|
@ -280,9 +280,9 @@ describe('setup agents', () => {
|
|||
);
|
||||
|
||||
expect(summary).toContain('Claude Code');
|
||||
expect(summary).toContain('+ Skill installed');
|
||||
expect(summary).toContain('+ Rule installed');
|
||||
expect(summary).toContain('+ Skill installed — teaches your agent which KTX commands to run');
|
||||
expect(summary).toContain('+ Rule installed — tells your agent when to use KTX');
|
||||
expect(summary).toContain('Codex');
|
||||
expect(summary).toContain('+ MCP config added');
|
||||
expect(summary).toContain('+ MCP config added — lets your agent talk to KTX over MCP');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -348,6 +348,12 @@ export function formatInstallSummary(
|
|||
idx += planned.length;
|
||||
}
|
||||
|
||||
const fileHints: Record<string, string> = {
|
||||
skill: 'teaches your agent which KTX commands to run',
|
||||
rule: 'tells your agent when to use KTX',
|
||||
mcp: 'lets your agent talk to KTX over MCP',
|
||||
};
|
||||
|
||||
const lines: string[] = [];
|
||||
for (const install of installs) {
|
||||
const targetEntries = entriesByTarget.get(install.target) ?? [];
|
||||
|
|
@ -356,11 +362,13 @@ export function formatInstallSummary(
|
|||
const displayPath =
|
||||
install.scope === 'global' ? entry.path : relative(projectDir, entry.path);
|
||||
if (entry.kind === 'file') {
|
||||
const label = entry.role === 'rule' ? 'Rule installed' : fileEntryLabels[install.target];
|
||||
lines.push(` + ${label}`);
|
||||
const isRule = entry.role === 'rule' || fileEntryLabels[install.target] === 'Rule installed';
|
||||
const label = isRule ? 'Rule installed' : fileEntryLabels[install.target];
|
||||
const hint = fileHints[isRule ? 'rule' : (entry.role ?? 'skill')] ?? '';
|
||||
lines.push(` + ${label} — ${hint}`);
|
||||
lines.push(` ${displayPath}`);
|
||||
} else {
|
||||
lines.push(` + MCP config added`);
|
||||
lines.push(` + MCP config added — ${fileHints.mcp}`);
|
||||
lines.push(` ${displayPath}`);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -215,6 +215,47 @@ describe('setup context build state', () => {
|
|||
expect(io.stdout()).toContain('KTX context is ready for agents.');
|
||||
});
|
||||
|
||||
it('records only failed sources as retryable when the context build fails', async () => {
|
||||
await writeReadyProject(tempDir);
|
||||
const io = makeIo();
|
||||
const runContextBuildMock = vi.fn(async (_project, _args, _io, hooks) => {
|
||||
hooks.onSourceProgress?.([
|
||||
{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 1000 },
|
||||
{ connectionId: 'docs', operation: 'source-ingest', status: 'failed', elapsedMs: 2000 },
|
||||
]);
|
||||
return {
|
||||
exitCode: 1,
|
||||
detached: false,
|
||||
reportIds: ['report-docs-failed'],
|
||||
artifactPaths: ['raw-sources/docs/notion/sync-1/ingest-report.json'],
|
||||
};
|
||||
});
|
||||
|
||||
await expect(
|
||||
runKtxSetupContextStep(
|
||||
{ projectDir: tempDir, inputMode: 'disabled' },
|
||||
io.io,
|
||||
{
|
||||
runIdFactory: () => 'setup-context-local-failed',
|
||||
now: () => new Date('2026-05-09T10:00:00.000Z'),
|
||||
runContextBuild: runContextBuildMock,
|
||||
},
|
||||
),
|
||||
).resolves.toEqual({ status: 'failed', projectDir: tempDir });
|
||||
|
||||
await expect(readKtxSetupContextState(tempDir)).resolves.toMatchObject({
|
||||
runId: 'setup-context-local-failed',
|
||||
status: 'failed',
|
||||
reportIds: ['report-docs-failed'],
|
||||
artifactPaths: ['raw-sources/docs/notion/sync-1/ingest-report.json'],
|
||||
retryableFailedTargets: ['docs'],
|
||||
sourceProgress: [
|
||||
{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 1000 },
|
||||
{ connectionId: 'docs', operation: 'source-ingest', status: 'failed', elapsedMs: 2000 },
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('marks context complete without prompting when initial source ingest already made agent context', async () => {
|
||||
await writeReadyProject(tempDir);
|
||||
await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true });
|
||||
|
|
|
|||
|
|
@ -234,6 +234,24 @@ function normalizeSourceProgress(value: unknown): ContextBuildSourceProgressUpda
|
|||
return entries.length > 0 ? entries : undefined;
|
||||
}
|
||||
|
||||
function setupContextTargetIds(targets: KtxSetupContextTargets): string[] {
|
||||
return [...new Set([...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds])];
|
||||
}
|
||||
|
||||
function retryableFailedTargetsFromProgress(
|
||||
targets: KtxSetupContextTargets,
|
||||
progress: ContextBuildSourceProgressUpdate[] | undefined,
|
||||
): string[] {
|
||||
const targetIds = setupContextTargetIds(targets);
|
||||
if (!progress || progress.length === 0) {
|
||||
return targetIds;
|
||||
}
|
||||
|
||||
const failedIds = new Set(progress.filter((source) => source.status === 'failed').map((source) => source.connectionId));
|
||||
const failedTargets = targetIds.filter((connectionId) => failedIds.has(connectionId));
|
||||
return failedTargets.length > 0 ? failedTargets : targetIds;
|
||||
}
|
||||
|
||||
export async function readKtxSetupContextState(projectDir: string): Promise<KtxSetupContextState> {
|
||||
const filePath = statePath(projectDir);
|
||||
if (!(await pathExists(filePath))) {
|
||||
|
|
@ -614,7 +632,7 @@ async function runBuild(
|
|||
updatedAt,
|
||||
reportIds: completedReportIds,
|
||||
artifactPaths: completedArtifactPaths,
|
||||
retryableFailedTargets: [...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds],
|
||||
retryableFailedTargets: retryableFailedTargetsFromProgress(targets, lastSourceProgress),
|
||||
failureReason: 'Context build failed.',
|
||||
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -532,8 +532,8 @@ describe('setup databases step', () => {
|
|||
expect(prompts.select).toHaveBeenCalledWith({
|
||||
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -583,8 +583,8 @@ describe('setup databases step', () => {
|
|||
expect(prompts.select).toHaveBeenCalledWith({
|
||||
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -618,8 +618,8 @@ describe('setup databases step', () => {
|
|||
expect(prompts.select).toHaveBeenCalledWith({
|
||||
message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -653,8 +653,8 @@ describe('setup databases step', () => {
|
|||
expect(prompts.select).toHaveBeenNthCalledWith(2, {
|
||||
message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -696,8 +696,8 @@ describe('setup databases step', () => {
|
|||
expect(prompts.select).toHaveBeenNthCalledWith(2, {
|
||||
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
|
||||
options: [
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -920,8 +920,18 @@ describe('setup databases step', () => {
|
|||
'│ ✓ Connection test passed',
|
||||
'│ Driver: PostgreSQL · Tables: 2',
|
||||
'│',
|
||||
].join('\n'),
|
||||
);
|
||||
expect(io.stdout()).toContain(
|
||||
[
|
||||
'◇ Scanning postgres-warehouse',
|
||||
'│ ✓ Structural scan completed',
|
||||
'│ Running structural scan…',
|
||||
'│',
|
||||
].join('\n'),
|
||||
);
|
||||
expect(io.stdout()).toContain(
|
||||
[
|
||||
'◇ Scan complete for postgres-warehouse',
|
||||
'│ Changes: 2 new tables',
|
||||
'│ Report: raw-sources/postgres-warehouse/live-database/.../scan-report.json',
|
||||
'│',
|
||||
|
|
@ -1009,7 +1019,7 @@ describe('setup databases step', () => {
|
|||
expect(config.connections['postgres-warehouse']).toMatchObject({
|
||||
schemas: ['orbit_analytics', 'orbit_raw'],
|
||||
});
|
||||
expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw');
|
||||
expect(io.stdout()).toContain('✓ orbit_analytics, orbit_raw');
|
||||
});
|
||||
|
||||
it('auto-selects all discovered Postgres schemas in non-interactive setup', async () => {
|
||||
|
|
@ -1045,7 +1055,7 @@ describe('setup databases step', () => {
|
|||
expect(config.connections.warehouse).toMatchObject({
|
||||
schemas: ['orbit_analytics', 'orbit_raw', 'public'],
|
||||
});
|
||||
expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw, public');
|
||||
expect(io.stdout()).toContain('✓ orbit_analytics, orbit_raw, public');
|
||||
});
|
||||
|
||||
it('adds one non-interactive Postgres URL connection, tests it, scans it, and marks databases complete', async () => {
|
||||
|
|
|
|||
|
|
@ -115,6 +115,56 @@ const DEFAULT_CONNECTION_IDS: Record<KtxSetupDatabaseDriver, string> = {
|
|||
snowflake: 'snowflake-warehouse',
|
||||
};
|
||||
|
||||
interface ScopeDiscoverySpec {
|
||||
noun: string;
|
||||
nounPlural: string;
|
||||
promptLabel: string;
|
||||
configArrayField: string;
|
||||
configSingleField: string;
|
||||
defaultSelection: (values: string[]) => string[];
|
||||
}
|
||||
|
||||
const SCOPE_DISCOVERY_SPECS: Partial<Record<KtxSetupDatabaseDriver, ScopeDiscoverySpec>> = {
|
||||
postgres: {
|
||||
noun: 'schema',
|
||||
nounPlural: 'schemas',
|
||||
promptLabel: 'PostgreSQL schemas',
|
||||
configArrayField: 'schemas',
|
||||
configSingleField: 'schema',
|
||||
defaultSelection(schemas) {
|
||||
const nonPublic = schemas.filter((s) => s !== 'public');
|
||||
return nonPublic.length > 0 ? nonPublic : schemas;
|
||||
},
|
||||
},
|
||||
sqlserver: {
|
||||
noun: 'schema',
|
||||
nounPlural: 'schemas',
|
||||
promptLabel: 'SQL Server schemas',
|
||||
configArrayField: 'schemas',
|
||||
configSingleField: 'schema',
|
||||
defaultSelection: (schemas) => schemas,
|
||||
},
|
||||
bigquery: {
|
||||
noun: 'dataset',
|
||||
nounPlural: 'datasets',
|
||||
promptLabel: 'BigQuery datasets',
|
||||
configArrayField: 'dataset_ids',
|
||||
configSingleField: 'dataset_id',
|
||||
defaultSelection: (datasets) => datasets,
|
||||
},
|
||||
snowflake: {
|
||||
noun: 'schema',
|
||||
nounPlural: 'schemas',
|
||||
promptLabel: 'Snowflake schemas',
|
||||
configArrayField: 'schema_names',
|
||||
configSingleField: 'schema_name',
|
||||
defaultSelection(schemas) {
|
||||
const nonPublic = schemas.filter((s) => s !== 'PUBLIC');
|
||||
return nonPublic.length > 0 ? nonPublic : schemas;
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
type UrlDriverType = Extract<KtxSetupDatabaseDriver, 'postgres' | 'mysql' | 'clickhouse' | 'sqlserver'>;
|
||||
|
||||
const DRIVER_CONNECTION_DEFAULTS: Record<UrlDriverType, { port: string }> = {
|
||||
|
|
@ -263,16 +313,53 @@ async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Pr
|
|||
async function defaultListSchemas(projectDir: string, connectionId: string): Promise<string[]> {
|
||||
const project = await loadKtxProject({ projectDir });
|
||||
const connection = project.config.connections[connectionId];
|
||||
const { KtxPostgresScanConnector, isKtxPostgresConnectionConfig } = await import('@ktx/connector-postgres');
|
||||
if (!isKtxPostgresConnectionConfig(connection)) {
|
||||
return [];
|
||||
const driver = normalizeDriver(connection?.driver);
|
||||
|
||||
if (driver === 'postgres') {
|
||||
const { KtxPostgresScanConnector, isKtxPostgresConnectionConfig } = await import('@ktx/connector-postgres');
|
||||
if (!isKtxPostgresConnectionConfig(connection)) return [];
|
||||
const connector = new KtxPostgresScanConnector({ connectionId, connection });
|
||||
try {
|
||||
return await connector.listSchemas();
|
||||
} finally {
|
||||
await connector.cleanup();
|
||||
}
|
||||
}
|
||||
const connector = new KtxPostgresScanConnector({ connectionId, connection });
|
||||
try {
|
||||
return await connector.listSchemas();
|
||||
} finally {
|
||||
await connector.cleanup();
|
||||
|
||||
if (driver === 'sqlserver') {
|
||||
const { KtxSqlServerScanConnector, isKtxSqlServerConnectionConfig } = await import('@ktx/connector-sqlserver');
|
||||
if (!isKtxSqlServerConnectionConfig(connection)) return [];
|
||||
const connector = new KtxSqlServerScanConnector({ connectionId, connection });
|
||||
try {
|
||||
return await connector.listSchemas();
|
||||
} finally {
|
||||
await connector.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
if (driver === 'bigquery') {
|
||||
const { KtxBigQueryScanConnector, isKtxBigQueryConnectionConfig } = await import('@ktx/connector-bigquery');
|
||||
if (!isKtxBigQueryConnectionConfig(connection)) return [];
|
||||
const connector = new KtxBigQueryScanConnector({ connectionId, connection });
|
||||
try {
|
||||
return await connector.listDatasets();
|
||||
} finally {
|
||||
await connector.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
if (driver === 'snowflake') {
|
||||
const { KtxSnowflakeScanConnector, isKtxSnowflakeConnectionConfig } = await import('@ktx/connector-snowflake');
|
||||
if (!isKtxSnowflakeConnectionConfig(connection)) return [];
|
||||
const connector = new KtxSnowflakeScanConnector({ connectionId, connection });
|
||||
try {
|
||||
return await connector.listSchemas();
|
||||
} finally {
|
||||
await connector.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
function existingConnectionIdsByDriver(
|
||||
|
|
@ -309,8 +396,8 @@ function configuredPrimarySourcesPrompt(connectionIds: string[]): {
|
|||
return {
|
||||
message: `Primary sources already configured: ${connectionIds.join(', ')}\nWhat would you like to do?`,
|
||||
options: [
|
||||
{ value: 'continue', label: 'Continue to knowledge sources' },
|
||||
{ value: 'add', label: 'Add another primary source' },
|
||||
{ value: 'continue', label: 'Continue setup' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
};
|
||||
|
|
@ -849,41 +936,44 @@ async function writeConnectionConfig(input: {
|
|||
}
|
||||
}
|
||||
|
||||
function configuredSchemas(connection: KtxProjectConnectionConfig | undefined): string[] {
|
||||
function configuredScopeValues(
|
||||
connection: KtxProjectConnectionConfig | undefined,
|
||||
spec: ScopeDiscoverySpec,
|
||||
): string[] {
|
||||
if (!connection) return [];
|
||||
if (Array.isArray(connection.schemas)) {
|
||||
return connection.schemas
|
||||
.filter((schema): schema is string => typeof schema === 'string' && schema.trim().length > 0)
|
||||
.map((schema) => schema.trim());
|
||||
const arrayVal = connection[spec.configArrayField];
|
||||
if (Array.isArray(arrayVal)) {
|
||||
return arrayVal
|
||||
.filter((v): v is string => typeof v === 'string' && v.trim().length > 0)
|
||||
.map((v) => v.trim());
|
||||
}
|
||||
return typeof connection.schema === 'string' && connection.schema.trim().length > 0 ? [connection.schema.trim()] : [];
|
||||
const singleVal = connection[spec.configSingleField];
|
||||
return typeof singleVal === 'string' && singleVal.trim().length > 0 ? [singleVal.trim()] : [];
|
||||
}
|
||||
|
||||
function defaultSchemaSelection(schemas: string[]): string[] {
|
||||
const nonPublic = schemas.filter((schema) => schema !== 'public');
|
||||
return nonPublic.length > 0 ? nonPublic : schemas;
|
||||
}
|
||||
|
||||
async function writeConnectionSchemas(input: {
|
||||
async function writeScopeConfig(input: {
|
||||
projectDir: string;
|
||||
connectionId: string;
|
||||
schemas: string[];
|
||||
values: string[];
|
||||
spec: ScopeDiscoverySpec;
|
||||
}): Promise<void> {
|
||||
const project = await loadKtxProject({ projectDir: input.projectDir });
|
||||
const connection = project.config.connections[input.connectionId];
|
||||
if (!connection) return;
|
||||
const { schema: _schema, ...connectionWithoutLegacySchema } = connection;
|
||||
const cleaned = Object.fromEntries(
|
||||
Object.entries(connection).filter(([key]) => key !== input.spec.configSingleField),
|
||||
) as KtxProjectConnectionConfig;
|
||||
await writeConnectionConfig({
|
||||
projectDir: input.projectDir,
|
||||
connectionId: input.connectionId,
|
||||
connection: {
|
||||
...connectionWithoutLegacySchema,
|
||||
schemas: unique(input.schemas),
|
||||
...cleaned,
|
||||
[input.spec.configArrayField]: unique(input.values),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async function maybeConfigurePostgresSchemas(input: {
|
||||
async function maybeConfigureSchemaScope(input: {
|
||||
projectDir: string;
|
||||
connectionId: string;
|
||||
args: KtxSetupDatabasesArgs;
|
||||
|
|
@ -893,65 +983,78 @@ async function maybeConfigurePostgresSchemas(input: {
|
|||
}): Promise<boolean> {
|
||||
const project = await loadKtxProject({ projectDir: input.projectDir });
|
||||
const connection = project.config.connections[input.connectionId];
|
||||
if (normalizeDriver(connection?.driver) !== 'postgres') {
|
||||
return true;
|
||||
}
|
||||
const driver = normalizeDriver(connection?.driver);
|
||||
if (!driver) return true;
|
||||
|
||||
if (configuredSchemas(connection).length > 0) {
|
||||
const spec = SCOPE_DISCOVERY_SPECS[driver];
|
||||
if (!spec) return true;
|
||||
|
||||
const arrayVal = connection?.[spec.configArrayField];
|
||||
if (Array.isArray(arrayVal) && arrayVal.length > 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (input.args.databaseSchemas.length > 0) {
|
||||
await writeConnectionSchemas({
|
||||
await writeScopeConfig({
|
||||
projectDir: input.projectDir,
|
||||
connectionId: input.connectionId,
|
||||
schemas: input.args.databaseSchemas,
|
||||
values: input.args.databaseSchemas,
|
||||
spec,
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
let discoveredSchemas: string[];
|
||||
writeSetupSection(input.io, `Discovering ${spec.promptLabel.toLowerCase()}`, [
|
||||
`Connecting to ${input.connectionId}…`,
|
||||
]);
|
||||
|
||||
let discovered: string[];
|
||||
try {
|
||||
discoveredSchemas = unique(
|
||||
discovered = unique(
|
||||
await (input.deps.listSchemas ?? defaultListSchemas)(input.projectDir, input.connectionId),
|
||||
);
|
||||
} catch (error) {
|
||||
input.io.stderr.write(
|
||||
`Could not discover PostgreSQL schemas for ${input.connectionId}; continuing with existing schema scope. ` +
|
||||
`Could not discover ${spec.promptLabel.toLowerCase()} for ${input.connectionId}; continuing with existing ${spec.noun} scope. ` +
|
||||
`Pass --database-schema to set it explicitly. ${error instanceof Error ? error.message : String(error)}\n`,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (discoveredSchemas.length === 0) {
|
||||
if (discovered.length === 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
let selectedSchemas: string[];
|
||||
if (input.args.inputMode === 'disabled' || discoveredSchemas.length === 1) {
|
||||
selectedSchemas = discoveredSchemas;
|
||||
let selected: string[];
|
||||
if (input.args.inputMode === 'disabled' || discovered.length === 1) {
|
||||
const preconfigured = configuredScopeValues(connection, spec).filter((v) => discovered.includes(v));
|
||||
selected = preconfigured.length > 0 ? preconfigured : discovered;
|
||||
} else {
|
||||
const initialValues = defaultSchemaSelection(discoveredSchemas);
|
||||
const preconfigured = configuredScopeValues(connection, spec).filter((v) => discovered.includes(v));
|
||||
const initialValues = preconfigured.length > 0 ? preconfigured : spec.defaultSelection(discovered);
|
||||
const choices = await input.prompts.multiselect({
|
||||
message: withMultiselectNavigation(
|
||||
'PostgreSQL schemas to scan\nKTX found multiple non-system schemas. Select every schema agents should use.',
|
||||
`${spec.promptLabel} to scan\n` +
|
||||
`KTX found multiple ${spec.nounPlural}. Select every ${spec.noun} agents should use.`,
|
||||
),
|
||||
options: discoveredSchemas.map((schema) => ({ value: schema, label: schema })),
|
||||
options: discovered.map((v) => ({ value: v, label: v })),
|
||||
initialValues,
|
||||
required: true,
|
||||
});
|
||||
if (choices.includes('back')) {
|
||||
return false;
|
||||
}
|
||||
selectedSchemas = choices.length > 0 ? choices : initialValues;
|
||||
selected = choices.length > 0 ? choices : initialValues;
|
||||
}
|
||||
|
||||
await writeConnectionSchemas({
|
||||
await writeScopeConfig({
|
||||
projectDir: input.projectDir,
|
||||
connectionId: input.connectionId,
|
||||
schemas: selectedSchemas,
|
||||
values: selected,
|
||||
spec,
|
||||
});
|
||||
writeSetupSection(input.io, `Selecting schemas for ${input.connectionId}`, [
|
||||
`Schemas: ${selectedSchemas.join(', ')}`,
|
||||
const capitalNounPlural = spec.nounPlural[0]!.toUpperCase() + spec.nounPlural.slice(1);
|
||||
writeSetupSection(input.io, `${capitalNounPlural} saved for ${input.connectionId}`, [
|
||||
`✓ ${selected.join(', ')}`,
|
||||
]);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1081,7 +1184,7 @@ async function validateAndScanConnection(input: {
|
|||
testLines.push(`Driver: ${driverDisplay}${Number.isFinite(tableCount) ? ` · Tables: ${tableCount}` : ''}`);
|
||||
writeSetupSection(input.io, `Testing ${input.connectionId}`, testLines);
|
||||
|
||||
if (!(await maybeConfigurePostgresSchemas(input))) {
|
||||
if (!(await maybeConfigureSchemaScope(input))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -1091,6 +1194,9 @@ async function validateAndScanConnection(input: {
|
|||
io: input.io,
|
||||
deps: input.deps,
|
||||
});
|
||||
writeSetupSection(input.io, `Scanning ${input.connectionId}`, [
|
||||
'Running structural scan…',
|
||||
]);
|
||||
const scanIo = createBufferedCommandIo();
|
||||
const scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo);
|
||||
if (scanCode !== 0) {
|
||||
|
|
@ -1103,9 +1209,8 @@ async function validateAndScanConnection(input: {
|
|||
const reportPath = readOutputValue(scanOutput, 'Report');
|
||||
writeSetupSection(
|
||||
input.io,
|
||||
`Scanning ${input.connectionId}`,
|
||||
`Scan complete for ${input.connectionId}`,
|
||||
[
|
||||
'✓ Structural scan completed',
|
||||
`Changes: ${summarizeScanChanges(scanOutput)}`,
|
||||
...(reportPath ? [`Report: ${shortenScanReportPath(reportPath)}`] : []),
|
||||
],
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ function createTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTarge
|
|||
status: 'queued',
|
||||
detailLine: null,
|
||||
summaryText: null,
|
||||
failureText: null,
|
||||
startedAt: null,
|
||||
elapsedMs: 0,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -211,6 +211,66 @@ describe('setup sources step', () => {
|
|||
expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io);
|
||||
});
|
||||
|
||||
it('writes Notion config with the full default knowledge create budget', async () => {
|
||||
await addPrimarySource();
|
||||
const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=1' }));
|
||||
|
||||
await expect(
|
||||
runKtxSetupSourcesStep(
|
||||
{
|
||||
projectDir,
|
||||
inputMode: 'disabled',
|
||||
source: 'notion',
|
||||
sourceConnectionId: 'notion-main',
|
||||
sourceApiKeyRef: 'env:NOTION_TOKEN',
|
||||
notionCrawlMode: 'selected_roots',
|
||||
notionRootPageIds: ['page-1'],
|
||||
runInitialSourceIngest: false,
|
||||
skipSources: false,
|
||||
},
|
||||
makeIo().io,
|
||||
{ validateNotion },
|
||||
),
|
||||
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion-main'] });
|
||||
|
||||
expect((await readConfig()).connections['notion-main']).toMatchObject({
|
||||
driver: 'notion',
|
||||
auth_token_ref: 'env:NOTION_TOKEN',
|
||||
root_page_ids: ['page-1'],
|
||||
max_knowledge_creates_per_run: 25,
|
||||
max_knowledge_updates_per_run: 20,
|
||||
});
|
||||
});
|
||||
|
||||
it('uses selected Notion roots when root page ids are provided even if crawl mode says all accessible', async () => {
|
||||
await addPrimarySource();
|
||||
const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=1' }));
|
||||
|
||||
await expect(
|
||||
runKtxSetupSourcesStep(
|
||||
{
|
||||
projectDir,
|
||||
inputMode: 'disabled',
|
||||
source: 'notion',
|
||||
sourceConnectionId: 'notion-main',
|
||||
sourceApiKeyRef: 'env:NOTION_TOKEN',
|
||||
notionCrawlMode: 'all_accessible',
|
||||
notionRootPageIds: ['page-1'],
|
||||
runInitialSourceIngest: false,
|
||||
skipSources: false,
|
||||
},
|
||||
makeIo().io,
|
||||
{ validateNotion },
|
||||
),
|
||||
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion-main'] });
|
||||
|
||||
expect((await readConfig()).connections['notion-main']).toMatchObject({
|
||||
driver: 'notion',
|
||||
root_page_ids: ['page-1'],
|
||||
crawl_mode: 'selected_roots',
|
||||
});
|
||||
});
|
||||
|
||||
it('defaults interactive Metabase and Looker source setup to the only warehouse connection', async () => {
|
||||
await addPrimarySource();
|
||||
const cases: Array<{
|
||||
|
|
|
|||
|
|
@ -36,6 +36,8 @@ import { writeProjectLocalSecretReference } from './setup-secrets.js';
|
|||
|
||||
export type KtxSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion';
|
||||
|
||||
const DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN = 25;
|
||||
|
||||
export interface KtxSetupSourcesArgs {
|
||||
projectDir: string;
|
||||
inputMode: 'auto' | 'disabled';
|
||||
|
|
@ -508,8 +510,8 @@ function buildLookmlConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC
|
|||
}
|
||||
|
||||
function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionConfig {
|
||||
const crawlMode = args.notionCrawlMode ?? 'selected_roots';
|
||||
const rootPageIds = args.notionRootPageIds ?? [];
|
||||
const crawlMode = rootPageIds.length > 0 ? 'selected_roots' : (args.notionCrawlMode ?? 'selected_roots');
|
||||
if (crawlMode === 'selected_roots' && rootPageIds.length === 0) {
|
||||
throw new Error('Notion selected_roots requires --notion-root-page-id.');
|
||||
}
|
||||
|
|
@ -521,7 +523,7 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC
|
|||
root_database_ids: [],
|
||||
root_data_source_ids: [],
|
||||
max_pages_per_run: 1000,
|
||||
max_knowledge_creates_per_run: 5,
|
||||
max_knowledge_creates_per_run: DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
max_knowledge_updates_per_run: 20,
|
||||
last_successful_cursor: null,
|
||||
};
|
||||
|
|
@ -1185,10 +1187,10 @@ async function promptForInteractiveSource(
|
|||
},
|
||||
async (currentState) => {
|
||||
const crawlMode = await prompts.select({
|
||||
message: 'Notion crawl mode',
|
||||
message: 'Which Notion pages should KTX ingest?',
|
||||
options: [
|
||||
{ value: 'selected_roots', label: 'Selected roots' },
|
||||
{ value: 'all_accessible', label: 'All accessible pages' },
|
||||
{ value: 'selected_roots', label: 'Specific pages and their subpages (you\'ll paste page IDs)' },
|
||||
{ value: 'all_accessible', label: 'All pages the integration can access' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
|
|
@ -1203,8 +1205,8 @@ async function promptForInteractiveSource(
|
|||
? [
|
||||
async (currentState: SourcePromptState) => {
|
||||
const roots = await promptText(prompts, {
|
||||
message: 'Notion root page ids',
|
||||
placeholder: 'comma-separated ids',
|
||||
message: 'Notion page IDs to ingest (each page includes all its subpages)',
|
||||
placeholder: 'page-id-1, page-id-2',
|
||||
});
|
||||
if (roots === undefined) return 'back';
|
||||
currentState.notionRootPageIds = roots
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import { execFile } from 'node:child_process';
|
||||
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { promisify } from 'node:util';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import { localFakeBundleReport, persistLocalBundleReport } from './ingest.test-utils.js';
|
||||
|
|
@ -12,6 +14,8 @@ vi.mock('./setup-demo-tour.js', () => ({
|
|||
runDemoTour: vi.fn(async () => 0),
|
||||
}));
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
function makeIo() {
|
||||
let stdout = '';
|
||||
let stderr = '';
|
||||
|
|
@ -1453,6 +1457,60 @@ describe('setup status', () => {
|
|||
expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources', 'context', 'agents']);
|
||||
});
|
||||
|
||||
it('commits setup config changes written by later setup steps', async () => {
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxSetup(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir: tempDir,
|
||||
mode: 'new',
|
||||
agents: false,
|
||||
inputMode: 'disabled',
|
||||
yes: true,
|
||||
cliVersion: '0.2.0',
|
||||
skipLlm: true,
|
||||
skipEmbeddings: true,
|
||||
skipDatabases: true,
|
||||
skipSources: true,
|
||||
skipAgents: false,
|
||||
databaseSchemas: [],
|
||||
},
|
||||
io.io,
|
||||
{
|
||||
model: async () => ({ status: 'skipped', projectDir: tempDir }),
|
||||
embeddings: async () => ({ status: 'skipped', projectDir: tempDir }),
|
||||
databases: async () => {
|
||||
const configPath = join(tempDir, 'ktx.yaml');
|
||||
const current = await readFile(configPath, 'utf-8');
|
||||
await writeFile(
|
||||
configPath,
|
||||
current.replace(
|
||||
'connections: {}',
|
||||
['connections:', ' warehouse:', ' driver: postgres', ' url: env:DATABASE_URL'].join('\n'),
|
||||
),
|
||||
'utf-8',
|
||||
);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
sources: async () => ({ status: 'skipped', projectDir: tempDir }),
|
||||
context: async () => ({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' }),
|
||||
agents: async () => ({
|
||||
status: 'ready',
|
||||
projectDir: tempDir,
|
||||
installs: [{ target: 'codex', scope: 'project', mode: 'cli' }],
|
||||
}),
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
const { stdout } = await execFileAsync('git', ['-C', tempDir, 'status', '--short', '--', 'ktx.yaml']);
|
||||
expect(stdout).toBe('');
|
||||
const committedConfig = await execFileAsync('git', ['-C', tempDir, 'show', 'HEAD:ktx.yaml']);
|
||||
expect(committedConfig.stdout).toContain('warehouse:');
|
||||
});
|
||||
|
||||
it('runs agent setup after context succeeds in --agents mode', async () => {
|
||||
const calls: string[] = [];
|
||||
const io = makeIo();
|
||||
|
|
|
|||
|
|
@ -433,6 +433,11 @@ function setupRuntimeInstallPolicy(args: Extract<KtxSetupArgs, { command: 'run'
|
|||
return args.inputMode === 'disabled' ? 'never' : 'prompt';
|
||||
}
|
||||
|
||||
async function commitSetupConfigChanges(projectDir: string): Promise<void> {
|
||||
const project = await loadKtxProject({ projectDir });
|
||||
await project.git.commitFile('ktx.yaml', 'setup: update KTX project config', 'ktx setup', 'setup@ktx.local');
|
||||
}
|
||||
|
||||
export async function runKtxSetup(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetupDeps = {}): Promise<number> {
|
||||
try {
|
||||
return await runKtxSetupInner(args, io, deps);
|
||||
|
|
@ -773,6 +778,8 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
break;
|
||||
}
|
||||
|
||||
await commitSetupConfigChanges(projectResult.projectDir);
|
||||
|
||||
const status = await readKtxSetupStatus(projectResult.projectDir);
|
||||
io.stdout.write(formatKtxSetupStatus(status));
|
||||
io.stdout.write('\nWhat you can do next:\n');
|
||||
|
|
|
|||
|
|
@ -84,6 +84,56 @@ describe('runKtxSl', () => {
|
|||
expect(listIo.stdout()).toContain('warehouse\torders\tcolumns=1\tmeasures=0\tjoins=0');
|
||||
});
|
||||
|
||||
it('fails validation when a table-backed source declares columns absent from a matching warehouse manifest', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
const project = await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml',
|
||||
`tables:
|
||||
int_active_contract_arr:
|
||||
table: orbit_analytics.int_active_contract_arr
|
||||
columns:
|
||||
- { name: contract_id, type: string }
|
||||
- { name: contract_arr_cents, type: number }
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add warehouse manifest',
|
||||
);
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/dbt-main/int_active_contract_arr.yaml',
|
||||
`name: int_active_contract_arr
|
||||
table: orbit_analytics.int_active_contract_arr
|
||||
grain: [contract_id]
|
||||
columns:
|
||||
- { name: contract_id, type: string }
|
||||
- { name: arr_cents, type: number }
|
||||
measures:
|
||||
- { name: arr, expr: sum(arr_cents) }
|
||||
joins: []
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add invalid dbt source',
|
||||
);
|
||||
|
||||
const validateIo = makeIo();
|
||||
await expect(
|
||||
runKtxSl(
|
||||
{
|
||||
command: 'validate',
|
||||
projectDir,
|
||||
connectionId: 'dbt-main',
|
||||
sourceName: 'int_active_contract_arr',
|
||||
},
|
||||
validateIo.io,
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(validateIo.stderr()).toContain('arr_cents');
|
||||
expect(validateIo.stderr()).toContain('absent from physical table');
|
||||
});
|
||||
|
||||
it('runs sl query and prints SQL output', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
const project = await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
|
|||
if (!source) {
|
||||
throw new Error(`Semantic-layer source "${args.connectionId}/${args.sourceName}" was not found`);
|
||||
}
|
||||
const result = await validateLocalSlSource(source.yaml);
|
||||
const result = await validateLocalSlSource(source.yaml, { project, connectionId: args.connectionId });
|
||||
if (!result.valid) {
|
||||
for (const error of result.errors) {
|
||||
io.stderr.write(`${error}\n`);
|
||||
|
|
|
|||
|
|
@ -226,7 +226,7 @@ describe('standalone built ktx CLI smoke', () => {
|
|||
expect(result.stdout).toContain('Notion:');
|
||||
expect(result.stdout).toContain('Semantic-layer sources:');
|
||||
expect(result.stdout).toContain('Knowledge pages:');
|
||||
expect(result.stdout).toContain('ktx serve --mcp stdio');
|
||||
expect(result.stdout).not.toContain('ktx serve --mcp stdio');
|
||||
expect(result.stdout).not.toContain(['--mode', 'deterministic'].join(' '));
|
||||
});
|
||||
|
||||
|
|
@ -341,7 +341,7 @@ describe('standalone built ktx CLI smoke', () => {
|
|||
expect(inspect.stdout).toContain('ktx agent tools --json');
|
||||
expect(inspect.stdout).toContain('ktx agent context --json');
|
||||
expect(inspect.stdout).not.toContain('ktx ask "your question here"');
|
||||
expect(inspect.stdout).toContain('ktx serve --mcp stdio');
|
||||
expect(inspect.stdout).not.toContain('ktx serve --mcp stdio');
|
||||
});
|
||||
|
||||
it('serves seeded demo wiki and semantic-layer context over stdio MCP', async () => {
|
||||
|
|
|
|||
|
|
@ -358,4 +358,38 @@ describe('KtxPostgresScanConnector', () => {
|
|||
expect(snapshot.tables.length).toBeGreaterThan(0);
|
||||
expect(endCalled).toBe(true);
|
||||
});
|
||||
|
||||
it('attaches an error listener to the pg pool', async () => {
|
||||
const on = vi.fn();
|
||||
const poolFactory: KtxPostgresPoolFactory = {
|
||||
createPool() {
|
||||
return {
|
||||
on,
|
||||
async connect() {
|
||||
return {
|
||||
query: vi.fn(async () => ({ rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] })),
|
||||
release: vi.fn(),
|
||||
};
|
||||
},
|
||||
end: vi.fn(async () => undefined),
|
||||
};
|
||||
},
|
||||
};
|
||||
const connector = new KtxPostgresScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'postgres',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
password: 'test-password', // pragma: allowlist secret
|
||||
readonly: true,
|
||||
},
|
||||
poolFactory,
|
||||
});
|
||||
|
||||
await expect(connector.testConnection()).resolves.toEqual({ success: true });
|
||||
|
||||
expect(on).toHaveBeenCalledWith('error', expect.any(Function));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ interface KtxPostgresClient {
|
|||
interface KtxPostgresPool {
|
||||
connect(): Promise<KtxPostgresClient>;
|
||||
end(): Promise<void>;
|
||||
on?(event: 'error', listener: (error: Error) => void): void;
|
||||
}
|
||||
|
||||
export interface KtxPostgresPoolFactory {
|
||||
|
|
@ -359,6 +360,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
|
|||
private readonly now: () => Date;
|
||||
private readonly dialect = new KtxPostgresDialect();
|
||||
private pool: KtxPostgresPool | null = null;
|
||||
private lastIdlePoolError: Error | null = null;
|
||||
private resolvedEndpoint: KtxPostgresResolvedEndpoint | null = null;
|
||||
|
||||
constructor(options: KtxPostgresScanConnectorOptions) {
|
||||
|
|
@ -677,11 +679,15 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
|
|||
config = { ...config, host: endpoint.host, port: endpoint.port };
|
||||
}
|
||||
this.pool = this.poolFactory.createPool(config);
|
||||
this.pool.on?.('error', (error) => {
|
||||
this.lastIdlePoolError = error;
|
||||
});
|
||||
}
|
||||
return this.pool;
|
||||
}
|
||||
|
||||
private async queryRaw<T>(sql: string, params?: unknown[]): Promise<T[]> {
|
||||
this.throwIdlePoolErrorIfPresent();
|
||||
const pool = await this.getPool();
|
||||
const client = await pool.connect();
|
||||
try {
|
||||
|
|
@ -693,6 +699,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
|
|||
}
|
||||
|
||||
private async query(sql: string, params?: Record<string, unknown> | unknown[]): Promise<KtxQueryResult> {
|
||||
this.throwIdlePoolErrorIfPresent();
|
||||
const pool = await this.getPool();
|
||||
const client = await pool.connect();
|
||||
try {
|
||||
|
|
@ -714,4 +721,13 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
|
|||
throw new Error(`PostgreSQL connector ${this.connectionId} cannot run scan for ${connectionId}`);
|
||||
}
|
||||
}
|
||||
|
||||
private throwIdlePoolErrorIfPresent(): void {
|
||||
if (!this.lastIdlePoolError) {
|
||||
return;
|
||||
}
|
||||
const error = this.lastIdlePoolError;
|
||||
this.lastIdlePoolError = null;
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,11 +10,12 @@ Parsimonious. Stage 3 WUs already loaded `ingest_triage` and handled conflicts t
|
|||
1. Load `ingest_triage`, then `sl_capture` + `knowledge_capture`.
|
||||
2. Call `stage_list()` for the full index of this job's writes. If it is empty AND you have no evictions, exit — the runner short-circuits this case but the skill still teaches you to bail fast.
|
||||
3. If the system prompt includes `<canonical_pins>`, apply those pins before flagging a same-name or near-duplicate conflict. A pinned `canonicalArtifactKey` keeps the contested name when it is present in the Stage Index; competing variants keep or receive disambiguated names.
|
||||
4. For each pair of WUs that wrote overlapping SL source names or wiki keys, call `stage_diff` to see the actual difference. If they're the same content, leave it. If they differ per `ingest_triage` rules, apply the correct resolution (rename + capture; election of canonical; silent replace for expression-only re-ingest change; or pinned canonical), then call `emit_conflict_resolution` with the artifact key and decision.
|
||||
5. Call `eviction_list()` for deleted raw paths. For each eviction: if inbound refs are empty, remove the artifact (`sl_delete`, `wiki_remove`); if inbound refs exist, retain with a deprecation marker. Then call `emit_eviction_decision` for every removed or retained artifact.
|
||||
6. If the Stage 4 sweep discovers a raw file whose only honest outcome is standalone SQL, wiki-only capture, or a human flag, call `emit_unmapped_fallback` with the raw path, reason, and fallback kind.
|
||||
7. Use `read_raw_span` to zoom into specific raw files when you need to resolve what two contested measures actually compute.
|
||||
8. Exit when you've processed every item.
|
||||
4. Sweep both exact-key conflicts and near-duplicate writes. Compare WUs that wrote overlapping SL source names, overlapping wiki keys, the same `tables:` or `sl_refs:` action details, or obviously equivalent topic titles under different wiki keys. Call `stage_diff` to see the actual difference, and use `wiki_read`/`sl_read_source` when two different keys appear to describe the same table, metric, or source-of-truth mapping. If they're the same content, leave one canonical artifact and record the duplicate as subsumed. If they differ per `ingest_triage` rules, apply the correct resolution (rename + capture; election of canonical; silent replace for expression-only re-ingest change; or pinned canonical), then call `emit_conflict_resolution` with the artifact key and decision.
|
||||
5. For any `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call you make during reconciliation, include `rawPaths` with only the raw paths that directly caused that reconciliation action.
|
||||
6. Call `eviction_list()` for deleted raw paths. For each eviction: if inbound refs are empty, remove the artifact (`sl_delete`, `wiki_remove`) and include that evicted raw path in `rawPaths`; if inbound refs exist, retain with a deprecation marker and include that evicted raw path in `rawPaths`. Then call `emit_eviction_decision` for every removed or retained artifact.
|
||||
7. If the Stage 4 sweep discovers a raw file whose only honest outcome is standalone SQL, wiki-only capture, or a human flag, call `emit_unmapped_fallback` with the raw path, reason, and fallback kind.
|
||||
8. Use `read_raw_span` to zoom into specific raw files when you need to resolve what two contested measures or wiki pages actually describe.
|
||||
9. Exit when you've processed every item.
|
||||
</workflow>
|
||||
|
||||
<scope>
|
||||
|
|
|
|||
|
|
@ -11,9 +11,10 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing
|
|||
2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `knowledge_capture`, and `ingest_triage` last. The triage skill tells you how to react when `wiki_sl_search` reveals that a prior WU already wrote something overlapping.
|
||||
3. If the system prompt includes `<canonical_pins>`, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain.
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `wiki_sl_search` for each candidate name to find prior-WU writes; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip.
|
||||
5. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit` over `sl_write` for that artifact: the re-ingest change rule says expression-only changes replace silently, grain/column/filter changes replace and flag.
|
||||
6. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written.
|
||||
7. When you're done, exit the loop without further tool calls.
|
||||
5. For every `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call, include `rawPaths` with only the raw file paths that directly support that action. If one artifact synthesizes several files, list each contributing raw file. Do not include unrelated files from the same WorkUnit.
|
||||
6. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit` over `sl_write` for that artifact: the re-ingest change rule says expression-only changes replace silently, grain/column/filter changes replace and flag.
|
||||
7. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written.
|
||||
8. When you're done, exit the loop without further tool calls.
|
||||
</workflow>
|
||||
|
||||
<scope>
|
||||
|
|
@ -23,6 +24,8 @@ All wiki writes go to the GLOBAL scope. Bundle ingests are not personal. The `wi
|
|||
<do_not>
|
||||
- Do not read peer files; only files listed in `rawFiles` or `dependencyPaths` are accessible. `read_raw_file` will reject everything else.
|
||||
- Do not invent measures/joins/rules not declared in the raw files.
|
||||
- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`wiki_sl_search`, `sl_discover`, `sl_describe_table`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source.
|
||||
- Do not write context-source overlays into the context source connection just because that is the current WorkUnit connection. Use `sl_discover` across data sources and write the SL artifact to the warehouse/data-source connection that owns the matching manifest. If there is no confirmed target connection, use `emit_unmapped_fallback` and wiki capture.
|
||||
- Do not duplicate an artifact that prior provenance says you already produced; update it.
|
||||
- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`.
|
||||
</do_not>
|
||||
|
|
|
|||
|
|
@ -19,6 +19,18 @@ Use this skill for **uploaded** dbt projects (`dbt_project.yml` at stage root, `
|
|||
| `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `wiki_sl_search` / filters. |
|
||||
| `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `wiki_sl_search` / `sl_describe_table` | If the ref cannot be resolved, capture the intent in a wiki page instead. |
|
||||
|
||||
## Physical schema grounding
|
||||
|
||||
dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `wiki_sl_search`, `sl_discover`, or `sl_describe_table` and use only confirmed column names in `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`.
|
||||
|
||||
For dbt context-source ingest, the dbt connection is usually not the warehouse connection. Call `sl_discover` without `connectionId` first, then write overlays to the connection that owns the matching manifest-backed source (for example `postgres-warehouse`), not to the dbt connection (for example `dbt-main`). If no matching manifest-backed source is visible on any warehouse connection, do not call `sl_write_source`; record `emit_unmapped_fallback` and keep the fact wiki-only.
|
||||
|
||||
If a `models:` entry has no `columns:` block, or the available raw files do not confirm the physical column names, do **not** synthesize a full standalone source. Write a wiki note or a description-only overlay for the resolved manifest table instead. If a business metric is described but its referenced column is not confirmed in the warehouse schema, omit the measure and capture the unresolved intent in the wiki.
|
||||
|
||||
Include `rawPaths` on every `wiki_write`, `sl_write_source`, and `sl_edit_source` call with only the dbt YAML files that directly support the action.
|
||||
|
||||
After every `sl_write_source`, call `sl_validate`. A validation error saying a declared column or measure reference is absent from the physical table is a hard stop: re-read the warehouse-backed source and rewrite with confirmed names, or remove the invalid SL fields.
|
||||
|
||||
## 1.1 test hints (descriptions / meta)
|
||||
|
||||
When YAML shows `accepted_values` or `not_null`, add **short** hints into `columns[].descriptions` (e.g. under `user`) or freeform column notes so chat and validation see intent before the next git sync refreshes `constraints` / `enum_values` in `_schema`. Keep hints under a few words when possible.
|
||||
|
|
@ -30,5 +42,7 @@ If the same bundle also has MetricFlow `semantic_models:` / `metrics:`, the **`m
|
|||
## Do not
|
||||
|
||||
- Do not run `dbt` CLI or assume `target/` / `manifest.json` exists in the upload.
|
||||
- Do not invent column names, grain keys, or measure expressions from dbt model names, descriptions, tests, or common naming patterns.
|
||||
- Do not write `columns:`, `grain:`, or `measures:` for a dbt model unless those exact column names are confirmed by dbt YAML columns or warehouse schema discovery.
|
||||
- Do not invent joins from `relationships` tests if the target model/table is not found in SL or the warehouse.
|
||||
- Do not read `peerFileIndex` paths — use `read_raw_file` only on `rawFiles` and `dependencyPaths` from the WorkUnit.
|
||||
|
|
|
|||
|
|
@ -42,10 +42,12 @@ If nothing is worth capturing, respond without calling any tool.
|
|||
2. **Before writing**, search for related content so cross-references are accurate:
|
||||
- `wiki_search` with the topic — find related wiki pages to populate `refs`.
|
||||
- `sl_discover` with the concept — if the page defines a metric (revenue, churn, retention, LTV, ARR, MRR, CAC, attribution, etc.), find matching SL sources or measures to populate `sl_refs`. If no matches, pass `sl_refs: []` so future readers know you checked.
|
||||
3. If updating an existing page, `wiki_read` it first. The read result begins with `[scope: ... | tags: ... | refs: ... | sl_refs: ...]` showing current frontmatter.
|
||||
3. If updating an existing page, `wiki_read` it first. Use the returned `structured.content` or markdown body as the exact stored text for targeted replacements; current tags, refs, and sl_refs are returned in structured metadata.
|
||||
4. `wiki_write` to create or update. Prefer merging into an existing page over creating a new one.
|
||||
5. `wiki_remove` only when a page is truly obsolete — not to replace stale content (update it instead).
|
||||
|
||||
For bundle/external ingest, include `rawPaths` on every `wiki_write`/`wiki_remove` call with only the raw files that directly support that wiki action. This keeps ingest provenance tied to the actual source file, not every file in the WorkUnit.
|
||||
|
||||
## Keys, summaries, and content
|
||||
|
||||
- **Keys** are short kebab-case topic identifiers: `leads-source-filter`, `revenue-definition`, `churn-calculation`. No namespacing, no prefixes.
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ When `targetTable.ok === true`, the explore has a complete KTX backing target. B
|
|||
|
||||
1. Use `targetTable.catalog`, `targetTable.schema`, and `targetTable.name` for `source_tables` preflight matching through `sl_discover` or `sl_read_source`.
|
||||
2. Use Looker field `sql`, labels, descriptions, and type metadata to derive source columns, measures, segments, joins, and grain.
|
||||
3. Call `sl_write_source` or `sl_edit_source` with `connectionId: targetWarehouseConnectionId`.
|
||||
3. Call `sl_write_source` or `sl_edit_source` with `connectionId: targetWarehouseConnectionId` and `rawPaths` set to the staged explore path.
|
||||
4. Set `source.name` to the deterministic API-derived source key, for example `looker__b2b__sales_pipeline`.
|
||||
5. Set `source.table` to `targetTable.canonicalTable`.
|
||||
6. Run `sl_validate` after every SL write.
|
||||
|
|
@ -85,13 +85,13 @@ The `table` field is `targetTable.canonicalTable`, not `rawSqlTableName`. Raw Lo
|
|||
|
||||
Use `targetTable.{catalog,schema,name}` only for source_tables preflight. Do not put those tuple fields separately into the SL source unless the SL schema already asks for them.
|
||||
|
||||
When `targetTable.ok === false`, keep the WU wiki-only for SL purposes. Capture durable domain semantics with `context_candidate_write`, then emit a fallback with the EXACT structured `reason` code from `targetTable.reason`. Put any human-readable context in `detail`, NOT in `reason`:
|
||||
When `targetTable.ok === false`, keep the WU wiki-only for SL purposes. Capture durable domain semantics with `context_candidate_write`, then emit a fallback with the EXACT structured `reason` code from `targetTable.reason`. Put any human-readable context in `clarification`, NOT in `reason`:
|
||||
|
||||
```json
|
||||
{
|
||||
"rawPath": "explores/b2b/sales_pipeline.json",
|
||||
"reason": "no_connection_mapping",
|
||||
"detail": "Looker connection b2b_sandbox_bq is not mapped to a warehouse connection",
|
||||
"clarification": "Looker connection b2b_sandbox_bq is not mapped to a warehouse connection",
|
||||
"fallback": "wiki_only"
|
||||
}
|
||||
```
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ When SL is allowed:
|
|||
- **Overlay** when the view is a thin wrapper over a manifest table (`sql_table_name:` matches a manifest entry). Do not repeat base columns or grain.
|
||||
- **Standalone** when the view uses `derived_table:` or `sql_always_where:`. `sl_write_source` rejects overlays whose name has no manifest entry; that error points here.
|
||||
- **Skip** a view with only `view:`, `sql_table_name:`, and bare `dimension:` entries (no `measure:`, `description:`, `derived_table:`, `sql_always_where:`, `join:`). The pre-filter already short-circuits those.
|
||||
- Include `rawPaths` on every `sl_write_source`/`sl_edit_source` call with the exact LookML raw file(s) that support the action.
|
||||
|
||||
## Preflight: never guess column names
|
||||
|
||||
|
|
|
|||
|
|
@ -48,8 +48,9 @@ Use `resultMetadata` to:
|
|||
|
||||
For each card:
|
||||
1. Analyze `resolvedSql` + `resultMetadata`: identify base tables, aggregations, joins, filters, column types.
|
||||
2. Check `sl_discover` and `sl_read_source` for existing sources that overlap.
|
||||
3. Decide:
|
||||
2. **REQUIRED before any write**: call `sl_discover` for every candidate target source name. The response tells you whether the name is manifest-backed (`Type: table` or `Type: sql`). For manifest-backed names you MUST use the overlay shape (`name:` + `measures:`/`segments:`/`description:` only — no `sql:`, `table:`, `grain:`, or `columns:`); the tool will reject a standalone write and you'll have wasted the call. If `sl_discover` returns nothing for the name, you can write a standalone source. Also call `sl_read_source` on existing sources you intend to extend so you don't duplicate measures.
|
||||
3. Include `rawPaths: ["cards/<id>.json"]` on every `sl_write_source`, `sl_edit_source`, and `wiki_write` call. If one artifact generalizes multiple near-duplicate cards, include each contributing card path and no unrelated cards.
|
||||
4. Decide:
|
||||
- Simple aggregation on a table that already has a source → `sl_edit_source` to add a measure.
|
||||
- Join between tables that should be linked in the SL graph → `sl_edit_source` to add a join.
|
||||
- Complex derived SQL (CTEs, multi-layer aggregation, scoring models) → `sl_write_source` with `source_type: sql`. When the SQL projects/filters from a single manifest-backed base table, set `inherits_columns_from: <manifest_key>` so columns inherit type and description from the manifest — see `sl_capture` skill for the slim form. Use `sl_discover` to discover the manifest key from the table reference in the SQL (it accepts `MARTS.CONSIGNMENTS`, `ANALYTICS.MARTS.CONSIGNMENTS`, or `CONSIGNMENTS`).
|
||||
|
|
@ -57,7 +58,7 @@ For each card:
|
|||
- Trivial query (`SELECT *`, simple `COUNT(*)` with no business logic) → do nothing; the runner will record this card as `action_type='skipped'`.
|
||||
- Duplicate of an existing measure → same as trivial; do nothing for this card.
|
||||
|
||||
**Manifest-only names need an overlay first.** If `sl_discover` shows a source name with `Type: table` but `sl_read_source` returns "Source not found", the source lives only in the schema manifest (no standalone overlay yet). `sl_edit_source` cannot edit manifest-only names — you must bootstrap an overlay with `sl_write_source` using the overlay shape:
|
||||
**Manifest-only names need an overlay first.** If `sl_discover` shows a source name with `Type: table` but `sl_read_source` returns "Source not found", the source lives only in the schema manifest (no standalone overlay yet). `sl_edit_source` cannot edit manifest-only names, and a full standalone `sl_write_source` for that name would shadow manifest columns and joins. Bootstrap an overlay with `sl_write_source` using the overlay shape:
|
||||
|
||||
```yaml
|
||||
name: <SOURCE_NAME>
|
||||
|
|
@ -68,7 +69,9 @@ measures:
|
|||
|
||||
Overlay shape: `name:` plus any of `measures:`, `segments:`, `description:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: ["<on-clause>"]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule.
|
||||
|
||||
**Join discovery:** When your card's SQL references warehouse tables (e.g. in `FROM` or `JOIN` clauses), call `sl_discover({ query: '<table>' })` before writing. The matching manifest entry's `name` is the value you put in `joins: [- to: <name>]`. Use `many_to_one` for FK-to-dimension joins, `one_to_many` for the reverse.
|
||||
**Join discovery:** When your card's SQL references warehouse tables (e.g. in `FROM` or `JOIN` clauses), call `sl_discover({ query: '<table>' })` before writing. The matching manifest entry's `name` is the value you use in `joins: [- to: <name>]` only when the card output exposes a local key that matches the target source grain (for example `account_id = mart_account_segments.account_id`). Do not declare a KTX join just because the card SQL joins that table internally. If the output only exposes display fields such as `account_name`, keep the SQL source self-contained or project the key before adding the join. Use `many_to_one` for FK-to-dimension joins, `one_to_many` for the reverse.
|
||||
|
||||
**Hard rule on join columns (prevents broken joins):** For every join you declare, the local column on the left of `on:` MUST be (a) present in your source's projected output and (b) a key/ID column, never a display value. If the natural FK isn't in your SELECT, add it to SELECT before declaring the join. Joining `account_name = mart_account_segments.account_id` is always wrong — names are not identifiers and the equality produces zero matches. The validator rejects this with a "display value to identifier" error; the tool will refuse to save it. Add `account_id` to your SELECT and join on `account_id = mart_account_segments.account_id`, or omit the join entirely.
|
||||
|
||||
## priorProvenance
|
||||
|
||||
|
|
@ -162,7 +165,7 @@ After Steps A and B, your SQL must:
|
|||
- Reference no aliases that aren't defined inside the SQL itself.
|
||||
- Be valid as a standalone subquery (the validator runs `SELECT * FROM (your_sql) LIMIT 1`).
|
||||
|
||||
If `resolutionStatus: "fallback"` and the SQL is still complex enough that you can't confidently translate it, **skip the card** rather than writing broken SQL. Call `emit_unmapped_fallback` with the staged card path as `rawPath`, `reason: "metabase_sql_untranslated"`, and `fallback: "flagged"`.
|
||||
If `resolutionStatus: "fallback"` and the SQL is still complex enough that you can't confidently translate it, **skip the card** rather than writing broken SQL. Call `emit_unmapped_fallback` with the staged card path as `rawPath`, `reason: "parse_error"`, `clarification: "metabase_sql_untranslated"`, and `fallback: "flagged"`.
|
||||
|
||||
## Join-graph connectivity
|
||||
|
||||
|
|
@ -171,8 +174,9 @@ For `source_type: table`:
|
|||
- Match column names ending in `_id` against existing sources' grain columns.
|
||||
|
||||
For `source_type: sql`:
|
||||
- The validator parses your SQL and **rejects the write** if any FROM/JOIN table has a manifest entry that you did not declare in `joins:`. The error names every missing join target — declare a `many_to_one` join for each and reissue.
|
||||
- Tables outside the manifest (schemas not covered by this connection — e.g. `staging.*` referenced from a MARTS source) are not flagged. For those, write a single-line `wiki_write` with key `unmapped-table-<table_name>` so the gap is documented, then call `emit_unmapped_fallback` with the staged card path as `rawPath`, `reason: "table_outside_manifest"`, and `fallback: "wiki_only"`.
|
||||
- The validator parses your SQL and rejects the write when a referenced manifest table has a viable projected local key but no declared `joins:` entry. Add the join only after confirming the output key and target grain match.
|
||||
- If `sl_discover` resolves the table, it is not outside the manifest. Do not write an `unmapped-table-*` fallback for resolved `orbit_raw`, `mart`, or other manifest-backed sources just because they appear inside card SQL.
|
||||
- If `sl_discover` cannot resolve a referenced table at all, write a single-line `wiki_write` with key `unmapped-table-<table_name>` and `rawPaths: ["cards/<id>.json"]` so the gap is documented, then call `emit_unmapped_fallback` with the staged card path as `rawPath`, `reason: "missing_target_table"`, `tableRef: "<table_name>"`, and `fallback: "wiki_only"`. Do not use this fallback if `sl_discover` resolved the table/source.
|
||||
|
||||
Joins on manifest-backed names compose: the manifest's joins are inherited automatically, and any overlay `joins:` are merged on top (deduped by `to` + `on`). Use `disable_joins: ["<on-clause>"]` in the overlay to suppress a specific manifest join. If `sl_discover` shows a manifest-backed source with `Joins: 0` and the warehouse FK metadata is genuinely absent, declaring application-level joins via the overlay is fair game — bootstrap with `sl_write_source` (overlay shape above), then refine via `sl_edit_source`.
|
||||
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@ Each WorkUnit is either a single Notion page/span or a topical cluster of relate
|
|||
2. For each assigned page, call `read_raw_file`, or `read_raw_span` for oversized pages when the notes specify a span.
|
||||
3. Search `wiki_search` for existing pages that overlap the WorkUnit topics. Prefer updating an existing page over creating a duplicate.
|
||||
4. Use `context_evidence_search`, `context_evidence_read`, and `context_evidence_neighbors` to pull supporting chunks when indexed evidence is relevant. Pass `chunkId` and `documentId` values verbatim as returned by the evidence tools.
|
||||
5. Write durable business knowledge with `wiki_write`. Aim for a small number of high-quality pages per WorkUnit or cluster.
|
||||
6. When the Notion content defines a reusable dataset, metric, segment, join rule, source-of-truth mapping, or table with explicit columns, load `sl_capture`, discover existing sources first with `sl_discover` or `sl_read_source`, then use `sl_write_source` or `sl_edit_source`.
|
||||
5. Write durable business knowledge with `wiki_write`. Aim for a small number of high-quality pages per WorkUnit or cluster. Include `rawPaths` with the exact Notion raw files that support each page.
|
||||
6. When the Notion content defines a reusable dataset, metric, segment, join rule, source-of-truth mapping, or table with explicit columns, load `sl_capture`, discover existing sources first with `sl_discover` or `sl_read_source`, then use `sl_write_source` or `sl_edit_source` only for a confirmed mapped non-Notion target source. Include `rawPaths` with the exact Notion raw files that support the SL action. If no mapped target exists, call `emit_unmapped_fallback` and keep the content wiki-only.
|
||||
7. For every deleted raw path in the Eviction Set, call `eviction_list`, decide retention, then `context_eviction_decision_write`. Do this even when no wiki write is needed.
|
||||
|
||||
## What To Capture
|
||||
|
|
@ -46,6 +46,8 @@ Prefer fewer, stronger entries. Every wiki entry must cite at least one Notion p
|
|||
|
||||
If a clustered WorkUnit includes several related pages, synthesize the shared rule or concept instead of writing one thin page per source. For oversized page spans, read only the assigned span unless the WorkUnit explicitly asks for neighboring context.
|
||||
|
||||
Search existing wiki pages for the same `tables:` or `sl_refs:` frontmatter and for source-of-truth aliases before creating a new page. If an existing page already documents the same warehouse object or business concept, update it instead of creating a differently named duplicate.
|
||||
|
||||
## Citation Style
|
||||
|
||||
```md
|
||||
|
|
@ -61,9 +63,14 @@ If a clustered WorkUnit includes several related pages, synthesize the shared ru
|
|||
- Discover existing sources first with `sl_discover`; read existing source YAML before editing.
|
||||
- Prefer overlays on manifest-backed sources over standalone SQL.
|
||||
- If Notion describes a dashboard or metric but does not define executable logic, write a wiki page and attach `sl_refs` only after confirming the referenced source exists.
|
||||
- Notion `dataSourceCount` counts Notion databases/data sources only. It does not prove that a warehouse/dbt table has or lacks a mapped semantic-layer source.
|
||||
- Do not create SL sources under the Notion connection just because a page mentions a warehouse, dbt, Looker, or Metabase object. Use the mapped warehouse/source connection after discovery, or emit an unmapped fallback and write wiki-only.
|
||||
- Distinguish fallback reasons precisely: if a non-Notion warehouse/dbt connection exists but `sl_discover` cannot find the named table/source, use `no_physical_table`; reserve `no_connection_mapping` for cases where there is no plausible non-Notion target connection at all.
|
||||
- If `sl_discover` resolves the table/source, do not call `emit_unmapped_fallback` for that table. Use the resolved source for `sl_refs`, overlay edits, or wiki-only documentation.
|
||||
- When calling `emit_unmapped_fallback`, pass the table or source identifier as `tableRef` (e.g. `tableRef: "orbit_analytics.customer"`) — the tool generates the canonical detail string from the reason code and `tableRef`. Use the optional `clarification` field only to add context that does not contradict the reason. Do not restate the reason in `clarification`.
|
||||
|
||||
## Tools
|
||||
|
||||
Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `eviction_list`, `context_eviction_decision_write`.
|
||||
Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `emit_unmapped_fallback`, `eviction_list`, `context_eviction_decision_write`.
|
||||
|
||||
Not allowed: `context_candidate_write`, `context_candidate_mark`.
|
||||
|
|
|
|||
|
|
@ -190,6 +190,7 @@ use it) without changing its SQL expression or filters.
|
|||
- **`sl_edit_source`** is the workhorse for additive changes: add a measure, add a join, tweak a description, replace a filter. Cheap, targeted, preserves the rest of the file.
|
||||
- **`sl_write_source`** is for brand-new sources or when the entire file needs restructuring. It overwrites the file completely.
|
||||
- Do NOT modify existing measures or their descriptions unless the current turn explicitly corrects them.
|
||||
- During bundle/external ingest, include `rawPaths` on every `sl_write_source`/`sl_edit_source` call with only the raw files that directly support the SL action.
|
||||
|
||||
## Worked example — additive overlay
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ export {
|
|||
notionConnectionToPullConfig,
|
||||
parseNotionConnectionConfig,
|
||||
redactNotionConnectionConfig,
|
||||
resolveNotionConnectionAuthToken,
|
||||
resolveNotionAuthToken,
|
||||
type KtxNotionConnectionConfig,
|
||||
type RedactedKtxNotionConnectionConfig,
|
||||
|
|
|
|||
|
|
@ -30,18 +30,36 @@ describe('standalone Notion connection config', () => {
|
|||
|
||||
expect(parsed).toEqual({
|
||||
driver: 'notion',
|
||||
auth_token: null,
|
||||
auth_token_ref: 'env:NOTION_TOKEN',
|
||||
crawl_mode: 'selected_roots',
|
||||
root_page_ids: ['page-1'],
|
||||
root_database_ids: [],
|
||||
root_data_source_ids: [],
|
||||
max_pages_per_run: 1000,
|
||||
max_knowledge_creates_per_run: 5,
|
||||
max_knowledge_creates_per_run: 25,
|
||||
max_knowledge_updates_per_run: 20,
|
||||
last_successful_cursor: null,
|
||||
});
|
||||
});
|
||||
|
||||
it('parses inline Notion auth tokens without requiring auth_token_ref', () => {
|
||||
const parsed = parseNotionConnectionConfig({
|
||||
driver: 'notion',
|
||||
auth_token: ' ntn_inline_token ',
|
||||
crawl_mode: 'selected_roots',
|
||||
root_page_ids: ['page-1'],
|
||||
});
|
||||
|
||||
expect(parsed).toMatchObject({
|
||||
driver: 'notion',
|
||||
auth_token: 'ntn_inline_token',
|
||||
auth_token_ref: null,
|
||||
crawl_mode: 'selected_roots',
|
||||
root_page_ids: ['page-1'],
|
||||
});
|
||||
});
|
||||
|
||||
it('redacts token references from display output', () => {
|
||||
expect(
|
||||
redactNotionConnectionConfig(
|
||||
|
|
@ -60,7 +78,7 @@ describe('standalone Notion connection config', () => {
|
|||
rootDatabaseIds: [],
|
||||
rootDataSourceIds: [],
|
||||
maxPagesPerRun: 80,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
warning: 'Anything accessible to this Notion integration can become organization knowledge.',
|
||||
});
|
||||
|
|
@ -117,4 +135,23 @@ describe('standalone Notion connection config', () => {
|
|||
lastSuccessfulCursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}',
|
||||
});
|
||||
});
|
||||
|
||||
it('uses inline Notion auth_token when building adapter pull config', async () => {
|
||||
const pullConfig = await notionConnectionToPullConfig(
|
||||
parseNotionConnectionConfig({
|
||||
driver: 'notion',
|
||||
auth_token: 'ntn_inline_token',
|
||||
auth_token_ref: 'env:STALE_NOTION_TOKEN',
|
||||
crawl_mode: 'all_accessible',
|
||||
}),
|
||||
{
|
||||
env: {},
|
||||
readTextFile: async () => {
|
||||
throw new Error('readTextFile should not be called for inline auth_token');
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
expect(pullConfig.authToken).toBe('ntn_inline_token');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { type NotionPullConfig, notionPullConfigSchema } from '../ingest/adapters/notion/types.js';
|
||||
import {
|
||||
NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
type NotionPullConfig,
|
||||
notionPullConfigSchema,
|
||||
} from '../ingest/adapters/notion/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../project/config.js';
|
||||
|
||||
export const KTX_NOTION_ORG_KNOWLEDGE_WARNING =
|
||||
|
|
@ -11,7 +15,8 @@ type KtxNotionCrawlMode = 'all_accessible' | 'selected_roots';
|
|||
|
||||
export interface KtxNotionConnectionConfig extends KtxProjectConnectionConfig {
|
||||
driver: 'notion';
|
||||
auth_token_ref: string;
|
||||
auth_token: string | null;
|
||||
auth_token_ref: string | null;
|
||||
crawl_mode: KtxNotionCrawlMode;
|
||||
root_page_ids: string[];
|
||||
root_database_ids: string[];
|
||||
|
|
@ -89,11 +94,12 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo
|
|||
if (input.driver !== 'notion') {
|
||||
throw new Error('Notion connection config requires driver: notion');
|
||||
}
|
||||
const authTokenRef = stringValue(input.auth_token_ref, '');
|
||||
if (!authTokenRef) {
|
||||
throw new Error('Notion connection config requires auth_token_ref');
|
||||
const authToken = optionalString(input.auth_token);
|
||||
const authTokenRef = optionalString(input.auth_token_ref);
|
||||
if (!authToken && !authTokenRef) {
|
||||
throw new Error('Notion connection config requires auth_token or auth_token_ref');
|
||||
}
|
||||
if (!authTokenRef.startsWith('env:') && !authTokenRef.startsWith('file:')) {
|
||||
if (authTokenRef && !authTokenRef.startsWith('env:') && !authTokenRef.startsWith('file:')) {
|
||||
throw new Error('Notion auth_token_ref must use env:NAME or file:/path');
|
||||
}
|
||||
|
||||
|
|
@ -111,6 +117,7 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo
|
|||
return {
|
||||
...input,
|
||||
driver: 'notion',
|
||||
auth_token: authToken,
|
||||
auth_token_ref: authTokenRef,
|
||||
crawl_mode: crawlMode,
|
||||
root_page_ids: rootPageIds,
|
||||
|
|
@ -119,7 +126,7 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo
|
|||
max_pages_per_run: boundedInteger(input.max_pages_per_run, 1000, 'max_pages_per_run', 1, 10_000),
|
||||
max_knowledge_creates_per_run: boundedInteger(
|
||||
input.max_knowledge_creates_per_run,
|
||||
5,
|
||||
NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
'max_knowledge_creates_per_run',
|
||||
0,
|
||||
25,
|
||||
|
|
@ -138,7 +145,7 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo
|
|||
export function redactNotionConnectionConfig(config: KtxNotionConnectionConfig): RedactedKtxNotionConnectionConfig {
|
||||
return {
|
||||
driver: 'notion',
|
||||
hasAuthToken: Boolean(config.auth_token_ref),
|
||||
hasAuthToken: Boolean(config.auth_token ?? config.auth_token_ref),
|
||||
crawlMode: config.crawl_mode,
|
||||
rootPageIds: config.root_page_ids,
|
||||
rootDatabaseIds: config.root_database_ids,
|
||||
|
|
@ -178,12 +185,20 @@ export async function resolveNotionAuthToken(
|
|||
throw new Error('Notion auth_token_ref must use env:NAME or file:/path');
|
||||
}
|
||||
|
||||
export async function resolveNotionConnectionAuthToken(
|
||||
config: Pick<KtxNotionConnectionConfig, 'auth_token' | 'auth_token_ref'>,
|
||||
options: ResolveNotionTokenOptions = {},
|
||||
): Promise<string> {
|
||||
return config.auth_token ?? (await resolveNotionAuthToken(config.auth_token_ref ?? '', options));
|
||||
}
|
||||
|
||||
export async function notionConnectionToPullConfig(
|
||||
config: KtxNotionConnectionConfig,
|
||||
options: ResolveNotionTokenOptions = {},
|
||||
): Promise<NotionPullConfig> {
|
||||
const authToken = await resolveNotionConnectionAuthToken(config, options);
|
||||
return notionPullConfigSchema.parse({
|
||||
authToken: await resolveNotionAuthToken(config.auth_token_ref, options),
|
||||
authToken,
|
||||
crawlMode: config.crawl_mode,
|
||||
rootPageIds: config.root_page_ids,
|
||||
rootDatabaseIds: config.root_database_ids,
|
||||
|
|
|
|||
|
|
@ -48,4 +48,10 @@ describe('DbtSourceAdapter', () => {
|
|||
it('implements fetch() for git-backed dbt source setup', () => {
|
||||
expect(adapter.fetch).toBeTypeOf('function');
|
||||
});
|
||||
|
||||
it('reports mapped warehouse targets for bundle SL discovery', async () => {
|
||||
adapter = new DbtSourceAdapter({ targetConnectionIds: ['postgres-warehouse', 'postgres-warehouse'] });
|
||||
|
||||
await expect(adapter.listTargetConnectionIds?.(stagedDir)).resolves.toEqual(['postgres-warehouse']);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { parseDbtStagedDir } from './parse.js';
|
|||
|
||||
interface DbtSourceAdapterOptions {
|
||||
homeDir?: string;
|
||||
targetConnectionIds?: string[];
|
||||
}
|
||||
|
||||
export class DbtSourceAdapter implements SourceAdapter {
|
||||
|
|
@ -24,6 +25,10 @@ export class DbtSourceAdapter implements SourceAdapter {
|
|||
return detectDbtStagedDir(stagedDir);
|
||||
}
|
||||
|
||||
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
|
||||
return [...new Set(this.options.targetConnectionIds ?? [])].sort((left, right) => left.localeCompare(right));
|
||||
}
|
||||
|
||||
async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
|
||||
const config = pullConfig as DbtPullConfig | undefined;
|
||||
if (!config?.repoUrl) {
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ async function readJson(path: string): Promise<unknown> {
|
|||
async function writeYamlAtomic(path: string, value: unknown): Promise<void> {
|
||||
await mkdir(dirname(path), { recursive: true });
|
||||
const tmp = `${path}.tmp`;
|
||||
await writeFile(tmp, YAML.stringify(value, { indent: 2, lineWidth: 0 }), 'utf-8');
|
||||
await writeFile(tmp, YAML.stringify(value, { indent: 2, lineWidth: 0, version: '1.1' }), 'utf-8');
|
||||
await rename(tmp, path);
|
||||
}
|
||||
|
||||
|
|
@ -270,7 +270,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
}
|
||||
}
|
||||
}
|
||||
const after = YAML.stringify(shard, { indent: 2, lineWidth: 0 });
|
||||
const after = YAML.stringify(shard, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
if (after !== before) {
|
||||
await writeYamlAtomic(path, shard);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import { notionManifestSchema, notionMetadataSchema } from './types.js';
|
|||
const MAX_NOTION_WORK_UNIT_CHARS = 40_000;
|
||||
export const NOTION_ORG_KNOWLEDGE_WARNING =
|
||||
'Anything accessible to this Notion integration can become organization knowledge.';
|
||||
const NOTION_SL_WRITE_GUIDANCE =
|
||||
'Write wiki entries with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. Search existing wiki pages for the same tables or sl_refs before creating a new page. Only write or edit SL sources after sl_discover/sl_read_source confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. Do not create SL sources under the Notion connection just because a page mentions a warehouse table.';
|
||||
|
||||
async function walk(root: string): Promise<string[]> {
|
||||
const entries = await readdir(root, { withFileTypes: true, recursive: true });
|
||||
|
|
@ -92,7 +94,7 @@ export async function chunkNotionStagedDir(stagedDir: string, diffSet?: DiffSet)
|
|||
rawFiles,
|
||||
dependencyPaths,
|
||||
peerFileIndex,
|
||||
notes: `Synthesize durable wiki and SL knowledge from this Notion page span only. Use read_raw_span on ${pagePath} for lines ${range.startLine}-${range.endLine}; do not call read_raw_file for oversized pages. Cite evidence chunk/page IDs.`,
|
||||
notes: `Synthesize durable wiki and SL knowledge from this Notion page span only. Use read_raw_span on ${pagePath} for lines ${range.startLine}-${range.endLine}; do not call read_raw_file for oversized pages. ${NOTION_SL_WRITE_GUIDANCE} Cite evidence chunk/page IDs.`,
|
||||
});
|
||||
}
|
||||
continue;
|
||||
|
|
@ -105,7 +107,7 @@ export async function chunkNotionStagedDir(stagedDir: string, diffSet?: DiffSet)
|
|||
dependencyPaths,
|
||||
peerFileIndex,
|
||||
notes:
|
||||
'Synthesize durable wiki and SL knowledge from this Notion page. Write wiki entries with wiki_write and SL sources with sl_write_source; cite evidence chunk/page IDs.',
|
||||
`Synthesize durable wiki and SL knowledge from this Notion page. ${NOTION_SL_WRITE_GUIDANCE} Cite evidence chunk/page IDs.`,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -115,6 +117,8 @@ export async function chunkNotionStagedDir(stagedDir: string, diffSet?: DiffSet)
|
|||
reconcileNotes: [
|
||||
`Notion maxKnowledgeCreatesPerRun=${manifest.maxKnowledgeCreatesPerRun}`,
|
||||
`Notion maxKnowledgeUpdatesPerRun=${manifest.maxKnowledgeUpdatesPerRun}`,
|
||||
'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.',
|
||||
'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.',
|
||||
],
|
||||
contextReport: {
|
||||
capped: manifest.capped,
|
||||
|
|
|
|||
|
|
@ -79,9 +79,27 @@ describe('clusterNotionWorkUnits', () => {
|
|||
expect(wu.unitKey).toMatch(/^notion-cluster-\d+$/);
|
||||
expect(wu.rawFiles.length).toBeGreaterThan(0);
|
||||
expect(wu.notes).toMatch(/Synthesize/);
|
||||
expect(wu.notes).toContain('emit_unmapped_fallback');
|
||||
expect(wu.notes).toContain('Do not create SL sources under the Notion connection');
|
||||
}
|
||||
});
|
||||
|
||||
test('merges pages into one synthesis unit at the clustering threshold', async () => {
|
||||
const pages = Array.from({ length: MIN_PAGES_TO_CLUSTER }, (_, i) => ({
|
||||
id: `p${i}`,
|
||||
title: `Customer source reference ${i}`,
|
||||
body: `Customer source reference maps to orbit_analytics.customer ${i}`.repeat(10),
|
||||
}));
|
||||
const stagedDir = await makeStaged(pages);
|
||||
const wus = makeWorkUnits(pages);
|
||||
const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: mockEmbed });
|
||||
expect(out).toHaveLength(1);
|
||||
expect(out[0].unitKey).toBe('notion-cluster-1');
|
||||
expect(new Set(out[0].rawFiles)).toEqual(new Set(wus.flatMap((wu) => wu.rawFiles)));
|
||||
expect(out[0].notes).toContain('emit_unmapped_fallback');
|
||||
expect(out[0].notes).toContain('Do not create SL sources under the Notion connection');
|
||||
});
|
||||
|
||||
test('preserves coverage: every input rawFile appears in some cluster', async () => {
|
||||
const pages = Array.from({ length: 12 }, (_, i) => ({
|
||||
id: `p${i}`,
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ import { notionMetadataSchema } from './types.js';
|
|||
export const MIN_PAGES_TO_CLUSTER = 5;
|
||||
const CLUSTER_TEXT_BODY_CHARS = 1024;
|
||||
const CLUSTER_SEED = 42;
|
||||
const NOTION_CLUSTER_SL_WRITE_GUIDANCE =
|
||||
'Write wiki entries directly with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. Search existing wiki pages for the same tables or sl_refs before creating a new page. Only write or edit SL sources after sl_discover/sl_read_source confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. Do not create SL sources under the Notion connection just because a page mentions a warehouse table.';
|
||||
|
||||
interface ClusterNotionWorkUnitsArgs {
|
||||
workUnits: WorkUnit[];
|
||||
|
|
@ -63,7 +65,7 @@ function mergeWorkUnits(bucket: WorkUnit[], clusterIndex: number): WorkUnit {
|
|||
`Synthesize durable wiki and SL knowledge from these ${bucket.length} related Notion pages. ` +
|
||||
'Read each page with read_raw_file (or read_raw_span for oversized pages). ' +
|
||||
'Search nearby evidence with context_evidence_search/_read/_neighbors when needed. ' +
|
||||
'Write wiki entries directly with wiki_write and SL sources directly with sl_write_source. ' +
|
||||
`${NOTION_CLUSTER_SL_WRITE_GUIDANCE} ` +
|
||||
'Do not call context_candidate_write.',
|
||||
};
|
||||
}
|
||||
|
|
@ -72,7 +74,7 @@ export async function clusterNotionWorkUnits(args: ClusterNotionWorkUnitsArgs):
|
|||
const { workUnits, stagedDir, embedding } = args;
|
||||
if (workUnits.length < MIN_PAGES_TO_CLUSTER) return workUnits;
|
||||
const k = pickK(workUnits.length);
|
||||
if (k <= 1) return workUnits;
|
||||
if (k <= 1) return [mergeWorkUnits(workUnits, 0)];
|
||||
const texts = await Promise.all(workUnits.map((wu) => buildClusterText(wu, stagedDir)));
|
||||
let vectors: number[][];
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ describe('NotionSourceAdapter', () => {
|
|||
continuedFromCursor: false,
|
||||
partialSnapshot: true,
|
||||
maxPagesPerRun: 1,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
skipped: [],
|
||||
warnings: ['maxPagesPerRun reached at 1'],
|
||||
|
|
@ -167,7 +167,7 @@ describe('NotionSourceAdapter', () => {
|
|||
continuedFromCursor: true,
|
||||
partialSnapshot: true,
|
||||
maxPagesPerRun: 100,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
nextSuccessfulCursor: null,
|
||||
skipped: [],
|
||||
|
|
@ -218,7 +218,7 @@ describe('NotionSourceAdapter', () => {
|
|||
continuedFromCursor: false,
|
||||
partialSnapshot: false,
|
||||
maxPagesPerRun: 100,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
skipped: [],
|
||||
warnings: [],
|
||||
|
|
@ -241,13 +241,58 @@ describe('NotionSourceAdapter', () => {
|
|||
dependencyPaths: ['manifest.json', 'pages/page-1/blocks.json'],
|
||||
});
|
||||
expect(result.workUnits[0].notes).toContain('Synthesize durable wiki and SL knowledge');
|
||||
expect(result.workUnits[0].notes).toContain('emit_unmapped_fallback');
|
||||
expect(result.workUnits[0].notes).toContain('use reason no_physical_table rather than no_connection_mapping');
|
||||
expect(result.workUnits[0].notes).toContain('Do not create SL sources under the Notion connection');
|
||||
expect(result.workUnits[0].notes).toContain(
|
||||
'Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview',
|
||||
);
|
||||
expect(result.reconcileNotes).toEqual([
|
||||
'Notion maxKnowledgeCreatesPerRun=5',
|
||||
'Notion maxKnowledgeCreatesPerRun=25',
|
||||
'Notion maxKnowledgeUpdatesPerRun=20',
|
||||
'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.',
|
||||
'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.',
|
||||
]);
|
||||
expect(result.contextReport).toEqual({ capped: false, warnings: [NOTION_ORG_KNOWLEDGE_WARNING] });
|
||||
});
|
||||
|
||||
it('chunks retried pages when failed provenance makes unchanged raw files look added again', async () => {
|
||||
await writeFile(
|
||||
join(stagedDir, 'manifest.json'),
|
||||
JSON.stringify({
|
||||
source: 'notion',
|
||||
apiVersion: '2026-03-11',
|
||||
crawlMode: 'selected_roots',
|
||||
rootPageIds: ['page-1'],
|
||||
rootDatabaseIds: [],
|
||||
rootDataSourceIds: [],
|
||||
fetchedAt: '2026-04-28T00:00:00.000Z',
|
||||
pageCount: 1,
|
||||
databaseCount: 0,
|
||||
dataSourceCount: 0,
|
||||
capped: false,
|
||||
continuedFromCursor: false,
|
||||
partialSnapshot: false,
|
||||
maxPagesPerRun: 100,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
skipped: [],
|
||||
warnings: [],
|
||||
}),
|
||||
'utf-8',
|
||||
);
|
||||
await writePage('page-1', 'Retry Me');
|
||||
|
||||
const result = await adapter.chunk(stagedDir, {
|
||||
added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'],
|
||||
modified: [],
|
||||
deleted: [],
|
||||
unchanged: ['manifest.json', 'pages/page-1/blocks.json'],
|
||||
});
|
||||
|
||||
expect(result.workUnits.map((workUnit) => workUnit.unitKey)).toEqual(['notion-page-page-1']);
|
||||
});
|
||||
|
||||
it('reports malformed manifests with a Notion-specific error', async () => {
|
||||
await writeFile(join(stagedDir, 'manifest.json'), '{bad json', 'utf-8');
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { z } from 'zod';
|
|||
|
||||
export const NOTION_API_VERSION = '2026-03-11';
|
||||
export const NOTION_SOURCE_KEY = 'notion';
|
||||
export const NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN = 25;
|
||||
|
||||
export const notionPullConfigSchema = z.object({
|
||||
authToken: z.string().min(1),
|
||||
|
|
@ -10,7 +11,7 @@ export const notionPullConfigSchema = z.object({
|
|||
rootDatabaseIds: z.array(z.string().min(1)).default([]),
|
||||
rootDataSourceIds: z.array(z.string().min(1)).default([]),
|
||||
maxPagesPerRun: z.number().int().min(1).max(10_000).default(1000),
|
||||
maxKnowledgeCreatesPerRun: z.number().int().min(0).max(25).default(5),
|
||||
maxKnowledgeCreatesPerRun: z.number().int().min(0).max(25).default(NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN),
|
||||
maxKnowledgeUpdatesPerRun: z.number().int().min(0).max(100).default(20),
|
||||
lastSuccessfulCursor: z.string().nullable().default(null),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -315,6 +315,7 @@ export type {
|
|||
MetricflowPullConfig,
|
||||
} from './adapters/metricflow/pull-config.js';
|
||||
export { NOTION_ORG_KNOWLEDGE_WARNING } from './adapters/notion/chunk.js';
|
||||
export { NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN } from './adapters/notion/types.js';
|
||||
export { NotionSourceAdapter, type NotionSourceAdapterDeps } from './adapters/notion/notion.adapter.js';
|
||||
export { NotionClient, type NotionApi, type NotionBotInfo } from './adapters/notion/notion-client.js';
|
||||
export { bucketDistinctUsers, bucketErrorRate, bucketExecutions, bucketP95Runtime, bucketRecency } from './adapters/historic-sql/buckets.js';
|
||||
|
|
|
|||
|
|
@ -184,7 +184,11 @@ const makeDeps = () => {
|
|||
.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve(connectionId === 'warehouse-2' ? ['looker__orders.yaml'] : []),
|
||||
),
|
||||
loadAllSources: vi.fn().mockResolvedValue([]),
|
||||
loadAllSources: vi
|
||||
.fn()
|
||||
.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve(connectionId === 'warehouse-2' ? [{ name: 'looker__orders' }] : []),
|
||||
),
|
||||
};
|
||||
const slSearchService = {
|
||||
indexSources: vi.fn().mockResolvedValue(undefined),
|
||||
|
|
@ -1261,8 +1265,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu',
|
||||
);
|
||||
expect(deps.adapter.listTargetConnectionIds).toHaveBeenCalledWith('/tmp/stage/upload-x');
|
||||
expect(deps.semanticLayerService.listFilesForConnection).toHaveBeenCalledWith('looker-run');
|
||||
expect(deps.semanticLayerService.listFilesForConnection).toHaveBeenCalledWith('warehouse-2');
|
||||
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('looker-run');
|
||||
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
|
||||
expect(workUnitCall?.[0].userPrompt).toContain('looker__orders');
|
||||
expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['looker-run', 'warehouse-2']);
|
||||
});
|
||||
|
|
@ -1556,6 +1560,49 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
expect(deps.knowledgeIndex.listPagesForUser).toHaveBeenCalledWith('system');
|
||||
});
|
||||
|
||||
it('includes manifest-backed target sources in WorkUnit prompts', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['postgres-warehouse']);
|
||||
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve(connectionId === 'postgres-warehouse' ? [{ name: 'stg_accounts' }] : []),
|
||||
);
|
||||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['models/schema.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/dbt-main/dbt/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
await runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'dbt-main',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
const workUnitCall = deps.agentRunner.runLoop.mock.calls.find(
|
||||
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu',
|
||||
);
|
||||
expect(workUnitCall?.[0].userPrompt).toContain('## postgres-warehouse');
|
||||
expect(workUnitCall?.[0].userPrompt).toContain('stg_accounts');
|
||||
expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['dbt-main', 'postgres-warehouse']);
|
||||
});
|
||||
|
||||
it('does not resolve qualified fallback table refs by source name alone', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.semanticLayerService.loadAllSources.mockResolvedValue([{ name: 'orders', table: 'sales.orders' }]);
|
||||
const runner = buildRunner(deps);
|
||||
|
||||
await expect(
|
||||
(runner as any).tableRefExistsInSemanticLayer(deps.semanticLayerService, ['warehouse'], 'finance.orders'),
|
||||
).resolves.toBe(false);
|
||||
await expect(
|
||||
(runner as any).tableRefExistsInSemanticLayer(deps.semanticLayerService, ['warehouse'], 'sales.orders'),
|
||||
).resolves.toBe(true);
|
||||
});
|
||||
|
||||
it('passes relevant canonical pins into the reconciliation system prompt', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.diffSetService.compute.mockResolvedValue({
|
||||
|
|
|
|||
|
|
@ -5,9 +5,10 @@ import pLimit from 'p-limit';
|
|||
import { z } from 'zod';
|
||||
import { type KtxLogger, noopLogger } from '../core/index.js';
|
||||
import type { CaptureSession, MemoryAction } from '../memory/index.js';
|
||||
import type { SlValidationDeps } from '../sl/index.js';
|
||||
import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
|
||||
import { createTouchedSlSources, type ToolContext, type ToolSession } from '../tools/index.js';
|
||||
import { actionTargetConnectionId } from './action-identity.js';
|
||||
import { NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN } from './adapters/notion/types.js';
|
||||
import { selectRelevantCanonicalPins } from './canonical-pins.js';
|
||||
import { sanitizeMemoryFlowError } from './memory-flow/live-buffer.js';
|
||||
import type { MemoryFlowPlannedWorkUnit } from './memory-flow/types.js';
|
||||
|
|
@ -39,6 +40,11 @@ import { createReadRawSpanTool } from './tools/read-raw-span.tool.js';
|
|||
import { createStageDiffTool } from './tools/stage-diff.tool.js';
|
||||
import { createStageListTool } from './tools/stage-list.tool.js';
|
||||
import { type ToolCallLogEntry, wrapToolsWithLogger } from './tools/tool-call-logger.js';
|
||||
import {
|
||||
createMutableToolTranscriptSummary,
|
||||
recordToolTranscriptEntry,
|
||||
type MutableToolTranscriptSummary,
|
||||
} from './tools/tool-transcript-summary.js';
|
||||
import type {
|
||||
EvictionUnit,
|
||||
IngestBundleJob,
|
||||
|
|
@ -48,14 +54,6 @@ import type {
|
|||
WorkUnit,
|
||||
} from './types.js';
|
||||
|
||||
interface MutableToolTranscriptSummary {
|
||||
unitKey: string;
|
||||
path: string;
|
||||
toolCallCount: number;
|
||||
errorCount: number;
|
||||
toolNames: Set<string>;
|
||||
}
|
||||
|
||||
function workUnitToMemoryFlowPlannedWorkUnit(workUnit: WorkUnit): MemoryFlowPlannedWorkUnit {
|
||||
return {
|
||||
unitKey: workUnit.unitKey,
|
||||
|
|
@ -80,21 +78,6 @@ function countMemoryFlowActions(actions: MemoryAction[], target: MemoryAction['t
|
|||
return actions.filter((action) => action.target === target).length;
|
||||
}
|
||||
|
||||
function isStructuredToolFailure(output: unknown): boolean {
|
||||
if (!output || typeof output !== 'object') {
|
||||
return false;
|
||||
}
|
||||
const structured = (output as { structured?: unknown }).structured;
|
||||
return !!structured && typeof structured === 'object' && (structured as { success?: unknown }).success === false;
|
||||
}
|
||||
|
||||
function isFailedToolCall(entry: ToolCallLogEntry): boolean {
|
||||
if (entry.error) {
|
||||
return true;
|
||||
}
|
||||
return (entry.toolName === 'sl_write_source' || entry.toolName === 'wiki_write') && isStructuredToolFailure(entry.output);
|
||||
}
|
||||
|
||||
function reportIdFromCreateResult(result: unknown): string | undefined {
|
||||
if (!result || typeof result !== 'object' || !('id' in result)) {
|
||||
return undefined;
|
||||
|
|
@ -103,6 +86,46 @@ function reportIdFromCreateResult(result: unknown): string | undefined {
|
|||
return typeof id === 'string' && id.length > 0 ? id : undefined;
|
||||
}
|
||||
|
||||
function normalizeTableReference(value: string): string {
|
||||
return value
|
||||
.trim()
|
||||
.replace(/["`]/g, '')
|
||||
.replace(/[\[\]]/g, '')
|
||||
.toLowerCase();
|
||||
}
|
||||
|
||||
function finalReferenceSegment(value: string): string {
|
||||
const parts = value.split('.').filter((part) => part.length > 0);
|
||||
return parts.at(-1) ?? value;
|
||||
}
|
||||
|
||||
function semanticSourceMatchesTableRef(source: SemanticLayerSource, tableRef: string): boolean {
|
||||
const normalizedRef = normalizeTableReference(tableRef);
|
||||
if (!normalizedRef) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const refIsQualified = normalizedRef.includes('.');
|
||||
const normalizedSourceName = normalizeTableReference(source.name);
|
||||
if (normalizedSourceName === normalizedRef) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const table = typeof source.table === 'string' ? normalizeTableReference(source.table) : '';
|
||||
if (table && (table === normalizedRef || table.endsWith(`.${normalizedRef}`))) {
|
||||
return true;
|
||||
}
|
||||
if (!refIsQualified && table && finalReferenceSegment(table) === normalizedRef) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
function rawPathsForAction(action: MemoryAction, fallbackRawPaths: string[]): string[] {
|
||||
return action.rawPaths && action.rawPaths.length > 0 ? [...new Set(action.rawPaths)] : fallbackRawPaths;
|
||||
}
|
||||
|
||||
export class IngestBundleRunner {
|
||||
private readonly logger: KtxLogger;
|
||||
private readonly chainByConnection = new Map<string, Promise<unknown>>();
|
||||
|
|
@ -276,18 +299,46 @@ export class IngestBundleRunner {
|
|||
const blocks = await Promise.all(
|
||||
connectionIds.map(async (connectionId) => {
|
||||
try {
|
||||
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
||||
const names = files.filter((f) => !f.startsWith('_schema/')).map((f) => f.replace(/\.yaml$/, ''));
|
||||
const sources = await this.deps.semanticLayerService.loadAllSources(connectionId);
|
||||
const names = sources.map((source) => source.name).sort((left, right) => left.localeCompare(right));
|
||||
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
||||
return `## ${connectionId}\n${body}`;
|
||||
} catch {
|
||||
return `## ${connectionId}\n(empty)`;
|
||||
try {
|
||||
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
||||
const names = files
|
||||
.filter((f) => !f.startsWith('_schema/'))
|
||||
.map((f) => f.replace(/\.yaml$/, ''))
|
||||
.sort((left, right) => left.localeCompare(right));
|
||||
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
||||
return `## ${connectionId}\n${body}`;
|
||||
} catch {
|
||||
return `## ${connectionId}\n(empty)`;
|
||||
}
|
||||
}
|
||||
}),
|
||||
);
|
||||
return blocks.join('\n\n');
|
||||
}
|
||||
|
||||
private async tableRefExistsInSemanticLayer(
|
||||
semanticLayerService: SemanticLayerService,
|
||||
connectionIds: string[],
|
||||
tableRef: string,
|
||||
): Promise<boolean> {
|
||||
for (const connectionId of connectionIds) {
|
||||
try {
|
||||
const sources = await semanticLayerService.loadAllSources(connectionId);
|
||||
if (sources.some((source) => semanticSourceMatchesTableRef(source, tableRef))) {
|
||||
return true;
|
||||
}
|
||||
} catch {
|
||||
// Fallback diagnostics should not fail an ingest stage if an index lookup is temporarily unavailable.
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private resolveContextCuratorBudget(
|
||||
bundleRef: IngestBundleJob['bundleRef'],
|
||||
stageIndex: StageIndex,
|
||||
|
|
@ -297,7 +348,9 @@ export class IngestBundleRunner {
|
|||
? (bundleRef.config as Record<string, unknown>)
|
||||
: {};
|
||||
const configuredCreates =
|
||||
typeof rawConfig.maxKnowledgeCreatesPerRun === 'number' ? rawConfig.maxKnowledgeCreatesPerRun : 5;
|
||||
typeof rawConfig.maxKnowledgeCreatesPerRun === 'number'
|
||||
? rawConfig.maxKnowledgeCreatesPerRun
|
||||
: NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN;
|
||||
const configuredUpdates =
|
||||
typeof rawConfig.maxKnowledgeUpdatesPerRun === 'number' ? rawConfig.maxKnowledgeUpdatesPerRun : 20;
|
||||
const wikiActions = stageIndex.workUnits.flatMap((wu) => wu.actions).filter((action) => action.target === 'wiki');
|
||||
|
|
@ -351,17 +404,8 @@ export class IngestBundleRunner {
|
|||
(path: string) =>
|
||||
(entry: ToolCallLogEntry): void => {
|
||||
const current =
|
||||
transcriptSummaries.get(entry.wuKey) ??
|
||||
({
|
||||
unitKey: entry.wuKey,
|
||||
path,
|
||||
toolCallCount: 0,
|
||||
errorCount: 0,
|
||||
toolNames: new Set<string>(),
|
||||
} satisfies MutableToolTranscriptSummary);
|
||||
current.toolCallCount += 1;
|
||||
current.errorCount += isFailedToolCall(entry) ? 1 : 0;
|
||||
current.toolNames.add(entry.toolName);
|
||||
transcriptSummaries.get(entry.wuKey) ?? createMutableToolTranscriptSummary(entry.wuKey, path);
|
||||
recordToolTranscriptEntry(current, entry);
|
||||
transcriptSummaries.set(entry.wuKey, current);
|
||||
};
|
||||
const overrideReport = await this.loadOverrideReport(job);
|
||||
|
|
@ -617,6 +661,7 @@ export class IngestBundleRunner {
|
|||
preHead: sessionWorktree.baseSha,
|
||||
touchedSlSources: session.touchedSlSources,
|
||||
actions: sessionActions,
|
||||
allowedRawPaths: new Set(wu.rawFiles),
|
||||
semanticLayerService: scopedSemanticLayerService,
|
||||
wikiService: scopedWikiService,
|
||||
configService: sessionWorktree.config,
|
||||
|
|
@ -681,6 +726,8 @@ export class IngestBundleRunner {
|
|||
emit_unmapped_fallback: createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: new Set(wu.rawFiles),
|
||||
tableRefExists: (tableRef) =>
|
||||
this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef),
|
||||
}),
|
||||
};
|
||||
|
||||
|
|
@ -728,7 +775,7 @@ export class IngestBundleRunner {
|
|||
sourceKey: job.sourceKey,
|
||||
connectionId: job.connectionId,
|
||||
jobId: job.jobId,
|
||||
toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.errorCount ?? 0,
|
||||
toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 0,
|
||||
onStepFinish: ({ stepIndex, stepBudget }) => {
|
||||
memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });
|
||||
},
|
||||
|
|
@ -839,6 +886,10 @@ export class IngestBundleRunner {
|
|||
const reconcileActions: MemoryAction[] = [];
|
||||
const rcScopedWiki = this.deps.wikiService.forWorktree(sessionWorktree.workdir);
|
||||
const rcScopedSl = this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir);
|
||||
const reconciliationAllowedRawPaths = new Set<string>([
|
||||
...currentHashes.keys(),
|
||||
...(eviction?.deletedRawPaths ?? []),
|
||||
]);
|
||||
|
||||
const rcToolSession: ToolSession = {
|
||||
connectionId: job.connectionId,
|
||||
|
|
@ -846,6 +897,7 @@ export class IngestBundleRunner {
|
|||
preHead: reconcileSession.preHead,
|
||||
touchedSlSources: reconcileSession.touchedSlSources,
|
||||
actions: reconcileActions,
|
||||
allowedRawPaths: reconciliationAllowedRawPaths,
|
||||
semanticLayerService: rcScopedSl,
|
||||
wikiService: rcScopedWiki,
|
||||
configService: sessionWorktree.config,
|
||||
|
|
@ -910,6 +962,7 @@ export class IngestBundleRunner {
|
|||
emit_unmapped_fallback: createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: allStagedPaths,
|
||||
tableRefExists: (tableRef) => this.tableRefExistsInSemanticLayer(rcScopedSl, slConnectionIds, tableRef),
|
||||
}),
|
||||
};
|
||||
|
||||
|
|
@ -1167,26 +1220,34 @@ export class IngestBundleRunner {
|
|||
return a.type === 'created' ? 'source_created' : 'measure_added';
|
||||
};
|
||||
const producedPaths = new Set<string>();
|
||||
const pushActionProvenance = (rawPath: string, action: MemoryAction): void => {
|
||||
const hash = currentHashes.get(rawPath) ?? 'unknown';
|
||||
provenanceRows.push({
|
||||
connectionId: job.connectionId,
|
||||
sourceKey: job.sourceKey,
|
||||
syncId,
|
||||
rawPath,
|
||||
rawContentHash: hash,
|
||||
artifactKind: action.target,
|
||||
artifactKey: action.key,
|
||||
targetConnectionId: action.target === 'sl' ? actionTargetConnectionId(action, job.connectionId) : null,
|
||||
artifactContentHash: null,
|
||||
actionType: actionToType(action),
|
||||
});
|
||||
producedPaths.add(rawPath);
|
||||
};
|
||||
for (const wu of stageIndex.workUnits) {
|
||||
for (const rawPath of wu.rawFiles) {
|
||||
const hash = currentHashes.get(rawPath) ?? 'unknown';
|
||||
for (const action of wu.actions) {
|
||||
provenanceRows.push({
|
||||
connectionId: job.connectionId,
|
||||
sourceKey: job.sourceKey,
|
||||
syncId,
|
||||
rawPath,
|
||||
rawContentHash: hash,
|
||||
artifactKind: action.target,
|
||||
artifactKey: action.key,
|
||||
targetConnectionId: action.target === 'sl' ? (action.targetConnectionId ?? null) : null,
|
||||
artifactContentHash: null,
|
||||
actionType: actionToType(action),
|
||||
});
|
||||
producedPaths.add(rawPath);
|
||||
for (const action of wu.actions) {
|
||||
for (const rawPath of rawPathsForAction(action, wu.rawFiles)) {
|
||||
pushActionProvenance(rawPath, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const action of reconcileActions) {
|
||||
for (const rawPath of action.rawPaths ?? []) {
|
||||
pushActionProvenance(rawPath, action);
|
||||
}
|
||||
}
|
||||
for (const resolution of stageIndex.artifactResolutions ?? []) {
|
||||
const hash = currentHashes.get(resolution.rawPath) ?? 'unknown';
|
||||
provenanceRows.push({
|
||||
|
|
|
|||
|
|
@ -466,6 +466,38 @@ describe('local ingest adapters', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('exposes configured primary warehouses as dbt target connections', async () => {
|
||||
const dbtProject: KtxLocalProject = {
|
||||
...projectWithConnections({
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgresql://example/db',
|
||||
},
|
||||
analytics_dbt: {
|
||||
driver: 'dbt',
|
||||
source_dir: '/repo/dbt',
|
||||
},
|
||||
}),
|
||||
config: {
|
||||
...project.config,
|
||||
setup: { database_connection_ids: ['warehouse'], completed_steps: [] },
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgresql://example/db',
|
||||
},
|
||||
analytics_dbt: {
|
||||
driver: 'dbt',
|
||||
source_dir: '/repo/dbt',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
const adapter = createDefaultLocalIngestAdapters(dbtProject).find((candidate) => candidate.source === 'dbt');
|
||||
|
||||
await expect(adapter?.listTargetConnectionIds?.('/tmp/staged-dbt')).resolves.toEqual(['warehouse']);
|
||||
});
|
||||
|
||||
it('resolves MetricFlow auth_token_ref without writing literal tokens to config', async () => {
|
||||
const project = projectWithConnections({
|
||||
metricflow_main: {
|
||||
|
|
|
|||
|
|
@ -89,7 +89,10 @@ export function createDefaultLocalIngestAdapters(
|
|||
}),
|
||||
}),
|
||||
new LookmlSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }),
|
||||
new DbtSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }),
|
||||
new DbtSourceAdapter({
|
||||
homeDir: join(project.projectDir, '.ktx/cache'),
|
||||
targetConnectionIds: primaryWarehouseConnectionIds(project),
|
||||
}),
|
||||
createLocalMetabaseSourceAdapter(project, {
|
||||
...(options.logger ? { logger: options.logger } : {}),
|
||||
}),
|
||||
|
|
@ -128,6 +131,21 @@ export function createDefaultLocalIngestAdapters(
|
|||
return adapters;
|
||||
}
|
||||
|
||||
function primaryWarehouseConnectionIds(project: KtxLocalProject): string[] {
|
||||
const configuredPrimaryIds = project.config.setup?.database_connection_ids ?? [];
|
||||
const configured = configuredPrimaryIds.filter((connectionId) =>
|
||||
Boolean(localConnectionToWarehouseDescriptor(connectionId, project.config.connections[connectionId])),
|
||||
);
|
||||
if (configured.length > 0) {
|
||||
return [...new Set(configured)];
|
||||
}
|
||||
|
||||
return Object.entries(project.config.connections)
|
||||
.filter(([connectionId, connection]) => Boolean(localConnectionToWarehouseDescriptor(connectionId, connection)))
|
||||
.map(([connectionId]) => connectionId)
|
||||
.sort((left, right) => left.localeCompare(right));
|
||||
}
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,6 +88,35 @@ class WikiWritingAgentRunner extends AgentRunnerService {
|
|||
}
|
||||
}
|
||||
|
||||
class WikiWritingWithRawPathAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
|
||||
const wikiWrite = params.toolSet.wiki_write;
|
||||
if (!wikiWrite?.execute) {
|
||||
throw new Error('wiki_write tool was not available to the WorkUnit');
|
||||
}
|
||||
const result = await wikiWrite.execute(
|
||||
{
|
||||
key: 'orders_context',
|
||||
summary: 'Orders source context',
|
||||
content: 'Orders are purchase records used for revenue analysis.',
|
||||
tags: ['orders'],
|
||||
rawPaths: ['orders/orders.json'],
|
||||
},
|
||||
{ toolCallId: 'wiki-write' },
|
||||
);
|
||||
if (!result.structured.success) {
|
||||
throw new Error(result.markdown);
|
||||
}
|
||||
}
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
class HistoricSqlEvidenceAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
if (
|
||||
|
|
@ -366,14 +395,94 @@ describe('canonical local ingest', () => {
|
|||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true });
|
||||
try {
|
||||
expect(db.prepare('SELECT key, summary FROM knowledge_pages ORDER BY key').all()).toEqual([
|
||||
{ key: 'orders_context', summary: 'Orders source context' },
|
||||
expect(db.prepare('SELECT key, summary, embedding_json IS NOT NULL AS has_embedding FROM knowledge_pages ORDER BY key').all()).toEqual([
|
||||
{ key: 'orders_context', summary: 'Orders source context', has_embedding: 1 },
|
||||
]);
|
||||
} finally {
|
||||
db.close();
|
||||
}
|
||||
});
|
||||
|
||||
it('does not persist noop embedding vectors when local embeddings are disabled', async () => {
|
||||
await writeFile(
|
||||
join(project.projectDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'connections:',
|
||||
' warehouse:',
|
||||
' driver: postgres',
|
||||
'ingest:',
|
||||
' adapters:',
|
||||
' - fake',
|
||||
' embeddings:',
|
||||
' backend: none',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
project = await loadKtxProject({ projectDir: project.projectDir });
|
||||
const sourceDir = join(tempDir, 'source');
|
||||
await mkdir(join(sourceDir, 'orders'), { recursive: true });
|
||||
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
|
||||
const agentRunner = new WikiWritingAgentRunner();
|
||||
|
||||
const result = await runLocalIngest({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
adapter: 'fake',
|
||||
connectionId: 'warehouse',
|
||||
sourceDir,
|
||||
jobId: 'wiki-local-no-embeddings-1',
|
||||
agentRunner,
|
||||
});
|
||||
|
||||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true });
|
||||
try {
|
||||
expect(db.prepare('SELECT key, summary, embedding_json IS NOT NULL AS has_embedding FROM knowledge_pages ORDER BY key').all()).toEqual([
|
||||
{ key: 'orders_context', summary: 'Orders source context', has_embedding: 0 },
|
||||
]);
|
||||
} finally {
|
||||
db.close();
|
||||
}
|
||||
});
|
||||
|
||||
it('uses explicit action raw paths to avoid over-attributing work-unit provenance', async () => {
|
||||
const sourceDir = join(tempDir, 'source');
|
||||
await mkdir(join(sourceDir, 'orders'), { recursive: true });
|
||||
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
|
||||
await writeFile(join(sourceDir, 'orders', 'unrelated.json'), '{"name":"unrelated"}\n', 'utf-8');
|
||||
const agentRunner = new WikiWritingWithRawPathAgentRunner();
|
||||
|
||||
const result = await runLocalIngest({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
adapter: 'fake',
|
||||
connectionId: 'warehouse',
|
||||
sourceDir,
|
||||
jobId: 'wiki-raw-path-local-1',
|
||||
agentRunner,
|
||||
});
|
||||
|
||||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
expect(result.report.body.provenanceRows).toEqual([
|
||||
{
|
||||
rawPath: 'orders/orders.json',
|
||||
artifactKind: 'wiki',
|
||||
artifactKey: 'orders_context',
|
||||
targetConnectionId: null,
|
||||
actionType: 'wiki_written',
|
||||
},
|
||||
{
|
||||
rawPath: 'orders/unrelated.json',
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
targetConnectionId: null,
|
||||
actionType: 'skipped',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('runs historic-SQL evidence projection through the local bundle post-processor', async () => {
|
||||
const projectDir = join(tempDir, 'historic-sql-project');
|
||||
await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ import {
|
|||
type ToolSession,
|
||||
} from '../tools/index.js';
|
||||
import {
|
||||
buildKnowledgeSearchText,
|
||||
type KnowledgeEventPort,
|
||||
type KnowledgeIndexPort,
|
||||
KnowledgeWikiService,
|
||||
|
|
@ -289,7 +290,10 @@ function scoreText(text: string, query: string): number {
|
|||
class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
||||
private readonly sqlite: SqliteKnowledgeIndex;
|
||||
|
||||
constructor(private readonly project: KtxLocalProject) {
|
||||
constructor(
|
||||
private readonly project: KtxLocalProject,
|
||||
private readonly embedding: KtxEmbeddingPort,
|
||||
) {
|
||||
this.sqlite = new SqliteKnowledgeIndex({ dbPath: ktxLocalStateDbPath(project) });
|
||||
}
|
||||
|
||||
|
|
@ -391,6 +395,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
|
||||
private async syncAllPagesFromDisk(): Promise<void> {
|
||||
const listed = await this.project.fileStore.listFiles('knowledge', true);
|
||||
const existingPages = this.sqlite.getExistingPages();
|
||||
const pages: SqliteKnowledgeIndexPage[] = [];
|
||||
for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) {
|
||||
const parsedPath = parseKnowledgeIndexPath(file);
|
||||
|
|
@ -400,14 +405,21 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
const path = `knowledge/${file}`;
|
||||
const raw = await this.project.fileStore.readFile(path);
|
||||
const parsed = parseWiki(raw.content);
|
||||
const tags = parseWikiTags(raw.content);
|
||||
const searchText = buildKnowledgeSearchText(parsedPath.pageKey, parsed.summary, parsed.content, tags);
|
||||
const existing = existingPages.get(path);
|
||||
const embedding =
|
||||
existing?.searchText === searchText && existing.embedding
|
||||
? existing.embedding
|
||||
: await this.embedding.computeEmbedding(searchText).catch(() => null);
|
||||
pages.push({
|
||||
path,
|
||||
key: parsedPath.pageKey,
|
||||
scope: parsedPath.scope,
|
||||
summary: parsed.summary,
|
||||
content: parsed.content,
|
||||
tags: parseWikiTags(raw.content),
|
||||
embedding: null,
|
||||
tags,
|
||||
embedding,
|
||||
});
|
||||
}
|
||||
this.sqlite.sync(pages);
|
||||
|
|
@ -417,10 +429,19 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; pageKey: string } | null {
|
||||
const segments = file.split('/');
|
||||
if (segments.length === 2 && segments[0] === 'global') {
|
||||
return { scope: 'GLOBAL', pageKey: segments[1].replace(/\.md$/, '') };
|
||||
const pageKey = segments[1].replace(/\.md$/, '');
|
||||
return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'GLOBAL', pageKey } : null;
|
||||
}
|
||||
if (segments.length >= 3 && segments[0] === 'global' && segments[1] === 'historic-sql') {
|
||||
const historicPath = segments.slice(2).join('/').replace(/\.md$/, '');
|
||||
if (historicPath.split('/').every((segment) => /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment))) {
|
||||
return { scope: 'GLOBAL', pageKey: `historic-sql/${historicPath}` };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (segments.length === 3 && segments[0] === 'user') {
|
||||
return { scope: 'USER', pageKey: segments[2].replace(/\.md$/, '') };
|
||||
const pageKey = segments[2].replace(/\.md$/, '');
|
||||
return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'USER', pageKey } : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
@ -591,7 +612,7 @@ export function createLocalBundleIngestRuntime(
|
|||
);
|
||||
const slSourcesRepository = new SqliteSlSourcesIndex({ dbPath });
|
||||
const slSearchService = new SlSearchService(embedding, slSourcesRepository, logger);
|
||||
const knowledgeIndex = new LocalKnowledgeIndex(options.project);
|
||||
const knowledgeIndex = new LocalKnowledgeIndex(options.project, embedding);
|
||||
const knowledgeEvents = new NoopKnowledgeEventPort();
|
||||
const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, options.project.git, logger);
|
||||
const { agentRunner, llmProvider } = resolveAgentRunner(options);
|
||||
|
|
|
|||
|
|
@ -611,7 +611,7 @@ describe('local ingest', () => {
|
|||
continuedFromCursor: false,
|
||||
partialSnapshot: false,
|
||||
maxPagesPerRun: 1000,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
nextSuccessfulCursor: null,
|
||||
skipped: [],
|
||||
|
|
@ -654,6 +654,7 @@ describe('local ingest', () => {
|
|||
crawlMode: 'selected_roots',
|
||||
rootPageIds: ['page-1'],
|
||||
maxPagesPerRun: 1000,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
}),
|
||||
expect.any(String),
|
||||
{ connectionId: 'notion-main', sourceKey: 'notion' },
|
||||
|
|
|
|||
28
packages/context/src/ingest/memory-flow/known-errors.ts
Normal file
28
packages/context/src/ingest/memory-flow/known-errors.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
interface MemoryFlowErrorContext {
|
||||
adapter: string;
|
||||
}
|
||||
|
||||
export function isNotionAuthorizationExpired(
|
||||
context: MemoryFlowErrorContext,
|
||||
reason: string | undefined,
|
||||
): boolean {
|
||||
if (context.adapter !== 'notion') {
|
||||
return false;
|
||||
}
|
||||
const normalized = (reason ?? '').toLowerCase();
|
||||
return (
|
||||
normalized.includes('invalid_grant') &&
|
||||
(normalized.includes('invalid_rapt') || normalized.includes('reauth'))
|
||||
);
|
||||
}
|
||||
|
||||
export function formatNotionAuthorizationExpiredDetail(unitKey: string): string {
|
||||
return `${unitKey} could not read Notion because the saved OAuth grant expired or requires reauthentication (invalid_grant / invalid_rapt).`;
|
||||
}
|
||||
|
||||
export function notionAuthorizationFixSuggestions(connectionId: string): string[] {
|
||||
return [
|
||||
`Refresh the Notion token referenced by auth_token_ref for ${connectionId}. If it uses env:NAME, export a fresh token in that variable; if it uses file:/path, replace that file.`,
|
||||
`Run ktx connection notion pick ${connectionId} to confirm Notion access, then rerun ktx ingest ${connectionId}.`,
|
||||
];
|
||||
}
|
||||
|
|
@ -60,6 +60,36 @@ describe('formatMemoryFlowFinalSummary', () => {
|
|||
).toContain('Trust issues: 3');
|
||||
});
|
||||
|
||||
it('explains expired Notion authorization with fix suggestions', () => {
|
||||
const rawReason =
|
||||
'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)","error_uri":"https://accounts.example/reauth"}';
|
||||
const summary = formatMemoryFlowFinalSummary(
|
||||
input({
|
||||
connectionId: 'notion-main',
|
||||
adapter: 'notion',
|
||||
status: 'error',
|
||||
events: [
|
||||
{ type: 'source_acquired', adapter: 'notion', trigger: 'manual_resync', fileCount: 37 },
|
||||
{ type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 },
|
||||
{ type: 'work_unit_finished', unitKey: 'notion-cluster-1', status: 'failed', reason: rawReason },
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary).toContain('Memory-flow summary: error');
|
||||
expect(summary).toContain(
|
||||
'Notion authorization expired: notion-cluster-1 could not read Notion because the saved OAuth grant expired or requires reauthentication (invalid_grant / invalid_rapt).',
|
||||
);
|
||||
expect(summary).toContain('Fix suggestions:');
|
||||
expect(summary).toContain(
|
||||
'- Refresh the Notion token referenced by auth_token_ref for notion-main. If it uses env:NAME, export a fresh token in that variable; if it uses file:/path, replace that file.',
|
||||
);
|
||||
expect(summary).toContain(
|
||||
'- Run ktx connection notion pick notion-main to confirm Notion access, then rerun ktx ingest notion-main.',
|
||||
);
|
||||
expect(summary).not.toContain('error_uri');
|
||||
});
|
||||
|
||||
it('labels replay source metadata in final summaries', () => {
|
||||
const summary = formatMemoryFlowFinalSummary({
|
||||
metadata: {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { sanitizeMemoryFlowError } from './live-buffer.js';
|
||||
import type { MemoryFlowEvent, MemoryFlowReplayInput } from './types.js';
|
||||
import { buildMemoryFlowViewModel } from './view-model.js';
|
||||
import { isNotionAuthorizationExpired, notionAuthorizationFixSuggestions } from './known-errors.js';
|
||||
|
||||
function latest<T extends MemoryFlowEvent['type']>(
|
||||
events: MemoryFlowEvent[],
|
||||
|
|
@ -42,6 +43,14 @@ function humanizeSummaryText(value: string): string {
|
|||
.replace(/\bSL\b/g, 'semantic layer');
|
||||
}
|
||||
|
||||
function fixSuggestions(input: MemoryFlowReplayInput): string[] {
|
||||
const workUnitReasons = eventsOf(input.events, 'work_unit_finished').map((event) => event.reason);
|
||||
const hasNotionAuthFailure = [...workUnitReasons, ...input.errors].some((reason) =>
|
||||
isNotionAuthorizationExpired(input, reason),
|
||||
);
|
||||
return hasNotionAuthFailure ? notionAuthorizationFixSuggestions(input.connectionId) : [];
|
||||
}
|
||||
|
||||
export function formatMemoryFlowFinalSummary(input: MemoryFlowReplayInput): string {
|
||||
const sources = eventsOf(input.events, 'source_acquired');
|
||||
const source = sources.at(-1);
|
||||
|
|
@ -84,6 +93,14 @@ export function formatMemoryFlowFinalSummary(input: MemoryFlowReplayInput): stri
|
|||
}
|
||||
}
|
||||
|
||||
const suggestions = fixSuggestions(input);
|
||||
if (suggestions.length > 0) {
|
||||
lines.push('Fix suggestions:');
|
||||
for (const suggestion of suggestions) {
|
||||
lines.push(`- ${suggestion}`);
|
||||
}
|
||||
}
|
||||
|
||||
for (const error of input.errors.slice(0, 3)) {
|
||||
lines.push(`Error: ${sanitizeMemoryFlowError(error)}`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import type {
|
|||
MemoryFlowViewModel,
|
||||
} from './types.js';
|
||||
import { sanitizeMemoryFlowError } from './live-buffer.js';
|
||||
import { formatNotionAuthorizationExpiredDetail, isNotionAuthorizationExpired } from './known-errors.js';
|
||||
|
||||
function latest<T extends MemoryFlowEvent['type']>(
|
||||
events: MemoryFlowEvent[],
|
||||
|
|
@ -109,7 +110,7 @@ function errorDetails(input: MemoryFlowReplayInput): string[] {
|
|||
}
|
||||
|
||||
function isValidationFailure(reason: string | undefined): boolean {
|
||||
return /semantic-layer|validation|invalid/i.test(reason ?? '');
|
||||
return /semantic-layer|validation/i.test(reason ?? '');
|
||||
}
|
||||
|
||||
function failedWorkUnitDetails(failed: Array<Extract<MemoryFlowEvent, { type: 'work_unit_finished' }>>): string[] {
|
||||
|
|
@ -180,11 +181,14 @@ function buildMemoryFlowTrustIssues(input: MemoryFlowReplayInput): MemoryFlowTru
|
|||
|
||||
for (const event of failed) {
|
||||
const reason = sanitizeMemoryFlowError(event.reason ?? 'failed');
|
||||
const knownNotionAuthFailure = isNotionAuthorizationExpired(input, event.reason);
|
||||
issues.push({
|
||||
id: `work-unit-failed:${event.unitKey}`,
|
||||
severity: 'failed',
|
||||
title: 'WorkUnit failed',
|
||||
detail: `${event.unitKey} failed: ${reason}`,
|
||||
title: knownNotionAuthFailure ? 'Notion authorization expired' : 'WorkUnit failed',
|
||||
detail: knownNotionAuthFailure
|
||||
? formatNotionAuthorizationExpiredDetail(event.unitKey)
|
||||
: `${event.unitKey} failed: ${reason}`,
|
||||
columnId: 'workUnits',
|
||||
targetLabel: event.unitKey,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ const ingestActionSchema = z.object({
|
|||
key: z.string(),
|
||||
detail: z.string(),
|
||||
targetConnectionId: z.string().nullable().default(null),
|
||||
rawPaths: z.array(z.string()).optional(),
|
||||
});
|
||||
|
||||
const touchedSlSourceSchema = z.object({
|
||||
|
|
@ -153,6 +154,7 @@ export const ingestReportSnapshotSchema = z
|
|||
),
|
||||
failedWorkUnits: z.array(z.string()),
|
||||
reconciliationSkipped: z.boolean(),
|
||||
reconciliationActions: z.array(ingestActionSchema).default([]),
|
||||
conflictsResolved: z.array(conflictResolvedSchema).default([]),
|
||||
evictionsApplied: z.array(evictionAppliedSchema).default([]),
|
||||
unmappedFallbacks: z.array(unmappedFallbackSchema).default([]),
|
||||
|
|
|
|||
|
|
@ -55,6 +55,10 @@ export interface IngestReportBody {
|
|||
workUnits: IngestReportWorkUnit[];
|
||||
failedWorkUnits: string[];
|
||||
reconciliationSkipped: boolean;
|
||||
// Actions emitted by the reconciliation stage (wiki/sl writes from
|
||||
// cross-WU reconciliation). Counted alongside workUnit.actions in
|
||||
// savedMemoryCountsForReport so progress reports reflect all writes.
|
||||
reconciliationActions?: MemoryAction[];
|
||||
conflictsResolved: ConflictResolvedRecord[];
|
||||
evictionsApplied: EvictionAppliedRecord[];
|
||||
unmappedFallbacks: UnmappedFallbackRecord[];
|
||||
|
|
@ -111,7 +115,9 @@ export function postProcessorSavedMemoryCounts(
|
|||
}
|
||||
|
||||
export function savedMemoryCountsForReport(report: IngestReportSnapshot): IngestSavedMemoryCounts {
|
||||
const actions = report.body.workUnits.flatMap((workUnit) => workUnit.actions);
|
||||
const workUnitActions = report.body.workUnits.flatMap((workUnit) => workUnit.actions);
|
||||
const reconciliationActions = report.body.reconciliationActions ?? [];
|
||||
const actions = [...workUnitActions, ...reconciliationActions];
|
||||
const directCounts = {
|
||||
wikiCount: actions.filter((action) => action.target === 'wiki').length,
|
||||
slCount: actions.filter((action) => action.target === 'sl').length,
|
||||
|
|
|
|||
|
|
@ -66,6 +66,27 @@ function reportBody(syncId: string, supersededBy: string | null = null): IngestR
|
|||
};
|
||||
}
|
||||
|
||||
function emptyReportBody(syncId: string, overrides: Partial<IngestReportBody> = {}): IngestReportBody {
|
||||
return {
|
||||
syncId,
|
||||
diffSummary: diffSummary({ added: 0, modified: 0, deleted: 0, unchanged: 1 }),
|
||||
commitSha: null,
|
||||
workUnits: [],
|
||||
failedWorkUnits: [],
|
||||
reconciliationSkipped: true,
|
||||
conflictsResolved: [],
|
||||
evictionsApplied: [],
|
||||
unmappedFallbacks: [],
|
||||
evictionInputs: [],
|
||||
unresolvedCards: [],
|
||||
supersededBy: null,
|
||||
overrideOf: null,
|
||||
provenanceRows: [],
|
||||
toolTranscripts: [],
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('SqliteBundleIngestStore', () => {
|
||||
let tempDir: string;
|
||||
let dbPath: string;
|
||||
|
|
@ -226,6 +247,204 @@ describe('SqliteBundleIngestStore', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('does not baseline skipped provenance from failed work units or zero-work retry runs', async () => {
|
||||
const store = new SqliteBundleIngestStore({ dbPath });
|
||||
const rawHashes = new Map([
|
||||
['pages/page-1/metadata.json', 'hash-metadata'],
|
||||
['pages/page-1/page.md', 'hash-page'],
|
||||
]);
|
||||
|
||||
const failedRun = await store.create(runArgs({ jobId: 'job-failed-review', syncId: 'sync-failed-review' }));
|
||||
await store.insertMany(
|
||||
[...rawHashes].map(([rawPath, rawContentHash]) => ({
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-failed-review',
|
||||
rawPath,
|
||||
rawContentHash,
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped' as const,
|
||||
})),
|
||||
);
|
||||
await store.markCompleted(failedRun.id, diffSummary({ added: 2 }));
|
||||
await store.create({
|
||||
runId: failedRun.id,
|
||||
jobId: 'job-failed-review',
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
body: emptyReportBody('sync-failed-review', {
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'notion-page-page-1',
|
||||
rawFiles: [...rawHashes.keys()],
|
||||
status: 'failed',
|
||||
reason: 'invalid_grant',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
failedWorkUnits: ['notion-page-page-1'],
|
||||
}),
|
||||
});
|
||||
|
||||
const noWorkRun = await store.create(runArgs({ jobId: 'job-no-work', syncId: 'sync-no-work' }));
|
||||
await store.insertMany(
|
||||
[...rawHashes].map(([rawPath, rawContentHash]) => ({
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-no-work',
|
||||
rawPath,
|
||||
rawContentHash,
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped' as const,
|
||||
})),
|
||||
);
|
||||
await store.markCompleted(noWorkRun.id, diffSummary({ unchanged: 2 }));
|
||||
await store.create({
|
||||
runId: noWorkRun.id,
|
||||
jobId: 'job-no-work',
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
body: emptyReportBody('sync-no-work', { workUnits: [], failedWorkUnits: [] }),
|
||||
});
|
||||
|
||||
await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual(new Map());
|
||||
await expect(new DiffSetService(store).compute('docs', 'notion', rawHashes)).resolves.toEqual({
|
||||
added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'],
|
||||
modified: [],
|
||||
deleted: [],
|
||||
unchanged: [],
|
||||
});
|
||||
});
|
||||
|
||||
it('baselines skipped provenance from successful no-output work unit runs', async () => {
|
||||
const store = new SqliteBundleIngestStore({ dbPath });
|
||||
const run = await store.create(runArgs({ jobId: 'job-reviewed-no-output', syncId: 'sync-reviewed-no-output' }));
|
||||
|
||||
await store.insertMany([
|
||||
{
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-reviewed-no-output',
|
||||
rawPath: 'pages/page-1/page.md',
|
||||
rawContentHash: 'hash-reviewed',
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped',
|
||||
},
|
||||
]);
|
||||
await store.markCompleted(run.id, diffSummary({ added: 1 }));
|
||||
await store.create({
|
||||
runId: run.id,
|
||||
jobId: 'job-reviewed-no-output',
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
body: emptyReportBody('sync-reviewed-no-output', {
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'notion-page-page-1',
|
||||
rawFiles: ['pages/page-1/page.md'],
|
||||
status: 'success',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
failedWorkUnits: [],
|
||||
}),
|
||||
});
|
||||
|
||||
await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual(
|
||||
new Map([['pages/page-1/page.md', 'hash-reviewed']]),
|
||||
);
|
||||
await expect(
|
||||
new DiffSetService(store).compute('docs', 'notion', new Map([['pages/page-1/page.md', 'hash-reviewed']])),
|
||||
).resolves.toMatchObject({
|
||||
added: [],
|
||||
unchanged: ['pages/page-1/page.md'],
|
||||
});
|
||||
});
|
||||
|
||||
it('baselines artifact provenance in partial failures but not skipped-only failed paths', async () => {
|
||||
const store = new SqliteBundleIngestStore({ dbPath });
|
||||
const run = await store.create(runArgs({ jobId: 'job-partial', syncId: 'sync-partial' }));
|
||||
|
||||
await store.insertMany([
|
||||
{
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-partial',
|
||||
rawPath: 'pages/success/page.md',
|
||||
rawContentHash: 'hash-success',
|
||||
artifactKind: 'wiki',
|
||||
artifactKey: 'knowledge/notion/success.md',
|
||||
artifactContentHash: 'artifact-success',
|
||||
actionType: 'wiki_written',
|
||||
},
|
||||
{
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-partial',
|
||||
rawPath: 'pages/failed/page.md',
|
||||
rawContentHash: 'hash-failed',
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped',
|
||||
},
|
||||
]);
|
||||
await store.markCompleted(run.id, diffSummary({ added: 2 }));
|
||||
await store.create({
|
||||
runId: run.id,
|
||||
jobId: 'job-partial',
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
body: emptyReportBody('sync-partial', {
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'notion-page-success',
|
||||
rawFiles: ['pages/success/page.md'],
|
||||
status: 'success',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
{
|
||||
unitKey: 'notion-page-failed',
|
||||
rawFiles: ['pages/failed/page.md'],
|
||||
status: 'failed',
|
||||
reason: 'invalid_grant',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
failedWorkUnits: ['notion-page-failed'],
|
||||
}),
|
||||
});
|
||||
|
||||
await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual(
|
||||
new Map([['pages/success/page.md', 'hash-success']]),
|
||||
);
|
||||
await expect(
|
||||
new DiffSetService(store).compute(
|
||||
'docs',
|
||||
'notion',
|
||||
new Map([
|
||||
['pages/success/page.md', 'hash-success'],
|
||||
['pages/failed/page.md', 'hash-failed'],
|
||||
]),
|
||||
),
|
||||
).resolves.toEqual({
|
||||
added: ['pages/failed/page.md'],
|
||||
modified: [],
|
||||
deleted: [],
|
||||
unchanged: ['pages/success/page.md'],
|
||||
});
|
||||
});
|
||||
|
||||
it('returns the latest stored report across bundle ingest runs', async () => {
|
||||
const store = new SqliteBundleIngestStore({
|
||||
dbPath,
|
||||
|
|
|
|||
|
|
@ -46,6 +46,13 @@ interface ProvenanceRow {
|
|||
action_type: string;
|
||||
}
|
||||
|
||||
interface ProvenanceHashCandidateRow {
|
||||
raw_path: string;
|
||||
raw_content_hash: string;
|
||||
action_type: string;
|
||||
report_body_json: string | null;
|
||||
}
|
||||
|
||||
function parseArtifactKind(kind: string | null): IngestProvenanceRow['artifact_kind'] {
|
||||
if (kind === null || kind === 'sl' || kind === 'wiki') {
|
||||
return kind;
|
||||
|
|
@ -93,6 +100,31 @@ function toPortProvenanceRow(row: ProvenanceRow): IngestProvenanceRow {
|
|||
};
|
||||
}
|
||||
|
||||
function recordValue(value: unknown, key: string): unknown {
|
||||
return typeof value === 'object' && value !== null && !Array.isArray(value)
|
||||
? (value as Record<string, unknown>)[key]
|
||||
: undefined;
|
||||
}
|
||||
|
||||
function isSuccessfulNoOutputSkippedBaseline(reportBodyJson: string | null): boolean {
|
||||
if (reportBodyJson === null) {
|
||||
return true;
|
||||
}
|
||||
const body = JSON.parse(reportBodyJson) as unknown;
|
||||
const workUnits = recordValue(body, 'workUnits');
|
||||
const failedWorkUnits = recordValue(body, 'failedWorkUnits');
|
||||
return (
|
||||
Array.isArray(workUnits) &&
|
||||
workUnits.length > 0 &&
|
||||
Array.isArray(failedWorkUnits) &&
|
||||
failedWorkUnits.length === 0
|
||||
);
|
||||
}
|
||||
|
||||
function isProcessedHashBaseline(row: ProvenanceHashCandidateRow): boolean {
|
||||
return row.action_type !== 'skipped' || isSuccessfulNoOutputSkippedBaseline(row.report_body_json);
|
||||
}
|
||||
|
||||
function placeholders(values: readonly unknown[]): string {
|
||||
return values.map(() => '?').join(', ');
|
||||
}
|
||||
|
|
@ -275,23 +307,34 @@ export class SqliteBundleIngestStore
|
|||
const rows = this.db
|
||||
.prepare(
|
||||
`
|
||||
SELECT p.raw_path, p.raw_content_hash
|
||||
SELECT
|
||||
p.raw_path,
|
||||
p.raw_content_hash,
|
||||
p.action_type,
|
||||
br.body_json AS report_body_json
|
||||
FROM bundle_ingest_provenance p
|
||||
INNER JOIN bundle_ingest_runs r
|
||||
ON r.connection_id = p.connection_id
|
||||
AND r.source_key = p.source_key
|
||||
AND r.sync_id = p.sync_id
|
||||
LEFT JOIN bundle_ingest_reports br
|
||||
ON br.run_id = r.id
|
||||
WHERE p.connection_id = ?
|
||||
AND p.source_key = ?
|
||||
AND r.status = 'completed'
|
||||
ORDER BY r.completed_at DESC, r.rowid DESC, p.created_at DESC, p.rowid DESC
|
||||
`,
|
||||
)
|
||||
.all(connectionId, sourceKey) as Array<{ raw_path: string; raw_content_hash: string }>;
|
||||
.all(connectionId, sourceKey) as ProvenanceHashCandidateRow[];
|
||||
|
||||
const latest = new Map<string, string>();
|
||||
const seen = new Set<string>();
|
||||
for (const row of rows) {
|
||||
if (!latest.has(row.raw_path)) {
|
||||
if (seen.has(row.raw_path)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(row.raw_path);
|
||||
if (isProcessedHashBaseline(row)) {
|
||||
latest.set(row.raw_path, row.raw_content_hash);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { buildReconcileSystemPrompt, buildReconcileToolSet } from './build-reconcile-context.js';
|
||||
import { buildReconcileSystemPrompt, buildReconcileToolSet, buildReconcileUserPrompt } from './build-reconcile-context.js';
|
||||
|
||||
describe('buildReconcileSystemPrompt', () => {
|
||||
it('appends canonical pins when relevant pins are supplied', () => {
|
||||
|
|
@ -39,6 +39,40 @@ describe('buildReconcileSystemPrompt', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('buildReconcileUserPrompt', () => {
|
||||
it('includes action details so reconciliation can compare different keys for the same table', () => {
|
||||
const prompt = buildReconcileUserPrompt(
|
||||
{
|
||||
jobId: 'j1',
|
||||
connectionId: 'notion',
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'notion-a',
|
||||
rawFiles: ['pages/a/page.md'],
|
||||
status: 'success',
|
||||
actions: [
|
||||
{
|
||||
target: 'wiki',
|
||||
type: 'created',
|
||||
key: 'orbit-customer-source-reference',
|
||||
detail: 'tables: orbit_analytics.customer',
|
||||
},
|
||||
],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
conflictsResolved: [],
|
||||
evictionsApplied: [],
|
||||
unmappedFallbacks: [],
|
||||
},
|
||||
undefined,
|
||||
);
|
||||
|
||||
expect(prompt).toContain('orbit-customer-source-reference');
|
||||
expect(prompt).toContain('tables: orbit_analytics.customer');
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildReconcileToolSet', () => {
|
||||
it('includes emit_unmapped_fallback with the reconciliation tools', () => {
|
||||
const toolSet = buildReconcileToolSet({
|
||||
|
|
|
|||
|
|
@ -104,6 +104,10 @@ function curatorPassStateSummary(runState?: ReconcilePromptRunState): string {
|
|||
].join('\n');
|
||||
}
|
||||
|
||||
function formatStageActionDetail(detail: string): string {
|
||||
return detail.trim().replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
export function buildReconcileUserPrompt(
|
||||
stageIndex: StageIndex,
|
||||
ev: EvictionUnit | undefined,
|
||||
|
|
@ -119,7 +123,14 @@ export function buildReconcileUserPrompt(
|
|||
const actions =
|
||||
wu.actions.length === 0
|
||||
? ' actions: (none)'
|
||||
: wu.actions.map((a) => ` - ${a.target}:${a.type} ${a.key}`).join('\n');
|
||||
: wu.actions
|
||||
.map((a) => {
|
||||
const detail = formatStageActionDetail(a.detail);
|
||||
return detail.length > 0
|
||||
? ` - ${a.target}:${a.type} ${a.key}; detail: ${detail}`
|
||||
: ` - ${a.target}:${a.type} ${a.key}`;
|
||||
})
|
||||
.join('\n');
|
||||
return `- unitKey: ${wu.unitKey} (status=${wu.status})\n${actions}`;
|
||||
})
|
||||
.join('\n');
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ describe('reconciliation emit tools', () => {
|
|||
{
|
||||
rawPath: 'metrics/conversion.yml',
|
||||
reason: 'no_physical_table',
|
||||
detail: expect.stringContaining('not present as a source'),
|
||||
fallback: 'flagged',
|
||||
},
|
||||
]);
|
||||
|
|
@ -175,6 +176,7 @@ describe('reconciliation emit tools', () => {
|
|||
{
|
||||
rawPath: 'metrics/conversion.yml',
|
||||
reason: 'no_physical_table',
|
||||
detail: expect.stringContaining('not present as a source'),
|
||||
fallback: 'flagged',
|
||||
},
|
||||
]);
|
||||
|
|
@ -199,6 +201,27 @@ describe('reconciliation emit tools', () => {
|
|||
expect(stageIndex.unmappedFallbacks).toEqual([]);
|
||||
});
|
||||
|
||||
it('rejects missing-table fallback decisions when the table resolves to an existing semantic source', async () => {
|
||||
const stageIndex = makeStageIndex();
|
||||
const tool = createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: new Set(['cards/revenue.json']),
|
||||
tableRefExists: async (tableRef) => tableRef === 'orbit_analytics.mart_revenue_daily',
|
||||
});
|
||||
|
||||
const output = await executeTool(tool, {
|
||||
rawPath: 'cards/revenue.json',
|
||||
reason: 'no_physical_table',
|
||||
tableRef: 'orbit_analytics.mart_revenue_daily',
|
||||
fallback: 'wiki_only',
|
||||
});
|
||||
|
||||
expect(output).toContain(
|
||||
'Error: tableRef "orbit_analytics.mart_revenue_daily" already resolves to a semantic source',
|
||||
);
|
||||
expect(stageIndex.unmappedFallbacks).toEqual([]);
|
||||
});
|
||||
|
||||
it('records explicit artifact resolutions for provenance rows', async () => {
|
||||
const stageIndex = makeStageIndex();
|
||||
const tool = createEmitArtifactResolutionTool({
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { StageIndex, UnmappedFallbackRecord } from '../stages/stage-index.types.js';
|
||||
import type { StageIndex, UnmappedFallbackRecord, UnmappedFallbackReason } from '../stages/stage-index.types.js';
|
||||
|
||||
interface EmitUnmappedFallbackDeps {
|
||||
stageIndex: StageIndex;
|
||||
allowedPaths: ReadonlySet<string>;
|
||||
tableRefExists?: (tableRef: string) => Promise<boolean>;
|
||||
}
|
||||
|
||||
const unmappedFallbackReasonSchema = z.enum([
|
||||
|
|
@ -22,31 +23,78 @@ function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallb
|
|||
return left.rawPath === right.rawPath && left.reason === right.reason && left.fallback === right.fallback;
|
||||
}
|
||||
|
||||
// Generates a canonical description for each reason so the recorded `detail`
|
||||
// is always consistent with the reason code. Free-form text from the LLM
|
||||
// previously caused contradictions like "no_physical_table" being explained
|
||||
// as "no mapped connection exists" — the tool now owns the core sentence and
|
||||
// the LLM may add optional clarification context.
|
||||
function canonicalDetail(reason: UnmappedFallbackReason, tableRef: string | undefined): string {
|
||||
const tableClause = tableRef ? `'${tableRef}'` : 'the referenced object';
|
||||
switch (reason) {
|
||||
case 'no_physical_table':
|
||||
return `${tableClause} is described but is not present as a source in any mapped warehouse/dbt connection.`;
|
||||
case 'no_connection_mapping':
|
||||
return `${tableClause} has no non-Notion warehouse/dbt connection to map against.`;
|
||||
case 'missing_target_table':
|
||||
return `${tableClause} is referenced but the target table could not be located.`;
|
||||
case 'looker_template_unresolved':
|
||||
return `${tableClause} uses LookML templating that could not be resolved.`;
|
||||
case 'derived_table_not_supported':
|
||||
return `${tableClause} is a derived/inline definition that is not yet supported as a semantic-layer source.`;
|
||||
case 'multiple_table_references':
|
||||
return `${tableClause} references multiple tables; cannot map to a single source.`;
|
||||
case 'unsupported_dialect':
|
||||
return `${tableClause} uses a SQL dialect that is not yet supported.`;
|
||||
case 'parse_error':
|
||||
return `${tableClause} could not be parsed.`;
|
||||
}
|
||||
}
|
||||
|
||||
function requiresMissingTableValidation(reason: UnmappedFallbackReason): boolean {
|
||||
return reason === 'no_physical_table' || reason === 'missing_target_table';
|
||||
}
|
||||
|
||||
export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
|
||||
return tool({
|
||||
description:
|
||||
'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The reason MUST be one of the structured codes; put any human-readable context in detail.',
|
||||
'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The tool generates the canonical detail from the structured reason and optional tableRef; use clarification only to add context that does not contradict the reason code.',
|
||||
inputSchema: z.object({
|
||||
rawPath: z.string().min(1),
|
||||
reason: unmappedFallbackReasonSchema,
|
||||
detail: z.string().optional(),
|
||||
tableRef: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe('The fully-qualified table or source reference that triggered the fallback (e.g. "orbit_analytics.customer"). Used to generate canonical detail text.'),
|
||||
clarification: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe('Optional extra context appended to the canonical detail. Must not contradict the reason code.'),
|
||||
fallback: z.enum(['sql_standalone', 'wiki_only', 'flagged']),
|
||||
}),
|
||||
execute: async (input): Promise<string> => {
|
||||
if (!deps.allowedPaths.has(input.rawPath)) {
|
||||
return `Error: rawPath "${input.rawPath}" is not available to this ingest stage`;
|
||||
}
|
||||
if (input.tableRef && requiresMissingTableValidation(input.reason) && deps.tableRefExists) {
|
||||
const exists = await deps.tableRefExists(input.tableRef);
|
||||
if (exists) {
|
||||
return `Error: tableRef "${input.tableRef}" already resolves to a semantic source; do not record ${input.reason} for an existing table.`;
|
||||
}
|
||||
}
|
||||
|
||||
const base = canonicalDetail(input.reason, input.tableRef);
|
||||
const detail = input.clarification ? `${base} ${input.clarification.trim()}`.trim() : base;
|
||||
|
||||
const record: UnmappedFallbackRecord = {
|
||||
rawPath: input.rawPath,
|
||||
reason: input.reason,
|
||||
...(input.detail !== undefined ? { detail: input.detail } : {}),
|
||||
detail,
|
||||
fallback: input.fallback,
|
||||
};
|
||||
if (!deps.stageIndex.unmappedFallbacks.some((candidate) => sameUnmappedFallback(candidate, record))) {
|
||||
deps.stageIndex.unmappedFallbacks.push(record);
|
||||
}
|
||||
return `recorded unmapped fallback for ${record.rawPath} (${record.fallback})`;
|
||||
return `recorded unmapped fallback for ${record.rawPath} (${record.fallback}): ${detail}`;
|
||||
},
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,14 @@ describe('stage_list tool', () => {
|
|||
unitKey: 'u2',
|
||||
rawFiles: ['b.yml'],
|
||||
status: 'success',
|
||||
actions: [{ target: 'wiki', type: 'created', key: 'page_b', detail: '' }],
|
||||
actions: [
|
||||
{
|
||||
target: 'wiki',
|
||||
type: 'created',
|
||||
key: 'page_b',
|
||||
detail: 'tables: orbit_analytics.customer',
|
||||
},
|
||||
],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
|
|
@ -36,6 +43,7 @@ describe('stage_list tool', () => {
|
|||
expect(out).toContain('src_a');
|
||||
expect(out).toContain('u2');
|
||||
expect(out).toContain('page_b');
|
||||
expect(out).toContain('tables: orbit_analytics.customer');
|
||||
});
|
||||
|
||||
it('says empty when no writes', async () => {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ export interface StageListDeps {
|
|||
stageIndex: StageIndex;
|
||||
}
|
||||
|
||||
function formatActionDetail(detail: string): string {
|
||||
return detail.trim().replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
export function createStageListTool(deps: StageListDeps) {
|
||||
return tool({
|
||||
description:
|
||||
|
|
@ -20,7 +24,14 @@ export function createStageListTool(deps: StageListDeps) {
|
|||
const actions =
|
||||
wu.actions.length === 0
|
||||
? ' (no actions)'
|
||||
: wu.actions.map((a) => ` - ${a.target}:${a.type} ${a.key}`).join('\n');
|
||||
: wu.actions
|
||||
.map((a) => {
|
||||
const detail = formatActionDetail(a.detail);
|
||||
return detail.length > 0
|
||||
? ` - ${a.target}:${a.type} ${a.key}; detail: ${detail}`
|
||||
: ` - ${a.target}:${a.type} ${a.key}`;
|
||||
})
|
||||
.join('\n');
|
||||
return `- unitKey: ${wu.unitKey} (status=${wu.status})\n rawFiles: ${wu.rawFiles.join(', ') || '(none)'}\n actions:\n${actions}`;
|
||||
})
|
||||
.join('\n');
|
||||
|
|
|
|||
|
|
@ -0,0 +1,196 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { ToolCallLogEntry } from './tool-call-logger.js';
|
||||
import { createMutableToolTranscriptSummary, recordToolTranscriptEntry } from './tool-transcript-summary.js';
|
||||
|
||||
function entry(overrides: Partial<ToolCallLogEntry>): ToolCallLogEntry {
|
||||
return {
|
||||
ts: '2026-05-11T00:00:00.000Z',
|
||||
wuKey: 'wu-1',
|
||||
toolName: 'wiki_write',
|
||||
durationMs: 1,
|
||||
input: {},
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('tool transcript summaries', () => {
|
||||
it('keeps recovered wiki_write structured failures out of fatal failures', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
output: { structured: { success: false, key: 'orbit-customers' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
output: { structured: { success: true, key: 'orbit-customers' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('counts unrecovered wiki_remove structured failures as fatal transcript errors', () => {
|
||||
const summary = createMutableToolTranscriptSummary('reconcile', '/tmp/reconcile.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(summary, {
|
||||
ts: '2026-05-11T00:00:00.000Z',
|
||||
wuKey: 'reconcile',
|
||||
toolCallId: 'remove-1',
|
||||
toolName: 'wiki_remove',
|
||||
durationMs: 1,
|
||||
input: { key: 'duplicate-page' },
|
||||
output: { structured: { success: false, key: 'duplicate-page' } },
|
||||
});
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(1);
|
||||
});
|
||||
|
||||
it('keeps unrecovered structured write failures fatal', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
output: { structured: { success: false, key: 'orbit-customers' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(1);
|
||||
});
|
||||
|
||||
it('treats a later sl_edit_source success as recovery for the same SL source', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'warehouse', sourceName: 'orbit_customers' },
|
||||
output: { structured: { success: false, sourceName: 'orbit_customers' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_edit_source',
|
||||
input: { connectionId: 'warehouse', sourceName: 'orbit_customers' },
|
||||
output: { structured: { success: true, sourceName: 'orbit_customers' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('treats explicit unmapped fallback as recovery for guarded SL write failures', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'dbt-main', sourceName: 'stg_accounts' },
|
||||
output: { structured: { success: false, sourceName: 'stg_accounts' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'emit_unmapped_fallback',
|
||||
input: { rawPath: 'models/schema.yml', reason: 'no_physical_table', tableRef: 'stg_accounts', fallback: 'wiki_only' },
|
||||
output: 'recorded unmapped fallback for models/schema.yml (wiki_only)',
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('treats an untargeted unmapped fallback as recovery when there is only one pending SL failure', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'dbt-main', sourceName: 'stg_accounts' },
|
||||
output: { structured: { success: false, sourceName: 'stg_accounts' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'emit_unmapped_fallback',
|
||||
input: { rawPath: 'models/schema.yml', reason: 'no_physical_table', fallback: 'wiki_only' },
|
||||
output: 'recorded unmapped fallback for models/schema.yml (wiki_only)',
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('keeps unrelated SL write failures fatal when one source gets an unmapped fallback', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'dbt-main', sourceName: 'stg_accounts' },
|
||||
output: { structured: { success: false, sourceName: 'stg_accounts' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'dbt-main', sourceName: 'stg_orders' },
|
||||
output: { structured: { success: false, sourceName: 'stg_orders' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'emit_unmapped_fallback',
|
||||
input: { rawPath: 'models/schema.yml', reason: 'no_physical_table', tableRef: 'stg_accounts', fallback: 'wiki_only' },
|
||||
output: 'recorded unmapped fallback for models/schema.yml (wiki_only)',
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(2);
|
||||
expect(summary.fatalErrorCount).toBe(1);
|
||||
});
|
||||
|
||||
it('keeps thrown tool errors fatal even after a successful write', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
error: { message: 'tool crashed' },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
output: { structured: { success: true, key: 'orbit-customers' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(1);
|
||||
});
|
||||
});
|
||||
185
packages/context/src/ingest/tools/tool-transcript-summary.ts
Normal file
185
packages/context/src/ingest/tools/tool-transcript-summary.ts
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
import type { ToolCallLogEntry } from './tool-call-logger.js';
|
||||
|
||||
export interface MutableToolTranscriptSummary {
|
||||
unitKey: string;
|
||||
path: string;
|
||||
toolCallCount: number;
|
||||
errorCount: number;
|
||||
fatalErrorCount: number;
|
||||
toolNames: Set<string>;
|
||||
hardErrorCount: number;
|
||||
recoverableFailureCounts: Map<string, number>;
|
||||
}
|
||||
|
||||
export function createMutableToolTranscriptSummary(unitKey: string, path: string): MutableToolTranscriptSummary {
|
||||
return {
|
||||
unitKey,
|
||||
path,
|
||||
toolCallCount: 0,
|
||||
errorCount: 0,
|
||||
fatalErrorCount: 0,
|
||||
toolNames: new Set<string>(),
|
||||
hardErrorCount: 0,
|
||||
recoverableFailureCounts: new Map<string, number>(),
|
||||
};
|
||||
}
|
||||
|
||||
export function recordToolTranscriptEntry(summary: MutableToolTranscriptSummary, entry: ToolCallLogEntry): void {
|
||||
summary.toolCallCount += 1;
|
||||
summary.toolNames.add(entry.toolName);
|
||||
|
||||
if (entry.error) {
|
||||
summary.errorCount += 1;
|
||||
summary.hardErrorCount += 1;
|
||||
refreshFatalErrorCount(summary);
|
||||
return;
|
||||
}
|
||||
|
||||
const recoverableFailureKey = recoverableStructuredFailureKey(entry);
|
||||
if (recoverableFailureKey) {
|
||||
summary.errorCount += 1;
|
||||
summary.recoverableFailureCounts.set(
|
||||
recoverableFailureKey,
|
||||
(summary.recoverableFailureCounts.get(recoverableFailureKey) ?? 0) + 1,
|
||||
);
|
||||
refreshFatalErrorCount(summary);
|
||||
return;
|
||||
}
|
||||
|
||||
const recoveryKey = recoverableStructuredSuccessKey(entry);
|
||||
if (recoveryKey) {
|
||||
summary.recoverableFailureCounts.delete(recoveryKey);
|
||||
}
|
||||
if (entry.toolName === 'emit_unmapped_fallback') {
|
||||
const fallbackTarget = fallbackSlTargetKey(entry);
|
||||
const pendingSlKeys = [...summary.recoverableFailureCounts.keys()].filter((key) => key.startsWith('sl:'));
|
||||
for (const key of pendingSlKeys) {
|
||||
if (
|
||||
(fallbackTarget && slFailureKeyMatchesFallback(key, fallbackTarget)) ||
|
||||
(!fallbackTarget && pendingSlKeys.length === 1)
|
||||
) {
|
||||
summary.recoverableFailureCounts.delete(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
refreshFatalErrorCount(summary);
|
||||
}
|
||||
|
||||
function refreshFatalErrorCount(summary: MutableToolTranscriptSummary): void {
|
||||
summary.fatalErrorCount =
|
||||
summary.hardErrorCount + [...summary.recoverableFailureCounts.values()].reduce((sum, count) => sum + count, 0);
|
||||
}
|
||||
|
||||
function recoverableStructuredFailureKey(entry: ToolCallLogEntry): string | null {
|
||||
if (!isStructuredToolFailure(entry.output)) {
|
||||
return null;
|
||||
}
|
||||
if (entry.toolName === 'wiki_write' || entry.toolName === 'wiki_remove') {
|
||||
return wikiTargetKey(entry);
|
||||
}
|
||||
if (entry.toolName === 'sl_write_source') {
|
||||
return slTargetKey(entry);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function recoverableStructuredSuccessKey(entry: ToolCallLogEntry): string | null {
|
||||
if (!isStructuredToolSuccess(entry.output)) {
|
||||
return null;
|
||||
}
|
||||
if (entry.toolName === 'wiki_write' || entry.toolName === 'wiki_remove') {
|
||||
return wikiTargetKey(entry);
|
||||
}
|
||||
if (entry.toolName === 'sl_write_source' || entry.toolName === 'sl_edit_source') {
|
||||
return slTargetKey(entry);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function isStructuredToolFailure(output: unknown): boolean {
|
||||
return structuredSuccess(output) === false;
|
||||
}
|
||||
|
||||
function isStructuredToolSuccess(output: unknown): boolean {
|
||||
return structuredSuccess(output) === true;
|
||||
}
|
||||
|
||||
function structuredSuccess(output: unknown): boolean | null {
|
||||
const structured = recordField(output, 'structured');
|
||||
const success = structured?.success;
|
||||
return typeof success === 'boolean' ? success : null;
|
||||
}
|
||||
|
||||
function wikiTargetKey(entry: ToolCallLogEntry): string | null {
|
||||
const key = stringField(recordField(entry.output, 'structured'), 'key') ?? stringField(entry.input, 'key');
|
||||
return key ? `wiki:${key}` : null;
|
||||
}
|
||||
|
||||
function slTargetKey(entry: ToolCallLogEntry): string | null {
|
||||
const structured = recordField(entry.output, 'structured');
|
||||
const sourceName = stringField(structured, 'sourceName') ?? stringField(entry.input, 'sourceName');
|
||||
if (!sourceName) {
|
||||
return null;
|
||||
}
|
||||
const connectionId = stringField(entry.input, 'connectionId') ?? '';
|
||||
return `sl:${connectionId}:${sourceName}`;
|
||||
}
|
||||
|
||||
function fallbackSlTargetKey(entry: ToolCallLogEntry): { connectionId?: string; sourceName: string } | null {
|
||||
const tableRef = stringField(entry.input, 'tableRef');
|
||||
if (!tableRef) {
|
||||
return null;
|
||||
}
|
||||
const sourceName = finalReferenceSegment(tableRef);
|
||||
if (!sourceName) {
|
||||
return null;
|
||||
}
|
||||
const connectionId = stringField(entry.input, 'connectionId');
|
||||
return {
|
||||
sourceName,
|
||||
...(connectionId ? { connectionId } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
function slFailureKeyMatchesFallback(
|
||||
failureKey: string,
|
||||
fallback: { connectionId?: string; sourceName: string },
|
||||
): boolean {
|
||||
const match = /^sl:([^:]*):(.*)$/.exec(failureKey);
|
||||
if (!match) {
|
||||
return false;
|
||||
}
|
||||
const [, connectionId, sourceName] = match;
|
||||
if (fallback.connectionId && connectionId !== fallback.connectionId) {
|
||||
return false;
|
||||
}
|
||||
return normalizeReferenceSegment(sourceName ?? '') === normalizeReferenceSegment(fallback.sourceName);
|
||||
}
|
||||
|
||||
function finalReferenceSegment(value: string): string {
|
||||
const normalized = value
|
||||
.trim()
|
||||
.replace(/["`]/g, '')
|
||||
.replace(/[\[\]]/g, '');
|
||||
return normalized.split('.').filter(Boolean).at(-1) ?? '';
|
||||
}
|
||||
|
||||
function normalizeReferenceSegment(value: string): string {
|
||||
return finalReferenceSegment(value).toLowerCase();
|
||||
}
|
||||
|
||||
function recordField(value: unknown, field: string): Record<string, unknown> | null {
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
return null;
|
||||
}
|
||||
const nested = (value as Record<string, unknown>)[field];
|
||||
return nested && typeof nested === 'object' && !Array.isArray(nested) ? (nested as Record<string, unknown>) : null;
|
||||
}
|
||||
|
||||
function stringField(value: unknown, field: string): string | null {
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
return null;
|
||||
}
|
||||
const raw = (value as Record<string, unknown>)[field];
|
||||
return typeof raw === 'string' && raw.length > 0 ? raw : null;
|
||||
}
|
||||
|
|
@ -617,7 +617,7 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
userId: 'local-user',
|
||||
key: '../outside',
|
||||
}),
|
||||
).rejects.toThrow('Unsafe knowledge key');
|
||||
).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".');
|
||||
|
||||
await expect(
|
||||
ports.semanticLayer?.readSource({
|
||||
|
|
|
|||
|
|
@ -512,7 +512,7 @@ export function createLocalProjectMcpContextPorts(
|
|||
}
|
||||
|
||||
const yaml =
|
||||
input.yaml ?? YAML.stringify({ ...input.source, name: input.sourceName }, { indent: 2, lineWidth: 0 });
|
||||
input.yaml ?? YAML.stringify({ ...input.source, name: input.sourceName }, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
parseYamlRecord(yaml);
|
||||
await project.fileStore.writeFile(
|
||||
path,
|
||||
|
|
|
|||
|
|
@ -90,6 +90,25 @@ describe('memory runtime assets', () => {
|
|||
expect(body).not.toContain('a standalone SL source only when raw evidence contains enough table or SQL structure');
|
||||
});
|
||||
|
||||
it('ships Metabase guidance that avoids invalid joins for SQL-only card outputs', async () => {
|
||||
const body = await readFile(join(skillsDir, 'metabase_ingest', 'SKILL.md'), 'utf-8');
|
||||
|
||||
expect(body).toContain('Do not declare a KTX join just because the card SQL joins that table internally');
|
||||
expect(body).toContain('only when the card output exposes a local key that matches the target source grain');
|
||||
expect(body).toContain('If `sl_discover` resolves the table, it is not outside the manifest');
|
||||
expect(body).toContain('reason: "parse_error"');
|
||||
expect(body).not.toContain('Tables outside the manifest');
|
||||
expect(body).not.toContain('reason: "metabase_sql_untranslated"');
|
||||
});
|
||||
|
||||
it('ships Notion guidance for physical-table fallbacks and duplicate wiki reconciliation', async () => {
|
||||
const body = await readFile(join(skillsDir, 'notion_synthesize', 'SKILL.md'), 'utf-8');
|
||||
|
||||
expect(body).toContain('Notion `dataSourceCount` counts Notion databases/data sources only');
|
||||
expect(body).toContain('Search existing wiki pages for the same `tables:` or `sl_refs:` frontmatter');
|
||||
expect(body).toContain('no_physical_table');
|
||||
});
|
||||
|
||||
it('packages LookML connection-mismatch SL gate guidance', async () => {
|
||||
const body = await readFile(join(skillsDir, 'lookml_ingest', 'SKILL.md'), 'utf-8');
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ export interface MemoryAction {
|
|||
key: string;
|
||||
detail: string;
|
||||
targetConnectionId?: string | null;
|
||||
rawPaths?: string[];
|
||||
}
|
||||
|
||||
export interface MemoryAgentResult {
|
||||
|
|
|
|||
|
|
@ -299,7 +299,7 @@ export async function writeLocalScanManifestShards(
|
|||
const path = `${schemaDir(input.connectionId)}/${shardKey}.yaml`;
|
||||
await input.project.fileStore.writeFile(
|
||||
path,
|
||||
YAML.stringify(shard, { indent: 2, lineWidth: 0 }),
|
||||
YAML.stringify(shard, { indent: 2, lineWidth: 0, version: '1.1' }),
|
||||
LOCAL_AUTHOR,
|
||||
LOCAL_AUTHOR_EMAIL,
|
||||
`scan(${LIVE_DATABASE_ADAPTER}): write manifest shard ${shardKey} syncId=${input.syncId}`,
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void>
|
|||
|
||||
async function seedWikiProject(project: KtxLocalProject): Promise<void> {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Semantic revenue definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -155,7 +155,7 @@ async function seedWikiProject(project: KtxLocalProject): Promise<void> {
|
|||
slRefs: ['orders'],
|
||||
});
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'support/escalations',
|
||||
key: 'support-escalations',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Support escalation process',
|
||||
content: 'Escalations move urgent support tickets to the operations queue.',
|
||||
|
|
@ -338,9 +338,9 @@ describe('SQLite hybrid search backend conformance', () => {
|
|||
surface: 'wiki',
|
||||
caseName: 'lexical page ranking',
|
||||
results: lexical.map(toWikiConformanceResult),
|
||||
expectedTopIds: ['metrics/revenue'],
|
||||
expectedTopIds: ['metrics-revenue'],
|
||||
expectedReasonsById: {
|
||||
'metrics/revenue': ['lexical'],
|
||||
'metrics-revenue': ['lexical'],
|
||||
},
|
||||
expectedLanes: {
|
||||
lexical: { status: 'available' },
|
||||
|
|
@ -359,9 +359,9 @@ describe('SQLite hybrid search backend conformance', () => {
|
|||
surface: 'wiki',
|
||||
caseName: 'semantic page ranking',
|
||||
results: semantic.map(toWikiConformanceResult),
|
||||
expectedTopIds: ['metrics/revenue'],
|
||||
expectedTopIds: ['metrics-revenue'],
|
||||
expectedReasonsById: {
|
||||
'metrics/revenue': ['semantic'],
|
||||
'metrics-revenue': ['semantic'],
|
||||
},
|
||||
expectedLanes: {
|
||||
semantic: { status: 'available' },
|
||||
|
|
@ -378,9 +378,9 @@ describe('SQLite hybrid search backend conformance', () => {
|
|||
surface: 'wiki',
|
||||
caseName: 'token page fallback',
|
||||
results: token.map(toWikiConformanceResult),
|
||||
expectedTopIds: ['metrics/revenue'],
|
||||
expectedTopIds: ['metrics-revenue'],
|
||||
expectedReasonsById: {
|
||||
'metrics/revenue': ['token'],
|
||||
'metrics-revenue': ['token'],
|
||||
},
|
||||
expectedLanes: {
|
||||
token: { status: 'available' },
|
||||
|
|
|
|||
|
|
@ -89,6 +89,39 @@ describe('local semantic-layer helpers', () => {
|
|||
await expect(validateLocalSlSource(ORDERS_YAML)).resolves.toEqual({ valid: true, errors: [] });
|
||||
});
|
||||
|
||||
it('validates table-backed sources against matching physical manifests when project context is provided', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml',
|
||||
`tables:
|
||||
int_active_contract_arr:
|
||||
table: orbit_analytics.int_active_contract_arr
|
||||
columns:
|
||||
- { name: contract_id, type: string }
|
||||
- { name: contract_arr_cents, type: number }
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add warehouse manifest',
|
||||
);
|
||||
|
||||
const invalidDbtSource = [
|
||||
'name: int_active_contract_arr',
|
||||
'table: orbit_analytics.int_active_contract_arr',
|
||||
'grain: [contract_id]',
|
||||
'columns:',
|
||||
' - { name: contract_id, type: string }',
|
||||
' - { name: arr_cents, type: number }',
|
||||
'measures:',
|
||||
' - { name: arr, expr: sum(arr_cents) }',
|
||||
'',
|
||||
].join('\n');
|
||||
|
||||
const result = await validateLocalSlSource(invalidDbtSource, { project, connectionId: 'dbt-main' });
|
||||
expect(result.valid).toBe(false);
|
||||
expect(result.errors.join('\n')).toContain('arr_cents');
|
||||
expect(result.errors.join('\n')).toContain('absent from physical table');
|
||||
});
|
||||
|
||||
it('lists and reads manifest-backed scan sources as queryable sources', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/public.yaml',
|
||||
|
|
@ -345,7 +378,7 @@ describe('local semantic-layer helpers', () => {
|
|||
|
||||
await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({
|
||||
valid: false,
|
||||
errors: [expect.stringContaining('grain')],
|
||||
errors: expect.arrayContaining([expect.stringContaining('grain')]),
|
||||
});
|
||||
|
||||
await expect(
|
||||
|
|
|
|||
|
|
@ -7,7 +7,12 @@ import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index
|
|||
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
import { composeOverlay, type ManifestTableEntry, projectManifestEntry } from './semantic-layer.service.js';
|
||||
import {
|
||||
composeOverlay,
|
||||
type ManifestTableEntry,
|
||||
projectManifestEntry,
|
||||
SemanticLayerService,
|
||||
} from './semantic-layer.service.js';
|
||||
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
|
||||
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
|
||||
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
|
||||
|
|
@ -157,7 +162,7 @@ function summarizeSource(args: { connectionId: string; path: string; raw: string
|
|||
}
|
||||
|
||||
function sourceToYaml(source: SemanticLayerSource): string {
|
||||
return YAML.stringify(source, { indent: 2, lineWidth: 0 });
|
||||
return YAML.stringify(source, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
}
|
||||
|
||||
function summarizeSemanticSource(args: {
|
||||
|
|
@ -246,12 +251,24 @@ export async function loadLocalSlSourceRecords(
|
|||
return [...sources.values()].sort((left, right) => left.name.localeCompare(right.name));
|
||||
}
|
||||
|
||||
export async function validateLocalSlSource(rawYaml: string): Promise<LocalSlValidationResult> {
|
||||
export async function validateLocalSlSource(
|
||||
rawYaml: string,
|
||||
options?: { project?: KtxLocalProject; connectionId?: string },
|
||||
): Promise<LocalSlValidationResult> {
|
||||
try {
|
||||
const parsed = parseYamlRecord(rawYaml);
|
||||
const schema = parsed.table || parsed.sql ? sourceDefinitionSchema : sourceOverlaySchema;
|
||||
schema.parse(parsed);
|
||||
return { valid: true, errors: [] };
|
||||
const result = schema.parse(parsed);
|
||||
const errors: string[] = [];
|
||||
|
||||
if (options?.project && options.connectionId && 'table' in result && result.table) {
|
||||
const service = new SemanticLayerService(options.project.fileStore, {} as never, {} as never);
|
||||
errors.push(
|
||||
...(await service.validatePhysicalTableReferences(options.connectionId, [result as SemanticLayerSource])),
|
||||
);
|
||||
}
|
||||
|
||||
return { valid: errors.length === 0, errors };
|
||||
} catch (error) {
|
||||
return { valid: false, errors: validationErrors(error) };
|
||||
}
|
||||
|
|
@ -261,7 +278,7 @@ export async function writeLocalSlSource(
|
|||
project: KtxLocalProject,
|
||||
input: { connectionId: string; sourceName: string; yaml: string },
|
||||
): Promise<KtxFileWriteResult> {
|
||||
const validation = await validateLocalSlSource(input.yaml);
|
||||
const validation = await validateLocalSlSource(input.yaml, { project, connectionId: input.connectionId });
|
||||
if (!validation.valid) {
|
||||
throw new Error(`Invalid semantic-layer source: ${validation.errors.join('; ')}`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,6 +63,14 @@ const sourceFreshnessSchema = z.object({
|
|||
dbt: freshnessDbtSchema.optional(),
|
||||
});
|
||||
|
||||
// Identifiers (grain entries, column names) must be unqualified output-column
|
||||
// names. A dot would mean the agent emitted a table-qualified reference like
|
||||
// `activity.account_id` — those break SQL generation and grain semantics.
|
||||
const unqualifiedNameSchema = z
|
||||
.string()
|
||||
.min(1)
|
||||
.regex(/^[^.]+$/, "must be unqualified (no '.') — use the output column name");
|
||||
|
||||
const joinDeclarationSchema = z.object({
|
||||
to: z.string().min(1),
|
||||
on: z.string().min(1),
|
||||
|
|
@ -71,7 +79,7 @@ const joinDeclarationSchema = z.object({
|
|||
});
|
||||
|
||||
const sourceColumnSchema = z.object({
|
||||
name: z.string().min(1),
|
||||
name: unqualifiedNameSchema,
|
||||
// type/description optional on standalone sources: compose-time enrichment fills them
|
||||
// from the manifest entry named in `inherits_columns_from`. If the agent does not set
|
||||
// `inherits_columns_from`, or the column is not in the manifest, type must be present
|
||||
|
|
@ -90,7 +98,7 @@ const sourceColumnSchema = z.object({
|
|||
/** Overlay column: type requires expr (structural types are inherited from manifest). */
|
||||
const overlayColumnSchema = z
|
||||
.object({
|
||||
name: z.string().min(1),
|
||||
name: unqualifiedNameSchema,
|
||||
type: z.enum(columnTypeValues).optional(),
|
||||
role: z.enum(columnRoleValues).optional(),
|
||||
visibility: z.enum(columnVisibilityValues).optional(),
|
||||
|
|
@ -118,8 +126,13 @@ export const sourceDefinitionSchema = z
|
|||
// agent write `columns: [{name: FOO}]` instead of redeclaring known fields.
|
||||
// Lookup is fuzzy: bare key, fully-qualified table path, or any suffix all match.
|
||||
inherits_columns_from: z.string().optional(),
|
||||
grain: z.array(z.string()).min(1),
|
||||
columns: z.array(sourceColumnSchema).default([]),
|
||||
grain: z.array(unqualifiedNameSchema).min(1),
|
||||
// Standalone sources MUST declare columns. An empty columns array means
|
||||
// there's nothing to query or join against and breaks grain validation
|
||||
// (the grain must reference declared columns). Inheritance from a manifest
|
||||
// via `inherits_columns_from` only fills in type/description on declared
|
||||
// columns — the column names themselves must be listed here.
|
||||
columns: z.array(sourceColumnSchema).min(1),
|
||||
joins: z.array(joinDeclarationSchema).default([]),
|
||||
measures: z.array(slMeasureDefinitionSchema).default([]),
|
||||
segments: z.array(segmentDefinitionSchema).optional(),
|
||||
|
|
@ -139,7 +152,7 @@ export const sourceOverlaySchema = z
|
|||
name: z.string().min(1),
|
||||
description: z.string().optional(),
|
||||
descriptions: z.record(z.string(), z.string()).optional(),
|
||||
grain: z.array(z.string()).optional(),
|
||||
grain: z.array(unqualifiedNameSchema).optional(),
|
||||
columns: z.array(overlayColumnSchema).optional(),
|
||||
joins: z.array(joinDeclarationSchema).optional(),
|
||||
measures: z.array(slMeasureDefinitionSchema).optional(),
|
||||
|
|
|
|||
|
|
@ -388,6 +388,34 @@ describe('sourceDefinitionSchema', () => {
|
|||
externalOwner: 'analytics',
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects qualified grain names (e.g. 'activity.account_id')", () => {
|
||||
const result = sourceDefinitionSchema.safeParse({
|
||||
name: 'activity',
|
||||
table: 'public.activity',
|
||||
grain: ['activity.account_id'],
|
||||
columns: [{ name: 'account_id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
});
|
||||
expect(result.success).toBe(false);
|
||||
if (result.success) return;
|
||||
expect(result.error.issues.some((i) => i.path.join('.').startsWith('grain'))).toBe(true);
|
||||
});
|
||||
|
||||
it('rejects qualified column names', () => {
|
||||
const result = sourceDefinitionSchema.safeParse({
|
||||
name: 'activity',
|
||||
table: 'public.activity',
|
||||
grain: ['account_id'],
|
||||
columns: [{ name: 'activity.account_id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
});
|
||||
expect(result.success).toBe(false);
|
||||
if (result.success) return;
|
||||
expect(result.error.issues.some((i) => i.path.join('.').startsWith('columns'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('projectManifestEntry', () => {
|
||||
|
|
@ -781,6 +809,210 @@ describe('validateWithProposedSource', () => {
|
|||
expect(result.errors[0]).toMatch(/Overlay 'orphan' has no matching manifest entry/);
|
||||
expect(pythonPort.validateSources).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('rejects table-backed sources whose declared columns are absent from a matching physical manifest', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockImplementation((path: string) => {
|
||||
if (path === schemaPath) {
|
||||
return Promise.resolve({
|
||||
content: [
|
||||
'tables:',
|
||||
' int_procurement_qualifying_actions:',
|
||||
' table: orbit_analytics.int_procurement_qualifying_actions',
|
||||
' columns:',
|
||||
' - { name: action_id, type: string }',
|
||||
' - { name: account_id, type: string }',
|
||||
' - { name: user_id, type: string }',
|
||||
' - { name: action_date, type: time }',
|
||||
' - { name: action_type, type: string }',
|
||||
].join('\n'),
|
||||
});
|
||||
}
|
||||
return Promise.reject(new Error(`Unexpected readFile: ${path}`));
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'int_procurement_qualifying_actions',
|
||||
table: 'orbit_analytics.int_procurement_qualifying_actions',
|
||||
grain: ['purchase_request_id'],
|
||||
columns: [
|
||||
{ name: 'purchase_request_id', type: 'string' },
|
||||
{ name: 'account_id', type: 'string' },
|
||||
{ name: 'requester_user_id', type: 'string' },
|
||||
{ name: 'action_week', type: 'time' },
|
||||
],
|
||||
joins: [],
|
||||
measures: [{ name: 'qualifying_action_count', expr: 'count(purchase_request_id)' }],
|
||||
});
|
||||
|
||||
expect(result.errors.join('\n')).toMatch(/declared column\(s\) absent from physical table/);
|
||||
expect(result.errors.join('\n')).toMatch(/purchase_request_id/);
|
||||
expect(result.errors.join('\n')).toMatch(/requester_user_id/);
|
||||
expect(result.errors.join('\n')).toMatch(/action_week/);
|
||||
expect(result.errors.join('\n')).toMatch(/measure "qualifying_action_count" references unknown column\(s\)/);
|
||||
});
|
||||
|
||||
it('keeps valid table-backed sources clean when a physical manifest matches', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'tables:',
|
||||
' mart_revenue_daily:',
|
||||
' table: orbit_analytics.mart_revenue_daily',
|
||||
' columns:',
|
||||
' - { name: revenue_date, type: time }',
|
||||
' - { name: gross_revenue_cents, type: number }',
|
||||
' - { name: credits_cents, type: number }',
|
||||
' - { name: refunds_cents, type: number }',
|
||||
' - { name: net_revenue_cents, type: number }',
|
||||
].join('\n'),
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'mart_revenue_daily',
|
||||
table: 'orbit_analytics.mart_revenue_daily',
|
||||
grain: ['revenue_date'],
|
||||
columns: [
|
||||
{ name: 'revenue_date', type: 'time' },
|
||||
{ name: 'gross_revenue_cents', type: 'number' },
|
||||
{ name: 'credits_cents', type: 'number' },
|
||||
{ name: 'refunds_cents', type: 'number' },
|
||||
{ name: 'net_revenue_cents', type: 'number' },
|
||||
],
|
||||
joins: [],
|
||||
measures: [{ name: 'net_revenue', expr: 'sum(net_revenue_cents)' }],
|
||||
});
|
||||
|
||||
expect(result.errors).toEqual([]);
|
||||
});
|
||||
|
||||
it('allows SQL syntax tokens and cast types in physical expression validation', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'tables:',
|
||||
' mart_revenue_daily:',
|
||||
' table: orbit_analytics.mart_revenue_daily',
|
||||
' columns:',
|
||||
' - { name: order_id, type: string }',
|
||||
' - { name: revenue_date, type: time }',
|
||||
' - { name: amount, type: number }',
|
||||
' - { name: status, type: string }',
|
||||
' - { name: created_at, type: time }',
|
||||
].join('\n'),
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'mart_revenue_daily',
|
||||
table: 'orbit_analytics.mart_revenue_daily',
|
||||
grain: ['order_id'],
|
||||
columns: [
|
||||
{ name: 'order_id', type: 'string' },
|
||||
{ name: 'revenue_date', type: 'time' },
|
||||
{ name: 'amount', type: 'number' },
|
||||
{ name: 'status', type: 'string' },
|
||||
{ name: 'created_at', type: 'time' },
|
||||
{ name: 'status_text', type: 'string', expr: 'status::text' },
|
||||
],
|
||||
segments: [{ name: 'current_or_paid', expr: "created_at <= current_date OR status = 'paid'" }],
|
||||
joins: [],
|
||||
measures: [
|
||||
{ name: 'paid_amount', expr: "sum(amount) FILTER (WHERE status = 'paid')" },
|
||||
{ name: 'cast_amount_count', expr: 'count(cast(amount as text))' },
|
||||
],
|
||||
});
|
||||
|
||||
expect(result.errors).toEqual([]);
|
||||
});
|
||||
|
||||
it('rejects join keys that are absent from matched physical sources', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'tables:',
|
||||
' activity:',
|
||||
' table: orbit_analytics.activity',
|
||||
' columns:',
|
||||
' - { name: account_id, type: string }',
|
||||
' accounts:',
|
||||
' table: orbit_analytics.accounts',
|
||||
' columns:',
|
||||
' - { name: account_id, type: string }',
|
||||
].join('\n'),
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'activity',
|
||||
table: 'orbit_analytics.activity',
|
||||
grain: ['account_id'],
|
||||
columns: [{ name: 'account_id', type: 'string' }],
|
||||
joins: [{ to: 'accounts', on: 'activity.account_name = accounts.account_uuid', relationship: 'many_to_one' }],
|
||||
measures: [],
|
||||
});
|
||||
|
||||
expect(result.errors.join('\n')).toMatch(/local column "account_name"/);
|
||||
expect(result.errors.join('\n')).toMatch(/target column "account_uuid"/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('findDanglingSegmentRefs', () => {
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ export class SemanticLayerService {
|
|||
|
||||
const path = this.sourcePath(connectionId, source.name);
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source);
|
||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0 });
|
||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
|
||||
const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
|
||||
skipLock: options?.skipLock,
|
||||
|
|
@ -398,6 +398,174 @@ export class SemanticLayerService {
|
|||
return null;
|
||||
}
|
||||
|
||||
async findManifestEntryByTableRefAcrossConnections(
|
||||
preferredConnectionId: string,
|
||||
ref: string,
|
||||
): Promise<{ connectionId: string; source: SemanticLayerSource } | null> {
|
||||
const preferred = await this.findManifestEntryByTableRef(preferredConnectionId, ref);
|
||||
if (preferred) {
|
||||
return { connectionId: preferredConnectionId, source: preferred };
|
||||
}
|
||||
|
||||
for (const entry of await this.listAllManifestEntries()) {
|
||||
if (entry.connectionId === preferredConnectionId) {
|
||||
continue;
|
||||
}
|
||||
if (manifestEntryMatchesRef(entry.source, ref)) {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async validatePhysicalTableReferences(
|
||||
connectionId: string,
|
||||
sources: SemanticLayerSource[],
|
||||
): Promise<string[]> {
|
||||
const errors: string[] = [];
|
||||
const sourceNames = new Set(sources.map((s) => s.name.toLowerCase()));
|
||||
const sourcesByName = new Map(sources.map((s) => [s.name.toLowerCase(), s]));
|
||||
|
||||
for (const source of sources) {
|
||||
if (!source.table) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const manifestMatch = await this.findManifestEntryByTableRefAcrossConnections(connectionId, source.table);
|
||||
if (!manifestMatch) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const manifestSource = manifestMatch.source;
|
||||
const manifestColumns = new Map(manifestSource.columns.map((c) => [c.name.toLowerCase(), c.name]));
|
||||
const declaredColumns = source.columns ?? [];
|
||||
const declaredByLower = new Map(declaredColumns.map((c) => [c.name.toLowerCase(), c]));
|
||||
const validOutputColumns = new Set(
|
||||
declaredColumns
|
||||
.filter((c) => c.expr || manifestColumns.has(c.name.toLowerCase()))
|
||||
.map((c) => c.name.toLowerCase()),
|
||||
);
|
||||
const measureNames = new Set((source.measures ?? []).map((m) => m.name.toLowerCase()));
|
||||
const manifestLabel =
|
||||
manifestMatch.connectionId === connectionId
|
||||
? manifestSource.name
|
||||
: `${manifestMatch.connectionId}/${manifestSource.name}`;
|
||||
|
||||
const absentDeclaredColumns = declaredColumns
|
||||
.filter((c) => !c.expr && !manifestColumns.has(c.name.toLowerCase()))
|
||||
.map((c) => c.name);
|
||||
if (absentDeclaredColumns.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: table "${source.table}" matched manifest ${manifestLabel}, ` +
|
||||
`but declared column(s) absent from physical table: ${absentDeclaredColumns.join(', ')}. ` +
|
||||
`Available columns: ${[...manifestColumns.values()].join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
const missingGrainColumns = (source.grain ?? []).filter((grain) => {
|
||||
const declared = declaredByLower.get(grain.toLowerCase());
|
||||
return !declared || (!declared.expr && !manifestColumns.has(grain.toLowerCase()));
|
||||
});
|
||||
if (missingGrainColumns.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
for (const column of declaredColumns) {
|
||||
if (!column.expr) {
|
||||
continue;
|
||||
}
|
||||
const missing = missingLocalExpressionRefs({
|
||||
expr: column.expr,
|
||||
sourceName: source.name,
|
||||
sourceNames,
|
||||
validColumns: new Set([...manifestColumns.keys(), ...validOutputColumns]),
|
||||
validMeasures: new Set(),
|
||||
});
|
||||
if (missing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (const segment of source.segments ?? []) {
|
||||
const missing = missingLocalExpressionRefs({
|
||||
expr: segment.expr,
|
||||
sourceName: source.name,
|
||||
sourceNames,
|
||||
validColumns: validOutputColumns,
|
||||
validMeasures: new Set(),
|
||||
});
|
||||
if (missing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (const measure of source.measures ?? []) {
|
||||
const exprMissing = missingLocalExpressionRefs({
|
||||
expr: measure.expr,
|
||||
sourceName: source.name,
|
||||
sourceNames,
|
||||
validColumns: validOutputColumns,
|
||||
validMeasures: measureNames,
|
||||
});
|
||||
if (exprMissing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
if (measure.filter) {
|
||||
const filterMissing = missingLocalExpressionRefs({
|
||||
expr: measure.filter,
|
||||
sourceName: source.name,
|
||||
sourceNames,
|
||||
validColumns: validOutputColumns,
|
||||
validMeasures: new Set(),
|
||||
});
|
||||
if (filterMissing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const join of source.joins ?? []) {
|
||||
const parsed = parseJoinColumns(join.on, source.name, join.to);
|
||||
if (!parsed) {
|
||||
continue;
|
||||
}
|
||||
if (!validOutputColumns.has(parsed.localColumn.toLowerCase())) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: join to "${join.to}" references local column ` +
|
||||
`"${parsed.localColumn}" that is not a valid output column`,
|
||||
);
|
||||
}
|
||||
|
||||
const targetSource =
|
||||
sourcesByName.get(join.to.toLowerCase()) ??
|
||||
(await this.findManifestEntryByTableRefAcrossConnections(connectionId, join.to))?.source;
|
||||
if (targetSource) {
|
||||
const targetColumns = new Set(targetSource.columns.map((c) => c.name.toLowerCase()));
|
||||
if (!targetColumns.has(parsed.targetColumn.toLowerCase())) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: join to "${join.to}" references target column ` +
|
||||
`"${parsed.targetColumn}" that does not exist on the target source`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
async getDialectForConnection(connectionId: string): Promise<string> {
|
||||
const connection = await this.connections.getConnectionById(connectionId);
|
||||
if (!connection) {
|
||||
|
|
@ -502,10 +670,15 @@ export class SemanticLayerService {
|
|||
return { errors: [errorMsg], warnings: [], perSourceWarnings: {} };
|
||||
}
|
||||
if (!data) {
|
||||
return { errors: [], warnings: [], perSourceWarnings: {} };
|
||||
return {
|
||||
errors: await this.validatePhysicalTableReferences(connectionId, validatable),
|
||||
warnings: [],
|
||||
perSourceWarnings: {},
|
||||
};
|
||||
}
|
||||
const physicalErrors = await this.validatePhysicalTableReferences(connectionId, validatable);
|
||||
return {
|
||||
errors: data.errors ?? [],
|
||||
errors: [...(data.errors ?? []), ...physicalErrors],
|
||||
warnings: data.warnings ?? [],
|
||||
perSourceWarnings: data.per_source_warnings ?? {},
|
||||
};
|
||||
|
|
@ -531,14 +704,40 @@ export class SemanticLayerService {
|
|||
return { errors: [formatPortError(error, 'Unknown validation error')], warnings: [] };
|
||||
}
|
||||
if (!data) {
|
||||
return { errors: [], warnings: [] };
|
||||
return { errors: await this.validatePhysicalTableReferences(connectionId, sources), warnings: [] };
|
||||
}
|
||||
const physicalErrors = await this.validatePhysicalTableReferences(connectionId, sources);
|
||||
return {
|
||||
errors: data.errors ?? [],
|
||||
errors: [...(data.errors ?? []), ...physicalErrors],
|
||||
warnings: data.warnings ?? [],
|
||||
};
|
||||
}
|
||||
|
||||
private async listAllManifestEntries(): Promise<Array<{ connectionId: string; source: SemanticLayerSource }>> {
|
||||
let files: string[];
|
||||
try {
|
||||
files = (await this.configService.listFiles(SL_DIR_PREFIX)).files;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
|
||||
const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\/.+\.ya?ml$/.test(file));
|
||||
const entries: Array<{ connectionId: string; source: SemanticLayerSource }> = [];
|
||||
for (const filePath of schemaFiles) {
|
||||
const connectionId = filePath.split('/')[1];
|
||||
try {
|
||||
const { content } = await this.configService.readFile(filePath);
|
||||
const shard = YAML.parse(content) as { tables?: Record<string, ManifestTableEntry> };
|
||||
for (const [name, entry] of Object.entries(shard?.tables ?? {})) {
|
||||
entries.push({ connectionId, source: projectManifestEntry(name, entry) });
|
||||
}
|
||||
} catch {
|
||||
// skip unparseable shards
|
||||
}
|
||||
}
|
||||
return entries;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate overlays and standalone sources against the current manifest.
|
||||
* Returns warnings for stale references (non-blocking).
|
||||
|
|
@ -963,6 +1162,8 @@ const SQL_KEYWORDS = new Set([
|
|||
'in',
|
||||
'between',
|
||||
'like',
|
||||
'where',
|
||||
'filter',
|
||||
'cast',
|
||||
'coalesce',
|
||||
'nullif',
|
||||
|
|
@ -971,6 +1172,48 @@ const SQL_KEYWORDS = new Set([
|
|||
'false',
|
||||
'asc',
|
||||
'desc',
|
||||
'date',
|
||||
'day',
|
||||
'month',
|
||||
'quarter',
|
||||
'week',
|
||||
'year',
|
||||
'interval',
|
||||
'extract',
|
||||
'from',
|
||||
'over',
|
||||
'partition',
|
||||
'by',
|
||||
'rows',
|
||||
'range',
|
||||
'current',
|
||||
'current_date',
|
||||
'current_time',
|
||||
'current_timestamp',
|
||||
'localtime',
|
||||
'localtimestamp',
|
||||
'row',
|
||||
'numeric',
|
||||
'decimal',
|
||||
'int',
|
||||
'integer',
|
||||
'bigint',
|
||||
'smallint',
|
||||
'float',
|
||||
'double',
|
||||
'real',
|
||||
'string',
|
||||
'text',
|
||||
'char',
|
||||
'character',
|
||||
'varchar',
|
||||
'timestamp',
|
||||
'time',
|
||||
'uuid',
|
||||
'json',
|
||||
'jsonb',
|
||||
'bool',
|
||||
'boolean',
|
||||
]);
|
||||
|
||||
function extractColumnReferences(expr: string): string[] {
|
||||
|
|
@ -979,6 +1222,122 @@ function extractColumnReferences(expr: string): string[] {
|
|||
return [...new Set(tokens.filter((t) => !SQL_KEYWORDS.has(t.toLowerCase())))];
|
||||
}
|
||||
|
||||
function manifestEntryMatchesRef(source: SemanticLayerSource, ref: string): boolean {
|
||||
if (source.name.toLowerCase() === ref.toLowerCase()) {
|
||||
return true;
|
||||
}
|
||||
const table = source.table?.toLowerCase();
|
||||
const lowered = ref.toLowerCase();
|
||||
return !!table && (table === lowered || table.endsWith(`.${lowered}`));
|
||||
}
|
||||
|
||||
function normalizeSqlExpressionForIdentifierScan(expr: string): string {
|
||||
return expr
|
||||
.replace(/--.*$/gm, ' ')
|
||||
.replace(/\/\*[\s\S]*?\*\//g, ' ')
|
||||
.replace(/'([^']|'')*'/g, ' ')
|
||||
.replace(/"([^"]+)"/g, '$1')
|
||||
.replace(/`([^`]+)`/g, '$1')
|
||||
.replace(/\[([^\]]+)\]/g, '$1')
|
||||
.replace(/::\s*[A-Za-z_][\w$]*(?:\s*\([^)]*\))?/g, ' ');
|
||||
}
|
||||
|
||||
function extractSqlIdentifierRefs(expr: string): Array<{ qualifier?: string; name: string }> {
|
||||
const normalized = normalizeSqlExpressionForIdentifierScan(expr);
|
||||
const refs = new Map<string, { qualifier?: string; name: string }>();
|
||||
const re = /(?:\b([A-Za-z_][\w$]*)\s*\.\s*)?(\b[A-Za-z_][\w$]*)\b/g;
|
||||
for (const match of normalized.matchAll(re)) {
|
||||
const qualifier = match[1];
|
||||
const name = match[2];
|
||||
if (!name) {
|
||||
continue;
|
||||
}
|
||||
const nameLower = name.toLowerCase();
|
||||
const qualifierLower = qualifier?.toLowerCase();
|
||||
const after = normalized.slice((match.index ?? 0) + match[0].length).trimStart();
|
||||
if (!qualifier && after.startsWith('(')) {
|
||||
continue;
|
||||
}
|
||||
if (SQL_KEYWORDS.has(nameLower) || (qualifierLower && SQL_KEYWORDS.has(qualifierLower))) {
|
||||
continue;
|
||||
}
|
||||
refs.set(`${qualifierLower ?? ''}.${nameLower}`, qualifier ? { qualifier, name } : { name });
|
||||
}
|
||||
return [...refs.values()];
|
||||
}
|
||||
|
||||
function refBelongsToSource(
|
||||
ref: { qualifier?: string; name: string },
|
||||
sourceName: string,
|
||||
sourceNames: Set<string>,
|
||||
): boolean {
|
||||
if (!ref.qualifier) {
|
||||
return true;
|
||||
}
|
||||
const qualifier = ref.qualifier.toLowerCase();
|
||||
if (qualifier === sourceName.toLowerCase()) {
|
||||
return true;
|
||||
}
|
||||
return !sourceNames.has(qualifier);
|
||||
}
|
||||
|
||||
function missingLocalExpressionRefs(input: {
|
||||
expr: string;
|
||||
sourceName: string;
|
||||
sourceNames: Set<string>;
|
||||
validColumns: Set<string>;
|
||||
validMeasures: Set<string>;
|
||||
}): string[] {
|
||||
const missing = new Set<string>();
|
||||
for (const ref of extractSqlIdentifierRefs(input.expr)) {
|
||||
if (!refBelongsToSource(ref, input.sourceName, input.sourceNames)) {
|
||||
continue;
|
||||
}
|
||||
const name = ref.name.toLowerCase();
|
||||
if (!input.validColumns.has(name) && !input.validMeasures.has(name)) {
|
||||
missing.add(ref.name);
|
||||
}
|
||||
}
|
||||
return [...missing].sort();
|
||||
}
|
||||
|
||||
function parseJoinSide(side: string): { qualifier?: string; column: string } | null {
|
||||
const match = side.trim().match(/^(?:(\w+)\.)?(\w+)$/);
|
||||
if (!match) {
|
||||
return null;
|
||||
}
|
||||
return match[1] ? { qualifier: match[1], column: match[2] } : { column: match[2] };
|
||||
}
|
||||
|
||||
function parseJoinColumns(
|
||||
on: string,
|
||||
sourceName: string,
|
||||
targetName: string,
|
||||
): { localColumn: string; targetColumn: string } | null {
|
||||
const sides = on.split('=');
|
||||
if (sides.length !== 2) {
|
||||
return null;
|
||||
}
|
||||
const left = parseJoinSide(sides[0]);
|
||||
const right = parseJoinSide(sides[1]);
|
||||
if (!left || !right) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const sourceLower = sourceName.toLowerCase();
|
||||
const targetLower = targetName.toLowerCase();
|
||||
const leftQualifier = left.qualifier?.toLowerCase();
|
||||
const rightQualifier = right.qualifier?.toLowerCase();
|
||||
|
||||
if (leftQualifier === targetLower || rightQualifier === sourceLower) {
|
||||
return { localColumn: right.column, targetColumn: left.column };
|
||||
}
|
||||
if (rightQualifier === targetLower || leftQualifier === sourceLower || !leftQualifier) {
|
||||
return { localColumn: left.column, targetColumn: right.column };
|
||||
}
|
||||
return { localColumn: left.column, targetColumn: right.column };
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns one message per measure-level segment reference that doesn't resolve to
|
||||
* a segment defined on the source. Array is empty when every reference checks out.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
|
||||
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
|
|
@ -25,6 +25,10 @@ const slEditSourceInputSchema = z.object({
|
|||
.optional()
|
||||
.describe('Targeted exact-match search/replace edits on the raw YAML content.'),
|
||||
delete: z.boolean().optional().describe('Set to true to delete this source entirely'),
|
||||
rawPaths: z
|
||||
.array(z.string().min(1))
|
||||
.optional()
|
||||
.describe('In ingest sessions, raw source file paths that directly support this SL action.'),
|
||||
});
|
||||
|
||||
type SlEditSourceInput = z.infer<typeof slEditSourceInputSchema>;
|
||||
|
|
@ -75,6 +79,10 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
|
||||
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return this.buildOutput(false, [rawPathValidation.error], sourceName);
|
||||
}
|
||||
|
||||
// Handle delete
|
||||
if (input.delete) {
|
||||
|
|
@ -88,6 +96,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
key: sourceName,
|
||||
detail: 'Deleted source',
|
||||
targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
|
||||
...(rawPathValidation.rawPaths ? { rawPaths: rawPathValidation.rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
|
||||
|
|
@ -151,7 +160,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
|
||||
// Re-serialize and write
|
||||
const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 });
|
||||
const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
|
||||
const { errors: validationErrors, warnings: validationWarnings } =
|
||||
await semanticLayerService.validateWithProposedSource(connectionId, source);
|
||||
|
|
@ -184,6 +193,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
key: sourceName,
|
||||
detail: `Applied ${editCount} edit(s)`,
|
||||
targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
|
||||
...(rawPathValidation.rawPaths ? { rawPaths: rawPathValidation.rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ function makeDeps(opts: { sourceYaml: string; executeQuery: ReturnType<typeof vi
|
|||
listManifestSourceNames: vi.fn().mockResolvedValue([]),
|
||||
loadSource: vi.fn().mockResolvedValue(null),
|
||||
loadAllSources: vi.fn().mockResolvedValue([]),
|
||||
validatePhysicalTableReferences: vi.fn().mockResolvedValue([]),
|
||||
} as never,
|
||||
connections: {
|
||||
executeQuery: opts.executeQuery,
|
||||
|
|
@ -117,4 +118,29 @@ joins: []
|
|||
expect(probeSql).toMatch(/LIMIT 1\b/);
|
||||
expect(probeSql).not.toMatch(/LIMIT 0\b/);
|
||||
});
|
||||
|
||||
it('adds physical manifest errors for table-backed sources', async () => {
|
||||
const yaml = `name: int_active_contract_arr
|
||||
table: orbit_analytics.int_active_contract_arr
|
||||
grain: [contract_id]
|
||||
columns:
|
||||
- {name: contract_id, type: string}
|
||||
- {name: arr_cents, type: number}
|
||||
measures:
|
||||
- {name: arr, expr: sum(arr_cents)}
|
||||
joins: []
|
||||
`;
|
||||
const executeQuery = vi.fn();
|
||||
const deps = makeDeps({ sourceYaml: yaml, executeQuery }) as any;
|
||||
deps.semanticLayerService.validatePhysicalTableReferences.mockResolvedValue([
|
||||
'int_active_contract_arr.yaml: declared column(s) absent from physical table: arr_cents',
|
||||
]);
|
||||
|
||||
const result = await validateSingleSource(deps, 'conn-1', 'int_active_contract_arr');
|
||||
|
||||
expect(result.errors).toContain(
|
||||
'int_active_contract_arr.yaml: declared column(s) absent from physical table: arr_cents',
|
||||
);
|
||||
expect(executeQuery).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { SYSTEM_GIT_AUTHOR } from '../../tools/index.js';
|
|||
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
|
||||
import { sourceOverlaySchema } from '../schemas.js';
|
||||
import { SemanticLayerService } from '../semantic-layer.service.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
|
||||
|
||||
export interface SlValidationDeps {
|
||||
|
|
@ -118,6 +119,14 @@ export async function validateSingleSource(
|
|||
return { errors, warnings };
|
||||
}
|
||||
|
||||
if (!isOverlay && 'table' in result.data && result.data.table) {
|
||||
errors.push(
|
||||
...(await deps.semanticLayerService.validatePhysicalTableReferences(connectionId, [
|
||||
result.data as SemanticLayerSource,
|
||||
])),
|
||||
);
|
||||
}
|
||||
|
||||
const measures = (parsed.measures as Array<{ name: string }> | undefined) ?? [];
|
||||
const seenMeasures = new Set<string>();
|
||||
for (const m of measures) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
import { sourceOverlaySchema } from '../schemas.js';
|
||||
import type { SemanticLayerService } from '../semantic-layer.service.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
|
|
@ -25,6 +25,10 @@ const slWriteSourceInputSchema = z.object({
|
|||
.optional()
|
||||
.describe('Source definition (standalone with table/sql) or overlay (measures, computed columns, etc.)'),
|
||||
delete: z.boolean().optional().describe('Set to true to delete this source entirely'),
|
||||
rawPaths: z
|
||||
.array(z.string().min(1))
|
||||
.optional()
|
||||
.describe('In ingest sessions, raw source file paths that directly support this SL action.'),
|
||||
});
|
||||
|
||||
type SlWriteSourceInput = z.infer<typeof slWriteSourceInputSchema>;
|
||||
|
|
@ -99,6 +103,10 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
|
||||
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return this.buildOutput(false, [rawPathValidation.error], sourceName);
|
||||
}
|
||||
|
||||
// Handle delete
|
||||
if (input.delete) {
|
||||
|
|
@ -116,6 +124,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
key: sourceName,
|
||||
detail: 'Deleted source',
|
||||
targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
|
||||
...(rawPathValidation.rawPaths ? { rawPaths: rawPathValidation.rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
|
||||
|
|
@ -142,6 +151,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
context,
|
||||
semanticLayerService,
|
||||
skipIndex,
|
||||
rawPathValidation.rawPaths,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -154,6 +164,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
context: ToolContext,
|
||||
semanticLayerService: SemanticLayerService,
|
||||
skipIndex: boolean,
|
||||
rawPaths: string[] | undefined,
|
||||
): Promise<ToolOutput<SemanticLayerStructured>> {
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
const isOverlay =
|
||||
|
|
@ -164,7 +175,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}`
|
||||
: `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`;
|
||||
|
||||
const yamlContent = YAML.stringify(normalizedSource);
|
||||
const yamlContent = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
|
||||
const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent);
|
||||
if (orphanError) {
|
||||
|
|
@ -211,6 +222,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
key: sourceName,
|
||||
detail: existing ? `Rewrote source` : `Created source`,
|
||||
targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
|
||||
...(rawPaths ? { rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
30
packages/context/src/tools/action-raw-paths.ts
Normal file
30
packages/context/src/tools/action-raw-paths.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import type { ToolSession } from './tool-session.js';
|
||||
|
||||
type ActionRawPathValidation =
|
||||
| { ok: true; rawPaths?: string[] }
|
||||
| { ok: false; error: string };
|
||||
|
||||
export function validateActionRawPaths(
|
||||
session: ToolSession | undefined,
|
||||
rawPaths: readonly string[] | undefined,
|
||||
): ActionRawPathValidation {
|
||||
if (!rawPaths || rawPaths.length === 0) {
|
||||
return { ok: true };
|
||||
}
|
||||
|
||||
const uniqueRawPaths = [...new Set(rawPaths)];
|
||||
const allowedRawPaths = session?.allowedRawPaths;
|
||||
if (!allowedRawPaths) {
|
||||
return { ok: true, rawPaths: uniqueRawPaths };
|
||||
}
|
||||
|
||||
const unavailable = uniqueRawPaths.filter((rawPath) => !allowedRawPaths.has(rawPath));
|
||||
if (unavailable.length > 0) {
|
||||
return {
|
||||
ok: false,
|
||||
error: `rawPaths include unavailable ingest file(s): ${unavailable.join(', ')}`,
|
||||
};
|
||||
}
|
||||
|
||||
return { ok: true, rawPaths: uniqueRawPaths };
|
||||
}
|
||||
|
|
@ -31,6 +31,7 @@ export { ingestMetadataRequired, resolveIngestMetadata } from './context-ingest-
|
|||
export type { SqlEdit } from './sql-edit-replacer.js';
|
||||
export { applySqlEdits } from './sql-edit-replacer.js';
|
||||
export type { IngestToolMetadata, MemoryAction, ToolSession } from './tool-session.js';
|
||||
export { validateActionRawPaths } from './action-raw-paths.js';
|
||||
export type { TouchedSlSource, TouchedSlSourceSet } from './touched-sl-sources.js';
|
||||
export {
|
||||
addTouchedSlSource,
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ export interface MemoryAction {
|
|||
key: string;
|
||||
detail: string;
|
||||
targetConnectionId?: string | null;
|
||||
rawPaths?: string[];
|
||||
}
|
||||
|
||||
interface EvictionDecisionRecord {
|
||||
|
|
@ -45,6 +46,7 @@ export interface ToolSession {
|
|||
preHead: string | null;
|
||||
touchedSlSources: TouchedSlSourceSet;
|
||||
actions: MemoryAction[];
|
||||
allowedRawPaths?: ReadonlySet<string>;
|
||||
semanticLayerService: SemanticLayerService;
|
||||
wikiService: KnowledgeWikiService;
|
||||
configService: KtxFileStorePort;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,11 @@
|
|||
export { buildKnowledgeSearchText } from './knowledge-search-text.js';
|
||||
export {
|
||||
assertFlatWikiKey,
|
||||
invalidFlatWikiKeyMessage,
|
||||
isFlatWikiKey,
|
||||
suggestFlatWikiKey,
|
||||
validateFlatWikiKey,
|
||||
} from './keys.js';
|
||||
export { KnowledgeWikiService } from './knowledge-wiki.service.js';
|
||||
export * from './local-knowledge.js';
|
||||
export type {
|
||||
|
|
|
|||
31
packages/context/src/wiki/keys.ts
Normal file
31
packages/context/src/wiki/keys.ts
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
const FLAT_WIKI_KEY_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/;
|
||||
|
||||
export function suggestFlatWikiKey(key: string): string {
|
||||
const suggested = key
|
||||
.trim()
|
||||
.replace(/[\\/]+/g, '-')
|
||||
.replace(/[^a-zA-Z0-9_-]+/g, '-')
|
||||
.replace(/-+/g, '-')
|
||||
.replace(/^[-_]+|[-_]+$/g, '');
|
||||
return suggested.length > 0 ? suggested : 'page-key';
|
||||
}
|
||||
|
||||
export function invalidFlatWikiKeyMessage(key: string): string {
|
||||
return `Invalid wiki key "${key}". Wiki keys must be flat; use "${suggestFlatWikiKey(key)}".`;
|
||||
}
|
||||
|
||||
export function isFlatWikiKey(key: string): boolean {
|
||||
return FLAT_WIKI_KEY_PATTERN.test(key);
|
||||
}
|
||||
|
||||
export function validateFlatWikiKey(key: string): { ok: true; key: string } | { ok: false; error: string } {
|
||||
return isFlatWikiKey(key) ? { ok: true, key } : { ok: false, error: invalidFlatWikiKeyMessage(key) };
|
||||
}
|
||||
|
||||
export function assertFlatWikiKey(key: string): string {
|
||||
const result = validateFlatWikiKey(key);
|
||||
if (!result.ok) {
|
||||
throw new Error(result.error);
|
||||
}
|
||||
return result.key;
|
||||
}
|
||||
|
|
@ -33,13 +33,19 @@ function makeService() {
|
|||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
getFileAtCommit: vi.fn().mockResolvedValue(''),
|
||||
};
|
||||
const logger = {
|
||||
log: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
};
|
||||
const service = new KnowledgeWikiService(
|
||||
configService as any,
|
||||
embeddingService as any,
|
||||
pagesRepository as any,
|
||||
gitService as any,
|
||||
logger as any,
|
||||
);
|
||||
return { service, pagesRepository, embeddingService, configService, gitService };
|
||||
return { service, pagesRepository, embeddingService, configService, gitService, logger };
|
||||
}
|
||||
|
||||
const fm: WikiFrontmatter = { summary: 'sum', usage_mode: 'auto' };
|
||||
|
|
@ -107,6 +113,53 @@ describe('KnowledgeWikiService.syncFromCommit', () => {
|
|||
expect(call.deletes).toEqual([{ scope: 'GLOBAL', scopeId: null, pageKey: 'gone-page' }]);
|
||||
});
|
||||
|
||||
it('indexes historic-SQL nested pages but skips other nested wiki paths from commit sync', async () => {
|
||||
const { service, pagesRepository, gitService, logger } = makeService();
|
||||
|
||||
gitService.diffNameStatus.mockResolvedValue([
|
||||
{ status: 'A', path: 'knowledge/global/revenue-policy.md' },
|
||||
{ status: 'A', path: 'knowledge/global/historic-sql/order-lifecycle.md' },
|
||||
{ status: 'A', path: 'knowledge/global/historic-sql/_archived/retired-pattern.md' },
|
||||
{ status: 'A', path: 'knowledge/global/orbit/company-overview.md' },
|
||||
]);
|
||||
gitService.getFileAtCommit.mockImplementation((path: string) => {
|
||||
if (path.endsWith('revenue-policy.md')) {
|
||||
return Promise.resolve('---\nsummary: revenue\nusage_mode: auto\n---\n\nbody-revenue\n');
|
||||
}
|
||||
if (path.endsWith('order-lifecycle.md')) {
|
||||
return Promise.resolve('---\nsummary: order lifecycle\nusage_mode: auto\n---\n\nbody-orders\n');
|
||||
}
|
||||
if (path.endsWith('retired-pattern.md')) {
|
||||
return Promise.resolve('---\nsummary: retired\nusage_mode: never\n---\n\nbody-retired\n');
|
||||
}
|
||||
return Promise.reject(new Error(`unexpected getFileAtCommit path: ${path}`));
|
||||
});
|
||||
|
||||
await service.syncFromCommit('sha-before', 'sha-after', 'run-uuid');
|
||||
|
||||
expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/orbit/company-overview.md', 'sha-after');
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'[knowledge.sync] skipping unparseable path: knowledge/global/orbit/company-overview.md',
|
||||
);
|
||||
const call = pagesRepository.applyDiffTransactional.mock.calls[0][0];
|
||||
expect(call.upserts).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({ scope: 'GLOBAL', pageKey: 'revenue-policy', summary: 'revenue' }),
|
||||
expect.objectContaining({
|
||||
scope: 'GLOBAL',
|
||||
pageKey: 'historic-sql/order-lifecycle',
|
||||
summary: 'order lifecycle',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
scope: 'GLOBAL',
|
||||
pageKey: 'historic-sql/_archived/retired-pattern',
|
||||
summary: 'retired',
|
||||
}),
|
||||
]),
|
||||
);
|
||||
expect(call.upserts).toHaveLength(3);
|
||||
});
|
||||
|
||||
it('is a no-op when the diff between shas has no knowledge changes', async () => {
|
||||
const { service, pagesRepository, gitService } = makeService();
|
||||
gitService.diffNameStatus.mockResolvedValue([]);
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { createHash } from 'node:crypto';
|
|||
import YAML from 'yaml';
|
||||
import type { KtxEmbeddingPort, KtxFileStorePort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import { assertFlatWikiKey, isFlatWikiKey } from './keys.js';
|
||||
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
|
||||
import type { KnowledgeGitDiffPort, KnowledgeIndexPort, UpsertPageParams } from './ports.js';
|
||||
import type { WikiFrontmatter, WikiPage, WikiPageWithScope } from './types.js';
|
||||
|
|
@ -10,6 +11,10 @@ const WIKI_PREFIX = 'knowledge';
|
|||
|
||||
export type { WikiFrontmatter };
|
||||
|
||||
function isHistoricSqlPathSegment(segment: string): boolean {
|
||||
return /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment);
|
||||
}
|
||||
|
||||
export class KnowledgeWikiService {
|
||||
private isWorktreeScoped = false;
|
||||
|
||||
|
|
@ -53,7 +58,7 @@ export class KnowledgeWikiService {
|
|||
}
|
||||
|
||||
pagePath(scope: string, scopeId: string | null | undefined, pageKey: string): string {
|
||||
return `${this.scopeDir(scope, scopeId)}/${pageKey}.md`;
|
||||
return `${this.scopeDir(scope, scopeId)}/${assertFlatWikiKey(pageKey)}.md`;
|
||||
}
|
||||
|
||||
// ── Parsing / serialization ───────────────────────────────────
|
||||
|
|
@ -140,7 +145,7 @@ export class KnowledgeWikiService {
|
|||
const name = f.replace(`${dir}/`, '').replace(/\.md$/, '');
|
||||
return name;
|
||||
})
|
||||
.filter((name) => !name.includes('/'));
|
||||
.filter(isFlatWikiKey);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -417,6 +422,7 @@ export class KnowledgeWikiService {
|
|||
* Parse a `knowledge/<scope>/...` file path into its scope and page key.
|
||||
* `knowledge/global/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'foo' }
|
||||
* `knowledge/user/<id>/bar.md` → { scope: 'USER', scopeId: '<id>', pageKey: 'bar' }
|
||||
* `knowledge/global/historic-sql/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'historic-sql/foo' }
|
||||
*/
|
||||
function parseKnowledgePath(path: string): { scope: string; scopeId: string | null; pageKey: string } | null {
|
||||
if (!path.endsWith('.md')) {
|
||||
|
|
@ -428,10 +434,19 @@ function parseKnowledgePath(path: string): { scope: string; scopeId: string | nu
|
|||
}
|
||||
const rest = segments.slice(1);
|
||||
if (rest.length === 2 && rest[0] === 'global') {
|
||||
return { scope: 'GLOBAL', scopeId: null, pageKey: rest[1].replace(/\.md$/, '') };
|
||||
const pageKey = rest[1].replace(/\.md$/, '');
|
||||
return isFlatWikiKey(pageKey) ? { scope: 'GLOBAL', scopeId: null, pageKey } : null;
|
||||
}
|
||||
if (rest.length >= 3 && rest[0] === 'global' && rest[1] === 'historic-sql') {
|
||||
const historicPath = rest.slice(2).join('/').replace(/\.md$/, '');
|
||||
if (historicPath.split('/').every(isHistoricSqlPathSegment)) {
|
||||
return { scope: 'GLOBAL', scopeId: null, pageKey: `historic-sql/${historicPath}` };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (rest.length === 3 && rest[0] === 'user') {
|
||||
return { scope: 'USER', scopeId: rest[1], pageKey: rest[2].replace(/\.md$/, '') };
|
||||
const pageKey = rest[2].replace(/\.md$/, '');
|
||||
return isFlatWikiKey(pageKey) ? { scope: 'USER', scopeId: rest[1], pageKey } : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('writes, reads, lists, and searches global knowledge pages', async () => {
|
||||
const write = await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -46,11 +46,11 @@ describe('local knowledge helpers', () => {
|
|||
slRefs: ['orders'],
|
||||
});
|
||||
|
||||
expect(write.path).toBe('knowledge/global/metrics/revenue.md');
|
||||
expect(write.path).toBe('knowledge/global/metrics-revenue.md');
|
||||
expect(write.operation).toBe('write');
|
||||
|
||||
await expect(readLocalKnowledgePage(project, { key: 'metrics/revenue', userId: 'local' })).resolves.toMatchObject({
|
||||
key: 'metrics/revenue',
|
||||
await expect(readLocalKnowledgePage(project, { key: 'metrics-revenue', userId: 'local' })).resolves.toMatchObject({
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -61,8 +61,8 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([
|
||||
{
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
},
|
||||
|
|
@ -71,8 +71,8 @@ describe('local knowledge helpers', () => {
|
|||
const search = await searchLocalKnowledgePages(project, { query: 'paid order', userId: 'local' });
|
||||
expect(search).toEqual([
|
||||
expect.objectContaining({
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL',
|
||||
score: expect.any(Number),
|
||||
matchReasons: expect.arrayContaining(['lexical']),
|
||||
|
|
@ -85,7 +85,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('adds the token lane alongside lexical wiki matches', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -95,7 +95,7 @@ describe('local knowledge helpers', () => {
|
|||
const search = await searchLocalKnowledgePages(project, { query: 'paid---', userId: 'local', limit: 5 });
|
||||
|
||||
expect(search[0]).toMatchObject({
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
matchReasons: expect.arrayContaining(['token']),
|
||||
lanes: expect.arrayContaining([expect.objectContaining({ lane: 'token', status: 'available' })]),
|
||||
});
|
||||
|
|
@ -103,14 +103,14 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('uses stored page embeddings when a wiki embedding backend is configured', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Semantic revenue definition',
|
||||
content: 'Revenue search text.',
|
||||
tags: ['finance'],
|
||||
});
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'support/escalations',
|
||||
key: 'support-escalations',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Support escalation process',
|
||||
content: 'Support search text.',
|
||||
|
|
@ -125,7 +125,7 @@ describe('local knowledge helpers', () => {
|
|||
});
|
||||
|
||||
expect(search[0]).toMatchObject({
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
matchReasons: expect.arrayContaining(['semantic']),
|
||||
lanes: expect.arrayContaining([expect.objectContaining({ lane: 'semantic', status: 'available' })]),
|
||||
});
|
||||
|
|
@ -133,7 +133,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('reports semantic lane as skipped when wiki embeddings are not configured', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -172,7 +172,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('serializes historic-SQL frontmatter fields for global pages', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'queries/monthly-paid-orders',
|
||||
key: 'monthly-paid-orders',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Monthly paid orders',
|
||||
content: '## Monthly paid order count',
|
||||
|
|
@ -195,7 +195,7 @@ describe('local knowledge helpers', () => {
|
|||
fingerprints: ['fp_paid_orders'],
|
||||
});
|
||||
|
||||
const raw = await project.fileStore.readFile('knowledge/global/queries/monthly-paid-orders.md');
|
||||
const raw = await project.fileStore.readFile('knowledge/global/monthly-paid-orders.md');
|
||||
expect(raw.content).toContain('source: historic-sql');
|
||||
expect(raw.content).toContain('intent: Monthly paid order count');
|
||||
expect(raw.content).toContain(['tables:', ' - analytics.orders'].join('\n'));
|
||||
|
|
@ -207,7 +207,7 @@ describe('local knowledge helpers', () => {
|
|||
it('falls back to Markdown scanning when the config does not select sqlite-fts5', async () => {
|
||||
project.config.storage.search = 'postgres-hybrid';
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -216,7 +216,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
await expect(searchLocalKnowledgePages(project, { query: 'paid order', userId: 'local' })).resolves.toEqual([
|
||||
expect.objectContaining({
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
score: 3,
|
||||
matchReasons: ['token'],
|
||||
}),
|
||||
|
|
@ -231,6 +231,17 @@ describe('local knowledge helpers', () => {
|
|||
summary: 'bad',
|
||||
content: 'bad',
|
||||
}),
|
||||
).rejects.toThrow('Unsafe knowledge key');
|
||||
).rejects.toThrow('Invalid wiki key "../secret". Wiki keys must be flat; use "secret".');
|
||||
});
|
||||
|
||||
it('rejects slash-delimited knowledge keys with a flat-key suggestion', async () => {
|
||||
await expect(
|
||||
writeLocalKnowledgePage(project, {
|
||||
key: 'orbit/company-overview',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'bad',
|
||||
content: 'bad',
|
||||
}),
|
||||
).rejects.toThrow('Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import type { KtxEmbeddingPort, KtxFileWriteResult } from '../core/index.js';
|
|||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js';
|
||||
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
|
||||
import { assertFlatWikiKey, isFlatWikiKey } from './keys.js';
|
||||
import { SqliteKnowledgeIndex, type SqliteKnowledgeIndexPage } from './sqlite-knowledge-index.js';
|
||||
import type { HistoricSqlWikiUsageFrontmatter, WikiSearchLaneSummary, WikiSearchMatchReason } from './types.js';
|
||||
|
||||
|
|
@ -67,28 +68,39 @@ function assertSafePathToken(kind: string, value: string): string {
|
|||
return value;
|
||||
}
|
||||
|
||||
function assertSafeKnowledgeKey(key: string): string {
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_/-]*$/.test(key)) {
|
||||
throw new Error(`Unsafe knowledge key: ${key}`);
|
||||
}
|
||||
return assertSafePathToken('knowledge key', key);
|
||||
}
|
||||
|
||||
function stringArray(value: unknown): string[] {
|
||||
return Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : [];
|
||||
}
|
||||
|
||||
function knowledgePath(scope: LocalKnowledgeScope, userId: string | undefined, key: string): string {
|
||||
const safeKey = assertSafeKnowledgeKey(key);
|
||||
const safeKey = assertFlatWikiKey(key);
|
||||
if (scope === 'GLOBAL') {
|
||||
return `knowledge/global/${safeKey}.md`;
|
||||
}
|
||||
return `knowledge/user/${assertSafePathToken('user id', userId ?? 'local')}/${safeKey}.md`;
|
||||
}
|
||||
|
||||
function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string {
|
||||
function isHistoricSqlPathSegment(segment: string): boolean {
|
||||
return /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment);
|
||||
}
|
||||
|
||||
function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string | null {
|
||||
const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${assertSafePathToken('user id', userId)}/`;
|
||||
return path.slice(prefix.length).replace(/\.md$/, '');
|
||||
const key = path.slice(prefix.length).replace(/\.md$/, '');
|
||||
if (isFlatWikiKey(key)) {
|
||||
return key;
|
||||
}
|
||||
if (
|
||||
scope === 'GLOBAL' &&
|
||||
key.startsWith('historic-sql/') &&
|
||||
key
|
||||
.slice('historic-sql/'.length)
|
||||
.split('/')
|
||||
.every(isHistoricSqlPathSegment)
|
||||
) {
|
||||
return key;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function parseKnowledgePage(key: string, path: string, scope: LocalKnowledgeScope, raw: string): LocalKnowledgePage {
|
||||
|
|
@ -187,6 +199,9 @@ export async function listLocalKnowledgePages(
|
|||
const listed = await project.fileStore.listFiles(root);
|
||||
for (const path of listed.files.filter((file) => file.endsWith('.md')).sort()) {
|
||||
const key = keyFromKnowledgePath(path, scope, userId);
|
||||
if (!key) {
|
||||
continue;
|
||||
}
|
||||
const page = await readPageAtPath(project, key, path, scope);
|
||||
if (page) {
|
||||
pages.push({ key, path, scope, summary: page.summary });
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue