mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-01 08:59:39 +02:00
Improve schema setup and Notion ingest UX (#14)
* Improve schema setup and Notion ingest UX
* Handle Postgres network scan failures
* WIP: save local changes before main merge
* Refine setup prompt choices
* Tighten ingest reconciliation guidance
* Commit setup config updates
* Canonicalize unmapped fallback details
* Count reconciliation actions in reports
* Harden semantic layer source validation
* Return wiki content after edits
* Validate SL sources against manifests
* Validate wiki refs before writes
* Simplify CLI next steps
* Clarify agent setup summary
* Surface dbt target SL sources
* Recover SL write fallbacks
* Preserve failed context build metadata
* Track raw paths for ingest actions
* test(cli): update seeded demo expectations
* fix(ingest): scope fallback recovery checks
* fix(sl): tighten source validation guards
* fix(wiki): ignore empty embedding vectors
* Improve Notion ingest UX
* Enforce flat wiki keys
* test(context): update wiki key assertion
---------
Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
This commit is contained in:
parent
866d33e71a
commit
60457e9407
116 changed files with 4177 additions and 610 deletions
|
|
@ -21,6 +21,7 @@ export {
|
|||
notionConnectionToPullConfig,
|
||||
parseNotionConnectionConfig,
|
||||
redactNotionConnectionConfig,
|
||||
resolveNotionConnectionAuthToken,
|
||||
resolveNotionAuthToken,
|
||||
type KtxNotionConnectionConfig,
|
||||
type RedactedKtxNotionConnectionConfig,
|
||||
|
|
|
|||
|
|
@ -30,18 +30,36 @@ describe('standalone Notion connection config', () => {
|
|||
|
||||
expect(parsed).toEqual({
|
||||
driver: 'notion',
|
||||
auth_token: null,
|
||||
auth_token_ref: 'env:NOTION_TOKEN',
|
||||
crawl_mode: 'selected_roots',
|
||||
root_page_ids: ['page-1'],
|
||||
root_database_ids: [],
|
||||
root_data_source_ids: [],
|
||||
max_pages_per_run: 1000,
|
||||
max_knowledge_creates_per_run: 5,
|
||||
max_knowledge_creates_per_run: 25,
|
||||
max_knowledge_updates_per_run: 20,
|
||||
last_successful_cursor: null,
|
||||
});
|
||||
});
|
||||
|
||||
it('parses inline Notion auth tokens without requiring auth_token_ref', () => {
|
||||
const parsed = parseNotionConnectionConfig({
|
||||
driver: 'notion',
|
||||
auth_token: ' ntn_inline_token ',
|
||||
crawl_mode: 'selected_roots',
|
||||
root_page_ids: ['page-1'],
|
||||
});
|
||||
|
||||
expect(parsed).toMatchObject({
|
||||
driver: 'notion',
|
||||
auth_token: 'ntn_inline_token',
|
||||
auth_token_ref: null,
|
||||
crawl_mode: 'selected_roots',
|
||||
root_page_ids: ['page-1'],
|
||||
});
|
||||
});
|
||||
|
||||
it('redacts token references from display output', () => {
|
||||
expect(
|
||||
redactNotionConnectionConfig(
|
||||
|
|
@ -60,7 +78,7 @@ describe('standalone Notion connection config', () => {
|
|||
rootDatabaseIds: [],
|
||||
rootDataSourceIds: [],
|
||||
maxPagesPerRun: 80,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
warning: 'Anything accessible to this Notion integration can become organization knowledge.',
|
||||
});
|
||||
|
|
@ -117,4 +135,23 @@ describe('standalone Notion connection config', () => {
|
|||
lastSuccessfulCursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}',
|
||||
});
|
||||
});
|
||||
|
||||
it('uses inline Notion auth_token when building adapter pull config', async () => {
|
||||
const pullConfig = await notionConnectionToPullConfig(
|
||||
parseNotionConnectionConfig({
|
||||
driver: 'notion',
|
||||
auth_token: 'ntn_inline_token',
|
||||
auth_token_ref: 'env:STALE_NOTION_TOKEN',
|
||||
crawl_mode: 'all_accessible',
|
||||
}),
|
||||
{
|
||||
env: {},
|
||||
readTextFile: async () => {
|
||||
throw new Error('readTextFile should not be called for inline auth_token');
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
expect(pullConfig.authToken).toBe('ntn_inline_token');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { type NotionPullConfig, notionPullConfigSchema } from '../ingest/adapters/notion/types.js';
|
||||
import {
|
||||
NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
type NotionPullConfig,
|
||||
notionPullConfigSchema,
|
||||
} from '../ingest/adapters/notion/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../project/config.js';
|
||||
|
||||
export const KTX_NOTION_ORG_KNOWLEDGE_WARNING =
|
||||
|
|
@ -11,7 +15,8 @@ type KtxNotionCrawlMode = 'all_accessible' | 'selected_roots';
|
|||
|
||||
export interface KtxNotionConnectionConfig extends KtxProjectConnectionConfig {
|
||||
driver: 'notion';
|
||||
auth_token_ref: string;
|
||||
auth_token: string | null;
|
||||
auth_token_ref: string | null;
|
||||
crawl_mode: KtxNotionCrawlMode;
|
||||
root_page_ids: string[];
|
||||
root_database_ids: string[];
|
||||
|
|
@ -89,11 +94,12 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo
|
|||
if (input.driver !== 'notion') {
|
||||
throw new Error('Notion connection config requires driver: notion');
|
||||
}
|
||||
const authTokenRef = stringValue(input.auth_token_ref, '');
|
||||
if (!authTokenRef) {
|
||||
throw new Error('Notion connection config requires auth_token_ref');
|
||||
const authToken = optionalString(input.auth_token);
|
||||
const authTokenRef = optionalString(input.auth_token_ref);
|
||||
if (!authToken && !authTokenRef) {
|
||||
throw new Error('Notion connection config requires auth_token or auth_token_ref');
|
||||
}
|
||||
if (!authTokenRef.startsWith('env:') && !authTokenRef.startsWith('file:')) {
|
||||
if (authTokenRef && !authTokenRef.startsWith('env:') && !authTokenRef.startsWith('file:')) {
|
||||
throw new Error('Notion auth_token_ref must use env:NAME or file:/path');
|
||||
}
|
||||
|
||||
|
|
@ -111,6 +117,7 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo
|
|||
return {
|
||||
...input,
|
||||
driver: 'notion',
|
||||
auth_token: authToken,
|
||||
auth_token_ref: authTokenRef,
|
||||
crawl_mode: crawlMode,
|
||||
root_page_ids: rootPageIds,
|
||||
|
|
@ -119,7 +126,7 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo
|
|||
max_pages_per_run: boundedInteger(input.max_pages_per_run, 1000, 'max_pages_per_run', 1, 10_000),
|
||||
max_knowledge_creates_per_run: boundedInteger(
|
||||
input.max_knowledge_creates_per_run,
|
||||
5,
|
||||
NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
'max_knowledge_creates_per_run',
|
||||
0,
|
||||
25,
|
||||
|
|
@ -138,7 +145,7 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo
|
|||
export function redactNotionConnectionConfig(config: KtxNotionConnectionConfig): RedactedKtxNotionConnectionConfig {
|
||||
return {
|
||||
driver: 'notion',
|
||||
hasAuthToken: Boolean(config.auth_token_ref),
|
||||
hasAuthToken: Boolean(config.auth_token ?? config.auth_token_ref),
|
||||
crawlMode: config.crawl_mode,
|
||||
rootPageIds: config.root_page_ids,
|
||||
rootDatabaseIds: config.root_database_ids,
|
||||
|
|
@ -178,12 +185,20 @@ export async function resolveNotionAuthToken(
|
|||
throw new Error('Notion auth_token_ref must use env:NAME or file:/path');
|
||||
}
|
||||
|
||||
/**
 * Resolve the Notion API token for a connection config.
 *
 * A non-empty inline `auth_token` takes precedence and is returned as-is, without
 * touching the environment or the filesystem. Otherwise `auth_token_ref` is handed to
 * `resolveNotionAuthToken` together with `options`.
 *
 * @param config - The `auth_token` / `auth_token_ref` pair of a Notion connection.
 * @param options - Forwarded unchanged to `resolveNotionAuthToken`.
 * @returns The resolved token string.
 * @throws Error when neither `auth_token` nor `auth_token_ref` is set. The message matches
 *   the one `parseNotionConnectionConfig` uses for the same condition.
 */
export async function resolveNotionConnectionAuthToken(
  config: Pick<KtxNotionConnectionConfig, 'auth_token' | 'auth_token_ref'>,
  options: ResolveNotionTokenOptions = {},
): Promise<string> {
  // Truthiness (not `??`) so an empty inline token falls through to the reference
  // instead of being returned as an unusable empty string.
  if (config.auth_token) {
    return config.auth_token;
  }
  if (!config.auth_token_ref) {
    // Fail here with an accurate message rather than passing '' to the ref resolver.
    throw new Error('Notion connection config requires auth_token or auth_token_ref');
  }
  return resolveNotionAuthToken(config.auth_token_ref, options);
}
|
||||
|
||||
export async function notionConnectionToPullConfig(
|
||||
config: KtxNotionConnectionConfig,
|
||||
options: ResolveNotionTokenOptions = {},
|
||||
): Promise<NotionPullConfig> {
|
||||
const authToken = await resolveNotionConnectionAuthToken(config, options);
|
||||
return notionPullConfigSchema.parse({
|
||||
authToken: await resolveNotionAuthToken(config.auth_token_ref, options),
|
||||
authToken,
|
||||
crawlMode: config.crawl_mode,
|
||||
rootPageIds: config.root_page_ids,
|
||||
rootDatabaseIds: config.root_database_ids,
|
||||
|
|
|
|||
|
|
@ -48,4 +48,10 @@ describe('DbtSourceAdapter', () => {
|
|||
it('implements fetch() for git-backed dbt source setup', () => {
|
||||
expect(adapter.fetch).toBeTypeOf('function');
|
||||
});
|
||||
|
||||
it('reports mapped warehouse targets for bundle SL discovery', async () => {
|
||||
adapter = new DbtSourceAdapter({ targetConnectionIds: ['postgres-warehouse', 'postgres-warehouse'] });
|
||||
|
||||
await expect(adapter.listTargetConnectionIds?.(stagedDir)).resolves.toEqual(['postgres-warehouse']);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { parseDbtStagedDir } from './parse.js';
|
|||
|
||||
interface DbtSourceAdapterOptions {
|
||||
homeDir?: string;
|
||||
targetConnectionIds?: string[];
|
||||
}
|
||||
|
||||
export class DbtSourceAdapter implements SourceAdapter {
|
||||
|
|
@ -24,6 +25,10 @@ export class DbtSourceAdapter implements SourceAdapter {
|
|||
return detectDbtStagedDir(stagedDir);
|
||||
}
|
||||
|
||||
/**
 * List the target connection ids configured on this adapter.
 *
 * The ids come from `options.targetConnectionIds` (treated as empty when unset),
 * are de-duplicated, and are returned sorted with `localeCompare`.
 *
 * @param _stagedDir - Unused; the result depends only on the adapter options.
 * @returns A new array of unique, sorted connection ids.
 */
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
  const configured = this.options.targetConnectionIds ?? [];
  const unique = Array.from(new Set(configured));
  unique.sort((left, right) => left.localeCompare(right));
  return unique;
}
|
||||
|
||||
async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
|
||||
const config = pullConfig as DbtPullConfig | undefined;
|
||||
if (!config?.repoUrl) {
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ async function readJson(path: string): Promise<unknown> {
|
|||
/**
 * Serialize `value` as YAML and write it to `path` via a sibling temp file.
 *
 * The parent directory is created if needed. The document is written to `${path}.tmp`
 * and then renamed over `path`.
 *
 * @param path - Destination file path.
 * @param value - Value to serialize with `YAML.stringify`.
 */
async function writeYamlAtomic(path: string, value: unknown): Promise<void> {
  await mkdir(dirname(path), { recursive: true });
  const tmp = `${path}.tmp`;
  // Same stringify options as the `after` comparison in projectHistoricSqlEvidence.
  const serialized = YAML.stringify(value, { indent: 2, lineWidth: 0, version: '1.1' });
  await writeFile(tmp, serialized, 'utf-8');
  await rename(tmp, path);
}
|
||||
|
||||
|
|
@ -270,7 +270,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
}
|
||||
}
|
||||
}
|
||||
const after = YAML.stringify(shard, { indent: 2, lineWidth: 0 });
|
||||
const after = YAML.stringify(shard, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
if (after !== before) {
|
||||
await writeYamlAtomic(path, shard);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import { notionManifestSchema, notionMetadataSchema } from './types.js';
|
|||
const MAX_NOTION_WORK_UNIT_CHARS = 40_000;
|
||||
export const NOTION_ORG_KNOWLEDGE_WARNING =
|
||||
'Anything accessible to this Notion integration can become organization knowledge.';
|
||||
const NOTION_SL_WRITE_GUIDANCE =
|
||||
'Write wiki entries with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. Search existing wiki pages for the same tables or sl_refs before creating a new page. Only write or edit SL sources after sl_discover/sl_read_source confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. Do not create SL sources under the Notion connection just because a page mentions a warehouse table.';
|
||||
|
||||
async function walk(root: string): Promise<string[]> {
|
||||
const entries = await readdir(root, { withFileTypes: true, recursive: true });
|
||||
|
|
@ -92,7 +94,7 @@ export async function chunkNotionStagedDir(stagedDir: string, diffSet?: DiffSet)
|
|||
rawFiles,
|
||||
dependencyPaths,
|
||||
peerFileIndex,
|
||||
notes: `Synthesize durable wiki and SL knowledge from this Notion page span only. Use read_raw_span on ${pagePath} for lines ${range.startLine}-${range.endLine}; do not call read_raw_file for oversized pages. Cite evidence chunk/page IDs.`,
|
||||
notes: `Synthesize durable wiki and SL knowledge from this Notion page span only. Use read_raw_span on ${pagePath} for lines ${range.startLine}-${range.endLine}; do not call read_raw_file for oversized pages. ${NOTION_SL_WRITE_GUIDANCE} Cite evidence chunk/page IDs.`,
|
||||
});
|
||||
}
|
||||
continue;
|
||||
|
|
@ -105,7 +107,7 @@ export async function chunkNotionStagedDir(stagedDir: string, diffSet?: DiffSet)
|
|||
dependencyPaths,
|
||||
peerFileIndex,
|
||||
notes:
|
||||
'Synthesize durable wiki and SL knowledge from this Notion page. Write wiki entries with wiki_write and SL sources with sl_write_source; cite evidence chunk/page IDs.',
|
||||
`Synthesize durable wiki and SL knowledge from this Notion page. ${NOTION_SL_WRITE_GUIDANCE} Cite evidence chunk/page IDs.`,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -115,6 +117,8 @@ export async function chunkNotionStagedDir(stagedDir: string, diffSet?: DiffSet)
|
|||
reconcileNotes: [
|
||||
`Notion maxKnowledgeCreatesPerRun=${manifest.maxKnowledgeCreatesPerRun}`,
|
||||
`Notion maxKnowledgeUpdatesPerRun=${manifest.maxKnowledgeUpdatesPerRun}`,
|
||||
'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.',
|
||||
'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.',
|
||||
],
|
||||
contextReport: {
|
||||
capped: manifest.capped,
|
||||
|
|
|
|||
|
|
@ -79,9 +79,27 @@ describe('clusterNotionWorkUnits', () => {
|
|||
expect(wu.unitKey).toMatch(/^notion-cluster-\d+$/);
|
||||
expect(wu.rawFiles.length).toBeGreaterThan(0);
|
||||
expect(wu.notes).toMatch(/Synthesize/);
|
||||
expect(wu.notes).toContain('emit_unmapped_fallback');
|
||||
expect(wu.notes).toContain('Do not create SL sources under the Notion connection');
|
||||
}
|
||||
});
|
||||
|
||||
test('merges pages into one synthesis unit at the clustering threshold', async () => {
|
||||
const pages = Array.from({ length: MIN_PAGES_TO_CLUSTER }, (_, i) => ({
|
||||
id: `p${i}`,
|
||||
title: `Customer source reference ${i}`,
|
||||
body: `Customer source reference maps to orbit_analytics.customer ${i}`.repeat(10),
|
||||
}));
|
||||
const stagedDir = await makeStaged(pages);
|
||||
const wus = makeWorkUnits(pages);
|
||||
const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: mockEmbed });
|
||||
expect(out).toHaveLength(1);
|
||||
expect(out[0].unitKey).toBe('notion-cluster-1');
|
||||
expect(new Set(out[0].rawFiles)).toEqual(new Set(wus.flatMap((wu) => wu.rawFiles)));
|
||||
expect(out[0].notes).toContain('emit_unmapped_fallback');
|
||||
expect(out[0].notes).toContain('Do not create SL sources under the Notion connection');
|
||||
});
|
||||
|
||||
test('preserves coverage: every input rawFile appears in some cluster', async () => {
|
||||
const pages = Array.from({ length: 12 }, (_, i) => ({
|
||||
id: `p${i}`,
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ import { notionMetadataSchema } from './types.js';
|
|||
export const MIN_PAGES_TO_CLUSTER = 5;
|
||||
const CLUSTER_TEXT_BODY_CHARS = 1024;
|
||||
const CLUSTER_SEED = 42;
|
||||
const NOTION_CLUSTER_SL_WRITE_GUIDANCE =
|
||||
'Write wiki entries directly with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. Search existing wiki pages for the same tables or sl_refs before creating a new page. Only write or edit SL sources after sl_discover/sl_read_source confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. Do not create SL sources under the Notion connection just because a page mentions a warehouse table.';
|
||||
|
||||
interface ClusterNotionWorkUnitsArgs {
|
||||
workUnits: WorkUnit[];
|
||||
|
|
@ -63,7 +65,7 @@ function mergeWorkUnits(bucket: WorkUnit[], clusterIndex: number): WorkUnit {
|
|||
`Synthesize durable wiki and SL knowledge from these ${bucket.length} related Notion pages. ` +
|
||||
'Read each page with read_raw_file (or read_raw_span for oversized pages). ' +
|
||||
'Search nearby evidence with context_evidence_search/_read/_neighbors when needed. ' +
|
||||
'Write wiki entries directly with wiki_write and SL sources directly with sl_write_source. ' +
|
||||
`${NOTION_CLUSTER_SL_WRITE_GUIDANCE} ` +
|
||||
'Do not call context_candidate_write.',
|
||||
};
|
||||
}
|
||||
|
|
@ -72,7 +74,7 @@ export async function clusterNotionWorkUnits(args: ClusterNotionWorkUnitsArgs):
|
|||
const { workUnits, stagedDir, embedding } = args;
|
||||
if (workUnits.length < MIN_PAGES_TO_CLUSTER) return workUnits;
|
||||
const k = pickK(workUnits.length);
|
||||
if (k <= 1) return workUnits;
|
||||
if (k <= 1) return [mergeWorkUnits(workUnits, 0)];
|
||||
const texts = await Promise.all(workUnits.map((wu) => buildClusterText(wu, stagedDir)));
|
||||
let vectors: number[][];
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ describe('NotionSourceAdapter', () => {
|
|||
continuedFromCursor: false,
|
||||
partialSnapshot: true,
|
||||
maxPagesPerRun: 1,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
skipped: [],
|
||||
warnings: ['maxPagesPerRun reached at 1'],
|
||||
|
|
@ -167,7 +167,7 @@ describe('NotionSourceAdapter', () => {
|
|||
continuedFromCursor: true,
|
||||
partialSnapshot: true,
|
||||
maxPagesPerRun: 100,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
nextSuccessfulCursor: null,
|
||||
skipped: [],
|
||||
|
|
@ -218,7 +218,7 @@ describe('NotionSourceAdapter', () => {
|
|||
continuedFromCursor: false,
|
||||
partialSnapshot: false,
|
||||
maxPagesPerRun: 100,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
skipped: [],
|
||||
warnings: [],
|
||||
|
|
@ -241,13 +241,58 @@ describe('NotionSourceAdapter', () => {
|
|||
dependencyPaths: ['manifest.json', 'pages/page-1/blocks.json'],
|
||||
});
|
||||
expect(result.workUnits[0].notes).toContain('Synthesize durable wiki and SL knowledge');
|
||||
expect(result.workUnits[0].notes).toContain('emit_unmapped_fallback');
|
||||
expect(result.workUnits[0].notes).toContain('use reason no_physical_table rather than no_connection_mapping');
|
||||
expect(result.workUnits[0].notes).toContain('Do not create SL sources under the Notion connection');
|
||||
expect(result.workUnits[0].notes).toContain(
|
||||
'Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview',
|
||||
);
|
||||
expect(result.reconcileNotes).toEqual([
|
||||
'Notion maxKnowledgeCreatesPerRun=5',
|
||||
'Notion maxKnowledgeCreatesPerRun=25',
|
||||
'Notion maxKnowledgeUpdatesPerRun=20',
|
||||
'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.',
|
||||
'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.',
|
||||
]);
|
||||
expect(result.contextReport).toEqual({ capped: false, warnings: [NOTION_ORG_KNOWLEDGE_WARNING] });
|
||||
});
|
||||
|
||||
it('chunks retried pages when failed provenance makes unchanged raw files look added again', async () => {
|
||||
await writeFile(
|
||||
join(stagedDir, 'manifest.json'),
|
||||
JSON.stringify({
|
||||
source: 'notion',
|
||||
apiVersion: '2026-03-11',
|
||||
crawlMode: 'selected_roots',
|
||||
rootPageIds: ['page-1'],
|
||||
rootDatabaseIds: [],
|
||||
rootDataSourceIds: [],
|
||||
fetchedAt: '2026-04-28T00:00:00.000Z',
|
||||
pageCount: 1,
|
||||
databaseCount: 0,
|
||||
dataSourceCount: 0,
|
||||
capped: false,
|
||||
continuedFromCursor: false,
|
||||
partialSnapshot: false,
|
||||
maxPagesPerRun: 100,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
skipped: [],
|
||||
warnings: [],
|
||||
}),
|
||||
'utf-8',
|
||||
);
|
||||
await writePage('page-1', 'Retry Me');
|
||||
|
||||
const result = await adapter.chunk(stagedDir, {
|
||||
added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'],
|
||||
modified: [],
|
||||
deleted: [],
|
||||
unchanged: ['manifest.json', 'pages/page-1/blocks.json'],
|
||||
});
|
||||
|
||||
expect(result.workUnits.map((workUnit) => workUnit.unitKey)).toEqual(['notion-page-page-1']);
|
||||
});
|
||||
|
||||
it('reports malformed manifests with a Notion-specific error', async () => {
|
||||
await writeFile(join(stagedDir, 'manifest.json'), '{bad json', 'utf-8');
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { z } from 'zod';
|
|||
|
||||
export const NOTION_API_VERSION = '2026-03-11';
|
||||
export const NOTION_SOURCE_KEY = 'notion';
|
||||
export const NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN = 25;
|
||||
|
||||
export const notionPullConfigSchema = z.object({
|
||||
authToken: z.string().min(1),
|
||||
|
|
@ -10,7 +11,7 @@ export const notionPullConfigSchema = z.object({
|
|||
rootDatabaseIds: z.array(z.string().min(1)).default([]),
|
||||
rootDataSourceIds: z.array(z.string().min(1)).default([]),
|
||||
maxPagesPerRun: z.number().int().min(1).max(10_000).default(1000),
|
||||
maxKnowledgeCreatesPerRun: z.number().int().min(0).max(25).default(5),
|
||||
maxKnowledgeCreatesPerRun: z.number().int().min(0).max(25).default(NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN),
|
||||
maxKnowledgeUpdatesPerRun: z.number().int().min(0).max(100).default(20),
|
||||
lastSuccessfulCursor: z.string().nullable().default(null),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -315,6 +315,7 @@ export type {
|
|||
MetricflowPullConfig,
|
||||
} from './adapters/metricflow/pull-config.js';
|
||||
export { NOTION_ORG_KNOWLEDGE_WARNING } from './adapters/notion/chunk.js';
|
||||
export { NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN } from './adapters/notion/types.js';
|
||||
export { NotionSourceAdapter, type NotionSourceAdapterDeps } from './adapters/notion/notion.adapter.js';
|
||||
export { NotionClient, type NotionApi, type NotionBotInfo } from './adapters/notion/notion-client.js';
|
||||
export { bucketDistinctUsers, bucketErrorRate, bucketExecutions, bucketP95Runtime, bucketRecency } from './adapters/historic-sql/buckets.js';
|
||||
|
|
|
|||
|
|
@ -184,7 +184,11 @@ const makeDeps = () => {
|
|||
.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve(connectionId === 'warehouse-2' ? ['looker__orders.yaml'] : []),
|
||||
),
|
||||
loadAllSources: vi.fn().mockResolvedValue([]),
|
||||
loadAllSources: vi
|
||||
.fn()
|
||||
.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve(connectionId === 'warehouse-2' ? [{ name: 'looker__orders' }] : []),
|
||||
),
|
||||
};
|
||||
const slSearchService = {
|
||||
indexSources: vi.fn().mockResolvedValue(undefined),
|
||||
|
|
@ -1261,8 +1265,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu',
|
||||
);
|
||||
expect(deps.adapter.listTargetConnectionIds).toHaveBeenCalledWith('/tmp/stage/upload-x');
|
||||
expect(deps.semanticLayerService.listFilesForConnection).toHaveBeenCalledWith('looker-run');
|
||||
expect(deps.semanticLayerService.listFilesForConnection).toHaveBeenCalledWith('warehouse-2');
|
||||
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('looker-run');
|
||||
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
|
||||
expect(workUnitCall?.[0].userPrompt).toContain('looker__orders');
|
||||
expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['looker-run', 'warehouse-2']);
|
||||
});
|
||||
|
|
@ -1556,6 +1560,49 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
expect(deps.knowledgeIndex.listPagesForUser).toHaveBeenCalledWith('system');
|
||||
});
|
||||
|
||||
it('includes manifest-backed target sources in WorkUnit prompts', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['postgres-warehouse']);
|
||||
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve(connectionId === 'postgres-warehouse' ? [{ name: 'stg_accounts' }] : []),
|
||||
);
|
||||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['models/schema.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/dbt-main/dbt/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
await runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'dbt-main',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
const workUnitCall = deps.agentRunner.runLoop.mock.calls.find(
|
||||
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu',
|
||||
);
|
||||
expect(workUnitCall?.[0].userPrompt).toContain('## postgres-warehouse');
|
||||
expect(workUnitCall?.[0].userPrompt).toContain('stg_accounts');
|
||||
expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['dbt-main', 'postgres-warehouse']);
|
||||
});
|
||||
|
||||
it('does not resolve qualified fallback table refs by source name alone', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.semanticLayerService.loadAllSources.mockResolvedValue([{ name: 'orders', table: 'sales.orders' }]);
|
||||
const runner = buildRunner(deps);
|
||||
|
||||
await expect(
|
||||
(runner as any).tableRefExistsInSemanticLayer(deps.semanticLayerService, ['warehouse'], 'finance.orders'),
|
||||
).resolves.toBe(false);
|
||||
await expect(
|
||||
(runner as any).tableRefExistsInSemanticLayer(deps.semanticLayerService, ['warehouse'], 'sales.orders'),
|
||||
).resolves.toBe(true);
|
||||
});
|
||||
|
||||
it('passes relevant canonical pins into the reconciliation system prompt', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.diffSetService.compute.mockResolvedValue({
|
||||
|
|
|
|||
|
|
@ -5,9 +5,10 @@ import pLimit from 'p-limit';
|
|||
import { z } from 'zod';
|
||||
import { type KtxLogger, noopLogger } from '../core/index.js';
|
||||
import type { CaptureSession, MemoryAction } from '../memory/index.js';
|
||||
import type { SlValidationDeps } from '../sl/index.js';
|
||||
import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
|
||||
import { createTouchedSlSources, type ToolContext, type ToolSession } from '../tools/index.js';
|
||||
import { actionTargetConnectionId } from './action-identity.js';
|
||||
import { NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN } from './adapters/notion/types.js';
|
||||
import { selectRelevantCanonicalPins } from './canonical-pins.js';
|
||||
import { sanitizeMemoryFlowError } from './memory-flow/live-buffer.js';
|
||||
import type { MemoryFlowPlannedWorkUnit } from './memory-flow/types.js';
|
||||
|
|
@ -39,6 +40,11 @@ import { createReadRawSpanTool } from './tools/read-raw-span.tool.js';
|
|||
import { createStageDiffTool } from './tools/stage-diff.tool.js';
|
||||
import { createStageListTool } from './tools/stage-list.tool.js';
|
||||
import { type ToolCallLogEntry, wrapToolsWithLogger } from './tools/tool-call-logger.js';
|
||||
import {
|
||||
createMutableToolTranscriptSummary,
|
||||
recordToolTranscriptEntry,
|
||||
type MutableToolTranscriptSummary,
|
||||
} from './tools/tool-transcript-summary.js';
|
||||
import type {
|
||||
EvictionUnit,
|
||||
IngestBundleJob,
|
||||
|
|
@ -48,14 +54,6 @@ import type {
|
|||
WorkUnit,
|
||||
} from './types.js';
|
||||
|
||||
interface MutableToolTranscriptSummary {
|
||||
unitKey: string;
|
||||
path: string;
|
||||
toolCallCount: number;
|
||||
errorCount: number;
|
||||
toolNames: Set<string>;
|
||||
}
|
||||
|
||||
function workUnitToMemoryFlowPlannedWorkUnit(workUnit: WorkUnit): MemoryFlowPlannedWorkUnit {
|
||||
return {
|
||||
unitKey: workUnit.unitKey,
|
||||
|
|
@ -80,21 +78,6 @@ function countMemoryFlowActions(actions: MemoryAction[], target: MemoryAction['t
|
|||
return actions.filter((action) => action.target === target).length;
|
||||
}
|
||||
|
||||
/**
 * Report whether a tool output carries an explicit structured failure.
 *
 * @param output - Arbitrary tool output.
 * @returns `true` only when `output` is an object whose `structured` property is an
 *   object with `success` strictly equal to `false`.
 */
function isStructuredToolFailure(output: unknown): boolean {
  if (typeof output !== 'object' || output === null) {
    return false;
  }
  const structured = (output as { structured?: unknown }).structured;
  if (typeof structured !== 'object' || structured === null) {
    return false;
  }
  // Strict comparison: a missing or non-boolean `success` is not treated as a failure.
  return (structured as { success?: unknown }).success === false;
}
|
||||
|
||||
/**
 * Decide whether a logged tool call counts as failed.
 *
 * A call fails when the entry has a truthy `error`, or when it is an
 * `sl_write_source` / `wiki_write` call whose output is a structured failure.
 *
 * @param entry - Tool call log entry to classify.
 * @returns `true` when the call should be counted as an error.
 */
function isFailedToolCall(entry: ToolCallLogEntry): boolean {
  if (entry.error) {
    return true;
  }
  // Structured `success: false` outputs are only inspected for these two write tools.
  const isWriteTool = entry.toolName === 'sl_write_source' || entry.toolName === 'wiki_write';
  return isWriteTool && isStructuredToolFailure(entry.output);
}
|
||||
|
||||
function reportIdFromCreateResult(result: unknown): string | undefined {
|
||||
if (!result || typeof result !== 'object' || !('id' in result)) {
|
||||
return undefined;
|
||||
|
|
@ -103,6 +86,46 @@ function reportIdFromCreateResult(result: unknown): string | undefined {
|
|||
return typeof id === 'string' && id.length > 0 ? id : undefined;
|
||||
}
|
||||
|
||||
/**
 * Canonicalize a table reference so two spellings can be compared.
 *
 * Surrounding whitespace is trimmed, identifier delimiters (double quotes, backticks
 * and square brackets) are removed, and the result is lower-cased.
 *
 * @param value - Raw table reference, e.g. `"Sales"."Orders"` or `[dbo].[Users]`.
 * @returns The normalized reference; may be an empty string.
 */
function normalizeTableReference(value: string): string {
  // One pass over all delimiter characters; whitespace is trimmed before stripping.
  return value
    .trim()
    .replace(/["`[\]]/g, '')
    .toLowerCase();
}
|
||||
|
||||
/**
 * Return the last non-empty dot-separated segment of a reference.
 *
 * @param value - Dotted reference such as `db.schema.table`.
 * @returns The final non-empty segment, or `value` unchanged when it has none
 *   (for example an empty string or a string made only of dots).
 */
function finalReferenceSegment(value: string): string {
  const segments = value.split('.').filter((segment) => segment.length > 0);
  // Index access instead of Array.prototype.at keeps this independent of the ES2022 lib.
  return segments.length > 0 ? segments[segments.length - 1] : value;
}
|
||||
|
||||
/**
 * Decide whether a semantic-layer source corresponds to a table reference.
 *
 * Both sides are compared after `normalizeTableReference`. A source matches when:
 * - its `name` equals the reference, or
 * - its `table` equals the reference or ends with `.<reference>`, or
 * - the reference is unqualified (contains no dot) and equals the final segment of `table`.
 *
 * A qualified reference is therefore never matched by a bare table name alone:
 * `finance.orders` does not match a source whose table is `sales.orders`.
 *
 * @param source - Semantic-layer source; `name` and, when it is a string, `table` are read.
 * @param tableRef - Table reference to look for.
 * @returns `true` when the source matches; `false` for an empty reference.
 */
function semanticSourceMatchesTableRef(source: SemanticLayerSource, tableRef: string): boolean {
  const ref = normalizeTableReference(tableRef);
  if (!ref) {
    return false;
  }

  if (normalizeTableReference(source.name) === ref) {
    return true;
  }

  const table = typeof source.table === 'string' ? normalizeTableReference(source.table) : '';
  if (!table) {
    return false;
  }
  if (table === ref || table.endsWith(`.${ref}`)) {
    return true;
  }

  // Only unqualified references may match on the last segment of the source table.
  const refIsQualified = ref.includes('.');
  return !refIsQualified && finalReferenceSegment(table) === ref;
}
|
||||
|
||||
/**
 * Pick the raw paths to attribute to a memory action.
 *
 * @param action - Action whose optional `rawPaths` are preferred.
 * @param fallbackRawPaths - Paths used when the action has no raw paths of its own.
 * @returns A de-duplicated copy of `action.rawPaths` when it is non-empty; otherwise
 *   `fallbackRawPaths` itself (same array instance, not de-duplicated).
 */
function rawPathsForAction(action: MemoryAction, fallbackRawPaths: string[]): string[] {
  const ownPaths = action.rawPaths;
  if (!ownPaths || ownPaths.length === 0) {
    return fallbackRawPaths;
  }
  return [...new Set(ownPaths)];
}
|
||||
|
||||
export class IngestBundleRunner {
|
||||
private readonly logger: KtxLogger;
|
||||
private readonly chainByConnection = new Map<string, Promise<unknown>>();
|
||||
|
|
@ -276,18 +299,46 @@ export class IngestBundleRunner {
|
|||
const blocks = await Promise.all(
|
||||
connectionIds.map(async (connectionId) => {
|
||||
try {
|
||||
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
||||
const names = files.filter((f) => !f.startsWith('_schema/')).map((f) => f.replace(/\.yaml$/, ''));
|
||||
const sources = await this.deps.semanticLayerService.loadAllSources(connectionId);
|
||||
const names = sources.map((source) => source.name).sort((left, right) => left.localeCompare(right));
|
||||
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
||||
return `## ${connectionId}\n${body}`;
|
||||
} catch {
|
||||
return `## ${connectionId}\n(empty)`;
|
||||
try {
|
||||
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
||||
const names = files
|
||||
.filter((f) => !f.startsWith('_schema/'))
|
||||
.map((f) => f.replace(/\.yaml$/, ''))
|
||||
.sort((left, right) => left.localeCompare(right));
|
||||
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
||||
return `## ${connectionId}\n${body}`;
|
||||
} catch {
|
||||
return `## ${connectionId}\n(empty)`;
|
||||
}
|
||||
}
|
||||
}),
|
||||
);
|
||||
return blocks.join('\n\n');
|
||||
}
|
||||
|
||||
/**
 * Checks whether any semantic-layer source of the given connections matches `tableRef`.
 *
 * Connections are inspected one at a time, in order, and the search stops at the first match.
 * A connection whose lookup throws is skipped.
 *
 * @param semanticLayerService Service used to load each connection's sources.
 * @param connectionIds Connections to search.
 * @param tableRef Table reference to look for.
 * @returns `true` once a matching source is found; `false` when none matched.
 */
private async tableRefExistsInSemanticLayer(
  semanticLayerService: SemanticLayerService,
  connectionIds: string[],
  tableRef: string,
): Promise<boolean> {
  for (const connectionId of connectionIds) {
    let matched = false;
    try {
      const loaded = await semanticLayerService.loadAllSources(connectionId);
      matched = loaded.some((source) => semanticSourceMatchesTableRef(source, tableRef));
    } catch {
      // Fallback diagnostics should not fail an ingest stage if an index lookup is temporarily unavailable.
    }
    if (matched) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
private resolveContextCuratorBudget(
|
||||
bundleRef: IngestBundleJob['bundleRef'],
|
||||
stageIndex: StageIndex,
|
||||
|
|
@ -297,7 +348,9 @@ export class IngestBundleRunner {
|
|||
? (bundleRef.config as Record<string, unknown>)
|
||||
: {};
|
||||
const configuredCreates =
|
||||
typeof rawConfig.maxKnowledgeCreatesPerRun === 'number' ? rawConfig.maxKnowledgeCreatesPerRun : 5;
|
||||
typeof rawConfig.maxKnowledgeCreatesPerRun === 'number'
|
||||
? rawConfig.maxKnowledgeCreatesPerRun
|
||||
: NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN;
|
||||
const configuredUpdates =
|
||||
typeof rawConfig.maxKnowledgeUpdatesPerRun === 'number' ? rawConfig.maxKnowledgeUpdatesPerRun : 20;
|
||||
const wikiActions = stageIndex.workUnits.flatMap((wu) => wu.actions).filter((action) => action.target === 'wiki');
|
||||
|
|
@ -351,17 +404,8 @@ export class IngestBundleRunner {
|
|||
(path: string) =>
|
||||
(entry: ToolCallLogEntry): void => {
|
||||
const current =
|
||||
transcriptSummaries.get(entry.wuKey) ??
|
||||
({
|
||||
unitKey: entry.wuKey,
|
||||
path,
|
||||
toolCallCount: 0,
|
||||
errorCount: 0,
|
||||
toolNames: new Set<string>(),
|
||||
} satisfies MutableToolTranscriptSummary);
|
||||
current.toolCallCount += 1;
|
||||
current.errorCount += isFailedToolCall(entry) ? 1 : 0;
|
||||
current.toolNames.add(entry.toolName);
|
||||
transcriptSummaries.get(entry.wuKey) ?? createMutableToolTranscriptSummary(entry.wuKey, path);
|
||||
recordToolTranscriptEntry(current, entry);
|
||||
transcriptSummaries.set(entry.wuKey, current);
|
||||
};
|
||||
const overrideReport = await this.loadOverrideReport(job);
|
||||
|
|
@ -617,6 +661,7 @@ export class IngestBundleRunner {
|
|||
preHead: sessionWorktree.baseSha,
|
||||
touchedSlSources: session.touchedSlSources,
|
||||
actions: sessionActions,
|
||||
allowedRawPaths: new Set(wu.rawFiles),
|
||||
semanticLayerService: scopedSemanticLayerService,
|
||||
wikiService: scopedWikiService,
|
||||
configService: sessionWorktree.config,
|
||||
|
|
@ -681,6 +726,8 @@ export class IngestBundleRunner {
|
|||
emit_unmapped_fallback: createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: new Set(wu.rawFiles),
|
||||
tableRefExists: (tableRef) =>
|
||||
this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef),
|
||||
}),
|
||||
};
|
||||
|
||||
|
|
@ -728,7 +775,7 @@ export class IngestBundleRunner {
|
|||
sourceKey: job.sourceKey,
|
||||
connectionId: job.connectionId,
|
||||
jobId: job.jobId,
|
||||
toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.errorCount ?? 0,
|
||||
toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 0,
|
||||
onStepFinish: ({ stepIndex, stepBudget }) => {
|
||||
memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });
|
||||
},
|
||||
|
|
@ -839,6 +886,10 @@ export class IngestBundleRunner {
|
|||
const reconcileActions: MemoryAction[] = [];
|
||||
const rcScopedWiki = this.deps.wikiService.forWorktree(sessionWorktree.workdir);
|
||||
const rcScopedSl = this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir);
|
||||
const reconciliationAllowedRawPaths = new Set<string>([
|
||||
...currentHashes.keys(),
|
||||
...(eviction?.deletedRawPaths ?? []),
|
||||
]);
|
||||
|
||||
const rcToolSession: ToolSession = {
|
||||
connectionId: job.connectionId,
|
||||
|
|
@ -846,6 +897,7 @@ export class IngestBundleRunner {
|
|||
preHead: reconcileSession.preHead,
|
||||
touchedSlSources: reconcileSession.touchedSlSources,
|
||||
actions: reconcileActions,
|
||||
allowedRawPaths: reconciliationAllowedRawPaths,
|
||||
semanticLayerService: rcScopedSl,
|
||||
wikiService: rcScopedWiki,
|
||||
configService: sessionWorktree.config,
|
||||
|
|
@ -910,6 +962,7 @@ export class IngestBundleRunner {
|
|||
emit_unmapped_fallback: createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: allStagedPaths,
|
||||
tableRefExists: (tableRef) => this.tableRefExistsInSemanticLayer(rcScopedSl, slConnectionIds, tableRef),
|
||||
}),
|
||||
};
|
||||
|
||||
|
|
@ -1167,26 +1220,34 @@ export class IngestBundleRunner {
|
|||
return a.type === 'created' ? 'source_created' : 'measure_added';
|
||||
};
|
||||
const producedPaths = new Set<string>();
|
||||
const pushActionProvenance = (rawPath: string, action: MemoryAction): void => {
|
||||
const hash = currentHashes.get(rawPath) ?? 'unknown';
|
||||
provenanceRows.push({
|
||||
connectionId: job.connectionId,
|
||||
sourceKey: job.sourceKey,
|
||||
syncId,
|
||||
rawPath,
|
||||
rawContentHash: hash,
|
||||
artifactKind: action.target,
|
||||
artifactKey: action.key,
|
||||
targetConnectionId: action.target === 'sl' ? actionTargetConnectionId(action, job.connectionId) : null,
|
||||
artifactContentHash: null,
|
||||
actionType: actionToType(action),
|
||||
});
|
||||
producedPaths.add(rawPath);
|
||||
};
|
||||
for (const wu of stageIndex.workUnits) {
|
||||
for (const rawPath of wu.rawFiles) {
|
||||
const hash = currentHashes.get(rawPath) ?? 'unknown';
|
||||
for (const action of wu.actions) {
|
||||
provenanceRows.push({
|
||||
connectionId: job.connectionId,
|
||||
sourceKey: job.sourceKey,
|
||||
syncId,
|
||||
rawPath,
|
||||
rawContentHash: hash,
|
||||
artifactKind: action.target,
|
||||
artifactKey: action.key,
|
||||
targetConnectionId: action.target === 'sl' ? (action.targetConnectionId ?? null) : null,
|
||||
artifactContentHash: null,
|
||||
actionType: actionToType(action),
|
||||
});
|
||||
producedPaths.add(rawPath);
|
||||
for (const action of wu.actions) {
|
||||
for (const rawPath of rawPathsForAction(action, wu.rawFiles)) {
|
||||
pushActionProvenance(rawPath, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const action of reconcileActions) {
|
||||
for (const rawPath of action.rawPaths ?? []) {
|
||||
pushActionProvenance(rawPath, action);
|
||||
}
|
||||
}
|
||||
for (const resolution of stageIndex.artifactResolutions ?? []) {
|
||||
const hash = currentHashes.get(resolution.rawPath) ?? 'unknown';
|
||||
provenanceRows.push({
|
||||
|
|
|
|||
|
|
@ -466,6 +466,38 @@ describe('local ingest adapters', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('exposes configured primary warehouses as dbt target connections', async () => {
|
||||
const dbtProject: KtxLocalProject = {
|
||||
...projectWithConnections({
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgresql://example/db',
|
||||
},
|
||||
analytics_dbt: {
|
||||
driver: 'dbt',
|
||||
source_dir: '/repo/dbt',
|
||||
},
|
||||
}),
|
||||
config: {
|
||||
...project.config,
|
||||
setup: { database_connection_ids: ['warehouse'], completed_steps: [] },
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgresql://example/db',
|
||||
},
|
||||
analytics_dbt: {
|
||||
driver: 'dbt',
|
||||
source_dir: '/repo/dbt',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
const adapter = createDefaultLocalIngestAdapters(dbtProject).find((candidate) => candidate.source === 'dbt');
|
||||
|
||||
await expect(adapter?.listTargetConnectionIds?.('/tmp/staged-dbt')).resolves.toEqual(['warehouse']);
|
||||
});
|
||||
|
||||
it('resolves MetricFlow auth_token_ref without writing literal tokens to config', async () => {
|
||||
const project = projectWithConnections({
|
||||
metricflow_main: {
|
||||
|
|
|
|||
|
|
@ -89,7 +89,10 @@ export function createDefaultLocalIngestAdapters(
|
|||
}),
|
||||
}),
|
||||
new LookmlSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }),
|
||||
new DbtSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }),
|
||||
new DbtSourceAdapter({
|
||||
homeDir: join(project.projectDir, '.ktx/cache'),
|
||||
targetConnectionIds: primaryWarehouseConnectionIds(project),
|
||||
}),
|
||||
createLocalMetabaseSourceAdapter(project, {
|
||||
...(options.logger ? { logger: options.logger } : {}),
|
||||
}),
|
||||
|
|
@ -128,6 +131,21 @@ export function createDefaultLocalIngestAdapters(
|
|||
return adapters;
|
||||
}
|
||||
|
||||
/**
 * Lists the connection ids to expose as primary warehouses for this project.
 *
 * Ids from `setup.database_connection_ids` for which `localConnectionToWarehouseDescriptor`
 * yields a truthy descriptor take precedence, de-duplicated in configured order. When none
 * qualify, every configured connection that yields a descriptor is returned, sorted by id.
 *
 * @param project Local project whose config is inspected.
 * @returns Connection ids as described above; possibly empty.
 */
function primaryWarehouseConnectionIds(project: KtxLocalProject): string[] {
  const { connections, setup } = project.config;
  const isWarehouse = (connectionId: string): boolean =>
    Boolean(localConnectionToWarehouseDescriptor(connectionId, connections[connectionId]));

  const fromSetup = (setup?.database_connection_ids ?? []).filter((connectionId) => isWarehouse(connectionId));
  if (fromSetup.length > 0) {
    return Array.from(new Set(fromSetup));
  }

  return Object.keys(connections)
    .filter((connectionId) => isWarehouse(connectionId))
    .sort((left, right) => left.localeCompare(right));
}
|
||||
|
||||
/**
 * Type guard for record-like values.
 *
 * @param value Any value.
 * @returns `true` for non-null, non-array objects; `false` for everything else.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
||||
|
|
|
|||
|
|
@ -88,6 +88,35 @@ class WikiWritingAgentRunner extends AgentRunnerService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Test double for the agent runner. For `ingest-bundle-wu` operations its `runLoop` mock
 * calls the supplied `wiki_write` tool once with an explicit `rawPaths` entry, and throws
 * when the tool is missing or reports failure. Every successful call resolves with a
 * natural stop reason.
 */
class WikiWritingWithRawPathAgentRunner extends AgentRunnerService {
  constructor() {
    super({ llmProvider: { getModel: () => ({}) as never } as never });
  }

  override runLoop = vi.fn(async (params: any) => {
    const isWorkUnitRun = params.telemetryTags?.operationName === 'ingest-bundle-wu';
    if (!isWorkUnitRun) {
      return { stopReason: 'natural' as const };
    }

    const writeTool = params.toolSet.wiki_write;
    if (!writeTool?.execute) {
      throw new Error('wiki_write tool was not available to the WorkUnit');
    }

    // Only one raw path is named, so provenance should be attributed to that file alone.
    const page = {
      key: 'orders_context',
      summary: 'Orders source context',
      content: 'Orders are purchase records used for revenue analysis.',
      tags: ['orders'],
      rawPaths: ['orders/orders.json'],
    };
    const outcome = await writeTool.execute(page, { toolCallId: 'wiki-write' });
    if (!outcome.structured.success) {
      throw new Error(outcome.markdown);
    }

    return { stopReason: 'natural' as const };
  });
}
|
||||
|
||||
class HistoricSqlEvidenceAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
if (
|
||||
|
|
@ -366,14 +395,94 @@ describe('canonical local ingest', () => {
|
|||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true });
|
||||
try {
|
||||
expect(db.prepare('SELECT key, summary FROM knowledge_pages ORDER BY key').all()).toEqual([
|
||||
{ key: 'orders_context', summary: 'Orders source context' },
|
||||
expect(db.prepare('SELECT key, summary, embedding_json IS NOT NULL AS has_embedding FROM knowledge_pages ORDER BY key').all()).toEqual([
|
||||
{ key: 'orders_context', summary: 'Orders source context', has_embedding: 1 },
|
||||
]);
|
||||
} finally {
|
||||
db.close();
|
||||
}
|
||||
});
|
||||
|
||||
it('does not persist noop embedding vectors when local embeddings are disabled', async () => {
|
||||
await writeFile(
|
||||
join(project.projectDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'connections:',
|
||||
' warehouse:',
|
||||
' driver: postgres',
|
||||
'ingest:',
|
||||
' adapters:',
|
||||
' - fake',
|
||||
' embeddings:',
|
||||
' backend: none',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
project = await loadKtxProject({ projectDir: project.projectDir });
|
||||
const sourceDir = join(tempDir, 'source');
|
||||
await mkdir(join(sourceDir, 'orders'), { recursive: true });
|
||||
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
|
||||
const agentRunner = new WikiWritingAgentRunner();
|
||||
|
||||
const result = await runLocalIngest({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
adapter: 'fake',
|
||||
connectionId: 'warehouse',
|
||||
sourceDir,
|
||||
jobId: 'wiki-local-no-embeddings-1',
|
||||
agentRunner,
|
||||
});
|
||||
|
||||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true });
|
||||
try {
|
||||
expect(db.prepare('SELECT key, summary, embedding_json IS NOT NULL AS has_embedding FROM knowledge_pages ORDER BY key').all()).toEqual([
|
||||
{ key: 'orders_context', summary: 'Orders source context', has_embedding: 0 },
|
||||
]);
|
||||
} finally {
|
||||
db.close();
|
||||
}
|
||||
});
|
||||
|
||||
it('uses explicit action raw paths to avoid over-attributing work-unit provenance', async () => {
|
||||
const sourceDir = join(tempDir, 'source');
|
||||
await mkdir(join(sourceDir, 'orders'), { recursive: true });
|
||||
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
|
||||
await writeFile(join(sourceDir, 'orders', 'unrelated.json'), '{"name":"unrelated"}\n', 'utf-8');
|
||||
const agentRunner = new WikiWritingWithRawPathAgentRunner();
|
||||
|
||||
const result = await runLocalIngest({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
adapter: 'fake',
|
||||
connectionId: 'warehouse',
|
||||
sourceDir,
|
||||
jobId: 'wiki-raw-path-local-1',
|
||||
agentRunner,
|
||||
});
|
||||
|
||||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
expect(result.report.body.provenanceRows).toEqual([
|
||||
{
|
||||
rawPath: 'orders/orders.json',
|
||||
artifactKind: 'wiki',
|
||||
artifactKey: 'orders_context',
|
||||
targetConnectionId: null,
|
||||
actionType: 'wiki_written',
|
||||
},
|
||||
{
|
||||
rawPath: 'orders/unrelated.json',
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
targetConnectionId: null,
|
||||
actionType: 'skipped',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('runs historic-SQL evidence projection through the local bundle post-processor', async () => {
|
||||
const projectDir = join(tempDir, 'historic-sql-project');
|
||||
await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ import {
|
|||
type ToolSession,
|
||||
} from '../tools/index.js';
|
||||
import {
|
||||
buildKnowledgeSearchText,
|
||||
type KnowledgeEventPort,
|
||||
type KnowledgeIndexPort,
|
||||
KnowledgeWikiService,
|
||||
|
|
@ -289,7 +290,10 @@ function scoreText(text: string, query: string): number {
|
|||
class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
||||
private readonly sqlite: SqliteKnowledgeIndex;
|
||||
|
||||
constructor(private readonly project: KtxLocalProject) {
|
||||
constructor(
|
||||
private readonly project: KtxLocalProject,
|
||||
private readonly embedding: KtxEmbeddingPort,
|
||||
) {
|
||||
this.sqlite = new SqliteKnowledgeIndex({ dbPath: ktxLocalStateDbPath(project) });
|
||||
}
|
||||
|
||||
|
|
@ -391,6 +395,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
|
||||
private async syncAllPagesFromDisk(): Promise<void> {
|
||||
const listed = await this.project.fileStore.listFiles('knowledge', true);
|
||||
const existingPages = this.sqlite.getExistingPages();
|
||||
const pages: SqliteKnowledgeIndexPage[] = [];
|
||||
for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) {
|
||||
const parsedPath = parseKnowledgeIndexPath(file);
|
||||
|
|
@ -400,14 +405,21 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
const path = `knowledge/${file}`;
|
||||
const raw = await this.project.fileStore.readFile(path);
|
||||
const parsed = parseWiki(raw.content);
|
||||
const tags = parseWikiTags(raw.content);
|
||||
const searchText = buildKnowledgeSearchText(parsedPath.pageKey, parsed.summary, parsed.content, tags);
|
||||
const existing = existingPages.get(path);
|
||||
const embedding =
|
||||
existing?.searchText === searchText && existing.embedding
|
||||
? existing.embedding
|
||||
: await this.embedding.computeEmbedding(searchText).catch(() => null);
|
||||
pages.push({
|
||||
path,
|
||||
key: parsedPath.pageKey,
|
||||
scope: parsedPath.scope,
|
||||
summary: parsed.summary,
|
||||
content: parsed.content,
|
||||
tags: parseWikiTags(raw.content),
|
||||
embedding: null,
|
||||
tags,
|
||||
embedding,
|
||||
});
|
||||
}
|
||||
this.sqlite.sync(pages);
|
||||
|
|
@ -417,10 +429,19 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
/**
 * Maps a file path relative to the `knowledge/` directory onto its index scope and page key.
 *
 * Recognized layouts:
 * - `global/<key>.md` -> GLOBAL page `<key>`
 * - `global/historic-sql/<segment>/.../<segment>.md` -> GLOBAL page `historic-sql/<segments>`
 * - `user/<dir>/<key>.md` -> USER page `<key>`
 *
 * Flat keys must start with an ASCII letter or digit and continue with letters, digits,
 * `_` or `-`. Historic-SQL segments may additionally start with `_`. The validation runs
 * before anything is returned, so files whose names do not form a valid key are rejected.
 *
 * @param file Path relative to the knowledge root, using `/` separators.
 * @returns The scope and page key, or `null` when the path matches no layout or the key is invalid.
 */
function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; pageKey: string } | null {
  const flatKeyPattern = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/;
  const historicSegmentPattern = /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/;
  const stripMarkdownExtension = (value: string): string => value.replace(/\.md$/, '');

  const segments = file.split('/');

  if (segments.length === 2 && segments[0] === 'global') {
    const pageKey = stripMarkdownExtension(segments[1]);
    return flatKeyPattern.test(pageKey) ? { scope: 'GLOBAL', pageKey } : null;
  }

  if (segments.length >= 3 && segments[0] === 'global' && segments[1] === 'historic-sql') {
    const historicPath = stripMarkdownExtension(segments.slice(2).join('/'));
    // Validating each segment also rejects traversal-style parts such as `..`.
    if (historicPath.split('/').every((segment) => historicSegmentPattern.test(segment))) {
      return { scope: 'GLOBAL', pageKey: `historic-sql/${historicPath}` };
    }
    return null;
  }

  if (segments.length === 3 && segments[0] === 'user') {
    // The middle segment is the per-user directory and is not part of the page key.
    const pageKey = stripMarkdownExtension(segments[2]);
    return flatKeyPattern.test(pageKey) ? { scope: 'USER', pageKey } : null;
  }

  return null;
}
|
||||
|
|
@ -591,7 +612,7 @@ export function createLocalBundleIngestRuntime(
|
|||
);
|
||||
const slSourcesRepository = new SqliteSlSourcesIndex({ dbPath });
|
||||
const slSearchService = new SlSearchService(embedding, slSourcesRepository, logger);
|
||||
const knowledgeIndex = new LocalKnowledgeIndex(options.project);
|
||||
const knowledgeIndex = new LocalKnowledgeIndex(options.project, embedding);
|
||||
const knowledgeEvents = new NoopKnowledgeEventPort();
|
||||
const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, options.project.git, logger);
|
||||
const { agentRunner, llmProvider } = resolveAgentRunner(options);
|
||||
|
|
|
|||
|
|
@ -611,7 +611,7 @@ describe('local ingest', () => {
|
|||
continuedFromCursor: false,
|
||||
partialSnapshot: false,
|
||||
maxPagesPerRun: 1000,
|
||||
maxKnowledgeCreatesPerRun: 5,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
maxKnowledgeUpdatesPerRun: 20,
|
||||
nextSuccessfulCursor: null,
|
||||
skipped: [],
|
||||
|
|
@ -654,6 +654,7 @@ describe('local ingest', () => {
|
|||
crawlMode: 'selected_roots',
|
||||
rootPageIds: ['page-1'],
|
||||
maxPagesPerRun: 1000,
|
||||
maxKnowledgeCreatesPerRun: 25,
|
||||
}),
|
||||
expect.any(String),
|
||||
{ connectionId: 'notion-main', sourceKey: 'notion' },
|
||||
|
|
|
|||
28
packages/context/src/ingest/memory-flow/known-errors.ts
Normal file
28
packages/context/src/ingest/memory-flow/known-errors.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
/** Minimal context needed to classify a memory-flow error: the adapter that produced it. */
interface MemoryFlowErrorContext {
  adapter: string;
}

/**
 * Detects the failure signature treated as an expired Notion authorization.
 *
 * The match is case-insensitive and requires `invalid_grant` together with either
 * `invalid_rapt` or `reauth` somewhere in the reason text.
 *
 * @param context Error context; only the `notion` adapter can match.
 * @param reason Raw failure reason, if any.
 * @returns `true` when the adapter is `notion` and the reason carries the signature.
 */
export function isNotionAuthorizationExpired(
  context: MemoryFlowErrorContext,
  reason: string | undefined,
): boolean {
  if (context.adapter !== 'notion') {
    return false;
  }
  const text = (reason ?? '').toLowerCase();
  if (!text.includes('invalid_grant')) {
    return false;
  }
  return ['invalid_rapt', 'reauth'].some((marker) => text.includes(marker));
}
|
||||
|
||||
/**
 * Builds the user-facing detail line for a work unit that failed on expired Notion authorization.
 *
 * @param unitKey Key of the failed work unit; it leads the sentence.
 * @returns The detail sentence.
 */
export function formatNotionAuthorizationExpiredDetail(unitKey: string): string {
  const explanation =
    'could not read Notion because the saved OAuth grant expired or requires reauthentication (invalid_grant / invalid_rapt).';
  return [unitKey, explanation].join(' ');
}
|
||||
|
||||
/**
 * Produces the remediation steps shown when Notion authorization has expired.
 *
 * @param connectionId Connection id interpolated into both suggestions.
 * @returns Two suggestions: refresh the referenced token, then confirm access and rerun ingest.
 */
export function notionAuthorizationFixSuggestions(connectionId: string): string[] {
  const refreshToken = `Refresh the Notion token referenced by auth_token_ref for ${connectionId}. If it uses env:NAME, export a fresh token in that variable; if it uses file:/path, replace that file.`;
  const confirmAndRerun = `Run ktx connection notion pick ${connectionId} to confirm Notion access, then rerun ktx ingest ${connectionId}.`;
  return [refreshToken, confirmAndRerun];
}
|
||||
|
|
@ -60,6 +60,36 @@ describe('formatMemoryFlowFinalSummary', () => {
|
|||
).toContain('Trust issues: 3');
|
||||
});
|
||||
|
||||
it('explains expired Notion authorization with fix suggestions', () => {
|
||||
const rawReason =
|
||||
'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)","error_uri":"https://accounts.example/reauth"}';
|
||||
const summary = formatMemoryFlowFinalSummary(
|
||||
input({
|
||||
connectionId: 'notion-main',
|
||||
adapter: 'notion',
|
||||
status: 'error',
|
||||
events: [
|
||||
{ type: 'source_acquired', adapter: 'notion', trigger: 'manual_resync', fileCount: 37 },
|
||||
{ type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 },
|
||||
{ type: 'work_unit_finished', unitKey: 'notion-cluster-1', status: 'failed', reason: rawReason },
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary).toContain('Memory-flow summary: error');
|
||||
expect(summary).toContain(
|
||||
'Notion authorization expired: notion-cluster-1 could not read Notion because the saved OAuth grant expired or requires reauthentication (invalid_grant / invalid_rapt).',
|
||||
);
|
||||
expect(summary).toContain('Fix suggestions:');
|
||||
expect(summary).toContain(
|
||||
'- Refresh the Notion token referenced by auth_token_ref for notion-main. If it uses env:NAME, export a fresh token in that variable; if it uses file:/path, replace that file.',
|
||||
);
|
||||
expect(summary).toContain(
|
||||
'- Run ktx connection notion pick notion-main to confirm Notion access, then rerun ktx ingest notion-main.',
|
||||
);
|
||||
expect(summary).not.toContain('error_uri');
|
||||
});
|
||||
|
||||
it('labels replay source metadata in final summaries', () => {
|
||||
const summary = formatMemoryFlowFinalSummary({
|
||||
metadata: {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { sanitizeMemoryFlowError } from './live-buffer.js';
|
||||
import type { MemoryFlowEvent, MemoryFlowReplayInput } from './types.js';
|
||||
import { buildMemoryFlowViewModel } from './view-model.js';
|
||||
import { isNotionAuthorizationExpired, notionAuthorizationFixSuggestions } from './known-errors.js';
|
||||
|
||||
function latest<T extends MemoryFlowEvent['type']>(
|
||||
events: MemoryFlowEvent[],
|
||||
|
|
@ -42,6 +43,14 @@ function humanizeSummaryText(value: string): string {
|
|||
.replace(/\bSL\b/g, 'semantic layer');
|
||||
}
|
||||
|
||||
/**
 * Collects fix suggestions for failures with a known remedy.
 *
 * The reasons of all `work_unit_finished` events plus `input.errors` are scanned. If any of
 * them is recognized as an expired Notion authorization, the Notion remediation steps for
 * this connection are returned.
 *
 * @param input Replay input providing events, errors, adapter and connection id.
 * @returns Suggestions to print, or an empty array when no known failure is present.
 */
function fixSuggestions(input: MemoryFlowReplayInput): string[] {
  const candidateReasons = [
    ...eventsOf(input.events, 'work_unit_finished').map((event) => event.reason),
    ...input.errors,
  ];
  for (const reason of candidateReasons) {
    if (isNotionAuthorizationExpired(input, reason)) {
      return notionAuthorizationFixSuggestions(input.connectionId);
    }
  }
  return [];
}
|
||||
|
||||
export function formatMemoryFlowFinalSummary(input: MemoryFlowReplayInput): string {
|
||||
const sources = eventsOf(input.events, 'source_acquired');
|
||||
const source = sources.at(-1);
|
||||
|
|
@ -84,6 +93,14 @@ export function formatMemoryFlowFinalSummary(input: MemoryFlowReplayInput): stri
|
|||
}
|
||||
}
|
||||
|
||||
const suggestions = fixSuggestions(input);
|
||||
if (suggestions.length > 0) {
|
||||
lines.push('Fix suggestions:');
|
||||
for (const suggestion of suggestions) {
|
||||
lines.push(`- ${suggestion}`);
|
||||
}
|
||||
}
|
||||
|
||||
for (const error of input.errors.slice(0, 3)) {
|
||||
lines.push(`Error: ${sanitizeMemoryFlowError(error)}`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import type {
|
|||
MemoryFlowViewModel,
|
||||
} from './types.js';
|
||||
import { sanitizeMemoryFlowError } from './live-buffer.js';
|
||||
import { formatNotionAuthorizationExpiredDetail, isNotionAuthorizationExpired } from './known-errors.js';
|
||||
|
||||
function latest<T extends MemoryFlowEvent['type']>(
|
||||
events: MemoryFlowEvent[],
|
||||
|
|
@ -109,7 +110,7 @@ function errorDetails(input: MemoryFlowReplayInput): string[] {
|
|||
}
|
||||
|
||||
/**
 * Heuristically classifies a work-unit failure reason as a validation failure.
 *
 * Matches `semantic-layer` or `validation`, case-insensitively. A bare `invalid` is
 * deliberately not matched, so reasons such as `invalid_grant` are not reported as
 * validation failures.
 *
 * @param reason Failure reason, if any.
 * @returns `true` when the reason looks like a validation failure.
 */
function isValidationFailure(reason: string | undefined): boolean {
  return /semantic-layer|validation/i.test(reason ?? '');
}
|
||||
|
||||
function failedWorkUnitDetails(failed: Array<Extract<MemoryFlowEvent, { type: 'work_unit_finished' }>>): string[] {
|
||||
|
|
@ -180,11 +181,14 @@ function buildMemoryFlowTrustIssues(input: MemoryFlowReplayInput): MemoryFlowTru
|
|||
|
||||
for (const event of failed) {
|
||||
const reason = sanitizeMemoryFlowError(event.reason ?? 'failed');
|
||||
const knownNotionAuthFailure = isNotionAuthorizationExpired(input, event.reason);
|
||||
issues.push({
|
||||
id: `work-unit-failed:${event.unitKey}`,
|
||||
severity: 'failed',
|
||||
title: 'WorkUnit failed',
|
||||
detail: `${event.unitKey} failed: ${reason}`,
|
||||
title: knownNotionAuthFailure ? 'Notion authorization expired' : 'WorkUnit failed',
|
||||
detail: knownNotionAuthFailure
|
||||
? formatNotionAuthorizationExpiredDetail(event.unitKey)
|
||||
: `${event.unitKey} failed: ${reason}`,
|
||||
columnId: 'workUnits',
|
||||
targetLabel: event.unitKey,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ const ingestActionSchema = z.object({
|
|||
key: z.string(),
|
||||
detail: z.string(),
|
||||
targetConnectionId: z.string().nullable().default(null),
|
||||
rawPaths: z.array(z.string()).optional(),
|
||||
});
|
||||
|
||||
const touchedSlSourceSchema = z.object({
|
||||
|
|
@ -153,6 +154,7 @@ export const ingestReportSnapshotSchema = z
|
|||
),
|
||||
failedWorkUnits: z.array(z.string()),
|
||||
reconciliationSkipped: z.boolean(),
|
||||
reconciliationActions: z.array(ingestActionSchema).default([]),
|
||||
conflictsResolved: z.array(conflictResolvedSchema).default([]),
|
||||
evictionsApplied: z.array(evictionAppliedSchema).default([]),
|
||||
unmappedFallbacks: z.array(unmappedFallbackSchema).default([]),
|
||||
|
|
|
|||
|
|
@ -55,6 +55,10 @@ export interface IngestReportBody {
|
|||
workUnits: IngestReportWorkUnit[];
|
||||
failedWorkUnits: string[];
|
||||
reconciliationSkipped: boolean;
|
||||
// Actions emitted by the reconciliation stage (wiki/sl writes from
|
||||
// cross-WU reconciliation). Counted alongside workUnit.actions in
|
||||
// savedMemoryCountsForReport so progress reports reflect all writes.
|
||||
reconciliationActions?: MemoryAction[];
|
||||
conflictsResolved: ConflictResolvedRecord[];
|
||||
evictionsApplied: EvictionAppliedRecord[];
|
||||
unmappedFallbacks: UnmappedFallbackRecord[];
|
||||
|
|
@ -111,7 +115,9 @@ export function postProcessorSavedMemoryCounts(
|
|||
}
|
||||
|
||||
export function savedMemoryCountsForReport(report: IngestReportSnapshot): IngestSavedMemoryCounts {
|
||||
const actions = report.body.workUnits.flatMap((workUnit) => workUnit.actions);
|
||||
const workUnitActions = report.body.workUnits.flatMap((workUnit) => workUnit.actions);
|
||||
const reconciliationActions = report.body.reconciliationActions ?? [];
|
||||
const actions = [...workUnitActions, ...reconciliationActions];
|
||||
const directCounts = {
|
||||
wikiCount: actions.filter((action) => action.target === 'wiki').length,
|
||||
slCount: actions.filter((action) => action.target === 'sl').length,
|
||||
|
|
|
|||
|
|
@ -66,6 +66,27 @@ function reportBody(syncId: string, supersededBy: string | null = null): IngestR
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a minimal ingest report body for store tests: no work units, no failures,
 * reconciliation skipped, and a diff summary with a single unchanged file.
 *
 * @param syncId Sync id recorded on the report.
 * @param overrides Fields that replace the defaults.
 * @returns The default body with `overrides` applied on top.
 */
function emptyReportBody(syncId: string, overrides: Partial<IngestReportBody> = {}): IngestReportBody {
  const defaults: IngestReportBody = {
    syncId,
    diffSummary: diffSummary({ added: 0, modified: 0, deleted: 0, unchanged: 1 }),
    commitSha: null,
    workUnits: [],
    failedWorkUnits: [],
    reconciliationSkipped: true,
    conflictsResolved: [],
    evictionsApplied: [],
    unmappedFallbacks: [],
    evictionInputs: [],
    unresolvedCards: [],
    supersededBy: null,
    overrideOf: null,
    provenanceRows: [],
    toolTranscripts: [],
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe('SqliteBundleIngestStore', () => {
|
||||
let tempDir: string;
|
||||
let dbPath: string;
|
||||
|
|
@ -226,6 +247,204 @@ describe('SqliteBundleIngestStore', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('does not baseline skipped provenance from failed work units or zero-work retry runs', async () => {
|
||||
const store = new SqliteBundleIngestStore({ dbPath });
|
||||
const rawHashes = new Map([
|
||||
['pages/page-1/metadata.json', 'hash-metadata'],
|
||||
['pages/page-1/page.md', 'hash-page'],
|
||||
]);
|
||||
|
||||
const failedRun = await store.create(runArgs({ jobId: 'job-failed-review', syncId: 'sync-failed-review' }));
|
||||
await store.insertMany(
|
||||
[...rawHashes].map(([rawPath, rawContentHash]) => ({
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-failed-review',
|
||||
rawPath,
|
||||
rawContentHash,
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped' as const,
|
||||
})),
|
||||
);
|
||||
await store.markCompleted(failedRun.id, diffSummary({ added: 2 }));
|
||||
await store.create({
|
||||
runId: failedRun.id,
|
||||
jobId: 'job-failed-review',
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
body: emptyReportBody('sync-failed-review', {
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'notion-page-page-1',
|
||||
rawFiles: [...rawHashes.keys()],
|
||||
status: 'failed',
|
||||
reason: 'invalid_grant',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
failedWorkUnits: ['notion-page-page-1'],
|
||||
}),
|
||||
});
|
||||
|
||||
const noWorkRun = await store.create(runArgs({ jobId: 'job-no-work', syncId: 'sync-no-work' }));
|
||||
await store.insertMany(
|
||||
[...rawHashes].map(([rawPath, rawContentHash]) => ({
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-no-work',
|
||||
rawPath,
|
||||
rawContentHash,
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped' as const,
|
||||
})),
|
||||
);
|
||||
await store.markCompleted(noWorkRun.id, diffSummary({ unchanged: 2 }));
|
||||
await store.create({
|
||||
runId: noWorkRun.id,
|
||||
jobId: 'job-no-work',
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
body: emptyReportBody('sync-no-work', { workUnits: [], failedWorkUnits: [] }),
|
||||
});
|
||||
|
||||
await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual(new Map());
|
||||
await expect(new DiffSetService(store).compute('docs', 'notion', rawHashes)).resolves.toEqual({
|
||||
added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'],
|
||||
modified: [],
|
||||
deleted: [],
|
||||
unchanged: [],
|
||||
});
|
||||
});
|
||||
|
||||
it('baselines skipped provenance from successful no-output work unit runs', async () => {
|
||||
const store = new SqliteBundleIngestStore({ dbPath });
|
||||
const run = await store.create(runArgs({ jobId: 'job-reviewed-no-output', syncId: 'sync-reviewed-no-output' }));
|
||||
|
||||
await store.insertMany([
|
||||
{
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-reviewed-no-output',
|
||||
rawPath: 'pages/page-1/page.md',
|
||||
rawContentHash: 'hash-reviewed',
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped',
|
||||
},
|
||||
]);
|
||||
await store.markCompleted(run.id, diffSummary({ added: 1 }));
|
||||
await store.create({
|
||||
runId: run.id,
|
||||
jobId: 'job-reviewed-no-output',
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
body: emptyReportBody('sync-reviewed-no-output', {
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'notion-page-page-1',
|
||||
rawFiles: ['pages/page-1/page.md'],
|
||||
status: 'success',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
failedWorkUnits: [],
|
||||
}),
|
||||
});
|
||||
|
||||
await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual(
|
||||
new Map([['pages/page-1/page.md', 'hash-reviewed']]),
|
||||
);
|
||||
await expect(
|
||||
new DiffSetService(store).compute('docs', 'notion', new Map([['pages/page-1/page.md', 'hash-reviewed']])),
|
||||
).resolves.toMatchObject({
|
||||
added: [],
|
||||
unchanged: ['pages/page-1/page.md'],
|
||||
});
|
||||
});
|
||||
|
||||
it('baselines artifact provenance in partial failures but not skipped-only failed paths', async () => {
|
||||
const store = new SqliteBundleIngestStore({ dbPath });
|
||||
const run = await store.create(runArgs({ jobId: 'job-partial', syncId: 'sync-partial' }));
|
||||
|
||||
await store.insertMany([
|
||||
{
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-partial',
|
||||
rawPath: 'pages/success/page.md',
|
||||
rawContentHash: 'hash-success',
|
||||
artifactKind: 'wiki',
|
||||
artifactKey: 'knowledge/notion/success.md',
|
||||
artifactContentHash: 'artifact-success',
|
||||
actionType: 'wiki_written',
|
||||
},
|
||||
{
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
syncId: 'sync-partial',
|
||||
rawPath: 'pages/failed/page.md',
|
||||
rawContentHash: 'hash-failed',
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped',
|
||||
},
|
||||
]);
|
||||
await store.markCompleted(run.id, diffSummary({ added: 2 }));
|
||||
await store.create({
|
||||
runId: run.id,
|
||||
jobId: 'job-partial',
|
||||
connectionId: 'docs',
|
||||
sourceKey: 'notion',
|
||||
body: emptyReportBody('sync-partial', {
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'notion-page-success',
|
||||
rawFiles: ['pages/success/page.md'],
|
||||
status: 'success',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
{
|
||||
unitKey: 'notion-page-failed',
|
||||
rawFiles: ['pages/failed/page.md'],
|
||||
status: 'failed',
|
||||
reason: 'invalid_grant',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
failedWorkUnits: ['notion-page-failed'],
|
||||
}),
|
||||
});
|
||||
|
||||
await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual(
|
||||
new Map([['pages/success/page.md', 'hash-success']]),
|
||||
);
|
||||
await expect(
|
||||
new DiffSetService(store).compute(
|
||||
'docs',
|
||||
'notion',
|
||||
new Map([
|
||||
['pages/success/page.md', 'hash-success'],
|
||||
['pages/failed/page.md', 'hash-failed'],
|
||||
]),
|
||||
),
|
||||
).resolves.toEqual({
|
||||
added: ['pages/failed/page.md'],
|
||||
modified: [],
|
||||
deleted: [],
|
||||
unchanged: ['pages/success/page.md'],
|
||||
});
|
||||
});
|
||||
|
||||
it('returns the latest stored report across bundle ingest runs', async () => {
|
||||
const store = new SqliteBundleIngestStore({
|
||||
dbPath,
|
||||
|
|
|
|||
|
|
@ -46,6 +46,13 @@ interface ProvenanceRow {
|
|||
action_type: string;
|
||||
}
|
||||
|
||||
/**
 * One candidate row considered when computing the latest processed hash per raw path.
 *
 * Shape of the rows selected from `bundle_ingest_provenance` joined with the run and
 * (optionally) its stored report.
 */
interface ProvenanceHashCandidateRow {
  /** Raw file path the provenance row was recorded for. */
  raw_path: string;
  /** Content hash recorded for the raw file. */
  raw_content_hash: string;
  /** Recorded action type; the value `'skipped'` is handled specially when baselining. */
  action_type: string;
  /** JSON body of the report joined to the run, or `null` when no report row was joined. */
  report_body_json: string | null;
}
|
||||
|
||||
function parseArtifactKind(kind: string | null): IngestProvenanceRow['artifact_kind'] {
|
||||
if (kind === null || kind === 'sl' || kind === 'wiki') {
|
||||
return kind;
|
||||
|
|
@ -93,6 +100,31 @@ function toPortProvenanceRow(row: ProvenanceRow): IngestProvenanceRow {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Reads `key` from `value` when `value` is a non-null, non-array object.
 *
 * @param value - Arbitrary (typically JSON-parsed) value.
 * @param key - Property name to read.
 * @returns The property value, or `undefined` when `value` is not a plain object.
 */
function recordValue(value: unknown, key: string): unknown {
  if (typeof value !== 'object' || value === null || Array.isArray(value)) {
    return undefined;
  }
  const record = value as Record<string, unknown>;
  return record[key];
}
|
||||
|
||||
/**
 * Decides whether a `skipped` provenance row may be used as a hash baseline, based on
 * the report stored for its run.
 *
 * - No report (`null`): the row is accepted.
 * - Otherwise the report must list at least one work unit and an empty
 *   `failedWorkUnits` array.
 *
 * @param reportBodyJson - Serialized report body, or `null` when the run has no report.
 * @returns `true` when the skipped row counts as a baseline.
 * @throws SyntaxError when `reportBodyJson` is not valid JSON (propagated from `JSON.parse`).
 */
function isSuccessfulNoOutputSkippedBaseline(reportBodyJson: string | null): boolean {
  if (reportBodyJson === null) {
    return true;
  }

  const report: unknown = JSON.parse(reportBodyJson);
  const units = recordValue(report, 'workUnits');
  const failedUnits = recordValue(report, 'failedWorkUnits');

  // A run that processed nothing is not evidence that the raw file was reviewed.
  if (!Array.isArray(units) || units.length === 0) {
    return false;
  }
  return Array.isArray(failedUnits) && failedUnits.length === 0;
}
|
||||
|
||||
/**
 * Tells whether a candidate row's hash should be treated as already processed.
 *
 * Any non-`skipped` action qualifies outright; a `skipped` row qualifies only when the
 * run's report passes {@link isSuccessfulNoOutputSkippedBaseline}.
 */
function isProcessedHashBaseline(row: ProvenanceHashCandidateRow): boolean {
  if (row.action_type !== 'skipped') {
    return true;
  }
  return isSuccessfulNoOutputSkippedBaseline(row.report_body_json);
}
|
||||
|
||||
/**
 * Builds a comma-separated list of `?` markers, one per element of `values`.
 *
 * @param values - Values that will be bound; only the count matters.
 * @returns e.g. `'?, ?, ?'` for three values, `''` for none.
 */
function placeholders(values: readonly unknown[]): string {
  const marks = values.map((): string => '?');
  return marks.join(', ');
}
|
||||
|
|
@ -275,23 +307,34 @@ export class SqliteBundleIngestStore
|
|||
const rows = this.db
|
||||
.prepare(
|
||||
`
|
||||
SELECT p.raw_path, p.raw_content_hash
|
||||
SELECT
|
||||
p.raw_path,
|
||||
p.raw_content_hash,
|
||||
p.action_type,
|
||||
br.body_json AS report_body_json
|
||||
FROM bundle_ingest_provenance p
|
||||
INNER JOIN bundle_ingest_runs r
|
||||
ON r.connection_id = p.connection_id
|
||||
AND r.source_key = p.source_key
|
||||
AND r.sync_id = p.sync_id
|
||||
LEFT JOIN bundle_ingest_reports br
|
||||
ON br.run_id = r.id
|
||||
WHERE p.connection_id = ?
|
||||
AND p.source_key = ?
|
||||
AND r.status = 'completed'
|
||||
ORDER BY r.completed_at DESC, r.rowid DESC, p.created_at DESC, p.rowid DESC
|
||||
`,
|
||||
)
|
||||
.all(connectionId, sourceKey) as Array<{ raw_path: string; raw_content_hash: string }>;
|
||||
.all(connectionId, sourceKey) as ProvenanceHashCandidateRow[];
|
||||
|
||||
const latest = new Map<string, string>();
|
||||
const seen = new Set<string>();
|
||||
for (const row of rows) {
|
||||
if (!latest.has(row.raw_path)) {
|
||||
if (seen.has(row.raw_path)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(row.raw_path);
|
||||
if (isProcessedHashBaseline(row)) {
|
||||
latest.set(row.raw_path, row.raw_content_hash);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { buildReconcileSystemPrompt, buildReconcileToolSet } from './build-reconcile-context.js';
|
||||
import { buildReconcileSystemPrompt, buildReconcileToolSet, buildReconcileUserPrompt } from './build-reconcile-context.js';
|
||||
|
||||
describe('buildReconcileSystemPrompt', () => {
|
||||
it('appends canonical pins when relevant pins are supplied', () => {
|
||||
|
|
@ -39,6 +39,40 @@ describe('buildReconcileSystemPrompt', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('buildReconcileUserPrompt', () => {
|
||||
it('includes action details so reconciliation can compare different keys for the same table', () => {
|
||||
const prompt = buildReconcileUserPrompt(
|
||||
{
|
||||
jobId: 'j1',
|
||||
connectionId: 'notion',
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'notion-a',
|
||||
rawFiles: ['pages/a/page.md'],
|
||||
status: 'success',
|
||||
actions: [
|
||||
{
|
||||
target: 'wiki',
|
||||
type: 'created',
|
||||
key: 'orbit-customer-source-reference',
|
||||
detail: 'tables: orbit_analytics.customer',
|
||||
},
|
||||
],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
conflictsResolved: [],
|
||||
evictionsApplied: [],
|
||||
unmappedFallbacks: [],
|
||||
},
|
||||
undefined,
|
||||
);
|
||||
|
||||
expect(prompt).toContain('orbit-customer-source-reference');
|
||||
expect(prompt).toContain('tables: orbit_analytics.customer');
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildReconcileToolSet', () => {
|
||||
it('includes emit_unmapped_fallback with the reconciliation tools', () => {
|
||||
const toolSet = buildReconcileToolSet({
|
||||
|
|
|
|||
|
|
@ -104,6 +104,10 @@ function curatorPassStateSummary(runState?: ReconcilePromptRunState): string {
|
|||
].join('\n');
|
||||
}
|
||||
|
||||
/**
 * Normalizes an action detail for single-line display: surrounding whitespace is removed
 * and every internal whitespace run (including newlines) becomes one space.
 */
function formatStageActionDetail(detail: string): string {
  const words = detail.trim().split(/\s+/);
  return words.join(' ');
}
|
||||
|
||||
export function buildReconcileUserPrompt(
|
||||
stageIndex: StageIndex,
|
||||
ev: EvictionUnit | undefined,
|
||||
|
|
@ -119,7 +123,14 @@ export function buildReconcileUserPrompt(
|
|||
const actions =
|
||||
wu.actions.length === 0
|
||||
? ' actions: (none)'
|
||||
: wu.actions.map((a) => ` - ${a.target}:${a.type} ${a.key}`).join('\n');
|
||||
: wu.actions
|
||||
.map((a) => {
|
||||
const detail = formatStageActionDetail(a.detail);
|
||||
return detail.length > 0
|
||||
? ` - ${a.target}:${a.type} ${a.key}; detail: ${detail}`
|
||||
: ` - ${a.target}:${a.type} ${a.key}`;
|
||||
})
|
||||
.join('\n');
|
||||
return `- unitKey: ${wu.unitKey} (status=${wu.status})\n${actions}`;
|
||||
})
|
||||
.join('\n');
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ describe('reconciliation emit tools', () => {
|
|||
{
|
||||
rawPath: 'metrics/conversion.yml',
|
||||
reason: 'no_physical_table',
|
||||
detail: expect.stringContaining('not present as a source'),
|
||||
fallback: 'flagged',
|
||||
},
|
||||
]);
|
||||
|
|
@ -175,6 +176,7 @@ describe('reconciliation emit tools', () => {
|
|||
{
|
||||
rawPath: 'metrics/conversion.yml',
|
||||
reason: 'no_physical_table',
|
||||
detail: expect.stringContaining('not present as a source'),
|
||||
fallback: 'flagged',
|
||||
},
|
||||
]);
|
||||
|
|
@ -199,6 +201,27 @@ describe('reconciliation emit tools', () => {
|
|||
expect(stageIndex.unmappedFallbacks).toEqual([]);
|
||||
});
|
||||
|
||||
it('rejects missing-table fallback decisions when the table resolves to an existing semantic source', async () => {
|
||||
const stageIndex = makeStageIndex();
|
||||
const tool = createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: new Set(['cards/revenue.json']),
|
||||
tableRefExists: async (tableRef) => tableRef === 'orbit_analytics.mart_revenue_daily',
|
||||
});
|
||||
|
||||
const output = await executeTool(tool, {
|
||||
rawPath: 'cards/revenue.json',
|
||||
reason: 'no_physical_table',
|
||||
tableRef: 'orbit_analytics.mart_revenue_daily',
|
||||
fallback: 'wiki_only',
|
||||
});
|
||||
|
||||
expect(output).toContain(
|
||||
'Error: tableRef "orbit_analytics.mart_revenue_daily" already resolves to a semantic source',
|
||||
);
|
||||
expect(stageIndex.unmappedFallbacks).toEqual([]);
|
||||
});
|
||||
|
||||
it('records explicit artifact resolutions for provenance rows', async () => {
|
||||
const stageIndex = makeStageIndex();
|
||||
const tool = createEmitArtifactResolutionTool({
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { StageIndex, UnmappedFallbackRecord } from '../stages/stage-index.types.js';
|
||||
import type { StageIndex, UnmappedFallbackRecord, UnmappedFallbackReason } from '../stages/stage-index.types.js';
|
||||
|
||||
/** Dependencies injected into the `emit_unmapped_fallback` tool. */
interface EmitUnmappedFallbackDeps {
  /** Stage index whose `unmappedFallbacks` list receives the recorded decisions. */
  stageIndex: StageIndex;
  /** Raw paths the tool accepts; any other `rawPath` is rejected. */
  allowedPaths: ReadonlySet<string>;
  /**
   * Optional lookup consulted for missing-table reasons; resolving to `true` makes the
   * tool reject the fallback for that table reference.
   */
  tableRefExists?: (tableRef: string) => Promise<boolean>;
}
|
||||
|
||||
const unmappedFallbackReasonSchema = z.enum([
|
||||
|
|
@ -22,31 +23,78 @@ function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallb
|
|||
return left.rawPath === right.rawPath && left.reason === right.reason && left.fallback === right.fallback;
|
||||
}
|
||||
|
||||
/**
 * Produces the canonical, reason-specific sentence stored as a fallback's `detail`.
 *
 * The tool owns this sentence so the recorded explanation stays consistent with the
 * structured reason code; free-form model text previously contradicted the code
 * (e.g. `no_physical_table` explained as "no mapped connection exists").
 *
 * @param reason - Structured fallback reason code.
 * @param tableRef - Reference that triggered the fallback; when missing or empty the
 *   generic subject "the referenced object" is used instead.
 * @returns The sentence for the given reason.
 */
function canonicalDetail(reason: UnmappedFallbackReason, tableRef: string | undefined): string {
  let subject = 'the referenced object';
  if (tableRef) {
    subject = `'${tableRef}'`;
  }

  switch (reason) {
    case 'no_physical_table':
      return subject + ' is described but is not present as a source in any mapped warehouse/dbt connection.';
    case 'no_connection_mapping':
      return subject + ' has no non-Notion warehouse/dbt connection to map against.';
    case 'missing_target_table':
      return subject + ' is referenced but the target table could not be located.';
    case 'looker_template_unresolved':
      return subject + ' uses LookML templating that could not be resolved.';
    case 'derived_table_not_supported':
      return subject + ' is a derived/inline definition that is not yet supported as a semantic-layer source.';
    case 'multiple_table_references':
      return subject + ' references multiple tables; cannot map to a single source.';
    case 'unsupported_dialect':
      return subject + ' uses a SQL dialect that is not yet supported.';
    case 'parse_error':
      return subject + ' could not be parsed.';
  }
}
|
||||
|
||||
/**
 * Tells whether a reason claims that a table is missing, in which case the table
 * reference should be checked for existence before the fallback is recorded.
 */
function requiresMissingTableValidation(reason: UnmappedFallbackReason): boolean {
  switch (reason) {
    case 'no_physical_table':
    case 'missing_target_table':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Creates the `emit_unmapped_fallback` tool, which records one unmapped-fallback decision
 * on the stage index for the final IngestReport.
 *
 * Behavior of `execute`:
 * - rejects (returns an `Error: ...` string) when `rawPath` is not in `deps.allowedPaths`;
 * - for missing-table reasons with a `tableRef`, rejects when `deps.tableRefExists`
 *   reports that the table already resolves to a semantic source;
 * - otherwise builds the canonical detail for the reason, appends the optional
 *   clarification, and pushes the record unless an equivalent one is already present.
 *
 * @param deps - Stage index, allowed raw paths and optional table-existence lookup.
 * @returns The configured tool; `execute` resolves to a status string and never throws
 *   for validation failures.
 */
export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
  return tool({
    description:
      'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The tool generates the canonical detail from the structured reason and optional tableRef; use clarification only to add context that does not contradict the reason code.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      reason: unmappedFallbackReasonSchema,
      tableRef: z
        .string()
        .optional()
        .describe('The fully-qualified table or source reference that triggered the fallback (e.g. "orbit_analytics.customer"). Used to generate canonical detail text.'),
      clarification: z
        .string()
        .optional()
        .describe('Optional extra context appended to the canonical detail. Must not contradict the reason code.'),
      fallback: z.enum(['sql_standalone', 'wiki_only', 'flagged']),
    }),
    execute: async (input): Promise<string> => {
      if (!deps.allowedPaths.has(input.rawPath)) {
        return `Error: rawPath "${input.rawPath}" is not available to this ingest stage`;
      }

      // A whitespace-only reference carries no information: treat it as absent so it is
      // neither quoted in the detail nor sent to the existence lookup.
      const tableRef = input.tableRef?.trim() || undefined;

      if (tableRef && requiresMissingTableValidation(input.reason) && deps.tableRefExists) {
        const exists = await deps.tableRefExists(tableRef);
        if (exists) {
          return `Error: tableRef "${tableRef}" already resolves to a semantic source; do not record ${input.reason} for an existing table.`;
        }
      }

      // The canonical sentence always comes first; clarification can only extend it.
      const base = canonicalDetail(input.reason, tableRef);
      const detail = input.clarification ? `${base} ${input.clarification.trim()}`.trim() : base;

      const record: UnmappedFallbackRecord = {
        rawPath: input.rawPath,
        reason: input.reason,
        detail,
        fallback: input.fallback,
      };
      // Repeated calls for the same decision must not duplicate report entries.
      if (!deps.stageIndex.unmappedFallbacks.some((candidate) => sameUnmappedFallback(candidate, record))) {
        deps.stageIndex.unmappedFallbacks.push(record);
      }
      return `recorded unmapped fallback for ${record.rawPath} (${record.fallback}): ${detail}`;
    },
  });
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,14 @@ describe('stage_list tool', () => {
|
|||
unitKey: 'u2',
|
||||
rawFiles: ['b.yml'],
|
||||
status: 'success',
|
||||
actions: [{ target: 'wiki', type: 'created', key: 'page_b', detail: '' }],
|
||||
actions: [
|
||||
{
|
||||
target: 'wiki',
|
||||
type: 'created',
|
||||
key: 'page_b',
|
||||
detail: 'tables: orbit_analytics.customer',
|
||||
},
|
||||
],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
|
|
@ -36,6 +43,7 @@ describe('stage_list tool', () => {
|
|||
expect(out).toContain('src_a');
|
||||
expect(out).toContain('u2');
|
||||
expect(out).toContain('page_b');
|
||||
expect(out).toContain('tables: orbit_analytics.customer');
|
||||
});
|
||||
|
||||
it('says empty when no writes', async () => {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ export interface StageListDeps {
|
|||
stageIndex: StageIndex;
|
||||
}
|
||||
|
||||
/**
 * Flattens an action detail onto one line: whitespace runs (including newlines) collapse
 * to a single space and leading/trailing whitespace is dropped.
 */
function formatActionDetail(detail: string): string {
  const collapsed = detail.replace(/\s+/g, ' ');
  return collapsed.trim();
}
|
||||
|
||||
export function createStageListTool(deps: StageListDeps) {
|
||||
return tool({
|
||||
description:
|
||||
|
|
@ -20,7 +24,14 @@ export function createStageListTool(deps: StageListDeps) {
|
|||
const actions =
|
||||
wu.actions.length === 0
|
||||
? ' (no actions)'
|
||||
: wu.actions.map((a) => ` - ${a.target}:${a.type} ${a.key}`).join('\n');
|
||||
: wu.actions
|
||||
.map((a) => {
|
||||
const detail = formatActionDetail(a.detail);
|
||||
return detail.length > 0
|
||||
? ` - ${a.target}:${a.type} ${a.key}; detail: ${detail}`
|
||||
: ` - ${a.target}:${a.type} ${a.key}`;
|
||||
})
|
||||
.join('\n');
|
||||
return `- unitKey: ${wu.unitKey} (status=${wu.status})\n rawFiles: ${wu.rawFiles.join(', ') || '(none)'}\n actions:\n${actions}`;
|
||||
})
|
||||
.join('\n');
|
||||
|
|
|
|||
|
|
@ -0,0 +1,196 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { ToolCallLogEntry } from './tool-call-logger.js';
|
||||
import { createMutableToolTranscriptSummary, recordToolTranscriptEntry } from './tool-transcript-summary.js';
|
||||
|
||||
/**
 * Test fixture: builds a tool-call log entry for a `wiki_write` call on work unit `wu-1`,
 * with any field replaced by `overrides`.
 */
function entry(overrides: Partial<ToolCallLogEntry>): ToolCallLogEntry {
  const defaults: ToolCallLogEntry = {
    ts: '2026-05-11T00:00:00.000Z',
    wuKey: 'wu-1',
    toolName: 'wiki_write',
    durationMs: 1,
    input: {},
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe('tool transcript summaries', () => {
|
||||
it('keeps recovered wiki_write structured failures out of fatal failures', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
output: { structured: { success: false, key: 'orbit-customers' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
output: { structured: { success: true, key: 'orbit-customers' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('counts unrecovered wiki_remove structured failures as fatal transcript errors', () => {
|
||||
const summary = createMutableToolTranscriptSummary('reconcile', '/tmp/reconcile.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(summary, {
|
||||
ts: '2026-05-11T00:00:00.000Z',
|
||||
wuKey: 'reconcile',
|
||||
toolCallId: 'remove-1',
|
||||
toolName: 'wiki_remove',
|
||||
durationMs: 1,
|
||||
input: { key: 'duplicate-page' },
|
||||
output: { structured: { success: false, key: 'duplicate-page' } },
|
||||
});
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(1);
|
||||
});
|
||||
|
||||
it('keeps unrecovered structured write failures fatal', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
output: { structured: { success: false, key: 'orbit-customers' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(1);
|
||||
});
|
||||
|
||||
it('treats a later sl_edit_source success as recovery for the same SL source', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'warehouse', sourceName: 'orbit_customers' },
|
||||
output: { structured: { success: false, sourceName: 'orbit_customers' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_edit_source',
|
||||
input: { connectionId: 'warehouse', sourceName: 'orbit_customers' },
|
||||
output: { structured: { success: true, sourceName: 'orbit_customers' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('treats explicit unmapped fallback as recovery for guarded SL write failures', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'dbt-main', sourceName: 'stg_accounts' },
|
||||
output: { structured: { success: false, sourceName: 'stg_accounts' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'emit_unmapped_fallback',
|
||||
input: { rawPath: 'models/schema.yml', reason: 'no_physical_table', tableRef: 'stg_accounts', fallback: 'wiki_only' },
|
||||
output: 'recorded unmapped fallback for models/schema.yml (wiki_only)',
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('treats an untargeted unmapped fallback as recovery when there is only one pending SL failure', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'dbt-main', sourceName: 'stg_accounts' },
|
||||
output: { structured: { success: false, sourceName: 'stg_accounts' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'emit_unmapped_fallback',
|
||||
input: { rawPath: 'models/schema.yml', reason: 'no_physical_table', fallback: 'wiki_only' },
|
||||
output: 'recorded unmapped fallback for models/schema.yml (wiki_only)',
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('keeps unrelated SL write failures fatal when one source gets an unmapped fallback', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'dbt-main', sourceName: 'stg_accounts' },
|
||||
output: { structured: { success: false, sourceName: 'stg_accounts' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'sl_write_source',
|
||||
input: { connectionId: 'dbt-main', sourceName: 'stg_orders' },
|
||||
output: { structured: { success: false, sourceName: 'stg_orders' } },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
toolName: 'emit_unmapped_fallback',
|
||||
input: { rawPath: 'models/schema.yml', reason: 'no_physical_table', tableRef: 'stg_accounts', fallback: 'wiki_only' },
|
||||
output: 'recorded unmapped fallback for models/schema.yml (wiki_only)',
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(2);
|
||||
expect(summary.fatalErrorCount).toBe(1);
|
||||
});
|
||||
|
||||
it('keeps thrown tool errors fatal even after a successful write', () => {
|
||||
const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');
|
||||
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
error: { message: 'tool crashed' },
|
||||
}),
|
||||
);
|
||||
recordToolTranscriptEntry(
|
||||
summary,
|
||||
entry({
|
||||
input: { key: 'orbit-customers' },
|
||||
output: { structured: { success: true, key: 'orbit-customers' } },
|
||||
}),
|
||||
);
|
||||
|
||||
expect(summary.errorCount).toBe(1);
|
||||
expect(summary.fatalErrorCount).toBe(1);
|
||||
});
|
||||
});
|
||||
185
packages/context/src/ingest/tools/tool-transcript-summary.ts
Normal file
185
packages/context/src/ingest/tools/tool-transcript-summary.ts
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
import type { ToolCallLogEntry } from './tool-call-logger.js';
|
||||
|
||||
/**
 * Running, in-place-updated summary of the tool calls logged for one unit.
 */
export interface MutableToolTranscriptSummary {
  /** Key of the unit the transcript belongs to. */
  unitKey: string;
  /** Path associated with the transcript (presumably the transcript file; confirm with callers). */
  path: string;
  /** Number of entries recorded so far. */
  toolCallCount: number;
  /** Entries that carried an error or a recoverable structured failure, recovered or not. */
  errorCount: number;
  /** `hardErrorCount` plus all failures still pending in `recoverableFailureCounts`. */
  fatalErrorCount: number;
  /** Distinct tool names seen. */
  toolNames: Set<string>;
  /** Entries that carried an `error`; these are never recovered. */
  hardErrorCount: number;
  /** Pending structured failures per target key (`wiki:<key>` or `sl:<connectionId>:<sourceName>`). */
  recoverableFailureCounts: Map<string, number>;
}
|
||||
|
||||
/**
 * Creates an empty transcript summary for a unit.
 *
 * @param unitKey - Key of the unit the transcript belongs to.
 * @param path - Path stored on the summary as-is.
 * @returns A summary with all counters at zero and fresh, unshared collections.
 */
export function createMutableToolTranscriptSummary(unitKey: string, path: string): MutableToolTranscriptSummary {
  const toolNames = new Set<string>();
  const recoverableFailureCounts = new Map<string, number>();

  const summary: MutableToolTranscriptSummary = {
    unitKey,
    path,
    toolCallCount: 0,
    errorCount: 0,
    fatalErrorCount: 0,
    toolNames,
    hardErrorCount: 0,
    recoverableFailureCounts,
  };
  return summary;
}
|
||||
|
||||
/**
 * Folds one logged tool call into the running transcript summary.
 *
 * Classification, in order:
 * 1. An entry with `error` is a hard error: it counts as an error and is always fatal.
 * 2. A structured failure (`output.structured.success === false`) of a guarded write tool
 *    counts as an error and stays fatal until a later entry recovers the same target.
 * 3. A structured success for the same target clears its pending failures.
 * 4. An accepted `emit_unmapped_fallback` clears pending SL failures: the ones matching its
 *    `tableRef`, or, when it names no table, the single pending SL failure if there is
 *    exactly one.
 *
 * A fallback the tool rejected (its output is an `Error: ...` string) is not a recovery,
 * so pending failures are left untouched in that case.
 *
 * @param summary - Summary to update in place.
 * @param entry - Logged tool call to record.
 */
export function recordToolTranscriptEntry(summary: MutableToolTranscriptSummary, entry: ToolCallLogEntry): void {
  summary.toolCallCount += 1;
  summary.toolNames.add(entry.toolName);

  if (entry.error) {
    summary.errorCount += 1;
    summary.hardErrorCount += 1;
    refreshFatalErrorCount(summary);
    return;
  }

  const recoverableFailureKey = recoverableStructuredFailureKey(entry);
  if (recoverableFailureKey) {
    summary.errorCount += 1;
    summary.recoverableFailureCounts.set(
      recoverableFailureKey,
      (summary.recoverableFailureCounts.get(recoverableFailureKey) ?? 0) + 1,
    );
    refreshFatalErrorCount(summary);
    return;
  }

  const recoveryKey = recoverableStructuredSuccessKey(entry);
  if (recoveryKey) {
    summary.recoverableFailureCounts.delete(recoveryKey);
  }

  // The fallback tool signals a rejected decision by returning a string that starts with
  // "Error:"; nothing was recorded in that case, so it must not clear pending failures.
  const fallbackRejected = typeof entry.output === 'string' && entry.output.startsWith('Error:');
  if (entry.toolName === 'emit_unmapped_fallback' && !fallbackRejected) {
    const fallbackTarget = fallbackSlTargetKey(entry);
    // Snapshot the keys first: entries are deleted while iterating.
    const pendingSlKeys = [...summary.recoverableFailureCounts.keys()].filter((key) => key.startsWith('sl:'));
    for (const key of pendingSlKeys) {
      if (
        (fallbackTarget && slFailureKeyMatchesFallback(key, fallbackTarget)) ||
        (!fallbackTarget && pendingSlKeys.length === 1)
      ) {
        summary.recoverableFailureCounts.delete(key);
      }
    }
  }
  refreshFatalErrorCount(summary);
}
|
||||
|
||||
/**
 * Recomputes `fatalErrorCount` as the hard errors plus every structured failure that is
 * still pending recovery.
 */
function refreshFatalErrorCount(summary: MutableToolTranscriptSummary): void {
  let pendingFailures = 0;
  for (const count of summary.recoverableFailureCounts.values()) {
    pendingFailures += count;
  }
  summary.fatalErrorCount = summary.hardErrorCount + pendingFailures;
}
|
||||
|
||||
/**
 * Returns the target key of a structured failure that a later call may recover, or
 * `null` when the entry is not such a failure.
 *
 * Only `wiki_write`, `wiki_remove` and `sl_write_source` failures are tracked.
 */
function recoverableStructuredFailureKey(entry: ToolCallLogEntry): string | null {
  if (!isStructuredToolFailure(entry.output)) {
    return null;
  }
  switch (entry.toolName) {
    case 'wiki_write':
    case 'wiki_remove':
      return wikiTargetKey(entry);
    case 'sl_write_source':
      return slTargetKey(entry);
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Returns the target key that a structured success recovers, or `null` when the entry is
 * not a structured success of a tracked tool.
 *
 * `sl_edit_source` is accepted here in addition to the write tools, so an edit can
 * recover an earlier failed `sl_write_source` on the same source.
 */
function recoverableStructuredSuccessKey(entry: ToolCallLogEntry): string | null {
  if (!isStructuredToolSuccess(entry.output)) {
    return null;
  }
  switch (entry.toolName) {
    case 'wiki_write':
    case 'wiki_remove':
      return wikiTargetKey(entry);
    case 'sl_write_source':
    case 'sl_edit_source':
      return slTargetKey(entry);
    default:
      return null;
  }
}
|
||||
|
||||
/** True only when the output carries an explicit `structured.success === false`. */
function isStructuredToolFailure(output: unknown): boolean {
  const outcome = structuredSuccess(output);
  // A missing/non-boolean flag (null) is not a failure.
  return outcome !== null && !outcome;
}
|
||||
|
||||
/** True only when the output carries an explicit `structured.success === true`. */
function isStructuredToolSuccess(output: unknown): boolean {
  // A missing/non-boolean flag (null) is not a success.
  return structuredSuccess(output) ?? false;
}
|
||||
|
||||
/**
 * Extracts the boolean `structured.success` flag from a tool output.
 *
 * @returns The flag, or `null` when `structured` is absent or `success` is not a boolean.
 */
function structuredSuccess(output: unknown): boolean | null {
  const structured = recordField(output, 'structured');
  if (structured === null) {
    return null;
  }
  const flag = structured.success;
  return typeof flag === 'boolean' ? flag : null;
}
|
||||
|
||||
/**
 * Builds the `wiki:<key>` target key for a wiki tool call.
 *
 * The key reported in `output.structured` wins; the input `key` is the fallback.
 *
 * @returns The target key, or `null` when neither side names a key.
 */
function wikiTargetKey(entry: ToolCallLogEntry): string | null {
  const reportedKey = stringField(recordField(entry.output, 'structured'), 'key');
  const key = reportedKey ?? stringField(entry.input, 'key');
  return key === null ? null : `wiki:${key}`;
}
|
||||
|
||||
/**
 * Builds the `sl:<connectionId>:<sourceName>` target key for a semantic-layer tool call.
 *
 * The source name reported in `output.structured` wins over the input one; the
 * connection id is read from the input only and defaults to an empty string.
 *
 * @returns The target key, or `null` when no source name is available.
 */
function slTargetKey(entry: ToolCallLogEntry): string | null {
  const reportedName = stringField(recordField(entry.output, 'structured'), 'sourceName');
  const sourceName = reportedName ?? stringField(entry.input, 'sourceName');
  if (sourceName === null) {
    return null;
  }
  const connectionId = stringField(entry.input, 'connectionId') ?? '';
  return ['sl', connectionId, sourceName].join(':');
}
|
||||
|
||||
/**
 * Derives the semantic-layer source targeted by an `emit_unmapped_fallback` call from its
 * input `tableRef` (last dotted segment) and optional input `connectionId`.
 *
 * @returns The target, or `null` when the input names no usable table reference.
 */
function fallbackSlTargetKey(entry: ToolCallLogEntry): { connectionId?: string; sourceName: string } | null {
  const tableRef = stringField(entry.input, 'tableRef');
  const sourceName = tableRef === null ? '' : finalReferenceSegment(tableRef);
  if (sourceName === '') {
    return null;
  }

  const connectionId = stringField(entry.input, 'connectionId');
  // Omit the property entirely when no connection id was supplied.
  return connectionId === null ? { sourceName } : { sourceName, connectionId };
}
|
||||
|
||||
/**
 * Checks whether a pending `sl:<connectionId>:<sourceName>` failure key refers to the
 * source targeted by a fallback.
 *
 * The connection id is compared only when the fallback specifies one; source names are
 * compared by their last dotted segment, case-insensitively.
 */
function slFailureKeyMatchesFallback(
  failureKey: string,
  fallback: { connectionId?: string; sourceName: string },
): boolean {
  const parsed = /^sl:([^:]*):(.*)$/.exec(failureKey);
  if (parsed === null) {
    return false;
  }

  const failedConnectionId = parsed[1];
  const failedSourceName = parsed[2] ?? '';

  const connectionDiffers = Boolean(fallback.connectionId) && failedConnectionId !== fallback.connectionId;
  if (connectionDiffers) {
    return false;
  }

  const wanted = normalizeReferenceSegment(fallback.sourceName);
  return normalizeReferenceSegment(failedSourceName) === wanted;
}
|
||||
|
||||
/**
 * Returns the last non-empty dotted segment of a table/source reference after trimming
 * it and stripping double quotes, backticks and square brackets.
 *
 * @returns The segment, or `''` when the reference has no non-empty segment.
 */
function finalReferenceSegment(value: string): string {
  const unquoted = value.trim().replace(/["`\[\]]/g, '');
  const segments = unquoted.split('.').filter((segment) => segment.length > 0);
  return segments[segments.length - 1] ?? '';
}
|
||||
|
||||
/** Lower-cased last dotted segment of a reference, used for case-insensitive comparison. */
function normalizeReferenceSegment(value: string): string {
  const segment = finalReferenceSegment(value);
  return segment.toLowerCase();
}
|
||||
|
||||
/**
 * Reads `field` from `value` and returns it only when both `value` and the nested value
 * are non-null, non-array objects.
 *
 * @returns The nested object, or `null` in every other case.
 */
function recordField(value: unknown, field: string): Record<string, unknown> | null {
  const isRecord = (candidate: unknown): candidate is Record<string, unknown> =>
    typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate);

  if (!isRecord(value)) {
    return null;
  }
  const nested = value[field];
  return isRecord(nested) ? nested : null;
}
|
||||
|
||||
/**
 * Reads `field` from `value` when it holds a non-empty string.
 *
 * @param value - Candidate container of unknown shape; must be a non-null, non-array object.
 * @param field - Property name to read.
 * @returns The string, or `null` when the container is unusable, the property is
 *   not a string, or the string is empty.
 */
function stringField(value: unknown, field: string): string | null {
  if (typeof value !== 'object' || value === null || Array.isArray(value)) {
    return null;
  }

  const raw = (value as Record<string, unknown>)[field];
  if (typeof raw !== 'string') {
    return null;
  }
  return raw === '' ? null : raw;
}
|
||||
|
|
@ -617,7 +617,7 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
userId: 'local-user',
|
||||
key: '../outside',
|
||||
}),
|
||||
).rejects.toThrow('Unsafe knowledge key');
|
||||
).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".');
|
||||
|
||||
await expect(
|
||||
ports.semanticLayer?.readSource({
|
||||
|
|
|
|||
|
|
@ -512,7 +512,7 @@ export function createLocalProjectMcpContextPorts(
|
|||
}
|
||||
|
||||
const yaml =
|
||||
input.yaml ?? YAML.stringify({ ...input.source, name: input.sourceName }, { indent: 2, lineWidth: 0 });
|
||||
input.yaml ?? YAML.stringify({ ...input.source, name: input.sourceName }, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
parseYamlRecord(yaml);
|
||||
await project.fileStore.writeFile(
|
||||
path,
|
||||
|
|
|
|||
|
|
@ -90,6 +90,25 @@ describe('memory runtime assets', () => {
|
|||
expect(body).not.toContain('a standalone SL source only when raw evidence contains enough table or SQL structure');
|
||||
});
|
||||
|
||||
it('ships Metabase guidance that avoids invalid joins for SQL-only card outputs', async () => {
|
||||
const body = await readFile(join(skillsDir, 'metabase_ingest', 'SKILL.md'), 'utf-8');
|
||||
|
||||
expect(body).toContain('Do not declare a KTX join just because the card SQL joins that table internally');
|
||||
expect(body).toContain('only when the card output exposes a local key that matches the target source grain');
|
||||
expect(body).toContain('If `sl_discover` resolves the table, it is not outside the manifest');
|
||||
expect(body).toContain('reason: "parse_error"');
|
||||
expect(body).not.toContain('Tables outside the manifest');
|
||||
expect(body).not.toContain('reason: "metabase_sql_untranslated"');
|
||||
});
|
||||
|
||||
it('ships Notion guidance for physical-table fallbacks and duplicate wiki reconciliation', async () => {
|
||||
const body = await readFile(join(skillsDir, 'notion_synthesize', 'SKILL.md'), 'utf-8');
|
||||
|
||||
expect(body).toContain('Notion `dataSourceCount` counts Notion databases/data sources only');
|
||||
expect(body).toContain('Search existing wiki pages for the same `tables:` or `sl_refs:` frontmatter');
|
||||
expect(body).toContain('no_physical_table');
|
||||
});
|
||||
|
||||
it('packages LookML connection-mismatch SL gate guidance', async () => {
|
||||
const body = await readFile(join(skillsDir, 'lookml_ingest', 'SKILL.md'), 'utf-8');
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ export interface MemoryAction {
|
|||
key: string;
|
||||
detail: string;
|
||||
targetConnectionId?: string | null;
|
||||
rawPaths?: string[];
|
||||
}
|
||||
|
||||
export interface MemoryAgentResult {
|
||||
|
|
|
|||
|
|
@ -299,7 +299,7 @@ export async function writeLocalScanManifestShards(
|
|||
const path = `${schemaDir(input.connectionId)}/${shardKey}.yaml`;
|
||||
await input.project.fileStore.writeFile(
|
||||
path,
|
||||
YAML.stringify(shard, { indent: 2, lineWidth: 0 }),
|
||||
YAML.stringify(shard, { indent: 2, lineWidth: 0, version: '1.1' }),
|
||||
LOCAL_AUTHOR,
|
||||
LOCAL_AUTHOR_EMAIL,
|
||||
`scan(${LIVE_DATABASE_ADAPTER}): write manifest shard ${shardKey} syncId=${input.syncId}`,
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void>
|
|||
|
||||
async function seedWikiProject(project: KtxLocalProject): Promise<void> {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Semantic revenue definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -155,7 +155,7 @@ async function seedWikiProject(project: KtxLocalProject): Promise<void> {
|
|||
slRefs: ['orders'],
|
||||
});
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'support/escalations',
|
||||
key: 'support-escalations',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Support escalation process',
|
||||
content: 'Escalations move urgent support tickets to the operations queue.',
|
||||
|
|
@ -338,9 +338,9 @@ describe('SQLite hybrid search backend conformance', () => {
|
|||
surface: 'wiki',
|
||||
caseName: 'lexical page ranking',
|
||||
results: lexical.map(toWikiConformanceResult),
|
||||
expectedTopIds: ['metrics/revenue'],
|
||||
expectedTopIds: ['metrics-revenue'],
|
||||
expectedReasonsById: {
|
||||
'metrics/revenue': ['lexical'],
|
||||
'metrics-revenue': ['lexical'],
|
||||
},
|
||||
expectedLanes: {
|
||||
lexical: { status: 'available' },
|
||||
|
|
@ -359,9 +359,9 @@ describe('SQLite hybrid search backend conformance', () => {
|
|||
surface: 'wiki',
|
||||
caseName: 'semantic page ranking',
|
||||
results: semantic.map(toWikiConformanceResult),
|
||||
expectedTopIds: ['metrics/revenue'],
|
||||
expectedTopIds: ['metrics-revenue'],
|
||||
expectedReasonsById: {
|
||||
'metrics/revenue': ['semantic'],
|
||||
'metrics-revenue': ['semantic'],
|
||||
},
|
||||
expectedLanes: {
|
||||
semantic: { status: 'available' },
|
||||
|
|
@ -378,9 +378,9 @@ describe('SQLite hybrid search backend conformance', () => {
|
|||
surface: 'wiki',
|
||||
caseName: 'token page fallback',
|
||||
results: token.map(toWikiConformanceResult),
|
||||
expectedTopIds: ['metrics/revenue'],
|
||||
expectedTopIds: ['metrics-revenue'],
|
||||
expectedReasonsById: {
|
||||
'metrics/revenue': ['token'],
|
||||
'metrics-revenue': ['token'],
|
||||
},
|
||||
expectedLanes: {
|
||||
token: { status: 'available' },
|
||||
|
|
|
|||
|
|
@ -89,6 +89,39 @@ describe('local semantic-layer helpers', () => {
|
|||
await expect(validateLocalSlSource(ORDERS_YAML)).resolves.toEqual({ valid: true, errors: [] });
|
||||
});
|
||||
|
||||
it('validates table-backed sources against matching physical manifests when project context is provided', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml',
|
||||
`tables:
|
||||
int_active_contract_arr:
|
||||
table: orbit_analytics.int_active_contract_arr
|
||||
columns:
|
||||
- { name: contract_id, type: string }
|
||||
- { name: contract_arr_cents, type: number }
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add warehouse manifest',
|
||||
);
|
||||
|
||||
const invalidDbtSource = [
|
||||
'name: int_active_contract_arr',
|
||||
'table: orbit_analytics.int_active_contract_arr',
|
||||
'grain: [contract_id]',
|
||||
'columns:',
|
||||
' - { name: contract_id, type: string }',
|
||||
' - { name: arr_cents, type: number }',
|
||||
'measures:',
|
||||
' - { name: arr, expr: sum(arr_cents) }',
|
||||
'',
|
||||
].join('\n');
|
||||
|
||||
const result = await validateLocalSlSource(invalidDbtSource, { project, connectionId: 'dbt-main' });
|
||||
expect(result.valid).toBe(false);
|
||||
expect(result.errors.join('\n')).toContain('arr_cents');
|
||||
expect(result.errors.join('\n')).toContain('absent from physical table');
|
||||
});
|
||||
|
||||
it('lists and reads manifest-backed scan sources as queryable sources', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/public.yaml',
|
||||
|
|
@ -345,7 +378,7 @@ describe('local semantic-layer helpers', () => {
|
|||
|
||||
await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({
|
||||
valid: false,
|
||||
errors: [expect.stringContaining('grain')],
|
||||
errors: expect.arrayContaining([expect.stringContaining('grain')]),
|
||||
});
|
||||
|
||||
await expect(
|
||||
|
|
|
|||
|
|
@ -7,7 +7,12 @@ import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index
|
|||
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
import { composeOverlay, type ManifestTableEntry, projectManifestEntry } from './semantic-layer.service.js';
|
||||
import {
|
||||
composeOverlay,
|
||||
type ManifestTableEntry,
|
||||
projectManifestEntry,
|
||||
SemanticLayerService,
|
||||
} from './semantic-layer.service.js';
|
||||
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
|
||||
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
|
||||
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
|
||||
|
|
@ -157,7 +162,7 @@ function summarizeSource(args: { connectionId: string; path: string; raw: string
|
|||
}
|
||||
|
||||
function sourceToYaml(source: SemanticLayerSource): string {
|
||||
return YAML.stringify(source, { indent: 2, lineWidth: 0 });
|
||||
return YAML.stringify(source, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
}
|
||||
|
||||
function summarizeSemanticSource(args: {
|
||||
|
|
@ -246,12 +251,24 @@ export async function loadLocalSlSourceRecords(
|
|||
return [...sources.values()].sort((left, right) => left.name.localeCompare(right.name));
|
||||
}
|
||||
|
||||
export async function validateLocalSlSource(rawYaml: string): Promise<LocalSlValidationResult> {
|
||||
export async function validateLocalSlSource(
|
||||
rawYaml: string,
|
||||
options?: { project?: KtxLocalProject; connectionId?: string },
|
||||
): Promise<LocalSlValidationResult> {
|
||||
try {
|
||||
const parsed = parseYamlRecord(rawYaml);
|
||||
const schema = parsed.table || parsed.sql ? sourceDefinitionSchema : sourceOverlaySchema;
|
||||
schema.parse(parsed);
|
||||
return { valid: true, errors: [] };
|
||||
const result = schema.parse(parsed);
|
||||
const errors: string[] = [];
|
||||
|
||||
if (options?.project && options.connectionId && 'table' in result && result.table) {
|
||||
const service = new SemanticLayerService(options.project.fileStore, {} as never, {} as never);
|
||||
errors.push(
|
||||
...(await service.validatePhysicalTableReferences(options.connectionId, [result as SemanticLayerSource])),
|
||||
);
|
||||
}
|
||||
|
||||
return { valid: errors.length === 0, errors };
|
||||
} catch (error) {
|
||||
return { valid: false, errors: validationErrors(error) };
|
||||
}
|
||||
|
|
@ -261,7 +278,7 @@ export async function writeLocalSlSource(
|
|||
project: KtxLocalProject,
|
||||
input: { connectionId: string; sourceName: string; yaml: string },
|
||||
): Promise<KtxFileWriteResult> {
|
||||
const validation = await validateLocalSlSource(input.yaml);
|
||||
const validation = await validateLocalSlSource(input.yaml, { project, connectionId: input.connectionId });
|
||||
if (!validation.valid) {
|
||||
throw new Error(`Invalid semantic-layer source: ${validation.errors.join('; ')}`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,6 +63,14 @@ const sourceFreshnessSchema = z.object({
|
|||
dbt: freshnessDbtSchema.optional(),
|
||||
});
|
||||
|
||||
// Grain entries and column names have to be bare output-column names.
// A '.' in one of them means a table-qualified reference such as
// `activity.account_id` was emitted, which breaks SQL generation and grain semantics.
const UNQUALIFIED_NAME_PATTERN = /^[^.]+$/;
const unqualifiedNameSchema = z
  .string()
  .min(1)
  .regex(UNQUALIFIED_NAME_PATTERN, "must be unqualified (no '.') — use the output column name");
|
||||
|
||||
const joinDeclarationSchema = z.object({
|
||||
to: z.string().min(1),
|
||||
on: z.string().min(1),
|
||||
|
|
@ -71,7 +79,7 @@ const joinDeclarationSchema = z.object({
|
|||
});
|
||||
|
||||
const sourceColumnSchema = z.object({
|
||||
name: z.string().min(1),
|
||||
name: unqualifiedNameSchema,
|
||||
// type/description optional on standalone sources: compose-time enrichment fills them
|
||||
// from the manifest entry named in `inherits_columns_from`. If the agent does not set
|
||||
// `inherits_columns_from`, or the column is not in the manifest, type must be present
|
||||
|
|
@ -90,7 +98,7 @@ const sourceColumnSchema = z.object({
|
|||
/** Overlay column: type requires expr (structural types are inherited from manifest). */
|
||||
const overlayColumnSchema = z
|
||||
.object({
|
||||
name: z.string().min(1),
|
||||
name: unqualifiedNameSchema,
|
||||
type: z.enum(columnTypeValues).optional(),
|
||||
role: z.enum(columnRoleValues).optional(),
|
||||
visibility: z.enum(columnVisibilityValues).optional(),
|
||||
|
|
@ -118,8 +126,13 @@ export const sourceDefinitionSchema = z
|
|||
// agent write `columns: [{name: FOO}]` instead of redeclaring known fields.
|
||||
// Lookup is fuzzy: bare key, fully-qualified table path, or any suffix all match.
|
||||
inherits_columns_from: z.string().optional(),
|
||||
grain: z.array(z.string()).min(1),
|
||||
columns: z.array(sourceColumnSchema).default([]),
|
||||
grain: z.array(unqualifiedNameSchema).min(1),
|
||||
// Standalone sources MUST declare columns. An empty columns array means
|
||||
// there's nothing to query or join against and breaks grain validation
|
||||
// (the grain must reference declared columns). Inheritance from a manifest
|
||||
// via `inherits_columns_from` only fills in type/description on declared
|
||||
// columns — the column names themselves must be listed here.
|
||||
columns: z.array(sourceColumnSchema).min(1),
|
||||
joins: z.array(joinDeclarationSchema).default([]),
|
||||
measures: z.array(slMeasureDefinitionSchema).default([]),
|
||||
segments: z.array(segmentDefinitionSchema).optional(),
|
||||
|
|
@ -139,7 +152,7 @@ export const sourceOverlaySchema = z
|
|||
name: z.string().min(1),
|
||||
description: z.string().optional(),
|
||||
descriptions: z.record(z.string(), z.string()).optional(),
|
||||
grain: z.array(z.string()).optional(),
|
||||
grain: z.array(unqualifiedNameSchema).optional(),
|
||||
columns: z.array(overlayColumnSchema).optional(),
|
||||
joins: z.array(joinDeclarationSchema).optional(),
|
||||
measures: z.array(slMeasureDefinitionSchema).optional(),
|
||||
|
|
|
|||
|
|
@ -388,6 +388,34 @@ describe('sourceDefinitionSchema', () => {
|
|||
externalOwner: 'analytics',
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects qualified grain names (e.g. 'activity.account_id')", () => {
|
||||
const result = sourceDefinitionSchema.safeParse({
|
||||
name: 'activity',
|
||||
table: 'public.activity',
|
||||
grain: ['activity.account_id'],
|
||||
columns: [{ name: 'account_id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
});
|
||||
expect(result.success).toBe(false);
|
||||
if (result.success) return;
|
||||
expect(result.error.issues.some((i) => i.path.join('.').startsWith('grain'))).toBe(true);
|
||||
});
|
||||
|
||||
it('rejects qualified column names', () => {
|
||||
const result = sourceDefinitionSchema.safeParse({
|
||||
name: 'activity',
|
||||
table: 'public.activity',
|
||||
grain: ['account_id'],
|
||||
columns: [{ name: 'activity.account_id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
});
|
||||
expect(result.success).toBe(false);
|
||||
if (result.success) return;
|
||||
expect(result.error.issues.some((i) => i.path.join('.').startsWith('columns'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('projectManifestEntry', () => {
|
||||
|
|
@ -781,6 +809,210 @@ describe('validateWithProposedSource', () => {
|
|||
expect(result.errors[0]).toMatch(/Overlay 'orphan' has no matching manifest entry/);
|
||||
expect(pythonPort.validateSources).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('rejects table-backed sources whose declared columns are absent from a matching physical manifest', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockImplementation((path: string) => {
|
||||
if (path === schemaPath) {
|
||||
return Promise.resolve({
|
||||
content: [
|
||||
'tables:',
|
||||
' int_procurement_qualifying_actions:',
|
||||
' table: orbit_analytics.int_procurement_qualifying_actions',
|
||||
' columns:',
|
||||
' - { name: action_id, type: string }',
|
||||
' - { name: account_id, type: string }',
|
||||
' - { name: user_id, type: string }',
|
||||
' - { name: action_date, type: time }',
|
||||
' - { name: action_type, type: string }',
|
||||
].join('\n'),
|
||||
});
|
||||
}
|
||||
return Promise.reject(new Error(`Unexpected readFile: ${path}`));
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'int_procurement_qualifying_actions',
|
||||
table: 'orbit_analytics.int_procurement_qualifying_actions',
|
||||
grain: ['purchase_request_id'],
|
||||
columns: [
|
||||
{ name: 'purchase_request_id', type: 'string' },
|
||||
{ name: 'account_id', type: 'string' },
|
||||
{ name: 'requester_user_id', type: 'string' },
|
||||
{ name: 'action_week', type: 'time' },
|
||||
],
|
||||
joins: [],
|
||||
measures: [{ name: 'qualifying_action_count', expr: 'count(purchase_request_id)' }],
|
||||
});
|
||||
|
||||
expect(result.errors.join('\n')).toMatch(/declared column\(s\) absent from physical table/);
|
||||
expect(result.errors.join('\n')).toMatch(/purchase_request_id/);
|
||||
expect(result.errors.join('\n')).toMatch(/requester_user_id/);
|
||||
expect(result.errors.join('\n')).toMatch(/action_week/);
|
||||
expect(result.errors.join('\n')).toMatch(/measure "qualifying_action_count" references unknown column\(s\)/);
|
||||
});
|
||||
|
||||
it('keeps valid table-backed sources clean when a physical manifest matches', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'tables:',
|
||||
' mart_revenue_daily:',
|
||||
' table: orbit_analytics.mart_revenue_daily',
|
||||
' columns:',
|
||||
' - { name: revenue_date, type: time }',
|
||||
' - { name: gross_revenue_cents, type: number }',
|
||||
' - { name: credits_cents, type: number }',
|
||||
' - { name: refunds_cents, type: number }',
|
||||
' - { name: net_revenue_cents, type: number }',
|
||||
].join('\n'),
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'mart_revenue_daily',
|
||||
table: 'orbit_analytics.mart_revenue_daily',
|
||||
grain: ['revenue_date'],
|
||||
columns: [
|
||||
{ name: 'revenue_date', type: 'time' },
|
||||
{ name: 'gross_revenue_cents', type: 'number' },
|
||||
{ name: 'credits_cents', type: 'number' },
|
||||
{ name: 'refunds_cents', type: 'number' },
|
||||
{ name: 'net_revenue_cents', type: 'number' },
|
||||
],
|
||||
joins: [],
|
||||
measures: [{ name: 'net_revenue', expr: 'sum(net_revenue_cents)' }],
|
||||
});
|
||||
|
||||
expect(result.errors).toEqual([]);
|
||||
});
|
||||
|
||||
it('allows SQL syntax tokens and cast types in physical expression validation', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'tables:',
|
||||
' mart_revenue_daily:',
|
||||
' table: orbit_analytics.mart_revenue_daily',
|
||||
' columns:',
|
||||
' - { name: order_id, type: string }',
|
||||
' - { name: revenue_date, type: time }',
|
||||
' - { name: amount, type: number }',
|
||||
' - { name: status, type: string }',
|
||||
' - { name: created_at, type: time }',
|
||||
].join('\n'),
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'mart_revenue_daily',
|
||||
table: 'orbit_analytics.mart_revenue_daily',
|
||||
grain: ['order_id'],
|
||||
columns: [
|
||||
{ name: 'order_id', type: 'string' },
|
||||
{ name: 'revenue_date', type: 'time' },
|
||||
{ name: 'amount', type: 'number' },
|
||||
{ name: 'status', type: 'string' },
|
||||
{ name: 'created_at', type: 'time' },
|
||||
{ name: 'status_text', type: 'string', expr: 'status::text' },
|
||||
],
|
||||
segments: [{ name: 'current_or_paid', expr: "created_at <= current_date OR status = 'paid'" }],
|
||||
joins: [],
|
||||
measures: [
|
||||
{ name: 'paid_amount', expr: "sum(amount) FILTER (WHERE status = 'paid')" },
|
||||
{ name: 'cast_amount_count', expr: 'count(cast(amount as text))' },
|
||||
],
|
||||
});
|
||||
|
||||
expect(result.errors).toEqual([]);
|
||||
});
|
||||
|
||||
it('rejects join keys that are absent from matched physical sources', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'tables:',
|
||||
' activity:',
|
||||
' table: orbit_analytics.activity',
|
||||
' columns:',
|
||||
' - { name: account_id, type: string }',
|
||||
' accounts:',
|
||||
' table: orbit_analytics.accounts',
|
||||
' columns:',
|
||||
' - { name: account_id, type: string }',
|
||||
].join('\n'),
|
||||
});
|
||||
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'activity',
|
||||
table: 'orbit_analytics.activity',
|
||||
grain: ['account_id'],
|
||||
columns: [{ name: 'account_id', type: 'string' }],
|
||||
joins: [{ to: 'accounts', on: 'activity.account_name = accounts.account_uuid', relationship: 'many_to_one' }],
|
||||
measures: [],
|
||||
});
|
||||
|
||||
expect(result.errors.join('\n')).toMatch(/local column "account_name"/);
|
||||
expect(result.errors.join('\n')).toMatch(/target column "account_uuid"/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('findDanglingSegmentRefs', () => {
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ export class SemanticLayerService {
|
|||
|
||||
const path = this.sourcePath(connectionId, source.name);
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source);
|
||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0 });
|
||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
|
||||
const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
|
||||
skipLock: options?.skipLock,
|
||||
|
|
@ -398,6 +398,174 @@ export class SemanticLayerService {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Resolves a table reference to a manifest entry, trying the preferred connection first.
 *
 * When the preferred connection has no match, the entries returned by
 * `listAllManifestEntries` are searched in order, skipping the preferred connection,
 * and the first one accepted by `manifestEntryMatchesRef` wins.
 *
 * @param preferredConnectionId - Connection whose manifest is consulted first.
 * @param ref - Table reference to resolve.
 * @returns The matching source and the connection it was found under, or `null`.
 */
async findManifestEntryByTableRefAcrossConnections(
  preferredConnectionId: string,
  ref: string,
): Promise<{ connectionId: string; source: SemanticLayerSource } | null> {
  const ownMatch = await this.findManifestEntryByTableRef(preferredConnectionId, ref);
  if (ownMatch) {
    return { connectionId: preferredConnectionId, source: ownMatch };
  }

  const allEntries = await this.listAllManifestEntries();
  const foreignMatch = allEntries.find(
    (candidate) =>
      candidate.connectionId !== preferredConnectionId && manifestEntryMatchesRef(candidate.source, ref),
  );
  return foreignMatch ?? null;
}
|
||||
|
||||
/**
 * Cross-checks table-backed sources against the physical manifest.
 *
 * Sources without a `table`, and sources whose table resolves to no manifest entry
 * (preferred connection first, then any other), are skipped. For every other source:
 * - declared columns without `expr` must exist on the physical table;
 * - grain entries must be declared columns that are computed or physically present;
 * - computed-column expressions are checked against physical plus valid output columns;
 * - segment, measure and measure-filter expressions are checked against valid output
 *   columns (measure expressions additionally receive the source's measure names);
 * - join keys: the local column must be a valid output column, and the target column
 *   must exist on the target source when that target can be resolved.
 *
 * Manifest lookups are memoized per call so a table or join target referenced several
 * times does not trigger repeated manifest scans.
 *
 * @param connectionId - Connection the sources belong to; preferred for manifest lookups.
 * @param sources - Sources to validate; also used to resolve join targets by name.
 * @returns Human-readable error messages, empty when nothing is wrong.
 */
async validatePhysicalTableReferences(
  connectionId: string,
  sources: SemanticLayerSource[],
): Promise<string[]> {
  type ManifestMatch = { connectionId: string; source: SemanticLayerSource } | null;

  const errors: string[] = [];
  const sourceNames = new Set(sources.map((s) => s.name.toLowerCase()));
  const sourcesByName = new Map(sources.map((s) => [s.name.toLowerCase(), s]));

  // One lookup per distinct ref: each miss on the preferred connection re-reads every manifest shard.
  const manifestLookups = new Map<string, Promise<ManifestMatch>>();
  const lookupManifest = (ref: string): Promise<ManifestMatch> => {
    let pending = manifestLookups.get(ref);
    if (pending === undefined) {
      pending = this.findManifestEntryByTableRefAcrossConnections(connectionId, ref);
      manifestLookups.set(ref, pending);
    }
    return pending;
  };

  for (const source of sources) {
    if (!source.table) {
      continue;
    }

    const manifestMatch = await lookupManifest(source.table);
    if (!manifestMatch) {
      // No physical description available, so there is nothing to compare against.
      continue;
    }

    const manifestSource = manifestMatch.source;
    // lower-cased name -> original spelling, for case-insensitive checks and readable messages
    const manifestColumns = new Map(manifestSource.columns.map((c) => [c.name.toLowerCase(), c.name]));
    const declaredColumns = source.columns ?? [];
    const declaredByLower = new Map(declaredColumns.map((c) => [c.name.toLowerCase(), c]));
    // Declared columns that are either computed or backed by a physical column.
    const validOutputColumns = new Set(
      declaredColumns
        .filter((c) => c.expr || manifestColumns.has(c.name.toLowerCase()))
        .map((c) => c.name.toLowerCase()),
    );
    // Computed columns may also reference physical columns that were not declared.
    const computedColumnRefs = new Set([...manifestColumns.keys(), ...validOutputColumns]);
    const measureNames = new Set((source.measures ?? []).map((m) => m.name.toLowerCase()));
    const manifestLabel =
      manifestMatch.connectionId === connectionId
        ? manifestSource.name
        : `${manifestMatch.connectionId}/${manifestSource.name}`;

    const missingRefs = (expr: string, validColumns: Set<string>, validMeasures: Set<string>): string[] =>
      missingLocalExpressionRefs({
        expr,
        sourceName: source.name,
        sourceNames,
        validColumns,
        validMeasures,
      });

    const absentDeclaredColumns = declaredColumns
      .filter((c) => !c.expr && !manifestColumns.has(c.name.toLowerCase()))
      .map((c) => c.name);
    if (absentDeclaredColumns.length > 0) {
      errors.push(
        `${source.name}.yaml: table "${source.table}" matched manifest ${manifestLabel}, ` +
          `but declared column(s) absent from physical table: ${absentDeclaredColumns.join(', ')}. ` +
          `Available columns: ${[...manifestColumns.values()].join(', ')}`,
      );
    }

    const missingGrainColumns = (source.grain ?? []).filter((grain) => {
      const declared = declaredByLower.get(grain.toLowerCase());
      return !declared || (!declared.expr && !manifestColumns.has(grain.toLowerCase()));
    });
    if (missingGrainColumns.length > 0) {
      errors.push(
        `${source.name}.yaml: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
      );
    }

    for (const column of declaredColumns) {
      if (!column.expr) {
        continue;
      }
      const missing = missingRefs(column.expr, computedColumnRefs, new Set<string>());
      if (missing.length > 0) {
        errors.push(
          `${source.name}.yaml: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
        );
      }
    }

    for (const segment of source.segments ?? []) {
      const missing = missingRefs(segment.expr, validOutputColumns, new Set<string>());
      if (missing.length > 0) {
        errors.push(
          `${source.name}.yaml: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
        );
      }
    }

    for (const measure of source.measures ?? []) {
      const exprMissing = missingRefs(measure.expr, validOutputColumns, measureNames);
      if (exprMissing.length > 0) {
        errors.push(
          `${source.name}.yaml: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
        );
      }

      if (measure.filter) {
        const filterMissing = missingRefs(measure.filter, validOutputColumns, new Set<string>());
        if (filterMissing.length > 0) {
          errors.push(
            `${source.name}.yaml: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
          );
        }
      }
    }

    for (const join of source.joins ?? []) {
      const parsed = parseJoinColumns(join.on, source.name, join.to);
      if (!parsed) {
        // Join conditions that cannot be parsed into a column pair are not checked here.
        continue;
      }
      if (!validOutputColumns.has(parsed.localColumn.toLowerCase())) {
        errors.push(
          `${source.name}.yaml: join to "${join.to}" references local column ` +
            `"${parsed.localColumn}" that is not a valid output column`,
        );
      }

      // Prefer a source from this validation batch; otherwise fall back to the manifest.
      const targetSource = sourcesByName.get(join.to.toLowerCase()) ?? (await lookupManifest(join.to))?.source;
      if (targetSource) {
        const targetColumns = new Set(targetSource.columns.map((c) => c.name.toLowerCase()));
        if (!targetColumns.has(parsed.targetColumn.toLowerCase())) {
          errors.push(
            `${source.name}.yaml: join to "${join.to}" references target column ` +
              `"${parsed.targetColumn}" that does not exist on the target source`,
          );
        }
      }
    }
  }

  return errors;
}
|
||||
|
||||
async getDialectForConnection(connectionId: string): Promise<string> {
|
||||
const connection = await this.connections.getConnectionById(connectionId);
|
||||
if (!connection) {
|
||||
|
|
@ -502,10 +670,15 @@ export class SemanticLayerService {
|
|||
return { errors: [errorMsg], warnings: [], perSourceWarnings: {} };
|
||||
}
|
||||
if (!data) {
|
||||
return { errors: [], warnings: [], perSourceWarnings: {} };
|
||||
return {
|
||||
errors: await this.validatePhysicalTableReferences(connectionId, validatable),
|
||||
warnings: [],
|
||||
perSourceWarnings: {},
|
||||
};
|
||||
}
|
||||
const physicalErrors = await this.validatePhysicalTableReferences(connectionId, validatable);
|
||||
return {
|
||||
errors: data.errors ?? [],
|
||||
errors: [...(data.errors ?? []), ...physicalErrors],
|
||||
warnings: data.warnings ?? [],
|
||||
perSourceWarnings: data.per_source_warnings ?? {},
|
||||
};
|
||||
|
|
@ -531,14 +704,40 @@ export class SemanticLayerService {
|
|||
return { errors: [formatPortError(error, 'Unknown validation error')], warnings: [] };
|
||||
}
|
||||
if (!data) {
|
||||
return { errors: [], warnings: [] };
|
||||
return { errors: await this.validatePhysicalTableReferences(connectionId, sources), warnings: [] };
|
||||
}
|
||||
const physicalErrors = await this.validatePhysicalTableReferences(connectionId, sources);
|
||||
return {
|
||||
errors: data.errors ?? [],
|
||||
errors: [...(data.errors ?? []), ...physicalErrors],
|
||||
warnings: data.warnings ?? [],
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Gather every manifest table entry from the `_schema` YAML shards stored
 * under the semantic-layer directory, across all connections.
 *
 * The connection id is taken from the second segment of each shard path.
 * This is best-effort: a failed directory listing yields an empty result, and
 * a shard that cannot be read or parsed is skipped.
 *
 * @returns One `{ connectionId, source }` pair per table found in a shard.
 */
private async listAllManifestEntries(): Promise<Array<{ connectionId: string; source: SemanticLayerSource }>> {
  const collected: Array<{ connectionId: string; source: SemanticLayerSource }> = [];

  let listing: string[];
  try {
    const listed = await this.configService.listFiles(SL_DIR_PREFIX);
    listing = listed.files;
  } catch {
    // Nothing can be enumerated, so report "no manifest entries".
    return collected;
  }

  const shardPaths = listing.filter((candidate) => /^semantic-layer\/[^/]+\/_schema\/.+\.ya?ml$/.test(candidate));
  for (const shardPath of shardPaths) {
    const [, connectionId] = shardPath.split('/');
    try {
      const { content } = await this.configService.readFile(shardPath);
      const shard = YAML.parse(content) as { tables?: Record<string, ManifestTableEntry> };
      const tables = shard?.tables ?? {};
      for (const [tableName, tableEntry] of Object.entries(tables)) {
        collected.push({ connectionId, source: projectManifestEntry(tableName, tableEntry) });
      }
    } catch {
      // skip unparseable shards
    }
  }

  return collected;
}
|
||||
|
||||
/**
|
||||
* Validate overlays and standalone sources against the current manifest.
|
||||
* Returns warnings for stale references (non-blocking).
|
||||
|
|
@ -963,6 +1162,8 @@ const SQL_KEYWORDS = new Set([
|
|||
'in',
|
||||
'between',
|
||||
'like',
|
||||
'where',
|
||||
'filter',
|
||||
'cast',
|
||||
'coalesce',
|
||||
'nullif',
|
||||
|
|
@ -971,6 +1172,48 @@ const SQL_KEYWORDS = new Set([
|
|||
'false',
|
||||
'asc',
|
||||
'desc',
|
||||
'date',
|
||||
'day',
|
||||
'month',
|
||||
'quarter',
|
||||
'week',
|
||||
'year',
|
||||
'interval',
|
||||
'extract',
|
||||
'from',
|
||||
'over',
|
||||
'partition',
|
||||
'by',
|
||||
'rows',
|
||||
'range',
|
||||
'current',
|
||||
'current_date',
|
||||
'current_time',
|
||||
'current_timestamp',
|
||||
'localtime',
|
||||
'localtimestamp',
|
||||
'row',
|
||||
'numeric',
|
||||
'decimal',
|
||||
'int',
|
||||
'integer',
|
||||
'bigint',
|
||||
'smallint',
|
||||
'float',
|
||||
'double',
|
||||
'real',
|
||||
'string',
|
||||
'text',
|
||||
'char',
|
||||
'character',
|
||||
'varchar',
|
||||
'timestamp',
|
||||
'time',
|
||||
'uuid',
|
||||
'json',
|
||||
'jsonb',
|
||||
'bool',
|
||||
'boolean',
|
||||
]);
|
||||
|
||||
function extractColumnReferences(expr: string): string[] {
|
||||
|
|
@ -979,6 +1222,122 @@ function extractColumnReferences(expr: string): string[] {
|
|||
return [...new Set(tokens.filter((t) => !SQL_KEYWORDS.has(t.toLowerCase())))];
|
||||
}
|
||||
|
||||
/**
 * Decide whether a manifest source is the one a reference points at.
 *
 * The comparison is case-insensitive. A reference matches when it equals the
 * source name, equals the source's `table` value, or is a dot-separated
 * suffix of that `table` value (e.g. `orders` matches `analytics.orders`).
 *
 * @param source Manifest source to test.
 * @param ref Reference string to resolve.
 * @returns `true` when the reference identifies this source.
 */
function manifestEntryMatchesRef(source: SemanticLayerSource, ref: string): boolean {
  const wanted = ref.toLowerCase();
  if (source.name.toLowerCase() === wanted) {
    return true;
  }

  const physicalTable = source.table?.toLowerCase();
  if (!physicalTable) {
    return false;
  }
  return physicalTable === wanted || physicalTable.endsWith(`.${wanted}`);
}
|
||||
|
||||
/**
 * Prepare a SQL expression for a regex-based identifier scan.
 *
 * The result is not valid SQL; it is the input with everything that could be
 * mistaken for an identifier blanked out or unwrapped:
 *  - single-quoted string literals, `--` line comments and block comments are
 *    replaced by a space;
 *  - double-quoted, backtick-quoted and bracket-quoted names lose their quotes;
 *  - `::type` / `::type(args)` casts are replaced by a space.
 *
 * @param expr Raw SQL expression text.
 * @returns The expression with literals, comments and casts removed.
 */
function normalizeSqlExpressionForIdentifierScan(expr: string): string {
  return (
    expr
      // Literals and comments are removed in ONE left-to-right pass so that
      // whichever construct opens first wins. Stripping comments in a separate
      // earlier pass would cut a literal such as 'a--b' or '/*' in half and
      // swallow the rest of the line or expression.
      .replace(/'(?:[^']|'')*'|--.*$|\/\*[\s\S]*?\*\//gm, ' ')
      .replace(/"([^"]+)"/g, '$1')
      .replace(/`([^`]+)`/g, '$1')
      .replace(/\[([^\]]+)\]/g, '$1')
      .replace(/::\s*[A-Za-z_][\w$]*(?:\s*\([^)]*\))?/g, ' ')
  );
}
|
||||
|
||||
/**
 * Scan a SQL expression for identifier references.
 *
 * The expression is first passed through
 * `normalizeSqlExpressionForIdentifierScan`. Each bare `name` or
 * `qualifier.name` token is then reported once, de-duplicated
 * case-insensitively. Two kinds of tokens are skipped:
 *  - an unqualified name directly followed by `(`, which is a function call;
 *  - tokens whose name or qualifier is listed in `SQL_KEYWORDS`.
 *
 * @param expr SQL expression text.
 * @returns The distinct references, in order of first appearance.
 */
function extractSqlIdentifierRefs(expr: string): Array<{ qualifier?: string; name: string }> {
  const text = normalizeSqlExpressionForIdentifierScan(expr);
  const identifierPattern = /(?:\b([A-Za-z_][\w$]*)\s*\.\s*)?(\b[A-Za-z_][\w$]*)\b/g;
  const seen = new Map<string, { qualifier?: string; name: string }>();

  for (const hit of text.matchAll(identifierPattern)) {
    const qualifier = hit[1];
    const name = hit[2];
    if (!name) {
      continue;
    }

    // An unqualified token followed (after optional whitespace) by "(" is a call.
    const tokenEnd = (hit.index ?? 0) + hit[0].length;
    const followedByParen = /^\s*\(/.test(text.slice(tokenEnd));
    if (followedByParen && !qualifier) {
      continue;
    }

    const loweredName = name.toLowerCase();
    const loweredQualifier = qualifier?.toLowerCase();
    const qualifierIsKeyword = !!loweredQualifier && SQL_KEYWORDS.has(loweredQualifier);
    if (SQL_KEYWORDS.has(loweredName) || qualifierIsKeyword) {
      continue;
    }

    const dedupeKey = `${loweredQualifier ?? ''}.${loweredName}`;
    seen.set(dedupeKey, qualifier ? { qualifier, name } : { name });
  }

  return Array.from(seen.values());
}
|
||||
|
||||
/**
 * Decide whether an identifier reference should be validated against the
 * current source.
 *
 * A reference belongs to the source when it is unqualified, when its qualifier
 * is the source's own name (case-insensitive), or when its qualifier is not
 * the name of any known source. Only a qualifier naming a different known
 * source excludes it.
 *
 * @param ref Reference with an optional qualifier.
 * @param sourceName Name of the source being validated.
 * @param sourceNames Known source names, compared against the lower-cased qualifier.
 * @returns `true` when the reference should be checked against this source.
 */
function refBelongsToSource(
  ref: { qualifier?: string; name: string },
  sourceName: string,
  sourceNames: Set<string>,
): boolean {
  const owner = ref.qualifier?.toLowerCase();
  return !owner || owner === sourceName.toLowerCase() || !sourceNames.has(owner);
}
|
||||
|
||||
/**
 * List the identifiers in an expression that resolve to neither a valid column
 * nor a valid measure of the source.
 *
 * References that `refBelongsToSource` attributes to another source are
 * ignored. Lookups use the lower-cased name; the returned names keep the
 * casing found in the expression.
 *
 * @param input Expression text plus the source name, the known source names,
 *   and the sets of valid column and measure names.
 * @returns The distinct unresolved names, sorted.
 */
function missingLocalExpressionRefs(input: {
  expr: string;
  sourceName: string;
  sourceNames: Set<string>;
  validColumns: Set<string>;
  validMeasures: Set<string>;
}): string[] {
  const { expr, sourceName, sourceNames, validColumns, validMeasures } = input;

  const unresolved = extractSqlIdentifierRefs(expr)
    .filter((ref) => refBelongsToSource(ref, sourceName, sourceNames))
    .map((ref) => ref.name)
    .filter((candidate) => {
      const key = candidate.toLowerCase();
      return !(validColumns.has(key) || validMeasures.has(key));
    });

  return Array.from(new Set(unresolved)).sort();
}
|
||||
|
||||
/**
 * Parse one operand of a join condition.
 *
 * Accepts `column` or `qualifier.column`, where both parts consist of word
 * characters only; surrounding whitespace is ignored.
 *
 * @param side Text on one side of the `=`.
 * @returns The parsed operand, or `null` when the text has any other shape.
 */
function parseJoinSide(side: string): { qualifier?: string; column: string } | null {
  const parts = /^(?:(\w+)\.)?(\w+)$/.exec(side.trim());
  if (parts === null) {
    return null;
  }
  const qualifier = parts[1];
  const column = parts[2];
  return qualifier ? { qualifier, column } : { column };
}

/**
 * Work out which column of a simple equality join belongs to the local source
 * and which belongs to the join target.
 *
 * Only conditions of the form `<operand> = <operand>` are understood. The
 * operands are treated as written target-first when the left qualifier names
 * the target or the right qualifier names the source (case-insensitive). In
 * every other case the left operand is taken as the local column.
 *
 * @param on Join condition text.
 * @param sourceName Name of the source that declares the join.
 * @param targetName Name of the join target.
 * @returns The local and target column names, or `null` when the condition is
 *   not a single equality between two simple operands.
 */
function parseJoinColumns(
  on: string,
  sourceName: string,
  targetName: string,
): { localColumn: string; targetColumn: string } | null {
  const operands = on.split('=');
  if (operands.length !== 2) {
    return null;
  }

  const lhs = parseJoinSide(operands[0]);
  const rhs = parseJoinSide(operands[1]);
  if (lhs === null || rhs === null) {
    return null;
  }

  const localName = sourceName.toLowerCase();
  const remoteName = targetName.toLowerCase();
  const lhsOwner = lhs.qualifier?.toLowerCase();
  const rhsOwner = rhs.qualifier?.toLowerCase();

  // The qualifiers are the only evidence of orientation; without a clear
  // signal that the operands are reversed, keep them in written order.
  const writtenTargetFirst = lhsOwner === remoteName || rhsOwner === localName;
  return writtenTargetFirst
    ? { localColumn: rhs.column, targetColumn: lhs.column }
    : { localColumn: lhs.column, targetColumn: rhs.column };
}
|
||||
|
||||
/**
|
||||
* Returns one message per measure-level segment reference that doesn't resolve to
|
||||
* a segment defined on the source. Array is empty when every reference checks out.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
|
||||
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
|
|
@ -25,6 +25,10 @@ const slEditSourceInputSchema = z.object({
|
|||
.optional()
|
||||
.describe('Targeted exact-match search/replace edits on the raw YAML content.'),
|
||||
delete: z.boolean().optional().describe('Set to true to delete this source entirely'),
|
||||
rawPaths: z
|
||||
.array(z.string().min(1))
|
||||
.optional()
|
||||
.describe('In ingest sessions, raw source file paths that directly support this SL action.'),
|
||||
});
|
||||
|
||||
type SlEditSourceInput = z.infer<typeof slEditSourceInputSchema>;
|
||||
|
|
@ -75,6 +79,10 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
|
||||
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return this.buildOutput(false, [rawPathValidation.error], sourceName);
|
||||
}
|
||||
|
||||
// Handle delete
|
||||
if (input.delete) {
|
||||
|
|
@ -88,6 +96,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
key: sourceName,
|
||||
detail: 'Deleted source',
|
||||
targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
|
||||
...(rawPathValidation.rawPaths ? { rawPaths: rawPathValidation.rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
|
||||
|
|
@ -151,7 +160,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
|
||||
// Re-serialize and write
|
||||
const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 });
|
||||
const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
|
||||
const { errors: validationErrors, warnings: validationWarnings } =
|
||||
await semanticLayerService.validateWithProposedSource(connectionId, source);
|
||||
|
|
@ -184,6 +193,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
key: sourceName,
|
||||
detail: `Applied ${editCount} edit(s)`,
|
||||
targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
|
||||
...(rawPathValidation.rawPaths ? { rawPaths: rawPathValidation.rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ function makeDeps(opts: { sourceYaml: string; executeQuery: ReturnType<typeof vi
|
|||
listManifestSourceNames: vi.fn().mockResolvedValue([]),
|
||||
loadSource: vi.fn().mockResolvedValue(null),
|
||||
loadAllSources: vi.fn().mockResolvedValue([]),
|
||||
validatePhysicalTableReferences: vi.fn().mockResolvedValue([]),
|
||||
} as never,
|
||||
connections: {
|
||||
executeQuery: opts.executeQuery,
|
||||
|
|
@ -117,4 +118,29 @@ joins: []
|
|||
expect(probeSql).toMatch(/LIMIT 1\b/);
|
||||
expect(probeSql).not.toMatch(/LIMIT 0\b/);
|
||||
});
|
||||
|
||||
it('adds physical manifest errors for table-backed sources', async () => {
|
||||
const yaml = `name: int_active_contract_arr
|
||||
table: orbit_analytics.int_active_contract_arr
|
||||
grain: [contract_id]
|
||||
columns:
|
||||
- {name: contract_id, type: string}
|
||||
- {name: arr_cents, type: number}
|
||||
measures:
|
||||
- {name: arr, expr: sum(arr_cents)}
|
||||
joins: []
|
||||
`;
|
||||
const executeQuery = vi.fn();
|
||||
const deps = makeDeps({ sourceYaml: yaml, executeQuery }) as any;
|
||||
deps.semanticLayerService.validatePhysicalTableReferences.mockResolvedValue([
|
||||
'int_active_contract_arr.yaml: declared column(s) absent from physical table: arr_cents',
|
||||
]);
|
||||
|
||||
const result = await validateSingleSource(deps, 'conn-1', 'int_active_contract_arr');
|
||||
|
||||
expect(result.errors).toContain(
|
||||
'int_active_contract_arr.yaml: declared column(s) absent from physical table: arr_cents',
|
||||
);
|
||||
expect(executeQuery).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { SYSTEM_GIT_AUTHOR } from '../../tools/index.js';
|
|||
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
|
||||
import { sourceOverlaySchema } from '../schemas.js';
|
||||
import { SemanticLayerService } from '../semantic-layer.service.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
|
||||
|
||||
export interface SlValidationDeps {
|
||||
|
|
@ -118,6 +119,14 @@ export async function validateSingleSource(
|
|||
return { errors, warnings };
|
||||
}
|
||||
|
||||
if (!isOverlay && 'table' in result.data && result.data.table) {
|
||||
errors.push(
|
||||
...(await deps.semanticLayerService.validatePhysicalTableReferences(connectionId, [
|
||||
result.data as SemanticLayerSource,
|
||||
])),
|
||||
);
|
||||
}
|
||||
|
||||
const measures = (parsed.measures as Array<{ name: string }> | undefined) ?? [];
|
||||
const seenMeasures = new Set<string>();
|
||||
for (const m of measures) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
import { sourceOverlaySchema } from '../schemas.js';
|
||||
import type { SemanticLayerService } from '../semantic-layer.service.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
|
|
@ -25,6 +25,10 @@ const slWriteSourceInputSchema = z.object({
|
|||
.optional()
|
||||
.describe('Source definition (standalone with table/sql) or overlay (measures, computed columns, etc.)'),
|
||||
delete: z.boolean().optional().describe('Set to true to delete this source entirely'),
|
||||
rawPaths: z
|
||||
.array(z.string().min(1))
|
||||
.optional()
|
||||
.describe('In ingest sessions, raw source file paths that directly support this SL action.'),
|
||||
});
|
||||
|
||||
type SlWriteSourceInput = z.infer<typeof slWriteSourceInputSchema>;
|
||||
|
|
@ -99,6 +103,10 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
|
||||
const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return this.buildOutput(false, [rawPathValidation.error], sourceName);
|
||||
}
|
||||
|
||||
// Handle delete
|
||||
if (input.delete) {
|
||||
|
|
@ -116,6 +124,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
key: sourceName,
|
||||
detail: 'Deleted source',
|
||||
targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
|
||||
...(rawPathValidation.rawPaths ? { rawPaths: rawPathValidation.rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
|
||||
|
|
@ -142,6 +151,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
context,
|
||||
semanticLayerService,
|
||||
skipIndex,
|
||||
rawPathValidation.rawPaths,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -154,6 +164,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
context: ToolContext,
|
||||
semanticLayerService: SemanticLayerService,
|
||||
skipIndex: boolean,
|
||||
rawPaths: string[] | undefined,
|
||||
): Promise<ToolOutput<SemanticLayerStructured>> {
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
const isOverlay =
|
||||
|
|
@ -164,7 +175,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}`
|
||||
: `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`;
|
||||
|
||||
const yamlContent = YAML.stringify(normalizedSource);
|
||||
const yamlContent = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
|
||||
const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent);
|
||||
if (orphanError) {
|
||||
|
|
@ -211,6 +222,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
key: sourceName,
|
||||
detail: existing ? `Rewrote source` : `Created source`,
|
||||
targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
|
||||
...(rawPaths ? { rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
30
packages/context/src/tools/action-raw-paths.ts
Normal file
30
packages/context/src/tools/action-raw-paths.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import type { ToolSession } from './tool-session.js';
|
||||
|
||||
/** Outcome of checking the raw paths attached to an action. */
type ActionRawPathValidation =
  | { ok: true; rawPaths?: string[] }
  | { ok: false; error: string };

/**
 * De-duplicate the raw paths attached to an action and, when the session
 * carries an `allowedRawPaths` set, check that each one is in it.
 *
 * @param session Current tool session, if any; only `allowedRawPaths` is read.
 * @param rawPaths Raw source paths supplied with the action.
 * @returns `{ ok: true }` with no paths when none were supplied,
 *   `{ ok: true, rawPaths }` with the de-duplicated paths when all are
 *   acceptable or no allow-list exists, and `{ ok: false, error }` naming
 *   each path missing from the allow-list otherwise.
 */
export function validateActionRawPaths(
  session: ToolSession | undefined,
  rawPaths: readonly string[] | undefined,
): ActionRawPathValidation {
  if (!rawPaths || rawPaths.length === 0) {
    return { ok: true };
  }

  const requested = Array.from(new Set(rawPaths));
  const allowList = session?.allowedRawPaths;
  // Without an allow-list nothing can be rejected.
  const rejected = allowList ? requested.filter((candidate) => !allowList.has(candidate)) : [];

  if (rejected.length === 0) {
    return { ok: true, rawPaths: requested };
  }
  return {
    ok: false,
    error: `rawPaths include unavailable ingest file(s): ${rejected.join(', ')}`,
  };
}
|
||||
|
|
@ -31,6 +31,7 @@ export { ingestMetadataRequired, resolveIngestMetadata } from './context-ingest-
|
|||
export type { SqlEdit } from './sql-edit-replacer.js';
|
||||
export { applySqlEdits } from './sql-edit-replacer.js';
|
||||
export type { IngestToolMetadata, MemoryAction, ToolSession } from './tool-session.js';
|
||||
export { validateActionRawPaths } from './action-raw-paths.js';
|
||||
export type { TouchedSlSource, TouchedSlSourceSet } from './touched-sl-sources.js';
|
||||
export {
|
||||
addTouchedSlSource,
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ export interface MemoryAction {
|
|||
key: string;
|
||||
detail: string;
|
||||
targetConnectionId?: string | null;
|
||||
rawPaths?: string[];
|
||||
}
|
||||
|
||||
interface EvictionDecisionRecord {
|
||||
|
|
@ -45,6 +46,7 @@ export interface ToolSession {
|
|||
preHead: string | null;
|
||||
touchedSlSources: TouchedSlSourceSet;
|
||||
actions: MemoryAction[];
|
||||
allowedRawPaths?: ReadonlySet<string>;
|
||||
semanticLayerService: SemanticLayerService;
|
||||
wikiService: KnowledgeWikiService;
|
||||
configService: KtxFileStorePort;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,11 @@
|
|||
export { buildKnowledgeSearchText } from './knowledge-search-text.js';
|
||||
export {
|
||||
assertFlatWikiKey,
|
||||
invalidFlatWikiKeyMessage,
|
||||
isFlatWikiKey,
|
||||
suggestFlatWikiKey,
|
||||
validateFlatWikiKey,
|
||||
} from './keys.js';
|
||||
export { KnowledgeWikiService } from './knowledge-wiki.service.js';
|
||||
export * from './local-knowledge.js';
|
||||
export type {
|
||||
|
|
|
|||
31
packages/context/src/wiki/keys.ts
Normal file
31
packages/context/src/wiki/keys.ts
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
/** A flat wiki key: an ASCII letter or digit, then letters, digits, `_` or `-`. */
const FLAT_WIKI_KEY_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/;

/**
 * Derive a flat key from arbitrary input.
 *
 * Path separators and any run of disallowed characters become a single `-`,
 * repeated dashes are collapsed, and leading or trailing `-` / `_` are removed.
 *
 * @param key Candidate key, possibly nested or containing illegal characters.
 * @returns The cleaned key, or `page-key` when nothing usable remains.
 */
export function suggestFlatWikiKey(key: string): string {
  const withoutSeparators = key.trim().replace(/[\\/]+/g, '-');
  const onlyAllowedChars = withoutSeparators.replace(/[^a-zA-Z0-9_-]+/g, '-');
  const singleDashes = onlyAllowedChars.replace(/-+/g, '-');
  const stripped = singleDashes.replace(/^[-_]+|[-_]+$/g, '');
  if (stripped.length === 0) {
    return 'page-key';
  }
  return stripped;
}

/**
 * Build the error message shown for a key that is not flat.
 *
 * @param key The rejected key.
 * @returns A message quoting the key and a suggested replacement.
 */
export function invalidFlatWikiKeyMessage(key: string): string {
  const replacement = suggestFlatWikiKey(key);
  return `Invalid wiki key "${key}". Wiki keys must be flat; use "${replacement}".`;
}

/**
 * Test whether a key matches the flat-key pattern.
 *
 * @param key Key to test.
 * @returns `true` when the key is flat.
 */
export function isFlatWikiKey(key: string): boolean {
  return FLAT_WIKI_KEY_PATTERN.exec(key) !== null;
}

/**
 * Validate a key without throwing.
 *
 * @param key Key to validate.
 * @returns `{ ok: true, key }` for a flat key, otherwise `{ ok: false, error }`.
 */
export function validateFlatWikiKey(key: string): { ok: true; key: string } | { ok: false; error: string } {
  if (!isFlatWikiKey(key)) {
    return { ok: false, error: invalidFlatWikiKeyMessage(key) };
  }
  return { ok: true, key };
}

/**
 * Validate a key and return it unchanged.
 *
 * @param key Key to validate.
 * @returns The same key when it is flat.
 * @throws Error carrying the invalid-key message when the key is not flat.
 */
export function assertFlatWikiKey(key: string): string {
  const outcome = validateFlatWikiKey(key);
  if (outcome.ok) {
    return outcome.key;
  }
  throw new Error(outcome.error);
}
|
||||
|
|
@ -33,13 +33,19 @@ function makeService() {
|
|||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
getFileAtCommit: vi.fn().mockResolvedValue(''),
|
||||
};
|
||||
const logger = {
|
||||
log: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
};
|
||||
const service = new KnowledgeWikiService(
|
||||
configService as any,
|
||||
embeddingService as any,
|
||||
pagesRepository as any,
|
||||
gitService as any,
|
||||
logger as any,
|
||||
);
|
||||
return { service, pagesRepository, embeddingService, configService, gitService };
|
||||
return { service, pagesRepository, embeddingService, configService, gitService, logger };
|
||||
}
|
||||
|
||||
const fm: WikiFrontmatter = { summary: 'sum', usage_mode: 'auto' };
|
||||
|
|
@ -107,6 +113,53 @@ describe('KnowledgeWikiService.syncFromCommit', () => {
|
|||
expect(call.deletes).toEqual([{ scope: 'GLOBAL', scopeId: null, pageKey: 'gone-page' }]);
|
||||
});
|
||||
|
||||
it('indexes historic-SQL nested pages but skips other nested wiki paths from commit sync', async () => {
|
||||
const { service, pagesRepository, gitService, logger } = makeService();
|
||||
|
||||
gitService.diffNameStatus.mockResolvedValue([
|
||||
{ status: 'A', path: 'knowledge/global/revenue-policy.md' },
|
||||
{ status: 'A', path: 'knowledge/global/historic-sql/order-lifecycle.md' },
|
||||
{ status: 'A', path: 'knowledge/global/historic-sql/_archived/retired-pattern.md' },
|
||||
{ status: 'A', path: 'knowledge/global/orbit/company-overview.md' },
|
||||
]);
|
||||
gitService.getFileAtCommit.mockImplementation((path: string) => {
|
||||
if (path.endsWith('revenue-policy.md')) {
|
||||
return Promise.resolve('---\nsummary: revenue\nusage_mode: auto\n---\n\nbody-revenue\n');
|
||||
}
|
||||
if (path.endsWith('order-lifecycle.md')) {
|
||||
return Promise.resolve('---\nsummary: order lifecycle\nusage_mode: auto\n---\n\nbody-orders\n');
|
||||
}
|
||||
if (path.endsWith('retired-pattern.md')) {
|
||||
return Promise.resolve('---\nsummary: retired\nusage_mode: never\n---\n\nbody-retired\n');
|
||||
}
|
||||
return Promise.reject(new Error(`unexpected getFileAtCommit path: ${path}`));
|
||||
});
|
||||
|
||||
await service.syncFromCommit('sha-before', 'sha-after', 'run-uuid');
|
||||
|
||||
expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/orbit/company-overview.md', 'sha-after');
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'[knowledge.sync] skipping unparseable path: knowledge/global/orbit/company-overview.md',
|
||||
);
|
||||
const call = pagesRepository.applyDiffTransactional.mock.calls[0][0];
|
||||
expect(call.upserts).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({ scope: 'GLOBAL', pageKey: 'revenue-policy', summary: 'revenue' }),
|
||||
expect.objectContaining({
|
||||
scope: 'GLOBAL',
|
||||
pageKey: 'historic-sql/order-lifecycle',
|
||||
summary: 'order lifecycle',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
scope: 'GLOBAL',
|
||||
pageKey: 'historic-sql/_archived/retired-pattern',
|
||||
summary: 'retired',
|
||||
}),
|
||||
]),
|
||||
);
|
||||
expect(call.upserts).toHaveLength(3);
|
||||
});
|
||||
|
||||
it('is a no-op when the diff between shas has no knowledge changes', async () => {
|
||||
const { service, pagesRepository, gitService } = makeService();
|
||||
gitService.diffNameStatus.mockResolvedValue([]);
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { createHash } from 'node:crypto';
|
|||
import YAML from 'yaml';
|
||||
import type { KtxEmbeddingPort, KtxFileStorePort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import { assertFlatWikiKey, isFlatWikiKey } from './keys.js';
|
||||
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
|
||||
import type { KnowledgeGitDiffPort, KnowledgeIndexPort, UpsertPageParams } from './ports.js';
|
||||
import type { WikiFrontmatter, WikiPage, WikiPageWithScope } from './types.js';
|
||||
|
|
@ -10,6 +11,10 @@ const WIKI_PREFIX = 'knowledge';
|
|||
|
||||
export type { WikiFrontmatter };
|
||||
|
||||
/**
 * Check a single path segment of a nested historic-SQL page key.
 *
 * A segment must start with an ASCII letter, digit or underscore and continue
 * with letters, digits, `_` or `-`. Empty segments and segments containing
 * dots or slashes are therefore rejected.
 *
 * @param segment One `/`-separated piece of the path.
 * @returns `true` when the segment is acceptable.
 */
function isHistoricSqlPathSegment(segment: string): boolean {
  const segmentPattern = /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/;
  return segment.match(segmentPattern) !== null;
}
|
||||
|
||||
export class KnowledgeWikiService {
|
||||
private isWorktreeScoped = false;
|
||||
|
||||
|
|
@ -53,7 +58,7 @@ export class KnowledgeWikiService {
|
|||
}
|
||||
|
||||
pagePath(scope: string, scopeId: string | null | undefined, pageKey: string): string {
|
||||
return `${this.scopeDir(scope, scopeId)}/${pageKey}.md`;
|
||||
return `${this.scopeDir(scope, scopeId)}/${assertFlatWikiKey(pageKey)}.md`;
|
||||
}
|
||||
|
||||
// ── Parsing / serialization ───────────────────────────────────
|
||||
|
|
@ -140,7 +145,7 @@ export class KnowledgeWikiService {
|
|||
const name = f.replace(`${dir}/`, '').replace(/\.md$/, '');
|
||||
return name;
|
||||
})
|
||||
.filter((name) => !name.includes('/'));
|
||||
.filter(isFlatWikiKey);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -417,6 +422,7 @@ export class KnowledgeWikiService {
|
|||
* Parse a `knowledge/<scope>/...` file path into its scope and page key.
|
||||
* `knowledge/global/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'foo' }
|
||||
* `knowledge/user/<id>/bar.md` → { scope: 'USER', scopeId: '<id>', pageKey: 'bar' }
|
||||
* `knowledge/global/historic-sql/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'historic-sql/foo' }
|
||||
*/
|
||||
function parseKnowledgePath(path: string): { scope: string; scopeId: string | null; pageKey: string } | null {
|
||||
if (!path.endsWith('.md')) {
|
||||
|
|
@ -428,10 +434,19 @@ function parseKnowledgePath(path: string): { scope: string; scopeId: string | nu
|
|||
}
|
||||
const rest = segments.slice(1);
|
||||
if (rest.length === 2 && rest[0] === 'global') {
|
||||
return { scope: 'GLOBAL', scopeId: null, pageKey: rest[1].replace(/\.md$/, '') };
|
||||
const pageKey = rest[1].replace(/\.md$/, '');
|
||||
return isFlatWikiKey(pageKey) ? { scope: 'GLOBAL', scopeId: null, pageKey } : null;
|
||||
}
|
||||
if (rest.length >= 3 && rest[0] === 'global' && rest[1] === 'historic-sql') {
|
||||
const historicPath = rest.slice(2).join('/').replace(/\.md$/, '');
|
||||
if (historicPath.split('/').every(isHistoricSqlPathSegment)) {
|
||||
return { scope: 'GLOBAL', scopeId: null, pageKey: `historic-sql/${historicPath}` };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (rest.length === 3 && rest[0] === 'user') {
|
||||
return { scope: 'USER', scopeId: rest[1], pageKey: rest[2].replace(/\.md$/, '') };
|
||||
const pageKey = rest[2].replace(/\.md$/, '');
|
||||
return isFlatWikiKey(pageKey) ? { scope: 'USER', scopeId: rest[1], pageKey } : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('writes, reads, lists, and searches global knowledge pages', async () => {
|
||||
const write = await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -46,11 +46,11 @@ describe('local knowledge helpers', () => {
|
|||
slRefs: ['orders'],
|
||||
});
|
||||
|
||||
expect(write.path).toBe('knowledge/global/metrics/revenue.md');
|
||||
expect(write.path).toBe('knowledge/global/metrics-revenue.md');
|
||||
expect(write.operation).toBe('write');
|
||||
|
||||
await expect(readLocalKnowledgePage(project, { key: 'metrics/revenue', userId: 'local' })).resolves.toMatchObject({
|
||||
key: 'metrics/revenue',
|
||||
await expect(readLocalKnowledgePage(project, { key: 'metrics-revenue', userId: 'local' })).resolves.toMatchObject({
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -61,8 +61,8 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([
|
||||
{
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
},
|
||||
|
|
@ -71,8 +71,8 @@ describe('local knowledge helpers', () => {
|
|||
const search = await searchLocalKnowledgePages(project, { query: 'paid order', userId: 'local' });
|
||||
expect(search).toEqual([
|
||||
expect.objectContaining({
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL',
|
||||
score: expect.any(Number),
|
||||
matchReasons: expect.arrayContaining(['lexical']),
|
||||
|
|
@ -85,7 +85,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('adds the token lane alongside lexical wiki matches', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -95,7 +95,7 @@ describe('local knowledge helpers', () => {
|
|||
const search = await searchLocalKnowledgePages(project, { query: 'paid---', userId: 'local', limit: 5 });
|
||||
|
||||
expect(search[0]).toMatchObject({
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
matchReasons: expect.arrayContaining(['token']),
|
||||
lanes: expect.arrayContaining([expect.objectContaining({ lane: 'token', status: 'available' })]),
|
||||
});
|
||||
|
|
@ -103,14 +103,14 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('uses stored page embeddings when a wiki embedding backend is configured', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Semantic revenue definition',
|
||||
content: 'Revenue search text.',
|
||||
tags: ['finance'],
|
||||
});
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'support/escalations',
|
||||
key: 'support-escalations',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Support escalation process',
|
||||
content: 'Support search text.',
|
||||
|
|
@ -125,7 +125,7 @@ describe('local knowledge helpers', () => {
|
|||
});
|
||||
|
||||
expect(search[0]).toMatchObject({
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
matchReasons: expect.arrayContaining(['semantic']),
|
||||
lanes: expect.arrayContaining([expect.objectContaining({ lane: 'semantic', status: 'available' })]),
|
||||
});
|
||||
|
|
@ -133,7 +133,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('reports semantic lane as skipped when wiki embeddings are not configured', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -172,7 +172,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
it('serializes historic-SQL frontmatter fields for global pages', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'queries/monthly-paid-orders',
|
||||
key: 'monthly-paid-orders',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Monthly paid orders',
|
||||
content: '## Monthly paid order count',
|
||||
|
|
@ -195,7 +195,7 @@ describe('local knowledge helpers', () => {
|
|||
fingerprints: ['fp_paid_orders'],
|
||||
});
|
||||
|
||||
const raw = await project.fileStore.readFile('knowledge/global/queries/monthly-paid-orders.md');
|
||||
const raw = await project.fileStore.readFile('knowledge/global/monthly-paid-orders.md');
|
||||
expect(raw.content).toContain('source: historic-sql');
|
||||
expect(raw.content).toContain('intent: Monthly paid order count');
|
||||
expect(raw.content).toContain(['tables:', ' - analytics.orders'].join('\n'));
|
||||
|
|
@ -207,7 +207,7 @@ describe('local knowledge helpers', () => {
|
|||
it('falls back to Markdown scanning when the config does not select sqlite-fts5', async () => {
|
||||
project.config.storage.search = 'postgres-hybrid';
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
content: 'Revenue is recognized when an order is paid.',
|
||||
|
|
@ -216,7 +216,7 @@ describe('local knowledge helpers', () => {
|
|||
|
||||
await expect(searchLocalKnowledgePages(project, { query: 'paid order', userId: 'local' })).resolves.toEqual([
|
||||
expect.objectContaining({
|
||||
key: 'metrics/revenue',
|
||||
key: 'metrics-revenue',
|
||||
score: 3,
|
||||
matchReasons: ['token'],
|
||||
}),
|
||||
|
|
@ -231,6 +231,17 @@ describe('local knowledge helpers', () => {
|
|||
summary: 'bad',
|
||||
content: 'bad',
|
||||
}),
|
||||
).rejects.toThrow('Unsafe knowledge key');
|
||||
).rejects.toThrow('Invalid wiki key "../secret". Wiki keys must be flat; use "secret".');
|
||||
});
|
||||
|
||||
it('rejects slash-delimited knowledge keys with a flat-key suggestion', async () => {
|
||||
await expect(
|
||||
writeLocalKnowledgePage(project, {
|
||||
key: 'orbit/company-overview',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'bad',
|
||||
content: 'bad',
|
||||
}),
|
||||
).rejects.toThrow('Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import type { KtxEmbeddingPort, KtxFileWriteResult } from '../core/index.js';
|
|||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js';
|
||||
import { buildKnowledgeSearchText } from './knowledge-search-text.js';
|
||||
import { assertFlatWikiKey, isFlatWikiKey } from './keys.js';
|
||||
import { SqliteKnowledgeIndex, type SqliteKnowledgeIndexPage } from './sqlite-knowledge-index.js';
|
||||
import type { HistoricSqlWikiUsageFrontmatter, WikiSearchLaneSummary, WikiSearchMatchReason } from './types.js';
|
||||
|
||||
|
|
@ -67,28 +68,39 @@ function assertSafePathToken(kind: string, value: string): string {
|
|||
return value;
|
||||
}
|
||||
|
||||
function assertSafeKnowledgeKey(key: string): string {
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_/-]*$/.test(key)) {
|
||||
throw new Error(`Unsafe knowledge key: ${key}`);
|
||||
}
|
||||
return assertSafePathToken('knowledge key', key);
|
||||
}
|
||||
|
||||
/**
 * Coerces an arbitrary value into a list of strings.
 *
 * @param value - Any value.
 * @returns A new array holding only the string elements of `value`, in their
 *   original order; an empty array when `value` is not an array.
 */
function stringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const strings: string[] = [];
  for (const entry of value) {
    if (typeof entry === 'string') {
      strings.push(entry);
    }
  }
  return strings;
}
|
||||
|
||||
function knowledgePath(scope: LocalKnowledgeScope, userId: string | undefined, key: string): string {
|
||||
const safeKey = assertSafeKnowledgeKey(key);
|
||||
const safeKey = assertFlatWikiKey(key);
|
||||
if (scope === 'GLOBAL') {
|
||||
return `knowledge/global/${safeKey}.md`;
|
||||
}
|
||||
return `knowledge/user/${assertSafePathToken('user id', userId ?? 'local')}/${safeKey}.md`;
|
||||
}
|
||||
|
||||
function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string {
|
||||
/**
 * Checks a single path segment of a `historic-sql/` knowledge key.
 *
 * A segment is accepted when it is non-empty, starts with an ASCII letter,
 * digit, or underscore, and continues with ASCII letters, digits, underscores,
 * or hyphens only. Dots and slashes are therefore rejected.
 *
 * @param segment - One `/`-separated piece of the key.
 * @returns `true` when the segment matches the allowed shape.
 */
function isHistoricSqlPathSegment(segment: string): boolean {
  const allowedSegment = /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/;
  return allowedSegment.exec(segment) !== null;
}
|
||||
|
||||
function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string | null {
|
||||
const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${assertSafePathToken('user id', userId)}/`;
|
||||
return path.slice(prefix.length).replace(/\.md$/, '');
|
||||
const key = path.slice(prefix.length).replace(/\.md$/, '');
|
||||
if (isFlatWikiKey(key)) {
|
||||
return key;
|
||||
}
|
||||
if (
|
||||
scope === 'GLOBAL' &&
|
||||
key.startsWith('historic-sql/') &&
|
||||
key
|
||||
.slice('historic-sql/'.length)
|
||||
.split('/')
|
||||
.every(isHistoricSqlPathSegment)
|
||||
) {
|
||||
return key;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function parseKnowledgePage(key: string, path: string, scope: LocalKnowledgeScope, raw: string): LocalKnowledgePage {
|
||||
|
|
@ -187,6 +199,9 @@ export async function listLocalKnowledgePages(
|
|||
const listed = await project.fileStore.listFiles(root);
|
||||
for (const path of listed.files.filter((file) => file.endsWith('.md')).sort()) {
|
||||
const key = keyFromKnowledgePath(path, scope, userId);
|
||||
if (!key) {
|
||||
continue;
|
||||
}
|
||||
const page = await readPageAtPath(project, key, path, scope);
|
||||
if (page) {
|
||||
pages.push({ key, path, scope, summary: page.summary });
|
||||
|
|
|
|||
|
|
@ -82,6 +82,14 @@ describe('SqliteKnowledgeIndex', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('does not treat empty embeddings as indexed semantic vectors', () => {
|
||||
const index = new SqliteKnowledgeIndex({ dbPath });
|
||||
index.sync([page({ path: 'knowledge/global/revenue.md', key: 'revenue', embedding: [] })]);
|
||||
|
||||
expect(index.getExistingPages().get('knowledge/global/revenue.md')?.embedding).toBeNull();
|
||||
expect(index.searchSemanticCandidates({ queryEmbedding: [1, 0], limit: 10 })).toEqual([]);
|
||||
});
|
||||
|
||||
it('returns semantic lane candidates from stored page embeddings', () => {
|
||||
const index = new SqliteKnowledgeIndex({ dbPath });
|
||||
index.sync([
|
||||
|
|
|
|||
|
|
@ -75,7 +75,9 @@ function parseEmbedding(raw: string | null): number[] | null {
|
|||
}
|
||||
try {
|
||||
const embedding = JSON.parse(raw) as unknown;
|
||||
return Array.isArray(embedding) && embedding.every((value) => typeof value === 'number') ? embedding : null;
|
||||
return Array.isArray(embedding) && embedding.length > 0 && embedding.every((value) => typeof value === 'number')
|
||||
? embedding
|
||||
: null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -170,7 +172,7 @@ export class SqliteKnowledgeIndex {
|
|||
content: searchText,
|
||||
tags: page.tags.join(' '),
|
||||
searchText,
|
||||
embeddingJson: page.embedding ? JSON.stringify(page.embedding) : null,
|
||||
embeddingJson: page.embedding && page.embedding.length > 0 ? JSON.stringify(page.embedding) : null,
|
||||
};
|
||||
upsertPage.run(row);
|
||||
deleteFts.run(row);
|
||||
|
|
|
|||
79
packages/context/src/wiki/tools/wiki-read.tool.test.ts
Normal file
79
packages/context/src/wiki/tools/wiki-read.tool.test.ts
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { ToolSession } from '../../tools/index.js';
|
||||
import { createTouchedSlSources, type ToolContext } from '../../tools/index.js';
|
||||
import { WikiReadTool } from './wiki-read.tool.js';
|
||||
|
||||
describe('WikiReadTool', () => {
|
||||
const baseContext: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' };
|
||||
|
||||
it('reads from the session wiki service when a worktree-scoped ingest session is present', async () => {
|
||||
const rootWikiService = { readPageForUser: vi.fn().mockResolvedValue(null) };
|
||||
const sessionWikiService = {
|
||||
readPageForUser: vi.fn().mockResolvedValue({
|
||||
pageKey: 'staged-page',
|
||||
scope: 'GLOBAL',
|
||||
frontmatter: { summary: 'Staged', tags: ['notion'], refs: ['related'] },
|
||||
content: 'A page written earlier in the same ingest worktree.',
|
||||
}),
|
||||
};
|
||||
const pagesRepository = { findPageByKey: vi.fn().mockResolvedValue({ id: 'page-1' }), incrementUsageCount: vi.fn() };
|
||||
const tool = new WikiReadTool(rootWikiService as any, pagesRepository as any);
|
||||
const session: ToolSession = {
|
||||
connectionId: 'c',
|
||||
isWorktreeScoped: true,
|
||||
preHead: null,
|
||||
touchedSlSources: createTouchedSlSources(),
|
||||
actions: [],
|
||||
semanticLayerService: {} as any,
|
||||
wikiService: sessionWikiService as any,
|
||||
configService: {} as any,
|
||||
gitService: {} as any,
|
||||
};
|
||||
|
||||
const result = await tool.call({ key: 'staged-page' }, { ...baseContext, session });
|
||||
|
||||
expect(rootWikiService.readPageForUser).not.toHaveBeenCalled();
|
||||
expect(sessionWikiService.readPageForUser).toHaveBeenCalledWith('u', 'staged-page');
|
||||
expect(result.structured).toMatchObject({ found: true, blockKey: 'staged-page', scope: 'GLOBAL' });
|
||||
expect(result.markdown).toContain('A page written earlier in the same ingest worktree.');
|
||||
});
|
||||
|
||||
it('rejects slash-delimited page keys with a flat-key suggestion', async () => {
|
||||
const rootWikiService = { readPageForUser: vi.fn().mockResolvedValue(null) };
|
||||
const pagesRepository = { findPageByKey: vi.fn(), incrementUsageCount: vi.fn() };
|
||||
const tool = new WikiReadTool(rootWikiService as any, pagesRepository as any);
|
||||
|
||||
const result = await tool.call({ key: 'orbit/company-overview' }, baseContext);
|
||||
|
||||
expect(result.structured).toEqual({
|
||||
blockKey: 'orbit/company-overview',
|
||||
content: '',
|
||||
scope: '',
|
||||
found: false,
|
||||
});
|
||||
expect(result.markdown).toContain(
|
||||
'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".',
|
||||
);
|
||||
expect(rootWikiService.readPageForUser).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('does not append derived refs to the editable markdown body', async () => {
|
||||
const rootWikiService = {
|
||||
readPageForUser: vi.fn().mockResolvedValue({
|
||||
pageKey: 'orbit-how-we-work',
|
||||
scope: 'GLOBAL',
|
||||
frontmatter: { summary: 'How we work', tags: ['policy'], refs: ['orbit-company-overview'] },
|
||||
content: '## How We Work\n\nUse written-first operating norms.',
|
||||
}),
|
||||
};
|
||||
const pagesRepository = { findPageByKey: vi.fn().mockResolvedValue(null), incrementUsageCount: vi.fn() };
|
||||
const tool = new WikiReadTool(rootWikiService as any, pagesRepository as any);
|
||||
|
||||
const result = await tool.call({ key: 'orbit-how-we-work' }, baseContext);
|
||||
|
||||
expect(result.markdown).toBe('## How We Work\n\nUse written-first operating norms.');
|
||||
expect(result.markdown).not.toContain('See also');
|
||||
expect(result.markdown).not.toContain('[[orbit-company-overview]]');
|
||||
expect(result.structured.refs).toEqual(['orbit-company-overview']);
|
||||
});
|
||||
});
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
import { z } from 'zod';
|
||||
import type { KnowledgeIndexPort } from '../ports.js';
|
||||
import { KnowledgeWikiService } from '../index.js';
|
||||
import { validateFlatWikiKey } from '../keys.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
|
||||
const WikiReadInputSchema = z.object({
|
||||
|
|
@ -34,6 +35,7 @@ export class WikiReadTool extends BaseTool<typeof WikiReadInputSchema> {
|
|||
return (
|
||||
'Load the full content of a knowledge block by its key. ' +
|
||||
'Use this to retrieve detailed rules, preferences, or definitions listed in the <knowledge_index>. ' +
|
||||
'The markdown output is the exact stored page body; use it verbatim for wiki_write replacements. ' +
|
||||
'Call this when the user query relates to a topic covered by an available knowledge block.'
|
||||
);
|
||||
}
|
||||
|
|
@ -43,7 +45,15 @@ export class WikiReadTool extends BaseTool<typeof WikiReadInputSchema> {
|
|||
}
|
||||
|
||||
async call(input: WikiReadInput, context: ToolContext): Promise<ToolOutput<WikiReadStructured>> {
|
||||
const page = await this.wikiService.readPageForUser(context.userId, input.key);
|
||||
const keyValidation = validateFlatWikiKey(input.key);
|
||||
if (!keyValidation.ok) {
|
||||
return {
|
||||
markdown: keyValidation.error,
|
||||
structured: { blockKey: input.key, content: '', scope: '', found: false },
|
||||
};
|
||||
}
|
||||
const wikiService = context.session?.wikiService ?? this.wikiService;
|
||||
const page = await wikiService.readPageForUser(context.userId, input.key);
|
||||
|
||||
if (!page) {
|
||||
return {
|
||||
|
|
@ -61,14 +71,8 @@ export class WikiReadTool extends BaseTool<typeof WikiReadInputSchema> {
|
|||
void this.pagesRepository.incrementUsageCount([indexEntry.id]);
|
||||
}
|
||||
|
||||
let md = `## ${page.pageKey}\n\n${page.content}`;
|
||||
const refs = page.frontmatter.refs;
|
||||
if (refs && refs.length > 0) {
|
||||
md += `\n\nSee also: ${refs.map((r) => `[[${r}]]`).join(', ')}`;
|
||||
}
|
||||
|
||||
return {
|
||||
markdown: md,
|
||||
markdown: page.content,
|
||||
structured: {
|
||||
blockKey: page.pageKey,
|
||||
content: page.content,
|
||||
|
|
|
|||
|
|
@ -22,8 +22,28 @@ describe('WikiRemoveTool', () => {
|
|||
expect(result.markdown).toMatch(/removed/i);
|
||||
});
|
||||
|
||||
it('rejects slash-delimited page keys with a flat-key suggestion', async () => {
|
||||
const wikiService = {
|
||||
deletePage: vi.fn().mockResolvedValue(undefined),
|
||||
deleteFromIndex: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
const pagesRepository = { findPageByKey: vi.fn().mockResolvedValue({ page_key: 'old' }) };
|
||||
const knowledgeRepository = { createEvent: vi.fn().mockResolvedValue(undefined) };
|
||||
const tool = new WikiRemoveTool(wikiService as any, pagesRepository as any, knowledgeRepository as any);
|
||||
|
||||
const result = await tool.call({ key: 'orbit/company-overview' } as any, baseContext);
|
||||
|
||||
expect(result.structured).toEqual({ success: false, key: 'orbit/company-overview' });
|
||||
expect(result.markdown).toContain(
|
||||
'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".',
|
||||
);
|
||||
expect(pagesRepository.findPageByKey).not.toHaveBeenCalled();
|
||||
expect(wikiService.deletePage).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('skips deleteFromIndex when session is worktree-scoped', async () => {
|
||||
const wikiService = {
|
||||
readPage: vi.fn().mockResolvedValue({ pageKey: 'old', frontmatter: { summary: 'Old' }, content: 'body' }),
|
||||
deletePage: vi.fn().mockResolvedValue(undefined),
|
||||
deleteFromIndex: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
|
@ -47,6 +67,35 @@ describe('WikiRemoveTool', () => {
|
|||
expect(session.actions).toContainEqual(expect.objectContaining({ target: 'wiki', type: 'removed', key: 'old' }));
|
||||
});
|
||||
|
||||
it('finds pages through the session wiki service even when the shared index has not seen the worktree write', async () => {
|
||||
const wikiService = {
|
||||
readPage: vi.fn().mockResolvedValue({ pageKey: 'staged', frontmatter: { summary: 'Staged' }, content: 'body' }),
|
||||
deletePage: vi.fn().mockResolvedValue(undefined),
|
||||
deleteFromIndex: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
const pagesRepository = { findPageByKey: vi.fn().mockResolvedValue(null) };
|
||||
const knowledgeRepository = { createEvent: vi.fn().mockResolvedValue(undefined) };
|
||||
const tool = new WikiRemoveTool(wikiService as any, pagesRepository as any, knowledgeRepository as any);
|
||||
const session: ToolSession = {
|
||||
connectionId: 'c',
|
||||
isWorktreeScoped: true,
|
||||
preHead: null,
|
||||
touchedSlSources: createTouchedSlSources(),
|
||||
actions: [],
|
||||
semanticLayerService: {} as any,
|
||||
wikiService: wikiService as any,
|
||||
configService: {} as any,
|
||||
gitService: {} as any,
|
||||
};
|
||||
|
||||
const result = await tool.call({ key: 'staged' } as any, { ...baseContext, session });
|
||||
|
||||
expect(pagesRepository.findPageByKey).not.toHaveBeenCalled();
|
||||
expect(wikiService.readPage).toHaveBeenCalledWith('GLOBAL', null, 'staged');
|
||||
expect(wikiService.deletePage).toHaveBeenCalledTimes(1);
|
||||
expect(result.structured).toEqual({ success: true, key: 'staged' });
|
||||
});
|
||||
|
||||
it('returns a friendly message when the page does not exist', async () => {
|
||||
const wikiService = { deletePage: vi.fn(), deleteFromIndex: vi.fn() };
|
||||
const pagesRepository = { findPageByKey: vi.fn().mockResolvedValue(null) };
|
||||
|
|
|
|||
|
|
@ -3,13 +3,18 @@ import type { KnowledgeIndexPort } from '../ports.js';
|
|||
import type { KnowledgeEventPort } from '../ports.js';
|
||||
type BlockScope = 'GLOBAL' | 'USER';
|
||||
import { KnowledgeWikiService } from '../index.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
import { validateFlatWikiKey } from '../keys.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
|
||||
const SYSTEM_AUTHOR = 'System User';
|
||||
const SYSTEM_EMAIL = 'system@example.com';
|
||||
|
||||
const wikiRemoveInputSchema = z.object({
|
||||
key: z.string().describe('The page key to remove'),
|
||||
rawPaths: z
|
||||
.array(z.string().min(1))
|
||||
.optional()
|
||||
.describe('In ingest sessions, raw source file paths that directly support this removal.'),
|
||||
});
|
||||
|
||||
type WikiRemoveInput = z.infer<typeof wikiRemoveInputSchema>;
|
||||
|
|
@ -42,11 +47,27 @@ export class WikiRemoveTool extends BaseTool<typeof wikiRemoveInputSchema> {
|
|||
const wikiService = context.session?.wikiService ?? this.wikiService;
|
||||
const writesGlobal = !!context.session;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
const keyValidation = validateFlatWikiKey(input.key);
|
||||
if (!keyValidation.ok) {
|
||||
return {
|
||||
markdown: keyValidation.error,
|
||||
structured: { success: false, key: input.key },
|
||||
};
|
||||
}
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return {
|
||||
markdown: `Error: ${rawPathValidation.error}`,
|
||||
structured: { success: false, key: input.key },
|
||||
};
|
||||
}
|
||||
|
||||
const scope: BlockScope = writesGlobal ? 'GLOBAL' : 'USER';
|
||||
const scopeId = scope === 'USER' ? context.userId : null;
|
||||
|
||||
const existing = await this.pagesRepository.findPageByKey(scope, scopeId, input.key);
|
||||
const existing = context.session
|
||||
? await wikiService.readPage(scope, scopeId, input.key)
|
||||
: await this.pagesRepository.findPageByKey(scope, scopeId, input.key);
|
||||
if (!existing) {
|
||||
return {
|
||||
markdown: `Page "${input.key}" not found.`,
|
||||
|
|
@ -74,6 +95,7 @@ export class WikiRemoveTool extends BaseTool<typeof wikiRemoveInputSchema> {
|
|||
type: 'removed',
|
||||
key: input.key,
|
||||
detail: `Removed page "${input.key}"`,
|
||||
...(rawPathValidation.rawPaths ? { rawPaths: rawPathValidation.rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ describe('WikiSearchTool', () => {
|
|||
const search = vi.fn(async () => ({
|
||||
results: [
|
||||
{
|
||||
key: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL' as const,
|
||||
summary: 'Revenue metric definition',
|
||||
score: 0.02459016393442623,
|
||||
|
|
@ -27,8 +27,8 @@ describe('WikiSearchTool', () => {
|
|||
expect(result.structured).toEqual({
|
||||
results: [
|
||||
{
|
||||
blockKey: 'metrics/revenue',
|
||||
path: 'knowledge/global/metrics/revenue.md',
|
||||
blockKey: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
summary: 'Revenue metric definition',
|
||||
score: 0.02459016393442623,
|
||||
matchReasons: ['lexical', 'token'],
|
||||
|
|
@ -36,6 +36,6 @@ describe('WikiSearchTool', () => {
|
|||
],
|
||||
totalFound: 1,
|
||||
});
|
||||
expect(result.markdown).toContain('**metrics/revenue**');
|
||||
expect(result.markdown).toContain('**metrics-revenue**');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { WikiWriteTool } from './wiki-write.tool.js';
|
|||
function makeTool(overrides: any = {}) {
|
||||
const wikiService = {
|
||||
readPage: vi.fn().mockResolvedValue(null),
|
||||
listPageKeys: vi.fn().mockResolvedValue([]),
|
||||
writePage: vi.fn().mockResolvedValue(undefined),
|
||||
syncSinglePage: vi.fn().mockResolvedValue(undefined),
|
||||
...overrides.wikiService,
|
||||
|
|
@ -37,6 +38,21 @@ describe('WikiWriteTool', () => {
|
|||
expect(result.markdown).toMatch(/created/i);
|
||||
});
|
||||
|
||||
it('rejects slash-delimited page keys with a flat-key suggestion', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
const result = await tool.call(
|
||||
{ key: 'orbit/company-overview', summary: 'Company overview', content: '# Orbit' } as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(result.structured).toEqual({ success: false, key: 'orbit/company-overview' });
|
||||
expect(result.markdown).toContain(
|
||||
'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".',
|
||||
);
|
||||
expect(wikiService.readPage).not.toHaveBeenCalled();
|
||||
expect(wikiService.writePage).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('normalizes accidentally escaped markdown newlines before writing', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
|
||||
|
|
@ -100,12 +116,56 @@ describe('WikiWriteTool', () => {
|
|||
expect(result.markdown).toMatch(/content.*or.*replacements/i);
|
||||
});
|
||||
|
||||
it('updates frontmatter only on an existing page while preserving content', async () => {
|
||||
const { tool, wikiService } = makeTool({
|
||||
wikiService: {
|
||||
readPage: vi.fn().mockResolvedValue({
|
||||
pageKey: 'orbit-customers',
|
||||
frontmatter: {
|
||||
summary: 'Customer source details',
|
||||
usage_mode: 'auto',
|
||||
sort_order: 0,
|
||||
tags: ['notion'],
|
||||
refs: ['notion:old'],
|
||||
sl_refs: ['postgres-warehouse/orbit_analytics.customer'],
|
||||
},
|
||||
content: '# Orbit Customers\n\nSource: Notion - Orbit Customers Source.',
|
||||
}),
|
||||
},
|
||||
});
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
key: 'orbit-customers',
|
||||
summary: 'Customer source details mapped to the warehouse customer view',
|
||||
sl_refs: ['postgres-warehouse/orbit_analytics.customer', 'dbt-main/customer'],
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(result.structured).toMatchObject({ success: true, key: 'orbit-customers', action: 'updated' });
|
||||
expect(wikiService.writePage).toHaveBeenCalledWith(
|
||||
'USER',
|
||||
'u',
|
||||
'orbit-customers',
|
||||
expect.objectContaining({
|
||||
summary: 'Customer source details mapped to the warehouse customer view',
|
||||
tags: ['notion'],
|
||||
refs: ['notion:old'],
|
||||
sl_refs: ['postgres-warehouse/orbit_analytics.customer', 'dbt-main/customer'],
|
||||
}),
|
||||
'# Orbit Customers\n\nSource: Notion - Orbit Customers Source.',
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
|
||||
it('writes historic-SQL frontmatter fields', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
|
||||
await tool.call(
|
||||
{
|
||||
key: 'queries/monthly-paid-orders',
|
||||
key: 'monthly-paid-orders',
|
||||
summary: 'Monthly paid orders',
|
||||
tags: ['historic-sql', 'query-pattern'],
|
||||
sl_refs: ['analytics.orders'],
|
||||
|
|
@ -180,7 +240,7 @@ describe('WikiWriteTool', () => {
|
|||
const { tool, wikiService } = makeTool({
|
||||
wikiService: {
|
||||
readPage: vi.fn().mockResolvedValue({
|
||||
pageKey: 'queries/monthly-paid-orders',
|
||||
pageKey: 'monthly-paid-orders',
|
||||
frontmatter: existingFrontmatter,
|
||||
content: 'old body',
|
||||
}),
|
||||
|
|
@ -189,7 +249,7 @@ describe('WikiWriteTool', () => {
|
|||
|
||||
await tool.call(
|
||||
{
|
||||
key: 'queries/monthly-paid-orders',
|
||||
key: 'monthly-paid-orders',
|
||||
summary: 'Monthly paid orders updated',
|
||||
content: '## Monthly paid order count updated',
|
||||
} as any,
|
||||
|
|
@ -201,4 +261,47 @@ describe('WikiWriteTool', () => {
|
|||
summary: 'Monthly paid orders updated',
|
||||
});
|
||||
});
|
||||
|
||||
it('rejects frontmatter refs that target missing wiki pages', async () => {
|
||||
const { tool, wikiService } = makeTool({
|
||||
wikiService: {
|
||||
listPageKeys: vi.fn().mockResolvedValue(['orbit-company-overview']),
|
||||
},
|
||||
});
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
key: 'orbit-how-we-work',
|
||||
summary: 'Operating norms',
|
||||
content: '## How We Work',
|
||||
refs: ['orbit-company-overview', 'orbit-team-lanes-detail'],
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(false);
|
||||
expect(result.markdown).toMatch(/orbit-team-lanes-detail/);
|
||||
expect(wikiService.writePage).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('rejects inline wiki links that target missing wiki pages', async () => {
|
||||
const { tool, wikiService } = makeTool({
|
||||
wikiService: {
|
||||
listPageKeys: vi.fn().mockResolvedValue(['orbit-company-overview']),
|
||||
},
|
||||
});
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
key: 'orbit-how-we-work',
|
||||
summary: 'Operating norms',
|
||||
content: 'See [[orbit-company-overview]] and [[orbit-team-lanes-detail]].',
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(false);
|
||||
expect(result.markdown).toMatch(/orbit-team-lanes-detail/);
|
||||
expect(wikiService.writePage).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -3,8 +3,9 @@ import type { KnowledgeIndexPort } from '../ports.js';
|
|||
import type { KnowledgeEventPort } from '../ports.js';
|
||||
type BlockScope = 'GLOBAL' | 'USER';
|
||||
import { KnowledgeWikiService, type WikiFrontmatter } from '../index.js';
|
||||
import { validateFlatWikiKey } from '../keys.js';
|
||||
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
|
||||
|
||||
const MAX_USER_BLOCKS = 100;
|
||||
const SYSTEM_AUTHOR = 'System User';
|
||||
|
|
@ -37,6 +38,10 @@ const wikiWriteInputSchema = z.object({
|
|||
representative_sql: z.string().optional(),
|
||||
usage: historicSqlUsageFrontmatterSchema.optional(),
|
||||
fingerprints: z.array(z.string()).optional(),
|
||||
rawPaths: z
|
||||
.array(z.string().min(1))
|
||||
.optional()
|
||||
.describe('In ingest sessions, raw source file paths that directly support this wiki action.'),
|
||||
});
|
||||
|
||||
type WikiWriteInput = z.infer<typeof wikiWriteInputSchema>;
|
||||
|
|
@ -45,6 +50,7 @@ interface WikiWriteStructured {
|
|||
success: boolean;
|
||||
key: string;
|
||||
action?: 'created' | 'updated';
|
||||
content?: string;
|
||||
}
|
||||
|
||||
function looksLikeEscapedMarkdown(content: string): boolean {
|
||||
|
|
@ -63,6 +69,71 @@ function normalizeAccidentalEscapedMarkdownNewlines(content: string): string {
|
|||
return content.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n').replace(/\\r/g, '\n');
|
||||
}
|
||||
|
||||
/**
 * Tells whether a reference string has the shape of a wiki page key.
 *
 * A page-key ref starts with a lowercase ASCII letter or digit and continues
 * with lowercase letters, digits, underscores, or hyphens. Anything else
 * (for example `notion:old`, `a/b`, or keys with uppercase letters) is not
 * considered a page key.
 *
 * The pattern is a single character class after the first character. A
 * trailing `(?:-[a-z0-9_]+)*` group would accept exactly the same strings
 * (hyphens are already in the class) while making the engine retry every
 * hyphen position on a non-matching input, so it is deliberately omitted.
 *
 * @param ref - Candidate reference taken from frontmatter or an inline link.
 * @returns `true` when `ref` looks like a wiki page key.
 */
function isWikiPageKeyRef(ref: string): boolean {
  return /^[a-z0-9][a-z0-9_-]*$/.test(ref);
}
|
||||
|
||||
/**
 * Collects the wiki page keys referenced by inline `[[...]]` links.
 *
 * For each `[[...]]` occurrence (single line, no `]` inside), the text before
 * the first `|` is trimmed and kept only when `isWikiPageKeyRef` accepts it.
 *
 * @param content - Markdown body to scan.
 * @returns The distinct referenced page keys in default sort order.
 */
function extractInlineWikiRefs(content: string): string[] {
  const targets = Array.from(content.matchAll(/\[\[([^\]\n]+)\]\]/g), (link) => {
    const inner = link[1] ?? '';
    // Limit 1: only the part before the first `|` is the link target.
    const [beforeAlias = ''] = inner.split('|', 1);
    return beforeAlias.trim();
  });
  const pageKeys = targets.filter((target) => target !== '' && isWikiPageKeyRef(target));
  return Array.from(new Set(pageKeys)).sort();
}
|
||||
|
||||
/**
 * Builds the set of page keys that a page in the given scope can reference.
 *
 * GLOBAL keys are always included. For the `USER` scope, the keys listed for
 * `scopeId` are added on top. The GLOBAL listing is requested first, then the
 * USER listing.
 *
 * @param wikiService - Service whose `listPageKeys` is queried.
 * @param scope - Scope of the page being written.
 * @param scopeId - Owner id passed through for the `USER` listing.
 * @returns A fresh, mutable set of visible page keys.
 */
async function visibleWikiPageKeys(
  wikiService: KnowledgeWikiService,
  scope: BlockScope,
  scopeId: string | null,
): Promise<Set<string>> {
  const visible = new Set<string>(await wikiService.listPageKeys('GLOBAL', null));
  if (scope !== 'USER') {
    return visible;
  }

  const userKeys = await wikiService.listPageKeys('USER', scopeId);
  for (const userKey of userKeys) {
    visible.add(userKey);
  }
  return visible;
}
|
||||
|
||||
/**
 * Finds wiki page references that do not resolve to a visible page.
 *
 * Candidates are the frontmatter `refs` accepted by `isWikiPageKeyRef` plus
 * every key returned by `extractInlineWikiRefs` for `content`. When there are
 * no candidates the wiki service is not queried at all. The page being
 * written (`pageKey`) always counts as available, so self-references pass.
 *
 * @param input - The wiki service and scope to resolve against, the key of
 *   the page being written, its optional frontmatter refs, and its body.
 * @returns The unresolved page keys in default sort order; empty when every
 *   reference resolves.
 */
async function findMissingWikiRefs(input: {
  wikiService: KnowledgeWikiService;
  scope: BlockScope;
  scopeId: string | null;
  pageKey: string;
  refs?: string[];
  content: string;
}): Promise<string[]> {
  const frontmatterRefs = (input.refs ?? []).filter((ref) => isWikiPageKeyRef(ref));
  const referenced = new Set<string>([...frontmatterRefs, ...extractInlineWikiRefs(input.content)]);
  if (referenced.size === 0) {
    return [];
  }

  const known = await visibleWikiPageKeys(input.wikiService, input.scope, input.scopeId);
  known.add(input.pageKey);

  const missing: string[] = [];
  for (const ref of referenced) {
    if (!known.has(ref)) {
      missing.push(ref);
    }
  }
  missing.sort();
  return missing;
}
|
||||
|
||||
export class WikiWriteTool extends BaseTool<typeof wikiWriteInputSchema> {
|
||||
readonly name = 'wiki_write';
|
||||
|
||||
|
|
@ -77,6 +148,7 @@ export class WikiWriteTool extends BaseTool<typeof wikiWriteInputSchema> {
|
|||
get description(): string {
|
||||
return `<purpose>
|
||||
Create or update a knowledge page. Provide content for create/rewrite, or replacements for targeted edits.
|
||||
For existing pages, you may provide only frontmatter fields such as summary, tags, refs, or sl_refs to update metadata while preserving content.
|
||||
tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to clear, [values] to set.
|
||||
</purpose>`;
|
||||
}
|
||||
|
|
@ -89,10 +161,17 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to
|
|||
const wikiService = context.session?.wikiService ?? this.wikiService;
|
||||
const writesGlobal = !!context.session;
|
||||
const skipIndex = context.session?.isWorktreeScoped === true;
|
||||
|
||||
if (!input.content && (!input.replacements || input.replacements.length === 0)) {
|
||||
const keyValidation = validateFlatWikiKey(input.key);
|
||||
if (!keyValidation.ok) {
|
||||
return {
|
||||
markdown: 'Error: provide either content (for create/rewrite) or replacements (for edits).',
|
||||
markdown: keyValidation.error,
|
||||
structured: { success: false, key: input.key },
|
||||
};
|
||||
}
|
||||
const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths);
|
||||
if (!rawPathValidation.ok) {
|
||||
return {
|
||||
markdown: `Error: ${rawPathValidation.error}`,
|
||||
structured: { success: false, key: input.key },
|
||||
};
|
||||
}
|
||||
|
|
@ -101,6 +180,16 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to
|
|||
const scopeId = scope === 'USER' ? context.userId : null;
|
||||
const existing = await wikiService.readPage(scope, scopeId, input.key);
|
||||
|
||||
const content = input.content;
|
||||
const hasContent = typeof content === 'string' && content.length > 0;
|
||||
const hasReplacements = !!input.replacements && input.replacements.length > 0;
|
||||
if (!existing && !hasContent && !hasReplacements) {
|
||||
return {
|
||||
markdown: 'Error: provide either content (for create/rewrite) or replacements (for edits).',
|
||||
structured: { success: false, key: input.key },
|
||||
};
|
||||
}
|
||||
|
||||
if (!existing && !input.content) {
|
||||
return {
|
||||
markdown: `Page "${input.key}" does not exist. Provide content to create it.`,
|
||||
|
|
@ -140,9 +229,9 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to
|
|||
fingerprints: input.fingerprints === undefined ? existingFm?.fingerprints : input.fingerprints,
|
||||
};
|
||||
|
||||
if (input.content) {
|
||||
finalContent = normalizeAccidentalEscapedMarkdownNewlines(input.content);
|
||||
} else {
|
||||
if (hasContent) {
|
||||
finalContent = normalizeAccidentalEscapedMarkdownNewlines(content);
|
||||
} else if (hasReplacements) {
|
||||
const editResult = applySqlEdits(existing?.content ?? '', input.replacements ?? []);
|
||||
if (!editResult.success) {
|
||||
return {
|
||||
|
|
@ -151,6 +240,25 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to
|
|||
};
|
||||
}
|
||||
finalContent = editResult.sql;
|
||||
} else {
|
||||
finalContent = existing?.content ?? '';
|
||||
}
|
||||
|
||||
const missingRefs = await findMissingWikiRefs({
|
||||
wikiService,
|
||||
scope,
|
||||
scopeId,
|
||||
pageKey: input.key,
|
||||
refs: finalFm.refs,
|
||||
content: finalContent,
|
||||
});
|
||||
if (missingRefs.length > 0) {
|
||||
return {
|
||||
markdown:
|
||||
`Error: wiki references target missing page(s): ${missingRefs.join(', ')}. ` +
|
||||
'Create those pages first, retarget the links, or remove the refs.',
|
||||
structured: { success: false, key: input.key },
|
||||
};
|
||||
}
|
||||
|
||||
await wikiService.writePage(scope, scopeId, input.key, finalFm, finalContent, SYSTEM_AUTHOR, SYSTEM_EMAIL);
|
||||
|
|
@ -172,12 +280,26 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to
|
|||
|
||||
const action = existing ? 'updated' : 'created';
|
||||
if (context.session) {
|
||||
context.session.actions.push({ target: 'wiki', type: action, key: input.key, detail: input.summary });
|
||||
context.session.actions.push({
|
||||
target: 'wiki',
|
||||
type: action,
|
||||
key: input.key,
|
||||
detail: input.summary,
|
||||
...(rawPathValidation.rawPaths ? { rawPaths: rawPathValidation.rawPaths } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
// When the LLM used `replacements` (edit mode), it doesn't have the
|
||||
// post-edit content cached. Returning the result here prevents the
|
||||
// common bug where a follow-up edit uses an oldText string that no
|
||||
// longer matches because a prior edit already changed the page.
|
||||
const markdown = hasReplacements
|
||||
? `Page "${input.key}" ${action}.\n\nCurrent content (use for subsequent edits):\n\n${finalContent}`
|
||||
: `Page "${input.key}" ${action}.`;
|
||||
|
||||
return {
|
||||
markdown: `Page "${input.key}" ${action}.`,
|
||||
structured: { success: true, key: input.key, action },
|
||||
markdown,
|
||||
structured: { success: true, key: input.key, action, content: finalContent },
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue