fix: accept ingest wiki forward refs (#125)

This commit is contained in:
Andrey Avtomonov 2026-05-17 10:10:14 +02:00 committed by GitHub
parent 74be832aea
commit f49672ba5b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 403 additions and 66 deletions

View file

@ -412,6 +412,127 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
); );
}); });
// Scenario: the reconcile pass records a wiki action for `page-a`, but the page on disk
// references `missing-page`, which the mocked wiki never lists. The run is expected to
// reject, mark the run failed, and skip the squash merge.
it('fails before squash when reconciliation leaves a touched wiki page with dangling refs', async () => {
const deps = makeDeps();
// Holds the latest tool session passed to the toolset factory so the fake agent loop can record actions on it.
let currentToolSession: any = null;
// Worktree-scoped wiki double: only `page-a` exists, and both its frontmatter refs and its body point at `missing-page`.
const scopedWiki = {
listPageKeys: vi.fn().mockResolvedValue(['page-a']),
readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, key: string) => {
if (key === 'page-a') {
return Promise.resolve({
pageKey: 'page-a',
frontmatter: { summary: 'Page A', usage_mode: 'auto', refs: ['missing-page'] },
content: 'See [[missing-page]].',
});
}
return Promise.resolve(null);
}),
};
deps.wikiService.forWorktree.mockReturnValue(scopedWiki);
// The toolset itself is empty; the factory mock exists only to capture the session object.
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
// Fake agent loop: the operation name in the telemetry tags decides which action is recorded.
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
currentToolSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: 'Orders source' });
}
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
currentToolSession.actions.push({ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' });
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
// Staging steps are replaced through an `any` cast so the test supplies fixed hashes and a fixed staged directory.
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
// The rejection message must name the offending page and the missing target.
await expect(
runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
}),
).rejects.toThrow(/wiki references target missing page\(s\): page-a -> missing-page/);
expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1');
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
});
// Scenario: the reconcile pass records `page-a` and `page-b`, which reference each other.
// Both pages are listed by the mocked wiki, so the run is expected to finish without
// failed work units, squash-merge, and never mark the run failed.
it('allows reconciliation to save circular wiki refs once both pages exist', async () => {
const deps = makeDeps();
// Holds the latest tool session passed to the toolset factory so the fake agent loop can record actions on it.
let currentToolSession: any = null;
// Worktree-scoped wiki double with two pages that point at each other via frontmatter refs and inline links.
const scopedWiki = {
listPageKeys: vi.fn().mockResolvedValue(['page-a', 'page-b']),
readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, key: string) => {
if (key === 'page-a') {
return Promise.resolve({
pageKey: 'page-a',
frontmatter: { summary: 'Page A', usage_mode: 'auto', refs: ['page-b'] },
content: 'See [[page-b]].',
});
}
if (key === 'page-b') {
return Promise.resolve({
pageKey: 'page-b',
frontmatter: { summary: 'Page B', usage_mode: 'auto', refs: ['page-a'] },
content: 'See [[page-a]].',
});
}
return Promise.resolve(null);
}),
};
deps.wikiService.forWorktree.mockReturnValue(scopedWiki);
// The toolset itself is empty; the factory mock exists only to capture the session object.
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
// Fake agent loop: the operation name in the telemetry tags decides which actions are recorded.
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
currentToolSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: 'Orders source' });
}
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
currentToolSession.actions.push(
{ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' },
{ target: 'wiki', type: 'created', key: 'page-b', detail: 'Page B' },
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
// Staging steps are replaced through an `any` cast so the test supplies fixed hashes and a fixed staged directory.
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const result = await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
// Mutual references resolve, so nothing fails and the merge proceeds.
expect(result.failedWorkUnits).toEqual([]);
expect(deps.gitService.squashMergeIntoMain).toHaveBeenCalled();
expect(deps.runsRepo.markFailed).not.toHaveBeenCalled();
});
it('threads target warehouse connection names into WorkUnit and reconcile tool sessions', async () => { it('threads target warehouse connection names into WorkUnit and reconcile tool sessions', async () => {
const deps = makeDeps(); const deps = makeDeps();
const sessions: any[] = []; const sessions: any[] = [];

View file

@ -7,6 +7,7 @@ import { createRuntimeToolDescriptorFromAiTool, type KtxRuntimeToolSet } from '.
import type { CaptureSession, MemoryAction } from '../memory/index.js'; import type { CaptureSession, MemoryAction } from '../memory/index.js';
import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js'; import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
import { createTouchedSlSources, type ToolContext, type ToolSession } from '../tools/index.js'; import { createTouchedSlSources, type ToolContext, type ToolSession } from '../tools/index.js';
import { findDanglingWikiRefsForActions } from '../wiki/wiki-ref-validation.js';
import { actionTargetConnectionId } from './action-identity.js'; import { actionTargetConnectionId } from './action-identity.js';
import { NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN } from './adapters/notion/types.js'; import { NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN } from './adapters/notion/types.js';
import { selectRelevantCanonicalPins } from './canonical-pins.js'; import { selectRelevantCanonicalPins } from './canonical-pins.js';
@ -762,6 +763,13 @@ export class IngestBundleRunner {
agentRunner: this.deps.agentRunner, agentRunner: this.deps.agentRunner,
validateTouchedSources: (touched) => validateTouchedSources: (touched) =>
validateWuTouchedSources({ ...slValidationDeps, slValidator: this.deps.slValidator }, touched), validateWuTouchedSources({ ...slValidationDeps, slValidator: this.deps.slValidator }, touched),
validateWikiRefs: (actions) =>
findDanglingWikiRefsForActions({
wikiService: scopedWikiService,
scope: 'GLOBAL',
scopeId: null,
actions,
}),
resetHardTo: (targetSha) => sessionWorktree.git.resetHardTo(targetSha), resetHardTo: (targetSha) => sessionWorktree.git.resetHardTo(targetSha),
buildSystemPrompt: () => systemPrompt, buildSystemPrompt: () => systemPrompt,
buildUserPrompt: (wuInner) => buildWuUserPrompt({ wu: wuInner, wikiIndex, slIndex, priorProvenance }), buildUserPrompt: (wuInner) => buildWuUserPrompt({ wu: wuInner, wikiIndex, slIndex, priorProvenance }),
@ -1128,6 +1136,17 @@ export class IngestBundleRunner {
}); });
} }
const danglingReconcileWikiRefs = await findDanglingWikiRefsForActions({
wikiService: rcScopedWiki,
scope: 'GLOBAL',
scopeId: null,
actions: reconcileActions,
});
if (danglingReconcileWikiRefs.length > 0) {
await this.deps.runs.markFailed(runRow.id);
throw new Error(`wiki references target missing page(s): ${danglingReconcileWikiRefs.join(', ')}`);
}
const candidateSummaryAfterReconcile = const candidateSummaryAfterReconcile =
contextReport && this.deps.contextEvidenceCandidates contextReport && this.deps.contextEvidenceCandidates
? await this.deps.contextEvidenceCandidates.getCandidateSummary(runRow.id) ? await this.deps.contextEvidenceCandidates.getCandidateSummary(runRow.id)

View file

@ -121,6 +121,41 @@ describe('Stage 3 — executeWorkUnit', () => {
expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); expect(deps.resetHardTo).toHaveBeenCalledWith('pre');
}); });
// Scenario: the agent loop stops naturally after recording a wiki action, but the injected
// `validateWikiRefs` hook reports a dangling reference. The WorkUnit is expected to fail,
// report no actions or touched sources, and hard-reset to the first SHA returned by `revParseHead`.
it('dangling wiki refs reset to the pre-WU SHA and mark WU failed after the agent loop', async () => {
const deps = makeDeps();
// First call yields the pre-WU SHA, second call the post-WU SHA.
deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
deps.agentRunner.runLoop = vi.fn().mockImplementation(() => {
deps.sessionActions.push({ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' });
return Promise.resolve({ stopReason: 'natural' });
});
// Assigned through an `any` cast; the hook is an optional member of WorkUnitExecutionDeps.
(deps as any).validateWikiRefs = vi.fn().mockResolvedValue(['page-a -> page-b']);
const outcome = await executeWorkUnit(deps, makeWu());
expect(outcome.status).toBe('failed');
expect(outcome.reason).toContain('wiki references target missing page(s): page-a -> page-b');
expect(outcome.actions).toEqual([]);
expect(outcome.touchedSlSources).toEqual([]);
expect(deps.resetHardTo).toHaveBeenCalledWith('pre');
});
// Scenario: the agent loop records two wiki actions and the injected `validateWikiRefs` hook
// reports nothing dangling. The WorkUnit is expected to succeed, keep both actions in order,
// and never trigger a hard reset.
it('resolved wiki refs pass post-WU validation and preserve actions', async () => {
const deps = makeDeps();
// First call yields the pre-WU SHA, second call the post-WU SHA.
deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');
deps.agentRunner.runLoop = vi.fn().mockImplementation(() => {
deps.sessionActions.push({ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' });
deps.sessionActions.push({ target: 'wiki', type: 'created', key: 'page-b', detail: 'Page B' });
return Promise.resolve({ stopReason: 'natural' });
});
// An empty result from the hook means every reference resolved.
(deps as any).validateWikiRefs = vi.fn().mockResolvedValue([]);
const outcome = await executeWorkUnit(deps, makeWu());
expect(outcome.status).toBe('success');
expect(outcome.actions.map((action) => action.key)).toEqual(['page-a', 'page-b']);
expect(deps.resetHardTo).not.toHaveBeenCalled();
});
it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => { it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => {
const deps = makeDeps(); const deps = makeDeps();
deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post');

View file

@ -14,6 +14,7 @@ export interface TouchedValidationResult {
export interface WorkUnitExecutionDeps { export interface WorkUnitExecutionDeps {
sessionWorktreeGit: { revParseHead(): Promise<string | null> }; sessionWorktreeGit: { revParseHead(): Promise<string | null> };
agentRunner: AgentRunnerPort; agentRunner: AgentRunnerPort;
validateWikiRefs?: (actions: MemoryAction[]) => Promise<string[]>;
validateTouchedSources: (touched: TouchedSlSource[]) => Promise<TouchedValidationResult>; validateTouchedSources: (touched: TouchedSlSource[]) => Promise<TouchedValidationResult>;
resetHardTo: (targetSha: string) => Promise<void>; resetHardTo: (targetSha: string) => Promise<void>;
buildSystemPrompt: (wu: WorkUnit) => string; buildSystemPrompt: (wu: WorkUnit) => string;
@ -133,6 +134,11 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit)
return failWithReset(`${toolFailureCount} tool call(s) failed during WorkUnit ${wu.unitKey}`); return failWithReset(`${toolFailureCount} tool call(s) failed during WorkUnit ${wu.unitKey}`);
} }
const danglingWikiRefs = (await deps.validateWikiRefs?.(deps.sessionActions)) ?? [];
if (danglingWikiRefs.length > 0) {
return failWithReset(`wiki references target missing page(s): ${danglingWikiRefs.join(', ')}`);
}
const touched = listTouchedSlSources(deps.captureSession.touchedSlSources); const touched = listTouchedSlSources(deps.captureSession.touchedSlSources);
if (touched.length > 0) { if (touched.length > 0) {
const validation = await deps.validateTouchedSources(touched); const validation = await deps.validateTouchedSources(touched);

View file

@ -304,4 +304,40 @@ describe('WikiWriteTool', () => {
expect(result.markdown).toMatch(/orbit-team-lanes-detail/); expect(result.markdown).toMatch(/orbit-team-lanes-detail/);
expect(wikiService.writePage).not.toHaveBeenCalled(); expect(wikiService.writePage).not.toHaveBeenCalled();
}); });
// Scenario: a wiki_write call references `orbit-team-lanes-detail`, which the mocked wiki does
// not list, while the session carries an `ingest` descriptor. The write is expected to go
// through and be recorded as a `created` wiki action on the session.
it('accepts forward refs during ingest sessions for post-pass validation', async () => {
// Only `orbit-company-overview` exists, so `orbit-team-lanes-detail` is a forward reference.
const { tool, wikiService } = makeTool({
wikiService: {
listPageKeys: vi.fn().mockResolvedValue(['orbit-company-overview']),
},
});
// Session double; the `ingest` field is what distinguishes this call from the non-ingest cases.
const session: ToolSession = {
connectionId: 'conn-1',
isWorktreeScoped: true,
preHead: null,
touchedSlSources: createTouchedSlSources(),
actions: [],
semanticLayerService: {} as any,
wikiService: wikiService as any,
configService: {} as any,
gitService: {} as any,
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
};
// The missing target appears both inline in the content and in the explicit refs list.
const result = await tool.call(
{
key: 'orbit-how-we-work',
summary: 'Operating norms',
content: 'See [[orbit-team-lanes-detail]].',
refs: ['orbit-company-overview', 'orbit-team-lanes-detail'],
} as any,
{ ...baseContext, session },
);
expect(result.structured).toMatchObject({ success: true, key: 'orbit-how-we-work', action: 'created' });
expect(wikiService.writePage).toHaveBeenCalledTimes(1);
expect(session.actions).toContainEqual(
expect.objectContaining({ target: 'wiki', type: 'created', key: 'orbit-how-we-work' }),
);
});
}); });

View file

@ -4,6 +4,7 @@ import type { KnowledgeEventPort } from '../ports.js';
type BlockScope = 'GLOBAL' | 'USER'; type BlockScope = 'GLOBAL' | 'USER';
import { KnowledgeWikiService, type WikiFrontmatter } from '../index.js'; import { KnowledgeWikiService, type WikiFrontmatter } from '../index.js';
import { validateFlatWikiKey } from '../keys.js'; import { validateFlatWikiKey } from '../keys.js';
import { findMissingWikiRefs } from '../wiki-ref-validation.js';
import { applySqlEdits } from '../../tools/sql-edit-replacer.js'; import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
import { BaseTool, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js'; import { BaseTool, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js';
@ -69,71 +70,6 @@ function normalizeAccidentalEscapedMarkdownNewlines(content: string): string {
return content.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n').replace(/\\r/g, '\n'); return content.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n').replace(/\\r/g, '\n');
} }
function isWikiPageKeyRef(ref: string): boolean {
return /^[a-z0-9][a-z0-9_-]*(?:-[a-z0-9_]+)*$/.test(ref);
}
function extractInlineWikiRefs(content: string): string[] {
const refs = new Set<string>();
const re = /\[\[([^\]\n]+)\]\]/g;
for (const match of content.matchAll(re)) {
const target = match[1]?.split('|', 1)[0]?.trim();
if (target && isWikiPageKeyRef(target)) {
refs.add(target);
}
}
return [...refs].sort();
}
async function visibleWikiPageKeys(
wikiService: KnowledgeWikiService,
scope: BlockScope,
scopeId: string | null,
): Promise<Set<string>> {
const keys = new Set<string>();
if (scope === 'USER') {
for (const key of await wikiService.listPageKeys('GLOBAL', null)) {
keys.add(key);
}
for (const key of await wikiService.listPageKeys('USER', scopeId)) {
keys.add(key);
}
return keys;
}
for (const key of await wikiService.listPageKeys('GLOBAL', null)) {
keys.add(key);
}
return keys;
}
async function findMissingWikiRefs(input: {
wikiService: KnowledgeWikiService;
scope: BlockScope;
scopeId: string | null;
pageKey: string;
refs?: string[];
content: string;
}): Promise<string[]> {
const candidates = new Set<string>();
for (const ref of input.refs ?? []) {
if (isWikiPageKeyRef(ref)) {
candidates.add(ref);
}
}
for (const ref of extractInlineWikiRefs(input.content)) {
candidates.add(ref);
}
if (candidates.size === 0) {
return [];
}
const available = await visibleWikiPageKeys(input.wikiService, input.scope, input.scopeId);
available.add(input.pageKey);
return [...candidates].filter((ref) => !available.has(ref)).sort();
}
export class WikiWriteTool extends BaseTool<typeof wikiWriteInputSchema> { export class WikiWriteTool extends BaseTool<typeof wikiWriteInputSchema> {
readonly name = 'wiki_write'; readonly name = 'wiki_write';
@ -253,7 +189,8 @@ Keys must be flat file names, not directory paths. Use tags/source frontmatter f
refs: finalFm.refs, refs: finalFm.refs,
content: finalContent, content: finalContent,
}); });
if (missingRefs.length > 0) { const deferMissingRefs = !!context.session?.ingest;
if (!deferMissingRefs && missingRefs.length > 0) {
return { return {
markdown: markdown:
`Error: wiki references target missing page(s): ${missingRefs.join(', ')}. ` + `Error: wiki references target missing page(s): ${missingRefs.join(', ')}. ` +

View file

@ -0,0 +1,74 @@
import { describe, expect, it, vi } from 'vitest';
import { findDanglingWikiRefsForActions } from './wiki-ref-validation.js';
/**
 * Builds an in-memory wiki service double backed by `pages`.
 *
 * `listPageKeys` resolves to the keys of `pages` (captured when the double is created).
 * `readPage` ignores scope and scope id and resolves to a page object for a known key,
 * or `null` for an unknown one. A page's summary is its own key and its content
 * defaults to the empty string.
 */
function makeWikiService(pages: Record<string, { refs?: string[]; content?: string }>) {
  const listPageKeys = vi.fn().mockResolvedValue(Object.keys(pages));

  const readPage = vi
    .fn()
    .mockImplementation((_scope: string, _scopeId: string | null, pageKey: string) => {
      const stored = pages[pageKey];
      const result = stored
        ? {
            pageKey,
            frontmatter: { summary: pageKey, usage_mode: 'auto', refs: stored.refs },
            content: stored.content ?? '',
          }
        : null;
      return Promise.resolve(result);
    });

  return { listPageKeys, readPage };
}
describe('wiki ref validation', () => {
  type Pages = Parameters<typeof makeWikiService>[0];
  type Actions = Parameters<typeof findDanglingWikiRefsForActions>[0]['actions'];

  // Runs the validator in the GLOBAL scope against an in-memory wiki built from `pages`.
  const danglingFor = (pages: Pages, actions: Actions) =>
    findDanglingWikiRefsForActions({
      wikiService: makeWikiService(pages) as any,
      scope: 'GLOBAL',
      scopeId: null,
      actions,
    });

  // Two pages created in the same batch may point at each other.
  it('allows circular refs once both touched pages exist', async () => {
    const result = await danglingFor(
      {
        'page-a': { refs: ['page-b'], content: 'See [[page-b]].' },
        'page-b': { refs: ['page-a'], content: 'See [[page-a]].' },
      },
      [
        { target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' },
        { target: 'wiki', type: 'created', key: 'page-b', detail: 'Page B' },
      ],
    );

    expect(result).toEqual([]);
  });

  // `page-b` is absent from the wiki and its action is `removed`, so the ref from `page-a` dangles.
  it('treats removed pages as unavailable ref targets', async () => {
    const result = await danglingFor(
      {
        'page-a': { refs: ['page-b'], content: 'See [[page-b]].' },
      },
      [
        { target: 'wiki', type: 'updated', key: 'page-a', detail: 'Page A' },
        { target: 'wiki', type: 'removed', key: 'page-b', detail: 'Page B' },
      ],
    );

    expect(result).toEqual(['page-a -> page-b']);
  });

  // Only pages named in the action list are inspected; `old-page` is not among them.
  it('does not validate existing dangling refs on untouched pages', async () => {
    const result = await danglingFor(
      {
        'page-a': { refs: [], content: '' },
        'old-page': { refs: ['missing-page'], content: 'See [[missing-page]].' },
      },
      [{ target: 'wiki', type: 'updated', key: 'page-a', detail: 'Page A' }],
    );

    expect(result).toEqual([]);
  });
});

View file

@ -0,0 +1,109 @@
import type { MemoryAction } from '../tools/index.js';
import { isFlatWikiKey } from './keys.js';
import type { KnowledgeWikiService } from './knowledge-wiki.service.js';
import type { WikiScope } from './types.js';
/**
 * Shape of a reference that names a wiki page: a lowercase ASCII letter or digit
 * followed by any mix of lowercase letters, digits, `_` and `-`.
 *
 * The earlier pattern carried an extra trailing `(?:-[a-z0-9_]+)*` group. Everything that
 * group could match is already covered by the `[a-z0-9_-]*` class, so it accepted no
 * additional strings; it only made the engine retry every dash position on non-matching
 * input. Dropping it keeps the accepted set identical and the match linear.
 */
const WIKI_PAGE_KEY_REF_PATTERN = /^[a-z0-9][a-z0-9_-]*$/;

/**
 * Reports whether `ref` is shaped like a wiki page key.
 *
 * @param ref - Candidate reference text, e.g. a frontmatter ref or an inline link target.
 * @returns `true` when the whole string matches the page-key shape, `false` otherwise
 *   (including for the empty string).
 */
function isWikiPageKeyRef(ref: string): boolean {
  return WIKI_PAGE_KEY_REF_PATTERN.test(ref);
}
/**
 * Collects the page keys referenced by inline `[[target]]` links in `content`.
 *
 * For each link, only the text before the first `|` is considered, with surrounding
 * whitespace trimmed. Targets that do not pass `isWikiPageKeyRef` are ignored.
 *
 * @param content - Page body to scan.
 * @returns The distinct link targets, sorted with the default string ordering.
 */
function extractInlineWikiRefs(content: string): string[] {
  const linkPattern = /\[\[([^\]\n]+)\]\]/g;
  const targets = Array.from(content.matchAll(linkPattern), (hit) => hit[1]?.split('|', 1)[0]?.trim());

  const unique = new Set<string>();
  for (const candidate of targets) {
    if (!candidate) {
      continue;
    }
    if (isWikiPageKeyRef(candidate)) {
      unique.add(candidate);
    }
  }
  return Array.from(unique).sort();
}
/**
 * Lists the page keys that a page in the given scope can reference.
 *
 * GLOBAL keys are always included. For the `USER` scope, the keys listed for
 * `scopeId` are added on top. The two listings are requested one after the other,
 * GLOBAL first.
 *
 * @param wikiService - Service used to list page keys.
 * @param scope - Scope of the referencing page.
 * @param scopeId - Scope id passed to the `USER` listing; unused for other scopes.
 * @returns The set of visible page keys.
 */
async function visibleWikiPageKeys(
  wikiService: KnowledgeWikiService,
  scope: WikiScope,
  scopeId: string | null,
): Promise<Set<string>> {
  const visible = new Set<string>();

  const globalKeys = await wikiService.listPageKeys('GLOBAL', null);
  for (const globalKey of globalKeys) {
    visible.add(globalKey);
  }
  if (scope !== 'USER') {
    return visible;
  }

  const userKeys = await wikiService.listPageKeys('USER', scopeId);
  for (const userKey of userKeys) {
    visible.add(userKey);
  }
  return visible;
}
/**
 * Finds the references of a single page that do not resolve to a visible wiki page.
 *
 * Candidates are the explicit `refs` that look like page keys plus the inline
 * `[[...]]` link targets found in `content`. A page may always reference itself.
 * When there are no candidates, the wiki is not queried at all.
 *
 * @param input.wikiService - Service used to list visible page keys.
 * @param input.scope - Scope of the page being checked.
 * @param input.scopeId - Scope id of the page being checked.
 * @param input.pageKey - Key of the page being checked; treated as an available target.
 * @param input.refs - Explicit references; entries not shaped like page keys are ignored.
 * @param input.content - Page body scanned for inline links.
 * @returns The unresolved reference keys, de-duplicated and sorted.
 */
export async function findMissingWikiRefs(input: {
  wikiService: KnowledgeWikiService;
  scope: WikiScope;
  scopeId: string | null;
  pageKey: string;
  refs?: string[];
  content: string;
}): Promise<string[]> {
  const { wikiService, scope, scopeId, pageKey, refs, content } = input;

  const wanted = new Set<string>();
  for (const explicitRef of refs ?? []) {
    if (!isWikiPageKeyRef(explicitRef)) {
      continue;
    }
    wanted.add(explicitRef);
  }
  extractInlineWikiRefs(content).forEach((inlineRef) => wanted.add(inlineRef));

  // Nothing to resolve: skip the page-key listing entirely.
  if (wanted.size === 0) {
    return [];
  }

  const known = await visibleWikiPageKeys(wikiService, scope, scopeId);
  known.add(pageKey);

  const unresolved: string[] = [];
  for (const ref of wanted) {
    if (!known.has(ref)) {
      unresolved.push(ref);
    }
  }
  unresolved.sort();
  return unresolved;
}
/**
 * Checks the wiki pages named by a batch of actions for references that do not resolve.
 *
 * Only actions whose target is `wiki` and whose key passes `isFlatWikiKey` are considered.
 * When a key appears more than once, the last action for it decides its type. Pages
 * whose final action is `removed` are not read. The remaining pages are read one at a
 * time in `localeCompare` key order and validated with `findMissingWikiRefs`.
 *
 * @param input.wikiService - Service used to read pages and list page keys.
 * @param input.scope - Scope in which the pages are read and validated.
 * @param input.scopeId - Scope id in which the pages are read and validated.
 * @param input.actions - Actions recorded during the session.
 * @returns One `"<pageKey> -> <missingRef>"` entry per unresolved reference, plus
 *   `"<pageKey> -> (missing page)"` for any page that could not be read.
 */
export async function findDanglingWikiRefsForActions(input: {
  wikiService: KnowledgeWikiService;
  scope: WikiScope;
  scopeId: string | null;
  actions: MemoryAction[];
}): Promise<string[]> {
  const { wikiService, scope, scopeId, actions } = input;

  // Later actions overwrite earlier ones, so each key ends up with its final action type.
  const finalTypeByKey = new Map<string, MemoryAction['type']>();
  for (const action of actions) {
    if (action.target !== 'wiki') {
      continue;
    }
    if (!isFlatWikiKey(action.key)) {
      continue;
    }
    finalTypeByKey.set(action.key, action.type);
  }

  const keysToCheck = Array.from(finalTypeByKey.keys())
    .sort((a, b) => a.localeCompare(b))
    .filter((key) => finalTypeByKey.get(key) !== 'removed');

  const report: string[] = [];
  for (const pageKey of keysToCheck) {
    const page = await wikiService.readPage(scope, scopeId, pageKey);
    if (!page) {
      // The action names a page that cannot be read back.
      report.push(`${pageKey} -> (missing page)`);
      continue;
    }

    const unresolved = await findMissingWikiRefs({
      wikiService,
      scope,
      scopeId,
      pageKey,
      refs: page.frontmatter.refs,
      content: page.content,
    });
    unresolved.forEach((ref) => {
      report.push(`${pageKey} -> ${ref}`);
    });
  }
  return report;
}