mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-19 08:28:06 +02:00
Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor.
2840 lines
113 KiB
TypeScript
2840 lines
113 KiB
TypeScript
import { cp, mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|
import { dirname, join } from 'node:path';
|
|
import pLimit from 'p-limit';
|
|
import { z } from 'zod';
|
|
import { type KtxLogger, noopLogger } from '../core/index.js';
|
|
import { createRuntimeToolDescriptorFromAiTool, type KtxRuntimeToolSet } from '../llm/index.js';
|
|
import type { CaptureSession, MemoryAction } from '../memory/index.js';
|
|
import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
|
|
import { createTouchedSlSources, type ToolContext, type ToolSession, type TouchedSlSource } from '../tools/index.js';
|
|
import type { KnowledgeWikiService } from '../wiki/index.js';
|
|
import { findDanglingWikiRefsForActions } from '../wiki/wiki-ref-validation.js';
|
|
import { actionTargetConnectionId } from './action-identity.js';
|
|
import { NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN } from './adapters/notion/types.js';
|
|
import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js';
|
|
import { selectRelevantCanonicalPins } from './canonical-pins.js';
|
|
import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js';
|
|
import {
|
|
compareFinalizationDeclarations,
|
|
deriveFinalizationTouchedSources,
|
|
deriveFinalizationWikiPageKeys,
|
|
} from './finalization-scope.js';
|
|
import { FileIngestTraceWriter, ingestTracePathForJob, type IngestTraceWriter, traceTimed } from './ingest-trace.js';
|
|
import { integrateWorkUnitPatch } from './isolated-diff/patch-integrator.js';
|
|
import { resolveTextualConflict } from './isolated-diff/textual-conflict-resolver.js';
|
|
import { runIsolatedWorkUnit } from './isolated-diff/work-unit-executor.js';
|
|
import { sanitizeMemoryFlowError } from './memory-flow/live-buffer.js';
|
|
import type { CanonicalPin } from './canonical-pins.js';
|
|
import type { MemoryFlowEvent, MemoryFlowEventSink, MemoryFlowPlannedWorkUnit } from './memory-flow/types.js';
|
|
import type {
|
|
ContextEvidenceIndexSummary,
|
|
IngestBundleRunnerDeps,
|
|
IngestProvenanceInsert,
|
|
IngestProvenanceRow,
|
|
IngestRunsPort,
|
|
IngestSessionWorktree,
|
|
PageTriageRunResult,
|
|
} from './ports.js';
|
|
import { buildSyncId, rawSourcesDirForSync } from './raw-sources-paths.js';
|
|
import {
|
|
buildStageIndexFromReportBody,
|
|
type IngestReportFinalizationProvenanceExclusion,
|
|
type IngestReportFinalizationOutcome,
|
|
type IngestReportProvenanceDetail,
|
|
type IngestReportSnapshot,
|
|
type IngestReportWorkUnit,
|
|
} from './reports.js';
|
|
import {
|
|
buildReconcileSystemPrompt,
|
|
buildReconcileToolSet,
|
|
buildReconcileUserPrompt,
|
|
} from './stages/build-reconcile-context.js';
|
|
import { buildWuSystemPrompt, buildWuToolSet, buildWuUserPrompt } from './stages/build-wu-context.js';
|
|
import { stageRawFilesStage1 } from './stages/stage-1-stage-raw-files.js';
|
|
import { executeWorkUnit, type WorkUnitOutcome } from './stages/stage-3-work-units.js';
|
|
import { runReconciliationStage4 } from './stages/stage-4-reconciliation.js';
|
|
import type { StageIndex } from './stages/stage-index.types.js';
|
|
import { validateWuTouchedSources } from './stages/validate-wu-sources.js';
|
|
import { assertSemanticLayerTargetPathsAllowed } from './semantic-layer-target-policy.js';
|
|
import { createEmitArtifactResolutionTool } from './tools/emit-artifact-resolution.tool.js';
|
|
import { createEmitConflictResolutionTool } from './tools/emit-conflict-resolution.tool.js';
|
|
import { createEmitEvictionDecisionTool } from './tools/emit-eviction-decision.tool.js';
|
|
import { createEmitUnmappedFallbackTool } from './tools/emit-unmapped-fallback.tool.js';
|
|
import { createEvictionListTool } from './tools/eviction-list.tool.js';
|
|
import { createReadRawSpanTool } from './tools/read-raw-span.tool.js';
|
|
import { createStageDiffTool } from './tools/stage-diff.tool.js';
|
|
import { createStageListTool } from './tools/stage-list.tool.js';
|
|
import { type ToolCallLogEntry, wrapToolsWithLogger } from './tools/tool-call-logger.js';
|
|
import {
|
|
createMutableToolTranscriptSummary,
|
|
recordToolTranscriptEntry,
|
|
type MutableToolTranscriptSummary,
|
|
} from './tools/tool-transcript-summary.js';
|
|
import type {
|
|
IngestDiffSummary,
|
|
EvictionUnit,
|
|
IngestBundleJob,
|
|
IngestBundleResult,
|
|
IngestJobContext,
|
|
UnresolvedCardInfo,
|
|
WorkUnit,
|
|
} from './types.js';
|
|
import { repairWikiSlRefs, type WikiSlRefRepairResult } from './wiki-sl-ref-repair.js';
|
|
|
|
type MemoryFlowStageProgress = Extract<MemoryFlowEvent, { type: 'stage_progress' }>;
|
|
|
|
async function copyTransientIngestEvidence(sourceWorkdir: string, targetWorkdir: string): Promise<void> {
|
|
const source = join(sourceWorkdir, '.ktx/ingest-evidence');
|
|
const target = join(targetWorkdir, '.ktx/ingest-evidence');
|
|
await cp(source, target, { recursive: true, force: true }).catch((error: NodeJS.ErrnoException) => {
|
|
if (error.code !== 'ENOENT') {
|
|
throw error;
|
|
}
|
|
});
|
|
}
|
|
|
|
function workUnitToMemoryFlowPlannedWorkUnit(workUnit: WorkUnit): MemoryFlowPlannedWorkUnit {
|
|
return {
|
|
unitKey: workUnit.unitKey,
|
|
rawFiles: workUnit.rawFiles,
|
|
peerFileCount: workUnit.peerFileIndex.length,
|
|
dependencyCount: workUnit.dependencyPaths.length,
|
|
};
|
|
}
|
|
|
|
function stageIndexWorkUnitToMemoryFlowPlannedWorkUnit(
|
|
workUnit: StageIndex['workUnits'][number],
|
|
): MemoryFlowPlannedWorkUnit {
|
|
return {
|
|
unitKey: workUnit.unitKey,
|
|
rawFiles: workUnit.rawFiles,
|
|
peerFileCount: 0,
|
|
dependencyCount: 0,
|
|
};
|
|
}
|
|
|
|
function countMemoryFlowActions(actions: MemoryAction[], target: MemoryAction['target']): number {
|
|
return actions.filter((action) => action.target === target).length;
|
|
}
|
|
|
|
function reportIdFromCreateResult(result: unknown): string | undefined {
|
|
if (!result || typeof result !== 'object' || !('id' in result)) {
|
|
return undefined;
|
|
}
|
|
const id = (result as { id?: unknown }).id;
|
|
return typeof id === 'string' && id.length > 0 ? id : undefined;
|
|
}
|
|
|
|
function normalizeTableReference(value: string): string {
|
|
return value
|
|
.trim()
|
|
.replace(/["`]/g, '')
|
|
.replace(/[\[\]]/g, '')
|
|
.toLowerCase();
|
|
}
|
|
|
|
function finalReferenceSegment(value: string): string {
|
|
const parts = value.split('.').filter((part) => part.length > 0);
|
|
return parts.at(-1) ?? value;
|
|
}
|
|
|
|
function semanticSourceMatchesTableRef(source: SemanticLayerSource, tableRef: string): boolean {
|
|
const normalizedRef = normalizeTableReference(tableRef);
|
|
if (!normalizedRef) {
|
|
return false;
|
|
}
|
|
|
|
const refIsQualified = normalizedRef.includes('.');
|
|
const normalizedSourceName = normalizeTableReference(source.name);
|
|
if (normalizedSourceName === normalizedRef) {
|
|
return true;
|
|
}
|
|
|
|
const table = typeof source.table === 'string' ? normalizeTableReference(source.table) : '';
|
|
if (table && (table === normalizedRef || table.endsWith(`.${normalizedRef}`))) {
|
|
return true;
|
|
}
|
|
if (!refIsQualified && table && finalReferenceSegment(table) === normalizedRef) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
function rawPathsForAction(action: MemoryAction, fallbackRawPaths: string[]): string[] {
|
|
return action.rawPaths && action.rawPaths.length > 0 ? [...new Set(action.rawPaths)] : fallbackRawPaths;
|
|
}
|
|
|
|
type ProvenanceRowOrigin =
|
|
| {
|
|
source: 'work_unit_action';
|
|
unitKey: string;
|
|
unitIndex: number;
|
|
unitRawFiles: string[];
|
|
actionIndex: number;
|
|
action: MemoryAction;
|
|
}
|
|
| {
|
|
source: 'reconciliation_action';
|
|
actionIndex: number;
|
|
action: MemoryAction;
|
|
}
|
|
| {
|
|
source: 'finalization_action';
|
|
actionIndex: number;
|
|
action: MemoryAction;
|
|
}
|
|
| {
|
|
source: 'artifact_resolution';
|
|
resolutionIndex: number;
|
|
resolution: NonNullable<StageIndex['artifactResolutions']>[number];
|
|
}
|
|
| {
|
|
source: 'raw_snapshot_fallback';
|
|
rawPath: string;
|
|
};
|
|
|
|
interface ProvenanceRowDiagnostic {
|
|
row: IngestProvenanceInsert;
|
|
origin: ProvenanceRowOrigin;
|
|
}
|
|
|
|
interface ProvenancePlan {
|
|
rows: IngestProvenanceInsert[];
|
|
diagnostics: ProvenanceRowDiagnostic[];
|
|
}
|
|
|
|
export class IngestBundleRunner {
|
|
private readonly logger: KtxLogger;
|
|
private readonly chainByConnection = new Map<string, Promise<unknown>>();
|
|
|
|
constructor(private readonly deps: IngestBundleRunnerDeps) {
|
|
this.logger = deps.logger ?? noopLogger;
|
|
}
|
|
|
|
async run(job: IngestBundleJob, ctx?: IngestJobContext): Promise<IngestBundleResult> {
|
|
const key = job.connectionId;
|
|
const previous = this.chainByConnection.get(key);
|
|
if (previous) {
|
|
this.logger.log(`[ingest-bundle] queued behind previous job for connection=${key}`);
|
|
}
|
|
const run = (previous ?? Promise.resolve()).catch(() => undefined).then(() => this.runInner(job, ctx));
|
|
const chainSlot = run.finally(() => {
|
|
if (this.chainByConnection.get(key) === chainSlot) {
|
|
this.chainByConnection.delete(key);
|
|
}
|
|
});
|
|
// Keep the chain alive but silence unhandled rejection — callers await `run` directly.
|
|
chainSlot.catch(() => undefined);
|
|
this.chainByConnection.set(key, chainSlot);
|
|
try {
|
|
const result = await run;
|
|
ctx?.memoryFlow?.finish('done');
|
|
return { ...result, jobId: job.jobId };
|
|
} catch (error) {
|
|
ctx?.memoryFlow?.finish('error', [sanitizeMemoryFlowError(error)]);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
protected stageRawFilesStage1 = stageRawFilesStage1;
|
|
|
|
private async syncKnowledgeSlRefsFromActions(connectionId: string, actions: MemoryAction[]): Promise<void> {
|
|
if (!this.deps.knowledgeSlRefs) {
|
|
return;
|
|
}
|
|
|
|
const slTargetsBySourceName = new Map<string, Set<string>>();
|
|
const wikiActionsByKey = new Map<string, MemoryAction>();
|
|
for (const action of actions) {
|
|
if (action.target === 'sl') {
|
|
const bucket = slTargetsBySourceName.get(action.key) ?? new Set<string>();
|
|
bucket.add(actionTargetConnectionId(action, connectionId));
|
|
slTargetsBySourceName.set(action.key, bucket);
|
|
}
|
|
if (action.target === 'wiki') {
|
|
wikiActionsByKey.set(action.key, action);
|
|
}
|
|
}
|
|
|
|
for (const action of wikiActionsByKey.values()) {
|
|
if (action.type === 'removed') {
|
|
await this.deps.knowledgeSlRefs.syncFromWiki({
|
|
wikiPageKey: action.key,
|
|
wikiScope: 'GLOBAL',
|
|
wikiScopeId: null,
|
|
refs: [],
|
|
});
|
|
continue;
|
|
}
|
|
|
|
const page = await this.deps.wikiService.readPage('GLOBAL', null, action.key);
|
|
const bareSources = [
|
|
...new Set(
|
|
(page?.frontmatter.sl_refs ?? [])
|
|
.map((ref) => ref.split('.')[0])
|
|
.filter((sourceName): sourceName is string => sourceName.length > 0),
|
|
),
|
|
];
|
|
const refs = bareSources.flatMap((sourceName) => {
|
|
const targets = slTargetsBySourceName.get(sourceName);
|
|
if (!targets || targets.size === 0) {
|
|
return [{ connectionId, sourceName }];
|
|
}
|
|
return [...targets].sort().map((targetConnectionId) => ({ connectionId: targetConnectionId, sourceName }));
|
|
});
|
|
|
|
await this.deps.knowledgeSlRefs.syncFromWiki({
|
|
wikiPageKey: action.key,
|
|
wikiScope: 'GLOBAL',
|
|
wikiScopeId: null,
|
|
refs,
|
|
});
|
|
}
|
|
}
|
|
|
|
protected async materializeOverrideSnapshot(
|
|
report: IngestReportSnapshot,
|
|
ctx: { connectionId: string; sourceKey: string; jobId: string },
|
|
): Promise<string> {
|
|
const rawRoot = rawSourcesDirForSync(ctx.connectionId, ctx.sourceKey, report.body.syncId);
|
|
const files = await this.deps.gitService.listFilesAtHead(rawRoot);
|
|
if (files.length === 0) {
|
|
throw new Error(`override ingest: no raw-source files found for prior sync ${report.body.syncId}`);
|
|
}
|
|
|
|
const stagedDir = this.deps.storage.resolvePullDir(ctx.jobId);
|
|
await rm(stagedDir, { recursive: true, force: true });
|
|
await mkdir(stagedDir, { recursive: true });
|
|
|
|
for (const file of files) {
|
|
const relativePath = file.startsWith(`${rawRoot}/`) ? file.slice(rawRoot.length + 1) : file;
|
|
const absoluteTarget = join(stagedDir, relativePath);
|
|
await mkdir(dirname(absoluteTarget), { recursive: true });
|
|
await writeFile(absoluteTarget, await this.deps.gitService.getFileAtCommit(file, 'HEAD'), 'utf-8');
|
|
}
|
|
|
|
return stagedDir;
|
|
}
|
|
|
|
protected async loadOverrideReport(job: IngestBundleJob): Promise<IngestReportSnapshot | null> {
|
|
if (job.bundleRef.kind !== 'override') {
|
|
return null;
|
|
}
|
|
const report = await this.deps.reports.findByJobId(job.bundleRef.priorJobId);
|
|
if (!report) {
|
|
throw new Error(`override ingest: prior report ${job.bundleRef.priorJobId} not found`);
|
|
}
|
|
if (report.connectionId !== job.connectionId || report.sourceKey !== job.sourceKey) {
|
|
throw new Error(
|
|
`override ingest: prior report ${job.bundleRef.priorJobId} belongs to ${report.connectionId}/${report.sourceKey}, not ${job.connectionId}/${job.sourceKey}`,
|
|
);
|
|
}
|
|
return report;
|
|
}
|
|
|
|
protected async resolveStagedDir(
|
|
ref: IngestBundleJob['bundleRef'],
|
|
ctx: { connectionId: string; sourceKey: string; jobId: string; memoryFlow?: MemoryFlowEventSink },
|
|
): Promise<string> {
|
|
if (ref.kind === 'upload') {
|
|
return this.deps.storage.resolveUploadDir(ref.uploadId);
|
|
}
|
|
if (ref.kind === 'override') {
|
|
throw new Error('override bundle refs must be materialized from the prior report snapshot');
|
|
}
|
|
const stagedDir = this.deps.storage.resolvePullDir(ctx.jobId);
|
|
await mkdir(stagedDir, { recursive: true });
|
|
const adapter = this.deps.registry.get(ctx.sourceKey);
|
|
if (!adapter.fetch) {
|
|
throw new Error(`source adapter '${ctx.sourceKey}' does not support scheduled_pull (no fetch() method)`);
|
|
}
|
|
await adapter.fetch(ref.config, stagedDir, {
|
|
connectionId: ctx.connectionId,
|
|
sourceKey: ctx.sourceKey,
|
|
...(ctx.memoryFlow ? { memoryFlow: ctx.memoryFlow } : {}),
|
|
});
|
|
return stagedDir;
|
|
}
|
|
|
|
protected buildCommitMessage(
|
|
job: IngestBundleJob,
|
|
syncId: string,
|
|
diffSummary: { added: number; modified: number; deleted: number; unchanged: number },
|
|
failedWUs: string[],
|
|
): string {
|
|
const diff = `+${diffSummary.added}/~${diffSummary.modified}/-${diffSummary.deleted}/=${diffSummary.unchanged}`;
|
|
const failed = failedWUs.length > 0 ? `; failed WUs: ${failedWUs.join(', ')}` : '';
|
|
return `ingest(${job.sourceKey}): ${job.jobId} syncId=${syncId} diff=${diff}${failed}`;
|
|
}
|
|
|
|
private async buildWikiIndex(): Promise<string> {
|
|
const pages = await this.deps.knowledgeIndex?.listPagesForUser('system');
|
|
if (!pages || pages.length === 0) {
|
|
return '(empty)';
|
|
}
|
|
|
|
return `## Wiki Pages\n${pages.map((page) => `- ${page.page_key}: ${page.summary}`).join('\n')}`;
|
|
}
|
|
|
|
private async buildSlIndex(connectionIds: string[]): Promise<string> {
|
|
const blocks = await Promise.all(
|
|
connectionIds.map(async (connectionId) => {
|
|
try {
|
|
const { sources } = await this.deps.semanticLayerService.loadAllSources(connectionId);
|
|
const names = sources.map((source) => source.name).sort((left, right) => left.localeCompare(right));
|
|
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
|
return `## ${connectionId}\n${body}`;
|
|
} catch {
|
|
try {
|
|
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
|
const names = files
|
|
.filter((f) => !f.startsWith('_schema/'))
|
|
.map((f) => f.replace(/\.yaml$/, ''))
|
|
.sort((left, right) => left.localeCompare(right));
|
|
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
|
return `## ${connectionId}\n${body}`;
|
|
} catch {
|
|
return `## ${connectionId}\n(empty)`;
|
|
}
|
|
}
|
|
}),
|
|
);
|
|
return blocks.join('\n\n');
|
|
}
|
|
|
|
private async tableRefExistsInSemanticLayer(
|
|
semanticLayerService: SemanticLayerService,
|
|
connectionIds: string[],
|
|
tableRef: string,
|
|
): Promise<boolean> {
|
|
for (const connectionId of connectionIds) {
|
|
try {
|
|
const { sources } = await semanticLayerService.loadAllSources(connectionId);
|
|
if (sources.some((source) => semanticSourceMatchesTableRef(source, tableRef))) {
|
|
return true;
|
|
}
|
|
} catch {
|
|
// Fallback diagnostics should not fail an ingest stage if an index lookup is temporarily unavailable.
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
private async loadSourcesByConnection(
|
|
workdir: string,
|
|
connectionIds: string[],
|
|
): Promise<Map<string, SemanticLayerSource[]>> {
|
|
const service = this.deps.semanticLayerService.forWorktree(workdir);
|
|
const result = new Map<string, SemanticLayerSource[]>();
|
|
for (const connectionId of connectionIds) {
|
|
const { sources } = await service.loadAllSources(connectionId);
|
|
result.set(connectionId, sources);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
private resolveContextCuratorBudget(
|
|
bundleRef: IngestBundleJob['bundleRef'],
|
|
stageIndex: StageIndex,
|
|
): { creates: number; updates: number } {
|
|
const rawConfig =
|
|
bundleRef.kind === 'scheduled_pull' && bundleRef.config && typeof bundleRef.config === 'object'
|
|
? (bundleRef.config as Record<string, unknown>)
|
|
: {};
|
|
const configuredCreates =
|
|
typeof rawConfig.maxKnowledgeCreatesPerRun === 'number'
|
|
? rawConfig.maxKnowledgeCreatesPerRun
|
|
: NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN;
|
|
const configuredUpdates =
|
|
typeof rawConfig.maxKnowledgeUpdatesPerRun === 'number' ? rawConfig.maxKnowledgeUpdatesPerRun : 20;
|
|
const wikiActions = stageIndex.workUnits.flatMap((wu) => wu.actions).filter((action) => action.target === 'wiki');
|
|
const usedCreates = wikiActions.filter((action) => action.type === 'created').length;
|
|
const usedUpdates = wikiActions.filter((action) => action.type === 'updated').length;
|
|
|
|
return {
|
|
creates: Math.max(0, configuredCreates - usedCreates),
|
|
updates: Math.max(0, configuredUpdates - usedUpdates),
|
|
};
|
|
}
|
|
|
|
private filterWorkUnitsForTriage(
|
|
workUnits: WorkUnit[],
|
|
triageResult: { enabled: boolean; fullRawPaths: Set<string> } | null,
|
|
): WorkUnit[] {
|
|
if (!triageResult?.enabled) {
|
|
return workUnits;
|
|
}
|
|
return workUnits.filter((wu) => wu.rawFiles.some((rawPath) => triageResult.fullRawPaths.has(rawPath)));
|
|
}
|
|
|
|
private createTrace(job: IngestBundleJob): IngestTraceWriter {
|
|
const storage = this.deps.storage as typeof this.deps.storage & { resolveTracePath?: (jobId: string) => string };
|
|
return new FileIngestTraceWriter({
|
|
tracePath: storage.resolveTracePath?.(job.jobId) ?? ingestTracePathForJob(this.deps.storage.homeDir, job.jobId),
|
|
jobId: job.jobId,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
level: this.deps.settings.ingestTraceLevel ?? 'debug',
|
|
});
|
|
}
|
|
|
|
private errorMessage(error: unknown): string {
|
|
return error instanceof Error ? error.message : String(error);
|
|
}
|
|
|
|
private buildProvenancePlan(input: {
|
|
job: IngestBundleJob;
|
|
syncId: string;
|
|
currentHashes: Map<string, string>;
|
|
stageIndex: StageIndex;
|
|
reconcileActions: MemoryAction[];
|
|
finalizationActions: MemoryAction[];
|
|
}): ProvenancePlan {
|
|
const rows: IngestProvenanceInsert[] = [];
|
|
const diagnostics: ProvenanceRowDiagnostic[] = [];
|
|
const actionToType = (action: MemoryAction): IngestProvenanceInsert['actionType'] => {
|
|
if (action.target === 'wiki') {
|
|
return 'wiki_written';
|
|
}
|
|
return action.type === 'created' ? 'source_created' : 'measure_added';
|
|
};
|
|
const producedPaths = new Set<string>();
|
|
const pushRow = (row: IngestProvenanceInsert, origin: ProvenanceRowOrigin): void => {
|
|
rows.push(row);
|
|
diagnostics.push({ row, origin });
|
|
producedPaths.add(row.rawPath);
|
|
};
|
|
const pushActionProvenance = (rawPath: string, action: MemoryAction, origin: ProvenanceRowOrigin): void => {
|
|
const hash = input.currentHashes.get(rawPath) ?? '';
|
|
pushRow(
|
|
{
|
|
connectionId: input.job.connectionId,
|
|
sourceKey: input.job.sourceKey,
|
|
syncId: input.syncId,
|
|
rawPath,
|
|
rawContentHash: hash,
|
|
artifactKind: action.target,
|
|
artifactKey: action.key,
|
|
targetConnectionId: action.target === 'sl' ? actionTargetConnectionId(action, input.job.connectionId) : null,
|
|
artifactContentHash: null,
|
|
actionType: actionToType(action),
|
|
},
|
|
origin,
|
|
);
|
|
};
|
|
|
|
input.stageIndex.workUnits.forEach((wu, unitIndex) => {
|
|
wu.actions.forEach((action, actionIndex) => {
|
|
for (const rawPath of rawPathsForAction(action, wu.rawFiles)) {
|
|
pushActionProvenance(rawPath, action, {
|
|
source: 'work_unit_action',
|
|
unitKey: wu.unitKey,
|
|
unitIndex,
|
|
unitRawFiles: wu.rawFiles,
|
|
actionIndex,
|
|
action,
|
|
});
|
|
}
|
|
});
|
|
});
|
|
input.reconcileActions.forEach((action, actionIndex) => {
|
|
for (const rawPath of action.rawPaths ?? []) {
|
|
pushActionProvenance(rawPath, action, {
|
|
source: 'reconciliation_action',
|
|
actionIndex,
|
|
action,
|
|
});
|
|
}
|
|
});
|
|
input.finalizationActions.forEach((action, actionIndex) => {
|
|
for (const rawPath of action.rawPaths ?? []) {
|
|
pushActionProvenance(rawPath, action, {
|
|
source: 'finalization_action',
|
|
actionIndex,
|
|
action,
|
|
});
|
|
}
|
|
});
|
|
(input.stageIndex.artifactResolutions ?? []).forEach((resolution, resolutionIndex) => {
|
|
const hash = input.currentHashes.get(resolution.rawPath) ?? '';
|
|
pushRow(
|
|
{
|
|
connectionId: input.job.connectionId,
|
|
sourceKey: input.job.sourceKey,
|
|
syncId: input.syncId,
|
|
rawPath: resolution.rawPath,
|
|
rawContentHash: hash,
|
|
artifactKind: resolution.artifactKind,
|
|
artifactKey: resolution.artifactKey,
|
|
targetConnectionId: null,
|
|
artifactContentHash: null,
|
|
actionType: resolution.actionType,
|
|
},
|
|
{
|
|
source: 'artifact_resolution',
|
|
resolutionIndex,
|
|
resolution,
|
|
},
|
|
);
|
|
});
|
|
for (const [rawPath, hash] of input.currentHashes) {
|
|
if (producedPaths.has(rawPath)) {
|
|
continue;
|
|
}
|
|
pushRow(
|
|
{
|
|
connectionId: input.job.connectionId,
|
|
sourceKey: input.job.sourceKey,
|
|
syncId: input.syncId,
|
|
rawPath,
|
|
rawContentHash: hash,
|
|
artifactKind: null,
|
|
artifactKey: null,
|
|
targetConnectionId: null,
|
|
artifactContentHash: null,
|
|
actionType: 'skipped',
|
|
},
|
|
{ source: 'raw_snapshot_fallback', rawPath },
|
|
);
|
|
}
|
|
|
|
return { rows, diagnostics };
|
|
}
|
|
|
|
private partitionFinalizationActionsForProvenance(input: {
|
|
actions: MemoryAction[];
|
|
currentRawPaths: Set<string>;
|
|
currentEvictionRawPaths: Set<string>;
|
|
overrideEvictionRawPaths: Set<string>;
|
|
}): { actions: MemoryAction[]; exclusions: IngestReportFinalizationProvenanceExclusion[] } {
|
|
const defensible = new Set([
|
|
...input.currentRawPaths,
|
|
...input.currentEvictionRawPaths,
|
|
...input.overrideEvictionRawPaths,
|
|
]);
|
|
const actions: MemoryAction[] = [];
|
|
const exclusions: IngestReportFinalizationProvenanceExclusion[] = [];
|
|
for (const action of input.actions) {
|
|
const rawPaths = action.rawPaths ?? [];
|
|
if (rawPaths.length === 0) {
|
|
exclusions.push({ action, reason: 'missing_raw_paths' });
|
|
continue;
|
|
}
|
|
const invalidRawPaths = rawPaths.filter((rawPath) => !defensible.has(rawPath)).sort();
|
|
if (invalidRawPaths.length > 0) {
|
|
exclusions.push({ action, reason: 'raw_path_not_defensible', invalidRawPaths });
|
|
continue;
|
|
}
|
|
actions.push(action);
|
|
}
|
|
return { actions, exclusions };
|
|
}
|
|
|
|
private toReportProvenanceRows(rows: IngestProvenanceInsert[]): IngestReportProvenanceDetail[] {
|
|
return rows.map(({ rawPath, artifactKind, artifactKey, actionType, targetConnectionId }) => ({
|
|
rawPath,
|
|
artifactKind,
|
|
artifactKey,
|
|
targetConnectionId: targetConnectionId ?? null,
|
|
actionType,
|
|
}));
|
|
}
|
|
|
|
private toReportWorkUnits(stageIndex: StageIndex): IngestReportWorkUnit[] {
|
|
return stageIndex.workUnits.map((wu) => ({
|
|
unitKey: wu.unitKey,
|
|
rawFiles: wu.rawFiles,
|
|
status: wu.status,
|
|
reason: wu.reason,
|
|
actions: wu.actions,
|
|
touchedSlSources: wu.touchedSlSources,
|
|
slDisallowed: wu.slDisallowed,
|
|
slDisallowedReason: wu.slDisallowedReason,
|
|
}));
|
|
}
|
|
|
|
private provenanceValidationTraceData(input: {
|
|
plan: ProvenancePlan;
|
|
currentRawPaths: Set<string>;
|
|
deletedRawPaths: Set<string>;
|
|
}): Record<string, unknown> {
|
|
const invalidRows = input.plan.diagnostics.filter(
|
|
({ row }) => !input.currentRawPaths.has(row.rawPath) && !input.deletedRawPaths.has(row.rawPath),
|
|
);
|
|
return {
|
|
rowCount: input.plan.rows.length,
|
|
currentRawPathCount: input.currentRawPaths.size,
|
|
deletedRawPathCount: input.deletedRawPaths.size,
|
|
currentRawPaths: [...input.currentRawPaths].sort(),
|
|
deletedRawPaths: [...input.deletedRawPaths].sort(),
|
|
invalidRawPaths: [...new Set(invalidRows.map(({ row }) => row.rawPath))].sort(),
|
|
invalidRows,
|
|
};
|
|
}
|
|
|
|
private wikiPageKeysFromPaths(paths: string[]): string[] {
|
|
return [
|
|
...new Set(
|
|
paths
|
|
.filter((path) => path.startsWith('wiki/global/') && path.endsWith('.md'))
|
|
.map((path) => path.slice('wiki/global/'.length, -'.md'.length)),
|
|
),
|
|
].sort();
|
|
}
|
|
|
|
private touchedSlSourcesFromPaths(paths: string[]): TouchedSlSource[] {
|
|
return paths
|
|
.filter((path) => path.startsWith('semantic-layer/') && path.endsWith('.yaml') && !path.includes('/_schema/'))
|
|
.map((path) => {
|
|
const [, connectionId, fileName] = path.split('/');
|
|
return { connectionId: connectionId ?? '', sourceName: (fileName ?? '').replace(/\.yaml$/, '') };
|
|
})
|
|
.filter((source) => source.connectionId.length > 0 && source.sourceName.length > 0);
|
|
}
|
|
|
|
private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] {
|
|
return actions
|
|
.filter((action) => action.target === 'sl')
|
|
.map((action) => ({
|
|
connectionId: actionTargetConnectionId(action, fallbackConnectionId),
|
|
sourceName: action.key,
|
|
}));
|
|
}
|
|
|
|
private wikiPageKeysFromActions(actions: MemoryAction[]): string[] {
|
|
return actions.filter((action) => action.target === 'wiki').map((action) => action.key);
|
|
}
|
|
|
|
private uniqueWikiPageKeys(keys: string[]): string[] {
|
|
return [...new Set(keys.filter((key): key is string => typeof key === 'string' && key.length > 0))].sort();
|
|
}
|
|
|
|
private uniqueTouchedSlSources(sources: TouchedSlSource[]): TouchedSlSource[] {
|
|
const seen = new Set<string>();
|
|
const unique: TouchedSlSource[] = [];
|
|
for (const source of sources) {
|
|
const key = `${source.connectionId}:${source.sourceName}`;
|
|
if (seen.has(key)) {
|
|
continue;
|
|
}
|
|
seen.add(key);
|
|
unique.push(source);
|
|
}
|
|
return unique.sort((left, right) => {
|
|
const byConnection = left.connectionId.localeCompare(right.connectionId);
|
|
return byConnection === 0 ? left.sourceName.localeCompare(right.sourceName) : byConnection;
|
|
});
|
|
}
|
|
|
|
private removedWikiPageKeysFromActions(actions: MemoryAction[]): string[] {
|
|
return this.uniqueWikiPageKeys(
|
|
actions.filter((action) => action.target === 'wiki' && action.type === 'removed').map((action) => action.key),
|
|
);
|
|
}
|
|
|
|
private finalGateActionOrigins(input: {
|
|
stageIndex: StageIndex;
|
|
reconcileActions: MemoryAction[];
|
|
fallbackConnectionId: string;
|
|
}) {
|
|
const actionContext = (action: MemoryAction, fallbackRawPaths: string[]) => ({
|
|
target: action.target,
|
|
type: action.type,
|
|
key: action.key,
|
|
detail: action.detail,
|
|
rawPaths: rawPathsForAction(action, fallbackRawPaths),
|
|
...(action.target === 'sl' ? { targetConnectionId: actionTargetConnectionId(action, input.fallbackConnectionId) } : {}),
|
|
});
|
|
|
|
return [
|
|
...input.stageIndex.workUnits.flatMap((workUnit, unitIndex) =>
|
|
workUnit.actions.map((action, actionIndex) => ({
|
|
source: 'work_unit_action',
|
|
unitKey: workUnit.unitKey,
|
|
unitIndex,
|
|
unitRawFiles: workUnit.rawFiles,
|
|
actionIndex,
|
|
action: actionContext(action, workUnit.rawFiles),
|
|
})),
|
|
),
|
|
...input.reconcileActions.map((action, actionIndex) => ({
|
|
source: 'reconciliation_action',
|
|
actionIndex,
|
|
action: actionContext(action, []),
|
|
})),
|
|
];
|
|
}
|
|
|
|
private async wikiPageKeysForFinalGates(input: {
|
|
wikiService: ReturnType<KnowledgeWikiService['forWorktree']>;
|
|
changedWikiPageKeys: string[];
|
|
touchedSlSources: TouchedSlSource[];
|
|
actions: MemoryAction[];
|
|
}): Promise<{
|
|
pageKeys: string[];
|
|
trace: {
|
|
global: boolean;
|
|
reasons: string[];
|
|
changedWikiPageKeys: string[];
|
|
removedWikiPageKeys: string[];
|
|
pageKeysValidated: string[];
|
|
};
|
|
}> {
|
|
const changedWikiPageKeys = this.uniqueWikiPageKeys(input.changedWikiPageKeys);
|
|
const removedWikiPageKeys = this.removedWikiPageKeysFromActions(input.actions);
|
|
const reasons: string[] = [];
|
|
if (input.touchedSlSources.length > 0) {
|
|
reasons.push('semantic_layer_changed');
|
|
}
|
|
if (removedWikiPageKeys.length > 0) {
|
|
reasons.push('wiki_page_removed');
|
|
}
|
|
|
|
let pageKeys = changedWikiPageKeys;
|
|
if (reasons.length > 0) {
|
|
pageKeys = this.uniqueWikiPageKeys([
|
|
...changedWikiPageKeys,
|
|
...(await input.wikiService.listPageKeys('GLOBAL', null)),
|
|
]);
|
|
}
|
|
|
|
return {
|
|
pageKeys,
|
|
trace: {
|
|
global: reasons.length > 0,
|
|
reasons,
|
|
changedWikiPageKeys,
|
|
removedWikiPageKeys,
|
|
pageKeysValidated: pageKeys,
|
|
},
|
|
};
|
|
}
|
|
|
|
private async runWorkUnitInWorktree(input: {
|
|
job: IngestBundleJob;
|
|
syncId: string;
|
|
wu: WorkUnit;
|
|
worktree: IngestSessionWorktree;
|
|
stagedDir: string;
|
|
contextReport: ContextEvidenceIndexSummary | null;
|
|
ingestToolMetadata: { runId: string; jobId: string; syncId: string; sourceKey: string };
|
|
slConnectionIds: string[];
|
|
wikiIndex: string;
|
|
slIndex: string;
|
|
priorProvenance: Map<string, IngestProvenanceRow[]>;
|
|
scopedWikiService: ReturnType<KnowledgeWikiService['forWorktree']>;
|
|
scopedSemanticLayerService: ReturnType<SemanticLayerService['forWorktree']>;
|
|
baseFraming: string;
|
|
skillsPrompt: string;
|
|
canonicalPins: CanonicalPin[];
|
|
workUnitSettings: { maxConcurrency: number; stepBudget: number; failureMode: 'abort' | 'continue' };
|
|
transcriptDir: string;
|
|
transcriptSummaries: Map<string, MutableToolTranscriptSummary>;
|
|
recordTranscriptEntry(path: string): (entry: ToolCallLogEntry) => void;
|
|
stageIndex: StageIndex;
|
|
includeContextEvidenceTools: boolean;
|
|
currentTableExists(tableRef: string): Promise<boolean>;
|
|
memoryFlow?: MemoryFlowEventSink;
|
|
wuSkillNames: string[];
|
|
onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void;
|
|
}): Promise<WorkUnitOutcome> {
|
|
const session: CaptureSession = {
|
|
userId: 'system',
|
|
chatId: input.wu.unitKey,
|
|
userMessage: `ingest(${input.job.sourceKey}) WU=${input.wu.unitKey}`,
|
|
connectionId: input.job.connectionId,
|
|
userScopedEnabled: false,
|
|
forceGlobalScope: true,
|
|
touchedSlSources: createTouchedSlSources(),
|
|
preHead: input.worktree.baseSha,
|
|
};
|
|
const sessionActions: MemoryAction[] = [];
|
|
|
|
const toolSession: ToolSession = {
|
|
connectionId: input.job.connectionId,
|
|
isWorktreeScoped: true,
|
|
preHead: input.worktree.baseSha,
|
|
touchedSlSources: session.touchedSlSources,
|
|
actions: sessionActions,
|
|
allowedRawPaths: new Set(input.wu.rawFiles),
|
|
allowedConnectionNames: new Set(input.slConnectionIds),
|
|
semanticLayerService: input.scopedSemanticLayerService,
|
|
wikiService: input.scopedWikiService,
|
|
configService: input.worktree.config,
|
|
gitService: input.worktree.git,
|
|
ingest: input.ingestToolMetadata,
|
|
};
|
|
|
|
const slValidationDeps: SlValidationDeps = {
|
|
semanticLayerService: input.scopedSemanticLayerService,
|
|
connections: this.deps.connections,
|
|
configService: input.worktree.config,
|
|
gitService: input.worktree.git,
|
|
slSourcesRepository: this.deps.slSourcesRepository,
|
|
probeRowCount: this.deps.settings.probeRowCount,
|
|
};
|
|
|
|
const wuToolset = this.deps.toolsetFactory.createIngestWuToolset(toolSession, {
|
|
includeContextEvidenceTools: input.includeContextEvidenceTools,
|
|
});
|
|
const wuToolContext: ToolContext = {
|
|
sourceId: 'ingest',
|
|
messageId: `${input.job.jobId}-wu-${input.wu.unitKey}`,
|
|
userId: 'system',
|
|
connectionId: input.job.connectionId,
|
|
ingest: input.ingestToolMetadata,
|
|
session: toolSession,
|
|
};
|
|
|
|
const skillsLoadedPerWu: string[] = [];
|
|
const loadSkillTool: KtxRuntimeToolSet = {
|
|
load_skill: {
|
|
name: 'load_skill',
|
|
description:
|
|
'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.',
|
|
inputSchema: z.object({ name: z.string() }),
|
|
execute: async ({ name }) => {
|
|
const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent');
|
|
if (!skill) {
|
|
const available =
|
|
(await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') || '(none)';
|
|
return { markdown: `Skill "${name}" not available. Available: ${available}` };
|
|
}
|
|
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
|
|
if (!skillsLoadedPerWu.includes(skill.name)) {
|
|
skillsLoadedPerWu.push(skill.name);
|
|
}
|
|
const structured = {
|
|
name: skill.name,
|
|
skillDirectory: skill.path,
|
|
content: this.deps.skillsRegistry.stripFrontmatter(body),
|
|
};
|
|
return {
|
|
markdown: `# ${structured.name}\n\n${structured.content}`,
|
|
structured,
|
|
};
|
|
},
|
|
},
|
|
};
|
|
|
|
const wuEmitUnmappedFallbackTool = {
|
|
emit_unmapped_fallback: createRuntimeToolDescriptorFromAiTool(
|
|
'emit_unmapped_fallback',
|
|
createEmitUnmappedFallbackTool({
|
|
stageIndex: input.stageIndex,
|
|
allowedPaths: new Set(input.wu.rawFiles),
|
|
tableRefExists: input.currentTableExists,
|
|
}),
|
|
),
|
|
};
|
|
|
|
const systemPrompt = buildWuSystemPrompt({
|
|
baseFraming: input.baseFraming,
|
|
skillsPrompt: input.skillsPrompt,
|
|
syncId: input.syncId,
|
|
sourceKey: input.job.sourceKey,
|
|
canonicalPins: input.canonicalPins,
|
|
});
|
|
|
|
input.memoryFlow?.emit({
|
|
type: 'work_unit_started',
|
|
unitKey: input.wu.unitKey,
|
|
skills: input.wuSkillNames,
|
|
stepBudget: input.workUnitSettings.stepBudget,
|
|
});
|
|
return executeWorkUnit(
|
|
{
|
|
sessionWorktreeGit: input.worktree.git,
|
|
agentRunner: this.deps.agentRunner,
|
|
validateTouchedSources: (touched) =>
|
|
validateWuTouchedSources({ ...slValidationDeps, slValidator: this.deps.slValidator }, touched),
|
|
validateWikiRefs: (actions) =>
|
|
findDanglingWikiRefsForActions({
|
|
wikiService: input.scopedWikiService,
|
|
scope: 'GLOBAL',
|
|
scopeId: null,
|
|
actions,
|
|
}),
|
|
resetHardTo: (targetSha) => input.worktree.git.resetHardTo(targetSha),
|
|
buildSystemPrompt: () => systemPrompt,
|
|
buildUserPrompt: (wuInner) =>
|
|
buildWuUserPrompt({
|
|
wu: wuInner,
|
|
wikiIndex: input.wikiIndex,
|
|
slIndex: input.slIndex,
|
|
priorProvenance: input.priorProvenance,
|
|
}),
|
|
buildToolSet: (wuInner) =>
|
|
wrapToolsWithLogger(
|
|
buildWuToolSet({
|
|
sourceKey: input.job.sourceKey,
|
|
stagedDir: input.stagedDir,
|
|
wu: wuInner,
|
|
loadSkillTool,
|
|
emitUnmappedFallbackTool: wuEmitUnmappedFallbackTool,
|
|
toolsetTools: wuToolset.toRuntimeTools(wuToolContext),
|
|
}),
|
|
join(input.transcriptDir, `${wuInner.unitKey}.jsonl`),
|
|
wuInner.unitKey,
|
|
{ onEntry: input.recordTranscriptEntry(join(input.transcriptDir, `${wuInner.unitKey}.jsonl`)) },
|
|
),
|
|
captureSession: session,
|
|
sessionActions,
|
|
modelRole: 'candidateExtraction',
|
|
stepBudget: input.workUnitSettings.stepBudget,
|
|
sourceKey: input.job.sourceKey,
|
|
connectionId: input.job.connectionId,
|
|
jobId: input.job.jobId,
|
|
toolFailureCount: (unitKey) => input.transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 0,
|
|
onStepFinish: input.onStepFinish,
|
|
},
|
|
input.wu,
|
|
);
|
|
}
|
|
|
|
protected async runInner(job: IngestBundleJob, ctx?: IngestJobContext): Promise<Omit<IngestBundleResult, 'jobId'>> {
|
|
const syncId = buildSyncId(new Date(), job.jobId);
|
|
const trace = this.createTrace(job);
|
|
const transcriptSummaries = new Map<string, MutableToolTranscriptSummary>();
|
|
let activeTrace: IngestTraceWriter = trace;
|
|
let activePhase = 'run';
|
|
let runRow: Awaited<ReturnType<IngestRunsPort['create']>> | null = null;
|
|
let latestDiffSummary: IngestDiffSummary = { added: 0, modified: 0, deleted: 0, unchanged: 0 };
|
|
let latestWorkUnits: WorkUnitOutcome[] = [];
|
|
let latestFailedWorkUnits: string[] = [];
|
|
let latestReconciliationSkipped = true;
|
|
let latestReportWorkUnits: IngestReportWorkUnit[] = [];
|
|
let latestReconciliationActions: MemoryAction[] = [];
|
|
let latestConflictsResolved: StageIndex['conflictsResolved'] = [];
|
|
let latestEvictionsApplied: StageIndex['evictionsApplied'] = [];
|
|
let latestUnmappedFallbacks: StageIndex['unmappedFallbacks'] = [];
|
|
let latestArtifactResolutions: NonNullable<StageIndex['artifactResolutions']> = [];
|
|
let latestEvictionInputs: string[] = [];
|
|
let latestUnresolvedCards: UnresolvedCardInfo[] = [];
|
|
let latestReportProvenanceRows: IngestReportProvenanceDetail[] = [];
|
|
let latestFinalizationOutcome: IngestReportFinalizationOutcome | undefined;
|
|
let activeFailureDetails: Record<string, unknown> | undefined;
|
|
let latestIsolatedDiffSummary:
|
|
| {
|
|
enabled: boolean;
|
|
integrationWorktreePath?: string;
|
|
ingestionBaseSha?: string;
|
|
projectionSha?: string | null;
|
|
acceptedPatches: number;
|
|
textualConflicts: number;
|
|
semanticConflicts: number;
|
|
resolverAttempts: number;
|
|
resolverRepairs: number;
|
|
resolverFailures: number;
|
|
}
|
|
| undefined;
|
|
await trace.event('info', 'run', 'ingest_started', {
|
|
trigger: job.trigger,
|
|
bundleRefKind: job.bundleRef.kind,
|
|
});
|
|
try {
|
|
const memoryFlow = ctx?.memoryFlow;
|
|
const emitStageProgress = (
|
|
stage: MemoryFlowStageProgress['stage'],
|
|
percent: number,
|
|
message: string,
|
|
options: { transient?: boolean } = {},
|
|
): void => {
|
|
memoryFlow?.emit({
|
|
type: 'stage_progress',
|
|
stage,
|
|
percent,
|
|
message,
|
|
...(options.transient !== undefined ? { transient: options.transient } : {}),
|
|
});
|
|
};
|
|
const baseSha = await this.deps.lockingService.withLock('config:repo', () => this.deps.gitService.revParseHead());
|
|
if (!baseSha) {
|
|
throw new Error('ingest-bundle: config repo has no HEAD');
|
|
}
|
|
const transcriptDir = this.deps.storage.resolveTranscriptDir(job.jobId);
|
|
const recordTranscriptEntry =
|
|
(path: string) =>
|
|
(entry: ToolCallLogEntry): void => {
|
|
const current =
|
|
transcriptSummaries.get(entry.wuKey) ?? createMutableToolTranscriptSummary(entry.wuKey, path);
|
|
recordToolTranscriptEntry(current, entry);
|
|
transcriptSummaries.set(entry.wuKey, current);
|
|
};
|
|
const overrideReport = await this.loadOverrideReport(job);
|
|
|
|
const stage1 = ctx?.startPhase(0.08);
|
|
await stage1?.updateProgress(0.0, 'Fetching source files');
|
|
|
|
const adapter = this.deps.registry.get(job.sourceKey);
|
|
activePhase = 'fetch';
|
|
const stagedDir = await traceTimed(
|
|
trace,
|
|
'fetch',
|
|
'resolve_staged_dir',
|
|
{
|
|
bundleRefKind: job.bundleRef.kind,
|
|
sourceKey: job.sourceKey,
|
|
},
|
|
() =>
|
|
overrideReport
|
|
? this.materializeOverrideSnapshot(overrideReport, {
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
jobId: job.jobId,
|
|
})
|
|
: this.resolveStagedDir(job.bundleRef, {
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
jobId: job.jobId,
|
|
...(memoryFlow ? { memoryFlow } : {}),
|
|
}),
|
|
);
|
|
const fetchReport = adapter.readFetchReport ? await adapter.readFetchReport(stagedDir) : null;
|
|
|
|
const scopeDescriptor = adapter.describeScope ? await adapter.describeScope(stagedDir) : null;
|
|
|
|
const sessionWorktree = await this.deps.lockingService.withLock('config:repo', () =>
|
|
this.deps.sessionWorktreeService.create(job.jobId, baseSha),
|
|
);
|
|
let cleanupOutcome: 'success' | 'crash' | 'conflict' = 'crash';
|
|
|
|
try {
|
|
activePhase = 'stage_raw_files';
|
|
const { currentHashes, rawDirInWorktree } = await traceTimed(
|
|
trace,
|
|
'stage_raw_files',
|
|
'stage_raw_files',
|
|
{
|
|
stagedDir,
|
|
worktreePath: sessionWorktree.workdir,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
},
|
|
() =>
|
|
this.stageRawFilesStage1({
|
|
stagedDir,
|
|
worktreeRoot: sessionWorktree.workdir,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
}),
|
|
);
|
|
memoryFlow?.update({
|
|
connectionId: job.connectionId,
|
|
adapter: job.sourceKey,
|
|
sourceDir: stagedDir,
|
|
syncId,
|
|
});
|
|
memoryFlow?.emit({
|
|
type: 'source_acquired',
|
|
adapter: job.sourceKey,
|
|
trigger: job.trigger,
|
|
fileCount: currentHashes.size,
|
|
});
|
|
memoryFlow?.emit({ type: 'scope_detected', fingerprint: scopeDescriptor?.fingerprint ?? null });
|
|
memoryFlow?.emit({ type: 'raw_snapshot_written', syncId, rawFileCount: currentHashes.size });
|
|
|
|
await sessionWorktree.git.commitFiles(
|
|
[rawDirInWorktree],
|
|
`ingest(${job.sourceKey}): stage raw files syncId=${syncId}`,
|
|
this.deps.storage.systemGitAuthor.name,
|
|
this.deps.storage.systemGitAuthor.email,
|
|
);
|
|
|
|
await stage1?.updateProgress(0.5, 'Checking what changed');
|
|
|
|
activePhase = 'diff';
|
|
const diffSet = await traceTimed(
|
|
trace,
|
|
'diff',
|
|
'compute_diff_set',
|
|
{
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
currentHashCount: currentHashes.size,
|
|
scopeFingerprint: scopeDescriptor?.fingerprint ?? null,
|
|
},
|
|
() =>
|
|
this.deps.diffSetService.compute(
|
|
job.connectionId,
|
|
job.sourceKey,
|
|
currentHashes,
|
|
scopeDescriptor ? scopeDescriptor.isPathInScope.bind(scopeDescriptor) : undefined,
|
|
),
|
|
);
|
|
const diffSummary = {
|
|
added: diffSet.added.length,
|
|
modified: diffSet.modified.length,
|
|
deleted: diffSet.deleted.length,
|
|
unchanged: diffSet.unchanged.length,
|
|
};
|
|
latestDiffSummary = diffSummary;
|
|
memoryFlow?.emit({ type: 'diff_computed', ...diffSummary });
|
|
|
|
runRow = await this.deps.runs.create({
|
|
jobId: job.jobId,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
trigger: job.trigger,
|
|
scopeFingerprint: scopeDescriptor?.fingerprint ?? null,
|
|
});
|
|
memoryFlow?.update({ runId: runRow.id });
|
|
const ingestToolMetadata = {
|
|
runId: runRow.id,
|
|
jobId: job.jobId,
|
|
syncId,
|
|
sourceKey: job.sourceKey,
|
|
};
|
|
const runTrace = trace.withContext({ runId: runRow.id, syncId });
|
|
activeTrace = runTrace;
|
|
const createdRunRow = runRow;
|
|
await runTrace.event('debug', 'snapshot', 'input_snapshot', {
|
|
baseSha,
|
|
stagedDir,
|
|
rawFileCount: currentHashes.size,
|
|
rawDirInWorktree,
|
|
diffSummary,
|
|
scopeFingerprint: scopeDescriptor?.fingerprint ?? null,
|
|
});
|
|
|
|
await stage1?.updateProgress(
|
|
1.0,
|
|
`${diffSet.added.length} new, ${diffSet.modified.length} changed, ${diffSet.deleted.length} removed`,
|
|
);
|
|
|
|
activePhase = 'detect';
|
|
const detected = await traceTimed(runTrace, 'detect', 'adapter_detect', { stagedDir, sourceKey: job.sourceKey }, () =>
|
|
adapter.detect(stagedDir),
|
|
);
|
|
await runTrace.event('debug', 'detect', 'adapter_detected', { detected });
|
|
if (!detected) {
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
throw new Error(`source adapter '${job.sourceKey}' did not recognize staged dir`);
|
|
}
|
|
|
|
let contextReport: ContextEvidenceIndexSummary | null = null;
|
|
if (adapter.evidenceIndexing === 'documents' && this.deps.contextEvidenceIndex) {
|
|
contextReport = await this.deps.contextEvidenceIndex.indexStagedDir({
|
|
stagedDir,
|
|
runId: runRow.id,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
diffSet,
|
|
currentHashes,
|
|
});
|
|
}
|
|
|
|
const stage2 = ctx?.startPhase(0.04);
|
|
await stage2?.updateProgress(0.0, 'Planning updates');
|
|
activePhase = 'planning';
|
|
let workUnits: WorkUnit[] = [];
|
|
let eviction: EvictionUnit | undefined;
|
|
let unresolvedCards: UnresolvedCardInfo[] | undefined;
|
|
let sourceContextReport: { capped?: boolean; warnings?: string[] } | undefined;
|
|
let parseArtifacts: unknown;
|
|
let finalizationOutcome: IngestReportFinalizationOutcome | undefined;
|
|
let wikiSlRefRepairResult: WikiSlRefRepairResult | null = null;
|
|
let reconcileNotes: string[] = [];
|
|
let triageResult: PageTriageRunResult | null = null;
|
|
if (overrideReport) {
|
|
eviction =
|
|
overrideReport.body.evictionInputs.length > 0
|
|
? { deletedRawPaths: overrideReport.body.evictionInputs }
|
|
: undefined;
|
|
unresolvedCards = overrideReport.body.unresolvedCards;
|
|
await stage2?.updateProgress(1.0, `Loaded prior report ${overrideReport.jobId} for override reconciliation`);
|
|
} else {
|
|
const chunk = await traceTimed(
|
|
runTrace,
|
|
'planning',
|
|
'chunk_work_units',
|
|
{
|
|
stagedDir,
|
|
added: diffSet.added.length,
|
|
modified: diffSet.modified.length,
|
|
deleted: diffSet.deleted.length,
|
|
},
|
|
() => adapter.chunk(stagedDir, diffSet),
|
|
);
|
|
workUnits = chunk.workUnits;
|
|
eviction = chunk.eviction;
|
|
unresolvedCards = chunk.unresolvedCards;
|
|
sourceContextReport = chunk.contextReport;
|
|
parseArtifacts = chunk.parseArtifacts;
|
|
reconcileNotes = chunk.reconcileNotes ?? [];
|
|
triageResult =
|
|
contextReport && adapter.triageSupported && this.deps.pageTriage
|
|
? await this.deps.pageTriage.triageRun({
|
|
stagedDir,
|
|
runId: runRow.id,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
jobId: job.jobId,
|
|
diffSet,
|
|
adapter,
|
|
})
|
|
: null;
|
|
workUnits = this.filterWorkUnitsForTriage(workUnits, triageResult);
|
|
if (adapter.clusterWorkUnits && workUnits.length > 0) {
|
|
workUnits = await adapter.clusterWorkUnits({
|
|
workUnits,
|
|
stagedDir,
|
|
embedding: this.deps.embedding,
|
|
});
|
|
}
|
|
await stage2?.updateProgress(1.0, `Planned ${workUnits.length} update${workUnits.length === 1 ? '' : 's'}`);
|
|
}
|
|
await runTrace.event('debug', 'planning', 'work_units_planned', {
|
|
workUnitCount: workUnits.length,
|
|
evictionCount: eviction?.deletedRawPaths.length ?? 0,
|
|
unresolvedCardCount: unresolvedCards?.length ?? 0,
|
|
triageEnabled: triageResult?.enabled ?? false,
|
|
});
|
|
|
|
const targetConnectionIds = new Set<string>([job.connectionId]);
|
|
if (!overrideReport && adapter.listTargetConnectionIds) {
|
|
for (const connectionId of await adapter.listTargetConnectionIds(stagedDir)) {
|
|
targetConnectionIds.add(connectionId);
|
|
}
|
|
}
|
|
if (overrideReport) {
|
|
for (const wu of overrideReport.body.workUnits) {
|
|
for (const action of wu.actions) {
|
|
if (action.target === 'sl' && action.targetConnectionId) {
|
|
targetConnectionIds.add(action.targetConnectionId);
|
|
}
|
|
}
|
|
for (const touched of wu.touchedSlSources) {
|
|
targetConnectionIds.add(touched.connectionId);
|
|
}
|
|
}
|
|
}
|
|
const slConnectionIds = [...targetConnectionIds].sort();
|
|
await runTrace.event('debug', 'planning', 'target_connections_resolved', {
|
|
connectionIds: slConnectionIds,
|
|
});
|
|
|
|
// Build shared per-job context.
|
|
const [wikiIndex, slIndex] = await Promise.all([this.buildWikiIndex(), this.buildSlIndex(slConnectionIds)]);
|
|
|
|
const baseFraming = await this.deps.promptService.loadPrompt('memory_agent_bundle_ingest_work_unit');
|
|
const wuSkillNames = Array.from(
|
|
new Set<string>([...adapter.skillNames, 'ingest_triage', 'sl_capture', 'wiki_capture']),
|
|
);
|
|
const wuSkills = await this.deps.skillsRegistry.listSkills(wuSkillNames, 'memory_agent');
|
|
const skillsPrompt = this.deps.skillsRegistry.buildSkillsPrompt(wuSkills, 'memory_agent');
|
|
const canonicalPins = await this.deps.canonicalPins.listPins(slConnectionIds);
|
|
|
|
const workUnitOutcomes: WorkUnitOutcome[] = [];
|
|
const failedWorkUnits: string[] = [];
|
|
const stageIndex: StageIndex = overrideReport
|
|
? buildStageIndexFromReportBody(job.jobId, overrideReport.connectionId, overrideReport.body)
|
|
: {
|
|
jobId: job.jobId,
|
|
connectionId: job.connectionId,
|
|
workUnits: [],
|
|
conflictsResolved: [],
|
|
evictionsApplied: [],
|
|
unmappedFallbacks: [],
|
|
artifactResolutions: [],
|
|
};
|
|
const memoryFlowPlannedWorkUnits = overrideReport
|
|
? stageIndex.workUnits.map(stageIndexWorkUnitToMemoryFlowPlannedWorkUnit)
|
|
: workUnits.map(workUnitToMemoryFlowPlannedWorkUnit);
|
|
memoryFlow?.update({ plannedWorkUnits: memoryFlowPlannedWorkUnits });
|
|
memoryFlow?.emit({
|
|
type: 'chunks_planned',
|
|
chunkCount: memoryFlowPlannedWorkUnits.length,
|
|
workUnitCount: memoryFlowPlannedWorkUnits.length,
|
|
evictionCount: eviction?.deletedRawPaths.length ?? 0,
|
|
});
|
|
const isolatedDiffEnabled = !overrideReport;
|
|
const isolatedDiffSummary = {
|
|
enabled: isolatedDiffEnabled,
|
|
integrationWorktreePath: isolatedDiffEnabled ? sessionWorktree.workdir : undefined,
|
|
ingestionBaseSha: undefined as string | undefined,
|
|
projectionSha: null as string | null,
|
|
acceptedPatches: 0,
|
|
textualConflicts: 0,
|
|
semanticConflicts: 0,
|
|
resolverAttempts: 0,
|
|
resolverRepairs: 0,
|
|
resolverFailures: 0,
|
|
gateRepairAttempts: 0,
|
|
gateRepairs: 0,
|
|
gateRepairFailures: 0,
|
|
};
|
|
latestIsolatedDiffSummary = isolatedDiffSummary;
|
|
|
|
const stage3 = ctx?.startPhase(0.6);
|
|
await stage3?.updateProgress(0.0, `Processing ${workUnits.length} update${workUnits.length === 1 ? '' : 's'}`);
|
|
activePhase = 'work_units';
|
|
this.logger.log(`[ingest-bundle] job=${job.jobId} tool-call transcripts: ${transcriptDir}/`);
|
|
let projectionTouchedSources: TouchedSlSource[] = [];
|
|
let projectionChangedWikiPageKeys: string[] = [];
|
|
let projectionTouchedPaths: string[] = [];
|
|
|
|
if (!overrideReport) {
|
|
await runTrace.event('info', 'routing', 'isolated_diff_enabled', {
|
|
sourceKey: job.sourceKey,
|
|
workUnitCount: workUnits.length,
|
|
integrationWorktreePath: sessionWorktree.workdir,
|
|
});
|
|
|
|
if (adapter.project) {
|
|
const preProjectionSha = await sessionWorktree.git.revParseHead();
|
|
const projection = await traceTimed(
|
|
runTrace,
|
|
'projection',
|
|
'deterministic_projection',
|
|
{ sourceKey: job.sourceKey },
|
|
() =>
|
|
adapter.project!({
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
jobId: job.jobId,
|
|
runId: createdRunRow.id,
|
|
stagedDir,
|
|
workdir: sessionWorktree.workdir,
|
|
parseArtifacts,
|
|
semanticLayerService: this.deps.semanticLayerService,
|
|
}),
|
|
);
|
|
if (projection.errors.length > 0) {
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
throw new Error(`deterministic projection failed: ${projection.errors.join('; ')}`);
|
|
}
|
|
projectionTouchedSources = projection.touchedSources;
|
|
projectionChangedWikiPageKeys = projection.changedWikiPageKeys;
|
|
const projectionPaths = [
|
|
...projection.touchedSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`),
|
|
...projection.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
|
];
|
|
projectionTouchedPaths = projectionPaths;
|
|
const projectionCommit =
|
|
projectionPaths.length > 0
|
|
? await sessionWorktree.git.commitFiles(
|
|
projectionPaths,
|
|
`ingest(${job.sourceKey}): deterministic projection syncId=${syncId}`,
|
|
this.deps.storage.systemGitAuthor.name,
|
|
this.deps.storage.systemGitAuthor.email,
|
|
)
|
|
: await sessionWorktree.git.commitStaged(
|
|
`ingest(${job.sourceKey}): deterministic projection syncId=${syncId}`,
|
|
this.deps.storage.systemGitAuthor.name,
|
|
this.deps.storage.systemGitAuthor.email,
|
|
);
|
|
isolatedDiffSummary.projectionSha =
|
|
projectionCommit.created || projectionCommit.commitHash !== preProjectionSha
|
|
? projectionCommit.commitHash
|
|
: null;
|
|
await runTrace.event('debug', 'projection', 'deterministic_projection_committed', {
|
|
projectionSha: isolatedDiffSummary.projectionSha,
|
|
touchedSources: projectionTouchedSources,
|
|
changedWikiPageKeys: projectionChangedWikiPageKeys,
|
|
warnings: projection.warnings,
|
|
});
|
|
}
|
|
|
|
const ingestionBaseSha = await sessionWorktree.git.revParseHead();
|
|
isolatedDiffSummary.ingestionBaseSha = ingestionBaseSha;
|
|
const patchDir = join(this.deps.storage.homeDir, 'ingest-patches', job.jobId);
|
|
const workUnitSettings = {
|
|
maxConcurrency: this.deps.settings.workUnitMaxConcurrency ?? 1,
|
|
stepBudget: this.deps.settings.workUnitStepBudget ?? 40,
|
|
failureMode: this.deps.settings.workUnitFailureMode ?? 'continue',
|
|
};
|
|
const limitWorkUnit = pLimit(workUnitSettings.maxConcurrency);
|
|
const workUnitOutcomesByIndex: WorkUnitOutcome[] = [];
|
|
let completedWorkUnits = 0;
|
|
|
|
if (workUnits.length === 0) {
|
|
await stage3?.updateProgress(1.0, '0 of 0 work units complete');
|
|
}
|
|
|
|
try {
|
|
await Promise.all(
|
|
workUnits.map((wu, index) =>
|
|
limitWorkUnit(async () => {
|
|
const outcome = await runIsolatedWorkUnit({
|
|
unitIndex: index,
|
|
ingestionBaseSha,
|
|
sessionWorktreeService: this.deps.sessionWorktreeService,
|
|
patchDir,
|
|
trace: runTrace,
|
|
workUnit: wu,
|
|
afterSuccess: (child) => copyTransientIngestEvidence(child.workdir, sessionWorktree.workdir),
|
|
run: async (child) => {
|
|
const scopedWikiService = this.deps.wikiService.forWorktree(child.workdir);
|
|
const scopedSemanticLayerService = this.deps.semanticLayerService.forWorktree(child.workdir);
|
|
return this.runWorkUnitInWorktree({
|
|
job,
|
|
syncId,
|
|
wu,
|
|
worktree: child,
|
|
stagedDir,
|
|
contextReport,
|
|
ingestToolMetadata,
|
|
slConnectionIds,
|
|
wikiIndex,
|
|
slIndex,
|
|
priorProvenance: await this.deps.provenance.findLatestArtifactsForRawPaths(
|
|
job.connectionId,
|
|
job.sourceKey,
|
|
wu.rawFiles,
|
|
),
|
|
scopedWikiService,
|
|
scopedSemanticLayerService,
|
|
baseFraming,
|
|
skillsPrompt,
|
|
canonicalPins,
|
|
workUnitSettings,
|
|
transcriptDir,
|
|
transcriptSummaries,
|
|
recordTranscriptEntry,
|
|
stageIndex,
|
|
includeContextEvidenceTools: adapter.evidenceIndexing === 'documents' && !!contextReport,
|
|
currentTableExists: (tableRef) =>
|
|
this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef),
|
|
memoryFlow,
|
|
wuSkillNames,
|
|
onStepFinish: ({ stepIndex, stepBudget }) => {
|
|
memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });
|
|
},
|
|
});
|
|
},
|
|
});
|
|
workUnitOutcomesByIndex[index] = outcome;
|
|
for (const action of outcome.actions) {
|
|
memoryFlow?.emit({
|
|
type: 'candidate_action',
|
|
unitKey: outcome.unitKey,
|
|
target: action.target,
|
|
action: action.type,
|
|
key: action.key,
|
|
});
|
|
}
|
|
memoryFlow?.emit({
|
|
type: 'work_unit_finished',
|
|
unitKey: outcome.unitKey,
|
|
status: outcome.status,
|
|
...(outcome.reason ? { reason: outcome.reason } : {}),
|
|
});
|
|
completedWorkUnits += 1;
|
|
await stage3?.updateProgress(
|
|
completedWorkUnits / workUnits.length,
|
|
`${completedWorkUnits} of ${workUnits.length} work units complete`,
|
|
);
|
|
}),
|
|
),
|
|
);
|
|
} catch (error) {
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
throw error;
|
|
}
|
|
|
|
workUnitOutcomes.push(
|
|
...workUnitOutcomesByIndex.filter((outcome): outcome is WorkUnitOutcome => Boolean(outcome)),
|
|
);
|
|
failedWorkUnits.push(
|
|
...workUnitOutcomes.filter((outcome) => outcome.status === 'failed').map((outcome) => outcome.unitKey),
|
|
);
|
|
latestWorkUnits = workUnitOutcomes;
|
|
latestFailedWorkUnits = failedWorkUnits;
|
|
stageIndex.workUnits = workUnitOutcomes.map((o) => ({
|
|
unitKey: o.unitKey,
|
|
rawFiles: workUnits.find((w) => w.unitKey === o.unitKey)?.rawFiles ?? [],
|
|
status: o.status,
|
|
reason: o.reason,
|
|
actions: o.actions,
|
|
touchedSlSources: o.touchedSlSources,
|
|
slDisallowed: o.slDisallowed,
|
|
slDisallowedReason: o.slDisallowedReason,
|
|
}));
|
|
|
|
activePhase = 'integration';
|
|
const integrablePatchCount = workUnitOutcomesByIndex.filter(
|
|
(outcome) => outcome?.status === 'success' && !!outcome.patchPath,
|
|
).length;
|
|
let integratedPatchCount = 0;
|
|
for (const [index, outcome] of workUnitOutcomesByIndex.entries()) {
|
|
if (!outcome || outcome.status !== 'success' || !outcome.patchPath) {
|
|
continue;
|
|
}
|
|
const wu = workUnits[index];
|
|
if (!wu) {
|
|
continue;
|
|
}
|
|
const integrationFailureDetails = {
|
|
unitKey: outcome.unitKey,
|
|
patchPath: outcome.patchPath,
|
|
allowedTargetConnectionIds: slConnectionIds,
|
|
};
|
|
activeFailureDetails = integrationFailureDetails;
|
|
emitStageProgress(
|
|
'integration',
|
|
80,
|
|
`Integrating ${integratedPatchCount + 1}/${integrablePatchCount} patches: ${outcome.unitKey}`,
|
|
);
|
|
const integration = await integrateWorkUnitPatch({
|
|
unitKey: outcome.unitKey,
|
|
patchPath: outcome.patchPath,
|
|
integrationGit: sessionWorktree.git,
|
|
trace: runTrace,
|
|
author: this.deps.storage.systemGitAuthor,
|
|
slDisallowed: wu.slDisallowed === true,
|
|
allowedTargetConnectionIds: new Set(slConnectionIds),
|
|
validateAppliedTree: async (touchedPaths) => {
|
|
await validateFinalIngestArtifacts({
|
|
connectionIds: slConnectionIds,
|
|
changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths),
|
|
touchedSlSources: this.touchedSlSourcesFromPaths(touchedPaths),
|
|
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
|
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
validateTouchedSources: (touched) =>
|
|
validateWuTouchedSources(
|
|
{
|
|
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
connections: this.deps.connections,
|
|
configService: sessionWorktree.config,
|
|
gitService: sessionWorktree.git,
|
|
slSourcesRepository: this.deps.slSourcesRepository,
|
|
probeRowCount: this.deps.settings.probeRowCount,
|
|
slValidator: this.deps.slValidator,
|
|
},
|
|
touched,
|
|
),
|
|
tableExists: (connectionId, tableRef) =>
|
|
this.tableRefExistsInSemanticLayer(
|
|
this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
[connectionId],
|
|
tableRef,
|
|
),
|
|
});
|
|
},
|
|
resolveTextualConflict: async (context) => {
|
|
emitStageProgress('integration', 81, `Resolving text conflict for ${context.unitKey}`);
|
|
const result = await resolveTextualConflict({
|
|
agentRunner: this.deps.agentRunner,
|
|
workdir: sessionWorktree.workdir,
|
|
unitKey: context.unitKey,
|
|
patchPath: context.patchPath,
|
|
touchedPaths: context.touchedPaths,
|
|
trace: runTrace,
|
|
reason: context.reason,
|
|
maxAttempts: 1,
|
|
stepBudget: 12,
|
|
});
|
|
emitStageProgress(
|
|
'integration',
|
|
82,
|
|
result.status === 'repaired'
|
|
? `Resolved text conflict for ${context.unitKey}`
|
|
: `Text conflict resolver failed for ${context.unitKey}`,
|
|
);
|
|
return result;
|
|
},
|
|
repairGateFailure: async (context) => {
|
|
emitStageProgress('integration', 82, `Repairing semantic gate for ${context.unitKey}`);
|
|
const result = await repairFinalGateFailure({
|
|
agentRunner: this.deps.agentRunner,
|
|
workdir: sessionWorktree.workdir,
|
|
gateError: context.reason,
|
|
allowedPaths: context.touchedPaths,
|
|
trace: runTrace,
|
|
repairKind: 'patch_semantic_gate',
|
|
maxAttempts: 1,
|
|
stepBudget: 16,
|
|
});
|
|
emitStageProgress(
|
|
'integration',
|
|
83,
|
|
result.status === 'repaired'
|
|
? `Repaired semantic gate for ${context.unitKey}`
|
|
: `Semantic gate repair failed for ${context.unitKey}`,
|
|
);
|
|
return result;
|
|
},
|
|
});
|
|
if (integration.textualResolution) {
|
|
isolatedDiffSummary.resolverAttempts += integration.textualResolution.attempts;
|
|
if (integration.textualResolution.status === 'repaired') {
|
|
isolatedDiffSummary.textualConflicts += 1;
|
|
isolatedDiffSummary.resolverRepairs += 1;
|
|
} else {
|
|
isolatedDiffSummary.resolverFailures += 1;
|
|
}
|
|
}
|
|
if (integration.gateRepair) {
|
|
isolatedDiffSummary.gateRepairAttempts += integration.gateRepair.attempts;
|
|
if (integration.gateRepair.status === 'repaired') {
|
|
isolatedDiffSummary.semanticConflicts += 1;
|
|
isolatedDiffSummary.gateRepairs += 1;
|
|
} else {
|
|
isolatedDiffSummary.gateRepairFailures += 1;
|
|
}
|
|
}
|
|
if (integration.status === 'textual_conflict') {
|
|
isolatedDiffSummary.textualConflicts += 1;
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
cleanupOutcome = 'conflict';
|
|
activeFailureDetails = {
|
|
...integrationFailureDetails,
|
|
touchedPaths: integration.touchedPaths,
|
|
reason: integration.reason,
|
|
};
|
|
throw new Error(`isolated diff textual conflict in ${outcome.unitKey}: ${integration.reason}`);
|
|
}
|
|
if (integration.status === 'semantic_conflict') {
|
|
isolatedDiffSummary.semanticConflicts += 1;
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
cleanupOutcome = 'conflict';
|
|
activeFailureDetails = {
|
|
...integrationFailureDetails,
|
|
touchedPaths: integration.touchedPaths,
|
|
reason: integration.reason,
|
|
};
|
|
throw new Error(`isolated diff semantic conflict in ${outcome.unitKey}: ${integration.reason}`);
|
|
}
|
|
activeFailureDetails = undefined;
|
|
if (integration.touchedPaths.length > 0) {
|
|
isolatedDiffSummary.acceptedPatches += 1;
|
|
integratedPatchCount += 1;
|
|
}
|
|
emitStageProgress(
|
|
'integration',
|
|
83,
|
|
`Integrated ${integratedPatchCount}/${integrablePatchCount} patches`,
|
|
);
|
|
}
|
|
|
|
}
|
|
const carryForwardResult =
|
|
contextReport && this.deps.contextCandidateCarryforward
|
|
? await this.deps.contextCandidateCarryforward.carryForward({
|
|
runId: runRow.id,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
})
|
|
: null;
|
|
const dedupResult =
|
|
contextReport && this.deps.candidateDedup ? await this.deps.candidateDedup.deduplicateRun(runRow.id) : null;
|
|
const preReconciliationSha = await sessionWorktree.git.revParseHead();
|
|
activePhase = 'reconciliation';
|
|
|
|
// Stage 4 — reconciliation. Shares scoped wiki/SL with a fresh CaptureSession
|
|
// so reconciliation writes land in the same worktree Stage 3 used.
|
|
const reconcileSession: CaptureSession = {
|
|
userId: 'system',
|
|
chatId: `${job.jobId}-reconcile`,
|
|
userMessage: `reconcile(${job.sourceKey})`,
|
|
connectionId: job.connectionId,
|
|
userScopedEnabled: false,
|
|
forceGlobalScope: true,
|
|
touchedSlSources: createTouchedSlSources(),
|
|
preHead: await sessionWorktree.git.revParseHead(),
|
|
};
|
|
const reconcileActions: MemoryAction[] = [];
|
|
const rcScopedWiki = this.deps.wikiService.forWorktree(sessionWorktree.workdir);
|
|
const rcScopedSl = this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir);
|
|
const reconciliationAllowedRawPaths = new Set<string>([
|
|
...currentHashes.keys(),
|
|
...(eviction?.deletedRawPaths ?? []),
|
|
]);
|
|
|
|
const rcToolSession: ToolSession = {
|
|
connectionId: job.connectionId,
|
|
isWorktreeScoped: true,
|
|
preHead: reconcileSession.preHead,
|
|
touchedSlSources: reconcileSession.touchedSlSources,
|
|
actions: reconcileActions,
|
|
allowedRawPaths: reconciliationAllowedRawPaths,
|
|
allowedConnectionNames: new Set(slConnectionIds),
|
|
semanticLayerService: rcScopedSl,
|
|
wikiService: rcScopedWiki,
|
|
configService: sessionWorktree.config,
|
|
gitService: sessionWorktree.git,
|
|
ingest: ingestToolMetadata,
|
|
evictionDecisions: [],
|
|
};
|
|
|
|
const rcToolset = this.deps.toolsetFactory.createIngestWuToolset(rcToolSession, {
|
|
includeContextEvidenceTools: adapter.evidenceIndexing === 'documents' && !!contextReport,
|
|
});
|
|
const rcToolContext: ToolContext = {
|
|
sourceId: 'ingest',
|
|
messageId: `${job.jobId}-reconcile`,
|
|
userId: 'system',
|
|
connectionId: job.connectionId,
|
|
ingest: ingestToolMetadata,
|
|
session: rcToolSession,
|
|
};
|
|
const rcLoadSkill: KtxRuntimeToolSet = {
|
|
load_skill: {
|
|
name: 'load_skill',
|
|
description: 'Load a skill.',
|
|
inputSchema: z.object({ name: z.string() }),
|
|
execute: async ({ name }) => {
|
|
const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent');
|
|
if (!skill) {
|
|
return { markdown: `Skill "${name}" not found` };
|
|
}
|
|
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
|
|
const structured = { name: skill.name, content: this.deps.skillsRegistry.stripFrontmatter(body) };
|
|
return { markdown: `# ${structured.name}\n\n${structured.content}`, structured };
|
|
},
|
|
},
|
|
};
|
|
const allStagedPaths = new Set<string>([...currentHashes.keys()]);
|
|
const rcRawSpanTool = {
|
|
read_raw_span: createRuntimeToolDescriptorFromAiTool(
|
|
'read_raw_span',
|
|
createReadRawSpanTool({ stagedDir, allowedPaths: allStagedPaths }),
|
|
),
|
|
};
|
|
const rcStageListTool = {
|
|
stage_list: createRuntimeToolDescriptorFromAiTool('stage_list', createStageListTool({ stageIndex })),
|
|
};
|
|
const rcStageDiffTool = {
|
|
stage_diff: createRuntimeToolDescriptorFromAiTool('stage_diff', createStageDiffTool({ stageIndex })),
|
|
};
|
|
const rcEvictionListTool = {
|
|
eviction_list: createRuntimeToolDescriptorFromAiTool(
|
|
'eviction_list',
|
|
createEvictionListTool({
|
|
provenance: this.deps.provenance,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
deletedRawPaths: eviction?.deletedRawPaths ?? [],
|
|
}),
|
|
),
|
|
};
|
|
const rcEmitConflictResolutionTool = {
|
|
emit_conflict_resolution: createRuntimeToolDescriptorFromAiTool(
|
|
'emit_conflict_resolution',
|
|
createEmitConflictResolutionTool({ stageIndex }),
|
|
),
|
|
};
|
|
const rcEmitEvictionDecisionTool = {
|
|
emit_eviction_decision: createRuntimeToolDescriptorFromAiTool(
|
|
'emit_eviction_decision',
|
|
createEmitEvictionDecisionTool({
|
|
stageIndex,
|
|
deletedRawPaths: eviction?.deletedRawPaths ?? [],
|
|
}),
|
|
),
|
|
};
|
|
const rcEmitArtifactResolutionTool = {
|
|
emit_artifact_resolution: createRuntimeToolDescriptorFromAiTool(
|
|
'emit_artifact_resolution',
|
|
createEmitArtifactResolutionTool({
|
|
stageIndex,
|
|
allowedPaths: allStagedPaths,
|
|
}),
|
|
),
|
|
};
|
|
const rcEmitUnmappedFallbackTool = {
|
|
emit_unmapped_fallback: createRuntimeToolDescriptorFromAiTool(
|
|
'emit_unmapped_fallback',
|
|
createEmitUnmappedFallbackTool({
|
|
stageIndex,
|
|
allowedPaths: allStagedPaths,
|
|
tableRefExists: (tableRef) => this.tableRefExistsInSemanticLayer(rcScopedSl, slConnectionIds, tableRef),
|
|
}),
|
|
),
|
|
};
|
|
|
|
const reconcileBaseFraming = await this.deps.promptService.loadPrompt('memory_agent_bundle_ingest_reconcile');
|
|
const reconcileSkills = await this.deps.skillsRegistry.listSkills(
|
|
Array.from(
|
|
new Set(['ingest_triage', 'sl_capture', 'wiki_capture', ...(adapter.reconcileSkillNames ?? [])]),
|
|
),
|
|
'memory_agent',
|
|
);
|
|
const reconcileSkillsPrompt = this.deps.skillsRegistry.buildSkillsPrompt(reconcileSkills, 'memory_agent');
|
|
const relevantCanonicalPins = selectRelevantCanonicalPins(stageIndex, canonicalPins);
|
|
|
|
const stage4 = ctx?.startPhase(0.16);
|
|
const hasCandidateReconcileWork = (dedupResult?.representatives.length ?? 0) > 0;
|
|
const hasReconcileWork =
|
|
stageIndex.workUnits.some((wu) => wu.actions.length > 0) ||
|
|
(eviction?.deletedRawPaths.length ?? 0) > 0 ||
|
|
hasCandidateReconcileWork;
|
|
if (hasReconcileWork || overrideReport) {
|
|
emitStageProgress('reconciliation', 84, 'Reconciling results');
|
|
await stage4?.updateProgress(0.0, 'Reconciling results');
|
|
}
|
|
|
|
let curatorReport = null;
|
|
let curatorWarnings: string[] = [];
|
|
let reconcileOutcome: Awaited<ReturnType<typeof runReconciliationStage4>>;
|
|
|
|
if (contextReport && this.deps.curatorPagination) {
|
|
const curatorOutcome = await this.deps.curatorPagination.reconcile({
|
|
runId: runRow.id,
|
|
sourceKey: job.sourceKey,
|
|
jobId: job.jobId,
|
|
stageIndex,
|
|
evictionUnit: eviction,
|
|
representatives: dedupResult?.representatives ?? [],
|
|
initialBudget: this.resolveContextCuratorBudget(job.bundleRef, stageIndex),
|
|
modelRole: 'curator',
|
|
buildSystemPrompt: () =>
|
|
buildReconcileSystemPrompt({
|
|
baseFraming: reconcileBaseFraming,
|
|
skillsPrompt: reconcileSkillsPrompt,
|
|
syncId,
|
|
sourceKey: job.sourceKey,
|
|
canonicalPins: relevantCanonicalPins,
|
|
}),
|
|
buildUserPrompt: ({ summary, items, runState }) =>
|
|
buildReconcileUserPrompt(stageIndex, eviction, { summary, items }, reconcileNotes, runState),
|
|
buildToolSet: (_passNumber) =>
|
|
wrapToolsWithLogger(
|
|
buildReconcileToolSet({
|
|
loadSkillTool: rcLoadSkill,
|
|
stageListTool: rcStageListTool,
|
|
stageDiffTool: rcStageDiffTool,
|
|
evictionListTool: rcEvictionListTool,
|
|
emitConflictResolutionTool: rcEmitConflictResolutionTool,
|
|
emitEvictionDecisionTool: rcEmitEvictionDecisionTool,
|
|
emitArtifactResolutionTool: rcEmitArtifactResolutionTool,
|
|
emitUnmappedFallbackTool: rcEmitUnmappedFallbackTool,
|
|
readRawSpanTool: rcRawSpanTool,
|
|
toolsetTools: rcToolset.toRuntimeTools(rcToolContext),
|
|
}),
|
|
join(transcriptDir, 'reconcile.jsonl'),
|
|
'reconcile',
|
|
{ onEntry: recordTranscriptEntry(join(transcriptDir, 'reconcile.jsonl')) },
|
|
),
|
|
getReconciliationActions: () => reconcileActions,
|
|
onStepFinish: stage4
|
|
? ({ passNumber, stepIndex, stepBudget }) => {
|
|
emitStageProgress(
|
|
'reconciliation',
|
|
85,
|
|
`Reconciling results: pass ${passNumber} step ${stepIndex}/${stepBudget}`,
|
|
{ transient: true },
|
|
);
|
|
void stage4.updateProgress(
|
|
stepIndex / stepBudget,
|
|
`Reconciling results · pass ${passNumber} step ${stepIndex}`,
|
|
);
|
|
}
|
|
: undefined,
|
|
});
|
|
curatorReport = curatorOutcome.report;
|
|
curatorWarnings = curatorOutcome.warnings;
|
|
reconcileOutcome = {
|
|
skipped: curatorOutcome.skipped,
|
|
stopReason: curatorOutcome.stopReason,
|
|
error: curatorOutcome.error,
|
|
};
|
|
} else {
|
|
reconcileOutcome = await runReconciliationStage4({
|
|
stageIndex,
|
|
evictionUnit: eviction,
|
|
agentRunner: this.deps.agentRunner,
|
|
buildSystemPrompt: () =>
|
|
buildReconcileSystemPrompt({
|
|
baseFraming: reconcileBaseFraming,
|
|
skillsPrompt: reconcileSkillsPrompt,
|
|
syncId,
|
|
sourceKey: job.sourceKey,
|
|
canonicalPins: relevantCanonicalPins,
|
|
}),
|
|
buildUserPrompt: (idx, ev) => buildReconcileUserPrompt(idx, ev, undefined, reconcileNotes),
|
|
buildToolSet: () =>
|
|
wrapToolsWithLogger(
|
|
buildReconcileToolSet({
|
|
loadSkillTool: rcLoadSkill,
|
|
stageListTool: rcStageListTool,
|
|
stageDiffTool: rcStageDiffTool,
|
|
evictionListTool: rcEvictionListTool,
|
|
emitConflictResolutionTool: rcEmitConflictResolutionTool,
|
|
emitEvictionDecisionTool: rcEmitEvictionDecisionTool,
|
|
emitArtifactResolutionTool: rcEmitArtifactResolutionTool,
|
|
emitUnmappedFallbackTool: rcEmitUnmappedFallbackTool,
|
|
readRawSpanTool: rcRawSpanTool,
|
|
toolsetTools: rcToolset.toRuntimeTools(rcToolContext),
|
|
}),
|
|
join(transcriptDir, 'reconcile.jsonl'),
|
|
'reconcile',
|
|
{ onEntry: recordTranscriptEntry(join(transcriptDir, 'reconcile.jsonl')) },
|
|
),
|
|
modelRole: 'reconcile',
|
|
stepBudget: 60,
|
|
sourceKey: job.sourceKey,
|
|
jobId: job.jobId,
|
|
force: !!overrideReport,
|
|
onStepFinish: stage4
|
|
? ({ stepIndex, stepBudget }) => {
|
|
emitStageProgress('reconciliation', 85, `Reconciling results: step ${stepIndex}/${stepBudget}`, {
|
|
transient: true,
|
|
});
|
|
void stage4.updateProgress(stepIndex / stepBudget, `Reconciling results · step ${stepIndex}`);
|
|
}
|
|
: undefined,
|
|
});
|
|
}
|
|
latestReconciliationSkipped = reconcileOutcome.skipped;
|
|
|
|
const danglingReconcileWikiRefs = await findDanglingWikiRefsForActions({
|
|
wikiService: rcScopedWiki,
|
|
scope: 'GLOBAL',
|
|
scopeId: null,
|
|
actions: reconcileActions,
|
|
});
|
|
if (danglingReconcileWikiRefs.length > 0) {
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
throw new Error(`wiki references target missing page(s): ${danglingReconcileWikiRefs.join(', ')}`);
|
|
}
|
|
|
|
const candidateSummaryAfterReconcile =
|
|
contextReport && this.deps.contextEvidenceCandidates
|
|
? await this.deps.contextEvidenceCandidates.getCandidateSummary(runRow.id)
|
|
: null;
|
|
memoryFlow?.emit({
|
|
type: 'reconciliation_finished',
|
|
conflictCount: stageIndex.conflictsResolved.length,
|
|
fallbackCount: stageIndex.unmappedFallbacks.length,
|
|
});
|
|
await runTrace.event('debug', 'reconciliation', 'reconciliation_finished', {
|
|
skipped: reconcileOutcome.skipped,
|
|
stopReason: reconcileOutcome.stopReason ?? null,
|
|
actionCount: reconcileActions.length,
|
|
conflictCount: stageIndex.conflictsResolved.length,
|
|
fallbackCount: stageIndex.unmappedFallbacks.length,
|
|
artifactResolutionCount: stageIndex.artifactResolutions?.length ?? 0,
|
|
});
|
|
|
|
await stage4?.updateProgress(1.0, reconcileOutcome.skipped ? 'No reconciliation needed' : 'Reconciled');
|
|
|
|
const preFinalizationSha = await sessionWorktree.git.revParseHead();
|
|
const preFinalizationSourcesByConnection = await this.loadSourcesByConnection(
|
|
sessionWorktree.workdir,
|
|
slConnectionIds,
|
|
);
|
|
let finalizationActions: MemoryAction[] = [];
|
|
let finalizationTouchedPaths: string[] = [];
|
|
let finalizationTouchedSources: TouchedSlSource[] = [];
|
|
let finalizationChangedWikiPageKeys: string[] = [];
|
|
let finalizationSha: string | null = null;
|
|
|
|
activePhase = 'finalization';
|
|
if (adapter.finalize) {
|
|
const stageFinalization = ctx?.startPhase(0.04);
|
|
emitStageProgress('finalization', 87, 'Running deterministic finalization');
|
|
await stageFinalization?.updateProgress(0.0, 'Running deterministic finalization');
|
|
await runTrace.event('debug', 'finalization', 'finalization_started', { sourceKey: job.sourceKey });
|
|
const result = await adapter.finalize({
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
jobId: job.jobId,
|
|
runId: createdRunRow.id,
|
|
stagedDir,
|
|
workdir: sessionWorktree.workdir,
|
|
...(overrideReport ? {} : { parseArtifacts }),
|
|
stageIndex,
|
|
workUnitOutcomes,
|
|
reconciliationActions: reconcileActions,
|
|
...(overrideReport
|
|
? {
|
|
overrideReplay: {
|
|
priorJobId: overrideReport.jobId,
|
|
priorRunId: overrideReport.runId,
|
|
priorSyncId: overrideReport.body.syncId,
|
|
evictionRawPaths: overrideReport.body.evictionInputs,
|
|
},
|
|
}
|
|
: {}),
|
|
});
|
|
if (result.errors.length > 0) {
|
|
finalizationOutcome = {
|
|
sourceKey: job.sourceKey,
|
|
status: 'failed',
|
|
commitSha: null,
|
|
touchedPaths: [],
|
|
declaredTouchedSources: result.touchedSources,
|
|
derivedTouchedSources: [],
|
|
declaredChangedWikiPageKeys: result.changedWikiPageKeys,
|
|
derivedChangedWikiPageKeys: [],
|
|
mismatches: [],
|
|
result: result.result,
|
|
errors: result.errors,
|
|
warnings: result.warnings,
|
|
actions: result.actions ?? [],
|
|
provenanceExclusions: [],
|
|
};
|
|
latestFinalizationOutcome = finalizationOutcome;
|
|
await runTrace.event('error', 'finalization', 'finalization_failed', {
|
|
sourceKey: job.sourceKey,
|
|
errors: result.errors,
|
|
warnings: result.warnings,
|
|
});
|
|
throw new Error(`deterministic finalization failed: ${result.errors.join('; ')}`);
|
|
}
|
|
|
|
const changedBeforeFinalization = new Set([
|
|
...projectionTouchedPaths,
|
|
...workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? []),
|
|
...(preReconciliationSha && preFinalizationSha !== preReconciliationSha
|
|
? (await sessionWorktree.git.diffNameStatus(preReconciliationSha, preFinalizationSha)).map(
|
|
(entry) => entry.path,
|
|
)
|
|
: []),
|
|
]);
|
|
finalizationTouchedPaths = await sessionWorktree.git.changedPaths();
|
|
const overlapping = finalizationTouchedPaths.filter((path) => changedBeforeFinalization.has(path));
|
|
if (overlapping.length > 0) {
|
|
await runTrace.event('error', 'finalization', 'finalization_failed', {
|
|
sourceKey: job.sourceKey,
|
|
reason: 'path_overlap',
|
|
overlappingPaths: overlapping.sort(),
|
|
});
|
|
throw new Error(
|
|
`finalization modified path(s) already changed earlier in this run: ${overlapping.sort().join(', ')}`,
|
|
);
|
|
}
|
|
|
|
const finalizationCommit =
|
|
finalizationTouchedPaths.length > 0
|
|
? await sessionWorktree.git.commitFiles(
|
|
finalizationTouchedPaths,
|
|
`ingest(${job.sourceKey}): deterministic finalization syncId=${syncId}`,
|
|
this.deps.storage.systemGitAuthor.name,
|
|
this.deps.storage.systemGitAuthor.email,
|
|
)
|
|
: await sessionWorktree.git.commitStaged(
|
|
`ingest(${job.sourceKey}): deterministic finalization syncId=${syncId}`,
|
|
this.deps.storage.systemGitAuthor.name,
|
|
this.deps.storage.systemGitAuthor.email,
|
|
);
|
|
finalizationSha = finalizationCommit.created ? finalizationCommit.commitHash : null;
|
|
const postFinalizationSha = await sessionWorktree.git.revParseHead();
|
|
finalizationTouchedPaths =
|
|
preFinalizationSha !== postFinalizationSha
|
|
? (await sessionWorktree.git.diffNameStatus(preFinalizationSha, postFinalizationSha)).map(
|
|
(entry) => entry.path,
|
|
)
|
|
: [];
|
|
|
|
const changedConnectionIds = [
|
|
...new Set([
|
|
...slConnectionIds,
|
|
...finalizationTouchedPaths
|
|
.filter((path) => path.startsWith('semantic-layer/'))
|
|
.map((path) => path.split('/')[1])
|
|
.filter((connectionId): connectionId is string => Boolean(connectionId)),
|
|
]),
|
|
].sort();
|
|
const postFinalizationSourcesByConnection = await this.loadSourcesByConnection(
|
|
sessionWorktree.workdir,
|
|
changedConnectionIds,
|
|
);
|
|
const scope = await deriveFinalizationTouchedSources({
|
|
changedPaths: finalizationTouchedPaths,
|
|
beforeSourcesByConnection: preFinalizationSourcesByConnection,
|
|
afterSourcesByConnection: postFinalizationSourcesByConnection,
|
|
});
|
|
if (scope.unresolvedPaths.length > 0) {
|
|
await runTrace.event('error', 'finalization', 'finalization_failed', {
|
|
sourceKey: job.sourceKey,
|
|
reason: 'unresolved_semantic_layer_paths',
|
|
unresolvedPaths: scope.unresolvedPaths,
|
|
});
|
|
throw new Error(`could not resolve finalization semantic-layer path(s): ${scope.unresolvedPaths.join(', ')}`);
|
|
}
|
|
finalizationTouchedSources = scope.touchedSources;
|
|
finalizationChangedWikiPageKeys = deriveFinalizationWikiPageKeys(finalizationTouchedPaths);
|
|
const mismatches = compareFinalizationDeclarations({
|
|
declaredTouchedSources: result.touchedSources,
|
|
derivedTouchedSources: finalizationTouchedSources,
|
|
declaredChangedWikiPageKeys: result.changedWikiPageKeys,
|
|
derivedChangedWikiPageKeys: finalizationChangedWikiPageKeys,
|
|
});
|
|
if (mismatches.length > 0) {
|
|
finalizationOutcome = {
|
|
sourceKey: job.sourceKey,
|
|
status: 'failed',
|
|
commitSha: finalizationSha,
|
|
touchedPaths: finalizationTouchedPaths,
|
|
declaredTouchedSources: result.touchedSources,
|
|
derivedTouchedSources: finalizationTouchedSources,
|
|
declaredChangedWikiPageKeys: result.changedWikiPageKeys,
|
|
derivedChangedWikiPageKeys: finalizationChangedWikiPageKeys,
|
|
mismatches,
|
|
result: result.result,
|
|
errors: ['finalization touched artifact declaration mismatch'],
|
|
warnings: result.warnings,
|
|
actions: result.actions ?? [],
|
|
provenanceExclusions: [],
|
|
};
|
|
latestFinalizationOutcome = finalizationOutcome;
|
|
await runTrace.event('error', 'finalization', 'finalization_failed', {
|
|
sourceKey: job.sourceKey,
|
|
reason: 'declaration_mismatch',
|
|
mismatches,
|
|
});
|
|
throw new Error(
|
|
`finalization touched artifact declaration mismatch: ${mismatches
|
|
.map((mismatch) => `${mismatch.direction}:${mismatch.artifactKind}:${mismatch.key}`)
|
|
.join(', ')}`,
|
|
);
|
|
}
|
|
finalizationActions = result.actions ?? [];
|
|
finalizationOutcome = {
|
|
sourceKey: job.sourceKey,
|
|
status: 'success',
|
|
commitSha: finalizationSha,
|
|
touchedPaths: finalizationTouchedPaths,
|
|
declaredTouchedSources: result.touchedSources,
|
|
derivedTouchedSources: finalizationTouchedSources,
|
|
declaredChangedWikiPageKeys: result.changedWikiPageKeys,
|
|
derivedChangedWikiPageKeys: finalizationChangedWikiPageKeys,
|
|
mismatches,
|
|
result: result.result,
|
|
errors: [],
|
|
warnings: result.warnings,
|
|
actions: finalizationActions,
|
|
provenanceExclusions: [],
|
|
};
|
|
latestFinalizationOutcome = finalizationOutcome;
|
|
emitStageProgress('finalization', 88, 'Deterministic finalization complete');
|
|
await stageFinalization?.updateProgress(1.0, 'Deterministic finalization complete');
|
|
await runTrace.event('debug', 'finalization', 'finalization_committed', {
|
|
sourceKey: job.sourceKey,
|
|
commitSha: finalizationSha,
|
|
touchedPaths: finalizationTouchedPaths,
|
|
touchedSources: finalizationTouchedSources,
|
|
changedWikiPageKeys: finalizationChangedWikiPageKeys,
|
|
warnings: result.warnings,
|
|
});
|
|
} else {
|
|
await runTrace.event('debug', 'finalization', 'finalization_skipped', { sourceKey: job.sourceKey });
|
|
}
|
|
|
|
const repairConnectionIds = [
|
|
...new Set([
|
|
...slConnectionIds,
|
|
...finalizationTouchedSources.map((source) => source.connectionId),
|
|
]),
|
|
].sort();
|
|
activePhase = 'wiki_sl_ref_repair';
|
|
emitStageProgress('wiki_sl_ref_repair', 88, 'Repairing wiki semantic-layer references');
|
|
wikiSlRefRepairResult = await traceTimed(
|
|
runTrace,
|
|
'wiki_sl_ref_repair',
|
|
'wiki_sl_refs_repair',
|
|
{ connectionIds: repairConnectionIds },
|
|
() =>
|
|
repairWikiSlRefs({
|
|
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
|
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
configService: sessionWorktree.config,
|
|
connectionIds: repairConnectionIds,
|
|
}),
|
|
);
|
|
await runTrace.event('debug', 'wiki_sl_ref_repair', 'wiki_sl_refs_repaired', {
|
|
repairCount: wikiSlRefRepairResult.repairs.length,
|
|
repairs: wikiSlRefRepairResult.repairs,
|
|
warnings: wikiSlRefRepairResult.warnings,
|
|
});
|
|
emitStageProgress('wiki_sl_ref_repair', 88, 'Checked wiki semantic-layer references');
|
|
const postReconciliationSha = await sessionWorktree.git.revParseHead();
|
|
const postReconciliationPaths =
|
|
preReconciliationSha && postReconciliationSha && preReconciliationSha !== postReconciliationSha
|
|
? (await sessionWorktree.git.diffNameStatus(preReconciliationSha, postReconciliationSha)).map((entry) => entry.path)
|
|
: [];
|
|
const baseFinalChangedWikiPageKeys = this.uniqueWikiPageKeys([
|
|
...(isolatedDiffEnabled ? projectionChangedWikiPageKeys : []),
|
|
...workUnitOutcomes
|
|
.flatMap((outcome) => outcome.patchTouchedPaths ?? [])
|
|
.flatMap((path) => this.wikiPageKeysFromPaths([path])),
|
|
...this.wikiPageKeysFromActions(reconcileActions),
|
|
...finalizationChangedWikiPageKeys,
|
|
...postReconciliationPaths.flatMap((path) => this.wikiPageKeysFromPaths([path])),
|
|
...wikiSlRefRepairResult.repairs.filter((repair) => repair.scope === 'GLOBAL').map((repair) => repair.pageKey),
|
|
]);
|
|
const finalTouchedSlSources = this.uniqueTouchedSlSources([
|
|
...(isolatedDiffEnabled ? projectionTouchedSources : []),
|
|
...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources),
|
|
...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId),
|
|
...this.touchedSlSourcesFromPaths(postReconciliationPaths),
|
|
...finalizationTouchedSources,
|
|
]);
|
|
const finalWikiGateScope = await this.wikiPageKeysForFinalGates({
|
|
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
|
changedWikiPageKeys: baseFinalChangedWikiPageKeys,
|
|
touchedSlSources: finalTouchedSlSources,
|
|
actions: [...stageIndex.workUnits.flatMap((wu) => wu.actions), ...reconcileActions],
|
|
});
|
|
const finalChangedWikiPageKeys = finalWikiGateScope.pageKeys;
|
|
|
|
const finalTargetPolicyPaths = [
|
|
...projectionTouchedPaths,
|
|
...workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? []),
|
|
...postReconciliationPaths,
|
|
...finalizationTouchedPaths,
|
|
];
|
|
const targetPolicyTraceData = {
|
|
allowedTargetConnectionIds: slConnectionIds,
|
|
touchedPaths: [...new Set(finalTargetPolicyPaths)].sort(),
|
|
};
|
|
activePhase = 'target_policy';
|
|
activeFailureDetails = targetPolicyTraceData;
|
|
emitStageProgress('final_gates', 88, 'Checking semantic-layer target policy');
|
|
await traceTimed(runTrace, 'target_policy', 'semantic_layer_target_policy', targetPolicyTraceData, async () => {
|
|
assertSemanticLayerTargetPathsAllowed({
|
|
paths: finalTargetPolicyPaths,
|
|
allowedConnectionIds: new Set(slConnectionIds),
|
|
});
|
|
});
|
|
activeFailureDetails = undefined;
|
|
|
|
const finalArtifactGateTraceData = {
|
|
changedWikiPageKeys: finalChangedWikiPageKeys,
|
|
wikiReferenceGateScope: finalWikiGateScope.trace,
|
|
touchedSlSources: finalTouchedSlSources,
|
|
projectionTouchedPaths,
|
|
workUnitPatchTouchedPaths: workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? []),
|
|
actionOrigins: this.finalGateActionOrigins({
|
|
stageIndex,
|
|
reconcileActions,
|
|
fallbackConnectionId: job.connectionId,
|
|
}),
|
|
preReconciliationSha,
|
|
postReconciliationSha,
|
|
postReconciliationPaths,
|
|
reconciliationActionCount: reconcileActions.length,
|
|
wikiSlRefRepairCount: wikiSlRefRepairResult.repairs.length,
|
|
};
|
|
activePhase = 'final_gates';
|
|
activeFailureDetails = finalArtifactGateTraceData;
|
|
emitStageProgress('final_gates', 89, 'Running final artifact gates');
|
|
try {
|
|
await traceTimed(
|
|
runTrace,
|
|
'final_gates',
|
|
'final_artifact_gates',
|
|
finalArtifactGateTraceData,
|
|
async () => {
|
|
await validateFinalIngestArtifacts({
|
|
connectionIds: repairConnectionIds,
|
|
changedWikiPageKeys: finalChangedWikiPageKeys,
|
|
touchedSlSources: finalTouchedSlSources,
|
|
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
|
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
validateTouchedSources: (touched) =>
|
|
validateWuTouchedSources(
|
|
{
|
|
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
connections: this.deps.connections,
|
|
configService: sessionWorktree.config,
|
|
gitService: sessionWorktree.git,
|
|
slSourcesRepository: this.deps.slSourcesRepository,
|
|
probeRowCount: this.deps.settings.probeRowCount,
|
|
slValidator: this.deps.slValidator,
|
|
},
|
|
touched,
|
|
),
|
|
tableExists: (connectionId, tableRef) =>
|
|
this.tableRefExistsInSemanticLayer(
|
|
this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
[connectionId],
|
|
tableRef,
|
|
),
|
|
});
|
|
},
|
|
);
|
|
} catch (error) {
|
|
const gateError = this.errorMessage(error);
|
|
const repairPaths = finalGateRepairPaths({
|
|
changedWikiPageKeys: finalChangedWikiPageKeys,
|
|
touchedSlSources: finalTouchedSlSources,
|
|
});
|
|
emitStageProgress('final_gates', 89, 'Repairing final artifact gates');
|
|
const gateRepair = await repairFinalGateFailure({
|
|
agentRunner: this.deps.agentRunner,
|
|
workdir: sessionWorktree.workdir,
|
|
gateError,
|
|
allowedPaths: repairPaths,
|
|
trace: runTrace,
|
|
repairKind: 'final_artifact_gate',
|
|
maxAttempts: 1,
|
|
stepBudget: 16,
|
|
});
|
|
|
|
isolatedDiffSummary.gateRepairAttempts += gateRepair.attempts;
|
|
if (gateRepair.status === 'failed') {
|
|
isolatedDiffSummary.gateRepairFailures += 1;
|
|
activeFailureDetails = {
|
|
...finalArtifactGateTraceData,
|
|
gateRepair,
|
|
gateError,
|
|
};
|
|
throw new Error(`${gateError}\ngate repair failed: ${gateRepair.reason}`);
|
|
}
|
|
|
|
isolatedDiffSummary.gateRepairs += 1;
|
|
await traceTimed(
|
|
runTrace,
|
|
'final_gates',
|
|
'final_artifact_gates_after_gate_repair',
|
|
{
|
|
...finalArtifactGateTraceData,
|
|
repairedPaths: gateRepair.changedPaths,
|
|
},
|
|
async () => {
|
|
await validateFinalIngestArtifacts({
|
|
connectionIds: repairConnectionIds,
|
|
changedWikiPageKeys: finalChangedWikiPageKeys,
|
|
touchedSlSources: finalTouchedSlSources,
|
|
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
|
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
validateTouchedSources: (touched) =>
|
|
validateWuTouchedSources(
|
|
{
|
|
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
connections: this.deps.connections,
|
|
configService: sessionWorktree.config,
|
|
gitService: sessionWorktree.git,
|
|
slSourcesRepository: this.deps.slSourcesRepository,
|
|
probeRowCount: this.deps.settings.probeRowCount,
|
|
slValidator: this.deps.slValidator,
|
|
},
|
|
touched,
|
|
),
|
|
tableExists: (connectionId, tableRef) =>
|
|
this.tableRefExistsInSemanticLayer(
|
|
this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
|
[connectionId],
|
|
tableRef,
|
|
),
|
|
});
|
|
},
|
|
);
|
|
|
|
const repairCommit = await sessionWorktree.git.commitFiles(
|
|
gateRepair.changedPaths,
|
|
`ingest(${job.sourceKey}): repair final gates syncId=${syncId}`,
|
|
this.deps.storage.systemGitAuthor.name,
|
|
this.deps.storage.systemGitAuthor.email,
|
|
);
|
|
if (!repairCommit.created) {
|
|
isolatedDiffSummary.gateRepairFailures += 1;
|
|
throw new Error('final gate repair produced no committable changes');
|
|
}
|
|
await runTrace.event('debug', 'final_gates', 'final_gate_repair_committed', {
|
|
commitSha: repairCommit.commitHash,
|
|
repairedPaths: gateRepair.changedPaths,
|
|
});
|
|
}
|
|
activeFailureDetails = undefined;
|
|
|
|
activePhase = 'provenance_validation';
|
|
emitStageProgress('provenance', 90, 'Validating provenance rows');
|
|
latestReportWorkUnits = this.toReportWorkUnits(stageIndex);
|
|
latestReconciliationActions = reconcileActions;
|
|
latestConflictsResolved = stageIndex.conflictsResolved;
|
|
latestEvictionsApplied = stageIndex.evictionsApplied;
|
|
latestUnmappedFallbacks = stageIndex.unmappedFallbacks;
|
|
latestArtifactResolutions = stageIndex.artifactResolutions ?? [];
|
|
latestEvictionInputs = eviction?.deletedRawPaths ?? [];
|
|
latestUnresolvedCards = unresolvedCards ?? [];
|
|
const finalizationProvenance = this.partitionFinalizationActionsForProvenance({
|
|
actions: finalizationActions,
|
|
currentRawPaths: new Set(currentHashes.keys()),
|
|
currentEvictionRawPaths: new Set(stageIndex.evictionsApplied.map((entry) => entry.rawPath)),
|
|
overrideEvictionRawPaths: new Set(overrideReport?.body.evictionInputs ?? []),
|
|
});
|
|
if (finalizationOutcome) {
|
|
finalizationOutcome.provenanceExclusions = finalizationProvenance.exclusions;
|
|
latestFinalizationOutcome = finalizationOutcome;
|
|
}
|
|
const provenancePlan = this.buildProvenancePlan({
|
|
job,
|
|
syncId,
|
|
currentHashes,
|
|
stageIndex,
|
|
reconcileActions,
|
|
finalizationActions: finalizationProvenance.actions,
|
|
});
|
|
const provenanceRows = provenancePlan.rows;
|
|
const currentRawPaths = new Set(currentHashes.keys());
|
|
const deletedRawPaths = new Set(eviction?.deletedRawPaths ?? []);
|
|
const provenanceValidationData = this.provenanceValidationTraceData({
|
|
plan: provenancePlan,
|
|
currentRawPaths,
|
|
deletedRawPaths,
|
|
});
|
|
const reportProvenanceRows = this.toReportProvenanceRows(provenanceRows);
|
|
latestReportProvenanceRows = reportProvenanceRows;
|
|
activeFailureDetails = provenanceValidationData;
|
|
await traceTimed(
|
|
runTrace,
|
|
'provenance',
|
|
'provenance_rows_validation',
|
|
provenanceValidationData,
|
|
async () => {
|
|
validateProvenanceRawPaths({
|
|
rows: provenanceRows,
|
|
currentRawPaths,
|
|
deletedRawPaths,
|
|
});
|
|
},
|
|
);
|
|
activeFailureDetails = undefined;
|
|
|
|
// Stage 6 — squash commit
|
|
activePhase = 'squash';
|
|
const stage6 = ctx?.startPhase(0.04);
|
|
emitStageProgress('save', 91, 'Saving changes');
|
|
await stage6?.updateProgress(0.0, 'Saving changes');
|
|
try {
|
|
await sessionWorktree.git.assertWorktreeClean();
|
|
} catch (error) {
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
throw error;
|
|
}
|
|
const commitMessage = this.buildCommitMessage(job, syncId, diffSummary, failedWorkUnits);
|
|
const squashResult = await this.deps.lockingService.withLock('config:repo', async () => {
|
|
const preSquashSha = await this.deps.gitService.revParseHead();
|
|
const merge = await this.deps.gitService.squashMergeIntoMain(
|
|
sessionWorktree.branch,
|
|
this.deps.storage.systemGitAuthor.name,
|
|
this.deps.storage.systemGitAuthor.email,
|
|
commitMessage,
|
|
);
|
|
return { preSquashSha, merge };
|
|
});
|
|
const mergeResult = squashResult.merge;
|
|
if (!mergeResult.ok) {
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
throw new Error(`squash merge conflict: ${mergeResult.conflictPaths.join(', ')}`);
|
|
}
|
|
const commitSha = mergeResult.touchedPaths.length === 0 ? null : mergeResult.squashSha;
|
|
await runTrace.event('debug', 'squash', 'squash_finished', {
|
|
commitSha,
|
|
touchedPaths: mergeResult.touchedPaths,
|
|
});
|
|
const memoryFlowSavedActions = stageIndex.workUnits
|
|
.flatMap((wu) => wu.actions)
|
|
.concat(reconcileActions)
|
|
.concat(finalizationActions);
|
|
memoryFlow?.emit({
|
|
type: 'saved',
|
|
commitSha,
|
|
wikiCount: countMemoryFlowActions(memoryFlowSavedActions, 'wiki'),
|
|
slCount: countMemoryFlowActions(memoryFlowSavedActions, 'sl'),
|
|
});
|
|
await stage6?.updateProgress(1.0, commitSha ? `Saved changes (${commitSha.slice(0, 8)})` : 'No changes to save');
|
|
|
|
// Sync the shared `knowledge` index from the squashed diff in a single
|
|
// transaction. If this throws, the run fails and no partial index state
|
|
// survives (thanks to the transactional upsert in applyDiffTransactional).
|
|
if (commitSha) {
|
|
// Multi-file squash → omit path so the handler diffs the whole commit
|
|
// (a comma-joined pathspec would match nothing and the job would no-op).
|
|
const pathFilter = mergeResult.touchedPaths.length === 1 ? mergeResult.touchedPaths[0] : '';
|
|
await this.deps.commitMessages.enqueueForExternalCommit({ commitHash: commitSha }, commitMessage, pathFilter);
|
|
await this.deps.wikiService.syncFromCommit(squashResult.preSquashSha, commitSha, runRow.id);
|
|
await this.syncKnowledgeSlRefsFromActions(job.connectionId, memoryFlowSavedActions);
|
|
const touchedConnections = [
|
|
...new Set(
|
|
memoryFlowSavedActions
|
|
.filter((action) => action.target === 'sl')
|
|
.map((action) => actionTargetConnectionId(action, job.connectionId))
|
|
.concat(finalizationTouchedSources.map((source) => source.connectionId)),
|
|
),
|
|
].sort();
|
|
for (const connectionId of touchedConnections) {
|
|
try {
|
|
const { sources: allSources } = await this.deps.semanticLayerService.loadAllSources(connectionId);
|
|
await this.deps.slSearchService.indexSources(connectionId, allSources);
|
|
} catch (err) {
|
|
this.logger.warn(
|
|
`[ingest-bundle] post-squash SL reindex failed for connection=${connectionId}: ${err instanceof Error ? err.message : String(err)}`,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
const stage5 = ctx?.startPhase(0.04);
|
|
emitStageProgress('provenance', 95, 'Recording history');
|
|
await stage5?.updateProgress(0.0, 'Recording history');
|
|
activePhase = 'provenance';
|
|
|
|
await this.deps.provenance.insertMany(provenanceRows);
|
|
await runTrace.event('debug', 'provenance', 'provenance_rows_inserted', {
|
|
rowCount: provenanceRows.length,
|
|
});
|
|
memoryFlow?.emit({ type: 'provenance_recorded', rowCount: provenanceRows.length });
|
|
await stage5?.updateProgress(
|
|
1.0,
|
|
`Recorded ${provenanceRows.length} history entr${provenanceRows.length === 1 ? 'y' : 'ies'}`,
|
|
);
|
|
|
|
const stage7 = ctx?.startPhase(0.04);
|
|
emitStageProgress('report', 97, 'Wrapping up');
|
|
await stage7?.updateProgress(0.0, 'Wrapping up');
|
|
activePhase = 'report';
|
|
|
|
const reportToolTranscripts = Array.from(transcriptSummaries.values()).map((summary) => ({
|
|
unitKey: summary.unitKey,
|
|
path: summary.path,
|
|
toolCallCount: summary.toolCallCount,
|
|
errorCount: summary.errorCount,
|
|
toolNames: Array.from(summary.toolNames).sort(),
|
|
}));
|
|
const capturedMemoryFlow = memoryFlow?.snapshot();
|
|
const reportMemoryFlow = capturedMemoryFlow
|
|
? {
|
|
...capturedMemoryFlow,
|
|
metadata: {
|
|
schemaVersion: 1 as const,
|
|
mode: 'full' as const,
|
|
origin: 'captured' as const,
|
|
timing: 'captured' as const,
|
|
capturedAt: new Date().toISOString(),
|
|
sourceReportId: null,
|
|
sourceReportPath: null,
|
|
fallbackReason: null,
|
|
},
|
|
}
|
|
: undefined;
|
|
|
|
const reportBody = {
|
|
status: 'completed' as const,
|
|
syncId,
|
|
diffSummary,
|
|
fetch: fetchReport ?? undefined,
|
|
commitSha,
|
|
tracePath: runTrace.tracePath,
|
|
isolatedDiff: !overrideReport ? isolatedDiffSummary : undefined,
|
|
workUnits: stageIndex.workUnits.map((wu) => ({
|
|
unitKey: wu.unitKey,
|
|
rawFiles: wu.rawFiles,
|
|
status: wu.status,
|
|
reason: wu.reason,
|
|
actions: wu.actions,
|
|
touchedSlSources: wu.touchedSlSources,
|
|
slDisallowed: wu.slDisallowed,
|
|
slDisallowedReason: wu.slDisallowedReason,
|
|
})),
|
|
failedWorkUnits,
|
|
reconciliationSkipped: reconcileOutcome.skipped,
|
|
conflictsResolved: stageIndex.conflictsResolved,
|
|
evictionsApplied: stageIndex.evictionsApplied,
|
|
unmappedFallbacks: stageIndex.unmappedFallbacks,
|
|
artifactResolutions: stageIndex.artifactResolutions ?? [],
|
|
evictionInputs: eviction?.deletedRawPaths ?? [],
|
|
reconciliationActions: reconcileActions,
|
|
evictionDecisions: rcToolSession.evictionDecisions ?? [],
|
|
unresolvedCards: unresolvedCards ?? [],
|
|
supersededBy: null,
|
|
overrideOf: overrideReport?.jobId ?? null,
|
|
provenanceRows: reportProvenanceRows,
|
|
toolTranscripts: reportToolTranscripts,
|
|
finalization: finalizationOutcome,
|
|
wikiSlRefRepairs: wikiSlRefRepairResult.repairs,
|
|
wikiSlRefRepairWarnings: wikiSlRefRepairResult.warnings,
|
|
...(reportMemoryFlow ? { memoryFlow: reportMemoryFlow } : {}),
|
|
context: contextReport
|
|
? {
|
|
documentsIndexed: contextReport.documentsIndexed,
|
|
chunksIndexed: contextReport.chunksIndexed,
|
|
documentsDeleted: contextReport.documentsDeleted,
|
|
embeddingFailures: contextReport.embeddingFailures,
|
|
candidatesCreated: candidateSummaryAfterReconcile?.total ?? 0,
|
|
candidatesPromoted: candidateSummaryAfterReconcile?.promoted ?? 0,
|
|
candidatesRejected: candidateSummaryAfterReconcile?.rejected ?? 0,
|
|
triage: triageResult?.report,
|
|
dedup: dedupResult?.enabled
|
|
? {
|
|
candidatesIn: dedupResult.candidatesIn,
|
|
clustersOut: dedupResult.clustersOut,
|
|
mergedCount: dedupResult.mergedCount,
|
|
largestClusterSize: dedupResult.largestClusterSize,
|
|
embeddingFailures: dedupResult.embeddingFailures,
|
|
}
|
|
: undefined,
|
|
curator: curatorReport ?? undefined,
|
|
knowledgeCreates: stageIndex.workUnits
|
|
.flatMap((wu) => wu.actions)
|
|
.concat(reconcileActions)
|
|
.filter((action) => action.target === 'wiki' && action.type === 'created').length,
|
|
knowledgeUpdates: stageIndex.workUnits
|
|
.flatMap((wu) => wu.actions)
|
|
.concat(reconcileActions)
|
|
.filter((action) => action.target === 'wiki' && action.type === 'updated').length,
|
|
capped: sourceContextReport?.capped ?? false,
|
|
warnings: [
|
|
...new Set([
|
|
...contextReport.warnings,
|
|
...(sourceContextReport?.warnings ?? []),
|
|
...(triageResult?.warnings ?? []),
|
|
...(carryForwardResult?.warnings ?? []),
|
|
...(dedupResult?.warnings ?? []),
|
|
...curatorWarnings,
|
|
]),
|
|
],
|
|
}
|
|
: undefined,
|
|
};
|
|
const createdReport = await this.deps.reports.create({
|
|
runId: runRow.id,
|
|
jobId: job.jobId,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
body: reportBody,
|
|
});
|
|
const reportId = reportIdFromCreateResult(createdReport);
|
|
await runTrace.event('debug', 'report', 'success_report_created', {
|
|
reportId,
|
|
runId: runRow.id,
|
|
tracePath: runTrace.tracePath,
|
|
});
|
|
memoryFlow?.update({
|
|
...(reportId ? { reportId, reportPath: reportId } : {}),
|
|
});
|
|
memoryFlow?.emit({
|
|
type: 'report_created',
|
|
runId: runRow.id,
|
|
...(reportId ? { reportPath: reportId } : {}),
|
|
});
|
|
if (overrideReport) {
|
|
await this.deps.reports.markSuperseded(overrideReport.jobId, job.jobId);
|
|
}
|
|
if (contextReport && this.deps.contextEvidenceIndex) {
|
|
await this.deps.contextEvidenceIndex.publishSync({
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
diffSet,
|
|
});
|
|
}
|
|
|
|
// Stage 7 — status
|
|
await this.deps.runs.markCompleted(
|
|
runRow.id,
|
|
diffSummary,
|
|
fetchReport?.status === 'partial' ? 'partial' : 'completed',
|
|
);
|
|
if (job.bundleRef.kind === 'scheduled_pull') {
|
|
await adapter.onPullSucceeded?.({
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
syncId,
|
|
trigger: job.trigger,
|
|
completedAt: new Date(),
|
|
stagedDir,
|
|
});
|
|
}
|
|
await stage7?.updateProgress(1.0, 'Done');
|
|
await runTrace.event('info', 'run', 'ingest_finished', {
|
|
status: 'completed',
|
|
commitSha,
|
|
failedWorkUnits,
|
|
tracePath: runTrace.tracePath,
|
|
});
|
|
|
|
cleanupOutcome = 'success';
|
|
return {
|
|
runId: runRow.id,
|
|
syncId,
|
|
diffSummary,
|
|
workUnitCount: workUnits.length,
|
|
failedWorkUnits,
|
|
artifactsWritten: provenanceRows.filter((r) => r.actionType !== 'skipped').length,
|
|
commitSha,
|
|
};
|
|
} finally {
|
|
await this.deps.sessionWorktreeService.cleanup(sessionWorktree, cleanupOutcome);
|
|
}
|
|
} catch (error) {
|
|
await activeTrace.event(
|
|
'error',
|
|
'run',
|
|
'ingest_failed',
|
|
{
|
|
tracePath: activeTrace.tracePath,
|
|
phase: activePhase,
|
|
runId: runRow?.id ?? null,
|
|
syncId,
|
|
},
|
|
error,
|
|
);
|
|
if (runRow) {
|
|
await this.deps.runs.markFailed(runRow.id);
|
|
await this.deps.reports.create({
|
|
runId: runRow.id,
|
|
jobId: job.jobId,
|
|
connectionId: job.connectionId,
|
|
sourceKey: job.sourceKey,
|
|
body: {
|
|
status: 'failed' as const,
|
|
syncId,
|
|
diffSummary: latestDiffSummary,
|
|
commitSha: null,
|
|
tracePath: activeTrace.tracePath,
|
|
isolatedDiff: latestIsolatedDiffSummary,
|
|
failure: {
|
|
phase: activePhase,
|
|
message: this.errorMessage(error),
|
|
...(activeFailureDetails ? { details: activeFailureDetails } : {}),
|
|
},
|
|
workUnits:
|
|
latestReportWorkUnits.length > 0
|
|
? latestReportWorkUnits
|
|
: latestWorkUnits.map((wu) => ({
|
|
unitKey: wu.unitKey,
|
|
rawFiles: [],
|
|
status: wu.status,
|
|
reason: wu.reason,
|
|
actions: wu.actions,
|
|
touchedSlSources: wu.touchedSlSources,
|
|
slDisallowed: wu.slDisallowed,
|
|
slDisallowedReason: wu.slDisallowedReason,
|
|
})),
|
|
failedWorkUnits: latestFailedWorkUnits,
|
|
reconciliationSkipped: latestReconciliationSkipped,
|
|
conflictsResolved: latestConflictsResolved,
|
|
evictionsApplied: latestEvictionsApplied,
|
|
unmappedFallbacks: latestUnmappedFallbacks,
|
|
artifactResolutions: latestArtifactResolutions,
|
|
evictionInputs: latestEvictionInputs,
|
|
reconciliationActions: latestReconciliationActions,
|
|
finalization: latestFinalizationOutcome,
|
|
evictionDecisions: [],
|
|
unresolvedCards: latestUnresolvedCards,
|
|
supersededBy: null,
|
|
overrideOf: null,
|
|
provenanceRows: latestReportProvenanceRows,
|
|
toolTranscripts: Array.from(transcriptSummaries.values()).map((summary) => ({
|
|
unitKey: summary.unitKey,
|
|
path: summary.path,
|
|
toolCallCount: summary.toolCallCount,
|
|
errorCount: summary.errorCount,
|
|
toolNames: Array.from(summary.toolNames).sort(),
|
|
})),
|
|
},
|
|
});
|
|
await activeTrace.event('info', 'report', 'failure_report_created', {
|
|
runId: runRow.id,
|
|
jobId: job.jobId,
|
|
tracePath: activeTrace.tracePath,
|
|
});
|
|
}
|
|
throw error;
|
|
}
|
|
}
|
|
}
|