From 9a9e40939ae3208989b332f4ec29dcfb7dfb5435 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 16:21:05 +0200 Subject: [PATCH] Improve Notion ingest UX --- .pre-commit-config.yaml | 8 - .../src/commands/connection-notion.test.ts | 47 ++++ .../cli/src/commands/connection-notion.ts | 4 +- packages/cli/src/ingest-viz.test.ts | 85 +++++++ packages/cli/src/ingest.test.ts | 40 ++-- packages/cli/src/ingest.ts | 23 +- packages/context/src/connections/index.ts | 1 + .../src/connections/notion-config.test.ts | 37 +++ .../context/src/connections/notion-config.ts | 25 +- .../adapters/notion/notion.adapter.test.ts | 37 +++ .../src/ingest/memory-flow/known-errors.ts | 28 +++ .../src/ingest/memory-flow/summary.test.ts | 30 +++ .../context/src/ingest/memory-flow/summary.ts | 17 ++ .../src/ingest/memory-flow/view-model.ts | 10 +- .../ingest/sqlite-bundle-ingest-store.test.ts | 219 ++++++++++++++++++ .../src/ingest/sqlite-bundle-ingest-store.ts | 49 +++- scripts/package-artifacts.mjs | 2 +- scripts/package-artifacts.test.mjs | 2 +- scripts/precommit-check.mjs | 188 --------------- scripts/precommit-check.test.mjs | 42 ---- 20 files changed, 615 insertions(+), 279 deletions(-) create mode 100644 packages/context/src/ingest/memory-flow/known-errors.ts delete mode 100644 scripts/precommit-check.mjs delete mode 100644 scripts/precommit-check.test.mjs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 555d7fc0..8908b532 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,14 +33,6 @@ repos: name: ruff format (python) files: ^python/ - - repo: local - hooks: - - id: ktx-package-checks - name: ktx package checks - entry: node scripts/precommit-check.mjs - language: system - files: ^(packages/|scripts/|python/|package\.json$|pnpm-lock\.yaml$|pnpm-workspace\.yaml$|release-policy\.json$|tsconfig\.base\.json$|pyproject\.toml$|uv\.lock$|uv\.toml$) - - repo: https://github.com/Yelp/detect-secrets rev: v1.5.0 hooks: diff --git a/packages/cli/src/commands/connection-notion.test.ts b/packages/cli/src/commands/connection-notion.test.ts index b03484d4..3315e1cc 100644 --- a/packages/cli/src/commands/connection-notion.test.ts +++ b/packages/cli/src/commands/connection-notion.test.ts @@ -355,6 +355,53 @@ describe('runKtxConnectionNotion', () => { expect(io.stdout()).toContain('rootPageIds: 1'); }); + it('uses inline Notion auth_token for interactive discovery', async () => { + const projectDir = join(tempDir, 'project'); + const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' }); + await writeProjectConfig(projectDir, { + ...initialized.config, + connections: { + 'notion-main': { + driver: 'notion', + auth_token: 'ntn_inline_token', + crawl_mode: 'selected_roots', + root_page_ids: [PAGE_IDS.engineering], + root_database_ids: [], + root_data_source_ids: [], + max_pages_per_run: 12, + max_knowledge_creates_per_run: 2, + max_knowledge_updates_per_run: 7, + last_successful_cursor: null, + }, + }, + }); + const api = fakeNotionApi([notionPage(PAGE_IDS.engineering, 'Engineering')]); + const createNotionApi = vi.fn((authToken: string) => { + expect(authToken).toBe('ntn_inline_token'); + return api; + }); + const io = makeIo(); + + await expect( + runKtxConnectionNotion( + { + command: 'pick', + projectDir, + connectionId: 'notion-main', + mode: 'interactive', + }, + io.io, + { + createNotionApi, + renderPicker: vi.fn(async (): Promise => ({ kind: 'quit' })), + }, + ), + ).resolves.toBe(0); + + expect(createNotionApi).toHaveBeenCalledOnce(); + expect(io.stdout()).toContain('No changes saved.'); + }); + it('passes partial-discovery warnings into the TUI banner state', async () => { const projectDir = join(tempDir, 'project'); const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' }); diff --git a/packages/cli/src/commands/connection-notion.ts b/packages/cli/src/commands/connection-notion.ts index ecdf0f36..e0f68c0b 100644 --- a/packages/cli/src/commands/connection-notion.ts +++ b/packages/cli/src/commands/connection-notion.ts @@ -1,4 +1,4 @@ -import { parseNotionConnectionConfig, resolveNotionAuthToken } from '@ktx/context/connections'; +import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from '@ktx/context/connections'; import { type NotionApi, type NotionBotInfo, NotionClient } from '@ktx/context/ingest'; import { type KtxLocalProject, @@ -223,7 +223,7 @@ export async function runKtxConnectionNotion( const project = await loadProject({ projectDir: args.projectDir }); const rawConnection = notionConnection(project, args.connectionId); const notion = parseNotionConnectionConfig(rawConnection); - const authToken = await resolveNotionAuthToken(notion.auth_token_ref, { env: deps.env }); + const authToken = await resolveNotionConnectionAuthToken(notion, { env: deps.env }); const api = deps.createNotionApi ? deps.createNotionApi(authToken) : new NotionClient(authToken); const discovery = await discoverNotionPickerPages(api); const tree = buildPickerTree(discovery.pages); diff --git a/packages/cli/src/ingest-viz.test.ts b/packages/cli/src/ingest-viz.test.ts index 6963d277..a37c1ed8 100644 --- a/packages/cli/src/ingest-viz.test.ts +++ b/packages/cli/src/ingest-viz.test.ts @@ -186,6 +186,91 @@ describe('runKtxIngest viz and replay', () => { expect(io.stdout()).toContain('Connection: warehouse'); }); + it('prints live viz final summaries as errors when the report has failed work units', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'notion', trigger: 'manual_resync', fileCount: 37 }); + input.memoryFlow?.update({ + syncId: 'sync-notion', + plannedWorkUnits: [ + { + unitKey: 'notion-cluster-1', + rawFiles: ['pages/a.md'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + input.memoryFlow?.emit({ + type: 'work_unit_finished', + unitKey: 'notion-cluster-1', + status: 'failed', + reason: 'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)"}', + }); + input.memoryFlow?.emit({ type: 'report_created', runId: 'live-failed' }); + input.memoryFlow?.finish('done'); + + const failedWorkUnit = { + ...localFakeBundleReport('live-failed').body.workUnits[0], + unitKey: 'notion-cluster-1', + rawFiles: ['pages/a.md'], + status: 'failed' as const, + reason: 'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)"}', + actions: [], + touchedSlSources: [], + }; + const report = localFakeBundleReport('live-failed', { + id: 'report-live-failed', + runId: 'run-live-failed', + connectionId: input.connectionId, + sourceKey: input.adapter, + body: { + workUnits: [failedWorkUnit], + failedWorkUnits: [failedWorkUnit.unitKey], + }, + }); + return { + result: { + jobId: 'live-failed', + runId: report.runId, + syncId: report.body.syncId, + diffSummary: report.body.diffSummary, + workUnitCount: report.body.workUnits.length, + failedWorkUnits: report.body.failedWorkUnits, + artifactsWritten: report.body.provenanceRows.length, + commitSha: report.body.commitSha, + }, + report, + }; + }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'notion-main', + adapter: 'notion', + outputMode: 'viz', + }, + io.io, + { runLocalIngest: runLocal, startLiveMemoryFlow }, + ), + ).resolves.toBe(1); + + expect(io.stdout()).toContain('Memory-flow summary: error'); + expect(io.stdout()).toContain('Notion authorization expired'); + }); + it('falls back to text live rendering when the TUI live session is unavailable', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index b547d185..dd612f5c 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -1076,17 +1076,17 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - const stdout = io.stdout(); - expect(stdout).toContain('[5%] Fetching source files for warehouse/historic-sql'); - expect(stdout).toContain('[15%] Fetched 3 source files from historic-sql'); - expect(stdout).toContain('[45%] Planned 1 work unit'); - expect(stdout).toContain('[80%] Processed 1/1 work units'); - expect(stdout).toContain('[100%] Ingest completed'); - expect(stdout).toContain('Report: report-live-1'); - expect(io.stderr()).toBe(''); + const stderr = io.stderr(); + expect(stderr).toContain('[5%] Fetching source files for warehouse/historic-sql'); + expect(stderr).toContain('[15%] Fetched 3 source files from historic-sql'); + expect(stderr).toContain('[45%] Planned 1 work unit'); + expect(stderr).toContain('[80%] Processed 1/1 work units'); + expect(stderr).toContain('[100%] Ingest completed'); + expect(io.stdout()).toContain('Report: report-live-1'); + expect(io.stdout()).not.toContain('[5%]'); }); - it('writes plain TTY ingest progress and final report to stdout', async () => { + it('writes plain TTY ingest progress to stderr and final report to stdout', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); @@ -1113,9 +1113,9 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - expect(io.stdout()).toContain('[5%] Fetching source files for warehouse/fake'); + expect(io.stderr()).toContain('[5%] Fetching source files for warehouse/fake'); expect(io.stdout()).toContain('Report: report-live-1'); - expect(io.stderr()).toBe(''); + expect(io.stdout()).not.toContain('[5%]'); }); it('prints plain WorkUnit step progress during long-running local ingest', async () => { @@ -1202,13 +1202,13 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - const stdout = io.stdout(); - expect(stdout).toContain('[45%] Planned 2 work units'); - expect(stdout).toContain('[55%] Processing 1/2 work units: historic-sql-table-public-orders'); - expect(stdout).toContain( + const stderr = io.stderr(); + expect(stderr).toContain('[45%] Planned 2 work units'); + expect(stderr).toContain('[55%] Processing 1/2 work units: historic-sql-table-public-orders'); + expect(stderr).toContain( '\r[58%] Processing work units: 0/2 complete, 1 active; latest historic-sql-table-public-orders step 7/40\u001b[K', ); - expect(stdout).toContain('[68%] Processed 1/2 work units'); + expect(stderr).toContain('[68%] Processed 1/2 work units'); }); it('renders concurrent WorkUnit step progress as transient aggregate status', async () => { @@ -1294,14 +1294,14 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - const stdout = io.stdout(); - expect(stdout).toContain( + const stderr = io.stderr(); + expect(stderr).toContain( '\r[56%] Processing work units: 0/6 complete, 6 active; latest historic-sql-table-public-suppliers step 1/40\u001b[K', ); - expect(stdout).not.toContain( + expect(stderr).not.toContain( '\n[56%] Processing 6/6 work units: historic-sql-table-public-suppliers step 1/40\n', ); - expect(stdout).toContain('\n[100%] Ingest completed\n'); + expect(stderr).toContain('\n[100%] Ingest completed\n'); }); it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index 571bc1ef..cf7a7aff 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -306,7 +306,7 @@ function createPlainIngestProgressRenderer( if (!hasPendingTransient) { return; } - io.stdout.write('\n'); + io.stderr.write('\n'); hasPendingTransient = false; }; @@ -315,12 +315,12 @@ function createPlainIngestProgressRenderer( lastPercent = nextPercent; const line = `[${nextPercent}%] ${message}`; if (options?.transient === true) { - io.stdout.write(`\r${line}\u001b[K`); + io.stderr.write(`\r${line}\u001b[K`); hasPendingTransient = true; return; } flush(); - io.stdout.write(`${line}\n`); + io.stderr.write(`${line}\n`); }; return { @@ -437,6 +437,21 @@ function initialRunMemoryFlowInput( }; } +function finalRunMemoryFlowInput(snapshot: MemoryFlowReplayInput, report: IngestReportSnapshot): MemoryFlowReplayInput { + const status = reportStatus(report); + return { + ...snapshot, + runId: report.runId, + connectionId: report.connectionId, + adapter: report.sourceKey, + status, + syncId: report.body.syncId, + reportId: report.id, + reportPath: report.id, + errors: status === 'error' ? report.body.failedWorkUnits : snapshot.errors, + }; +} + function managedDaemonOptionsForIngestRun( args: Extract, io: KtxIngestIo, @@ -592,7 +607,7 @@ export async function runKtxIngest( ...(memoryFlow ? { memoryFlow } : {}), }); if (shouldUseLiveViz && memoryFlow) { - latestMemoryFlowSnapshot = memoryFlow.snapshot(); + latestMemoryFlowSnapshot = finalRunMemoryFlowInput(memoryFlow.snapshot(), result.report); liveTui?.close(); liveTui = null; io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot)); diff --git a/packages/context/src/connections/index.ts b/packages/context/src/connections/index.ts index ce24a2c7..513818fa 100644 --- a/packages/context/src/connections/index.ts +++ b/packages/context/src/connections/index.ts @@ -21,6 +21,7 @@ export { notionConnectionToPullConfig, parseNotionConnectionConfig, redactNotionConnectionConfig, + resolveNotionConnectionAuthToken, resolveNotionAuthToken, type KtxNotionConnectionConfig, type RedactedKtxNotionConnectionConfig, diff --git a/packages/context/src/connections/notion-config.test.ts b/packages/context/src/connections/notion-config.test.ts index 1d595a5b..0167f186 100644 --- a/packages/context/src/connections/notion-config.test.ts +++ b/packages/context/src/connections/notion-config.test.ts @@ -30,6 +30,7 @@ describe('standalone Notion connection config', () => { expect(parsed).toEqual({ driver: 'notion', + auth_token: null, auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'selected_roots', root_page_ids: ['page-1'], @@ -42,6 +43,23 @@ describe('standalone Notion connection config', () => { }); }); + it('parses inline Notion auth tokens without requiring auth_token_ref', () => { + const parsed = parseNotionConnectionConfig({ + driver: 'notion', + auth_token: ' ntn_inline_token ', + crawl_mode: 'selected_roots', + root_page_ids: ['page-1'], + }); + + expect(parsed).toMatchObject({ + driver: 'notion', + auth_token: 'ntn_inline_token', + auth_token_ref: null, + crawl_mode: 'selected_roots', + root_page_ids: ['page-1'], + }); + }); + it('redacts token references from display output', () => { expect( redactNotionConnectionConfig( @@ -117,4 +135,23 @@ describe('standalone Notion connection config', () => { lastSuccessfulCursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}', }); }); + + it('uses inline Notion auth_token when building adapter pull config', async () => { + const pullConfig = await notionConnectionToPullConfig( + parseNotionConnectionConfig({ + driver: 'notion', + auth_token: 'ntn_inline_token', + auth_token_ref: 'env:STALE_NOTION_TOKEN', + crawl_mode: 'all_accessible', + }), + { + env: {}, + readTextFile: async () => { + throw new Error('readTextFile should not be called for inline auth_token'); + }, + }, + ); + + expect(pullConfig.authToken).toBe('ntn_inline_token'); + }); }); diff --git a/packages/context/src/connections/notion-config.ts b/packages/context/src/connections/notion-config.ts index 367ef444..d678ba2f 100644 --- a/packages/context/src/connections/notion-config.ts +++ b/packages/context/src/connections/notion-config.ts @@ -15,7 +15,8 @@ type KtxNotionCrawlMode = 'all_accessible' | 'selected_roots'; export interface KtxNotionConnectionConfig extends KtxProjectConnectionConfig { driver: 'notion'; - auth_token_ref: string; + auth_token: string | null; + auth_token_ref: string | null; crawl_mode: KtxNotionCrawlMode; root_page_ids: string[]; root_database_ids: string[]; @@ -93,11 +94,12 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo if (input.driver !== 'notion') { throw new Error('Notion connection config requires driver: notion'); } - const authTokenRef = stringValue(input.auth_token_ref, ''); - if (!authTokenRef) { - throw new Error('Notion connection config requires auth_token_ref'); + const authToken = optionalString(input.auth_token); + const authTokenRef = optionalString(input.auth_token_ref); + if (!authToken && !authTokenRef) { + throw new Error('Notion connection config requires auth_token or auth_token_ref'); } - if (!authTokenRef.startsWith('env:') && !authTokenRef.startsWith('file:')) { + if (authTokenRef && !authTokenRef.startsWith('env:') && !authTokenRef.startsWith('file:')) { throw new Error('Notion auth_token_ref must use env:NAME or file:/path'); } @@ -115,6 +117,7 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo return { ...input, driver: 'notion', + auth_token: authToken, auth_token_ref: authTokenRef, crawl_mode: crawlMode, root_page_ids: rootPageIds, @@ -142,7 +145,7 @@ export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionCo export function redactNotionConnectionConfig(config: KtxNotionConnectionConfig): RedactedKtxNotionConnectionConfig { return { driver: 'notion', - hasAuthToken: Boolean(config.auth_token_ref), + hasAuthToken: Boolean(config.auth_token ?? config.auth_token_ref), crawlMode: config.crawl_mode, rootPageIds: config.root_page_ids, rootDatabaseIds: config.root_database_ids, @@ -182,12 +185,20 @@ export async function resolveNotionAuthToken( throw new Error('Notion auth_token_ref must use env:NAME or file:/path'); } +export async function resolveNotionConnectionAuthToken( + config: Pick, + options: ResolveNotionTokenOptions = {}, +): Promise { + return config.auth_token ?? (await resolveNotionAuthToken(config.auth_token_ref ?? '', options)); +} + export async function notionConnectionToPullConfig( config: KtxNotionConnectionConfig, options: ResolveNotionTokenOptions = {}, ): Promise { + const authToken = await resolveNotionConnectionAuthToken(config, options); return notionPullConfigSchema.parse({ - authToken: await resolveNotionAuthToken(config.auth_token_ref, options), + authToken, crawlMode: config.crawl_mode, rootPageIds: config.root_page_ids, rootDatabaseIds: config.root_database_ids, diff --git a/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts b/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts index b0544141..7e752e9b 100644 --- a/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts +++ b/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts @@ -253,6 +253,43 @@ describe('NotionSourceAdapter', () => { expect(result.contextReport).toEqual({ capped: false, warnings: [NOTION_ORG_KNOWLEDGE_WARNING] }); }); + it('chunks retried pages when failed provenance makes unchanged raw files look added again', async () => { + await writeFile( + join(stagedDir, 'manifest.json'), + JSON.stringify({ + source: 'notion', + apiVersion: '2026-03-11', + crawlMode: 'selected_roots', + rootPageIds: ['page-1'], + rootDatabaseIds: [], + rootDataSourceIds: [], + fetchedAt: '2026-04-28T00:00:00.000Z', + pageCount: 1, + databaseCount: 0, + dataSourceCount: 0, + capped: false, + continuedFromCursor: false, + partialSnapshot: false, + maxPagesPerRun: 100, + maxKnowledgeCreatesPerRun: 25, + maxKnowledgeUpdatesPerRun: 20, + skipped: [], + warnings: [], + }), + 'utf-8', + ); + await writePage('page-1', 'Retry Me'); + + const result = await adapter.chunk(stagedDir, { + added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'], + modified: [], + deleted: [], + unchanged: ['manifest.json', 'pages/page-1/blocks.json'], + }); + + expect(result.workUnits.map((workUnit) => workUnit.unitKey)).toEqual(['notion-page-page-1']); + }); + it('reports malformed manifests with a Notion-specific error', async () => { await writeFile(join(stagedDir, 'manifest.json'), '{bad json', 'utf-8'); diff --git a/packages/context/src/ingest/memory-flow/known-errors.ts b/packages/context/src/ingest/memory-flow/known-errors.ts new file mode 100644 index 00000000..8273ed86 --- /dev/null +++ b/packages/context/src/ingest/memory-flow/known-errors.ts @@ -0,0 +1,28 @@ +interface MemoryFlowErrorContext { + adapter: string; +} + +export function isNotionAuthorizationExpired( + context: MemoryFlowErrorContext, + reason: string | undefined, +): boolean { + if (context.adapter !== 'notion') { + return false; + } + const normalized = (reason ?? '').toLowerCase(); + return ( + normalized.includes('invalid_grant') && + (normalized.includes('invalid_rapt') || normalized.includes('reauth')) + ); +} + +export function formatNotionAuthorizationExpiredDetail(unitKey: string): string { + return `${unitKey} could not read Notion because the saved OAuth grant expired or requires reauthentication (invalid_grant / invalid_rapt).`; +} + +export function notionAuthorizationFixSuggestions(connectionId: string): string[] { + return [ + `Refresh the Notion token referenced by auth_token_ref for ${connectionId}. If it uses env:NAME, export a fresh token in that variable; if it uses file:/path, replace that file.`, + `Run ktx connection notion pick ${connectionId} to confirm Notion access, then rerun ktx ingest ${connectionId}.`, + ]; +} diff --git a/packages/context/src/ingest/memory-flow/summary.test.ts b/packages/context/src/ingest/memory-flow/summary.test.ts index 61c25aa5..0720dbae 100644 --- a/packages/context/src/ingest/memory-flow/summary.test.ts +++ b/packages/context/src/ingest/memory-flow/summary.test.ts @@ -60,6 +60,36 @@ describe('formatMemoryFlowFinalSummary', () => { ).toContain('Trust issues: 3'); }); + it('explains expired Notion authorization with fix suggestions', () => { + const rawReason = + 'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)","error_uri":"https://accounts.example/reauth"}'; + const summary = formatMemoryFlowFinalSummary( + input({ + connectionId: 'notion-main', + adapter: 'notion', + status: 'error', + events: [ + { type: 'source_acquired', adapter: 'notion', trigger: 'manual_resync', fileCount: 37 }, + { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, + { type: 'work_unit_finished', unitKey: 'notion-cluster-1', status: 'failed', reason: rawReason }, + ], + }), + ); + + expect(summary).toContain('Memory-flow summary: error'); + expect(summary).toContain( + 'Notion authorization expired: notion-cluster-1 could not read Notion because the saved OAuth grant expired or requires reauthentication (invalid_grant / invalid_rapt).', + ); + expect(summary).toContain('Fix suggestions:'); + expect(summary).toContain( + '- Refresh the Notion token referenced by auth_token_ref for notion-main. If it uses env:NAME, export a fresh token in that variable; if it uses file:/path, replace that file.', + ); + expect(summary).toContain( + '- Run ktx connection notion pick notion-main to confirm Notion access, then rerun ktx ingest notion-main.', + ); + expect(summary).not.toContain('error_uri'); + }); + it('labels replay source metadata in final summaries', () => { const summary = formatMemoryFlowFinalSummary({ metadata: { diff --git a/packages/context/src/ingest/memory-flow/summary.ts b/packages/context/src/ingest/memory-flow/summary.ts index b71526a1..a23d7146 100644 --- a/packages/context/src/ingest/memory-flow/summary.ts +++ b/packages/context/src/ingest/memory-flow/summary.ts @@ -1,6 +1,7 @@ import { sanitizeMemoryFlowError } from './live-buffer.js'; import type { MemoryFlowEvent, MemoryFlowReplayInput } from './types.js'; import { buildMemoryFlowViewModel } from './view-model.js'; +import { isNotionAuthorizationExpired, notionAuthorizationFixSuggestions } from './known-errors.js'; function latest( events: MemoryFlowEvent[], @@ -42,6 +43,14 @@ function humanizeSummaryText(value: string): string { .replace(/\bSL\b/g, 'semantic layer'); } +function fixSuggestions(input: MemoryFlowReplayInput): string[] { + const workUnitReasons = eventsOf(input.events, 'work_unit_finished').map((event) => event.reason); + const hasNotionAuthFailure = [...workUnitReasons, ...input.errors].some((reason) => + isNotionAuthorizationExpired(input, reason), + ); + return hasNotionAuthFailure ? notionAuthorizationFixSuggestions(input.connectionId) : []; +} + export function formatMemoryFlowFinalSummary(input: MemoryFlowReplayInput): string { const sources = eventsOf(input.events, 'source_acquired'); const source = sources.at(-1); @@ -84,6 +93,14 @@ export function formatMemoryFlowFinalSummary(input: MemoryFlowReplayInput): stri } } + const suggestions = fixSuggestions(input); + if (suggestions.length > 0) { + lines.push('Fix suggestions:'); + for (const suggestion of suggestions) { + lines.push(`- ${suggestion}`); + } + } + for (const error of input.errors.slice(0, 3)) { lines.push(`Error: ${sanitizeMemoryFlowError(error)}`); } diff --git a/packages/context/src/ingest/memory-flow/view-model.ts b/packages/context/src/ingest/memory-flow/view-model.ts index 7c908fbc..c8f784b8 100644 --- a/packages/context/src/ingest/memory-flow/view-model.ts +++ b/packages/context/src/ingest/memory-flow/view-model.ts @@ -9,6 +9,7 @@ import type { MemoryFlowViewModel, } from './types.js'; import { sanitizeMemoryFlowError } from './live-buffer.js'; +import { formatNotionAuthorizationExpiredDetail, isNotionAuthorizationExpired } from './known-errors.js'; function latest( events: MemoryFlowEvent[], @@ -109,7 +110,7 @@ function errorDetails(input: MemoryFlowReplayInput): string[] { } function isValidationFailure(reason: string | undefined): boolean { - return /semantic-layer|validation|invalid/i.test(reason ?? ''); + return /semantic-layer|validation/i.test(reason ?? ''); } function failedWorkUnitDetails(failed: Array>): string[] { @@ -180,11 +181,14 @@ function buildMemoryFlowTrustIssues(input: MemoryFlowReplayInput): MemoryFlowTru for (const event of failed) { const reason = sanitizeMemoryFlowError(event.reason ?? 'failed'); + const knownNotionAuthFailure = isNotionAuthorizationExpired(input, event.reason); issues.push({ id: `work-unit-failed:${event.unitKey}`, severity: 'failed', - title: 'WorkUnit failed', - detail: `${event.unitKey} failed: ${reason}`, + title: knownNotionAuthFailure ? 'Notion authorization expired' : 'WorkUnit failed', + detail: knownNotionAuthFailure + ? formatNotionAuthorizationExpiredDetail(event.unitKey) + : `${event.unitKey} failed: ${reason}`, columnId: 'workUnits', targetLabel: event.unitKey, }); diff --git a/packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts b/packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts index 3c27ab2d..2798c64a 100644 --- a/packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts +++ b/packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts @@ -66,6 +66,27 @@ function reportBody(syncId: string, supersededBy: string | null = null): IngestR }; } +function emptyReportBody(syncId: string, overrides: Partial = {}): IngestReportBody { + return { + syncId, + diffSummary: diffSummary({ added: 0, modified: 0, deleted: 0, unchanged: 1 }), + commitSha: null, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + ...overrides, + }; +} + describe('SqliteBundleIngestStore', () => { let tempDir: string; let dbPath: string; @@ -226,6 +247,204 @@ describe('SqliteBundleIngestStore', () => { ); }); + it('does not baseline skipped provenance from failed work units or zero-work retry runs', async () => { + const store = new SqliteBundleIngestStore({ dbPath }); + const rawHashes = new Map([ + ['pages/page-1/metadata.json', 'hash-metadata'], + ['pages/page-1/page.md', 'hash-page'], + ]); + + const failedRun = await store.create(runArgs({ jobId: 'job-failed-review', syncId: 'sync-failed-review' })); + await store.insertMany( + [...rawHashes].map(([rawPath, rawContentHash]) => ({ + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-failed-review', + rawPath, + rawContentHash, + artifactKind: null, + artifactKey: null, + artifactContentHash: null, + actionType: 'skipped' as const, + })), + ); + await store.markCompleted(failedRun.id, diffSummary({ added: 2 })); + await store.create({ + runId: failedRun.id, + jobId: 'job-failed-review', + connectionId: 'docs', + sourceKey: 'notion', + body: emptyReportBody('sync-failed-review', { + workUnits: [ + { + unitKey: 'notion-page-page-1', + rawFiles: [...rawHashes.keys()], + status: 'failed', + reason: 'invalid_grant', + actions: [], + touchedSlSources: [], + }, + ], + failedWorkUnits: ['notion-page-page-1'], + }), + }); + + const noWorkRun = await store.create(runArgs({ jobId: 'job-no-work', syncId: 'sync-no-work' })); + await store.insertMany( + [...rawHashes].map(([rawPath, rawContentHash]) => ({ + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-no-work', + rawPath, + rawContentHash, + artifactKind: null, + artifactKey: null, + artifactContentHash: null, + actionType: 'skipped' as const, + })), + ); + await store.markCompleted(noWorkRun.id, diffSummary({ unchanged: 2 })); + await store.create({ + runId: noWorkRun.id, + jobId: 'job-no-work', + connectionId: 'docs', + sourceKey: 'notion', + body: emptyReportBody('sync-no-work', { workUnits: [], failedWorkUnits: [] }), + }); + + await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual(new Map()); + await expect(new DiffSetService(store).compute('docs', 'notion', rawHashes)).resolves.toEqual({ + added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'], + modified: [], + deleted: [], + unchanged: [], + }); + }); + + it('baselines skipped provenance from successful no-output work unit runs', async () => { + const store = new SqliteBundleIngestStore({ dbPath }); + const run = await store.create(runArgs({ jobId: 'job-reviewed-no-output', syncId: 'sync-reviewed-no-output' })); + + await store.insertMany([ + { + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-reviewed-no-output', + rawPath: 'pages/page-1/page.md', + rawContentHash: 'hash-reviewed', + artifactKind: null, + artifactKey: null, + artifactContentHash: null, + actionType: 'skipped', + }, + ]); + await store.markCompleted(run.id, diffSummary({ added: 1 })); + await store.create({ + runId: run.id, + jobId: 'job-reviewed-no-output', + connectionId: 'docs', + sourceKey: 'notion', + body: emptyReportBody('sync-reviewed-no-output', { + workUnits: [ + { + unitKey: 'notion-page-page-1', + rawFiles: ['pages/page-1/page.md'], + status: 'success', + actions: [], + touchedSlSources: [], + }, + ], + failedWorkUnits: [], + }), + }); + + await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual( + new Map([['pages/page-1/page.md', 'hash-reviewed']]), + ); + await expect( + new DiffSetService(store).compute('docs', 'notion', new Map([['pages/page-1/page.md', 'hash-reviewed']])), + ).resolves.toMatchObject({ + added: [], + unchanged: ['pages/page-1/page.md'], + }); + }); + + it('baselines artifact provenance in partial failures but not skipped-only failed paths', async () => { + const store = new SqliteBundleIngestStore({ dbPath }); + const run = await store.create(runArgs({ jobId: 'job-partial', syncId: 'sync-partial' })); + + await store.insertMany([ + { + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-partial', + rawPath: 'pages/success/page.md', + rawContentHash: 'hash-success', + artifactKind: 'wiki', + artifactKey: 'knowledge/notion/success.md', + artifactContentHash: 'artifact-success', + actionType: 'wiki_written', + }, + { + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-partial', + rawPath: 'pages/failed/page.md', + rawContentHash: 'hash-failed', + artifactKind: null, + artifactKey: null, + artifactContentHash: null, + actionType: 'skipped', + }, + ]); + await store.markCompleted(run.id, diffSummary({ added: 2 })); + await store.create({ + runId: run.id, + jobId: 'job-partial', + connectionId: 'docs', + sourceKey: 'notion', + body: emptyReportBody('sync-partial', { + workUnits: [ + { + unitKey: 'notion-page-success', + rawFiles: ['pages/success/page.md'], + status: 'success', + actions: [], + touchedSlSources: [], + }, + { + unitKey: 'notion-page-failed', + rawFiles: ['pages/failed/page.md'], + status: 'failed', + reason: 'invalid_grant', + actions: [], + touchedSlSources: [], + }, + ], + failedWorkUnits: ['notion-page-failed'], + }), + }); + + await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual( + new Map([['pages/success/page.md', 'hash-success']]), + ); + await expect( + new DiffSetService(store).compute( + 'docs', + 'notion', + new Map([ + ['pages/success/page.md', 'hash-success'], + ['pages/failed/page.md', 'hash-failed'], + ]), + ), + ).resolves.toEqual({ + added: ['pages/failed/page.md'], + modified: [], + deleted: [], + unchanged: ['pages/success/page.md'], + }); + }); + it('returns the latest stored report across bundle ingest runs', async () => { const store = new SqliteBundleIngestStore({ dbPath, diff --git a/packages/context/src/ingest/sqlite-bundle-ingest-store.ts b/packages/context/src/ingest/sqlite-bundle-ingest-store.ts index 50c2d75d..bd9cf5c7 100644 --- a/packages/context/src/ingest/sqlite-bundle-ingest-store.ts +++ b/packages/context/src/ingest/sqlite-bundle-ingest-store.ts @@ -46,6 +46,13 @@ interface ProvenanceRow { action_type: string; } +interface ProvenanceHashCandidateRow { + raw_path: string; + raw_content_hash: string; + action_type: string; + report_body_json: string | null; +} + function parseArtifactKind(kind: string | null): IngestProvenanceRow['artifact_kind'] { if (kind === null || kind === 'sl' || kind === 'wiki') { return kind; @@ -93,6 +100,31 @@ function toPortProvenanceRow(row: ProvenanceRow): IngestProvenanceRow { }; } +function recordValue(value: unknown, key: string): unknown { + return typeof value === 'object' && value !== null && !Array.isArray(value) + ? (value as Record)[key] + : undefined; +} + +function isSuccessfulNoOutputSkippedBaseline(reportBodyJson: string | null): boolean { + if (reportBodyJson === null) { + return true; + } + const body = JSON.parse(reportBodyJson) as unknown; + const workUnits = recordValue(body, 'workUnits'); + const failedWorkUnits = recordValue(body, 'failedWorkUnits'); + return ( + Array.isArray(workUnits) && + workUnits.length > 0 && + Array.isArray(failedWorkUnits) && + failedWorkUnits.length === 0 + ); +} + +function isProcessedHashBaseline(row: ProvenanceHashCandidateRow): boolean { + return row.action_type !== 'skipped' || isSuccessfulNoOutputSkippedBaseline(row.report_body_json); +} + function placeholders(values: readonly unknown[]): string { return values.map(() => '?').join(', '); } @@ -275,23 +307,34 @@ export class SqliteBundleIngestStore const rows = this.db .prepare( ` - SELECT p.raw_path, p.raw_content_hash + SELECT + p.raw_path, + p.raw_content_hash, + p.action_type, + br.body_json AS report_body_json FROM bundle_ingest_provenance p INNER JOIN bundle_ingest_runs r ON r.connection_id = p.connection_id AND r.source_key = p.source_key AND r.sync_id = p.sync_id + LEFT JOIN bundle_ingest_reports br + ON br.run_id = r.id WHERE p.connection_id = ? AND p.source_key = ? AND r.status = 'completed' ORDER BY r.completed_at DESC, r.rowid DESC, p.created_at DESC, p.rowid DESC `, ) - .all(connectionId, sourceKey) as Array<{ raw_path: string; raw_content_hash: string }>; + .all(connectionId, sourceKey) as ProvenanceHashCandidateRow[]; const latest = new Map(); + const seen = new Set(); for (const row of rows) { - if (!latest.has(row.raw_path)) { + if (seen.has(row.raw_path)) { + continue; + } + seen.add(row.raw_path); + if (isProcessedHashBaseline(row)) { latest.set(row.raw_path, row.raw_content_hash); } } diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 032df825..5531db11 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -1056,7 +1056,7 @@ try { requireStdout('ktx setup demo seeded', seeded, /Mode: seeded/); requireStdout('ktx setup demo seeded', seeded, /Source: packaged demo project/); requireStdout('ktx setup demo seeded', seeded, /LLM calls: none/); - requireStdout('ktx setup demo seeded', seeded, /ktx serve --mcp stdio/); + requireStdout('ktx setup demo seeded', seeded, /ktx agent context --json/); assert.doesNotMatch(seeded.stdout, new RegExp(['--mode', 'deterministic'].join(' '))); assert.doesNotMatch(seeded.stdout, /KTX memory flow/); assert.equal(seeded.stderr, '', 'ktx setup demo seeded wrote unexpected stderr'); diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 624248d0..4efe5fb5 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -517,7 +517,7 @@ describe('verification snippets', () => { assert.match(source, /Mode: seeded/); assert.match(source, /Source: packaged demo project/); assert.match(source, /LLM calls: none/); - assert.match(source, /ktx serve --mcp stdio/); + assert.match(source, /ktx agent context --json/); assert.doesNotMatch(source, new RegExp(["'demo'", "'--mode'", "'deterministic'"].join(', '))); assert.match(source, /'dev', 'doctor', 'setup', '--no-input'/); assert.match(source, /'--plain'/); diff --git a/scripts/precommit-check.mjs b/scripts/precommit-check.mjs deleted file mode 100644 index 299db534..00000000 --- a/scripts/precommit-check.mjs +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env node -import { spawnSync } from 'node:child_process'; -import { existsSync, readFileSync } from 'node:fs'; -import { dirname, join, resolve } from 'node:path'; -import { fileURLToPath } from 'node:url'; - -const scriptPath = fileURLToPath(import.meta.url); -const ktxRoot = dirname(dirname(scriptPath)); - -const packageNameByDir = new Map( - [ - 'cli', - 'connector-bigquery', - 'connector-clickhouse', - 'connector-mysql', - 'connector-postgres', - 'connector-snowflake', - 'connector-sqlite', - 'connector-sqlserver', - 'context', - 'llm', - ].map((packageDir) => { - const manifestPath = join(ktxRoot, 'packages', packageDir, 'package.json'); - const manifest = JSON.parse(readFileSync(manifestPath, 'utf8')); - return [packageDir, manifest.name]; - }), -); - -const packageCodePattern = /\.(?:ts|tsx|js|jsx|json)$/; -const scriptPattern = /\.(?:mjs|js|json)$/; -const pythonPackageTests = new Map([ - ['ktx-sl', 'python/ktx-sl/tests'], - ['ktx-daemon', 'python/ktx-daemon/tests'], -]); - -function normalizeFilePath(filePath) { - const normalized = filePath.replaceAll('\\', '/').replace(/^\.\//, ''); - return normalized.startsWith('ktx/') ? normalized.slice('ktx/'.length) : normalized; -} - -function stablePush(commands, key, cmd, args) { - if (commands.some((command) => command.key === key)) { - return; - } - - commands.push({ key, cmd, args }); -} - -function maybeScriptTest(scriptFile) { - if (scriptFile.endsWith('.test.mjs')) { - return scriptFile; - } - - if (!scriptFile.endsWith('.mjs')) { - return null; - } - - const testFile = scriptFile.replace(/\.mjs$/, '.test.mjs'); - return existsSync(join(ktxRoot, testFile)) ? testFile : null; -} - -export function planChecks(files) { - const commands = []; - const packageNames = new Set(); - const pythonPackages = new Set(); - let runBoundaryCheck = false; - let runAllTypeChecks = false; - let runAllPythonTests = false; - - for (const rawFile of files) { - const ktxFile = normalizeFilePath(rawFile); - - if (ktxFile.startsWith('packages/')) { - const [, packageDir, ...rest] = ktxFile.split('/'); - const packageName = packageNameByDir.get(packageDir); - const packageFile = rest.join('/'); - - if (packageName && packageCodePattern.test(packageFile)) { - packageNames.add(packageName); - runBoundaryCheck = true; - } - - continue; - } - - if (ktxFile.startsWith('scripts/') && scriptPattern.test(ktxFile)) { - const testFile = maybeScriptTest(ktxFile); - - if (testFile) { - stablePush(commands, `script-test:${testFile}`, 'node', ['--test', testFile]); - } - - continue; - } - - if (ktxFile.startsWith('python/')) { - const [, packageDir] = ktxFile.split('/'); - - if (pythonPackageTests.has(packageDir)) { - pythonPackages.add(packageDir); - } - - continue; - } - - if ( - ['package.json', 'pnpm-lock.yaml', 'pnpm-workspace.yaml', 'release-policy.json', 'tsconfig.base.json'].includes( - ktxFile, - ) - ) { - runBoundaryCheck = true; - runAllTypeChecks = true; - continue; - } - - if (['pyproject.toml', 'uv.lock', 'uv.toml'].includes(ktxFile)) { - runAllPythonTests = true; - } - } - - if (runBoundaryCheck) { - stablePush(commands, 'boundary-check', 'node', ['scripts/check-boundaries.mjs']); - } - - if (runAllTypeChecks) { - stablePush(commands, 'type-check:all', 'pnpm', ['--filter', './packages/*', 'run', 'type-check']); - } else { - for (const packageName of [...packageNames].sort()) { - stablePush(commands, `type-check:${packageName}`, 'pnpm', ['--filter', packageName, 'run', 'type-check']); - stablePush(commands, `build:${packageName}`, 'pnpm', ['--filter', `${packageName}...`, 'run', 'build']); - stablePush(commands, `test:${packageName}`, 'pnpm', ['--filter', packageName, 'run', 'test']); - } - } - - if (runAllPythonTests) { - stablePush(commands, 'pytest:all', 'uv', ['run', 'pytest']); - } else { - for (const packageDir of [...pythonPackages].sort()) { - stablePush(commands, `pytest:${packageDir}`, 'uv', [ - 'run', - '--package', - packageDir, - 'pytest', - pythonPackageTests.get(packageDir), - ]); - } - } - - return commands; -} - -function printCommand(command) { - console.log(`\n$ ${command.cmd} ${command.args.join(' ')}`); -} - -export function runChecks(files) { - const commands = planChecks(files); - - if (commands.length === 0) { - console.log('No KTX package checks needed for these files.'); - return 0; - } - - for (const command of commands) { - printCommand(command); - - const result = spawnSync(command.cmd, command.args, { - cwd: ktxRoot, - stdio: 'inherit', - env: process.env, - }); - - if (result.error) { - console.error(result.error.message); - return 1; - } - - if (result.status !== 0) { - return result.status ?? 1; - } - } - - return 0; -} - -if (process.argv[1] && resolve(process.argv[1]) === scriptPath) { - process.exitCode = runChecks(process.argv.slice(2)); -} diff --git a/scripts/precommit-check.test.mjs b/scripts/precommit-check.test.mjs deleted file mode 100644 index 40bd1716..00000000 --- a/scripts/precommit-check.test.mjs +++ /dev/null @@ -1,42 +0,0 @@ -import assert from 'node:assert/strict'; -import { describe, it } from 'node:test'; - -import { planChecks } from './precommit-check.mjs'; - -function commandKeys(files) { - return planChecks(files).map((command) => command.key); -} - -describe('precommit-check', () => { - it('skips files outside ktx', () => { - assert.deepEqual(commandKeys(['outside-workspace/src/app.ts']), []); - }); - - it('runs only the touched package checks for standalone package paths', () => { - assert.deepEqual(commandKeys(['packages/cli/src/index.ts']), [ - 'boundary-check', - 'type-check:@ktx/cli', - 'build:@ktx/cli', - 'test:@ktx/cli', - ]); - }); - - it('accepts legacy subtree-prefixed package paths', () => { - assert.deepEqual(commandKeys(['ktx/packages/cli/src/index.ts']), [ - 'boundary-check', - 'type-check:@ktx/cli', - 'build:@ktx/cli', - 'test:@ktx/cli', - ]); - }); - - it('runs the matching script test when a script changes', () => { - assert.deepEqual(commandKeys(['scripts/check-boundaries.mjs']), [ - 'script-test:scripts/check-boundaries.test.mjs', - ]); - }); - - it('runs the touched python package tests', () => { - assert.deepEqual(commandKeys(['python/ktx-sl/semantic_layer/parser.py']), ['pytest:ktx-sl']); - }); -});