ktx/packages/cli/test/context/ingest/context-candidates/curator-pagination.service.test.ts
Andrey Avtomonov 56985b7e09
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00

196 lines
7.7 KiB
TypeScript

import { describe, expect, it, vi } from 'vitest';
import type { ContextCandidateForDedup } from '../../../../src/context/ingest/ports.js';
import { type CuratorPaginationInput, CuratorPaginationService } from '../../../../src/context/ingest/context-candidates/curator-pagination.service.js';
import type { ContextCandidateStorePort } from '../../../../src/context/ingest/context-candidates/store.js';
const candidate = (key: string, score: number): ContextCandidateForDedup => ({
id: `id-${key}`,
candidateKey: key,
topic: `Topic ${key}`,
assertion: `Assertion ${key}`,
promotionScore: score,
createdAt: new Date(`2026-04-30T10:0${score % 10}:00.000Z`),
evidenceChunkIds: [`chunk-${key}`],
evidenceRefs: [{ stableCitationKey: `ref-${key}` }],
embedding: null,
lane: 'full',
});
const promptCandidate = (key: string, score: number) => ({
candidateKey: key,
topic: `Topic ${key}`,
assertion: `Assertion ${key}`,
rationale: `Rationale ${key}`,
actionHint: 'create',
status: 'pending',
promotionScore: score,
suggestedPageKey: `page-${key}`,
evidenceRefs: [{ stableCitationKey: `ref-${key}` }],
});
function makeHarness(overrides: Partial<{ batchSize: number; maxPasses: number; stepBudgetPerPass: number }> = {}) {
const store = {
listPendingCandidatesForDedup: vi.fn().mockResolvedValue([]),
updateCandidateEmbedding: vi.fn().mockResolvedValue(undefined),
markCandidatesAsMergedToCluster: vi.fn().mockResolvedValue(undefined),
listBudgetExhaustedCandidatesForCarryForward: vi.fn().mockResolvedValue([]),
listCurrentRunEvidenceChunksForCarryForward: vi.fn().mockResolvedValue([]),
insertCandidate: vi.fn().mockResolvedValue({ id: 'candidate-1' }),
listCandidatesForPromptByKeys: vi
.fn()
.mockImplementation((_runId: string, keys: string[]) =>
Promise.resolve(keys.map((key) => promptCandidate(key, Number(key.replace('c', '')) || 1))),
),
markPendingCandidatesByReason: vi.fn().mockResolvedValue(0),
summarizeCandidateVerdicts: vi.fn().mockResolvedValue({
pending: 0,
promoted: 1,
merged: 1,
rejected: 1,
conflict: 0,
rejectedByReason: { exceeded_curator_passes: 1 },
}),
} satisfies ContextCandidateStorePort;
const agentRunner = {
runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' }),
};
const actions: Array<{ target: 'wiki'; type: 'created' | 'updated'; key: string; detail: string }> = [];
const prompts: string[] = [];
const service = new CuratorPaginationService({
store,
agentRunner: agentRunner as never,
settings: {
batchSize: overrides.batchSize ?? 2,
maxPasses: overrides.maxPasses ?? 2,
stepBudgetPerPass: overrides.stepBudgetPerPass ?? 7,
},
});
const input = (
representatives = [candidate('c1', 10), candidate('c2', 9), candidate('c3', 8)],
): CuratorPaginationInput => ({
runId: 'run-1',
sourceKey: 'notion',
jobId: 'job-1',
stageIndex: {
jobId: 'job-1',
connectionId: 'c1',
workUnits: [],
conflictsResolved: [],
evictionsApplied: [],
unmappedFallbacks: [],
},
evictionUnit: undefined,
representatives,
initialBudget: { creates: 2, updates: 1 },
modelRole: 'curator',
buildSystemPrompt: () => 'system prompt',
buildUserPrompt: ({ runState, items }) => {
const prompt = `pass=${runState.passNumber}; budget=${runState.budgetRemaining.creates}/${
runState.budgetRemaining.updates
}; items=${items.map((item) => item.candidateKey).join(',')}; previous=${runState.previouslyPromotedInRun
.map((page) => page.pageKey)
.join(',')}`;
prompts.push(prompt);
return prompt;
},
buildToolSet: () => ({}),
getReconciliationActions: () => actions,
});
return { store, agentRunner, actions, prompts, service, input };
}
describe('CuratorPaginationService', () => {
it('paginates representatives and carries budget plus previous wiki writes into later passes', async () => {
const harness = makeHarness({ batchSize: 2, maxPasses: 2, stepBudgetPerPass: 7 });
harness.agentRunner.runLoop.mockImplementation(() => {
if (harness.actions.length === 0) {
harness.actions.push({ target: 'wiki', type: 'created', key: 'page-c1', detail: 'Created C1' });
}
return { stopReason: 'natural' };
});
const result = await harness.service.reconcile(harness.input());
expect(harness.agentRunner.runLoop).toHaveBeenCalledTimes(2);
expect(harness.agentRunner.runLoop).toHaveBeenCalledWith(expect.objectContaining({ modelRole: 'curator' }));
expect(harness.prompts[0]).toContain('pass=1');
expect(harness.prompts[0]).toContain('budget=2/1');
expect(harness.prompts[0]).toContain('items=c1,c2');
expect(harness.prompts[1]).toContain('pass=2');
expect(harness.prompts[1]).toContain('budget=1/1');
expect(harness.prompts[1]).toContain('previous=page-c1');
expect(result.report).toMatchObject({
passesRun: 2,
topicsExamined: 3,
topicsByVerdict: { promoted: 1, merged: 1, rejected: 1, conflict: 0 },
topicsRejectedByReason: { exceeded_curator_passes: 1 },
budgetExhausted: false,
});
});
it('marks unprocessed representatives when maxPasses is exhausted', async () => {
const harness = makeHarness({ batchSize: 1, maxPasses: 1 });
await harness.service.reconcile(harness.input([candidate('c1', 10), candidate('c2', 9)]));
expect(harness.store.markPendingCandidatesByReason).toHaveBeenCalledWith({
runId: 'run-1',
candidateKeys: ['c2'],
rejectionReason: 'exceeded_curator_passes',
});
expect(harness.store.markPendingCandidatesByReason).toHaveBeenCalledWith({
runId: 'run-1',
candidateKeys: ['c1', 'c2'],
rejectionReason: 'exceeded_curator_passes',
});
});
it('marks remaining pending representatives when run budget is exhausted', async () => {
const harness = makeHarness({ batchSize: 1, maxPasses: 5 });
const input = harness.input([candidate('c1', 10), candidate('c2', 9), candidate('c3', 8)]);
input.initialBudget = { creates: 1, updates: 0 };
harness.agentRunner.runLoop.mockImplementation(() => {
harness.actions.push({ target: 'wiki', type: 'created', key: 'page-c1', detail: 'Created C1' });
return { stopReason: 'natural' };
});
const result = await harness.service.reconcile(input);
expect(harness.agentRunner.runLoop).toHaveBeenCalledTimes(1);
expect(harness.store.markPendingCandidatesByReason).toHaveBeenCalledWith({
runId: 'run-1',
candidateKeys: ['c1', 'c2', 'c3'],
rejectionReason: 'exceeded_run_budget',
});
expect(result.report.budgetExhausted).toBe(true);
});
it('marks a failed pass with curator_pass_error and continues to later batches', async () => {
const harness = makeHarness({ batchSize: 1, maxPasses: 3 });
harness.agentRunner.runLoop
.mockResolvedValueOnce({ stopReason: 'error', error: new Error('provider timeout') })
.mockResolvedValue({ stopReason: 'natural' });
const result = await harness.service.reconcile(harness.input([candidate('c1', 10), candidate('c2', 9)]));
expect(harness.agentRunner.runLoop).toHaveBeenCalledTimes(2);
expect(harness.store.markPendingCandidatesByReason).toHaveBeenCalledWith({
runId: 'run-1',
candidateKeys: ['c1'],
rejectionReason: 'curator_pass_error',
});
expect(result.warnings).toContain('Curator pass 1 failed: provider timeout');
});
it('skips when there are no representatives and no reconciliation work', async () => {
const harness = makeHarness();
const result = await harness.service.reconcile(harness.input([]));
expect(result.skipped).toBe(true);
expect(harness.agentRunner.runLoop).not.toHaveBeenCalled();
expect(result.report.topicsExamined).toBe(0);
});
});