mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
196 lines
7.7 KiB
TypeScript
196 lines
7.7 KiB
TypeScript
import { describe, expect, it, vi } from 'vitest';
|
|
import type { ContextCandidateForDedup } from '../../../../src/context/ingest/ports.js';
|
|
import { type CuratorPaginationInput, CuratorPaginationService } from '../../../../src/context/ingest/context-candidates/curator-pagination.service.js';
|
|
import type { ContextCandidateStorePort } from '../../../../src/context/ingest/context-candidates/store.js';
|
|
|
|
const candidate = (key: string, score: number): ContextCandidateForDedup => ({
|
|
id: `id-${key}`,
|
|
candidateKey: key,
|
|
topic: `Topic ${key}`,
|
|
assertion: `Assertion ${key}`,
|
|
promotionScore: score,
|
|
createdAt: new Date(`2026-04-30T10:0${score % 10}:00.000Z`),
|
|
evidenceChunkIds: [`chunk-${key}`],
|
|
evidenceRefs: [{ stableCitationKey: `ref-${key}` }],
|
|
embedding: null,
|
|
lane: 'full',
|
|
});
|
|
|
|
const promptCandidate = (key: string, score: number) => ({
|
|
candidateKey: key,
|
|
topic: `Topic ${key}`,
|
|
assertion: `Assertion ${key}`,
|
|
rationale: `Rationale ${key}`,
|
|
actionHint: 'create',
|
|
status: 'pending',
|
|
promotionScore: score,
|
|
suggestedPageKey: `page-${key}`,
|
|
evidenceRefs: [{ stableCitationKey: `ref-${key}` }],
|
|
});
|
|
|
|
function makeHarness(overrides: Partial<{ batchSize: number; maxPasses: number; stepBudgetPerPass: number }> = {}) {
|
|
const store = {
|
|
listPendingCandidatesForDedup: vi.fn().mockResolvedValue([]),
|
|
updateCandidateEmbedding: vi.fn().mockResolvedValue(undefined),
|
|
markCandidatesAsMergedToCluster: vi.fn().mockResolvedValue(undefined),
|
|
listBudgetExhaustedCandidatesForCarryForward: vi.fn().mockResolvedValue([]),
|
|
listCurrentRunEvidenceChunksForCarryForward: vi.fn().mockResolvedValue([]),
|
|
insertCandidate: vi.fn().mockResolvedValue({ id: 'candidate-1' }),
|
|
listCandidatesForPromptByKeys: vi
|
|
.fn()
|
|
.mockImplementation((_runId: string, keys: string[]) =>
|
|
Promise.resolve(keys.map((key) => promptCandidate(key, Number(key.replace('c', '')) || 1))),
|
|
),
|
|
markPendingCandidatesByReason: vi.fn().mockResolvedValue(0),
|
|
summarizeCandidateVerdicts: vi.fn().mockResolvedValue({
|
|
pending: 0,
|
|
promoted: 1,
|
|
merged: 1,
|
|
rejected: 1,
|
|
conflict: 0,
|
|
rejectedByReason: { exceeded_curator_passes: 1 },
|
|
}),
|
|
} satisfies ContextCandidateStorePort;
|
|
const agentRunner = {
|
|
runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' }),
|
|
};
|
|
const actions: Array<{ target: 'wiki'; type: 'created' | 'updated'; key: string; detail: string }> = [];
|
|
const prompts: string[] = [];
|
|
const service = new CuratorPaginationService({
|
|
store,
|
|
agentRunner: agentRunner as never,
|
|
settings: {
|
|
batchSize: overrides.batchSize ?? 2,
|
|
maxPasses: overrides.maxPasses ?? 2,
|
|
stepBudgetPerPass: overrides.stepBudgetPerPass ?? 7,
|
|
},
|
|
});
|
|
|
|
const input = (
|
|
representatives = [candidate('c1', 10), candidate('c2', 9), candidate('c3', 8)],
|
|
): CuratorPaginationInput => ({
|
|
runId: 'run-1',
|
|
sourceKey: 'notion',
|
|
jobId: 'job-1',
|
|
stageIndex: {
|
|
jobId: 'job-1',
|
|
connectionId: 'c1',
|
|
workUnits: [],
|
|
conflictsResolved: [],
|
|
evictionsApplied: [],
|
|
unmappedFallbacks: [],
|
|
},
|
|
evictionUnit: undefined,
|
|
representatives,
|
|
initialBudget: { creates: 2, updates: 1 },
|
|
modelRole: 'curator',
|
|
buildSystemPrompt: () => 'system prompt',
|
|
buildUserPrompt: ({ runState, items }) => {
|
|
const prompt = `pass=${runState.passNumber}; budget=${runState.budgetRemaining.creates}/${
|
|
runState.budgetRemaining.updates
|
|
}; items=${items.map((item) => item.candidateKey).join(',')}; previous=${runState.previouslyPromotedInRun
|
|
.map((page) => page.pageKey)
|
|
.join(',')}`;
|
|
prompts.push(prompt);
|
|
return prompt;
|
|
},
|
|
buildToolSet: () => ({}),
|
|
getReconciliationActions: () => actions,
|
|
});
|
|
|
|
return { store, agentRunner, actions, prompts, service, input };
|
|
}
|
|
|
|
describe('CuratorPaginationService', () => {
|
|
it('paginates representatives and carries budget plus previous wiki writes into later passes', async () => {
|
|
const harness = makeHarness({ batchSize: 2, maxPasses: 2, stepBudgetPerPass: 7 });
|
|
harness.agentRunner.runLoop.mockImplementation(() => {
|
|
if (harness.actions.length === 0) {
|
|
harness.actions.push({ target: 'wiki', type: 'created', key: 'page-c1', detail: 'Created C1' });
|
|
}
|
|
return { stopReason: 'natural' };
|
|
});
|
|
|
|
const result = await harness.service.reconcile(harness.input());
|
|
|
|
expect(harness.agentRunner.runLoop).toHaveBeenCalledTimes(2);
|
|
expect(harness.agentRunner.runLoop).toHaveBeenCalledWith(expect.objectContaining({ modelRole: 'curator' }));
|
|
expect(harness.prompts[0]).toContain('pass=1');
|
|
expect(harness.prompts[0]).toContain('budget=2/1');
|
|
expect(harness.prompts[0]).toContain('items=c1,c2');
|
|
expect(harness.prompts[1]).toContain('pass=2');
|
|
expect(harness.prompts[1]).toContain('budget=1/1');
|
|
expect(harness.prompts[1]).toContain('previous=page-c1');
|
|
expect(result.report).toMatchObject({
|
|
passesRun: 2,
|
|
topicsExamined: 3,
|
|
topicsByVerdict: { promoted: 1, merged: 1, rejected: 1, conflict: 0 },
|
|
topicsRejectedByReason: { exceeded_curator_passes: 1 },
|
|
budgetExhausted: false,
|
|
});
|
|
});
|
|
|
|
it('marks unprocessed representatives when maxPasses is exhausted', async () => {
|
|
const harness = makeHarness({ batchSize: 1, maxPasses: 1 });
|
|
|
|
await harness.service.reconcile(harness.input([candidate('c1', 10), candidate('c2', 9)]));
|
|
|
|
expect(harness.store.markPendingCandidatesByReason).toHaveBeenCalledWith({
|
|
runId: 'run-1',
|
|
candidateKeys: ['c2'],
|
|
rejectionReason: 'exceeded_curator_passes',
|
|
});
|
|
expect(harness.store.markPendingCandidatesByReason).toHaveBeenCalledWith({
|
|
runId: 'run-1',
|
|
candidateKeys: ['c1', 'c2'],
|
|
rejectionReason: 'exceeded_curator_passes',
|
|
});
|
|
});
|
|
|
|
it('marks remaining pending representatives when run budget is exhausted', async () => {
|
|
const harness = makeHarness({ batchSize: 1, maxPasses: 5 });
|
|
const input = harness.input([candidate('c1', 10), candidate('c2', 9), candidate('c3', 8)]);
|
|
input.initialBudget = { creates: 1, updates: 0 };
|
|
harness.agentRunner.runLoop.mockImplementation(() => {
|
|
harness.actions.push({ target: 'wiki', type: 'created', key: 'page-c1', detail: 'Created C1' });
|
|
return { stopReason: 'natural' };
|
|
});
|
|
|
|
const result = await harness.service.reconcile(input);
|
|
|
|
expect(harness.agentRunner.runLoop).toHaveBeenCalledTimes(1);
|
|
expect(harness.store.markPendingCandidatesByReason).toHaveBeenCalledWith({
|
|
runId: 'run-1',
|
|
candidateKeys: ['c1', 'c2', 'c3'],
|
|
rejectionReason: 'exceeded_run_budget',
|
|
});
|
|
expect(result.report.budgetExhausted).toBe(true);
|
|
});
|
|
|
|
it('marks a failed pass with curator_pass_error and continues to later batches', async () => {
|
|
const harness = makeHarness({ batchSize: 1, maxPasses: 3 });
|
|
harness.agentRunner.runLoop
|
|
.mockResolvedValueOnce({ stopReason: 'error', error: new Error('provider timeout') })
|
|
.mockResolvedValue({ stopReason: 'natural' });
|
|
|
|
const result = await harness.service.reconcile(harness.input([candidate('c1', 10), candidate('c2', 9)]));
|
|
|
|
expect(harness.agentRunner.runLoop).toHaveBeenCalledTimes(2);
|
|
expect(harness.store.markPendingCandidatesByReason).toHaveBeenCalledWith({
|
|
runId: 'run-1',
|
|
candidateKeys: ['c1'],
|
|
rejectionReason: 'curator_pass_error',
|
|
});
|
|
expect(result.warnings).toContain('Curator pass 1 failed: provider timeout');
|
|
});
|
|
|
|
it('skips when there are no representatives and no reconciliation work', async () => {
|
|
const harness = makeHarness();
|
|
|
|
const result = await harness.service.reconcile(harness.input([]));
|
|
|
|
expect(result.skipped).toBe(true);
|
|
expect(harness.agentRunner.runLoop).not.toHaveBeenCalled();
|
|
expect(result.report.topicsExamined).toBe(0);
|
|
});
|
|
});
|