mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
396 lines
14 KiB
TypeScript
396 lines
14 KiB
TypeScript
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
|
import { tmpdir } from 'node:os';
|
|
import { join } from 'node:path';
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
import { PageTriageService } from '../../../../src/context/ingest/page-triage/page-triage.service.js';
|
|
|
|
describe('PageTriageService', () => {
|
|
// Shared fixtures. Each of these is reassigned in `beforeEach`, so every test
// starts from a fresh staged directory and fresh mocks.
type MockFn = ReturnType<typeof vi.fn>;
type PromptLoader = (name: string) => Promise<string>;

// Scratch directory holding the staged export for the current test.
let stagedDir: string;

// Test double handed to the service as `store`.
let repository: Record<
  'setDocumentTriageLane' | 'listDocumentChunksForLightExtraction' | 'insertCandidate',
  MockFn
>;

// Service under test.
let service: PageTriageService;

// Passed to the service by reference; individual tests flip flags on this
// object after the service has been constructed.
let triageSettings: {
  enabled: boolean;
  maxConcurrency: number;
  lightExtractionEnabled: boolean;
  classifierModel: string | null;
  lightExtractionMaxCandidates: number;
};

// Prompt loader double; typed so tests can override it per prompt name.
let promptService: { loadPrompt: ReturnType<typeof vi.fn<PromptLoader>> };

// Source adapter double that advertises triage support.
let adapter: { triageSupported: true; getTriageSignals: MockFn };

// LLM runtime double; the tests in this suite only script `generateText`.
let llmRuntime: Record<'generateText' | 'generateObject' | 'runAgentLoop', MockFn>;
|
|
|
|
// Stages a single-page export on disk and wires fresh mocks into a new service.
beforeEach(async () => {
  stagedDir = await mkdtemp(join(tmpdir(), 'page-triage-'));
  const pageDir = join(stagedDir, 'pages', 'page-1');
  await mkdir(pageDir, { recursive: true });

  // Default page fixture; individual tests may overwrite these two files.
  const pageMetadata = {
    objectType: 'page',
    id: 'page-1',
    title: 'Support Handoff',
    path: 'Company / Support Handoff',
    url: null,
    parentId: null,
    databaseId: null,
    dataSourceId: null,
    lastEditedAt: '2026-04-29T12:00:00.000Z',
    lastEditedBy: null,
    properties: { Status: 'Approved' },
  };
  await writeFile(join(pageDir, 'metadata.json'), JSON.stringify(pageMetadata), 'utf-8');
  await writeFile(
    join(pageDir, 'page.md'),
    '# Support Handoff\n\nSupport handoffs require a named customer owner.\n',
    'utf-8',
  );

  // The single chunk the store double returns for light extraction.
  const lightExtractionChunk = {
    chunkId: '00000000-0000-0000-0000-000000000101',
    headingPath: ['Support Handoff'],
    ordinal: 0,
    content: 'Support handoffs require a named customer owner.',
    stableCitationKey: 'notion:page-1:support-handoff',
    citation: { source: 'notion', pageId: 'page-1' },
    rawPath: 'pages/page-1/page.md',
    title: 'Support Handoff',
    path: 'Company / Support Handoff',
    url: null,
    lastEditedAt: new Date('2026-04-29T12:00:00.000Z'),
  };

  repository = {
    setDocumentTriageLane: vi.fn().mockResolvedValue(1),
    listDocumentChunksForLightExtraction: vi.fn().mockResolvedValue([lightExtractionChunk]),
    // Echo the inserted key and score back in snake_case.
    insertCandidate: vi.fn().mockImplementation((input) => {
      const { candidateKey, promotionScore } = input;
      return Promise.resolve({ candidate_key: candidateKey, promotion_score: promotionScore });
    }),
  };

  triageSettings = {
    enabled: true,
    maxConcurrency: 2,
    lightExtractionEnabled: true,
    classifierModel: null,
    lightExtractionMaxCandidates: 3,
  };

  adapter = {
    triageSupported: true,
    getTriageSignals: vi.fn().mockResolvedValue({ objectType: 'page', propertyHints: { Status: 'Approved' } }),
  };

  // By default every prompt resolves to a recognisable `prompt:<name>` marker.
  promptService = {
    loadPrompt: vi
      .fn<(name: string) => Promise<string>>()
      .mockImplementation(async (name) => `prompt:${name}`),
  };

  llmRuntime = {
    generateText: vi.fn(),
    generateObject: vi.fn(),
    runAgentLoop: vi.fn(),
  };

  service = new PageTriageService({
    store: repository as any,
    llmRuntime: llmRuntime as any,
    settings: triageSettings,
    promptService: promptService as any,
  });
});
|
|
|
|
// Removes the per-test staging directory; `force` tolerates a missing path.
afterEach(() => rm(stagedDir, { recursive: true, force: true }));
|
|
|
|
// Light-lane happy path: the classifier picks "light", extraction returns one
// candidate, and the page must not appear among the full-lane raw paths.
it('writes light-lane candidates and keeps the page out of full WorkUnits', async () => {
  const pageRawPath = 'pages/page-1/page.md';
  const classifierReply = JSON.stringify({ lane: 'light', reason: 'short durable policy' });
  const extractionReply = JSON.stringify({
    candidates: [
      {
        candidateKey: 'support-handoff-owner',
        topic: 'Support Handoff',
        assertion: 'Support handoffs require a named customer owner.',
        rationale: 'The staged Support Handoff page states the owner rule.',
        evidenceChunkIds: ['00000000-0000-0000-0000-000000000101'],
        suggestedPageKey: 'support-handoff',
        actionHint: 'create',
        durabilityScore: 3,
        authorityScore: 2,
        reuseScore: 3,
        noveltyScore: 2,
        riskScore: 0,
      },
    ],
  });
  // First call answers the classifier, second call answers light extraction.
  llmRuntime.generateText.mockResolvedValueOnce(classifierReply).mockResolvedValueOnce(extractionReply);

  const diffSet = {
    added: ['pages/page-1/metadata.json', pageRawPath],
    modified: [],
    deleted: [],
    unchanged: [],
  };
  const result = await service.triageRun({
    stagedDir,
    runId: 'run-1',
    connectionId: 'conn-1',
    sourceKey: 'notion',
    syncId: 'sync-1',
    jobId: 'job-1',
    diffSet,
    adapter: adapter as any,
  });

  // Report and lane bookkeeping.
  expect(result.enabled).toBe(true);
  expect(result.report).toEqual({
    pageCount: 1,
    skip: 0,
    light: 1,
    full: 0,
    classifierFailures: 0,
    lightExtractionFailures: 0,
  });
  expect(result.fullRawPaths.has(pageRawPath)).toBe(false);

  // Collaborator interactions.
  expect(adapter.getTriageSignals).toHaveBeenCalledWith(stagedDir, 'page-1');
  expect(llmRuntime.generateText).toHaveBeenCalledWith(expect.objectContaining({ role: 'triage' }));
  expect(repository.setDocumentTriageLane).toHaveBeenCalledWith('run-1', pageRawPath, 'light');

  const expectedCandidate = expect.objectContaining({
    runId: 'run-1',
    candidateKey: 'support-handoff-owner',
    lane: 'light',
    promotionScore: 10,
  });
  expect(repository.insertCandidate).toHaveBeenCalledWith(expectedCandidate);
});
|
|
|
|
// Verifies the classifier prompt wiring for a named, reusable script page:
// the guidance loaded for 'skills/page_triage_classifier' must reach the LLM
// as the system text, the page itself must reach it as the user prompt, and
// the page must land in the "light" lane.
//
// The prompt assertions run AFTER `triageRun` against the recorded mock call
// rather than inside the mock implementation. Sibling tests show the service
// converts classifier/extraction failures into a "full" fallback, so an
// assertion thrown inside the mock would presumably be absorbed and surface
// only as a confusing lane-count mismatch.
it('does not classify named reusable sales scripts as skip', async () => {
  const guidanceOnScripts = 'Reusable templates and scripts are durable knowledge regardless of subject matter.';
  const guidanceOnStandups = 'Date-titled standups are still skip; named templates and scripts are not.';
  const pageDir = join(stagedDir, 'pages', 'page-1');

  // Replace the default fixture page with a sales script.
  await writeFile(
    join(pageDir, 'metadata.json'),
    JSON.stringify({
      objectType: 'page',
      id: 'page-1',
      title: 'Cold Call Script',
      path: 'Sales / Cold Call Script',
      url: null,
      parentId: null,
      databaseId: null,
      dataSourceId: null,
      lastEditedAt: '2026-04-29T12:00:00.000Z',
      lastEditedBy: null,
      properties: { Team: 'Sales' },
    }),
    'utf-8',
  );
  await writeFile(
    join(pageDir, 'page.md'),
    [
      '# Cold Call Script',
      '',
      'Reusable outbound sequence:',
      '',
      '- Ask about current customer success expansion workflow.',
      '- Position KTX as AI search visibility for CS teams.',
      '- Close with a discovery call request.',
    ].join('\n'),
    'utf-8',
  );

  // Only the classifier prompt carries the guidance under test; every other
  // prompt keeps the default `prompt:<name>` marker.
  promptService.loadPrompt.mockImplementation((name: string) => {
    if (name === 'skills/page_triage_classifier') {
      return Promise.resolve([guidanceOnScripts, guidanceOnStandups].join('\n'));
    }
    return Promise.resolve(`prompt:${name}`);
  });

  // First call answers the classifier, second call answers light extraction.
  llmRuntime.generateText
    .mockResolvedValueOnce(JSON.stringify({ lane: 'light', reason: 'reusable sales script' }))
    .mockResolvedValueOnce(
      JSON.stringify({
        candidates: [
          {
            candidateKey: 'cold-call-script',
            topic: 'Cold Call Script',
            assertion: 'Cold call outreach should position KTX around AI search visibility for CS teams.',
            rationale: 'The script gives a reusable outbound call sequence and positioning language.',
            evidenceChunkIds: ['00000000-0000-0000-0000-000000000101'],
            suggestedPageKey: 'cold-call-script',
            actionHint: 'create',
            durabilityScore: 3,
            authorityScore: 2,
            reuseScore: 3,
            noveltyScore: 2,
            riskScore: 0,
          },
        ],
      }),
    );

  const result = await service.triageRun({
    stagedDir,
    runId: 'run-1',
    connectionId: 'conn-1',
    sourceKey: 'notion',
    syncId: 'sync-1',
    jobId: 'job-1',
    diffSet: {
      added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'],
      modified: [],
      deleted: [],
      unchanged: [],
    },
    adapter: adapter as any,
  });

  // Inspect what the classifier (the first generateText call) actually received.
  expect(llmRuntime.generateText).toHaveBeenCalled();
  const [classifierArgs] = llmRuntime.generateText.mock.calls[0] ?? [];
  expect(classifierArgs?.system).toContain(guidanceOnScripts);
  expect(classifierArgs?.system).toContain(guidanceOnStandups);
  expect(classifierArgs?.prompt).toContain('Cold Call Script');
  // The guidance belongs in the system text only, never in the user prompt.
  expect(classifierArgs?.prompt).not.toContain('Reusable templates and scripts are durable knowledge');

  expect(result.report).toMatchObject({ pageCount: 1, skip: 0, light: 1, full: 0 });
  expect(repository.setDocumentTriageLane).toHaveBeenCalledWith('run-1', 'pages/page-1/page.md', 'light');
});
|
|
|
|
// Stages a Notion data source alongside the default page: the data source has
// its own metadata.json (no page.md) plus one row that does have a page.md.
// Only the two page.md documents may be triaged; the data-source-level
// metadata file must never be treated as a page.
it('triages Notion data-source row pages without reading data-source metadata as page markdown', async () => {
  // Light extraction is irrelevant here; the service reads this flag from the
  // shared settings object.
  triageSettings.lightExtractionEnabled = false;

  const dataSourceDir = join(stagedDir, 'data-sources', 'ds-1');
  const rowDir = join(dataSourceDir, 'rows', 'row-1');
  await mkdir(rowDir, { recursive: true });
  await writeFile(
    join(dataSourceDir, 'metadata.json'),
    JSON.stringify({
      objectType: 'data_source',
      id: 'ds-1',
      title: 'Product Docs',
      path: 'Product Docs',
    }),
    'utf-8',
  );
  await writeFile(
    join(rowDir, 'metadata.json'),
    JSON.stringify({
      objectType: 'data_source_row',
      id: 'row-1',
      title: 'Launch Policy',
      path: 'Product Docs / Launch Policy',
      dataSourceId: 'ds-1',
    }),
    'utf-8',
  );
  await writeFile(
    join(rowDir, 'page.md'),
    '# Launch Policy\n\nLaunches require a customer-facing rollback owner.\n',
    'utf-8',
  );

  // Every classifier call answers "full".
  llmRuntime.generateText.mockResolvedValue(JSON.stringify({ lane: 'full', reason: 'durable policy page' }));

  const result = await service.triageRun({
    stagedDir,
    runId: 'run-1',
    connectionId: 'conn-1',
    sourceKey: 'notion',
    syncId: 'sync-1',
    jobId: 'job-1',
    diffSet: {
      added: [
        'pages/page-1/metadata.json',
        'pages/page-1/page.md',
        'data-sources/ds-1/metadata.json',
        'data-sources/ds-1/rows/row-1/metadata.json',
        'data-sources/ds-1/rows/row-1/page.md',
      ],
      modified: [],
      deleted: [],
      unchanged: [],
    },
    adapter: adapter as any,
  });

  expect(result.report).toMatchObject({ pageCount: 2, skip: 0, light: 0, full: 2 });
  // Subset check: `arrayContaining` ignores order, so no sorting is needed.
  // It only requires that both page.md paths are present.
  expect([...result.fullRawPaths]).toEqual(
    expect.arrayContaining(['data-sources/ds-1/rows/row-1/page.md', 'pages/page-1/page.md']),
  );
  expect(result.fullRawPaths.has('data-sources/ds-1/metadata.json')).toBe(false);
  expect(repository.setDocumentTriageLane).toHaveBeenCalledWith(
    'run-1',
    'data-sources/ds-1/rows/row-1/page.md',
    'full',
  );
});
|
|
|
|
// A classifier reply that is not valid JSON must be counted as a classifier
// failure and the page routed to the full lane.
it('falls back to full when classifier output is malformed', async () => {
  const pageRawPath = 'pages/page-1/page.md';
  llmRuntime.generateText.mockResolvedValueOnce('not-json');

  const diffSet = { added: [pageRawPath], modified: [], deleted: [], unchanged: [] };
  const result = await service.triageRun({
    stagedDir,
    runId: 'run-1',
    connectionId: 'conn-1',
    sourceKey: 'notion',
    syncId: 'sync-1',
    jobId: 'job-1',
    diffSet,
    adapter: adapter as any,
  });

  const expectedReport = { pageCount: 1, skip: 0, light: 0, full: 1, classifierFailures: 1 };
  expect(result.report).toMatchObject(expectedReport);
  expect(result.fullRawPaths.has(pageRawPath)).toBe(true);
  expect(repository.setDocumentTriageLane).toHaveBeenCalledWith('run-1', pageRawPath, 'full');
});
|
|
|
|
// The classifier picks "light", but the follow-up extraction call rejects; the
// page must end up in the full lane with one light-extraction failure recorded.
it('promotes a light page to full when light extraction fails', async () => {
  const pageRawPath = 'pages/page-1/page.md';
  const classifierReply = JSON.stringify({ lane: 'light', reason: 'short durable policy' });
  llmRuntime.generateText
    .mockResolvedValueOnce(classifierReply)
    .mockRejectedValueOnce(new Error('provider unavailable'));

  const diffSet = { added: [pageRawPath], modified: [], deleted: [], unchanged: [] };
  const result = await service.triageRun({
    stagedDir,
    runId: 'run-1',
    connectionId: 'conn-1',
    sourceKey: 'notion',
    syncId: 'sync-1',
    jobId: 'job-1',
    diffSet,
    adapter: adapter as any,
  });

  const expectedReport = { pageCount: 1, skip: 0, light: 0, full: 1, lightExtractionFailures: 1 };
  expect(result.report).toMatchObject(expectedReport);
  expect(result.fullRawPaths.has(pageRawPath)).toBe(true);
  // Checks the LAST recorded lane write, which must be "full".
  expect(repository.setDocumentTriageLane).toHaveBeenLastCalledWith('run-1', pageRawPath, 'full');
});
|
|
|
|
// With triage switched off the run must return the disabled result without
// calling the LLM or writing any lane.
it('short-circuits when triage is disabled', async () => {
  triageSettings.enabled = false;

  const diffSet = { added: ['pages/page-1/page.md'], modified: [], deleted: [], unchanged: [] };
  const result = await service.triageRun({
    stagedDir,
    runId: 'run-1',
    connectionId: 'conn-1',
    sourceKey: 'notion',
    syncId: 'sync-1',
    jobId: 'job-1',
    diffSet,
    adapter: adapter as any,
  });

  const disabledResult = { enabled: false, report: undefined, fullRawPaths: new Set<string>(), warnings: [] };
  expect(result).toEqual(disabledResult);
  expect(llmRuntime.generateText).not.toHaveBeenCalled();
  expect(repository.setDocumentTriageLane).not.toHaveBeenCalled();
});
|
|
});
|