feat: merge ingest and scan

* docs: add CLI component reuse guidance

* docs: add unified ingest ux design

* Refine unified ingest UX design after adversarial review iteration 1

* Refine unified ingest UX design after adversarial review iteration 2

* Refine unified ingest UX design after adversarial review iteration 3

* feat(cli): route public connection ingest command

* feat(cli): hide standalone scan from public help

* feat(cli): plan public ingest depth and query history

* feat(cli): execute public database ingest facets

* feat(ingest): read connection query history config

* fix(cli): use public ingest wording

* fix(config): stop generating ingest adapter allow lists

* docs: document public ingest command

* test: align ingest surface expectations

* docs: add unified ingest public CLI surface plan

* feat(cli): preflight deep public ingest readiness

* feat(setup): store query history in connection context

* feat(setup): store database context depth

* feat(setup): verify context readiness by database depth

* fix(setup): keep context build foreground only

* fix(config): reject reserved ingest connection ids

* test: close unified ingest v1 expectations

* docs: add unified ingest v1 closure plan

* fix(ingest): bypass adapter allow-list for public source ingest

* fix(ingest): honor query history window intent

* fix(ingest): hide scan internals from public database ingest

* feat(ingest): use foreground view for interactive public ingest

* fix(setup): use schema context and query history wording

* test(cli): verify unified ingest public output

* docs: add unified ingest v1 public output closure plan

* fix(setup): forward query history flags

* fix(setup): prompt for postgres query history

* fix(status): report query history readiness

* fix(ingest): remove legacy public guidance

* fix(ingest): polish foreground retry copy

* docs(examples): use unified query history wording

* chore(ingest): finish public query history cleanup

* docs: add unified ingest v1 query history status cleanup plan

* test(docs): cover unified ingest public docs

* docs: align ingest CLI reference with unified UX

* docs: update context build guides for unified ingest

* docs: update setup and primary source ingest wording

* docs: stop advertising adapter-backed example ingest

* docs: close unified ingest public docs gaps

* docs: add unified ingest v1 docs site closure plan

* fix: render unified ingest foreground warnings

* fix: explain query history schema order

* fix: add public ingest retry guidance

* fix: align setup next steps with unified ingest

* fix: remove scan wording from demo progress

* test: verify unified ingest ux closure

* docs: add unified ingest v1 foreground and retry closure plan

* fix(cli): preserve query-history pull config in public ingest

* fix(cli): omit hidden commands from docs command tree

* test(cli): close unified ingest final public surface checks

* docs: add unified ingest v1 final public surface closure plan

* fix(cli): use public source labels in ingest reports

* fix(cli): suppress low-level public ingest output

* test(cli): verify unified ingest public plain output

* docs: add unified ingest v1 public plain output closure plan

* fix(cli): add public ingest copy sanitizers

* fix(cli): sanitize public ingest progress copy

* fix(cli): rename setup schema scope prompt

* docs(plan): add progress copy closure; test: align setup back-nav fixture

Adds the iter9 plan and updates the setup back-navigation test fixture
to pass disableQueryHistory plus listSchemas/listTables stubs that the
unified ingest setup step now requires.

* docs(plan): add final ux labels plan with narrowed label scans

* fix(cli): aggregate unsupported query-history warnings

* fix(cli): align setup database labels

* test(cli): fix setup database test type-check

* fix(cli): remove primary-source wording from setup output

* test(cli): verify unified ingest setup closure

* docs(plan): add unified ingest v1 verification copy closure plan

* fix(cli): remove top-level scan command

* fix(cli): remove legacy ingest and wiki commands

* Merge scan into ingest flow

* feat(cli): split ingest progress into per-phase rows, rename work units to tasks

Each database target in the unified ingest dashboard now renders one row per
real subprocess (Schema, then Query history when enabled) instead of a single
combined bar. Each phase has its own monotonic 0-100% bar so the progress
never snaps back to zero when historic-sql starts after scan completes.
Completed phases keep their final bar, summary, and elapsed time visible as
an inline audit trail; queued and skipped phases are shown explicitly.

Also rename user-facing "work units" / "Failed work units" to "tasks" /
"Failed tasks" in ingest output and parseIngestSummary. The parser still
accepts the legacy "Work units:" wording in captured output for backward
compatibility. Internal memory-flow event names and type fields are left alone.

* Fix test harness failures

* Fix CI smoke checks

---------

Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
Andrey Avtomonov 2026-05-14 01:43:06 +02:00 committed by GitHub
parent 1a472cf3ed
commit b00c1a11a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
118 changed files with 16890 additions and 2992 deletions

View file

@ -518,7 +518,7 @@ function requireSuccess(label, result) {
assert.equal(result.stderr, '', label + ' wrote unexpected stderr');
}
function requireProjectStderr(label, result, projectDir) {
function requireSuccessWithProjectStderr(label, result, projectDir) {
assert.equal(
result.code,
0,
@ -527,6 +527,15 @@ function requireProjectStderr(label, result, projectDir) {
assert.equal(result.stderr, 'Project: ' + projectDir + '\\n', label + ' wrote unexpected stderr');
}
/**
 * Asserts that a finished CLI invocation exited with a specific code and
 * that its stderr holds only the project banner.
 *
 * @param {string} label - Human-readable name of the step, used as the prefix of failure messages.
 * @param {{code: number, stdout: string, stderr: string}} result - Captured outcome of the command.
 * @param {string} projectDir - Project directory expected in the stderr banner.
 * @param {number} expectedCode - Exit code the command is required to return.
 */
function requireExitCodeWithProjectStderr(label, result, projectDir, expectedCode) {
  const { code, stdout, stderr } = result;

  // Include both captured streams in the failure text so a wrong exit code
  // can be diagnosed from the assertion message alone.
  const exitSummary = label + ' failed with code ' + code;
  const capturedStreams = '\\nstdout:\\n' + stdout + '\\nstderr:\\n' + stderr;
  assert.equal(code, expectedCode, exitSummary + capturedStreams);

  // stderr must match the banner exactly; any extra output fails the step.
  const expectedBanner = 'Project: ' + projectDir + '\\n';
  assert.equal(stderr, expectedBanner, label + ' wrote unexpected stderr');
}
function requireSuccessWithStderr(label, result, stderrPattern) {
assert.equal(
result.code,
@ -559,12 +568,6 @@ function requireIncludes(values, expected, label) {
assert.ok(values.includes(expected), label + ' did not include ' + expected + ': ' + values.join(', '));
}
/**
 * Extracts the run identifier from captured CLI stdout.
 *
 * Looks for the first line of the form "Run: <id>" and returns the text
 * after the prefix. Fails the assertion when no such line exists.
 *
 * @param {string} stdout - Captured standard output of the command.
 * @returns {string} The text that follows "Run: " on the matching line.
 */
function getRunId(stdout) {
  // The m flag anchors the pattern to individual lines of the output.
  const runLine = stdout.match(/^Run: (.+)$/m);
  assert.ok(runLine, 'ingest run output did not include a run id');
  const [, runId] = runLine;
  return runId;
}
async function writeSqliteWarehouse(projectDir) {
const database = new DatabaseSync(join(projectDir, 'warehouse.db'));
try {
@ -588,7 +591,6 @@ process.env.KTX_RUNTIME_ROOT = join(root, 'managed-runtime');
let daemonStarted = false;
try {
const projectDir = join(root, 'project');
const sourceDir = join(root, 'source');
const version = await run('pnpm', ['exec', 'ktx', '--version']);
requireSuccess('ktx public package version', version);
@ -619,7 +621,6 @@ try {
'--skip-agents',
]);
requireSuccess('ktx setup', init);
requireOutput('ktx setup', init, /Project: /);
const emptyProjectDir = join(root, 'empty-project');
const emptyInit = await run('pnpm', [
@ -652,10 +653,6 @@ try {
'scan:',
' enrichment:',
' mode: deterministic',
'ingest:',
' adapters:',
' - fake',
' - live-database',
'',
].join('\\n'),
'utf-8',
@ -818,52 +815,32 @@ try {
requireOutput('ktx dev runtime stop', runtimeStop, /Stopped KTX Python daemon/);
process.stdout.write('ktx dev runtime daemon lifecycle verified\\n');
const structuralScan = await run('pnpm', ['exec', 'ktx', 'scan', 'warehouse',
const structuralScan = await run('pnpm', ['exec', 'ktx', 'ingest', 'warehouse',
'--project-dir',
projectDir,
'--fast',
'--no-input',
]);
requireProjectStderr('ktx scan structural', structuralScan, projectDir);
requireOutput('ktx scan structural', structuralScan, /Status: done/);
requireOutput('ktx scan structural', structuralScan, /Mode: structural/);
requireOutput('ktx scan structural', structuralScan, /Needs attention\\s+None/);
const structuralScanRunId = getRunId(structuralScan.stdout);
requireSuccessWithProjectStderr('ktx ingest fast', structuralScan, projectDir);
requireOutput('ktx ingest fast', structuralScan, /Ingest finished/);
requireOutput('ktx ingest fast', structuralScan, /Database schema/);
requireOutput('ktx ingest fast', structuralScan, /warehouse\\s+done/);
await access(join(projectDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml'));
process.stdout.write('ktx scan structural verified: ' + structuralScanRunId + '\\n');
process.stdout.write('ktx ingest fast verified\\n');
const enrichedScan = await run('pnpm', ['exec', 'ktx', 'scan', 'warehouse',
const enrichedScan = await run('pnpm', ['exec', 'ktx', 'ingest', 'warehouse',
'--project-dir',
projectDir,
'--mode',
'enriched',
'--deep',
'--no-input',
]);
requireProjectStderr('ktx scan enriched', enrichedScan, projectDir);
requireOutput('ktx scan enriched', enrichedScan, /Status: done/);
requireOutput('ktx scan enriched', enrichedScan, /Mode: enriched/);
requireOutput('ktx scan enriched', enrichedScan, /Enrichment artifacts:/);
const enrichedScanRunId = getRunId(enrichedScan.stdout);
process.stdout.write('ktx scan enriched verified: ' + enrichedScanRunId + '\\n');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\\n', 'utf-8');
const ingestRun = await run('pnpm', ['exec', 'ktx', 'ingest', 'run',
'--project-dir',
projectDir,
'--connection-id',
'warehouse',
'--adapter',
'fake',
'--source-dir',
sourceDir,
]);
assert.equal(ingestRun.code, 1, 'ktx ingest run without an LLM provider must fail');
assert.match(
ingestRun.stderr,
/ktx ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway, or an injected agentRunner/,
);
requireExitCodeWithProjectStderr('ktx ingest deep readiness guard', enrichedScan, projectDir, 1);
requireOutput('ktx ingest deep readiness guard', enrichedScan, /Ingest finished with partial failures/);
requireOutput('ktx ingest deep readiness guard', enrichedScan, /requires deep ingest readiness/);
process.stdout.write('ktx ingest deep readiness guard verified\\n');
await access(join(projectDir, '.ktx', 'db.sqlite'));
process.stdout.write('ktx ingest provider guard verified\\n');
process.stdout.write('ktx ingest state verified\\n');
} finally {
if (daemonStarted) {
await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'stop']);
@ -939,7 +916,7 @@ try {
assert.ok([0, 1].includes(doctor.code), 'ktx status setup exit code must be 0 or 1');
requireStdout('ktx status setup', doctor, /KTX status/);
requireStdout('ktx status setup', doctor, /No project here yet\\./);
requireStdout('ktx status setup', doctor, /Before you can run ktx setup/);
requireStdout('ktx status setup', doctor, /ktx setup/);
requireStdout('ktx status setup', doctor, /Node 22\\+/);
assert.equal(doctor.stderr, '', 'ktx status setup wrote unexpected stderr');
} finally {