2026-05-10 23:12:26 +02:00
import { mkdirSync } from 'node:fs' ;
import { join } from 'node:path' ;
import { fileURLToPath } from 'node:url' ;
import YAML from 'yaml' ;
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js' ;
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js' ;
import type { KtxEmbeddingPort } from '../../context/core/embedding.js' ;
import type { KtxLogger } from '../../context/core/config.js' ;
import { noopLogger } from '../../context/core/config.js' ;
import { SessionWorktreeService } from '../../context/core/session-worktree.service.js' ;
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js' ;
import { createRuntimeToolDescriptorFromAiTool } from '../../context/llm/runtime-tools.js' ;
import { createLocalKtxLlmRuntimeFromConfig } from '../../context/llm/local-config.js' ;
import { KtxIngestEmbeddingPortAdapter } from '../../context/llm/embedding-port.js' ;
import { RuntimeAgentRunner , type AgentRunnerPort , type KtxLlmRuntimePort , type KtxRuntimeToolSet } from '../../context/llm/runtime-port.js' ;
import type { KtxEmbeddingProvider } from '../../llm/types.js' ;
import type { KtxLocalProject } from '../../context/project/project.js' ;
import { ktxLocalStateDbPath } from '../../context/project/local-state-db.js' ;
import { PromptService } from '../../context/prompts/prompt.service.js' ;
import { SkillsRegistryService } from '../../context/skills/skills-registry.service.js' ;
import type { KtxConnectionInfo , KtxQueryResult , SlConnectionCatalogPort , SlPythonPort , SlSourcesIndexPort } from '../../context/sl/ports.js' ;
import { SemanticLayerService } from '../../context/sl/semantic-layer.service.js' ;
import { SlDiscoverTool } from '../../context/sl/tools/sl-discover.tool.js' ;
import { SlEditSourceTool } from '../../context/sl/tools/sl-edit-source.tool.js' ;
import { SlReadSourceTool } from '../../context/sl/tools/sl-read-source.tool.js' ;
import { SlRollbackTool } from '../../context/sl/tools/sl-rollback.tool.js' ;
import { SlSearchService } from '../../context/sl/sl-search.service.js' ;
import { SlValidateTool } from '../../context/sl/tools/sl-validate.tool.js' ;
import type { SlValidationDeps } from '../../context/sl/tools/sl-warehouse-validation.js' ;
import type { SlValidatorPort } from '../../context/sl/sl-validator.port.js' ;
import { SlWriteSourceTool } from '../../context/sl/tools/sl-write-source.tool.js' ;
import { SqliteSlSourcesIndex } from '../../context/sl/sqlite-sl-sources-index.js' ;
import { sourceDefinitionSchema , sourceOverlaySchema } from '../../context/sl/schemas.js' ;
import { BaseTool , type ToolContext } from '../../context/tools/base-tool.js' ;
import { ContextCandidateMarkTool } from '../../context/tools/context-candidate-mark.tool.js' ;
import { ContextCandidateWriteTool } from '../../context/tools/context-candidate-write.tool.js' ;
import { ContextEvidenceNeighborsTool } from '../../context/tools/context-evidence-neighbors.tool.js' ;
import { ContextEvidenceReadTool } from '../../context/tools/context-evidence-read.tool.js' ;
import { ContextEvidenceSearchTool } from '../../context/tools/context-evidence-search.tool.js' ;
import type { GitAuthorResolverPort } from '../../context/tools/authors.js' ;
import type { ToolSession } from '../../context/tools/tool-session.js' ;
import { buildKnowledgeSearchText } from '../../context/wiki/knowledge-search-text.js' ;
import type { KnowledgeEventPort , KnowledgeIndexPort , KnowledgeIndexPageListing } from '../../context/wiki/ports.js' ;
import { KnowledgeWikiService } from '../../context/wiki/knowledge-wiki.service.js' ;
import { searchLocalKnowledgePages } from '../../context/wiki/local-knowledge.js' ;
import { SqliteKnowledgeIndex , type SqliteKnowledgeIndexPage } from '../../context/wiki/sqlite-knowledge-index.js' ;
import { WikiListTagsTool } from '../../context/wiki/tools/wiki-list-tags.tool.js' ;
import { WikiReadTool } from '../../context/wiki/tools/wiki-read.tool.js' ;
import { WikiRemoveTool } from '../../context/wiki/tools/wiki-remove.tool.js' ;
import { WikiSearchTool } from '../../context/wiki/tools/wiki-search.tool.js' ;
import { WikiWriteTool } from '../../context/wiki/tools/wiki-write.tool.js' ;
import { CandidateDedupService } from '../../context/ingest/context-candidates/candidate-dedup.service.js' ;
import { ContextCandidateCarryforwardService } from '../../context/ingest/context-candidates/context-candidate-carryforward.service.js' ;
import { CuratorPaginationService } from '../../context/ingest/context-candidates/curator-pagination.service.js' ;
2026-05-11 18:58:19 +02:00
import { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evidence-tool.js' ;
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
import { ContextEvidenceIndexService } from '../../context/ingest/context-evidence/context-evidence-index.service.js' ;
import { SqliteContextEvidenceStore } from '../../context/ingest/context-evidence/sqlite-context-evidence-store.js' ;
2026-05-10 23:12:26 +02:00
import { DiffSetService } from './diff-set.service.js' ;
2026-05-18 13:38:06 +02:00
import { ingestTracePathForJob , type IngestTraceLevel } from './ingest-trace.js' ;
2026-05-10 23:12:26 +02:00
import { IngestBundleRunner } from './ingest-bundle.runner.js' ;
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
import { PageTriageService } from '../../context/ingest/page-triage/page-triage.service.js' ;
import { createWarehouseVerificationTools } from '../../context/ingest/tools/warehouse-verification/create-warehouse-verification-tools.js' ;
2026-05-10 23:12:26 +02:00
import type {
IngestBundleRunnerDeps ,
IngestCommitMessagePort ,
IngestLockPort ,
IngestStoragePort ,
IngestToolsetFactoryPort ,
IngestToolsetLike ,
SourceAdapterRegistryPort ,
} from './ports.js' ;
import { SourceAdapterRegistry } from './source-adapter-registry.js' ;
import { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js' ;
import type { SourceAdapter } from './types.js' ;
// Filesystem paths of the `prompts` and `skills` directories, both resolved
// two levels above this module's own URL.
const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url));
const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url));
2026-05-10 23:51:24 +02:00
// Fixed author identity shared by the local ingest storage and the local
// author resolver defined in this file.
const LOCAL_AUTHOR = {
  name: 'KTX Local',
  email: 'local@ktx.local',
};
2026-05-10 23:12:26 +02:00
// Warning text the local validators in this file attach to results that were
// produced by shape-only validation.
const LOCAL_SHAPE_WARNING = 'Local ingest validates semantic-layer YAML shape only.';
2026-05-18 13:38:06 +02:00
/** Trace levels that `KTX_INGEST_TRACE_LEVEL` is allowed to select. */
const INGEST_TRACE_LEVELS = new Set<IngestTraceLevel>(['error', 'info', 'debug', 'trace']);

/**
 * Reads the ingest trace level from the `KTX_INGEST_TRACE_LEVEL` variable.
 *
 * @param env - Environment to inspect; defaults to `process.env`.
 * @returns The configured level when it is one of the known levels, otherwise
 *   `'debug'` (also used when the variable is unset or empty).
 */
function ingestTraceLevelFromEnv(env: NodeJS.ProcessEnv = process.env): IngestTraceLevel {
  const candidate = env.KTX_INGEST_TRACE_LEVEL;
  if (!candidate) {
    return 'debug';
  }
  const level = candidate as IngestTraceLevel;
  return INGEST_TRACE_LEVELS.has(level) ? level : 'debug';
}
2026-05-10 23:12:26 +02:00
/**
 * Options accepted when building the local bundle ingest runtime.
 *
 * Only `project` and `adapters` are required; every other member is optional.
 * NOTE(review): how each optional member is defaulted is decided by the code
 * that consumes this interface, which is not part of this declaration.
 */
export interface CreateLocalBundleIngestRuntimeOptions {
  project: KtxLocalProject;
  adapters: SourceAdapter[];
  agentRunner?: AgentRunnerPort;
  llmRuntime?: KtxLlmRuntimePort;
  /** Factory with the same signature as `createLocalKtxLlmRuntimeFromConfig`. */
  createLlmRuntime?: typeof createLocalKtxLlmRuntimeFromConfig;
  llmDebugRequestFile?: string;
  memoryModel?: string;
  semanticLayerCompute?: KtxSemanticLayerComputePort;
  queryExecutor?: KtxSqlQueryExecutorPort;
  /** Zero-argument callback that returns a job id string. */
  jobIdFactory?: () => string;
  logger?: KtxLogger;
  /** May be passed explicitly as `null` as well as omitted. */
  embeddingProvider?: KtxEmbeddingProvider | null;
}
/**
 * Handles exposed by a local bundle ingest runtime: the bundle runner, the
 * two SQLite-backed stores, the storage port, the adapter registry, and a
 * job-id generator.
 */
export interface LocalBundleIngestRuntime {
  runner: IngestBundleRunner;
  store: SqliteBundleIngestStore;
  contextStore: SqliteContextEvidenceStore;
  storage: IngestStoragePort;
  registry: SourceAdapterRegistryPort;
  /** @returns A job id string. */
  nextJobId(): string;
}
2026-05-10 23:51:24 +02:00
/**
 * Embedding port that does no embedding work: every request resolves to an
 * empty vector.
 */
class NoopEmbeddingPort implements KtxEmbeddingPort {
  readonly maxBatchSize = 64;

  /** @returns An empty embedding vector. */
  async computeEmbedding(): Promise<number[]> {
    return [];
  }

  /**
   * @param texts - Inputs to "embed"; only their count is used.
   * @returns One empty vector per input text.
   */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    return texts.map(() => []);
  }
}
/**
 * Storage port for local ingest. Every location it hands out lives under the
 * project's `.ktx` directory.
 */
class LocalIngestStorage implements IngestStoragePort {
  /** `<projectDir>/.ktx`. */
  readonly homeDir: string;
  readonly systemGitAuthor = LOCAL_AUTHOR;

  /** @param project - Project whose `projectDir` anchors every resolved path. */
  constructor(private readonly project: KtxLocalProject) {
    this.homeDir = join(project.projectDir, '.ktx');
  }

  /** @returns `<projectDir>/.ktx/cache/local-ingest/<uploadId>/upload`. */
  resolveUploadDir(uploadId: string): string {
    return join(this.project.projectDir, '.ktx/cache/local-ingest', uploadId, 'upload');
  }

  /** @returns `<projectDir>/.ktx/cache/local-ingest/<jobId>/pull`. */
  resolvePullDir(jobId: string): string {
    return join(this.project.projectDir, '.ktx/cache/local-ingest', jobId, 'pull');
  }

  /** @returns `<projectDir>/.ktx/ingest-transcripts/<jobId>`. */
  resolveTranscriptDir(jobId: string): string {
    return join(this.project.projectDir, '.ktx/ingest-transcripts', jobId);
  }

  /** @returns The trace path `ingestTracePathForJob` derives from `homeDir` and `jobId`. */
  resolveTracePath(jobId: string): string {
    return ingestTracePathForJob(this.homeDir, jobId);
  }
}
/**
 * Lock port that performs no locking: the callback is invoked immediately and
 * its result is passed through.
 */
class LocalIngestLock implements IngestLockPort {
  /**
   * @param _key - Lock key; ignored by this implementation.
   * @param fn - Work to run.
   * @returns Whatever `fn` resolves to; a failure in `fn` rejects the returned promise.
   */
  async withLock<T>(_key: string, fn: () => Promise<T>): Promise<T> {
    return fn();
  }
}
/** Commit-message port whose enqueue operation does nothing. */
class LocalCommitMessagePort implements IngestCommitMessagePort {
  /** @returns A promise that resolves with no value. */
  async enqueueForExternalCommit(): Promise<void> {
    // Deliberately empty: this implementation queues nothing.
  }
}
/** Author resolver that always answers with the fixed `LOCAL_AUTHOR` identity. */
class LocalAuthorResolver implements GitAuthorResolverPort {
  /** @returns A promise resolving to `LOCAL_AUTHOR`. */
  async resolve() {
    const author = LOCAL_AUTHOR;
    return author;
  }
}
/**
 * Connection catalog backed by the `connections` map of the local project
 * config, with an optional query executor for running SQL.
 */
class LocalConnectionCatalog implements SlConnectionCatalogPort {
  /**
   * @param project - Project whose `config.connections` is consulted.
   * @param queryExecutor - Executor used by `executeQuery`; when omitted,
   *   `executeQuery` always throws.
   */
  constructor(
    private readonly project: KtxLocalProject,
    private readonly queryExecutor?: KtxSqlQueryExecutorPort,
  ) {}

  /**
   * @param ids - Connection ids to look up.
   * @returns Connection info for each id, in input order; ids for which
   *   `localConnectionInfoFromConfig` yields `null` are left out.
   */
  async listEnabledConnections(ids: string[]): Promise<KtxConnectionInfo[]> {
    return ids
      .map((id) => localConnectionInfoFromConfig(id, this.project.config.connections[id]))
      .filter((connection): connection is KtxConnectionInfo => connection !== null);
  }

  /**
   * @param connectionId - Id of the connection to look up.
   * @returns The connection info for `connectionId`.
   * @throws Error when `localConnectionInfoFromConfig` yields no connection.
   */
  async getConnectionById(connectionId: string): Promise<KtxConnectionInfo> {
    const connection = localConnectionInfoFromConfig(connectionId, this.project.config.connections[connectionId]);
    if (!connection) {
      throw new Error(`Connection not found: ${connectionId}`);
    }
    return connection;
  }

  /**
   * Runs `sql` through the configured query executor.
   *
   * @param connectionId - Id of the connection to run against.
   * @param sql - SQL text handed to the executor unchanged.
   * @returns The executor's result.
   * @throws Error when no query executor was supplied to the constructor.
   */
  async executeQuery(connectionId: string, sql: string): Promise<KtxQueryResult> {
    if (!this.queryExecutor) {
      throw new Error('Local ingest has no query executor configured');
    }
    // The raw config entry is forwarded as-is; it is not checked for existence here.
    return this.queryExecutor.execute({
      connectionId,
      projectDir: this.project.projectDir,
      connection: this.project.config.connections[connectionId],
      sql,
    });
  }
}
/**
 * `SlPythonPort` implementation that forwards to an optional semantic-layer
 * compute adapter and translates between the port's snake_case fields and the
 * adapter's camelCase fields.
 */
class LocalSlPythonPort implements SlPythonPort {
  /** @param compute - Adapter to delegate to; when omitted, fallback results are returned. */
  constructor(private readonly compute?: KtxSemanticLayerComputePort) {}

  /**
   * Validates sources through the compute adapter.
   *
   * @param input - Validation request; `recently_touched` is forwarded as `recentlyTouched`.
   * @returns The adapter's errors and warnings, with `perSourceWarnings`
   *   exposed as `per_source_warnings`. Without an adapter, the result has no
   *   errors and a single `LOCAL_SHAPE_WARNING` warning.
   */
  async validateSources(input: Parameters<SlPythonPort['validateSources']>[0]) {
    if (!this.compute) {
      return { data: { errors: [], warnings: [LOCAL_SHAPE_WARNING], per_source_warnings: {} } };
    }
    const result = await this.compute.validateSources({
      sources: input.sources,
      dialect: input.dialect,
      recentlyTouched: input.recently_touched,
    });
    return {
      data: {
        errors: result.errors,
        warnings: result.warnings,
        per_source_warnings: result.perSourceWarnings,
      },
    };
  }

  /**
   * Runs a query through the compute adapter.
   *
   * @param input - Query request; `sources`, `dialect` and `query` are forwarded.
   * @returns `{ data: { sql, plan } }` taken from the adapter's result, or an
   *   `{ error }` object when no adapter is configured.
   */
  async query(input: Parameters<SlPythonPort['query']>[0]) {
    if (!this.compute) {
      return { error: 'Local ingest has no semantic compute adapter configured' };
    }
    const result = await this.compute.query({
      sources: input.sources,
      dialect: input.dialect,
      query: input.query,
    });
    return { data: { sql: result.sql, plan: result.plan } };
  }
}
/**
 * Semantic-layer validator that only checks the *shape* of a source against
 * `sourceDefinitionSchema` / `sourceOverlaySchema`.
 *
 * A successful validation always carries `LOCAL_SHAPE_WARNING`; failures carry
 * one error string per schema issue and no warnings.
 */
class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
  /**
   * Validates an already-parsed source object.
   *
   * A source with neither `table` nor `sql` is checked against the overlay
   * schema, anything else against the full definition schema.
   * A nullish document (what the YAML parser yields for an empty or
   * comment-only file) is reported explicitly instead of being dereferenced.
   */
  private validateParsedSource(sourceName: string, parsed: Record<string, unknown> | null | undefined) {
    if (parsed == null) {
      return {
        errors: [`${sourceName}: source file is empty (no YAML document)`],
        warnings: [],
      };
    }
    const isOverlay = parsed.table == null && parsed.sql == null;
    const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
    return result.success
      ? { errors: [], warnings: [LOCAL_SHAPE_WARNING] }
      : {
          errors: result.error.issues.map(
            (issue) => ` ${ sourceName } : ${ issue . path . join ( '.' ) || 'source' } ${ issue . message } `,
          ),
          warnings: [],
        };
  }

  /**
   * Fallback used when the source file itself could not be read: loads every
   * source for the connection and validates the one named `sourceName`.
   *
   * When no such source is found, the error detail is the joined load errors,
   * or the original read error when there are none. A failure of the fallback
   * itself is reported as an error string, never thrown.
   */
  private async validateComposedSource(
    deps: SlValidationDeps,
    connectionId: string,
    sourceName: string,
    readError: unknown,
  ) {
    try {
      const { sources, loadErrors } = await deps.semanticLayerService.loadAllSources(connectionId);
      const source = sources.find((candidate) => candidate.name === sourceName);
      if (source) {
        return this.validateParsedSource(sourceName, source as unknown as Record<string, unknown>);
      }
      const detail =
        loadErrors.length > 0
          ? loadErrors.join('; ')
          : readError instanceof Error
            ? readError.message
            : String(readError);
      return { errors: [` ${ sourceName } : ${ detail } `], warnings: [] };
    } catch (fallbackError) {
      return {
        errors: [` ${ sourceName } : ${ fallbackError instanceof Error ? fallbackError.message : String ( fallbackError ) } `],
        warnings: [],
      };
    }
  }

  /**
   * Validates one source of a connection.
   *
   * Reads the source file and parses it as YAML; an unreadable file falls back
   * to `validateComposedSource`, and a parse failure is reported as an
   * "invalid YAML" error.
   *
   * @returns `errors` and `warnings` string arrays; this method does not throw
   *   for read, parse, or schema failures.
   */
  async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
    let content: string;
    try {
      const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
      content = file.content;
    } catch (error) {
      return this.validateComposedSource(deps, connectionId, sourceName, error);
    }
    try {
      const parsed = YAML.parse(content) as unknown as Record<string, unknown> | null;
      return this.validateParsedSource(sourceName, parsed);
    } catch (error) {
      return {
        errors: [` ${ sourceName } : invalid YAML — ${ error instanceof Error ? error.message : String ( error ) } `],
        warnings: [],
      };
    }
  }
}
/**
 * Splits a wiki page into its frontmatter summary and body.
 *
 * The page may start with a `---` delimited YAML frontmatter block. Both LF
 * and CRLF line endings are accepted around the delimiters, so pages checked
 * out with Windows line endings keep their summary.
 *
 * @param raw - Full text of the wiki page.
 * @returns `summary` (the frontmatter `summary` when it is a string, else `''`)
 *   and `content` (the trimmed body; the whole trimmed input when there is no
 *   frontmatter block).
 * @throws Whatever `YAML.parse` throws for malformed frontmatter.
 */
function parseWiki(raw: string): { summary: string; content: string } {
  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?([\s\S]*)$/);
  if (!match) {
    return { summary: '', content: raw.trim() };
  }
  // An empty frontmatter document parses to null; treat it as "no fields".
  const frontmatter = (YAML.parse(match[1]) ?? {}) as Record<string, unknown>;
  return {
    summary: typeof frontmatter.summary === 'string' ? frontmatter.summary : '',
    content: match[2].trim(),
  };
}
2026-05-10 23:13:17 -07:00
/**
 * Extracts the `tags` list from a wiki page's YAML frontmatter.
 *
 * Both LF and CRLF line endings are accepted around the `---` delimiters, so
 * tags are not silently dropped for pages saved with Windows line endings.
 *
 * @param raw - Full text of the wiki page.
 * @returns The string entries of the frontmatter `tags` array; `[]` when there
 *   is no frontmatter block or `tags` is not an array.
 * @throws Whatever `YAML.parse` throws for malformed frontmatter.
 */
function parseWikiTags(raw: string): string[] {
  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?/);
  if (!match) {
    return [];
  }
  // An empty frontmatter document parses to null; treat it as "no fields".
  const frontmatter = (YAML.parse(match[1]) ?? {}) as Record<string, unknown>;
  return Array.isArray(frontmatter.tags)
    ? frontmatter.tags.filter((tag): tag is string => typeof tag === 'string')
    : [];
}
2026-05-10 23:12:26 +02:00
/**
 * Case-insensitive relevance score of `text` for `query`, in the range 0..1.
 *
 * - `0` for a blank query;
 * - `1` when the whole (trimmed) query occurs in the text;
 * - otherwise the fraction of whitespace-separated query words found in the text.
 */
function scoreText(text: string, query: string): number {
  const needle = query.toLowerCase().trim();
  if (needle.length === 0) {
    return 0;
  }

  const haystack = text.toLowerCase();
  if (haystack.includes(needle)) {
    return 1;
  }

  const terms = needle.split(/\s+/).filter(Boolean);
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) {
      hits += 1;
    }
  }
  return hits / Math.max(terms.length, 1);
}
/**
 * Knowledge index for local projects.
 *
 * Wiki pages on disk (under `wiki`) are the source of truth. Every mutating
 * port method ignores its arguments and re-synchronises the whole wiki tree
 * into the SQLite index; listing and text search read the files directly.
 */
class LocalKnowledgeIndex implements KnowledgeIndexPort {
  private readonly sqlite: SqliteKnowledgeIndex;

  constructor(
    private readonly project: KtxLocalProject,
    private readonly embedding: KtxEmbeddingPort,
  ) {
    const dbPath = ktxLocalStateDbPath(project);
    this.sqlite = new SqliteKnowledgeIndex({ dbPath });
  }

  /** Re-syncs the whole index from disk. */
  async upsertPage(): Promise<void> {
    await this.syncAllPagesFromDisk();
  }

  /** Re-syncs the whole index from disk. */
  async applyDiffTransactional(): Promise<void> {
    await this.syncAllPagesFromDisk();
  }

  /**
   * Returns, keyed by page key, the indexed search text of every page whose
   * indexed path starts with the prefix for the given scope, plus whether an
   * embedding is stored for it.
   */
  async getExistingSearchTexts(
    scope: string,
    scopeId: string | null,
  ): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>> {
    const prefix = scope === 'GLOBAL' ? 'wiki/global/' : ` wiki/user/ ${ scopeId } / `;
    const byPageKey = new Map<string, { searchText: string; hasEmbedding: boolean }>();
    for (const [path, page] of this.sqlite.getExistingPages()) {
      if (path.startsWith(prefix)) {
        const pageKey = path.slice(prefix.length).replace(/\.md$/, '');
        byPageKey.set(pageKey, {
          searchText: page.searchText,
          hasEmbedding: page.embedding !== null,
        });
      }
    }
    return byPageKey;
  }

  /** Re-syncs the whole index from disk; always reports `0`. */
  async deleteStale(): Promise<number> {
    await this.syncAllPagesFromDisk();
    return 0;
  }

  /** Re-syncs the whole index from disk; always reports `0`. */
  async deleteByScope(): Promise<number> {
    await this.syncAllPagesFromDisk();
    return 0;
  }

  /** Re-syncs the whole index from disk; always reports `0`. */
  async deleteByKey(): Promise<number> {
    await this.syncAllPagesFromDisk();
    return 0;
  }

  /**
   * Checks whether the page file exists by trying to read it.
   *
   * @returns `{ page_key }` when the read succeeds, `null` when it fails for any reason.
   */
  async findPageByKey(scope: string, scopeId: string | null, pageKey: string) {
    const path = scope === 'GLOBAL' ? ` wiki/global/ ${ pageKey } .md ` : ` wiki/user/ ${ scopeId } / ${ pageKey } .md `;
    try {
      await this.project.fileStore.readFile(path);
    } catch {
      return null;
    }
    return { page_key: pageKey };
  }

  /**
   * Lists the global pages and the given user's pages, sorted by page key.
   * Files whose path `parseKnowledgeIndexPath` rejects, or whose parsed scope
   * differs from the directory being listed, are skipped.
   */
  async listPagesForUser(
    userId: string,
  ): Promise<KnowledgeIndexPageListing[]> {
    const pages: KnowledgeIndexPageListing[] = [];
    const scopes = [
      { scope: 'GLOBAL', scopeId: null, dir: 'wiki/global' },
      { scope: 'USER', scopeId: userId, dir: ` wiki/user/ ${ userId } ` },
    ];
    for (const scope of scopes) {
      const listed = await this.project.fileStore.listFiles(scope.dir, true);
      for (const file of listed.files) {
        if (!file.endsWith('.md')) {
          continue;
        }
        // Entries already rooted at `global/` or `user/` are parsed as-is;
        // anything else is prefixed with the scope directory minus `wiki/`.
        const alreadyScoped = file.startsWith('global/') || file.startsWith('user/');
        const parsedPath = parseKnowledgeIndexPath(
          alreadyScoped ? file : ` ${ scope . dir . replace ( 'wiki/' , '' ) } / ${ file } `,
        );
        if (parsedPath === null || parsedPath.scope !== scope.scope) {
          continue;
        }
        const raw = await this.project.fileStore.readFile(` ${ scope . dir } / ${ file } `);
        const { summary } = parseWiki(raw.content);
        pages.push({
          page_key: parsedPath.pageKey,
          summary,
          scope: scope.scope,
          scope_id: scope.scopeId,
          tags: parseWikiTags(raw.content),
        });
      }
    }
    return pages.sort((left, right) => left.page_key.localeCompare(right.page_key));
  }

  /** Counts the pages listed for the user whose scope is `'USER'`. */
  async getUserPageCount(userId: string): Promise<number> {
    const pages = await this.listPagesForUser(userId);
    let userPages = 0;
    for (const page of pages) {
      if (page.scope === 'USER') {
        userPages += 1;
      }
    }
    return userPages;
  }

  /** No-op. */
  async incrementUsageCount(): Promise<void> {}

  /**
   * Text-only search over page key and summary using `scoreText`.
   * The query embedding is accepted but not used. Pages scoring 0 are
   * dropped; ties are broken by page key; at most `limit` results are returned.
   */
  async searchRRF(
    userId: string,
    _embedding: number[] | null,
    queryText: string,
    limit: number,
  ): Promise<Array<{ pageKey: string; summary: string; rrfScore: number }>> {
    const pages = await this.listPagesForUser(userId);
    const scored = pages.map((page) => ({
      pageKey: page.page_key,
      summary: page.summary,
      rrfScore: scoreText(` ${ page . page_key } ${ page . summary } `, queryText),
    }));
    const matching = scored.filter((candidate) => candidate.rrfScore > 0);
    matching.sort((left, right) => {
      const scoreGap = right.rrfScore - left.rrfScore;
      return scoreGap || left.pageKey.localeCompare(right.pageKey);
    });
    return matching.slice(0, limit);
  }

  /**
   * Rebuilds the SQLite index from every `.md` file under `wiki` that
   * `parseKnowledgeIndexPath` accepts.
   *
   * A stored embedding is reused when the page's search text is unchanged;
   * otherwise a new one is computed, falling back to `null` if that fails.
   */
  private async syncAllPagesFromDisk(): Promise<void> {
    const listed = await this.project.fileStore.listFiles('wiki', true);
    const existingPages = this.sqlite.getExistingPages();
    const pages: SqliteKnowledgeIndexPage[] = [];
    for (const file of listed.files) {
      if (!file.endsWith('.md')) {
        continue;
      }
      const parsedPath = parseKnowledgeIndexPath(file);
      if (!parsedPath) {
        continue;
      }
      const path = ` wiki/ ${ file } `;
      const raw = await this.project.fileStore.readFile(path);
      const parsed = parseWiki(raw.content);
      const tags = parseWikiTags(raw.content);
      const searchText = buildKnowledgeSearchText(parsedPath.pageKey, parsed.summary, parsed.content, tags);
      const previous = existingPages.get(path);
      const embedding =
        previous?.searchText === searchText && previous.embedding
          ? previous.embedding
          : await this.embedding.computeEmbedding(searchText).catch(() => null);
      pages.push({
        path,
        key: parsedPath.pageKey,
        scope: parsedPath.scope,
        summary: parsed.summary,
        content: parsed.content,
        tags,
        embedding,
      });
    }
    this.sqlite.sync(pages);
  }
}
/**
 * Classifies a wiki-relative file path.
 *
 * Accepted layouts are `global/<page>` (GLOBAL scope) and
 * `user/<anything>/<page>` (USER scope). A trailing `.md` is stripped from
 * `<page>` to form the page key, which must start with an ASCII letter or
 * digit and otherwise contain only letters, digits, `_` and `-`.
 *
 * @returns The scope and page key, or `null` for any other path.
 */
function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; pageKey: string } | null {
  const parts = file.split('/');

  let scope: 'GLOBAL' | 'USER';
  let fileName: string;
  if (parts.length === 2 && parts[0] === 'global') {
    scope = 'GLOBAL';
    fileName = parts[1];
  } else if (parts.length === 3 && parts[0] === 'user') {
    scope = 'USER';
    fileName = parts[2];
  } else {
    return null;
  }

  const pageKey = fileName.replace(/\.md$/, '');
  if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey)) {
    return null;
  }
  return { scope, pageKey };
}
/** Knowledge event port that records nothing. */
class NoopKnowledgeEventPort implements KnowledgeEventPort {
  /** Resolves without performing any work. */
  async createEvent(): Promise<void> {
    return;
  }
}
/**
 * Toolset combining a list of `BaseTool`s with a map of already-built
 * runtime tools.
 */
class LocalIngestToolSet implements IngestToolsetLike {
  constructor(
    private readonly tools: BaseTool[],
    private readonly sourceTools: KtxRuntimeToolSet = {},
  ) {}

  /**
   * Builds the runtime tool map for `context`.
   *
   * Each base tool is converted with `toRuntimeTool(context)` and keyed by its
   * name; `sourceTools` are spread last, so they win on a name clash.
   */
  toRuntimeTools(context: ToolContext): KtxRuntimeToolSet {
    const toolEntries = this.tools.map((tool) => [tool.name, tool.toRuntimeTool(context)]);
    const fromBaseTools = Object.fromEntries(toolEntries);
    return { ...fromBaseTools, ...this.sourceTools };
  }
}
/**
 * Builds the toolsets handed to ingest work units.
 *
 * All tool instances are created once in the constructor and shared by every
 * toolset this factory returns.
 */
class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
  private readonly baseTools: BaseTool[];
  private readonly contextTools: BaseTool[];

  /**
   * Instantiates the wiki, semantic-layer, warehouse-verification and
   * context-evidence tools from the given services.
   */
  constructor(deps: {
    project: KtxLocalProject;
    wikiService: KnowledgeWikiService;
    knowledgeIndex: KnowledgeIndexPort;
    knowledgeEvents: KnowledgeEventPort;
    semanticLayerService: SemanticLayerService;
    slSearchService: SlSearchService;
    authorResolver: GitAuthorResolverPort;
    slSourcesRepository: SlSourcesIndexPort;
    connections: SlConnectionCatalogPort;
    contextStore: SqliteContextEvidenceStore;
    embedding: KtxEmbeddingPort;
  }) {
    const { project, wikiService, knowledgeIndex, knowledgeEvents, connections, contextStore, embedding } = deps;
    const slDeps = {
      semanticLayerService: deps.semanticLayerService,
      slSearchService: deps.slSearchService,
      authorResolver: deps.authorResolver,
    };

    const wikiSearchTool = new WikiSearchTool({
      search: async (input) => {
        const { userId, query, limit } = input;
        const found = await searchLocalKnowledgePages(project, {
          userId,
          query,
          limit,
          embeddingService: embedding,
        });
        // `totalFound` reports everything the search returned, while the
        // result list itself is capped at `limit`.
        const results = found.slice(0, limit).map(({ key, path, summary, score, matchReasons, lanes }) => ({
          key,
          path,
          summary,
          score,
          matchReasons,
          lanes,
        }));
        return { results, totalFound: found.length };
      },
    });
    const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 });
    const warehouseVerificationTools = createWarehouseVerificationTools({
      connections,
      fallbackFileStore: project.fileStore,
      wikiSearchTool,
      slDiscoverTool,
    });

    this.baseTools = [
      new WikiReadTool(wikiService, knowledgeIndex),
      wikiSearchTool,
      new WikiListTagsTool(knowledgeIndex),
      new WikiWriteTool(wikiService, knowledgeIndex, knowledgeEvents),
      new WikiRemoveTool(wikiService, knowledgeIndex, knowledgeEvents),
      slDiscoverTool,
      new SlEditSourceTool(slDeps),
      new SlReadSourceTool(slDeps),
      new SlWriteSourceTool(slDeps),
      new SlValidateTool(slDeps),
      new SlRollbackTool(deps.slSourcesRepository, connections, 0),
      ...warehouseVerificationTools,
    ];
    this.contextTools = [
      new ContextEvidenceSearchTool(contextStore, embedding),
      new ContextEvidenceReadTool(contextStore),
      new ContextEvidenceNeighborsTool(contextStore),
      new ContextCandidateWriteTool(contextStore, embedding),
      new ContextCandidateMarkTool(contextStore),
    ];
  }

  /**
   * Creates the toolset for one ingest work unit.
   *
   * @param session - Tool session; when its ingest source key is
   *   `'historic-sql'`, an `emit_historic_sql_evidence` runtime tool bound to
   *   this session is added.
   * @param options - Set `includeContextEvidenceTools` to append the
   *   context-evidence tools to the base tools.
   */
  createIngestWuToolset(session: ToolSession, options?: { includeContextEvidenceTools?: boolean }): IngestToolsetLike {
    let sourceTools: KtxRuntimeToolSet = {};
    if (session.ingest?.sourceKey === 'historic-sql') {
      sourceTools = {
        emit_historic_sql_evidence: createRuntimeToolDescriptorFromAiTool(
          'emit_historic_sql_evidence',
          createEmitHistoricSqlEvidenceTool({
            connectionId: session.connectionId,
            session,
          }),
        ),
      };
    }

    const tools = options?.includeContextEvidenceTools
      ? [...this.baseTools, ...this.contextTools]
      : this.baseTools;
    return new LocalIngestToolSet(tools, sourceTools);
  }
}
/**
 * Creates a fresh `SourceAdapterRegistry` and registers the given adapters
 * in array order.
 */
function registerAdapters(adapters: SourceAdapter[]): SourceAdapterRegistry {
  const registry = new SourceAdapterRegistry();
  adapters.forEach((adapter) => {
    registry.register(adapter);
  });
  return registry;
}
/** Millisecond value embedded in the most recently issued local job id. */
let lastLocalJobIdTimestamp = 0;

/**
 * Generates the id for a local ingest job: the `local-` prefix followed by a
 * base-36 millisecond timestamp.
 *
 * `Date.now()` has millisecond resolution, so two calls in the same
 * millisecond used to return the same id. The embedded value is now bumped
 * past the previously issued one, which keeps ids unique and increasing
 * within a process (including when the clock steps backwards) without
 * changing their format.
 *
 * @returns A job id that has not been returned before by this process.
 */
function nextLocalJobId(): string {
  lastLocalJobIdTimestamp = Math.max(Date.now(), lastLocalJobIdTimestamp + 1);
  return ` local- ${ lastLocalJobIdTimestamp.toString(36) } `;
}
2026-05-12 10:26:07 +02:00
/**
 * Builds the multi-line error text shown when ingest has neither a usable LLM
 * backend nor an injected agent runner.
 *
 * @param projectDir - Project directory interpolated into the suggested
 *   `ktx setup` commands.
 * @returns Four lines joined with `\n`: the requirement, an instruction, and
 *   one example command each for the claude-code and anthropic backends.
 */
function localIngestLlmProviderGuardMessage(projectDir: string): string {
  const requirement =
    'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.';
  const instruction = 'Configure a local Claude Code session or API-backed LLM, then rerun ingest:';
  const claudeCodeSetup = ` ktx setup --project-dir ${ projectDir } --llm-backend claude-code --no-input `;
  const anthropicSetup = ` ktx setup --project-dir ${ projectDir } --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --llm-model claude-sonnet-4-6 --no-input `;

  const lines = [requirement, instruction, claudeCodeSetup, anthropicSetup];
  return lines.join('\n');
}
2026-05-10 23:12:26 +02:00
/**
 * Picks the agent runner (and, when available, the LLM runtime) for a local
 * ingest run.
 *
 * The LLM runtime is `options.llmRuntime` if given, otherwise whatever
 * `options.createLlmRuntime` (default: `createLocalKtxLlmRuntimeFromConfig`)
 * builds from the project's `llm` config; a nullish result means "no runtime".
 *
 * - An injected `options.agentRunner` is always used as-is; `llmRuntime` is
 *   included in the result only when one was resolved.
 * - Otherwise a `RuntimeAgentRunner` is created around the resolved runtime.
 *
 * @throws Error carrying the setup guidance message when neither an agent
 *   runner nor an LLM runtime is available.
 */
function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): {
  agentRunner: AgentRunnerPort;
  llmRuntime?: KtxLlmRuntimePort;
} {
  const { project } = options;
  const buildLlmRuntime = options.createLlmRuntime ?? createLocalKtxLlmRuntimeFromConfig;
  const llmRuntime =
    options.llmRuntime ??
    buildLlmRuntime(project.config.llm, {
      projectDir: project.projectDir,
      env: process.env,
    }) ??
    undefined;

  if (options.agentRunner) {
    const resolved: { agentRunner: AgentRunnerPort; llmRuntime?: KtxLlmRuntimePort } = {
      agentRunner: options.agentRunner,
    };
    if (llmRuntime) {
      resolved.llmRuntime = llmRuntime;
    }
    return resolved;
  }

  if (!llmRuntime) {
    throw new Error(localIngestLlmProviderGuardMessage(project.projectDir));
  }
  const agentRunner = new RuntimeAgentRunner(llmRuntime);
  return { agentRunner, llmRuntime };
}
/**
 * Wires up everything a local bundle ingest needs and returns the runner
 * together with the stores it operates on.
 *
 * Side effects: creates `<projectDir>/.ktx/cache/local-ingest` (recursively)
 * and constructs the SQLite-backed stores on the project's local state
 * database path.
 *
 * @param options - Project, adapters and optional overrides (logger, embedding
 *   provider, query executor, semantic-layer compute, LLM runtime / factory,
 *   agent runner, job id factory).
 * @returns The ingest runner, its stores, the storage and adapter registry,
 *   and the job id generator (`options.jobIdFactory` or `nextLocalJobId`).
 * @throws Error from `resolveAgentRunner` when neither an agent runner nor an
 *   LLM runtime can be resolved.
 */
export function createLocalBundleIngestRuntime(
  options: CreateLocalBundleIngestRuntimeOptions,
): LocalBundleIngestRuntime {
  const { project } = options;
  const logger = options.logger ?? noopLogger;

  const dbPath = ktxLocalStateDbPath(project);
  mkdirSync(join(project.projectDir, '.ktx/cache/local-ingest'), { recursive: true });
  const store = new SqliteBundleIngestStore({ dbPath });
  const contextStore = new SqliteContextEvidenceStore({ dbPath });

  const embeddingProvider = options.embeddingProvider ?? null;
  if (!embeddingProvider && project.config.ingest.embeddings.backend !== 'none') {
    // Stages that depend on embeddings (CandidateDedup clustering,
    // ContextEvidenceIndex chunk indexing) silently produce zero-vector data
    // with NoopEmbeddingPort. Warn so the caller knows ingest is not running
    // against its configured backend.
    logger.warn(
      ` [local-bundle-runtime] embeddings backend " ${ options . project . config . ingest . embeddings . backend } " is configured but no embedding provider was passed; embedding-dependent stages will run against a no-op embedding port. `,
    );
  }
  const embedding = embeddingProvider
    ? new KtxIngestEmbeddingPortAdapter(embeddingProvider)
    : new NoopEmbeddingPort();

  const connections = new LocalConnectionCatalog(project, options.queryExecutor);
  const rootFileStore = project.fileStore;
  const semanticLayerService = new SemanticLayerService(
    rootFileStore,
    connections,
    new LocalSlPythonPort(options.semanticLayerCompute),
    logger,
  );
  const slSourcesRepository = new SqliteSlSourcesIndex({ dbPath });
  const slSearchService = new SlSearchService(embedding, slSourcesRepository, logger);
  const knowledgeIndex = new LocalKnowledgeIndex(project, embedding);
  const knowledgeEvents = new NoopKnowledgeEventPort();
  const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, project.git, logger);

  // Throws when neither an agent runner nor an LLM runtime is available.
  const { agentRunner, llmRuntime } = resolveAgentRunner(options);

  const promptService = new PromptService({ promptsDir, partials: [], logger });
  const storage = new LocalIngestStorage(project);
  const registry = registerAdapters(options.adapters);
  const toolsetFactory = new LocalIngestToolsetFactory({
    project,
    wikiService,
    knowledgeIndex,
    knowledgeEvents,
    semanticLayerService,
    slSearchService,
    authorResolver: new LocalAuthorResolver(),
    slSourcesRepository,
    connections,
    contextStore,
    embedding,
  });

  // The services below are created in the same order in which the runner
  // dependencies list them.
  const diffSetService = new DiffSetService(store);
  const sessionWorktreeService = new SessionWorktreeService({
    coreConfig: project.coreConfig,
    gitService: project.git,
    configService: rootFileStore,
  });
  const lockingService = new LocalIngestLock();
  const { workUnits } = project.config.ingest;
  const settings = {
    memoryIngestionModel: project.config.llm.models.default ?? 'local-ingest-model',
    probeRowCount: 0,
    workUnitMaxConcurrency: workUnits.maxConcurrency,
    workUnitStepBudget: workUnits.stepBudget,
    workUnitFailureMode: workUnits.failureMode,
    ingestTraceLevel: ingestTraceLevelFromEnv(),
  };
  const skillsRegistry = new SkillsRegistryService({ skillsDir, logger });
  const slValidator = new LocalShapeOnlySlValidator();
  const commitMessages = new LocalCommitMessagePort();
  const contextEvidenceIndex = new ContextEvidenceIndexService({
    store: contextStore,
    embeddings: embedding,
    logger,
  });
  // Page triage is only available when an LLM runtime was resolved.
  const pageTriage = llmRuntime
    ? new PageTriageService({
        store: contextStore,
        llmRuntime,
        settings: {
          enabled: true,
          maxConcurrency: 2,
          lightExtractionEnabled: true,
          classifierModel: null,
          lightExtractionMaxCandidates: 5,
        },
        promptService,
        logger,
      })
    : undefined;
  const candidateDedup = new CandidateDedupService({
    store: contextStore,
    embeddings: embedding,
    settings: { enabled: true, topicSimilarityThreshold: 0.86, scoreAggregation: 'max' },
    logger,
  });
  const contextCandidateCarryforward = new ContextCandidateCarryforwardService({
    store: contextStore,
    settings: { reExamineBudgetExhaustedOnRerun: true },
    logger,
  });
  const curatorPagination = new CuratorPaginationService({
    store: contextStore,
    agentRunner,
    settings: { batchSize: 8, maxPasses: 8, stepBudgetPerPass: 60 },
    logger,
  });

  const deps: IngestBundleRunnerDeps = {
    runs: store,
    provenance: store,
    reports: store,
    canonicalPins: store,
    registry,
    diffSetService,
    sessionWorktreeService,
    agentRunner,
    gitService: project.git,
    lockingService,
    storage,
    settings,
    skillsRegistry,
    promptService,
    wikiService,
    knowledgeIndex,
    semanticLayerService,
    slSearchService,
    slSourcesRepository,
    connections,
    slValidator,
    toolsetFactory,
    commitMessages,
    embedding,
    contextEvidenceIndex,
    llmRuntime,
    pageTriage,
    contextEvidenceCandidates: contextStore,
    candidateDedup,
    contextCandidateCarryforward,
    curatorPagination,
    logger,
  };

  const runner = new IngestBundleRunner(deps);
  return {
    runner,
    store,
    contextStore,
    storage,
    registry,
    nextJobId: options.jobIdFactory ?? nextLocalJobId,
  };
}