From 4d6808e09f7c27675ab835bb1593abb160385d6e Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Thu, 21 May 2026 01:56:34 +0200 Subject: [PATCH] refactor: pass embeddingProvider into ingest/scan instead of resolving inside @ktx/context --- packages/cli/src/ingest.ts | 23 +++++++++++-------- packages/cli/src/public-ingest.ts | 12 ++-------- packages/cli/src/scan.ts | 13 +++++++---- .../src/ingest/local-bundle-runtime.ts | 5 ++-- packages/context/src/ingest/local-ingest.ts | 4 ++++ .../context/src/scan/local-enrichment.test.ts | 11 ++++----- packages/context/src/scan/local-scan.ts | 13 +++++------ 7 files changed, 41 insertions(+), 40 deletions(-) diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index 0d85634b..b2b7bd0e 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -18,8 +18,8 @@ import { sanitizeMemoryFlowError, } from '@ktx/context/ingest'; import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections'; -import { type KtxLocalProject } from '@ktx/context/project'; -import { loadKtxCliProject } from './cli-project.js'; +import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project'; +import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js'; import { readIngestReportSnapshotFile } from './ingest-report-file.js'; import { createCliOperationalLogger } from './io/logger.js'; @@ -682,16 +682,17 @@ export async function runKtxIngest( deps: KtxIngestDeps = {}, ): Promise { try { - const cliVersion = args.command === 'run' ? args.cliVersion : undefined; - const runtimeInstallPolicy = args.command === 'run' ? args.runtimeInstallPolicy : undefined; - const project = await loadKtxCliProject({ - projectDir: args.projectDir, - cliVersion: cliVersion ?? '0.0.0-private', - installPolicy: runtimeInstallPolicy ?? 'never', - io, - }); + const project = await loadKtxProject({ projectDir: args.projectDir }); const env = deps.env ?? process.env; if (args.command === 'run') { + const resolution = await resolveProjectEmbeddingProvider(project, { + mode: 'ensure', + installPolicy: args.runtimeInstallPolicy ?? 'never', + cliVersion: args.cliVersion ?? '0.0.0-private', + io, + }); + const embeddingProvider = + resolution.kind === 'disabled' || resolution.kind === 'managed-unavailable' ? null : resolution.provider; const ingestProject = args.allowImplicitAdapter && !project.config.ingest.adapters.includes(args.adapter) ? { @@ -771,6 +772,7 @@ export async function runKtxIngest( queryExecutor, trigger: 'manual_resync', jobIdFactory: deps.jobIdFactory, + embeddingProvider, ...(memoryFlow ? { memoryFlow } : {}), ...(progress ? { progress } : {}), }); @@ -843,6 +845,7 @@ export async function runKtxIngest( ...localIngestOptions, queryExecutor, pullConfigOptions: adapterOptions, + embeddingProvider, ...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}), ...(memoryFlow ? { memoryFlow } : {}), }); diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index b7251721..eb5a47fd 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -1,5 +1,4 @@ -import { type KtxLocalProject, type KtxProjectConnectionConfig } from '@ktx/context/project'; -import { loadKtxCliProject } from './cli-project.js'; +import { loadKtxProject, type KtxLocalProject, type KtxProjectConnectionConfig } from '@ktx/context/project'; import type { KtxProgressPort } from '@ktx/context/scan'; import type { KtxCliIo } from './index.js'; import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js'; @@ -869,14 +868,7 @@ export async function runKtxPublicIngest( deps: KtxPublicIngestDeps = {}, ): Promise { const loadProject = - deps.loadProject ?? - ((options: { projectDir: string }) => - loadKtxCliProject({ - projectDir: options.projectDir, - cliVersion: args.cliVersion ?? '0.0.0-private', - installPolicy: args.runtimeInstallPolicy ?? 'never', - io, - })); + deps.loadProject ?? ((options: { projectDir: string }) => loadKtxProject({ projectDir: options.projectDir })); const project = await loadProject({ projectDir: args.projectDir }); if (shouldUseForegroundContextBuildView(args, io)) { const plan = buildPublicIngestPlan(project, args); diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index 68d0db35..9583d3bf 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -5,7 +5,8 @@ import { type KtxScanWarning, runLocalScan, } from '@ktx/context/scan'; -import { loadKtxCliProject } from './cli-project.js'; +import { loadKtxProject } from '@ktx/context/project'; +import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import type { KtxCliIo } from './index.js'; import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; @@ -313,12 +314,15 @@ export function createCliScanProgress( export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps: KtxScanDeps = {}): Promise { try { - const project = await loadKtxCliProject({ - projectDir: args.projectDir, - cliVersion: args.cliVersion ?? '0.0.0-private', + const project = await loadKtxProject({ projectDir: args.projectDir }); + const resolution = await resolveProjectEmbeddingProvider(project, { + mode: 'ensure', installPolicy: args.runtimeInstallPolicy ?? 'never', + cliVersion: args.cliVersion ?? '0.0.0-private', io, }); + const embeddingProvider = + resolution.kind === 'disabled' || resolution.kind === 'managed-unavailable' ? null : resolution.provider; const managedDaemon = managedDaemonOptionsForScanRun(args, deps.runtimeIo ?? io); const connector = args.mode !== 'structural' || args.detectRelationships @@ -336,6 +340,7 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps trigger: 'cli', databaseIntrospectionUrl: args.databaseIntrospectionUrl, connector, + embeddingProvider, adapters: (deps.createLocalIngestAdapters ?? createKtxCliLocalIngestAdapters)(project, { ...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}), ...(managedDaemon ? { managedDaemon } : {}), diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index f5bb73bc..13466e50 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -8,7 +8,6 @@ import { noopLogger, SessionWorktreeService } from '../core/index.js'; import type { KtxSemanticLayerComputePort } from '../daemon/index.js'; import { createRuntimeToolDescriptorFromAiTool, - createLocalKtxEmbeddingProviderFromConfig, createLocalKtxLlmRuntimeFromConfig, KtxIngestEmbeddingPortAdapter, RuntimeAgentRunner, @@ -16,6 +15,7 @@ import { type KtxLlmRuntimePort, type KtxRuntimeToolSet, } from '../llm/index.js'; +import type { KtxEmbeddingProvider } from '@ktx/llm'; import type { KtxLocalProject } from '../project/index.js'; import { ktxLocalStateDbPath } from '../project/index.js'; import { PromptService } from '../prompts/index.js'; @@ -114,6 +114,7 @@ export interface CreateLocalBundleIngestRuntimeOptions { queryExecutor?: KtxSqlQueryExecutorPort; jobIdFactory?: () => string; logger?: KtxLogger; + embeddingProvider?: KtxEmbeddingProvider | null; } export interface LocalBundleIngestRuntime { @@ -669,7 +670,7 @@ export function createLocalBundleIngestRuntime( mkdirSync(join(options.project.projectDir, '.ktx/cache/local-ingest'), { recursive: true }); const store = new SqliteBundleIngestStore({ dbPath }); const contextStore = new SqliteContextEvidenceStore({ dbPath }); - const embeddingProvider = createLocalKtxEmbeddingProviderFromConfig(options.project.config.ingest.embeddings); + const embeddingProvider = options.embeddingProvider ?? null; const embedding = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : new NoopEmbeddingPort(); const connections = new LocalConnectionCatalog(options.project, options.queryExecutor); const rootFileStore = options.project.fileStore; diff --git a/packages/context/src/ingest/local-ingest.ts b/packages/context/src/ingest/local-ingest.ts index 0ac300c4..794ccfc4 100644 --- a/packages/context/src/ingest/local-ingest.ts +++ b/packages/context/src/ingest/local-ingest.ts @@ -34,6 +34,7 @@ export interface RunLocalIngestOptions { semanticLayerCompute?: KtxSemanticLayerComputePort; queryExecutor?: KtxSqlQueryExecutorPort; logger?: KtxLogger; + embeddingProvider?: import('@ktx/llm').KtxEmbeddingProvider | null; } export interface LocalIngestMcpOptions @@ -172,6 +173,7 @@ async function runScheduledPullJob(options: { semanticLayerCompute?: KtxSemanticLayerComputePort; queryExecutor?: KtxSqlQueryExecutorPort; logger?: KtxLogger; + embeddingProvider?: import('@ktx/llm').KtxEmbeddingProvider | null; }): Promise { const runtime = createLocalBundleIngestRuntime(options); const jobId = options.jobId ?? runtime.nextJobId(); @@ -225,6 +227,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise { } }); - it('resolves gateway LLM providers and OpenAI embeddings from local scan config', () => { + it('resolves gateway LLM providers and passes injected embedding provider through to scan enrichment', () => { const createKtxLlmProvider = vi.fn(() => ({ getModel: vi.fn().mockReturnValue({ modelId: 'provider/language-model', provider: 'gateway' }), })); - const createKtxEmbeddingProvider = vi.fn(() => ({ + const embeddingProvider = { dimensions: 1536, maxBatchSize: 8, embed: vi.fn(), [['embed', 'Many'].join('')]: vi.fn(), - })); + }; const providers = createLocalScanEnrichmentProvidersFromConfig( { @@ -844,8 +844,8 @@ describe('local scan enrichment', () => { }, { createKtxLlmProvider: createKtxLlmProvider as any, - createKtxEmbeddingProvider: createKtxEmbeddingProvider as any, env: { OPENAI_API_KEY: 'openai-key' }, // pragma: allowlist secret + embeddingProvider: embeddingProvider as any, }, ); @@ -854,8 +854,5 @@ describe('local scan enrichment', () => { expect(createKtxLlmProvider).toHaveBeenCalledWith( expect.objectContaining({ backend: 'gateway', modelSlots: { default: 'provider/language-model' } }), ); - expect(createKtxEmbeddingProvider).toHaveBeenCalledWith( - expect.objectContaining({ backend: 'openai', model: 'provider/embedding-model' }), - ); }); }); diff --git a/packages/context/src/scan/local-scan.ts b/packages/context/src/scan/local-scan.ts index e878f874..8bd2cf53 100644 --- a/packages/context/src/scan/local-scan.ts +++ b/packages/context/src/scan/local-scan.ts @@ -1,4 +1,4 @@ -import type { createKtxEmbeddingProvider, createKtxLlmProvider } from '@ktx/llm'; +import type { createKtxEmbeddingProvider, createKtxLlmProvider, KtxEmbeddingProvider } from '@ktx/llm'; import { createDefaultLocalIngestAdapters, getLocalStageOnlyIngestStatus, @@ -6,11 +6,7 @@ import { runLocalStageOnlyIngest, type SourceAdapter, } from '../ingest/index.js'; -import { - createLocalKtxEmbeddingProviderFromConfig, - createLocalKtxLlmRuntimeFromConfig, - KtxScanEmbeddingPortAdapter, -} from '../llm/index.js'; +import { createLocalKtxLlmRuntimeFromConfig, KtxScanEmbeddingPortAdapter } from '../llm/index.js'; import type { KtxProjectLlmConfig, KtxScanEnrichmentConfig, KtxScanRelationshipConfig } from '../project/config.js'; import type { KtxLocalProject } from '../project/index.js'; import { ktxLocalStateDbPath } from '../project/local-state-db.js'; @@ -55,6 +51,7 @@ export interface RunLocalScanOptions { enrichmentProviders?: KtxLocalScanEnrichmentProviders | null; enrichmentStateStore?: SqliteLocalScanEnrichmentStateStore | null; progress?: KtxProgressPort; + embeddingProvider?: KtxEmbeddingProvider | null; } export interface LocalScanRunResult { @@ -152,6 +149,7 @@ interface LocalScanEnrichmentProviderDeps { createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider; env?: NodeJS.ProcessEnv; projectDir?: string; + embeddingProvider?: KtxEmbeddingProvider | null; } export function createLocalScanEnrichmentProvidersFromConfig( @@ -171,7 +169,7 @@ export function createLocalScanEnrichmentProvidersFromConfig( ...deps, projectDir: deps.projectDir, }); - const embeddingProvider = createLocalKtxEmbeddingProviderFromConfig(config.embeddings, deps); + const embeddingProvider = deps.embeddingProvider ?? null; if (!llmRuntime || !embeddingProvider) { return null; } @@ -371,6 +369,7 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise