refactor: pass embeddingProvider into ingest/scan instead of resolving inside @ktx/context

This commit is contained in:
Andrey Avtomonov 2026-05-21 01:56:34 +02:00
parent 95c0846ff7
commit 4d6808e09f
7 changed files with 41 additions and 40 deletions

View file

@ -18,8 +18,8 @@ import {
sanitizeMemoryFlowError,
} from '@ktx/context/ingest';
import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections';
import { type KtxLocalProject } from '@ktx/context/project';
import { loadKtxCliProject } from './cli-project.js';
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
import { readIngestReportSnapshotFile } from './ingest-report-file.js';
import { createCliOperationalLogger } from './io/logger.js';
@ -682,16 +682,17 @@ export async function runKtxIngest(
deps: KtxIngestDeps = {},
): Promise<number> {
try {
const cliVersion = args.command === 'run' ? args.cliVersion : undefined;
const runtimeInstallPolicy = args.command === 'run' ? args.runtimeInstallPolicy : undefined;
const project = await loadKtxCliProject({
projectDir: args.projectDir,
cliVersion: cliVersion ?? '0.0.0-private',
installPolicy: runtimeInstallPolicy ?? 'never',
io,
});
const project = await loadKtxProject({ projectDir: args.projectDir });
const env = deps.env ?? process.env;
if (args.command === 'run') {
const resolution = await resolveProjectEmbeddingProvider(project, {
mode: 'ensure',
installPolicy: args.runtimeInstallPolicy ?? 'never',
cliVersion: args.cliVersion ?? '0.0.0-private',
io,
});
const embeddingProvider =
resolution.kind === 'disabled' || resolution.kind === 'managed-unavailable' ? null : resolution.provider;
const ingestProject =
args.allowImplicitAdapter && !project.config.ingest.adapters.includes(args.adapter)
? {
@ -771,6 +772,7 @@ export async function runKtxIngest(
queryExecutor,
trigger: 'manual_resync',
jobIdFactory: deps.jobIdFactory,
embeddingProvider,
...(memoryFlow ? { memoryFlow } : {}),
...(progress ? { progress } : {}),
});
@ -843,6 +845,7 @@ export async function runKtxIngest(
...localIngestOptions,
queryExecutor,
pullConfigOptions: adapterOptions,
embeddingProvider,
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
...(memoryFlow ? { memoryFlow } : {}),
});

View file

@ -1,5 +1,4 @@
import { type KtxLocalProject, type KtxProjectConnectionConfig } from '@ktx/context/project';
import { loadKtxCliProject } from './cli-project.js';
import { loadKtxProject, type KtxLocalProject, type KtxProjectConnectionConfig } from '@ktx/context/project';
import type { KtxProgressPort } from '@ktx/context/scan';
import type { KtxCliIo } from './index.js';
import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js';
@ -869,14 +868,7 @@ export async function runKtxPublicIngest(
deps: KtxPublicIngestDeps = {},
): Promise<number> {
const loadProject =
deps.loadProject ??
((options: { projectDir: string }) =>
loadKtxCliProject({
projectDir: options.projectDir,
cliVersion: args.cliVersion ?? '0.0.0-private',
installPolicy: args.runtimeInstallPolicy ?? 'never',
io,
}));
deps.loadProject ?? ((options: { projectDir: string }) => loadKtxProject({ projectDir: options.projectDir }));
const project = await loadProject({ projectDir: args.projectDir });
if (shouldUseForegroundContextBuildView(args, io)) {
const plan = buildPublicIngestPlan(project, args);

View file

@ -5,7 +5,8 @@ import {
type KtxScanWarning,
runLocalScan,
} from '@ktx/context/scan';
import { loadKtxCliProject } from './cli-project.js';
import { loadKtxProject } from '@ktx/context/project';
import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
import type { KtxCliIo } from './index.js';
import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
import { createKtxCliScanConnector } from './local-scan-connectors.js';
@ -313,12 +314,15 @@ export function createCliScanProgress(
export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps: KtxScanDeps = {}): Promise<number> {
try {
const project = await loadKtxCliProject({
projectDir: args.projectDir,
cliVersion: args.cliVersion ?? '0.0.0-private',
const project = await loadKtxProject({ projectDir: args.projectDir });
const resolution = await resolveProjectEmbeddingProvider(project, {
mode: 'ensure',
installPolicy: args.runtimeInstallPolicy ?? 'never',
cliVersion: args.cliVersion ?? '0.0.0-private',
io,
});
const embeddingProvider =
resolution.kind === 'disabled' || resolution.kind === 'managed-unavailable' ? null : resolution.provider;
const managedDaemon = managedDaemonOptionsForScanRun(args, deps.runtimeIo ?? io);
const connector =
args.mode !== 'structural' || args.detectRelationships
@ -336,6 +340,7 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
trigger: 'cli',
databaseIntrospectionUrl: args.databaseIntrospectionUrl,
connector,
embeddingProvider,
adapters: (deps.createLocalIngestAdapters ?? createKtxCliLocalIngestAdapters)(project, {
...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}),
...(managedDaemon ? { managedDaemon } : {}),

View file

@ -8,7 +8,6 @@ import { noopLogger, SessionWorktreeService } from '../core/index.js';
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
import {
createRuntimeToolDescriptorFromAiTool,
createLocalKtxEmbeddingProviderFromConfig,
createLocalKtxLlmRuntimeFromConfig,
KtxIngestEmbeddingPortAdapter,
RuntimeAgentRunner,
@ -16,6 +15,7 @@ import {
type KtxLlmRuntimePort,
type KtxRuntimeToolSet,
} from '../llm/index.js';
import type { KtxEmbeddingProvider } from '@ktx/llm';
import type { KtxLocalProject } from '../project/index.js';
import { ktxLocalStateDbPath } from '../project/index.js';
import { PromptService } from '../prompts/index.js';
@ -114,6 +114,7 @@ export interface CreateLocalBundleIngestRuntimeOptions {
queryExecutor?: KtxSqlQueryExecutorPort;
jobIdFactory?: () => string;
logger?: KtxLogger;
embeddingProvider?: KtxEmbeddingProvider | null;
}
export interface LocalBundleIngestRuntime {
@ -669,7 +670,7 @@ export function createLocalBundleIngestRuntime(
mkdirSync(join(options.project.projectDir, '.ktx/cache/local-ingest'), { recursive: true });
const store = new SqliteBundleIngestStore({ dbPath });
const contextStore = new SqliteContextEvidenceStore({ dbPath });
const embeddingProvider = createLocalKtxEmbeddingProviderFromConfig(options.project.config.ingest.embeddings);
const embeddingProvider = options.embeddingProvider ?? null;
const embedding = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : new NoopEmbeddingPort();
const connections = new LocalConnectionCatalog(options.project, options.queryExecutor);
const rootFileStore = options.project.fileStore;

View file

@ -34,6 +34,7 @@ export interface RunLocalIngestOptions {
semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: KtxSqlQueryExecutorPort;
logger?: KtxLogger;
embeddingProvider?: import('@ktx/llm').KtxEmbeddingProvider | null;
}
export interface LocalIngestMcpOptions
@ -172,6 +173,7 @@ async function runScheduledPullJob(options: {
semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: KtxSqlQueryExecutorPort;
logger?: KtxLogger;
embeddingProvider?: import('@ktx/llm').KtxEmbeddingProvider | null;
}): Promise<LocalIngestResult> {
const runtime = createLocalBundleIngestRuntime(options);
const jobId = options.jobId ?? runtime.nextJobId();
@ -225,6 +227,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise<Lo
semanticLayerCompute: options.semanticLayerCompute,
queryExecutor: options.queryExecutor,
logger: options.logger,
embeddingProvider: options.embeddingProvider,
});
}
@ -403,6 +406,7 @@ export async function runLocalMetabaseIngest(
semanticLayerCompute: options.semanticLayerCompute,
queryExecutor: options.queryExecutor,
logger: options.logger,
embeddingProvider: options.embeddingProvider,
});
} catch (error) {
child = await recordLocalMetabaseChildFailure({

View file

@ -813,16 +813,16 @@ describe('local scan enrichment', () => {
}
});
it('resolves gateway LLM providers and OpenAI embeddings from local scan config', () => {
it('resolves gateway LLM providers and passes injected embedding provider through to scan enrichment', () => {
const createKtxLlmProvider = vi.fn(() => ({
getModel: vi.fn().mockReturnValue({ modelId: 'provider/language-model', provider: 'gateway' }),
}));
const createKtxEmbeddingProvider = vi.fn(() => ({
const embeddingProvider = {
dimensions: 1536,
maxBatchSize: 8,
embed: vi.fn(),
[['embed', 'Many'].join('')]: vi.fn(),
}));
};
const providers = createLocalScanEnrichmentProvidersFromConfig(
{
@ -844,8 +844,8 @@ describe('local scan enrichment', () => {
},
{
createKtxLlmProvider: createKtxLlmProvider as any,
createKtxEmbeddingProvider: createKtxEmbeddingProvider as any,
env: { OPENAI_API_KEY: 'openai-key' }, // pragma: allowlist secret
embeddingProvider: embeddingProvider as any,
},
);
@ -854,8 +854,5 @@ describe('local scan enrichment', () => {
expect(createKtxLlmProvider).toHaveBeenCalledWith(
expect.objectContaining({ backend: 'gateway', modelSlots: { default: 'provider/language-model' } }),
);
expect(createKtxEmbeddingProvider).toHaveBeenCalledWith(
expect.objectContaining({ backend: 'openai', model: 'provider/embedding-model' }),
);
});
});

View file

@ -1,4 +1,4 @@
import type { createKtxEmbeddingProvider, createKtxLlmProvider } from '@ktx/llm';
import type { createKtxEmbeddingProvider, createKtxLlmProvider, KtxEmbeddingProvider } from '@ktx/llm';
import {
createDefaultLocalIngestAdapters,
getLocalStageOnlyIngestStatus,
@ -6,11 +6,7 @@ import {
runLocalStageOnlyIngest,
type SourceAdapter,
} from '../ingest/index.js';
import {
createLocalKtxEmbeddingProviderFromConfig,
createLocalKtxLlmRuntimeFromConfig,
KtxScanEmbeddingPortAdapter,
} from '../llm/index.js';
import { createLocalKtxLlmRuntimeFromConfig, KtxScanEmbeddingPortAdapter } from '../llm/index.js';
import type { KtxProjectLlmConfig, KtxScanEnrichmentConfig, KtxScanRelationshipConfig } from '../project/config.js';
import type { KtxLocalProject } from '../project/index.js';
import { ktxLocalStateDbPath } from '../project/local-state-db.js';
@ -55,6 +51,7 @@ export interface RunLocalScanOptions {
enrichmentProviders?: KtxLocalScanEnrichmentProviders | null;
enrichmentStateStore?: SqliteLocalScanEnrichmentStateStore | null;
progress?: KtxProgressPort;
embeddingProvider?: KtxEmbeddingProvider | null;
}
export interface LocalScanRunResult {
@ -152,6 +149,7 @@ interface LocalScanEnrichmentProviderDeps {
createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
env?: NodeJS.ProcessEnv;
projectDir?: string;
embeddingProvider?: KtxEmbeddingProvider | null;
}
export function createLocalScanEnrichmentProvidersFromConfig(
@ -171,7 +169,7 @@ export function createLocalScanEnrichmentProvidersFromConfig(
...deps,
projectDir: deps.projectDir,
});
const embeddingProvider = createLocalKtxEmbeddingProviderFromConfig(config.embeddings, deps);
const embeddingProvider = deps.embeddingProvider ?? null;
if (!llmRuntime || !embeddingProvider) {
return null;
}
@ -371,6 +369,7 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise<LocalS
? options.enrichmentProviders
: createLocalScanEnrichmentProvidersFromConfig(options.project.config.scan.enrichment, options.project.config.llm, {
projectDir: options.project.projectDir,
embeddingProvider: options.embeddingProvider ?? null,
})
: null;