From 4d4441ccd505d32b2cefe3fe52ccb8559758ab8f Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 14:34:15 +0200 Subject: [PATCH] fix(context): avoid saving scan error descriptions (#37) --- .../src/scan/description-generation.test.ts | 68 +++++++++++++++++++ .../src/scan/description-generation.ts | 20 +++--- .../scan/local-enrichment-artifacts.test.ts | 41 +++++++++++ .../src/scan/local-enrichment-artifacts.ts | 12 +++- 4 files changed, 129 insertions(+), 12 deletions(-) diff --git a/packages/context/src/scan/description-generation.test.ts b/packages/context/src/scan/description-generation.test.ts index de69fb27..70117919 100644 --- a/packages/context/src/scan/description-generation.test.ts +++ b/packages/context/src/scan/description-generation.test.ts @@ -51,6 +51,29 @@ function createLlmProvider(text = 'generated description') { } as any; } +function createFailingLlmProvider(message = 'timeout exceeded when trying to connect') { + vi.mocked(generateText).mockRejectedValue(new Error(message) as never); + return { + getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }), + getModelByName: vi.fn(), + cacheMarker: vi.fn(), + repairToolCallHandler: vi.fn(), + thinkingProviderOptions: vi.fn(), + telemetryConfig: vi.fn(), + promptCachingConfig: vi.fn(() => ({ + enabled: false, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, + })), + activeBackend: vi.fn(() => 'anthropic'), + } as any; +} + function createConnector(): KtxScanConnector { return { id: 'test-connector', @@ -274,6 +297,51 @@ describe('KtxDescriptionGenerator', () => { expect('introspect' in sampler).toBe(false); }); + it('does not turn LLM failures into generated descriptions', async () => { + const cache = createCache(); + const connector = createConnector(); + const generator = new KtxDescriptionGenerator({ + llmProvider: createFailingLlmProvider(), + cache, + settings: { + columnMaxWords: 12, + tableMaxWords: 18, + dataSourceMaxWords: 24, + }, + }); + + const columnResult = await generator.generateColumnDescriptions({ + connectionId: 'conn-1', + connector, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + supportsNestedAnalysis: false, + table: { + catalog: null, + db: 'public', + name: 'orders', + columns: [{ name: 'status' }], + }, + }); + + await expect( + generator.generateTableDescription({ + connectionId: 'conn-1', + connector, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + table: { catalog: null, db: 'public', name: 'orders' }, + }), + ).resolves.toBeNull(); + + expect(columnResult).toEqual({ + columnDescriptions: [['status', null]], + processedColumns: [], + skippedColumns: [], + }); + expect(cache.set).not.toHaveBeenCalled(); + }); + it('generates and caches table and data-source descriptions', async () => { const cache = createCache(); const connector = createConnector(); diff --git a/packages/context/src/scan/description-generation.ts b/packages/context/src/scan/description-generation.ts index dc30af04..c719ca65 100644 --- a/packages/context/src/scan/description-generation.ts +++ b/packages/context/src/scan/description-generation.ts @@ -348,7 +348,7 @@ export class KtxDescriptionGenerator { }; } - async generateTableDescription(input: KtxGenerateTableDescriptionInput): Promise { + async generateTableDescription(input: KtxGenerateTableDescriptionInput): Promise { const tableRef = toTableRef(input.table); const cacheKey = this.cache?.buildTableKey(tableRef); if (cacheKey) { @@ -386,7 +386,7 @@ export class KtxDescriptionGenerator { this.settings.tableMaxWords, 'ktx-table-description', ); - if (cacheKey) { + if (cacheKey && description) { await this.cache?.set(cacheKey, description); } return description; @@ -396,7 +396,7 @@ export class KtxDescriptionGenerator { } } - async generateDataSourceDescription(input: KtxGenerateDataSourceDescriptionInput): Promise { + async generateDataSourceDescription(input: KtxGenerateDataSourceDescriptionInput): Promise { if (input.tables.length === 0) { return 'No tables found in database'; } @@ -451,7 +451,7 @@ export class KtxDescriptionGenerator { this.settings.dataSourceMaxWords, 'ktx-data-source-description', ); - if (cacheKey) { + if (cacheKey && description) { await this.cache?.set(cacheKey, description); } return description; @@ -543,7 +543,7 @@ export class KtxDescriptionGenerator { 'ktx-column-description', ); - if (cacheKey) { + if (cacheKey && description) { await this.cache?.set(cacheKey, description); } @@ -551,20 +551,20 @@ export class KtxDescriptionGenerator { columnName: column.name, description, skipped: false, - processed: true, + processed: description !== null, }; } catch (error) { this.logger?.error(`Error analyzing column '${column.name}': ${errorMessage(error)}`); return { columnName: column.name, - description: `Error generating description: ${errorMessage(error)}`, + description: null, skipped: false, processed: false, }; } } - private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise { + private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise { try { const text = await generateKtxText({ llmProvider: this.llmProvider, @@ -573,10 +573,10 @@ export class KtxDescriptionGenerator { temperature: this.settings.temperature, }); const description = text.trim(); - return description || 'Failed to generate description'; + return description || null; } catch (error) { this.logger?.error(`Error generating AI description: ${errorMessage(error)}`); - return `Error generating description: ${errorMessage(error)}`; + return null; } } } diff --git a/packages/context/src/scan/local-enrichment-artifacts.test.ts b/packages/context/src/scan/local-enrichment-artifacts.test.ts index 0123f086..8e0c25fd 100644 --- a/packages/context/src/scan/local-enrichment-artifacts.test.ts +++ b/packages/context/src/scan/local-enrichment-artifacts.test.ts @@ -553,6 +553,47 @@ describe('writeLocalScanEnrichmentArtifacts', () => { }); }); + it('does not persist generated error descriptions in manifest shards', async () => { + await writeLocalScanManifestShards({ + project, + connectionId: 'warehouse', + syncId: 'sync-error-description', + driver: 'postgres', + snapshot, + descriptionUpdates: [ + { + table: { catalog: null, db: 'public', name: 'orders' }, + tableDescription: 'Error generating description: timeout exceeded when trying to connect', + columnDescriptions: { + id: 'Error generating description: timeout exceeded when trying to connect', + customer_id: 'AI customer reference', + }, + }, + ], + dryRun: false, + }); + + const shard = YAML.parse( + await readFile(join(tempDir, 'project/semantic-layer/warehouse/_schema/public.yaml'), 'utf8'), + ) as { + tables: { + orders: { + descriptions?: Record; + columns: Array<{ name: string; descriptions?: Record }>; + }; + }; + }; + + expect(shard.tables.orders.descriptions).toEqual({ db: 'DB orders table' }); + expect(shard.tables.orders.columns.find((column) => column.name === 'id')?.descriptions).toEqual({ + db: 'DB order id', + }); + expect(shard.tables.orders.columns.find((column) => column.name === 'customer_id')?.descriptions).toEqual({ + db: 'DB customer id', + ai: 'AI customer reference', + }); + }); + it('writes accepted composite relationships to relationship artifacts and manifest shards', async () => { const compositeSnapshot: KtxSchemaSnapshot = { connectionId: 'warehouse', diff --git a/packages/context/src/scan/local-enrichment-artifacts.ts b/packages/context/src/scan/local-enrichment-artifacts.ts index 101d062e..78f5e36d 100644 --- a/packages/context/src/scan/local-enrichment-artifacts.ts +++ b/packages/context/src/scan/local-enrichment-artifacts.ts @@ -62,6 +62,14 @@ interface ExistingManifestState { type LocalDescriptionUpdates = KtxLocalScanEnrichmentResult['descriptionUpdates']; +function isGeneratedErrorDescription(description: string | null | undefined): boolean { + const normalized = description?.trim().toLowerCase(); + return ( + normalized === 'failed to generate description' || + normalized?.startsWith('error generating description:') === true + ); +} + function artifactDir(connectionId: string, syncId: string): string { return `raw-sources/${connectionId}/${LIVE_DATABASE_ADAPTER}/${syncId}/enrichment`; } @@ -79,7 +87,7 @@ function tableDescription( if (table.comment) { descriptions.db = table.comment; } - if (update?.tableDescription) { + if (update?.tableDescription && !isGeneratedErrorDescription(update.tableDescription)) { descriptions.ai = update.tableDescription; } return Object.keys(descriptions).length > 0 ? descriptions : undefined; @@ -96,7 +104,7 @@ function columnDescription( if (column.comment) { descriptions.db = column.comment; } - if (aiDescription) { + if (aiDescription && !isGeneratedErrorDescription(aiDescription)) { descriptions.ai = aiDescription; } return Object.keys(descriptions).length > 0 ? descriptions : undefined;