fix(context): avoid saving scan error descriptions (#37)

This commit is contained in:
Andrey Avtomonov 2026-05-12 14:34:15 +02:00 committed by GitHub
parent f422facf10
commit 4d4441ccd5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 129 additions and 12 deletions

View file

@ -51,6 +51,29 @@ function createLlmProvider(text = 'generated description') {
} as any;
}
/**
 * Builds a stub LLM provider for tests in which text generation always fails.
 *
 * As a side effect, the mocked `generateText` is configured to reject with an
 * `Error` carrying `message`.
 *
 * @param message - Text of the error that the mocked `generateText` rejects with.
 * @returns A loosely typed provider object whose members are all `vi.fn()` mocks.
 */
function createFailingLlmProvider(message = 'timeout exceeded when trying to connect') {
  const failure = new Error(message);
  vi.mocked(generateText).mockRejectedValue(failure as never);

  const stubModel = { modelId: 'claude-sonnet-4-6', provider: 'anthropic' };

  const provider = {
    getModel: vi.fn().mockReturnValue(stubModel),
    getModelByName: vi.fn(),
    cacheMarker: vi.fn(),
    repairToolCallHandler: vi.fn(),
    thinkingProviderOptions: vi.fn(),
    telemetryConfig: vi.fn(),
    // Returns a fresh config object on every call, with prompt caching disabled.
    promptCachingConfig: vi.fn(() => {
      return {
        enabled: false,
        systemTtl: '1h',
        toolsTtl: '1h',
        historyTtl: '5m',
        cacheSystem: true,
        cacheTools: true,
        cacheHistory: true,
        vertexFallbackTo5m: false,
      };
    }),
    activeBackend: vi.fn(() => 'anthropic'),
  };

  return provider as any;
}
function createConnector(): KtxScanConnector {
return {
id: 'test-connector',
@ -274,6 +297,51 @@ describe('KtxDescriptionGenerator', () => {
expect('introspect' in sampler).toBe(false);
});
it('does not turn LLM failures into generated descriptions', async () => {
  const descriptionCache = createCache();
  const scanConnector = createConnector();

  // createFailingLlmProvider() makes the mocked generateText reject.
  const failingGenerator = new KtxDescriptionGenerator({
    llmProvider: createFailingLlmProvider(),
    cache: descriptionCache,
    settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
  });

  // Column pass first, then the table pass, both against the same run.
  const columnOutcome = await failingGenerator.generateColumnDescriptions({
    connectionId: 'conn-1',
    connector: scanConnector,
    context: { runId: 'run-1' },
    dataSourceType: 'POSTGRESQL',
    supportsNestedAnalysis: false,
    table: {
      catalog: null,
      db: 'public',
      name: 'orders',
      columns: [{ name: 'status' }],
    },
  });

  const pendingTableDescription = failingGenerator.generateTableDescription({
    connectionId: 'conn-1',
    connector: scanConnector,
    context: { runId: 'run-1' },
    dataSourceType: 'POSTGRESQL',
    table: { catalog: null, db: 'public', name: 'orders' },
  });
  // A failed table description resolves to null instead of an error string.
  await expect(pendingTableDescription).resolves.toBeNull();

  // The column gets a null description and is counted neither as processed nor skipped.
  expect(columnOutcome).toEqual({
    columnDescriptions: [['status', null]],
    processedColumns: [],
    skippedColumns: [],
  });

  // Nothing may be written to the cache when generation fails.
  expect(descriptionCache.set).not.toHaveBeenCalled();
});
it('generates and caches table and data-source descriptions', async () => {
const cache = createCache();
const connector = createConnector();

View file

@ -348,7 +348,7 @@ export class KtxDescriptionGenerator {
};
}
async generateTableDescription(input: KtxGenerateTableDescriptionInput): Promise<string> {
async generateTableDescription(input: KtxGenerateTableDescriptionInput): Promise<string | null> {
const tableRef = toTableRef(input.table);
const cacheKey = this.cache?.buildTableKey(tableRef);
if (cacheKey) {
@ -386,7 +386,7 @@ export class KtxDescriptionGenerator {
this.settings.tableMaxWords,
'ktx-table-description',
);
if (cacheKey) {
if (cacheKey && description) {
await this.cache?.set(cacheKey, description);
}
return description;
@ -396,7 +396,7 @@ export class KtxDescriptionGenerator {
}
}
async generateDataSourceDescription(input: KtxGenerateDataSourceDescriptionInput): Promise<string> {
async generateDataSourceDescription(input: KtxGenerateDataSourceDescriptionInput): Promise<string | null> {
if (input.tables.length === 0) {
return 'No tables found in database';
}
@ -451,7 +451,7 @@ export class KtxDescriptionGenerator {
this.settings.dataSourceMaxWords,
'ktx-data-source-description',
);
if (cacheKey) {
if (cacheKey && description) {
await this.cache?.set(cacheKey, description);
}
return description;
@ -543,7 +543,7 @@ export class KtxDescriptionGenerator {
'ktx-column-description',
);
if (cacheKey) {
if (cacheKey && description) {
await this.cache?.set(cacheKey, description);
}
@ -551,20 +551,20 @@ export class KtxDescriptionGenerator {
columnName: column.name,
description,
skipped: false,
processed: true,
processed: description !== null,
};
} catch (error) {
this.logger?.error(`Error analyzing column '${column.name}': ${errorMessage(error)}`);
return {
columnName: column.name,
description: `Error generating description: ${errorMessage(error)}`,
description: null,
skipped: false,
processed: false,
};
}
}
private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise<string> {
private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise<string | null> {
try {
const text = await generateKtxText({
llmProvider: this.llmProvider,
@ -573,10 +573,10 @@ export class KtxDescriptionGenerator {
temperature: this.settings.temperature,
});
const description = text.trim();
return description || 'Failed to generate description';
return description || null;
} catch (error) {
this.logger?.error(`Error generating AI description: ${errorMessage(error)}`);
return `Error generating description: ${errorMessage(error)}`;
return null;
}
}
}

View file

@ -553,6 +553,47 @@ describe('writeLocalScanEnrichmentArtifacts', () => {
});
});
it('does not persist generated error descriptions in manifest shards', async () => {
  // Error placeholder passed as both the table description and the `id` column description.
  const failureText = 'Error generating description: timeout exceeded when trying to connect';

  await writeLocalScanManifestShards({
    project,
    connectionId: 'warehouse',
    syncId: 'sync-error-description',
    driver: 'postgres',
    snapshot,
    descriptionUpdates: [
      {
        table: { catalog: null, db: 'public', name: 'orders' },
        tableDescription: failureText,
        columnDescriptions: {
          id: failureText,
          customer_id: 'AI customer reference',
        },
      },
    ],
    dryRun: false,
  });

  type DescriptionMap = Record<string, string>;
  type ShardShape = {
    tables: {
      orders: {
        descriptions?: DescriptionMap;
        columns: Array<{ name: string; descriptions?: DescriptionMap }>;
      };
    };
  };

  // Read back the shard written for the `public` schema.
  const shardPath = join(tempDir, 'project/semantic-layer/warehouse/_schema/public.yaml');
  const shardYaml = await readFile(shardPath, 'utf8');
  const shard = YAML.parse(shardYaml) as ShardShape;

  const descriptionsForColumn = (columnName: string) =>
    shard.tables.orders.columns.find((column) => column.name === columnName)?.descriptions;

  // Error placeholders must not show up under `ai`; only the `db` entries remain.
  expect(shard.tables.orders.descriptions).toEqual({ db: 'DB orders table' });
  expect(descriptionsForColumn('id')).toEqual({
    db: 'DB order id',
  });
  // A genuine AI description is still persisted alongside the `db` one.
  expect(descriptionsForColumn('customer_id')).toEqual({
    db: 'DB customer id',
    ai: 'AI customer reference',
  });
});
it('writes accepted composite relationships to relationship artifacts and manifest shards', async () => {
const compositeSnapshot: KtxSchemaSnapshot = {
connectionId: 'warehouse',

View file

@ -62,6 +62,14 @@ interface ExistingManifestState {
type LocalDescriptionUpdates = KtxLocalScanEnrichmentResult['descriptionUpdates'];
/**
 * Reports whether a description is a generation-failure placeholder rather
 * than real descriptive text.
 *
 * The value is trimmed and lower-cased before matching, so surrounding
 * whitespace and letter case are ignored. Two forms are recognized:
 * the exact text `failed to generate description`, and any text beginning
 * with `error generating description:`.
 *
 * @param description - Candidate description; may be `null` or `undefined`.
 * @returns `true` for a recognized placeholder, `false` otherwise (including
 *   for `null`, `undefined`, and the empty string).
 */
function isGeneratedErrorDescription(description: string | null | undefined): boolean {
  if (description == null) {
    return false;
  }

  const candidate = description.trim().toLowerCase();
  if (candidate === 'failed to generate description') {
    return true;
  }
  return candidate.startsWith('error generating description:');
}
/**
 * Builds the slash-separated enrichment artifact directory path for one sync
 * of a connection.
 *
 * @param connectionId - Connection identifier used as the second path segment.
 * @param syncId - Sync identifier used as the fourth path segment.
 * @returns `raw-sources/<connectionId>/<LIVE_DATABASE_ADAPTER>/<syncId>/enrichment`.
 */
function artifactDir(connectionId: string, syncId: string): string {
  const segments = ['raw-sources', connectionId, LIVE_DATABASE_ADAPTER, syncId, 'enrichment'];
  return segments.join('/');
}
@ -79,7 +87,7 @@ function tableDescription(
if (table.comment) {
descriptions.db = table.comment;
}
if (update?.tableDescription) {
if (update?.tableDescription && !isGeneratedErrorDescription(update.tableDescription)) {
descriptions.ai = update.tableDescription;
}
return Object.keys(descriptions).length > 0 ? descriptions : undefined;
@ -96,7 +104,7 @@ function columnDescription(
if (column.comment) {
descriptions.db = column.comment;
}
if (aiDescription) {
if (aiDescription && !isGeneratedErrorDescription(aiDescription)) {
descriptions.ai = aiDescription;
}
return Object.keys(descriptions).length > 0 ? descriptions : undefined;