mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
fix(context): avoid saving scan error descriptions (#37)
This commit is contained in:
parent
f422facf10
commit
4d4441ccd5
4 changed files with 129 additions and 12 deletions
|
|
@ -51,6 +51,29 @@ function createLlmProvider(text = 'generated description') {
|
|||
} as any;
|
||||
}
|
||||
|
||||
/**
 * Builds a stub LLM provider for tests in which text generation always fails.
 *
 * As a side effect, the mocked `generateText` is configured to reject with
 * `new Error(message)`, so any code path that reaches it sees a failed LLM call.
 *
 * @param message - Error message carried by the rejection.
 * @returns A provider-shaped object whose members are all `vi.fn()` stubs. It is
 *   cast to `any`, like the sibling `createLlmProvider` stub, because only a
 *   subset of the real provider surface is filled in.
 */
function createFailingLlmProvider(message = 'timeout exceeded when trying to connect') {
  // Every generateText call made after this point rejects with the given message.
  vi.mocked(generateText).mockRejectedValue(new Error(message) as never);
  return {
    getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }),
    getModelByName: vi.fn(),
    cacheMarker: vi.fn(),
    repairToolCallHandler: vi.fn(),
    thinkingProviderOptions: vi.fn(),
    telemetryConfig: vi.fn(),
    // Prompt caching is reported as disabled; the remaining fields are fixed values.
    promptCachingConfig: vi.fn(() => ({
      enabled: false,
      systemTtl: '1h',
      toolsTtl: '1h',
      historyTtl: '5m',
      cacheSystem: true,
      cacheTools: true,
      cacheHistory: true,
      vertexFallbackTo5m: false,
    })),
    activeBackend: vi.fn(() => 'anthropic'),
  } as any;
}
|
||||
|
||||
function createConnector(): KtxScanConnector {
|
||||
return {
|
||||
id: 'test-connector',
|
||||
|
|
@ -274,6 +297,51 @@ describe('KtxDescriptionGenerator', () => {
|
|||
expect('introspect' in sampler).toBe(false);
|
||||
});
|
||||
|
||||
// Regression test: when the LLM call rejects, the generator must report null
// descriptions and must not write anything to the cache.
it('does not turn LLM failures into generated descriptions', async () => {
  const cache = createCache();
  const connector = createConnector();
  const generator = new KtxDescriptionGenerator({
    // Provider stub whose generateText mock always rejects.
    llmProvider: createFailingLlmProvider(),
    cache,
    settings: {
      columnMaxWords: 12,
      tableMaxWords: 18,
      dataSourceMaxWords: 24,
    },
  });

  const columnResult = await generator.generateColumnDescriptions({
    connectionId: 'conn-1',
    connector,
    context: { runId: 'run-1' },
    dataSourceType: 'POSTGRESQL',
    supportsNestedAnalysis: false,
    table: {
      catalog: null,
      db: 'public',
      name: 'orders',
      columns: [{ name: 'status' }],
    },
  });

  // The table-level description resolves to null instead of an error string.
  await expect(
    generator.generateTableDescription({
      connectionId: 'conn-1',
      connector,
      context: { runId: 'run-1' },
      dataSourceType: 'POSTGRESQL',
      table: { catalog: null, db: 'public', name: 'orders' },
    }),
  ).resolves.toBeNull();

  // The failed column is listed with a null description and is counted as
  // neither processed nor skipped.
  expect(columnResult).toEqual({
    columnDescriptions: [['status', null]],
    processedColumns: [],
    skippedColumns: [],
  });
  // Nothing may be cached for a failed generation.
  expect(cache.set).not.toHaveBeenCalled();
});
|
||||
|
||||
it('generates and caches table and data-source descriptions', async () => {
|
||||
const cache = createCache();
|
||||
const connector = createConnector();
|
||||
|
|
|
|||
|
|
@ -348,7 +348,7 @@ export class KtxDescriptionGenerator {
|
|||
};
|
||||
}
|
||||
|
||||
async generateTableDescription(input: KtxGenerateTableDescriptionInput): Promise<string> {
|
||||
async generateTableDescription(input: KtxGenerateTableDescriptionInput): Promise<string | null> {
|
||||
const tableRef = toTableRef(input.table);
|
||||
const cacheKey = this.cache?.buildTableKey(tableRef);
|
||||
if (cacheKey) {
|
||||
|
|
@ -386,7 +386,7 @@ export class KtxDescriptionGenerator {
|
|||
this.settings.tableMaxWords,
|
||||
'ktx-table-description',
|
||||
);
|
||||
if (cacheKey) {
|
||||
if (cacheKey && description) {
|
||||
await this.cache?.set(cacheKey, description);
|
||||
}
|
||||
return description;
|
||||
|
|
@ -396,7 +396,7 @@ export class KtxDescriptionGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
async generateDataSourceDescription(input: KtxGenerateDataSourceDescriptionInput): Promise<string> {
|
||||
async generateDataSourceDescription(input: KtxGenerateDataSourceDescriptionInput): Promise<string | null> {
|
||||
if (input.tables.length === 0) {
|
||||
return 'No tables found in database';
|
||||
}
|
||||
|
|
@ -451,7 +451,7 @@ export class KtxDescriptionGenerator {
|
|||
this.settings.dataSourceMaxWords,
|
||||
'ktx-data-source-description',
|
||||
);
|
||||
if (cacheKey) {
|
||||
if (cacheKey && description) {
|
||||
await this.cache?.set(cacheKey, description);
|
||||
}
|
||||
return description;
|
||||
|
|
@ -543,7 +543,7 @@ export class KtxDescriptionGenerator {
|
|||
'ktx-column-description',
|
||||
);
|
||||
|
||||
if (cacheKey) {
|
||||
if (cacheKey && description) {
|
||||
await this.cache?.set(cacheKey, description);
|
||||
}
|
||||
|
||||
|
|
@ -551,20 +551,20 @@ export class KtxDescriptionGenerator {
|
|||
columnName: column.name,
|
||||
description,
|
||||
skipped: false,
|
||||
processed: true,
|
||||
processed: description !== null,
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger?.error(`Error analyzing column '${column.name}': ${errorMessage(error)}`);
|
||||
return {
|
||||
columnName: column.name,
|
||||
description: `Error generating description: ${errorMessage(error)}`,
|
||||
description: null,
|
||||
skipped: false,
|
||||
processed: false,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise<string> {
|
||||
private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise<string | null> {
|
||||
try {
|
||||
const text = await generateKtxText({
|
||||
llmProvider: this.llmProvider,
|
||||
|
|
@ -573,10 +573,10 @@ export class KtxDescriptionGenerator {
|
|||
temperature: this.settings.temperature,
|
||||
});
|
||||
const description = text.trim();
|
||||
return description || 'Failed to generate description';
|
||||
return description || null;
|
||||
} catch (error) {
|
||||
this.logger?.error(`Error generating AI description: ${errorMessage(error)}`);
|
||||
return `Error generating description: ${errorMessage(error)}`;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -553,6 +553,47 @@ describe('writeLocalScanEnrichmentArtifacts', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Regression test: description updates that carry generated error text must not
// be written as `ai` descriptions into the manifest shard, while genuine AI
// descriptions in the same update are still written.
it('does not persist generated error descriptions in manifest shards', async () => {
  await writeLocalScanManifestShards({
    project,
    connectionId: 'warehouse',
    syncId: 'sync-error-description',
    driver: 'postgres',
    snapshot,
    descriptionUpdates: [
      {
        table: { catalog: null, db: 'public', name: 'orders' },
        // Error placeholder at table level.
        tableDescription: 'Error generating description: timeout exceeded when trying to connect',
        columnDescriptions: {
          // Error placeholder for one column, a real description for the other.
          id: 'Error generating description: timeout exceeded when trying to connect',
          customer_id: 'AI customer reference',
        },
      },
    ],
    dryRun: false,
  });

  const shard = YAML.parse(
    await readFile(join(tempDir, 'project/semantic-layer/warehouse/_schema/public.yaml'), 'utf8'),
  ) as {
    tables: {
      orders: {
        descriptions?: Record<string, string>;
        columns: Array<{ name: string; descriptions?: Record<string, string> }>;
      };
    };
  };

  // Table and `id` column keep only their `db` description.
  expect(shard.tables.orders.descriptions).toEqual({ db: 'DB orders table' });
  expect(shard.tables.orders.columns.find((column) => column.name === 'id')?.descriptions).toEqual({
    db: 'DB order id',
  });
  // The genuine AI description for `customer_id` is persisted next to the `db` one.
  expect(shard.tables.orders.columns.find((column) => column.name === 'customer_id')?.descriptions).toEqual({
    db: 'DB customer id',
    ai: 'AI customer reference',
  });
});
|
||||
|
||||
it('writes accepted composite relationships to relationship artifacts and manifest shards', async () => {
|
||||
const compositeSnapshot: KtxSchemaSnapshot = {
|
||||
connectionId: 'warehouse',
|
||||
|
|
|
|||
|
|
@ -62,6 +62,14 @@ interface ExistingManifestState {
|
|||
|
||||
type LocalDescriptionUpdates = KtxLocalScanEnrichmentResult['descriptionUpdates'];
|
||||
|
||||
/**
 * Reports whether a description is an error placeholder rather than real text.
 *
 * The value is trimmed and lower-cased before comparison. Two forms count as
 * placeholders: the exact text `failed to generate description`, and any text
 * beginning with `error generating description:`.
 *
 * @param description - Candidate description; may be `null` or `undefined`.
 * @returns `true` for a placeholder, `false` otherwise (including for `null`,
 *   `undefined` and the empty string).
 */
function isGeneratedErrorDescription(description: string | null | undefined): boolean {
  const normalized = description?.trim().toLowerCase();
  // Missing input is not a placeholder.
  if (normalized === undefined) {
    return false;
  }
  return (
    normalized === 'failed to generate description' ||
    normalized.startsWith('error generating description:')
  );
}
|
||||
|
||||
/**
 * Builds the relative directory path for the enrichment artifacts of one sync.
 *
 * @param connectionId - Connection identifier used as the first path segment
 *   under `raw-sources`.
 * @param syncId - Sync identifier used as the segment before `enrichment`.
 * @returns `raw-sources/<connectionId>/<LIVE_DATABASE_ADAPTER>/<syncId>/enrichment`.
 */
function artifactDir(connectionId: string, syncId: string): string {
  return `raw-sources/${connectionId}/${LIVE_DATABASE_ADAPTER}/${syncId}/enrichment`;
}
|
||||
|
|
@ -79,7 +87,7 @@ function tableDescription(
|
|||
if (table.comment) {
|
||||
descriptions.db = table.comment;
|
||||
}
|
||||
if (update?.tableDescription) {
|
||||
if (update?.tableDescription && !isGeneratedErrorDescription(update.tableDescription)) {
|
||||
descriptions.ai = update.tableDescription;
|
||||
}
|
||||
return Object.keys(descriptions).length > 0 ? descriptions : undefined;
|
||||
|
|
@ -96,7 +104,7 @@ function columnDescription(
|
|||
if (column.comment) {
|
||||
descriptions.db = column.comment;
|
||||
}
|
||||
if (aiDescription) {
|
||||
if (aiDescription && !isGeneratedErrorDescription(aiDescription)) {
|
||||
descriptions.ai = aiDescription;
|
||||
}
|
||||
return Object.keys(descriptions).length > 0 ? descriptions : undefined;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue