Canonicalize unmapped fallback details

This commit is contained in:
Luca Martial 2026-05-11 20:39:03 -07:00
parent 2c70e506f1
commit f13dc27805
3 changed files with 45 additions and 5 deletions

View file

@ -66,6 +66,7 @@ Search existing wiki pages for the same `tables:` or `sl_refs:` frontmatter and
- Notion `dataSourceCount` counts Notion databases/data sources only. It does not prove that a warehouse/dbt table has or lacks a mapped semantic-layer source.
- Do not create SL sources under the Notion connection just because a page mentions a warehouse, dbt, Looker, or Metabase object. Use the mapped warehouse/source connection after discovery, or emit an unmapped fallback and write wiki-only.
- Distinguish fallback reasons precisely: if a non-Notion warehouse/dbt connection exists but `sl_discover` cannot find the named table/source, use `no_physical_table`; reserve `no_connection_mapping` for cases where there is no plausible non-Notion target connection at all.
- When calling `emit_unmapped_fallback`, pass the table or source identifier as `tableRef` (e.g. `tableRef: "orbit_analytics.customer"`) — the tool generates the canonical detail string from the reason code and `tableRef`. Use the optional `clarification` field only to add context that does not contradict the reason. Do not restate the reason in `clarification`.
## Tools

View file

@ -148,6 +148,7 @@ describe('reconciliation emit tools', () => {
{
rawPath: 'metrics/conversion.yml',
reason: 'no_physical_table',
detail: expect.stringContaining('not present as a source'),
fallback: 'flagged',
},
]);
@ -175,6 +176,7 @@ describe('reconciliation emit tools', () => {
{
rawPath: 'metrics/conversion.yml',
reason: 'no_physical_table',
detail: expect.stringContaining('not present as a source'),
fallback: 'flagged',
},
]);

View file

@ -1,6 +1,6 @@
import { tool } from 'ai';
import { z } from 'zod';
import type { StageIndex, UnmappedFallbackRecord } from '../stages/stage-index.types.js';
import type { StageIndex, UnmappedFallbackRecord, UnmappedFallbackReason } from '../stages/stage-index.types.js';
interface EmitUnmappedFallbackDeps {
stageIndex: StageIndex;
@ -22,14 +22,48 @@ function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallb
return left.rawPath === right.rawPath && left.reason === right.reason && left.fallback === right.fallback;
}
/**
 * Builds the canonical `detail` sentence for an unmapped-fallback reason code.
 *
 * The tool owns this core sentence so the recorded `detail` is always
 * consistent with the reason code. Free-form text from the LLM previously
 * caused contradictions like "no_physical_table" being explained as
 * "no mapped connection exists"; the LLM may only add optional clarification
 * context on top of the sentence produced here.
 *
 * @param reason - Structured reason code that selects the sentence.
 * @param tableRef - Table or source identifier quoted as the subject of the
 *   sentence. Surrounding whitespace is ignored; when the value is missing,
 *   empty, or blank, the generic subject "the referenced object" is used.
 * @returns A single sentence describing the fallback, ending with a period.
 */
function canonicalDetail(reason: UnmappedFallbackReason, tableRef: string | undefined): string {
  // Trim before the emptiness check so a whitespace-only reference falls back
  // to the generic subject instead of being rendered as quoted blanks.
  const normalizedRef = tableRef?.trim();
  const tableClause = normalizedRef ? `'${normalizedRef}'` : 'the referenced object';
  switch (reason) {
    case 'no_physical_table':
      return `${tableClause} is described but is not present as a source in any mapped warehouse/dbt connection.`;
    case 'no_connection_mapping':
      return `${tableClause} has no non-Notion warehouse/dbt connection to map against.`;
    case 'missing_target_table':
      return `${tableClause} is referenced but the target table could not be located.`;
    case 'looker_template_unresolved':
      return `${tableClause} uses LookML templating that could not be resolved.`;
    case 'derived_table_not_supported':
      return `${tableClause} is a derived/inline definition that is not yet supported as a semantic-layer source.`;
    case 'multiple_table_references':
      return `${tableClause} references multiple tables; cannot map to a single source.`;
    case 'unsupported_dialect':
      return `${tableClause} uses a SQL dialect that is not yet supported.`;
    case 'parse_error':
      return `${tableClause} could not be parsed.`;
  }
}
export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
return tool({
description:
'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The reason MUST be one of the structured codes; put any human-readable context in detail.',
'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The tool generates the canonical detail from the structured reason and optional tableRef; use clarification only to add context that does not contradict the reason code.',
inputSchema: z.object({
rawPath: z.string().min(1),
reason: unmappedFallbackReasonSchema,
detail: z.string().optional(),
tableRef: z
.string()
.optional()
.describe('The fully-qualified table or source reference that triggered the fallback (e.g. "orbit_analytics.customer"). Used to generate canonical detail text.'),
clarification: z
.string()
.optional()
.describe('Optional extra context appended to the canonical detail. Must not contradict the reason code.'),
fallback: z.enum(['sql_standalone', 'wiki_only', 'flagged']),
}),
execute: async (input): Promise<string> => {
@ -37,16 +71,19 @@ export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
return `Error: rawPath "${input.rawPath}" is not available to this ingest stage`;
}
const base = canonicalDetail(input.reason, input.tableRef);
const detail = input.clarification ? `${base} ${input.clarification.trim()}`.trim() : base;
const record: UnmappedFallbackRecord = {
rawPath: input.rawPath,
reason: input.reason,
...(input.detail !== undefined ? { detail: input.detail } : {}),
detail,
fallback: input.fallback,
};
if (!deps.stageIndex.unmappedFallbacks.some((candidate) => sameUnmappedFallback(candidate, record))) {
deps.stageIndex.unmappedFallbacks.push(record);
}
return `recorded unmapped fallback for ${record.rawPath} (${record.fallback})`;
return `recorded unmapped fallback for ${record.rawPath} (${record.fallback}): ${detail}`;
},
});
}