mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
fix(ingest): recover textual-conflict gate failures; fix query-history adapter (#255)
* fix(ingest): recover textual-conflict gate failures; fix query-history adapter

  Two latent gaps in the isolated-diff local-ingest pipeline that can abort an otherwise-successful ingest:

  - Metabase: when a work-unit patch hit both a textual conflict and a post-merge dangling sl_ref, the after-textual-resolution branch returned a hard semantic_conflict and rolled back the whole job. It now runs the same repairGateFailure recovery the clean-apply branch already uses (re-validate, then commit the union of resolved + repaired paths), reaching parity.
  - Query history: the historic-sql adapter was registered only when ktx.yaml had context.queryHistory.enabled=true, so `--query-history` threw "Adapter not available for local ingest". Registration now resolves the dialect from driver capability, since the explicit --query-history request is itself the opt-in; the config-gated helper is unchanged for status/setup/probes.

  Adds the previously-missing tests for both paths.

* chore: sync uv.lock to 0.8.0 (regenerated with pinned uv 0.11.11)

* fix(ingest): drop ktx's own scan probes and dedup tables in query history

  Query history (historic-sql) mined two kinds of noise back into context:

  - ktx's own warehouse scan emits relationship- and column-profiling probes (the relationship_profile_values aggregation and the child_values/parent_values FK-overlap CTEs) into pg_stat_statements. shouldDropBySql now filters these ktx-owned, dialect-stable signatures so ktx introspection is not ingested as usage history.
  - The same physical table appears both bare (accounts, via search_path) and schema-qualified (orbit_raw.accounts), producing duplicate per-table work units. canonicalizeTableIdentifiers collapses a bare name into its unique qualified form before work-unit keying; ambiguous names are left untouched.

  On the orbit demo this removes ~35% of sampled query templates (ktx self-probes) and ~45 duplicate per-table work units.

* docs(agents): add Design Reasoning Defaults section
This commit is contained in:
parent
9d3a0b751d
commit
f5dea9a089
9 changed files with 437 additions and 12 deletions
|
|
@ -26,6 +26,21 @@ export function isQueryHistoryEnabled(connection: unknown): boolean {
|
|||
return queryHistoryRecord(connection)?.enabled === true;
|
||||
}
|
||||
|
||||
/**
 * Looks up the query-history dialect purely from the capability of the
 * connection's driver; the `context.queryHistory.enabled` flag persisted in
 * ktx.yaml is deliberately not consulted here.
 *
 * Intended for the adapter-registration path of a run where query history was
 * requested explicitly (e.g. with `--query-history`, which is itself the
 * opt-in), so the persisted flag must not gate registration.
 *
 * @param connection - Raw connection value; its `driver` field is stringified
 *   and lower-cased before the driver registration lookup.
 * @returns The dialect for the registered driver, or null when the driver has
 *   no query-history reader (including when no registration is found).
 */
export function historicSqlDialectForConnectionDriver(connection: unknown): HistoricSqlDialect | null {
  const driverName = String(recordOrNull(connection)?.driver ?? '').toLowerCase();
  const registration = getDriverRegistration(driverName);
  if (!registration?.hasHistoricSqlReader) {
    return null;
  }
  return historicSqlDialectForDriver(registration.driver);
}
|
||||
|
||||
/**
|
||||
* Resolves the query-history dialect for a connection. Returns null when
|
||||
* query history is disabled, or when the connection's driver has no
|
||||
|
|
@ -35,8 +50,5 @@ export function queryHistoryDialectForConnection(connection: unknown): HistoricS
|
|||
if (!isQueryHistoryEnabled(connection)) {
|
||||
return null;
|
||||
}
|
||||
const conn = recordOrNull(connection);
|
||||
const driver = String(conn?.driver ?? '').toLowerCase();
|
||||
const registration = getDriverRegistration(driver);
|
||||
return registration?.hasHistoricSqlReader ? historicSqlDialectForDriver(registration.driver) : null;
|
||||
return historicSqlDialectForConnectionDriver(connection);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,8 +79,21 @@ function matchesAny(value: string | null, patterns: RegExp[]): boolean {
|
|||
return !!value && patterns.some((pattern) => pattern.test(value));
|
||||
}
|
||||
|
||||
// ktx's own warehouse scan emits relationship- and column-profiling probes that land in
// pg_stat_statements (relationship-validation, relationship-composite-candidates, and each
// dialect's relationship value aggregation). They are ktx introspection, not genuine query
// usage, so they must not be mined back as query history. The markers are ktx-owned
// identifiers, stable across dialects.
//
// The patterns are hoisted to module scope (like the other *_RE filters in this file) so
// they are built once rather than on every call. None of them uses the `g`/`y` flag, so
// `.test()` keeps no state between calls.
const KTX_RELATIONSHIP_PROFILE_RE = /\brelationship_profile_values\b/i;
const KTX_CHILD_VALUES_RE = /\bchild_values\b/i;
const KTX_PARENT_VALUES_RE = /\bparent_values\b/i;

/**
 * Reports whether a SQL text carries one of ktx's own scan-probe signatures.
 *
 * @param sql - SQL text of a recorded query.
 * @returns true when the text contains the whole-word identifier
 *   `relationship_profile_values`, or contains BOTH whole-word identifiers
 *   `child_values` and `parent_values` (either one alone is not enough).
 *   Matching is case-insensitive.
 */
function isKtxScanProbe(sql: string): boolean {
  if (KTX_RELATIONSHIP_PROFILE_RE.test(sql)) {
    return true;
  }
  return KTX_CHILD_VALUES_RE.test(sql) && KTX_PARENT_VALUES_RE.test(sql);
}
|
||||
|
||||
/**
 * Decides whether a recorded SQL text should be excluded from query history.
 *
 * A statement is dropped when, checked in this order, it:
 *  1. matches NOISE_PREFIX_RE or SYSTEM_TABLE_RE,
 *  2. is recognised by isKtxScanProbe, or
 *  3. matches TRIVIAL_SQL_RE, unless `config.filters.dropTrivialProbes` is
 *     explicitly `false`.
 *
 * @param sql - SQL text of the recorded query.
 * @param config - Pull configuration; only `filters.dropTrivialProbes` is read.
 * @returns true when the statement must be dropped, false otherwise.
 */
function shouldDropBySql(sql: string, config: HistoricSqlUnifiedPullConfig): boolean {
  // Short-circuit evaluation keeps the checks in their original order.
  return (
    NOISE_PREFIX_RE.test(sql) ||
    SYSTEM_TABLE_RE.test(sql) ||
    isKtxScanProbe(sql) ||
    (config.filters.dropTrivialProbes !== false && TRIVIAL_SQL_RE.test(sql))
  );
}
|
||||
|
|
@ -148,6 +161,53 @@ function isEnabledTable(table: string, filter: EnabledTableFilter | null): boole
|
|||
return filter.exact.has(normalized) || filter.uniqueUnqualified.has(unqualifiedTableIdentifier(normalized));
|
||||
}
|
||||
|
||||
/**
 * pg_stat_statements records queries as written, so one physical table can show up
 * both bare (`accounts`, resolved via search_path) and schema-qualified
 * (`orbit_raw.accounts`). This pass rewrites a bare identifier to its schema-qualified
 * form when exactly one qualified form shares its unqualified name, so the two spellings
 * never turn into separate work units. A bare name with two or more qualified candidates
 * is ambiguous and is left as-is.
 *
 * The templates are mutated in place: when at least one bare name can be canonicalized,
 * every template's `includedTables` and `tablesTouched` are remapped, de-duplicated and
 * sorted. When nothing can be canonicalized the templates are not touched at all.
 *
 * @param parsedTemplates - Parsed templates whose table lists are rewritten in place.
 */
function canonicalizeTableIdentifiers(parsedTemplates: ParsedTemplate[]): void {
  // Every distinct identifier seen in any template's includedTables, in first-seen order.
  const seenTables = new Set<string>();
  for (const template of parsedTemplates) {
    template.includedTables.forEach((identifier) => seenTables.add(identifier));
  }

  // Index each schema-qualified identifier (one containing '.') under its unqualified name.
  const qualifiedFormsByName = new Map<string, Set<string>>();
  for (const identifier of seenTables) {
    if (!identifier.includes('.')) {
      continue;
    }
    const bareName = unqualifiedTableIdentifier(identifier);
    if (bareName.length === 0) {
      continue;
    }
    let forms = qualifiedFormsByName.get(bareName);
    if (forms === undefined) {
      forms = new Set<string>();
      qualifiedFormsByName.set(bareName, forms);
    }
    forms.add(identifier);
  }

  // A bare identifier is remapped only when exactly one qualified form claims its name.
  const bareToQualified = new Map<string, string>();
  for (const identifier of seenTables) {
    if (identifier.includes('.')) {
      continue;
    }
    const candidates = qualifiedFormsByName.get(unqualifiedTableIdentifier(identifier));
    if (candidates?.size === 1) {
      bareToQualified.set(identifier, [...candidates][0]);
    }
  }

  if (bareToQualified.size === 0) {
    return;
  }

  const canonicalizeList = (tables: string[]): string[] => {
    const remapped = tables.map((identifier) => bareToQualified.get(identifier) ?? identifier);
    return [...new Set(remapped)].sort();
  };
  for (const template of parsedTemplates) {
    template.includedTables = canonicalizeList(template.includedTables);
    template.tablesTouched = canonicalizeList(template.tablesTouched);
  }
}
|
||||
|
||||
/**
 * Returns the look-back window, in days, for a historic-SQL pull.
 *
 * @param config - Pull configuration, which may or may not carry `windowDays`.
 * @returns `config.windowDays` when the config has a `windowDays` property,
 *   otherwise the default of 90.
 */
function historicSqlWindowDays(config: HistoricSqlUnifiedPullConfig): number {
  if ('windowDays' in config) {
    return config.windowDays;
  }
  return 90;
}
|
||||
|
|
@ -323,6 +383,8 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
|
|||
});
|
||||
}
|
||||
|
||||
canonicalizeTableIdentifiers(parsedTemplates);
|
||||
|
||||
const byTable = new Map<string, TableAccumulator>();
|
||||
for (const parsed of parsedTemplates) {
|
||||
for (const table of parsed.includedTables) {
|
||||
|
|
|
|||
|
|
@ -155,18 +155,103 @@ export async function integrateWorkUnitPatch(input: IntegrateWorkUnitPatchInput)
|
|||
},
|
||||
);
|
||||
} catch (semanticError) {
|
||||
if (preApplyHead) {
|
||||
await input.integrationGit.resetHardTo(preApplyHead);
|
||||
}
|
||||
const reason = errorMessage(semanticError);
|
||||
await input.trace.event('error', 'integration', 'patch_semantic_conflict_after_textual_resolution', {
|
||||
unitKey: input.unitKey,
|
||||
patchPath: input.patchPath,
|
||||
touchedPaths: textualResolution.changedPaths,
|
||||
reason: errorMessage(semanticError),
|
||||
reason,
|
||||
});
|
||||
|
||||
// A textual conflict and a semantic-gate failure can co-occur: the resolver
|
||||
// reconciles the text but can leave wiki sl_refs pointing at measures the
|
||||
// merged source no longer defines. Recover via the same gate repair the
|
||||
// clean-apply branch uses, instead of hard-failing the whole job.
|
||||
if (input.repairGateFailure) {
|
||||
const gateRepair = await input.repairGateFailure({
|
||||
unitKey: input.unitKey,
|
||||
patchPath: input.patchPath,
|
||||
touchedPaths: textualResolution.changedPaths,
|
||||
reason,
|
||||
});
|
||||
|
||||
if (gateRepair.status !== 'failed') {
|
||||
// The resolver wrote its merge to the worktree (unstaged); the repair
|
||||
// edited a subset on top. Commit the union so neither is dropped.
|
||||
const resolvedAndRepairedPaths = [
|
||||
...new Set([...textualResolution.changedPaths, ...gateRepair.changedPaths]),
|
||||
].sort();
|
||||
try {
|
||||
await traceTimed(
|
||||
input.trace,
|
||||
'integration',
|
||||
'semantic_gate_after_gate_repair',
|
||||
{ unitKey: input.unitKey, touchedPaths: gateRepair.changedPaths },
|
||||
async () => {
|
||||
await input.validateAppliedTree(gateRepair.changedPaths);
|
||||
},
|
||||
);
|
||||
|
||||
const commit = await input.integrationGit.commitFiles(
|
||||
resolvedAndRepairedPaths,
|
||||
`ingest: resolve WorkUnit ${input.unitKey} conflict`,
|
||||
input.author.name,
|
||||
input.author.email,
|
||||
);
|
||||
if (commit.created) {
|
||||
await input.trace.event('debug', 'integration', 'patch_accepted_after_textual_resolution', {
|
||||
unitKey: input.unitKey,
|
||||
commitSha: commit.commitHash,
|
||||
touchedPaths: resolvedAndRepairedPaths,
|
||||
attempts: textualResolution.attempts,
|
||||
gateRepairAttempts: gateRepair.attempts,
|
||||
});
|
||||
return {
|
||||
status: 'accepted',
|
||||
commitSha: commit.commitHash,
|
||||
touchedPaths: resolvedAndRepairedPaths,
|
||||
textualResolution,
|
||||
gateRepair,
|
||||
};
|
||||
}
|
||||
} catch (repairValidationError) {
|
||||
if (preApplyHead) {
|
||||
await input.integrationGit.resetHardTo(preApplyHead);
|
||||
}
|
||||
await input.trace.event('error', 'integration', 'patch_semantic_conflict_after_textual_resolution', {
|
||||
unitKey: input.unitKey,
|
||||
patchPath: input.patchPath,
|
||||
touchedPaths: gateRepair.changedPaths,
|
||||
reason: errorMessage(repairValidationError),
|
||||
});
|
||||
return {
|
||||
status: 'semantic_conflict',
|
||||
reason: errorMessage(repairValidationError),
|
||||
touchedPaths: gateRepair.changedPaths,
|
||||
textualResolution,
|
||||
gateRepair,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (preApplyHead) {
|
||||
await input.integrationGit.resetHardTo(preApplyHead);
|
||||
}
|
||||
return {
|
||||
status: 'semantic_conflict',
|
||||
reason: gateRepair.status === 'failed' ? gateRepair.reason : reason,
|
||||
touchedPaths: textualResolution.changedPaths,
|
||||
textualResolution,
|
||||
gateRepair,
|
||||
};
|
||||
}
|
||||
|
||||
if (preApplyHead) {
|
||||
await input.integrationGit.resetHardTo(preApplyHead);
|
||||
}
|
||||
return {
|
||||
status: 'semantic_conflict',
|
||||
reason: errorMessage(semanticError),
|
||||
reason,
|
||||
touchedPaths: textualResolution.changedPaths,
|
||||
textualResolution,
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue