feat(context): add warehouse verification tools (#46)

* feat(context): add warehouse dialect dispatch

* feat(context): read warehouse scan catalog

* feat(context): add entity details verification tool

* feat(context): add ingest SQL verification tool

* feat(context): add raw warehouse discovery tool

* feat(context): expose warehouse verification tools to ingest

* docs(context): add ingest identifier verification protocol

* test(context): guard ingest identifier verification prompts

* chore(context): verify warehouse verification tools

* docs: add warehouse verification tools plan and spec

* fix(context): expose target warehouses to Notion ingest

* fix(context): update ingest prompts for warehouse verification tools

* fix(context): scope raw schema discovery to allowed connections

* fix(context): verify warehouse column display targets

* docs: add notion warehouse verification gap closure plan

* fix(context): include raw discovery connection names

* fix(context): expose warehouse targets for LookML and MetricFlow

* fix(context): pass connection config to ingest query executors

* fix(cli): enable read-only SQL probes for local ingest

* docs: add warehouse verification final v1 closure plan

* fix(context): align warehouse sql probe prompt shape

* docs: add warehouse verification prompt shape closure plan

* test(context): catch connectionless sql execution prompt examples

* fix(context): include connection name in sl capture sql example

* docs: add warehouse verification sql example closure plan

* fix(context): report structured entity detail misses

* docs: add warehouse verification structured target miss closure plan

* fix: report untracked squash merge conflicts

* feat: require ingest verification ledger

* fix: stabilize ingest wiki references
This commit is contained in:
Andrey Avtomonov 2026-05-13 13:43:23 +02:00 committed by GitHub
parent bcb0d2f8f7
commit c22248dabf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
89 changed files with 7818 additions and 191 deletions

View file

@ -6,7 +6,7 @@ import type { Tool } from 'ai';
import YAML from 'yaml';
import type { AgentRunnerService } from '../agent/index.js';
import { AgentRunnerService as DefaultAgentRunnerService } from '../agent/index.js';
import { localConnectionInfoFromConfig } from '../connections/index.js';
import { localConnectionInfoFromConfig, type KtxSqlQueryExecutorPort } from '../connections/index.js';
import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
import { noopLogger, SessionWorktreeService } from '../core/index.js';
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
@ -56,6 +56,7 @@ import {
buildKnowledgeSearchText,
type KnowledgeEventPort,
type KnowledgeIndexPort,
type KnowledgeIndexPageListing,
KnowledgeWikiService,
searchLocalKnowledgePages,
SqliteKnowledgeIndex,
@ -77,6 +78,7 @@ import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './conte
import { DiffSetService } from './diff-set.service.js';
import { IngestBundleRunner } from './ingest-bundle.runner.js';
import { PageTriageService } from './page-triage/index.js';
import { createWarehouseVerificationTools } from './tools/warehouse-verification/index.js';
import type {
IngestBundleRunnerDeps,
IngestCommitMessagePort,
@ -103,7 +105,7 @@ export interface CreateLocalBundleIngestRuntimeOptions {
llmDebugRequestFile?: string;
memoryModel?: string;
semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult> };
queryExecutor?: KtxSqlQueryExecutorPort;
jobIdFactory?: () => string;
logger?: KtxLogger;
}
@ -169,9 +171,7 @@ class LocalAuthorResolver implements GitAuthorResolverPort {
class LocalConnectionCatalog implements SlConnectionCatalogPort {
constructor(
private readonly project: KtxLocalProject,
private readonly queryExecutor?: {
execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult>;
},
private readonly queryExecutor?: KtxSqlQueryExecutorPort,
) {}
async listEnabledConnections(ids: string[]): Promise<KtxConnectionInfo[]> {
@ -192,7 +192,12 @@ class LocalConnectionCatalog implements SlConnectionCatalogPort {
if (!this.queryExecutor) {
throw new Error('Local ingest has no query executor configured');
}
return this.queryExecutor.execute({ connectionId, sql });
return this.queryExecutor.execute({
connectionId,
projectDir: this.project.projectDir,
connection: this.project.config.connections[connectionId],
sql,
});
}
}
@ -347,15 +352,19 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
async listPagesForUser(
userId: string,
): Promise<Array<{ page_key: string; summary: string; scope: string; scope_id: string | null }>> {
const pages: Array<{ page_key: string; summary: string; scope: string; scope_id: string | null }> = [];
): Promise<KnowledgeIndexPageListing[]> {
const pages: KnowledgeIndexPageListing[] = [];
for (const scope of [
{ scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' },
{ scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` },
]) {
const listed = await this.project.fileStore.listFiles(scope.dir, true);
for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) {
const pageKey = file.replace(/\.md$/, '');
const parsedPath = parseKnowledgeIndexPath(file.startsWith('global/') || file.startsWith('user/') ? file : `${scope.dir.replace('knowledge/', '')}/${file}`);
if (!parsedPath || parsedPath.scope !== scope.scope) {
continue;
}
const pageKey = parsedPath.pageKey;
const raw = await this.project.fileStore.readFile(`${scope.dir}/${file}`);
const parsed = parseWiki(raw.content);
pages.push({
@ -363,6 +372,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
summary: parsed.summary,
scope: scope.scope,
scope_id: scope.scopeId,
tags: parseWikiTags(raw.content),
});
}
}
@ -432,13 +442,6 @@ function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; page
const pageKey = segments[1].replace(/\.md$/, '');
return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'GLOBAL', pageKey } : null;
}
if (segments.length >= 3 && segments[0] === 'global' && segments[1] === 'historic-sql') {
const historicPath = segments.slice(2).join('/').replace(/\.md$/, '');
if (historicPath.split('/').every((segment) => /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment))) {
return { scope: 'GLOBAL', pageKey: `historic-sql/${historicPath}` };
}
return null;
}
if (segments.length === 3 && segments[0] === 'user') {
const pageKey = segments[2].replace(/\.md$/, '');
return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'USER', pageKey } : null;
@ -486,38 +489,47 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
slSearchService: deps.slSearchService,
authorResolver: deps.authorResolver,
};
const wikiSearchTool = new WikiSearchTool({
search: async (input) => {
const results = await searchLocalKnowledgePages(deps.project, {
userId: input.userId,
query: input.query,
limit: input.limit,
embeddingService: deps.embedding,
});
return {
results: results.slice(0, input.limit).map((result) => ({
key: result.key,
path: result.path,
summary: result.summary,
score: result.score,
matchReasons: result.matchReasons,
lanes: result.lanes,
})),
totalFound: results.length,
};
},
});
const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 });
const warehouseVerificationTools = createWarehouseVerificationTools({
connections: deps.connections,
fallbackFileStore: deps.project.fileStore,
wikiSearchTool,
slDiscoverTool,
});
this.baseTools = [
new WikiReadTool(deps.wikiService, deps.knowledgeIndex),
new WikiSearchTool({
search: async (input) => {
const results = await searchLocalKnowledgePages(deps.project, {
userId: input.userId,
query: input.query,
limit: input.limit,
embeddingService: deps.embedding,
});
return {
results: results.slice(0, input.limit).map((result) => ({
key: result.key,
path: result.path,
summary: result.summary,
score: result.score,
matchReasons: result.matchReasons,
lanes: result.lanes,
})),
totalFound: results.length,
};
},
}),
new WikiListTagsTool(deps.wikiService, deps.knowledgeIndex),
wikiSearchTool,
new WikiListTagsTool(deps.knowledgeIndex),
new WikiWriteTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
new WikiRemoveTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }),
slDiscoverTool,
new SlEditSourceTool(slDeps),
new SlReadSourceTool(slDeps),
new SlWriteSourceTool(slDeps),
new SlValidateTool(slDeps),
new SlRollbackTool(deps.slSourcesRepository, deps.connections, 0),
...warehouseVerificationTools,
];
this.contextTools = [
new ContextEvidenceSearchTool(deps.contextStore, deps.embedding),