mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-25 08:48:08 +02:00
feat(context): add warehouse verification tools (#46)
* feat(context): add warehouse dialect dispatch
* feat(context): read warehouse scan catalog
* feat(context): add entity details verification tool
* feat(context): add ingest SQL verification tool
* feat(context): add raw warehouse discovery tool
* feat(context): expose warehouse verification tools to ingest
* docs(context): add ingest identifier verification protocol
* test(context): guard ingest identifier verification prompts
* chore(context): verify warehouse verification tools
* docs: add warehouse verification tools plan and spec
* fix(context): expose target warehouses to Notion ingest
* fix(context): update ingest prompts for warehouse verification tools
* fix(context): scope raw schema discovery to allowed connections
* fix(context): verify warehouse column display targets
* docs: add notion warehouse verification gap closure plan
* fix(context): include raw discovery connection names
* fix(context): expose warehouse targets for LookML and MetricFlow
* fix(context): pass connection config to ingest query executors
* fix(cli): enable read-only SQL probes for local ingest
* docs: add warehouse verification final v1 closure plan
* fix(context): align warehouse sql probe prompt shape
* docs: add warehouse verification prompt shape closure plan
* test(context): catch connectionless sql execution prompt examples
* fix(context): include connection name in sl capture sql example
* docs: add warehouse verification sql example closure plan
* fix(context): report structured entity detail misses
* docs: add warehouse verification structured target miss closure plan
* fix: report untracked squash merge conflicts
* feat: require ingest verification ledger
* fix: stabilize ingest wiki references
This commit is contained in:
parent
bcb0d2f8f7
commit
c22248dabf
89 changed files with 7818 additions and 191 deletions
|
|
@ -6,7 +6,7 @@ import type { Tool } from 'ai';
|
|||
import YAML from 'yaml';
|
||||
import type { AgentRunnerService } from '../agent/index.js';
|
||||
import { AgentRunnerService as DefaultAgentRunnerService } from '../agent/index.js';
|
||||
import { localConnectionInfoFromConfig } from '../connections/index.js';
|
||||
import { localConnectionInfoFromConfig, type KtxSqlQueryExecutorPort } from '../connections/index.js';
|
||||
import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger, SessionWorktreeService } from '../core/index.js';
|
||||
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
|
||||
|
|
@ -56,6 +56,7 @@ import {
|
|||
buildKnowledgeSearchText,
|
||||
type KnowledgeEventPort,
|
||||
type KnowledgeIndexPort,
|
||||
type KnowledgeIndexPageListing,
|
||||
KnowledgeWikiService,
|
||||
searchLocalKnowledgePages,
|
||||
SqliteKnowledgeIndex,
|
||||
|
|
@ -77,6 +78,7 @@ import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './conte
|
|||
import { DiffSetService } from './diff-set.service.js';
|
||||
import { IngestBundleRunner } from './ingest-bundle.runner.js';
|
||||
import { PageTriageService } from './page-triage/index.js';
|
||||
import { createWarehouseVerificationTools } from './tools/warehouse-verification/index.js';
|
||||
import type {
|
||||
IngestBundleRunnerDeps,
|
||||
IngestCommitMessagePort,
|
||||
|
|
@ -103,7 +105,7 @@ export interface CreateLocalBundleIngestRuntimeOptions {
|
|||
llmDebugRequestFile?: string;
|
||||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult> };
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
jobIdFactory?: () => string;
|
||||
logger?: KtxLogger;
|
||||
}
|
||||
|
|
@ -169,9 +171,7 @@ class LocalAuthorResolver implements GitAuthorResolverPort {
|
|||
class LocalConnectionCatalog implements SlConnectionCatalogPort {
|
||||
constructor(
|
||||
private readonly project: KtxLocalProject,
|
||||
private readonly queryExecutor?: {
|
||||
execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult>;
|
||||
},
|
||||
private readonly queryExecutor?: KtxSqlQueryExecutorPort,
|
||||
) {}
|
||||
|
||||
async listEnabledConnections(ids: string[]): Promise<KtxConnectionInfo[]> {
|
||||
|
|
@ -192,7 +192,12 @@ class LocalConnectionCatalog implements SlConnectionCatalogPort {
|
|||
if (!this.queryExecutor) {
|
||||
throw new Error('Local ingest has no query executor configured');
|
||||
}
|
||||
return this.queryExecutor.execute({ connectionId, sql });
|
||||
return this.queryExecutor.execute({
|
||||
connectionId,
|
||||
projectDir: this.project.projectDir,
|
||||
connection: this.project.config.connections[connectionId],
|
||||
sql,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -347,15 +352,19 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
|
||||
async listPagesForUser(
|
||||
userId: string,
|
||||
): Promise<Array<{ page_key: string; summary: string; scope: string; scope_id: string | null }>> {
|
||||
const pages: Array<{ page_key: string; summary: string; scope: string; scope_id: string | null }> = [];
|
||||
): Promise<KnowledgeIndexPageListing[]> {
|
||||
const pages: KnowledgeIndexPageListing[] = [];
|
||||
for (const scope of [
|
||||
{ scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' },
|
||||
{ scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` },
|
||||
]) {
|
||||
const listed = await this.project.fileStore.listFiles(scope.dir, true);
|
||||
for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) {
|
||||
const pageKey = file.replace(/\.md$/, '');
|
||||
const parsedPath = parseKnowledgeIndexPath(file.startsWith('global/') || file.startsWith('user/') ? file : `${scope.dir.replace('knowledge/', '')}/${file}`);
|
||||
if (!parsedPath || parsedPath.scope !== scope.scope) {
|
||||
continue;
|
||||
}
|
||||
const pageKey = parsedPath.pageKey;
|
||||
const raw = await this.project.fileStore.readFile(`${scope.dir}/${file}`);
|
||||
const parsed = parseWiki(raw.content);
|
||||
pages.push({
|
||||
|
|
@ -363,6 +372,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
summary: parsed.summary,
|
||||
scope: scope.scope,
|
||||
scope_id: scope.scopeId,
|
||||
tags: parseWikiTags(raw.content),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -432,13 +442,6 @@ function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; page
|
|||
const pageKey = segments[1].replace(/\.md$/, '');
|
||||
return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'GLOBAL', pageKey } : null;
|
||||
}
|
||||
if (segments.length >= 3 && segments[0] === 'global' && segments[1] === 'historic-sql') {
|
||||
const historicPath = segments.slice(2).join('/').replace(/\.md$/, '');
|
||||
if (historicPath.split('/').every((segment) => /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment))) {
|
||||
return { scope: 'GLOBAL', pageKey: `historic-sql/${historicPath}` };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (segments.length === 3 && segments[0] === 'user') {
|
||||
const pageKey = segments[2].replace(/\.md$/, '');
|
||||
return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'USER', pageKey } : null;
|
||||
|
|
@ -486,38 +489,47 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
|||
slSearchService: deps.slSearchService,
|
||||
authorResolver: deps.authorResolver,
|
||||
};
|
||||
const wikiSearchTool = new WikiSearchTool({
|
||||
search: async (input) => {
|
||||
const results = await searchLocalKnowledgePages(deps.project, {
|
||||
userId: input.userId,
|
||||
query: input.query,
|
||||
limit: input.limit,
|
||||
embeddingService: deps.embedding,
|
||||
});
|
||||
return {
|
||||
results: results.slice(0, input.limit).map((result) => ({
|
||||
key: result.key,
|
||||
path: result.path,
|
||||
summary: result.summary,
|
||||
score: result.score,
|
||||
matchReasons: result.matchReasons,
|
||||
lanes: result.lanes,
|
||||
})),
|
||||
totalFound: results.length,
|
||||
};
|
||||
},
|
||||
});
|
||||
const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 });
|
||||
const warehouseVerificationTools = createWarehouseVerificationTools({
|
||||
connections: deps.connections,
|
||||
fallbackFileStore: deps.project.fileStore,
|
||||
wikiSearchTool,
|
||||
slDiscoverTool,
|
||||
});
|
||||
this.baseTools = [
|
||||
new WikiReadTool(deps.wikiService, deps.knowledgeIndex),
|
||||
new WikiSearchTool({
|
||||
search: async (input) => {
|
||||
const results = await searchLocalKnowledgePages(deps.project, {
|
||||
userId: input.userId,
|
||||
query: input.query,
|
||||
limit: input.limit,
|
||||
embeddingService: deps.embedding,
|
||||
});
|
||||
return {
|
||||
results: results.slice(0, input.limit).map((result) => ({
|
||||
key: result.key,
|
||||
path: result.path,
|
||||
summary: result.summary,
|
||||
score: result.score,
|
||||
matchReasons: result.matchReasons,
|
||||
lanes: result.lanes,
|
||||
})),
|
||||
totalFound: results.length,
|
||||
};
|
||||
},
|
||||
}),
|
||||
new WikiListTagsTool(deps.wikiService, deps.knowledgeIndex),
|
||||
wikiSearchTool,
|
||||
new WikiListTagsTool(deps.knowledgeIndex),
|
||||
new WikiWriteTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
|
||||
new WikiRemoveTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
|
||||
new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }),
|
||||
slDiscoverTool,
|
||||
new SlEditSourceTool(slDeps),
|
||||
new SlReadSourceTool(slDeps),
|
||||
new SlWriteSourceTool(slDeps),
|
||||
new SlValidateTool(slDeps),
|
||||
new SlRollbackTool(deps.slSourcesRepository, deps.connections, 0),
|
||||
...warehouseVerificationTools,
|
||||
];
|
||||
this.contextTools = [
|
||||
new ContextEvidenceSearchTool(deps.contextStore, deps.embedding),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue