mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
feat(context): add warehouse verification tools (#46)
* feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
This commit is contained in:
parent
bcb0d2f8f7
commit
c22248dabf
89 changed files with 7818 additions and 191 deletions
|
|
@ -36,6 +36,7 @@ import { BaseTool, type GitAuthorResolverPort, type ToolContext } from '../tools
|
|||
import {
|
||||
type KnowledgeEventPort,
|
||||
type KnowledgeIndexPort,
|
||||
type KnowledgeIndexPageListing,
|
||||
KnowledgeWikiService,
|
||||
searchLocalKnowledgePages,
|
||||
WikiListTagsTool,
|
||||
|
|
@ -219,7 +220,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
}
|
||||
|
||||
async listPagesForUser(userId: string) {
|
||||
const pages: Array<{ id?: string; page_key: string; summary: string; scope: string; scope_id: string | null }> = [];
|
||||
const pages: KnowledgeIndexPageListing[] = [];
|
||||
for (const scope of [
|
||||
{ scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' },
|
||||
{ scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` },
|
||||
|
|
@ -234,6 +235,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
summary: parsed.summary,
|
||||
scope: scope.scope,
|
||||
scope_id: scope.scopeId,
|
||||
tags: parseWikiTags(raw.content),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -433,7 +435,7 @@ class LocalMemoryToolsetFactory implements MemoryToolsetFactoryPort {
|
|||
};
|
||||
},
|
||||
}),
|
||||
new WikiListTagsTool(deps.wikiService, deps.knowledgeIndex),
|
||||
new WikiListTagsTool(deps.knowledgeIndex),
|
||||
new WikiWriteTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
|
||||
new WikiRemoveTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
|
||||
];
|
||||
|
|
@ -468,6 +470,17 @@ function parseWiki(raw: string): { summary: string; content: string } {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Reads the `tags` list out of a wiki page's leading YAML frontmatter.
 *
 * @param raw - Full page text; frontmatter is recognised only when the text starts with a
 *   `---` line and the block is closed by another `---` line.
 * @returns The string entries of the frontmatter `tags` array. Returns an empty array when no
 *   frontmatter block is found or when `tags` is not an array; non-string entries are dropped.
 */
function parseWikiTags(raw: string): string[] {
  const frontmatterBlock = /^---\n([\s\S]*?)\n---\n?/.exec(raw);
  if (frontmatterBlock === null) {
    return [];
  }

  // An empty frontmatter block parses to a nullish value, so fall back to an empty record.
  const parsed = (YAML.parse(frontmatterBlock[1]) ?? {}) as Record<string, unknown>;
  const candidate = parsed.tags;
  if (!Array.isArray(candidate)) {
    return [];
  }
  return candidate.filter((entry): entry is string => typeof entry === 'string');
}
|
||||
|
||||
function scoreText(text: string, query: string): number {
|
||||
const normalized = query.toLowerCase().trim();
|
||||
if (!normalized) {
|
||||
|
|
|
|||
|
|
@ -23,11 +23,42 @@ const expectedAdapterSkillHeadings: Record<string, string> = {
|
|||
metabase_ingest: '# Metabase to KTX Semantic Layer',
|
||||
metricflow_ingest: '# MetricFlow to KTX Semantic Layer',
|
||||
};
|
||||
const verificationWriterSkills = [
|
||||
'notion_synthesize',
|
||||
'dbt_ingest',
|
||||
'lookml_ingest',
|
||||
'looker_ingest',
|
||||
'metabase_ingest',
|
||||
'metricflow_ingest',
|
||||
'live_database_ingest',
|
||||
'historic_sql_table_digest',
|
||||
'historic_sql_patterns',
|
||||
'knowledge_capture',
|
||||
'sl_capture',
|
||||
] as const;
|
||||
|
||||
/**
 * Builds the case-sensitive regular expression that matches any of the forbidden product-name
 * spellings.
 *
 * Each spelling is assembled from two fragments at call time, so the complete name never appears
 * as a single literal in this file.
 *
 * @returns A flagless `RegExp` whose alternatives are the three joined spellings.
 */
function forbiddenProductPattern() {
  const fragmentPairs = [
    ['Kae', 'lio'],
    ['kae', 'lio'],
    ['KAE', 'LIO_'],
  ];
  const alternatives = fragmentPairs.map((pair) => pair.join(''));
  return new RegExp(alternatives.join('|'));
}
|
||||
|
||||
/**
 * Collects every `sql_execution({` call snippet found in a block of text.
 *
 * Each snippet runs from the opening marker through the first `})` that follows it. When no
 * `})` follows, the snippet is the bare marker alone, so an unterminated call is still reported.
 * Scanning resumes right after each marker, so a call nested inside another snippet is also
 * returned as its own entry.
 *
 * @param body - Text to scan.
 * @returns The snippets in order of appearance; empty when the marker never occurs.
 */
function sqlExecutionCallBlocks(body: string): string[] {
  const opener = 'sql_execution({';
  const closer = '})';
  const found: string[] = [];

  for (
    let at = body.indexOf(opener);
    at !== -1;
    at = body.indexOf(opener, at + opener.length)
  ) {
    const afterOpener = at + opener.length;
    const closeAt = body.indexOf(closer, afterOpener);
    const sliceEnd = closeAt === -1 ? afterOpener : closeAt + closer.length;
    found.push(body.slice(at, sliceEnd));
  }

  return found;
}
|
||||
|
||||
describe('memory runtime assets', () => {
|
||||
it('packages every memory-agent base prompt referenced by promptNameFor()', async () => {
|
||||
const prompts = new PromptService({ promptsDir, partials: [] });
|
||||
|
|
@ -117,4 +148,50 @@ describe('memory runtime assets', () => {
|
|||
expect(body).toContain('Do not call `sl_write_source` or `sl_edit_source`');
|
||||
expect(body).toContain('LookML writes target the run connection directly');
|
||||
});
|
||||
|
||||
it('ships identifier verification protocol in every synthesis writer skill', async () => {
|
||||
for (const skillName of verificationWriterSkills) {
|
||||
const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
|
||||
expect(body).toContain('## Identifier Verification Protocol');
|
||||
expect(body).toMatch(/discover_data|entity_details/);
|
||||
}
|
||||
});
|
||||
|
||||
it('does not ship stale warehouse verification tool names or fictional identifiers', async () => {
|
||||
for (const skillName of verificationWriterSkills) {
|
||||
const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
|
||||
expect(body).not.toContain('orbit_analytics.customer');
|
||||
expect(body).not.toContain('wiki_sl_search');
|
||||
expect(body).not.toContain('sl_describe_table');
|
||||
}
|
||||
});
|
||||
|
||||
it('ships only the KTX connectionName sql_execution call shape in writer guidance', async () => {
|
||||
const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
|
||||
const bodies = [{ name: '_shared/identifier-verification.md', body: shared }];
|
||||
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
|
||||
|
||||
for (const skillName of verificationWriterSkills) {
|
||||
const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
|
||||
bodies.push({ name: `${skillName}/SKILL.md`, body });
|
||||
expect(body).toContain('sql_execution({connectionName');
|
||||
expect(body).not.toContain('sql_execution({ sql');
|
||||
expect(body).not.toContain('session shape');
|
||||
expect(body).not.toContain('connection is already pinned by the ingest session');
|
||||
}
|
||||
|
||||
for (const { name, body } of bodies) {
|
||||
const calls = sqlExecutionCallBlocks(body);
|
||||
expect(calls.length, `${name} should contain sql_execution guidance`).toBeGreaterThan(0);
|
||||
expect(
|
||||
calls.filter((call) => !call.includes('connectionName')),
|
||||
`${name} has sql_execution calls without connectionName`,
|
||||
).toEqual([]);
|
||||
expect(body, `${name} has a connectionless multiline sql_execution call`).not.toMatch(
|
||||
/sql_execution\(\{\s*sql\s*:/,
|
||||
);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue