feat(context): add warehouse verification tools (#46)

* feat(context): add warehouse dialect dispatch

* feat(context): read warehouse scan catalog

* feat(context): add entity details verification tool

* feat(context): add ingest SQL verification tool

* feat(context): add raw warehouse discovery tool

* feat(context): expose warehouse verification tools to ingest

* docs(context): add ingest identifier verification protocol

* test(context): guard ingest identifier verification prompts

* chore(context): verify warehouse verification tools

* docs: add warehouse verification tools plan and spec

* fix(context): expose target warehouses to Notion ingest

* fix(context): update ingest prompts for warehouse verification tools

* fix(context): scope raw schema discovery to allowed connections

* fix(context): verify warehouse column display targets

* docs: add notion warehouse verification gap closure plan

* fix(context): include raw discovery connection names

* fix(context): expose warehouse targets for LookML and MetricFlow

* fix(context): pass connection config to ingest query executors

* fix(cli): enable read-only SQL probes for local ingest

* docs: add warehouse verification final v1 closure plan

* fix(context): align warehouse sql probe prompt shape

* docs: add warehouse verification prompt shape closure plan

* test(context): catch connectionless sql execution prompt examples

* fix(context): include connection name in sl capture sql example

* docs: add warehouse verification sql example closure plan

* fix(context): report structured entity detail misses

* docs: add warehouse verification structured target miss closure plan

* fix: report untracked squash merge conflicts

* feat: require ingest verification ledger

* fix: stabilize ingest wiki references
This commit is contained in:
Andrey Avtomonov 2026-05-13 13:43:23 +02:00 committed by GitHub
parent bcb0d2f8f7
commit c22248dabf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
89 changed files with 7818 additions and 191 deletions

View file

@ -12,6 +12,7 @@ export type {
KnowledgeEventPort,
KnowledgeGitDiffPort,
KnowledgeIndexPort,
KnowledgeIndexPageListing,
UpsertPageParams,
WikiFileStorePort,
} from './ports.js';

View file

@ -113,13 +113,13 @@ describe('KnowledgeWikiService.syncFromCommit', () => {
expect(call.deletes).toEqual([{ scope: 'GLOBAL', scopeId: null, pageKey: 'gone-page' }]);
});
it('indexes historic-SQL nested pages but skips other nested wiki paths from commit sync', async () => {
it('indexes only flat wiki pages and skips nested paths from commit sync', async () => {
const { service, pagesRepository, gitService, logger } = makeService();
gitService.diffNameStatus.mockResolvedValue([
{ status: 'A', path: 'knowledge/global/revenue-policy.md' },
{ status: 'A', path: 'knowledge/global/historic-sql-order-lifecycle.md' },
{ status: 'A', path: 'knowledge/global/historic-sql/order-lifecycle.md' },
{ status: 'A', path: 'knowledge/global/historic-sql/_archived/retired-pattern.md' },
{ status: 'A', path: 'knowledge/global/orbit/company-overview.md' },
]);
gitService.getFileAtCommit.mockImplementation((path: string) => {
@ -138,26 +138,25 @@ describe('KnowledgeWikiService.syncFromCommit', () => {
await service.syncFromCommit('sha-before', 'sha-after', 'run-uuid');
expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/orbit/company-overview.md', 'sha-after');
expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/historic-sql/order-lifecycle.md', 'sha-after');
expect(logger.warn).toHaveBeenCalledWith(
'[knowledge.sync] skipping unparseable path: knowledge/global/orbit/company-overview.md',
);
expect(logger.warn).toHaveBeenCalledWith(
'[knowledge.sync] skipping unparseable path: knowledge/global/historic-sql/order-lifecycle.md',
);
const call = pagesRepository.applyDiffTransactional.mock.calls[0][0];
expect(call.upserts).toEqual(
expect.arrayContaining([
expect.objectContaining({ scope: 'GLOBAL', pageKey: 'revenue-policy', summary: 'revenue' }),
expect.objectContaining({
scope: 'GLOBAL',
pageKey: 'historic-sql/order-lifecycle',
pageKey: 'historic-sql-order-lifecycle',
summary: 'order lifecycle',
}),
expect.objectContaining({
scope: 'GLOBAL',
pageKey: 'historic-sql/_archived/retired-pattern',
summary: 'retired',
}),
]),
);
expect(call.upserts).toHaveLength(3);
expect(call.upserts).toHaveLength(2);
});
it('is a no-op when the diff between shas has no knowledge changes', async () => {

View file

@ -11,10 +11,6 @@ const WIKI_PREFIX = 'knowledge';
export type { WikiFrontmatter };
/**
 * Checks whether a single path segment is acceptable inside a historic-SQL page path.
 *
 * A segment passes when it is non-empty, starts with an ASCII letter, digit or
 * underscore, and continues with only ASCII letters, digits, underscores or hyphens.
 *
 * @param segment - One path component (no slashes expected; a slash makes it fail).
 * @returns `true` when the whole segment matches the allowed character set.
 */
function isHistoricSqlPathSegment(segment: string): boolean {
  const allowedSegment = /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/;
  return allowedSegment.test(segment);
}
export class KnowledgeWikiService {
private isWorktreeScoped = false;
@ -422,7 +418,6 @@ export class KnowledgeWikiService {
* Parse a `knowledge/<scope>/...` file path into its scope and page key.
* `knowledge/global/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'foo' }
* `knowledge/user/<id>/bar.md` → { scope: 'USER', scopeId: '<id>', pageKey: 'bar' }
* `knowledge/global/historic-sql/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'historic-sql/foo' }
*/
function parseKnowledgePath(path: string): { scope: string; scopeId: string | null; pageKey: string } | null {
if (!path.endsWith('.md')) {
@ -437,13 +432,6 @@ function parseKnowledgePath(path: string): { scope: string; scopeId: string | nu
const pageKey = rest[1].replace(/\.md$/, '');
return isFlatWikiKey(pageKey) ? { scope: 'GLOBAL', scopeId: null, pageKey } : null;
}
if (rest.length >= 3 && rest[0] === 'global' && rest[1] === 'historic-sql') {
const historicPath = rest.slice(2).join('/').replace(/\.md$/, '');
if (historicPath.split('/').every(isHistoricSqlPathSegment)) {
return { scope: 'GLOBAL', scopeId: null, pageKey: `historic-sql/${historicPath}` };
}
return null;
}
if (rest.length === 3 && rest[0] === 'user') {
const pageKey = rest[2].replace(/\.md$/, '');
return isFlatWikiKey(pageKey) ? { scope: 'USER', scopeId: rest[1], pageKey } : null;

View file

@ -244,4 +244,30 @@ describe('local knowledge helpers', () => {
}),
).rejects.toThrow('Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".');
});
// A nested `historic-sql/` file on disk must not show up in the listing; only the flat page is expected.
it('ignores nested historic-SQL legacy paths when listing local knowledge pages', async () => {
  // Flat page, written through the regular helper.
  await writeLocalKnowledgePage(project, {
    key: 'historic-sql-paid-orders',
    scope: 'GLOBAL',
    summary: 'Flat historic SQL page',
    content: 'Flat page body.',
    tags: ['historic-sql'],
  });

  // Nested legacy page, written directly to the file store rather than through the helper.
  // The trailing three arguments look like author name, author email and commit message — confirm against the file store API.
  const nestedLegacyPath = 'knowledge/global/historic-sql/paid-orders.md';
  const nestedLegacyBody = '---\nsummary: Nested historic SQL page\nusage_mode: auto\n---\n\nNested body\n';
  await project.fileStore.writeFile(
    nestedLegacyPath,
    nestedLegacyBody,
    'Test',
    'test@example.com',
    'Write nested legacy page',
  );

  const expectedListing = [
    {
      key: 'historic-sql-paid-orders',
      path: 'knowledge/global/historic-sql-paid-orders.md',
      scope: 'GLOBAL',
      summary: 'Flat historic SQL page',
    },
  ];
  const listing = listLocalKnowledgePages(project, { userId: 'local' });
  await expect(listing).resolves.toEqual(expectedListing);
});
});

View file

@ -80,26 +80,12 @@ function knowledgePath(scope: LocalKnowledgeScope, userId: string | undefined, k
return `knowledge/user/${assertSafePathToken('user id', userId ?? 'local')}/${safeKey}.md`;
}
/**
 * Tells whether one component of a `historic-sql/...` key is well-formed.
 *
 * The segment must be non-empty, begin with an ASCII letter, digit or underscore,
 * and contain only ASCII letters, digits, underscores and hyphens after that.
 *
 * @param segment - A single path component to validate.
 * @returns `true` if the entire segment matches, `false` otherwise.
 */
function isHistoricSqlPathSegment(segment: string): boolean {
  const segmentShape = /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/;
  const matches = segmentShape.test(segment);
  return matches;
}
/**
 * Derives a wiki key from a knowledge file path.
 *
 * The scope prefix (`knowledge/global/` or `knowledge/user/<userId>/`) is removed by
 * length only, so the caller is expected to pass a path that already lives under that
 * prefix. A trailing `.md` is then stripped.
 *
 * @param path - File path of the knowledge page.
 * @param scope - Scope the path is being interpreted in.
 * @param userId - User id used to build the prefix for non-GLOBAL scopes; validated
 *   through `assertSafePathToken` in that case only.
 * @returns The key when it is a flat wiki key, or when it is a GLOBAL key under
 *   `historic-sql/` whose every remaining segment passes `isHistoricSqlPathSegment`;
 *   otherwise `null`.
 */
function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string | null {
  const historicSqlPrefix = 'historic-sql/';
  const scopePrefix =
    scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${assertSafePathToken('user id', userId)}/`;
  const key = path.slice(scopePrefix.length).replace(/\.md$/, '');

  if (isFlatWikiKey(key)) {
    return key;
  }

  // Nested keys are only tolerated for GLOBAL pages below the historic-SQL folder.
  if (scope !== 'GLOBAL' || !key.startsWith(historicSqlPrefix)) {
    return null;
  }

  const nestedSegments = key.slice(historicSqlPrefix.length).split('/');
  return nestedSegments.every(isHistoricSqlPathSegment) ? key : null;
}

View file

@ -13,6 +13,15 @@ export interface UpsertPageParams {
sourceRunId?: string | null;
}
/**
 * Shape of one page entry returned by `KnowledgeIndexPort.listPagesForUser`.
 *
 * Field names are snake_case; presumably they mirror the underlying index rows —
 * confirm against the repository implementation.
 */
export interface KnowledgeIndexPageListing {
/** Optional identifier of the entry; may be absent. */
id?: string;
/** Key of the wiki page. */
page_key: string;
/** Summary text of the page. */
summary: string;
/** Scope of the page; the test fixtures use 'GLOBAL' and 'USER'. */
scope: string;
/** Scope identifier, or `null`; the test fixtures use `null` for GLOBAL pages and a user id for USER pages. */
scope_id: string | null;
/** Tags attached to the page; `WikiListTagsTool` collects these directly from the listing. */
tags: string[];
}
export interface KnowledgeIndexPort {
upsertPage(params: UpsertPageParams): Promise<void>;
applyDiffTransactional(params: {
@ -32,9 +41,7 @@ export interface KnowledgeIndexPort {
scopeId: string | null,
pageKey: string,
): Promise<{ id?: string; page_key: string } | null | undefined>;
listPagesForUser(
userId: string,
): Promise<Array<{ id?: string; page_key: string; summary: string; scope: string; scope_id: string | null }>>;
listPagesForUser(userId: string): Promise<KnowledgeIndexPageListing[]>;
getUserPageCount(userId: string): Promise<number>;
incrementUsageCount(pageIds: string[]): Promise<void>;
searchRRF(

View file

@ -8,22 +8,11 @@ describe('WikiListTagsTool', () => {
it("returns distinct sorted tags across the user's visible pages", async () => {
const pagesRepository = {
listPagesForUser: vi.fn().mockResolvedValue([
{ scope: 'GLOBAL', scope_id: null, page_key: 'k1' },
{ scope: 'USER', scope_id: 'u', page_key: 'k2' },
{ scope: 'GLOBAL', scope_id: null, page_key: 'k1', tags: ['metrics', 'finance'] },
{ scope: 'USER', scope_id: 'u', page_key: 'k2', tags: ['metrics'] },
]),
};
const wikiService = {
readPage: vi.fn().mockImplementation((_scope, _scopeId, key) => {
if (key === 'k1') {
return Promise.resolve({ frontmatter: { tags: ['metrics', 'finance'] }, content: '' });
}
if (key === 'k2') {
return Promise.resolve({ frontmatter: { tags: ['metrics'] }, content: '' });
}
return Promise.resolve(null);
}),
};
const tool = new WikiListTagsTool(wikiService as any, pagesRepository as any);
const tool = new WikiListTagsTool(pagesRepository as any);
const result = await tool.call({}, baseContext);
expect(result.markdown).toContain('finance');
@ -31,10 +20,23 @@ describe('WikiListTagsTool', () => {
expect(result.structured.tags).toEqual(['finance', 'metrics']);
});
// Tags come straight from the page listing, including pages whose flat key starts with `historic-sql-`.
it('lists tags from historic-SQL indexed pages with flat wiki keys', async () => {
  const indexedPages = [
    { scope: 'GLOBAL', scope_id: null, page_key: 'company-overview', tags: ['notion'] },
    { scope: 'GLOBAL', scope_id: null, page_key: 'historic-sql-revenue-pattern', tags: ['historic-sql', 'pattern'] },
  ];
  const pagesRepository = {
    listPagesForUser: vi.fn().mockResolvedValue(indexedPages),
  };

  const tool = new WikiListTagsTool(pagesRepository as any);
  const { structured } = await tool.call({}, baseContext);

  // The expected array is in alphabetical order and contains each tag once.
  expect(structured.tags).toEqual(['historic-sql', 'notion', 'pattern']);
});
it('returns a friendly message when no pages have tags', async () => {
const pagesRepository = { listPagesForUser: vi.fn().mockResolvedValue([]) };
const wikiService = { readPage: vi.fn() };
const tool = new WikiListTagsTool(wikiService as any, pagesRepository as any);
const tool = new WikiListTagsTool(pagesRepository as any);
const result = await tool.call({}, baseContext);
expect(result.markdown).toMatch(/no tags/i);

View file

@ -1,7 +1,5 @@
import { z } from 'zod';
import type { KnowledgeIndexPort } from '../ports.js';
type BlockScope = 'GLOBAL' | 'USER';
import { KnowledgeWikiService } from '../index.js';
import { BaseTool, type ToolContext, type ToolOutput } from '../../tools/index.js';
const wikiListTagsInputSchema = z.object({});
@ -11,10 +9,7 @@ type WikiListTagsInput = z.infer<typeof wikiListTagsInputSchema>;
export class WikiListTagsTool extends BaseTool<typeof wikiListTagsInputSchema> {
readonly name = 'wiki_list_tags';
constructor(
private readonly wikiService: KnowledgeWikiService,
private readonly pagesRepository: KnowledgeIndexPort,
) {
constructor(private readonly pagesRepository: KnowledgeIndexPort) {
super();
}
@ -33,10 +28,7 @@ Call before writing a new page so you can reuse existing tags consistently inste
const pages = await this.pagesRepository.listPagesForUser(context.userId);
const set = new Set<string>();
for (const p of pages) {
const scope = p.scope as BlockScope;
const scopeId = scope === 'USER' ? p.scope_id : null;
const page = await this.wikiService.readPage(scope, scopeId, p.page_key);
for (const t of page?.frontmatter.tags ?? []) {
for (const t of p.tags) {
set.add(t);
}
}

View file

@ -150,6 +150,7 @@ export class WikiWriteTool extends BaseTool<typeof wikiWriteInputSchema> {
Create or update a knowledge page. Provide content for create/rewrite, or replacements for targeted edits.
For existing pages, you may provide only frontmatter fields such as summary, tags, refs, or sl_refs to update metadata while preserving content.
tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to clear, [values] to set.
Keys must be flat file names, not directory paths. Use tags/source frontmatter for grouping.
</purpose>`;
}

View file

@ -25,6 +25,7 @@ export interface WikiFrontmatter {
usage?: HistoricSqlWikiUsageFrontmatter;
fingerprints?: string[];
stale_since?: string;
archived_since?: string;
}
export interface WikiPage {