feat(context): add raw warehouse discovery tool

This commit is contained in:
Andrey Avtomonov 2026-05-12 23:35:21 +02:00
parent a4b0e37254
commit 9cd9cfbc75
3 changed files with 240 additions and 0 deletions

View file

@ -0,0 +1,81 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
import type { BaseTool, ToolContext } from '../../../tools/index.js';
import { DiscoverDataTool } from './discover-data.tool.js';
import type { WarehouseCatalogService } from './warehouse-catalog.service.js';
// Unit tests for DiscoverDataTool. The wiki search tool, the semantic layer discover tool and the
// warehouse catalog are all replaced by vi.fn() stubs, so only the tool's own routing and markdown
// assembly are exercised.
describe('DiscoverDataTool', () => {
// Stub collaborators: each exposes just the one method DiscoverDataTool invokes on it.
const wikiSearchTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
const slDiscoverTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
const catalog = { searchByName: vi.fn() } as unknown as WarehouseCatalogService & {
searchByName: ReturnType<typeof vi.fn>;
};
// Shared tool context; the session carries only allowedConnectionNames, hence the `as any` cast.
const context: ToolContext = {
sourceId: 'ingest',
messageId: 'm1',
userId: 'system',
session: { allowedConnectionNames: new Set(['warehouse']) } as any,
};
// The factory ignores the context and always hands back the same stub catalog.
const tool = new DiscoverDataTool({
wikiSearchTool,
slDiscoverTool,
catalogFactory: () => catalog,
});
// Clear recorded calls/behaviour, then install a default of exactly one hit per backend.
// Individual tests override these with mockResolvedValueOnce where they need something else.
beforeEach(() => {
wikiSearchTool.call.mockReset();
slDiscoverTool.call.mockReset();
catalog.searchByName.mockReset();
wikiSearchTool.call.mockResolvedValue({
markdown: '- orders wiki',
structured: { totalFound: 1, results: [{ key: 'orders' }] },
});
slDiscoverTool.call.mockResolvedValue({
markdown: '- orders source',
structured: { totalSources: 1, sources: [{ sourceName: 'orders' }] },
});
catalog.searchByName.mockResolvedValue([
{
kind: 'table',
ref: { catalog: null, db: 'public', name: 'orders' },
display: 'public.orders',
matchedOn: 'name',
},
]);
});
// Happy path: all three backends return a hit, so all three headed sections and their
// follow-up-tool hints must appear, and the raw hit is surfaced in the structured payload.
it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => {
const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context);
expect(result.markdown).toContain('## Wiki Pages');
expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content');
expect(result.markdown).toContain('## Semantic Layer Sources');
expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML');
expect(result.markdown).toContain('## Raw Warehouse Schema');
expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`');
expect(result.structured.raw?.hits).toHaveLength(1);
});
// Inspect mode: a sourceName must be forwarded to the semantic layer tool alone, with the
// connection name passed under the `connectionId` key, and its markdown returned as-is.
it('delegates sourceName inspect mode to sl_discover only', async () => {
slDiscoverTool.call.mockResolvedValueOnce({
markdown: 'source detail',
structured: { sourceName: 'orders' },
});
const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context);
expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context);
expect(wikiSearchTool.call).not.toHaveBeenCalled();
expect(catalog.searchByName).not.toHaveBeenCalled();
expect(result.markdown).toContain('source detail');
});
// Empty state: zero totals from wiki and semantic layer plus no raw hits must yield the
// single "No matches" message that quotes the query.
it('returns the empty-state message when all sections are empty', async () => {
wikiSearchTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalFound: 0, results: [] } });
slDiscoverTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } });
catalog.searchByName.mockResolvedValueOnce([]);
const result = await tool.call({ query: 'customer source', connectionName: 'warehouse' }, context);
expect(result.markdown).toContain('No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.');
});
});

View file

@ -0,0 +1,125 @@
import { z } from 'zod';
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';
/**
 * Input schema for the `discover_data` tool. Every field is optional; `limit` falls back to 10.
 */
const discoverDataInputSchema = z.object({
// Free-text search term. The tool trims it and skips the wiki search when it is empty.
query: z.string().optional(),
// Connection to search. Must start with an alphanumeric character, followed only by
// alphanumerics, `_` or `-`.
connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
// Result cap: a positive integer no greater than 50, defaulting to 10.
limit: z.number().int().positive().max(50).optional().default(10),
// When set (non-empty), the tool skips searching and forwards this name to the semantic layer tool.
sourceName: z.string().optional(),
});
/** Parsed (output) shape of {@link discoverDataInputSchema}. */
type DiscoverDataInput = z.infer<typeof discoverDataInputSchema>;
/**
 * Structured payload returned by `DiscoverDataTool.call`, one slot per searched backend.
 * A slot is `null` when that backend was not consulted or contributed no section to the report.
 */
export interface DiscoverDataStructured {
// Structured result of the wiki search tool, passed through untouched.
wiki: unknown | null;
// Structured result of the semantic layer discover tool, passed through untouched.
sl: unknown | null;
// Raw warehouse schema matches collected from the catalog, capped at the requested limit.
raw: { hits: RawSchemaHit[] } | null;
}
/** Collaborators injected into `DiscoverDataTool`. */
interface DiscoverDataDeps {
// Tool called with `{ query, limit }` to search wiki pages.
wikiSearchTool: BaseTool;
// Tool called with `{ query, connectionId }` to list sources, or `{ sourceName, connectionId }` to inspect one.
slDiscoverTool: BaseTool;
// Builds the warehouse catalog for a given tool context; invoked once per search call.
catalogFactory: (context: ToolContext) => WarehouseCatalogService;
}
/**
 * Reads the numeric `totalFound` field from an untyped structured payload.
 *
 * @param structured - Arbitrary value returned by a tool.
 * @returns The `totalFound` number when `structured` is a non-null object carrying one; otherwise 0.
 */
function totalFound(structured: unknown): number {
  // Anything that is not a real object cannot carry the field.
  if (typeof structured !== 'object' || structured === null) {
    return 0;
  }
  if (!('totalFound' in structured)) {
    return 0;
  }
  const count = structured.totalFound;
  // Non-numeric values (strings, nested objects, ...) are treated as "nothing found".
  return typeof count === 'number' ? count : 0;
}
/**
 * Reads the numeric `totalSources` field from an untyped structured payload.
 *
 * @param structured - Arbitrary value returned by a tool.
 * @returns The `totalSources` number when `structured` is a non-null object carrying one; otherwise 0.
 */
function totalSources(structured: unknown): number {
  // Reject non-objects, null, and objects without the field in one guard.
  if (structured === null || typeof structured !== 'object' || !('totalSources' in structured)) {
    return 0;
  }
  const { totalSources: sourceCount } = structured;
  // Only a genuine number counts; every other value is reported as zero sources.
  return typeof sourceCount === 'number' ? sourceCount : 0;
}
/**
 * `discover_data` tool: looks a concept up in wiki pages, semantic layer sources and the raw
 * warehouse schema, and returns one markdown report with a follow-up hint under each section.
 */
export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
  readonly name = 'discover_data';

  /**
   * @param deps - The wiki search tool, the semantic layer discover tool, and a factory that
   *   builds the warehouse catalog for a given tool context.
   */
  constructor(private readonly deps: DiscoverDataDeps) {
    super();
  }

  get description(): string {
    return 'Discover existing wiki pages, semantic layer sources, and raw warehouse schema hits before writing ingest output.';
  }

  get inputSchema() {
    return discoverDataInputSchema;
  }

  /**
   * Runs the discovery.
   *
   * - With `sourceName`: forwards to the semantic layer tool only and returns its output verbatim.
   * - Otherwise: searches wiki (only when a non-empty query is given), semantic layer, and the
   *   warehouse catalog, emitting a section for each backend that produced results.
   *
   * @param input - Parsed tool input; see the input schema for field constraints.
   * @param context - Tool context, passed through to delegated tools and the catalog factory.
   * @returns Markdown report plus the per-backend structured payloads (`null` for empty sections).
   */
  async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
    // Inspect mode short-circuits every search below.
    if (input.sourceName) {
      const inspected = await this.deps.slDiscoverTool.call(
        { sourceName: input.sourceName, connectionId: input.connectionName },
        context,
      );
      return { markdown: inspected.markdown, structured: { wiki: null, sl: inspected.structured, raw: null } };
    }

    const query = input.query?.trim() ?? '';
    // `limit` can be missing when the input did not go through schema parsing, so re-apply the default.
    const limit = input.limit ?? 10;
    const parts: string[] = [];
    let wiki: unknown | null = null;
    let sl: unknown | null = null;
    let raw: DiscoverDataStructured['raw'] = null;

    // Wiki search is only attempted when there is something to search for.
    if (query) {
      const wikiResult = await this.deps.wikiSearchTool.call({ query, limit }, context);
      if (totalFound(wikiResult.structured) > 0) {
        parts.push('## Wiki Pages', '> use `wiki_read(blockKey)` for full content', wikiResult.markdown, '');
        wiki = wikiResult.structured;
      }
    }

    // An empty query is sent as `undefined` rather than an empty string.
    const slResult = await this.deps.slDiscoverTool.call(
      { query: query || undefined, connectionId: input.connectionName },
      context,
    );
    if (totalSources(slResult.structured) > 0) {
      parts.push(
        '## Semantic Layer Sources',
        '> use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details',
        slResult.markdown,
        '',
      );
      sl = slResult.structured;
    }

    const catalog = this.deps.catalogFactory(context);
    // NOTE(review): an explicit connectionName is used as-is and is not checked against
    // session.allowedConnectionNames here — confirm the catalog or a caller enforces the allow-list.
    const connections = input.connectionName
      ? [input.connectionName]
      : [...(context.session?.allowedConnectionNames ?? [])].sort();
    // NOTE(review): unlike the wiki search, the catalog is still queried when `query` is empty;
    // what searchByName returns for '' is not visible from this file — confirm that is intended.
    const rawHits: RawSchemaHit[] = [];
    for (const connectionName of connections) {
      // Hits beyond `limit` are discarded below, so further catalog lookups would be wasted work.
      if (rawHits.length >= limit) {
        break;
      }
      rawHits.push(...(await catalog.searchByName(connectionName, query, limit)));
    }
    const cappedHits = rawHits.slice(0, limit);
    if (cappedHits.length > 0) {
      parts.push(
        '## Raw Warehouse Schema',
        '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values',
        cappedHits.map((hit) => `- ${hit.kind}: ${hit.display} (matched on ${hit.matchedOn})`).join('\n'),
      );
      raw = { hits: cappedHits };
    }

    if (parts.length === 0) {
      // Without a query the wiki was never searched, so do not claim it was or print an empty quote.
      const markdown = query
        ? `No matches for "${query}" across wiki, semantic layer, or raw warehouse schema. Try broader terms; this concept may not exist yet.`
        : 'No semantic layer sources or raw warehouse schema matches were found. Pass a `query` to also search wiki pages.';
      return { markdown, structured: { wiki, sl, raw } };
    }
    return { markdown: parts.join('\n'), structured: { wiki, sl, raw } };
  }
}

View file

@ -0,0 +1,34 @@
import type { KtxFileStorePort } from '../../../core/index.js';
import type { SlConnectionCatalogPort } from '../../../sl/index.js';
import type { BaseTool, ToolContext } from '../../../tools/index.js';
import { DiscoverDataTool } from './discover-data.tool.js';
import { EntityDetailsTool } from './entity-details.tool.js';
import { SqlExecutionTool } from './sql-execution.tool.js';
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
export { DiscoverDataTool } from './discover-data.tool.js';
export { EntityDetailsTool } from './entity-details.tool.js';
export { SqlExecutionTool } from './sql-execution.tool.js';
export { WarehouseCatalogService } from './warehouse-catalog.service.js';
export type { RawSchemaHit, TableDetail, WarehouseColumnDetail } from './warehouse-catalog.service.js';
/**
 * Builds the warehouse verification tool set: entity details, SQL execution, and data discovery.
 *
 * @param deps - Connection catalog for SQL execution, a fallback file store used when the
 *   session supplies no `configService`, and the wiki/semantic-layer tools that discovery delegates to.
 * @returns The three tools, in the order entity details, SQL execution, discover data.
 */
export function createWarehouseVerificationTools(deps: {
  connections: SlConnectionCatalogPort;
  fallbackFileStore: KtxFileStorePort;
  wikiSearchTool: BaseTool;
  slDiscoverTool: BaseTool;
}): BaseTool[] {
  // Shared by the entity-details and discovery tools; a fresh catalog is built on every invocation.
  function catalogFactory(context: ToolContext): WarehouseCatalogService {
    // Prefer the session's own config service; otherwise fall back to the injected store.
    const fileStore = context.session?.configService ?? deps.fallbackFileStore;
    return new WarehouseCatalogService({ fileStore });
  }

  const entityDetails = new EntityDetailsTool(catalogFactory);
  const sqlExecution = new SqlExecutionTool(deps.connections);
  const discoverData = new DiscoverDataTool({
    wikiSearchTool: deps.wikiSearchTool,
    slDiscoverTool: deps.slDiscoverTool,
    catalogFactory,
  });

  return [entityDetails, sqlExecution, discoverData];
}