From 9cd9cfbc754fa30943d91c2d6b000a363a0ffabd Mon Sep 17 00:00:00 2001
From: Andrey Avtomonov
Date: Tue, 12 May 2026 23:35:21 +0200
Subject: [PATCH] feat(context): add raw warehouse discovery tool

---
Review note: discover-data.tool.ts and index.ts were edited after this patch
was generated, so the blob ids on their `index` lines are stale.

 .../discover-data.tool.test.ts                |  81 ++++++++++
 .../discover-data.tool.ts                     | 153 ++++++++++++++++++
 .../tools/warehouse-verification/index.ts     |  40 +++++
 3 files changed, 274 insertions(+)
 create mode 100644 packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
 create mode 100644 packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts
 create mode 100644 packages/context/src/ingest/tools/warehouse-verification/index.ts

diff --git a/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts b/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
new file mode 100644
index 00000000..a9fb4bdf
--- /dev/null
+++ b/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
@@ -0,0 +1,81 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import type { BaseTool, ToolContext } from '../../../tools/index.js';
+import { DiscoverDataTool } from './discover-data.tool.js';
+import type { WarehouseCatalogService } from './warehouse-catalog.service.js';
+
+describe('DiscoverDataTool', () => {
+  const wikiSearchTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
+  const slDiscoverTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
+  const catalog = { searchByName: vi.fn() } as unknown as WarehouseCatalogService & {
+    searchByName: ReturnType<typeof vi.fn>;
+  };
+  const context: ToolContext = {
+    sourceId: 'ingest',
+    messageId: 'm1',
+    userId: 'system',
+    session: { allowedConnectionNames: new Set(['warehouse']) } as any,
+  };
+  const tool = new DiscoverDataTool({
+    wikiSearchTool,
+    slDiscoverTool,
+    catalogFactory: () => catalog,
+  });
+
+  beforeEach(() => {
+    wikiSearchTool.call.mockReset();
+    slDiscoverTool.call.mockReset();
+    catalog.searchByName.mockReset();
+    wikiSearchTool.call.mockResolvedValue({
+      markdown: '- orders wiki',
+      structured: { totalFound: 1, results: [{ key: 'orders' }] },
+    });
+    slDiscoverTool.call.mockResolvedValue({
+      markdown: '- orders source',
+      structured: { totalSources: 1, sources: [{ sourceName: 'orders' }] },
+    });
+    catalog.searchByName.mockResolvedValue([
+      {
+        kind: 'table',
+        ref: { catalog: null, db: 'public', name: 'orders' },
+        display: 'public.orders',
+        matchedOn: 'name',
+      },
+    ]);
+  });
+
+  it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => {
+    const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context);
+
+    expect(result.markdown).toContain('## Wiki Pages');
+    expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content');
+    expect(result.markdown).toContain('## Semantic Layer Sources');
+    expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML');
+    expect(result.markdown).toContain('## Raw Warehouse Schema');
+    expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`');
+    expect(result.structured.raw?.hits).toHaveLength(1);
+  });
+
+  it('delegates sourceName inspect mode to sl_discover only', async () => {
+    slDiscoverTool.call.mockResolvedValueOnce({
+      markdown: 'source detail',
+      structured: { sourceName: 'orders' },
+    });
+
+    const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context);
+
+    expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context);
+    expect(wikiSearchTool.call).not.toHaveBeenCalled();
+    expect(catalog.searchByName).not.toHaveBeenCalled();
+    expect(result.markdown).toContain('source detail');
+  });
+
+  it('returns the empty-state message when all sections are empty', async () => {
+    wikiSearchTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalFound: 0, results: [] } });
+    slDiscoverTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } });
+    catalog.searchByName.mockResolvedValueOnce([]);
+
+    const result = await tool.call({ query: 'customer source', connectionName: 'warehouse' }, context);
+
+    expect(result.markdown).toContain('No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.');
+  });
+});
diff --git a/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts b/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts
new file mode 100644
index 00000000..ec05f03f
--- /dev/null
+++ b/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts
@@ -0,0 +1,153 @@
+import { z } from 'zod';
+import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
+import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';
+
+/** Result cap used when a caller omits `limit`; shared with the schema default. */
+const DEFAULT_LIMIT = 10;
+
+const discoverDataInputSchema = z.object({
+  query: z.string().optional(),
+  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
+  limit: z.number().int().positive().max(50).optional().default(DEFAULT_LIMIT),
+  sourceName: z.string().optional(),
+});
+
+type DiscoverDataInput = z.infer<typeof discoverDataInputSchema>;
+
+/** Structured result of `discover_data`; a section stays `null` when it was skipped or had no matches. */
+export interface DiscoverDataStructured {
+  wiki: unknown | null;
+  sl: unknown | null;
+  raw: { hits: RawSchemaHit[] } | null;
+}
+
+/** Collaborators injected into `DiscoverDataTool`. */
+interface DiscoverDataDeps {
+  wikiSearchTool: BaseTool;
+  slDiscoverTool: BaseTool;
+  catalogFactory: (context: ToolContext) => WarehouseCatalogService;
+}
+
+/** Returns `structured[key]` when it is a number; any other shape counts as 0. */
+function numericField(structured: unknown, key: string): number {
+  if (typeof structured !== 'object' || structured === null) {
+    return 0;
+  }
+  const value = (structured as Record<string, unknown>)[key];
+  return typeof value === 'number' ? value : 0;
+}
+
+/** Reads the numeric `totalFound` field of a wiki search result (0 when absent). */
+function totalFound(structured: unknown): number {
+  return numericField(structured, 'totalFound');
+}
+
+/** Reads the numeric `totalSources` field of a semantic layer discover result (0 when absent). */
+function totalSources(structured: unknown): number {
+  return numericField(structured, 'totalSources');
+}
+
+// NOTE(review): any type arguments on `BaseTool` were lost in this copy of the patch; confirm against the base class.
+/**
+ * Aggregates wiki search, semantic layer discovery, and raw warehouse catalog
+ * name search into one markdown report with per-section follow-up hints.
+ *
+ * When `sourceName` is given, the call is delegated to the semantic layer
+ * discover tool alone and the other two sources are not consulted.
+ */
+export class DiscoverDataTool extends BaseTool {
+  readonly name = 'discover_data';
+
+  constructor(private readonly deps: DiscoverDataDeps) {
+    super();
+  }
+
+  get description(): string {
+    return 'Discover existing wiki pages, semantic layer sources, and raw warehouse schema hits before writing ingest output.';
+  }
+
+  get inputSchema() {
+    return discoverDataInputSchema;
+  }
+
+  /**
+   * Runs discovery for `input`.
+   *
+   * @param input - `sourceName` selects inspect mode; otherwise `query`, `connectionName` and `limit` drive the search.
+   * @param context - Tool context; `session.allowedConnectionNames` supplies the connections searched when `connectionName` is omitted.
+   * @returns Markdown for the non-empty sections (or an empty-state message) plus the per-section structured payloads.
+   */
+  async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
+    if (input.sourceName) {
+      const sl = await this.deps.slDiscoverTool.call(
+        { sourceName: input.sourceName, connectionId: input.connectionName },
+        context,
+      );
+      return { markdown: sl.markdown, structured: { wiki: null, sl: sl.structured, raw: null } };
+    }
+
+    const query = input.query?.trim() ?? '';
+    // `call` may be invoked without schema parsing, so the zod default is not guaranteed to be applied.
+    const limit = input.limit ?? DEFAULT_LIMIT;
+    const parts: string[] = [];
+    let wiki: unknown | null = null;
+    let sl: unknown | null = null;
+    let raw: DiscoverDataStructured['raw'] = null;
+
+    // Wiki search is skipped for a blank query.
+    if (query) {
+      const wikiResult = await this.deps.wikiSearchTool.call({ query, limit }, context);
+      if (totalFound(wikiResult.structured) > 0) {
+        parts.push('## Wiki Pages', '> use `wiki_read(blockKey)` for full content', wikiResult.markdown, '');
+        wiki = wikiResult.structured;
+      }
+    }
+
+    const slResult = await this.deps.slDiscoverTool.call(
+      { query: query || undefined, connectionId: input.connectionName },
+      context,
+    );
+    if (totalSources(slResult.structured) > 0) {
+      parts.push(
+        '## Semantic Layer Sources',
+        '> use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details',
+        slResult.markdown,
+        '',
+      );
+      sl = slResult.structured;
+    }
+
+    const catalog = this.deps.catalogFactory(context);
+    // NOTE(review): an explicit `connectionName` is not checked against `session.allowedConnectionNames`
+    // here; confirm that restriction is enforced elsewhere.
+    const connections = input.connectionName
+      ? [input.connectionName]
+      : [...(context.session?.allowedConnectionNames ?? [])].sort();
+    const rawHits: RawSchemaHit[] = [];
+    for (const connectionName of connections) {
+      // Only the first `limit` hits are reported, so later connections cannot contribute once the cap is reached.
+      if (rawHits.length >= limit) {
+        break;
+      }
+      rawHits.push(...(await catalog.searchByName(connectionName, query, limit)));
+    }
+    const visibleHits = rawHits.slice(0, limit);
+    if (visibleHits.length > 0) {
+      parts.push(
+        '## Raw Warehouse Schema',
+        '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values',
+        visibleHits.map((hit) => `- ${hit.kind}: ${hit.display} (matched on ${hit.matchedOn})`).join('\n'),
+      );
+      raw = { hits: visibleHits };
+    }
+
+    if (parts.length === 0) {
+      return {
+        markdown: `No matches for "${query}" across wiki, semantic layer, or raw warehouse schema. Try broader terms; this concept may not exist yet.`,
+        structured: { wiki, sl, raw },
+      };
+    }
+
+    return { markdown: parts.join('\n'), structured: { wiki, sl, raw } };
+  }
+}
diff --git a/packages/context/src/ingest/tools/warehouse-verification/index.ts b/packages/context/src/ingest/tools/warehouse-verification/index.ts
new file mode 100644
index 00000000..0901eace
--- /dev/null
+++ b/packages/context/src/ingest/tools/warehouse-verification/index.ts
@@ -0,0 +1,40 @@
+import type { KtxFileStorePort } from '../../../core/index.js';
+import type { SlConnectionCatalogPort } from '../../../sl/index.js';
+import type { BaseTool, ToolContext } from '../../../tools/index.js';
+import { DiscoverDataTool } from './discover-data.tool.js';
+import { EntityDetailsTool } from './entity-details.tool.js';
+import { SqlExecutionTool } from './sql-execution.tool.js';
+import { WarehouseCatalogService } from './warehouse-catalog.service.js';
+
+export { DiscoverDataTool } from './discover-data.tool.js';
+export { EntityDetailsTool } from './entity-details.tool.js';
+export { SqlExecutionTool } from './sql-execution.tool.js';
+export { WarehouseCatalogService } from './warehouse-catalog.service.js';
+export type { RawSchemaHit, TableDetail, WarehouseColumnDetail } from './warehouse-catalog.service.js';
+
+/**
+ * Builds the warehouse verification tool set: `EntityDetailsTool`, `SqlExecutionTool`, and `DiscoverDataTool`.
+ *
+ * The catalog service is created per call from `context.session.configService`, falling back to
+ * `deps.fallbackFileStore` when the session provides none.
+ */
+export function createWarehouseVerificationTools(deps: {
+  connections: SlConnectionCatalogPort;
+  fallbackFileStore: KtxFileStorePort;
+  wikiSearchTool: BaseTool;
+  slDiscoverTool: BaseTool;
+}): BaseTool[] {
+  const catalogFactory = (context: ToolContext) =>
+    new WarehouseCatalogService({
+      fileStore: context.session?.configService ?? deps.fallbackFileStore,
+    });
+  return [
+    new EntityDetailsTool(catalogFactory),
+    new SqlExecutionTool(deps.connections),
+    new DiscoverDataTool({
+      wikiSearchTool: deps.wikiSearchTool,
+      slDiscoverTool: deps.slDiscoverTool,
+      catalogFactory,
+    }),
+  ];
+}