mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
feat(context): add raw warehouse discovery tool
This commit is contained in:
parent
a4b0e37254
commit
9cd9cfbc75
3 changed files with 240 additions and 0 deletions
|
|
@ -0,0 +1,81 @@
|
||||||
|
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
import type { BaseTool, ToolContext } from '../../../tools/index.js';
|
||||||
|
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||||
|
import type { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||||
|
|
||||||
|
describe('DiscoverDataTool', () => {
  // Collaborators are bare mocks. The double casts satisfy the tool's dependency
  // types while keeping the `vi.fn()` helpers (mockReset, mockResolvedValue, ...) reachable.
  const wikiSearchTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
  const slDiscoverTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
  const catalog = { searchByName: vi.fn() } as unknown as WarehouseCatalogService & {
    searchByName: ReturnType<typeof vi.fn>;
  };

  const context: ToolContext = {
    sourceId: 'ingest',
    messageId: 'm1',
    userId: 'system',
    // Only `allowedConnectionNames` is read by the tool, so a partial session stub is enough here.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    session: { allowedConnectionNames: new Set(['warehouse']) } as any,
  };

  const tool = new DiscoverDataTool({
    wikiSearchTool,
    slDiscoverTool,
    catalogFactory: () => catalog,
  });

  // Default fixture: every backend reports exactly one hit for "orders".
  // Individual tests override single calls with `mockResolvedValueOnce`.
  beforeEach(() => {
    wikiSearchTool.call.mockReset();
    slDiscoverTool.call.mockReset();
    catalog.searchByName.mockReset();

    wikiSearchTool.call.mockResolvedValue({
      markdown: '- orders wiki',
      structured: { totalFound: 1, results: [{ key: 'orders' }] },
    });
    slDiscoverTool.call.mockResolvedValue({
      markdown: '- orders source',
      structured: { totalSources: 1, sources: [{ sourceName: 'orders' }] },
    });
    catalog.searchByName.mockResolvedValue([
      {
        kind: 'table',
        ref: { catalog: null, db: 'public', name: 'orders' },
        display: 'public.orders',
        matchedOn: 'name',
      },
    ]);
  });

  it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => {
    const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context);

    expect(result.markdown).toContain('## Wiki Pages');
    expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content');
    expect(result.markdown).toContain('## Semantic Layer Sources');
    expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML');
    expect(result.markdown).toContain('## Raw Warehouse Schema');
    expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`');
    expect(result.structured.raw?.hits).toHaveLength(1);
  });

  it('delegates sourceName inspect mode to sl_discover only', async () => {
    slDiscoverTool.call.mockResolvedValueOnce({
      markdown: 'source detail',
      structured: { sourceName: 'orders' },
    });

    const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context);

    // The tool forwards its `connectionName` input under the `connectionId` key.
    expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context);
    expect(wikiSearchTool.call).not.toHaveBeenCalled();
    expect(catalog.searchByName).not.toHaveBeenCalled();
    expect(result.markdown).toContain('source detail');
  });

  it('returns the empty-state message when all sections are empty', async () => {
    wikiSearchTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalFound: 0, results: [] } });
    slDiscoverTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } });
    catalog.searchByName.mockResolvedValueOnce([]);

    const result = await tool.call({ query: 'customer source', connectionName: 'warehouse' }, context);

    expect(result.markdown).toContain('No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.');
  });
});
|
||||||
|
|
@ -0,0 +1,125 @@
|
||||||
|
import { z } from 'zod';
|
||||||
|
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
|
||||||
|
import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';
|
||||||
|
|
||||||
|
/**
 * Input accepted by the `discover_data` tool.
 *
 * - `query`: free-text search term. It is trimmed before use; when it is empty
 *   the wiki search is skipped.
 * - `connectionName`: restricts the search to one connection. Must start with an
 *   alphanumeric character and contain only alphanumerics, `_` or `-`.
 * - `limit`: integer between 1 and 50, defaulting to 10.
 * - `sourceName`: when set, the tool only delegates to the semantic layer
 *   discover tool for that source.
 */
const discoverDataInputSchema = z.object({
  query: z.string().optional(),
  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
  limit: z.number().int().positive().max(50).optional().default(10),
  sourceName: z.string().optional(),
});

/** Input type inferred from {@link discoverDataInputSchema}. */
type DiscoverDataInput = z.infer<typeof discoverDataInputSchema>;
|
||||||
|
|
||||||
|
/**
 * Structured payload returned by `discover_data`.
 *
 * Each field is `null` when the corresponding section produced no matches
 * or was not consulted.
 */
export interface DiscoverDataStructured {
  /** Structured output of the wiki search tool, passed through as-is. */
  wiki: unknown | null;
  /** Structured output of the semantic layer discover tool, passed through as-is. */
  sl: unknown | null;
  /** Raw warehouse schema hits, capped at the requested limit. */
  raw: { hits: RawSchemaHit[] } | null;
}
|
||||||
|
|
||||||
|
/** Collaborators injected into {@link DiscoverDataTool}. */
interface DiscoverDataDeps {
  /** Tool called with `{ query, limit }` to search wiki pages. */
  wikiSearchTool: BaseTool;
  /** Tool called with `{ query | sourceName, connectionId }` to list or inspect semantic layer sources. */
  slDiscoverTool: BaseTool;
  /** Builds the warehouse catalog service for the current tool context. */
  catalogFactory: (context: ToolContext) => WarehouseCatalogService;
}
|
||||||
|
|
||||||
|
/**
 * Reads the `totalFound` counter from an opaque tool payload.
 *
 * @param structured - Structured output of another tool; its shape is not trusted.
 * @returns The `totalFound` property when `structured` is a non-null object whose
 *   `totalFound` is a number, otherwise `0`.
 */
function totalFound(structured: unknown): number {
  if (typeof structured !== 'object' || structured === null) {
    return 0;
  }
  if (!('totalFound' in structured)) {
    return 0;
  }
  const count = structured.totalFound;
  return typeof count === 'number' ? count : 0;
}
|
||||||
|
|
||||||
|
/**
 * Reads the `totalSources` counter from an opaque tool payload.
 *
 * @param structured - Structured output of another tool; its shape is not trusted.
 * @returns The `totalSources` property when `structured` is a non-null object whose
 *   `totalSources` is a number, otherwise `0`.
 */
function totalSources(structured: unknown): number {
  if (typeof structured !== 'object' || structured === null) {
    return 0;
  }
  if (!('totalSources' in structured)) {
    return 0;
  }
  const count = structured.totalSources;
  return typeof count === 'number' ? count : 0;
}
|
||||||
|
|
||||||
|
/**
 * Tool that looks a query up in three places and reports the matches grouped by
 * origin: wiki pages, semantic layer sources, and raw warehouse schema.
 *
 * Each non-empty section of the markdown output is followed by a hint naming
 * the tool to use for full details.
 */
export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
  readonly name = 'discover_data';

  constructor(private readonly deps: DiscoverDataDeps) {
    super();
  }

  get description(): string {
    return 'Discover existing wiki pages, semantic layer sources, and raw warehouse schema hits before writing ingest output.';
  }

  get inputSchema() {
    return discoverDataInputSchema;
  }

  /**
   * Runs the discovery.
   *
   * When `input.sourceName` is set, only the semantic layer discover tool is
   * called and its output is returned unchanged. Otherwise the wiki (only for a
   * non-empty query), the semantic layer, and the warehouse catalog are consulted
   * in that order.
   *
   * @param input - Parsed tool input; see `discoverDataInputSchema`.
   * @param context - Tool context; `session.allowedConnectionNames` supplies the
   *   connections to search when `input.connectionName` is not given.
   * @returns Markdown with one section per origin that had matches (or an
   *   empty-state message) plus the structured results per origin.
   */
  async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
    // Inspect mode: a concrete source was requested, so skip wiki and raw schema entirely.
    if (input.sourceName) {
      const sl = await this.deps.slDiscoverTool.call(
        { sourceName: input.sourceName, connectionId: input.connectionName },
        context,
      );
      return { markdown: sl.markdown, structured: { wiki: null, sl: sl.structured, raw: null } };
    }

    const query = input.query?.trim() || '';
    // The schema already defaults `limit`; the fallback covers direct calls with unparsed input.
    const limit = input.limit ?? 10;
    const parts: string[] = [];
    let wiki: unknown | null = null;
    let sl: unknown | null = null;
    let raw: DiscoverDataStructured['raw'] = null;

    // Wiki search needs a search term, so it is skipped for an empty query.
    if (query) {
      const wikiResult = await this.deps.wikiSearchTool.call({ query, limit }, context);
      if (totalFound(wikiResult.structured) > 0) {
        parts.push('## Wiki Pages', '> use `wiki_read(blockKey)` for full content', wikiResult.markdown, '');
        wiki = wikiResult.structured;
      }
    }

    const slResult = await this.deps.slDiscoverTool.call(
      { query: query || undefined, connectionId: input.connectionName },
      context,
    );
    if (totalSources(slResult.structured) > 0) {
      parts.push(
        '## Semantic Layer Sources',
        '> use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details',
        slResult.markdown,
        '',
      );
      sl = slResult.structured;
    }

    const catalog = this.deps.catalogFactory(context);
    // Without an explicit connection, search every allowed one in sorted (deterministic) order.
    const connections = input.connectionName
      ? [input.connectionName]
      : [...(context.session?.allowedConnectionNames ?? [])].sort();
    const rawHits: RawSchemaHit[] = [];
    for (const connectionName of connections) {
      rawHits.push(...(await catalog.searchByName(connectionName, query, limit)));
      // Only the first `limit` hits are reported, so further connections cannot change the output.
      if (rawHits.length >= limit) {
        break;
      }
    }
    if (rawHits.length > 0) {
      const reportedHits = rawHits.slice(0, limit);
      parts.push('## Raw Warehouse Schema', '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values');
      parts.push(
        reportedHits
          .map((hit) => `- ${hit.kind}: ${hit.display} (matched on ${hit.matchedOn})`)
          .join('\n'),
      );
      raw = { hits: reportedHits };
    }

    if (parts.length === 0) {
      return {
        markdown: `No matches for "${query}" across wiki, semantic layer, or raw warehouse schema. Try broader terms; this concept may not exist yet.`,
        structured: { wiki, sl, raw },
      };
    }

    return { markdown: parts.join('\n'), structured: { wiki, sl, raw } };
  }
}
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
import type { KtxFileStorePort } from '../../../core/index.js';
|
||||||
|
import type { SlConnectionCatalogPort } from '../../../sl/index.js';
|
||||||
|
import type { BaseTool, ToolContext } from '../../../tools/index.js';
|
||||||
|
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||||
|
import { EntityDetailsTool } from './entity-details.tool.js';
|
||||||
|
import { SqlExecutionTool } from './sql-execution.tool.js';
|
||||||
|
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||||
|
|
||||||
|
export { DiscoverDataTool } from './discover-data.tool.js';
|
||||||
|
export { EntityDetailsTool } from './entity-details.tool.js';
|
||||||
|
export { SqlExecutionTool } from './sql-execution.tool.js';
|
||||||
|
export { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||||
|
export type { RawSchemaHit, TableDetail, WarehouseColumnDetail } from './warehouse-catalog.service.js';
|
||||||
|
|
||||||
|
/**
 * Builds the warehouse verification tool set: `EntityDetailsTool`,
 * `SqlExecutionTool` and `DiscoverDataTool`, in that order.
 *
 * @param deps.connections - Connection catalog handed to `SqlExecutionTool`.
 * @param deps.fallbackFileStore - File store used when the tool context's session
 *   does not provide a `configService`.
 * @param deps.wikiSearchTool - Wiki search tool that `DiscoverDataTool` delegates to.
 * @param deps.slDiscoverTool - Semantic layer discover tool that `DiscoverDataTool` delegates to.
 * @returns The three tools, sharing one catalog factory between
 *   `EntityDetailsTool` and `DiscoverDataTool`.
 */
export function createWarehouseVerificationTools(deps: {
  connections: SlConnectionCatalogPort;
  fallbackFileStore: KtxFileStorePort;
  wikiSearchTool: BaseTool;
  slDiscoverTool: BaseTool;
}): BaseTool[] {
  // A fresh catalog service is created per call, preferring the session's own store.
  const catalogFactory = (context: ToolContext) =>
    new WarehouseCatalogService({
      fileStore: context.session?.configService ?? deps.fallbackFileStore,
    });

  return [
    new EntityDetailsTool(catalogFactory),
    new SqlExecutionTool(deps.connections),
    new DiscoverDataTool({
      wikiSearchTool: deps.wikiSearchTool,
      slDiscoverTool: deps.slDiscoverTool,
      catalogFactory,
    }),
  ];
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue