mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
feat(context): add raw warehouse discovery tool
This commit is contained in:
parent
a4b0e37254
commit
9cd9cfbc75
3 changed files with 240 additions and 0 deletions
|
|
@ -0,0 +1,81 @@
|
|||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { BaseTool, ToolContext } from '../../../tools/index.js';
|
||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||
import type { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
// Unit tests for DiscoverDataTool: every collaborator (wiki search, semantic
// layer discovery, warehouse catalog) is replaced by a vitest mock so only the
// tool's own routing and markdown assembly are exercised.
describe('DiscoverDataTool', () => {
  type Mock = ReturnType<typeof vi.fn>;
  type MockedTool = BaseTool & { call: Mock };
  type MockedCatalog = WarehouseCatalogService & { searchByName: Mock };

  // Builds a stand-in tool whose only member is a mocked `call`.
  const makeMockedTool = (): MockedTool => ({ call: vi.fn() }) as unknown as MockedTool;

  const wikiSearch = makeMockedTool();
  const slDiscover = makeMockedTool();
  const warehouseCatalog = { searchByName: vi.fn() } as unknown as MockedCatalog;

  const toolContext: ToolContext = {
    sourceId: 'ingest',
    messageId: 'm1',
    userId: 'system',
    session: { allowedConnectionNames: new Set(['warehouse']) } as any,
  };

  const discoverData = new DiscoverDataTool({
    wikiSearchTool: wikiSearch,
    slDiscoverTool: slDiscover,
    catalogFactory: () => warehouseCatalog,
  });

  beforeEach(() => {
    // Start every test from clean mocks, then install the "one hit everywhere" defaults.
    for (const mock of [wikiSearch.call, slDiscover.call, warehouseCatalog.searchByName]) {
      mock.mockReset();
    }

    wikiSearch.call.mockResolvedValue({
      markdown: '- orders wiki',
      structured: { totalFound: 1, results: [{ key: 'orders' }] },
    });

    slDiscover.call.mockResolvedValue({
      markdown: '- orders source',
      structured: { totalSources: 1, sources: [{ sourceName: 'orders' }] },
    });

    warehouseCatalog.searchByName.mockResolvedValue([
      {
        kind: 'table',
        ref: { catalog: null, db: 'public', name: 'orders' },
        display: 'public.orders',
        matchedOn: 'name',
      },
    ]);
  });

  it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => {
    const result = await discoverData.call(
      { query: 'orders', connectionName: 'warehouse', limit: 5 },
      toolContext,
    );

    const expectedFragments = [
      '## Wiki Pages',
      'use `wiki_read(blockKey)` for full content',
      '## Semantic Layer Sources',
      'use `sl_read_source(sourceName)` for the YAML',
      '## Raw Warehouse Schema',
      'use `entity_details({connectionName, targets: [{display}]})`',
    ];
    for (const fragment of expectedFragments) {
      expect(result.markdown).toContain(fragment);
    }
    expect(result.structured.raw?.hits).toHaveLength(1);
  });

  it('delegates sourceName inspect mode to sl_discover only', async () => {
    slDiscover.call.mockResolvedValueOnce({
      markdown: 'source detail',
      structured: { sourceName: 'orders' },
    });

    const result = await discoverData.call({ sourceName: 'orders', connectionName: 'warehouse' }, toolContext);

    expect(slDiscover.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, toolContext);
    expect(wikiSearch.call).not.toHaveBeenCalled();
    expect(warehouseCatalog.searchByName).not.toHaveBeenCalled();
    expect(result.markdown).toContain('source detail');
  });

  it('returns the empty-state message when all sections are empty', async () => {
    wikiSearch.call.mockResolvedValueOnce({ markdown: '', structured: { totalFound: 0, results: [] } });
    slDiscover.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } });
    warehouseCatalog.searchByName.mockResolvedValueOnce([]);

    const result = await discoverData.call({ query: 'customer source', connectionName: 'warehouse' }, toolContext);

    expect(result.markdown).toContain(
      'No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.',
    );
  });
});
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
import { z } from 'zod';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
|
||||
import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';
|
||||
|
||||
/**
 * Pattern a `connectionName` must match: one ASCII letter or digit followed by
 * any number of ASCII letters, digits, underscores or hyphens.
 */
const connectionNamePattern = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/;

/**
 * Input accepted by the `discover_data` tool.
 *
 * - `query`: optional free-text search term.
 * - `connectionName`: optional connection to scope the search to.
 * - `limit`: positive integer, at most 50; defaults to 10.
 * - `sourceName`: optional semantic layer source to inspect.
 */
const discoverDataInputSchema = z.object({
  query: z.string().optional(),
  connectionName: z.string().regex(connectionNamePattern).optional(),
  limit: z.number().int().positive().max(50).optional().default(10),
  sourceName: z.string().optional(),
});

/** Input type inferred from {@link discoverDataInputSchema}. */
type DiscoverDataInput = z.infer<typeof discoverDataInputSchema>;
|
||||
|
||||
/**
 * Structured (machine-readable) half of the `discover_data` output.
 * A section is `null` when it was not populated for the current call.
 */
export interface DiscoverDataStructured {
  /** Structured payload passed through from the wiki search tool, or `null`. */
  wiki: unknown | null;
  /** Structured payload passed through from the semantic layer discover tool, or `null`. */
  sl: unknown | null;
  /** Raw warehouse schema hits, or `null` when there were none. */
  raw: { hits: Array<RawSchemaHit> } | null;
}
|
||||
|
||||
/** Collaborators injected into `DiscoverDataTool`. */
interface DiscoverDataDeps {
  /** Tool invoked with `{ query, limit }` to search wiki pages. */
  wikiSearchTool: BaseTool;
  /** Tool invoked to search semantic layer sources or inspect one by `sourceName`. */
  slDiscoverTool: BaseTool;
  /** Produces the warehouse catalog service to use for a given tool context. */
  catalogFactory: (context: ToolContext) => WarehouseCatalogService;
}
|
||||
|
||||
/**
 * Reads a numeric count stored under `key` on an opaque tool payload.
 *
 * @param structured - Untyped structured output of another tool.
 * @param key - Property name expected to hold the count.
 * @returns The property value when it is a finite number; otherwise `0`
 *   (payload is not a non-null object, the key is absent, or the value is
 *   not a finite number such as `NaN`, `Infinity` or a string).
 */
function numericField(structured: unknown, key: string): number {
  if (typeof structured !== 'object' || structured === null) {
    return 0;
  }
  // The payload is opaque here; read the property as `unknown` and validate it below.
  const value: unknown = (structured as Record<string, unknown>)[key];
  return typeof value === 'number' && Number.isFinite(value) ? value : 0;
}

/**
 * Number of hits reported in the payload's `totalFound` field.
 *
 * @param structured - Untyped structured output to inspect.
 * @returns The `totalFound` count, or `0` when it is missing or not a finite number.
 */
function totalFound(structured: unknown): number {
  return numericField(structured, 'totalFound');
}

/**
 * Number of sources reported in the payload's `totalSources` field.
 *
 * @param structured - Untyped structured output to inspect.
 * @returns The `totalSources` count, or `0` when it is missing or not a finite number.
 */
function totalSources(structured: unknown): number {
  return numericField(structured, 'totalSources');
}
|
||||
|
||||
/**
 * `discover_data` tool: runs one query against the wiki search tool, the
 * semantic layer discover tool and the raw warehouse catalog, and returns the
 * non-empty results as grouped markdown sections with routing hints.
 */
export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
  readonly name = 'discover_data';

  /**
   * @param deps - Wiki search tool, semantic layer discover tool and the
   *   factory that yields a warehouse catalog service for a tool context.
   */
  constructor(private readonly deps: DiscoverDataDeps) {
    super();
  }

  /** Human-readable description of the tool. */
  get description(): string {
    return 'Discover existing wiki pages, semantic layer sources, and raw warehouse schema hits before writing ingest output.';
  }

  /** Schema describing the accepted input. */
  get inputSchema() {
    return discoverDataInputSchema;
  }

  /**
   * Runs the discovery.
   *
   * When `input.sourceName` is set, only the semantic layer discover tool is
   * called and its output is returned as-is. Otherwise the wiki is searched
   * (only when the trimmed query is non-empty), then the semantic layer, then
   * the raw warehouse catalog for `input.connectionName` or, when absent, for
   * every connection name allowed by the session in sorted order.
   *
   * @param input - Query, optional connection scope, result limit and optional source name.
   * @param context - Tool context forwarded to the delegated tools and the catalog factory.
   * @returns Markdown with one section per non-empty result group (or an
   *   empty-state message) plus the matching structured payloads.
   */
  async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
    // Inspect mode: a source name short-circuits everything except sl_discover.
    if (input.sourceName) {
      const sl = await this.deps.slDiscoverTool.call(
        { sourceName: input.sourceName, connectionId: input.connectionName },
        context,
      );
      return { markdown: sl.markdown, structured: { wiki: null, sl: sl.structured, raw: null } };
    }

    const query = input.query?.trim() || '';
    // `call` can be invoked without schema parsing, so the default is re-applied here.
    const limit = input.limit ?? 10;
    const parts: string[] = [];
    let wiki: unknown | null = null;
    let sl: unknown | null = null;
    let raw: DiscoverDataStructured['raw'] = null;

    // The wiki search is skipped entirely when there is no query text.
    if (query) {
      const wikiResult = await this.deps.wikiSearchTool.call({ query, limit }, context);
      if (totalFound(wikiResult.structured) > 0) {
        parts.push('## Wiki Pages', '> use `wiki_read(blockKey)` for full content', wikiResult.markdown, '');
        wiki = wikiResult.structured;
      }
    }

    const slResult = await this.deps.slDiscoverTool.call(
      { query: query || undefined, connectionId: input.connectionName },
      context,
    );
    if (totalSources(slResult.structured) > 0) {
      parts.push(
        '## Semantic Layer Sources',
        '> use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details',
        slResult.markdown,
        '',
      );
      sl = slResult.structured;
    }

    const catalog = this.deps.catalogFactory(context);
    const connections = input.connectionName
      ? [input.connectionName]
      : [...(context.session?.allowedConnectionNames ?? [])].sort();
    const rawHits: RawSchemaHit[] = [];
    for (const connectionName of connections) {
      // Only the first `limit` hits are reported, so once that many are
      // collected further connections cannot change the result.
      if (rawHits.length >= limit) {
        break;
      }
      rawHits.push(...(await catalog.searchByName(connectionName, query, limit)));
    }

    // Slice once and reuse for both the markdown listing and the structured payload.
    const hits = rawHits.slice(0, limit);
    if (hits.length > 0) {
      parts.push(
        '## Raw Warehouse Schema',
        '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values',
      );
      parts.push(hits.map((hit) => `- ${hit.kind}: ${hit.display} (matched on ${hit.matchedOn})`).join('\n'));
      raw = { hits };
    }

    if (parts.length === 0) {
      return {
        markdown: `No matches for "${query}" across wiki, semantic layer, or raw warehouse schema. Try broader terms; this concept may not exist yet.`,
        structured: { wiki, sl, raw },
      };
    }

    return { markdown: parts.join('\n'), structured: { wiki, sl, raw } };
  }
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
import type { KtxFileStorePort } from '../../../core/index.js';
|
||||
import type { SlConnectionCatalogPort } from '../../../sl/index.js';
|
||||
import type { BaseTool, ToolContext } from '../../../tools/index.js';
|
||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||
import { EntityDetailsTool } from './entity-details.tool.js';
|
||||
import { SqlExecutionTool } from './sql-execution.tool.js';
|
||||
import { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
|
||||
export { DiscoverDataTool } from './discover-data.tool.js';
|
||||
export { EntityDetailsTool } from './entity-details.tool.js';
|
||||
export { SqlExecutionTool } from './sql-execution.tool.js';
|
||||
export { WarehouseCatalogService } from './warehouse-catalog.service.js';
|
||||
export type { RawSchemaHit, TableDetail, WarehouseColumnDetail } from './warehouse-catalog.service.js';
|
||||
|
||||
/**
 * Builds the warehouse verification tool set: entity details, SQL execution
 * and data discovery, in that order.
 *
 * @param deps - Connection catalog for SQL execution, the file store used when
 *   the session supplies no `configService`, and the wiki search / semantic
 *   layer discover tools that `DiscoverDataTool` delegates to.
 * @returns The three tools as a list.
 */
export function createWarehouseVerificationTools(deps: {
  connections: SlConnectionCatalogPort;
  fallbackFileStore: KtxFileStorePort;
  wikiSearchTool: BaseTool;
  slDiscoverTool: BaseTool;
}): BaseTool[] {
  // A new catalog service is created per invocation; the session's
  // `configService` is preferred over the fallback file store.
  function catalogFactory(context: ToolContext): WarehouseCatalogService {
    const fileStore = context.session?.configService ?? deps.fallbackFileStore;
    return new WarehouseCatalogService({ fileStore });
  }

  const entityDetails = new EntityDetailsTool(catalogFactory);
  const sqlExecution = new SqlExecutionTool(deps.connections);
  const discoverData = new DiscoverDataTool({
    wikiSearchTool: deps.wikiSearchTool,
    slDiscoverTool: deps.slDiscoverTool,
    catalogFactory,
  });

  return [entityDetails, sqlExecution, discoverData];
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue