diff --git a/packages/cli/package.json b/packages/cli/package.json index f7f0b214..539618f7 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -26,7 +26,7 @@ ], "scripts": { "assets:demo": "node scripts/build-demo-assets.mjs", - "build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node ../../scripts/prepare-cli-bin.mjs", + "build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node scripts/copy-runtime-assets.mjs && node ../../scripts/prepare-cli-bin.mjs", "docs:commands": "pnpm run build && node dist/print-command-tree.js", "smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000", "test": "vitest run --exclude src/standalone-smoke.test.ts --exclude src/example-smoke.test.ts --exclude src/setup-databases.test.ts --exclude src/scan.test.ts --exclude src/commands/connection-metabase-setup.test.ts --exclude src/setup-models.test.ts --exclude src/setup-sources.test.ts --exclude src/setup.test.ts --exclude src/connection.test.ts --exclude src/setup-embeddings.test.ts --exclude src/ingest.test.ts --exclude src/commands/connection-mapping.test.ts --exclude src/ingest-viz.test.ts --exclude src/demo.test.ts --exclude src/setup-project.test.ts --exclude src/sl.test.ts --exclude src/local-scan-connectors.test.ts --exclude src/commands/connection-notion.test.ts", diff --git a/packages/cli/scripts/copy-runtime-assets.mjs b/packages/cli/scripts/copy-runtime-assets.mjs new file mode 100644 index 00000000..a7c75658 --- /dev/null +++ b/packages/cli/scripts/copy-runtime-assets.mjs @@ -0,0 +1,11 @@ +import { cp, mkdir, rm } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const packageRoot = fileURLToPath(new URL('..', import.meta.url)); +const skillsSource = join(packageRoot, 'src', 'skills'); +const skillsTarget = join(packageRoot, 'dist', 'skills'); + +await 
rm(skillsTarget, { recursive: true, force: true }); +await mkdir(dirname(skillsTarget), { recursive: true }); +await cp(skillsSource, skillsTarget, { recursive: true }); diff --git a/packages/cli/src/setup-agents.test.ts b/packages/cli/src/setup-agents.test.ts index 19647a3f..325d6279 100644 --- a/packages/cli/src/setup-agents.test.ts +++ b/packages/cli/src/setup-agents.test.ts @@ -37,23 +37,28 @@ describe('setup agents', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('plans project-scoped CLI files for every target', () => { + it('plans project-scoped CLI and research files for every target', () => { expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'claude-code', scope: 'project', mode: 'cli' })).toEqual([ { kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(tempDir, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' }, { kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' }, ]); expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'codex', scope: 'project', mode: 'cli' })).toEqual([ { kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(tempDir, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' }, { kind: 'file', path: join(tempDir, '.codex/instructions/ktx.md'), role: 'rule' }, ]); expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'cursor', scope: 'project', mode: 'cli' })).toEqual([ { kind: 'file', path: join(tempDir, '.cursor/rules/ktx.mdc') }, + { kind: 'file', path: join(tempDir, '.cursor/rules/ktx-research.mdc'), role: 'research-skill' }, ]); expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'opencode', scope: 'project', mode: 'cli' })).toEqual([ { kind: 'file', path: join(tempDir, '.opencode/commands/ktx.md') }, + { kind: 'file', path: join(tempDir, '.opencode/commands/ktx-research.md'), role: 'research-skill' }, ]); 
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'universal', scope: 'project', mode: 'cli' })).toEqual([ { kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md') }, + { kind: 'file', path: join(tempDir, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' }, ]); }); @@ -97,6 +102,31 @@ describe('setup agents', () => { expect(io.stderr()).toBe(''); }); + it('installs the research skill from the runtime asset', async () => { + const io = makeIo(); + + await expect( + runKtxSetupAgentsStep( + { + projectDir: tempDir, + inputMode: 'disabled', + yes: true, + agents: true, + target: 'universal', + scope: 'project', + mode: 'cli', + skipAgents: false, + }, + io.io, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + const researchSkill = await readFile(join(tempDir, '.agents/skills/ktx-research/SKILL.md'), 'utf-8'); + expect(researchSkill).toContain('name: ktx-research'); + expect(researchSkill).toContain('Always run `discover_data` before writing SQL.'); + expect(researchSkill).toContain('Treat a `dictionary_search` miss as non-authoritative.'); + }); + it('writes PATH-independent launcher commands for skills', async () => { const io = makeIo(); diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index 7a18a969..429f36a4 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -45,7 +45,7 @@ export interface KtxAgentInstallManifest { installedAt: string; installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>; entries: Array< - | { kind: 'file'; path: string; role?: 'skill' | 'rule' } + | { kind: 'file'; path: string; role?: 'skill' | 'rule' | 'research-skill' } | { kind: 'json-key'; path: string; jsonPath: string[] } >; } @@ -72,6 +72,7 @@ export function plannedKtxAgentFiles(input: { const home = process.env.HOME ?? 
''; return [ { kind: 'file', path: join(home, '.claude/skills/ktx/SKILL.md'), role: 'skill' as const }, + { kind: 'file', path: join(home, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' as const }, { kind: 'file', path: join(home, '.claude/rules/ktx.md'), role: 'rule' as const }, ]; } @@ -79,25 +80,44 @@ export function plannedKtxAgentFiles(input: { const codexHome = process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex'); return [ { kind: 'file', path: join(codexHome, 'skills/ktx/SKILL.md'), role: 'skill' as const }, + { kind: 'file', path: join(codexHome, 'skills/ktx-research/SKILL.md'), role: 'research-skill' as const }, { kind: 'file', path: join(codexHome, 'instructions/ktx.md'), role: 'rule' as const }, ]; } + if (input.target === 'cursor' || input.target === 'opencode') { + return []; + } throw new Error(`Global ${input.target} installation is not supported; omit --global.`); } const root = resolve(input.projectDir); - const cliEntries: Partial> = { - 'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, - codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' }, - cursor: { kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') }, - opencode: { kind: 'file', path: join(root, '.opencode/commands/ktx.md') }, - universal: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') }, + const cliEntries: Partial> = { + 'claude-code': [ + { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(root, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' }, + ], + codex: [ + { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(root, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' }, + ], + cursor: [ + { kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') }, + { kind: 'file', path: join(root, 
'.cursor/rules/ktx-research.mdc'), role: 'research-skill' }, + ], + opencode: [ + { kind: 'file', path: join(root, '.opencode/commands/ktx.md') }, + { kind: 'file', path: join(root, '.opencode/commands/ktx-research.md'), role: 'research-skill' }, + ], + universal: [ + { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') }, + { kind: 'file', path: join(root, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' }, + ], }; const ruleEntries: Partial> = { 'claude-code': { kind: 'file', path: join(root, '.claude/rules/ktx.md'), role: 'rule' }, codex: { kind: 'file', path: join(root, '.codex/instructions/ktx.md'), role: 'rule' }, }; - return [cliEntries[input.target], ruleEntries[input.target]].filter( + return [...(cliEntries[input.target] ?? []), ruleEntries[input.target]].filter( (entry): entry is InstallEntry => entry !== undefined, ); } @@ -109,6 +129,12 @@ function ktxCliLauncher(): KtxCliLauncher { }; } +async function readResearchSkillContent(): Promise { + const path = fileURLToPath(new URL('./skills/research/SKILL.md', import.meta.url)); + const content = await readFile(path, 'utf-8'); + return content.endsWith('\n') ? content : `${content}\n`; +} + function shellQuote(value: string): string { if (/^[A-Za-z0-9_/:=.,@%+-]+$/.test(value)) { return value; @@ -327,6 +353,8 @@ async function installTarget(input: { const content = entry.role === 'rule' ? ruleInstructionContent({ projectDir: input.projectDir }) + : entry.role === 'research-skill' + ? 
await readResearchSkillContent() : cliInstructionContent({ projectDir: input.projectDir, launcher }); await mkdir(dirname(entry.path), { recursive: true }); await writeFile(entry.path, content, 'utf-8'); diff --git a/packages/cli/src/skills/research/SKILL.md b/packages/cli/src/skills/research/SKILL.md new file mode 100644 index 00000000..e8e354a3 --- /dev/null +++ b/packages/cli/src/skills/research/SKILL.md @@ -0,0 +1,49 @@ +--- +name: ktx-research +description: Use when answering a question that needs data from a KTX-connected database - investigating, analyzing, "how many", "show me", "what's the breakdown of", finding records by value, exploring tables, comparing periods, or any data-investigation request. Triggers even when the user does not say "research"; if the answer requires querying a configured KTX connection, this skill applies. +--- + +# KTX Research Workflow + +You have access to KTX MCP tools for investigating data. Follow this workflow. + + +1. **Discover** - call `discover_data` first to see what exists across wiki, semantic-layer sources, and raw tables. Returns refs only. +2. **Inspect top hits in parallel** - for each promising ref: + - `kind: 'wiki'` -> `wiki_read` + - `kind: 'sl_source'`, `kind: 'sl_measure'`, or `kind: 'sl_dimension'` -> `sl_read_source` + - `kind: 'table'` or `kind: 'column'` -> `entity_details` +3. **Resolve literals** - if the user named a value such as "Acme Corp" or "status=shipped", call `dictionary_search` to find which column holds it. +4. **Query** - + - Prefer `sl_query` when the semantic layer covers the question. + - Use `sql_execution` only for questions the semantic layer does not cover. +5. **Capture learnings** - at the end of the turn, call `memory_capture` so future turns benefit. Skip when the answer carries no durable knowledge. + + + +- Always run `discover_data` before writing SQL. Do not guess table names. 
+- Prefer the semantic layer over raw SQL when both can answer the question; measures are the source of truth. +- Read entity details before writing SQL against an unfamiliar table. Do not assume column names. +- Treat `sql_execution` as read-only. Writes are rejected by the server. +- Validate value mentions with `dictionary_search` instead of guessing case or spelling. Treat a `dictionary_search` miss as non-authoritative. The index is built from profile-sampled values, so a missing value may simply have been outside the sample. Follow up with `sql_execution` against the most plausible columns before concluding the value is absent. + + + +**Input:** "How many orders did Acme Corp place last month?" + +**Workflow:** +1. `discover_data({ query: "orders customer monthly" })` finds an orders semantic-layer source. +2. `dictionary_search({ values: ["Acme Corp"] })` finds `customers.name`. +3. `sl_read_source({ connectionId: "warehouse", sourceName: "orders_facts" })` confirms the source grain, measures, and dimensions. +4. `sl_query({ connectionId: "warehouse", measures: ["order_count"], filters: ["customer_name = 'Acme Corp'"] })` answers through the semantic layer. +5. `memory_capture({ userMessage, assistantMessage })` captures the durable finding. + +--- + +**Input:** "What columns does the events table have?" + +**Workflow:** +1. `discover_data({ query: "events table" })` returns a `table` ref. +2. `entity_details({ connectionId: "warehouse", entities: [{ table: "analytics.events" }] })` returns columns, types, and foreign keys. +3. Answer directly. No query is needed. +