Merge remote-tracking branch 'origin/main' into fix-wiki-sl-search-output

# Conflicts:
#	packages/cli/src/status-project.ts
2026-06-22 08:38:08 +02:00 · 2026-05-15 05:51:37 -07:00 · 2026-05-15 05:51:37 -07:00 · de40d05c06
commit de40d05c06
parent 3424d8ff2b beeeda4437
177 changed files with 17742 additions and 1412 deletions
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -26,7 +26,8 @@
  ],
  "scripts": {
    "assets:demo": "node scripts/build-demo-assets.mjs",
-    "build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node ../../scripts/prepare-cli-bin.mjs",
+    "build": "tsc -p tsconfig.json && node scripts/copy-runtime-assets.mjs && node ../../scripts/prepare-cli-bin.mjs",
+    "clean": "node -e \"fs.rmSync('dist', { recursive: true, force: true }); fs.rmSync('node_modules/.cache/tsc.tsbuildinfo', { force: true })\"",
    "docs:commands": "pnpm run build && node dist/print-command-tree.js",
    "smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000",
    "test": "vitest run --exclude src/standalone-smoke.test.ts --exclude src/example-smoke.test.ts --exclude src/setup-databases.test.ts --exclude src/scan.test.ts --exclude src/commands/connection-metabase-setup.test.ts --exclude src/setup-models.test.ts --exclude src/setup-sources.test.ts --exclude src/setup.test.ts --exclude src/connection.test.ts --exclude src/setup-embeddings.test.ts --exclude src/ingest.test.ts --exclude src/commands/connection-mapping.test.ts --exclude src/ingest-viz.test.ts --exclude src/demo.test.ts --exclude src/setup-project.test.ts --exclude src/sl.test.ts --exclude src/local-scan-connectors.test.ts --exclude src/commands/connection-notion.test.ts",
@@ -45,6 +46,7 @@
    "@ktx/connector-sqlserver": "workspace:*",
    "@ktx/context": "workspace:*",
    "@ktx/llm": "workspace:*",
+    "@modelcontextprotocol/sdk": "^1.29.0",
    "commander": "14.0.3",
    "ink": "^7.0.2",
    "react": "^19.2.6",
--- a/packages/cli/scripts/copy-runtime-assets.mjs
+++ b/packages/cli/scripts/copy-runtime-assets.mjs
@@ -0,0 +1,11 @@
+import { cp, mkdir, rm } from 'node:fs/promises';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const packageRoot = fileURLToPath(new URL('..', import.meta.url));
+const skillsSource = join(packageRoot, 'src', 'skills');
+const skillsTarget = join(packageRoot, 'dist', 'skills');
+
+await rm(skillsTarget, { recursive: true, force: true });
+await mkdir(dirname(skillsTarget), { recursive: true });
+await cp(skillsSource, skillsTarget, { recursive: true });
--- a/packages/cli/src/cli-program.ts
+++ b/packages/cli/src/cli-program.ts
@@ -5,6 +5,7 @@ import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
 import { registerConnectionCommands } from './commands/connection-commands.js';
 import { registerIngestCommands } from './commands/ingest-commands.js';
 import { registerWikiCommands } from './commands/knowledge-commands.js';
+import { registerMcpCommands } from './commands/mcp-commands.js';
 import { registerSetupCommands } from './commands/setup-commands.js';
 import { registerSlCommands } from './commands/sl-commands.js';
 import { registerStatusCommands } from './commands/status-commands.js';
@@ -55,9 +56,11 @@ type CommandPathNode = CommandWithGlobalOptions & {
  parent?: CommandPathNode | null;
 };

-const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status']);
+const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'mcp']);
 const COMMANDS_THAT_CREATE_PROJECT = new Set(['setup', 'ktx dev init']);
 const COMMANDS_WITH_OWN_MISSING_PROJECT_HANDLING = new Set(['status']);
+const GLOBAL_OPTIONS_WITH_VALUE = new Set(['--project-dir']);
+const GLOBAL_OPTIONS_WITHOUT_VALUE = new Set(['--debug', '--help', '-h', '--version', '-v']);

 class KtxProjectMissingAbortError extends Error {
  readonly isKtxProjectMissingAbort = true;
@@ -72,24 +75,6 @@ function isKtxProjectMissingAbortError(error: unknown): error is KtxProjectMissi
    (typeof error === 'object' && error !== null && (error as { isKtxProjectMissingAbort?: unknown }).isKtxProjectMissingAbort === true)
  );
 }
-const REMOVED_COMMAND_PATHS = new Set([
-  'scan',
-  'wiki read',
-  'wiki write',
-]);
-const GLOBAL_OPTIONS_WITH_VALUE = new Set(['--project-dir']);
-const OPTIONS_WITH_VALUE = new Set([
-  '--project-dir',
-  '--query-history-window-days',
-  '--user-id',
-  '--limit',
-  '--format',
-  '--connection-id',
-  '--source-name',
-  '--query-file',
-  '--max-rows',
-]);
-
 export interface CommandWithGlobalOptions {
  opts: () => object;
  optsWithGlobals?: () => object;
@@ -336,43 +321,32 @@ function formatCliError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
 }

-function commandPathFromArgv(argv: string[]): string[] {
-  const path: string[] = [];
-  for (let index = 0; index < argv.length && path.length < 2; index += 1) {
+function firstTopLevelCommandToken(argv: string[]): string | null {
+  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (arg === undefined) {
      continue;
    }
    if (arg === '--') {
-      break;
+      return null;
    }
-    if ((path.length === 0 ? GLOBAL_OPTIONS_WITH_VALUE : OPTIONS_WITH_VALUE).has(arg)) {
+    if (GLOBAL_OPTIONS_WITH_VALUE.has(arg)) {
      index += 1;
      continue;
    }
-    const optionsWithValue = path.length === 0 ? GLOBAL_OPTIONS_WITH_VALUE : OPTIONS_WITH_VALUE;
-    if ([...optionsWithValue].some((option) => arg.startsWith(`${option}=`))) {
+    if ([...GLOBAL_OPTIONS_WITH_VALUE].some((option) => arg.startsWith(`${option}=`))) {
      continue;
    }
-    if (path.length === 0 && arg === '--debug') {
+    if (GLOBAL_OPTIONS_WITHOUT_VALUE.has(arg) || arg.startsWith('-')) {
      continue;
    }
-    if (arg.startsWith('-')) {
-      continue;
-    }
-    path.push(arg);
+    return arg;
  }
-  return path;
+  return null;
 }

-function removedCommandName(argv: string[]): string | null {
-  const path = commandPathFromArgv(argv);
-  if (path.length === 0) {
-    return null;
-  }
-
-  const pathKey = path.join(' ');
-  return REMOVED_COMMAND_PATHS.has(pathKey) ? path.at(-1) ?? null : null;
+function isKnownTopLevelCommand(program: Command, commandName: string): boolean {
+  return program.commands.some((command) => command.name() === commandName || command.aliases().includes(commandName));
 }

 async function runBareInteractiveCommand(
@@ -439,6 +413,7 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
  registerWikiCommands(program, context);
  registerSlCommands(program, context);
  registerStatusCommands(program, context);
+  registerMcpCommands(program, context);
  registerDevCommands(program, context);

  return program;
@@ -489,9 +464,9 @@ export async function runCommanderKtxCli(
    return 0;
  }

-  const removedCommand = removedCommandName(argv);
-  if (removedCommand) {
-    io.stderr.write(`error: unknown command '${removedCommand}'\n`);
+  const topLevelCommand = firstTopLevelCommandToken(argv);
+  if (topLevelCommand && !isKnownTopLevelCommand(program, topLevelCommand)) {
+    io.stderr.write(`error: unknown command '${topLevelCommand}'\n`);
    return 1;
  }

--- a/packages/cli/src/cli-runtime.ts
+++ b/packages/cli/src/cli-runtime.ts
@@ -34,6 +34,12 @@ export interface KtxCliDeps {
  runtime?: (args: KtxRuntimeArgs, io: KtxCliIo) => Promise<number>;
  knowledge?: (args: KtxKnowledgeArgs, io: KtxCliIo) => Promise<number>;
  sl?: (args: KtxSlArgs, io: KtxCliIo) => Promise<number>;
+  mcp?: {
+    startDaemon?: typeof import('./managed-mcp-daemon.js').startKtxMcpDaemon;
+    stopDaemon?: typeof import('./managed-mcp-daemon.js').stopKtxMcpDaemon;
+    readStatus?: typeof import('./managed-mcp-daemon.js').readKtxMcpDaemonStatus;
+    runServer?: typeof import('./mcp-http-server.js').runKtxMcpHttpServer;
+  };
 }

 export function getKtxCliPackageInfo(): KtxCliPackageInfo {
--- a/packages/cli/src/commands/mcp-commands.test.ts
+++ b/packages/cli/src/commands/mcp-commands.test.ts
@@ -0,0 +1,57 @@
+import { Command } from '@commander-js/extra-typings';
+import { describe, expect, it, vi } from 'vitest';
+import type { KtxCliCommandContext } from '../cli-program.js';
+import { registerMcpCommands } from './mcp-commands.js';
+
+function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliCommandContext {
+  let exitCode = 0;
+  return {
+    io: {
+      stdout: { write: vi.fn() },
+      stderr: { write: vi.fn() },
+    },
+    deps: {},
+    packageInfo: { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' },
+    setExitCode: (code) => {
+      exitCode = code;
+    },
+    runInit: vi.fn(),
+    writeDebug: vi.fn(),
+    ...overrides,
+    get exitCode() {
+      return exitCode;
+    },
+  } as KtxCliCommandContext;
+}
+
+describe('registerMcpCommands', () => {
+  it('registers the public mcp lifecycle commands', () => {
+    const program = new Command().exitOverride();
+    registerMcpCommands(program, makeContext());
+    const mcp = program.commands.find((command) => command.name() === 'mcp');
+
+    expect(mcp?.commands.map((command) => command.name()).sort()).toEqual([
+      'logs',
+      'serve-internal',
+      'start',
+      'status',
+      'stop',
+    ]);
+    expect(
+      (mcp?.commands.find((command) => command.name() === 'serve-internal') as { _hidden?: boolean } | undefined)
+        ?._hidden,
+    ).toBe(true);
+  });
+
+  it('rejects non-loopback start without token before spawning', async () => {
+    const program = new Command().exitOverride();
+    const startDaemon = vi.fn();
+    const context = makeContext({ deps: { mcp: { startDaemon } } });
+    registerMcpCommands(program, context);
+
+    await expect(program.parseAsync(['mcp', 'start', '--host', '0.0.0.0'], { from: 'user' })).rejects.toThrow(
+      'Binding KTX MCP to 0.0.0.0 requires --token or KTX_MCP_TOKEN',
+    );
+    expect(startDaemon).not.toHaveBeenCalled();
+  });
+});
--- a/packages/cli/src/commands/mcp-commands.ts
+++ b/packages/cli/src/commands/mcp-commands.ts
@@ -0,0 +1,136 @@
+import { spawn } from 'node:child_process';
+import { readFile } from 'node:fs/promises';
+import { fileURLToPath } from 'node:url';
+import { Command } from '@commander-js/extra-typings';
+import type { KtxCliCommandContext } from '../cli-program.js';
+import {
+  collectOption,
+  parsePositiveIntegerOption,
+  resolveCommandProjectDir,
+} from '../cli-program.js';
+import {
+  mcpDaemonLayout,
+  readKtxMcpDaemonStatus,
+  startKtxMcpDaemon,
+  stopKtxMcpDaemon,
+} from '../managed-mcp-daemon.js';
+import { buildMcpSecurityConfig, runKtxMcpHttpServer } from '../mcp-http-server.js';
+
+function tokenFromOption(value: string | undefined): string | undefined {
+  return value ?? process.env.KTX_MCP_TOKEN;
+}
+
+function binPath(): string {
+  return fileURLToPath(new URL('../bin.js', import.meta.url));
+}
+
+export function registerMcpCommands(program: Command, context: KtxCliCommandContext): void {
+  const mcp = program.command('mcp').description('Run the KTX MCP HTTP server');
+
+  mcp
+    .command('start')
+    .description('Start the KTX MCP HTTP server')
+    .option('--host <host>', 'Host to bind', '127.0.0.1')
+    .option('--port <n>', 'Port to bind', parsePositiveIntegerOption, 7878)
+    .option('--token <token>', 'Bearer token required for non-loopback binding')
+    .option('--foreground', 'Run in the foreground', false)
+    .option('--allowed-host <host>', 'Additional allowed Host header', collectOption, [])
+    .option('--allowed-origin <origin>', 'Allowed browser Origin header', collectOption, [])
+    .action(async (options, command) => {
+      const projectDir = resolveCommandProjectDir(command);
+      const token = tokenFromOption(options.token);
+      buildMcpSecurityConfig({
+        host: options.host,
+        port: options.port,
+        token,
+        allowedHosts: options.allowedHost,
+        allowedOrigins: options.allowedOrigin,
+      });
+      if (options.foreground) {
+        await (context.deps.mcp?.runServer ?? runKtxMcpHttpServer)({
+          projectDir,
+          cliVersion: context.packageInfo.version,
+          host: options.host,
+          port: options.port,
+          token,
+          allowedHosts: options.allowedHost,
+          allowedOrigins: options.allowedOrigin,
+          io: context.io,
+        });
+        context.io.stdout.write(`KTX MCP server listening at http://${options.host}:${options.port}/mcp\n`);
+        return;
+      }
+      const result = await (context.deps.mcp?.startDaemon ?? startKtxMcpDaemon)({
+        projectDir,
+        cliVersion: context.packageInfo.version,
+        host: options.host,
+        port: options.port,
+        token,
+        allowedHosts: options.allowedHost,
+        allowedOrigins: options.allowedOrigin,
+        binPath: binPath(),
+      });
+      context.io.stdout.write(`KTX MCP daemon started: ${result.url}\n`);
+    });
+
+  mcp
+    .command('stop')
+    .description('Stop the KTX MCP daemon')
+    .action(async (_options, command) => {
+      const result = await (context.deps.mcp?.stopDaemon ?? stopKtxMcpDaemon)({
+        projectDir: resolveCommandProjectDir(command),
+      });
+      context.io.stdout.write(result.status === 'stopped' ? 'KTX MCP daemon stopped.\n' : 'KTX MCP daemon is not running.\n');
+    });
+
+  mcp
+    .command('status')
+    .description('Show KTX MCP daemon status')
+    .action(async (_options, command) => {
+      const status = await (context.deps.mcp?.readStatus ?? readKtxMcpDaemonStatus)({
+        projectDir: resolveCommandProjectDir(command),
+      });
+      context.io.stdout.write(`${status.detail}\n`);
+      if (status.kind === 'running') {
+        context.io.stdout.write(`URL: ${status.url}\n`);
+        context.io.stdout.write(`PID: ${status.state.pid}\n`);
+        context.io.stdout.write(`Token auth: ${status.state.tokenAuth ? 'enabled' : 'disabled'}\n`);
+        context.io.stdout.write(`Project: ${status.state.projectDir}\n`);
+      }
+    });
+
+  mcp
+    .command('logs')
+    .description('Print the KTX MCP daemon log')
+    .option('--follow', 'Follow log output', false)
+    .action(async (options, command) => {
+      const logPath = mcpDaemonLayout(resolveCommandProjectDir(command)).logPath;
+      if (options.follow) {
+        const child = spawn('tail', ['-f', logPath], { stdio: ['ignore', 'pipe', 'pipe'] });
+        child.stdout?.on('data', (chunk: Buffer) => context.io.stdout.write(chunk.toString('utf8')));
+        child.stderr?.on('data', (chunk: Buffer) => context.io.stderr.write(chunk.toString('utf8')));
+        await new Promise((resolve) => child.on('close', resolve));
+        return;
+      }
+      context.io.stdout.write(await readFile(logPath, 'utf8'));
+    });
+
+  mcp
+    .command('serve-internal', { hidden: true })
+    .option('--host <host>', 'Host to bind', '127.0.0.1')
+    .requiredOption('--port <n>', 'Port to bind', parsePositiveIntegerOption)
+    .option('--allowed-host <host>', 'Additional allowed Host header', collectOption, [])
+    .option('--allowed-origin <origin>', 'Allowed browser Origin header', collectOption, [])
+    .action(async (options, command) => {
+      await (context.deps.mcp?.runServer ?? runKtxMcpHttpServer)({
+        projectDir: resolveCommandProjectDir(command),
+        cliVersion: context.packageInfo.version,
+        host: options.host,
+        port: options.port,
+        token: process.env.KTX_MCP_TOKEN,
+        allowedHosts: options.allowedHost,
+        allowedOrigins: options.allowedOrigin,
+        io: context.io,
+      });
+    });
+}
--- a/packages/cli/src/commands/setup-commands.ts
+++ b/packages/cli/src/commands/setup-commands.ts
@@ -90,6 +90,7 @@ function shouldShowSetupEntryMenu(
    agents?: boolean;
    target?: string;
    global?: boolean;
+    local?: boolean;
    skipAgents?: boolean;
    yes?: boolean;
    input?: boolean;
@@ -163,6 +164,7 @@ function shouldShowSetupEntryMenu(
    'agents',
    'target',
    'global',
+    'local',
    'skipAgents',
    'yes',
    'input',
@@ -223,6 +225,7 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
      ]),
    )
    .option('--global', 'Install agent integration into the global target scope', false)
+    .option('--local', 'Install Claude Code MCP config into the private per-project ~/.claude.json scope', false)
    .addOption(new Option('--skip-agents', 'Leave agent integration incomplete for now').hideHelp().default(false))
    .option('--yes', 'Accept safe defaults in non-interactive setup', false)
    .option('--no-input', 'Disable interactive terminal input')
@@ -392,9 +395,19 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
      context.setExitCode(1);
      return;
    }
+    if (options.local && options.global) {
+      context.io.stderr.write('Choose only one agent scope: --local or --global.\n');
+      context.setExitCode(1);
+      return;
+    }
+    if (options.local && options.target && options.target !== 'claude-code') {
+      context.io.stderr.write('--local is only supported with --target claude-code.\n');
+      context.setExitCode(1);
+      return;
+    }

    const mode = options.new ? 'new' : options.existing ? 'existing' : 'auto';
-    const resolvedAgentScope = options.global ? 'global' : 'project';
+    const resolvedAgentScope = options.local ? 'local' : options.global ? 'global' : 'project';
    await runSetupArgs(context, {
      command: 'run',
      projectDir: resolveCommandProjectDir(command),
--- a/packages/cli/src/connection.test.ts
+++ b/packages/cli/src/connection.test.ts
@@ -489,15 +489,17 @@ describe('runKtxConnection', () => {
  it('rejects unknown drivers with a helpful error', async () => {
    const projectDir = join(tempDir, 'project');
    await initKtxProject({ projectDir });
-    await writeConnections(projectDir, {
-      mystery: { driver: 'duckdb' },
-    });
+    await writeFile(
+      join(projectDir, 'ktx.yaml'),
+      'connections:\n  mystery:\n    driver: duckdb\n',
+      'utf-8',
+    );
    const io = makeIo();

    await expect(
      runKtxConnection({ command: 'test', projectDir, connectionId: 'mystery' }, io.io),
    ).resolves.toBe(1);
-    expect(io.stderr()).toContain('uses driver "duckdb"');
-    expect(io.stderr()).toContain('Supported:');
+    expect(io.stderr()).toContain('connections.mystery.driver');
+    expect(io.stderr()).toContain('postgres');
  });
 });
--- a/packages/cli/src/doctor.test.ts
+++ b/packages/cli/src/doctor.test.ts
@@ -64,6 +64,11 @@ describe('formatDoctorReport', () => {
    expect(output).toContain('Node 22+ · pnpm 10.20+');
    expect(output).not.toContain('v22.16.0');
    expect(output).toContain('Everything ready.');
+    expect(output).toContain('ktx status --json');
+    expect(output).toContain('ktx sl list');
+    expect(output).toContain('ktx wiki list');
+    expect(output).not.toContain('ktx scan');
+    expect(output).not.toContain('ktx sl ask');
  });

  it('shows the underlying detail for a single-check group on the group line', () => {
@@ -462,6 +467,7 @@ describe('runKtxDoctor', () => {
  it('includes Postgres query-history readiness in project doctor output', async () => {
    process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
    process.env.OPENAI_API_KEY = 'test-key'; // pragma: allowlist secret
+    process.env.WAREHOUSE_DATABASE_URL = 'postgresql://reader@example.test/warehouse';
    await writeFile(
      join(tempDir, 'ktx.yaml'),
      [
@@ -516,8 +522,14 @@ describe('runKtxDoctor', () => {
    expect(out).toContain('pg_stat_statements ready (PostgreSQL 16.4)');
    expect(out).toContain('info: pg_stat_statements.max is 1000');
    expect(out).not.toContain('Update the Postgres parameter group or config');
+    expect(out).toContain('ktx status --json');
+    expect(out).toContain('ktx sl list');
+    expect(out).toContain('ktx wiki list');
+    expect(out).not.toContain('ktx scan');
+    expect(out).not.toContain('ktx sl ask');
    delete process.env.ANTHROPIC_API_KEY;
    delete process.env.OPENAI_API_KEY;
+    delete process.env.WAREHOUSE_DATABASE_URL;
  });

  it('returns blocked verdict when LLM is not configured', async () => {
@@ -543,6 +555,7 @@ describe('runKtxDoctor', () => {
    ).resolves.toBe(1);

    expect(testIo.stdout()).toContain('no LLM configured');
+    expect(testIo.stdout()).not.toContain('ktx ask');
    expect(testIo.stdout()).toContain('ktx setup');
  });

--- a/packages/cli/src/doctor.ts
+++ b/packages/cli/src/doctor.ts
@@ -5,6 +5,7 @@ import { join, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { promisify } from 'node:util';
 import type { KtxConfigIssue } from '@ktx/context/project';
+import { KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js';
 import type { BuildProjectStatusOptions } from './status-project.js';

 const execFileAsync = promisify(execFile);
@@ -287,7 +288,7 @@ interface RenderOptions {
  command?: 'setup' | 'project';
 }

-const NEXT_STEPS_PROJECT = ['ktx scan', 'ktx wiki', 'ktx sl ask "…"'];
+const NEXT_STEPS_PROJECT = KTX_NEXT_STEP_DIRECT_COMMANDS.map((step) => step.command);

 export function formatDoctorReport(report: DoctorReport, options: Partial<RenderOptions> = {}): string {
  const opts: RenderOptions = {
--- a/packages/cli/src/index.test.ts
+++ b/packages/cli/src/index.test.ts
@@ -440,6 +440,7 @@ describe('runKtxCli', () => {
    expect(stdout).toContain('--agents');
    expect(stdout).toContain('--target <target>');
    expect(stdout).toContain('--global');
+    expect(stdout).toContain('--local');
    expect(stdout).toContain('--yes');
    expect(stdout).toContain('--no-input');
    expect(stdout).toContain('Global Options:');
@@ -1286,6 +1287,38 @@ describe('runKtxCli', () => {
    );
  });

+  it('rejects --local with non-Claude targets', async () => {
+    const setup = vi.fn(async () => 0);
+    const setupIo = makeIo();
+
+    await expect(
+      runKtxCli(
+        ['--project-dir', tempDir, 'setup', '--agents', '--target', 'cursor', '--local', '--no-input'],
+        setupIo.io,
+        { setup },
+      ),
+    ).resolves.toBe(1);
+
+    expect(setupIo.stderr()).toContain('--local is only supported with --target claude-code');
+    expect(setup).not.toHaveBeenCalled();
+  });
+
+  it('rejects --local and --global together', async () => {
+    const setup = vi.fn(async () => 0);
+    const setupIo = makeIo();
+
+    await expect(
+      runKtxCli(
+        ['--project-dir', tempDir, 'setup', '--agents', '--target', 'claude-code', '--local', '--global', '--no-input'],
+        setupIo.io,
+        { setup },
+      ),
+    ).resolves.toBe(1);
+
+    expect(setupIo.stderr()).toContain('Choose only one agent scope: --local or --global.');
+    expect(setup).not.toHaveBeenCalled();
+  });
+
  it('rejects source-path with source-git-url', async () => {
    const setup = vi.fn(async () => 0);
    const testIo = makeIo();
--- a/packages/cli/src/ingest.test-utils.ts
+++ b/packages/cli/src/ingest.test-utils.ts
@@ -109,6 +109,7 @@ export async function writeWarehouseConfig(projectDir: string): Promise<void> {
      'connections:',
      '  prod-metabase:',
      '    driver: metabase',
+      '    api_url: https://metabase.example.test',
      '  warehouse_a:',
      '    driver: postgres',
      'ingest:',
--- a/packages/cli/src/io/print-list.ts
+++ b/packages/cli/src/io/print-list.ts
@@ -43,13 +43,13 @@ export interface PrintListArgs<Row> {
  io: KtxCliIo;
 }

-export interface KtxJsonResultEnvelope<T> {
+interface KtxJsonResultEnvelope<T> {
  kind: string;
  data: T;
  meta?: Record<string, unknown>;
 }

-export function writeJsonResult<T>(io: KtxCliIo, envelope: KtxJsonResultEnvelope<T>): void {
+function writeJsonResult<T>(io: KtxCliIo, envelope: KtxJsonResultEnvelope<T>): void {
  io.stdout.write(`${JSON.stringify(envelope, null, 2)}\n`);
 }

--- a/packages/cli/src/knowledge.test.ts
+++ b/packages/cli/src/knowledge.test.ts
@@ -1,8 +1,9 @@
 import { mkdtemp, rm } from 'node:fs/promises';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
-import { initKtxProject } from '@ktx/context/project';
+import { initKtxProject, loadKtxProject } from '@ktx/context/project';
 import type { KtxEmbeddingPort } from '@ktx/context';
+import { writeLocalKnowledgePage } from '@ktx/context/wiki';
 import { afterEach, beforeEach, describe, expect, it } from 'vitest';
 import { runKtxKnowledge } from './knowledge.js';

@@ -40,6 +41,28 @@ class FakeEmbeddingPort implements KtxEmbeddingPort {
  }
 }

+interface WikiPageFixture {
+  key?: string;
+  summary?: string;
+  content?: string;
+  tags?: string[];
+  slRefs?: string[];
+}
+
+async function seedWikiPage(projectDir: string, fixture: WikiPageFixture = {}): Promise<void> {
+  const project = await loadKtxProject({ projectDir });
+  await writeLocalKnowledgePage(project, {
+    key: fixture.key ?? 'metrics-revenue',
+    scope: 'GLOBAL',
+    userId: 'local',
+    summary: fixture.summary ?? 'Revenue',
+    content: fixture.content ?? 'Revenue is paid order value.',
+    tags: fixture.tags ?? ['finance'],
+    refs: [],
+    slRefs: fixture.slRefs ?? ['orders'],
+  });
+}
+
 describe('runKtxKnowledge', () => {
  let tempDir: string;

@@ -51,36 +74,10 @@ describe('runKtxKnowledge', () => {
    await rm(tempDir, { recursive: true, force: true });
  });

-  it('writes, reads, lists, and searches wiki pages', async () => {
+  it('lists and searches wiki pages', async () => {
    const projectDir = join(tempDir, 'project');
    await initKtxProject({ projectDir });
-
-    const writeIo = makeIo();
-    await expect(
-      runKtxKnowledge(
-        {
-          command: 'write',
-          projectDir,
-          key: 'metrics-revenue',
-          scope: 'GLOBAL',
-          userId: 'local',
-          summary: 'Revenue',
-          content: 'Revenue is paid order value.',
-          tags: ['finance'],
-          refs: [],
-          slRefs: ['orders'],
-        },
-        writeIo.io,
-      ),
-    ).resolves.toBe(0);
-    expect(writeIo.stdout()).toContain('Wrote wiki/global/metrics-revenue.md');
-
-    const readIo = makeIo();
-    await expect(
-      runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local' }, readIo.io),
-    ).resolves.toBe(0);
-    expect(readIo.stdout()).toContain('# metrics-revenue');
-    expect(readIo.stdout()).toContain('Revenue is paid order value.');
+    await seedWikiPage(projectDir);

    const listIo = makeIo();
    await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local' }, listIo.io)).resolves.toBe(0);
@@ -93,27 +90,10 @@ describe('runKtxKnowledge', () => {
    expect(searchIo.stdout()).toContain('metrics-revenue');
  });

-  it('prints wiki list, search, and read as public JSON envelopes', async () => {
+  it('prints wiki list and search as public JSON envelopes', async () => {
    const projectDir = join(tempDir, 'project');
    await initKtxProject({ projectDir });
-
-    await expect(
-      runKtxKnowledge(
-        {
-          command: 'write',
-          projectDir,
-          key: 'metrics-revenue',
-          scope: 'GLOBAL',
-          userId: 'local',
-          summary: 'Revenue',
-          content: 'Revenue is paid order value.',
-          tags: ['finance'],
-          refs: [],
-          slRefs: ['orders'],
-        },
-        makeIo().io,
-      ),
-    ).resolves.toBe(0);
+    await seedWikiPage(projectDir);

    const listIo = makeIo();
    await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local', json: true }, listIo.io)).resolves.toBe(
@@ -137,48 +117,6 @@ describe('runKtxKnowledge', () => {
      data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] },
      meta: { command: 'wiki search' },
    });
-
-    const readIo = makeIo();
-    await expect(
-      runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local', json: true }, readIo.io),
-    ).resolves.toBe(0);
-    expect(JSON.parse(readIo.stdout())).toMatchObject({
-      kind: 'wiki.page',
-      data: {
-        key: 'metrics-revenue',
-        summary: 'Revenue',
-        content: 'Revenue is paid order value.',
-      },
-    });
-  });
-
-  it('rejects slash-delimited write keys with a flat-key suggestion', async () => {
-    const projectDir = join(tempDir, 'project');
-    await initKtxProject({ projectDir });
-
-    const writeIo = makeIo();
-    await expect(
-      runKtxKnowledge(
-        {
-          command: 'write',
-          projectDir,
-          key: 'orbit/company-overview',
-          scope: 'GLOBAL',
-          userId: 'local',
-          summary: 'Orbit',
-          content: 'Orbit overview.',
-          tags: [],
-          refs: [],
-          slRefs: [],
-        },
-        writeIo.io,
-      ),
-    ).resolves.toBe(1);
-
-    expect(writeIo.stderr()).toContain(
-      'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".',
-    );
-    expect(writeIo.stdout()).toBe('');
  });

  it('explains empty search results for a project without wiki pages', async () => {
@@ -198,24 +136,13 @@ describe('runKtxKnowledge', () => {
  it('uses configured embeddings for semantic wiki search', async () => {
    const projectDir = join(tempDir, 'semantic-project');
    await initKtxProject({ projectDir });
-
-    await expect(
-      runKtxKnowledge(
-        {
-          command: 'write',
-          projectDir,
-          key: 'active-contract-arr-open-tickets',
-          scope: 'GLOBAL',
-          userId: 'local',
-          summary: 'Active Contract ARR Ranked by Open Support Ticket Count',
-          content: 'Accounts ranked by annual recurring contract value and support ticket load.',
-          tags: ['historic-sql'],
-          refs: [],
-          slRefs: [],
-        },
-        makeIo().io,
-      ),
-    ).resolves.toBe(0);
+    await seedWikiPage(projectDir, {
+      key: 'active-contract-arr-open-tickets',
+      summary: 'Active Contract ARR Ranked by Open Support Ticket Count',
+      content: 'Accounts ranked by annual recurring contract value and support ticket load.',
+      tags: ['historic-sql'],
+      slRefs: [],
+    });

    const searchIo = makeIo();
    await expect(
--- a/packages/cli/src/knowledge.ts
+++ b/packages/cli/src/knowledge.ts
@@ -5,20 +5,16 @@ import {
 } from '@ktx/context';
 import { loadKtxProject } from '@ktx/context/project';
 import {
-  type LocalKnowledgeScope,
  type LocalKnowledgeSearchResult,
  type LocalKnowledgeSummary,
  listLocalKnowledgePages,
-  readLocalKnowledgePage,
  searchLocalKnowledgePages,
-  writeLocalKnowledgePage,
 } from '@ktx/context/wiki';
 import { resolveOutputMode } from './io/mode.js';
-import { printList, type PrintListColumn, writeJsonResult } from './io/print-list.js';
+import { printList, type PrintListColumn } from './io/print-list.js';

 export type KtxKnowledgeArgs =
  | { command: 'list'; projectDir: string; userId: string; output?: string; json?: boolean }
-  | { command: 'read'; projectDir: string; key: string; userId: string; json?: boolean }
  | {
      command: 'search';
      projectDir: string;
@@ -27,18 +23,6 @@ export type KtxKnowledgeArgs =
      output?: string;
      json?: boolean;
      limit?: number;
-    }
-  | {
-      command: 'write';
-      projectDir: string;
-      key: string;
-      scope: LocalKnowledgeScope;
-      userId: string;
-      summary: string;
-      content: string;
-      tags: string[];
-      refs: string[];
-      slRefs: string[];
    };

 type KtxKnowledgeIo = import('./cli-runtime.js').KtxCliIo;
@ -104,25 +88,6 @@ export async function runKtxKnowledge(
      });
      return 0;
    }
-    if (args.command === 'read') {
-      const page = await readLocalKnowledgePage(project, { key: args.key, userId: args.userId });
-      if (!page) {
-        throw new Error(`Wiki page "${args.key}" was not found`);
-      }
-      if (args.json) {
-        writeJsonResult(io, {
-          kind: 'wiki.page',
-          data: page,
-          meta: { command: 'wiki read' },
-        });
-        return 0;
-      }
-      io.stdout.write(`# ${page.key}\n\n`);
-      io.stdout.write(`Scope: ${page.scope}\n`);
-      io.stdout.write(`Summary: ${page.summary}\n\n`);
-      io.stdout.write(`${page.content}\n`);
-      return 0;
-    }
    if (args.command === 'search') {
      const results = await searchLocalKnowledgePages(project, {
        query: args.query,
@ -153,18 +118,6 @@ export async function runKtxKnowledge(
      });
      return 0;
    }
-
-    const write = await writeLocalKnowledgePage(project, {
-      key: args.key,
-      scope: args.scope,
-      userId: args.userId,
-      summary: args.summary,
-      content: args.content,
-      tags: args.tags,
-      refs: args.refs,
-      slRefs: args.slRefs,
-    });
-    io.stdout.write(`Wrote ${write.path}\n`);
    return 0;
  } catch (error) {
    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
--- a/packages/cli/src/local-adapters.test.ts
+++ b/packages/cli/src/local-adapters.test.ts
@ -18,6 +18,9 @@ function sqlAnalysisStub() {
    async analyzeBatch() {
      return new Map();
    },
+    async validateReadOnly() {
+      return { ok: true };
+    },
  };
 }

--- a/packages/cli/src/local-scan-connectors.test.ts
+++ b/packages/cli/src/local-scan-connectors.test.ts
@ -92,7 +92,7 @@ describe('createKtxCliScanConnector', () => {
    expect(bigQueryMock.constructorInputs[0]).not.toHaveProperty('maxBytesBilled');
  });

-  it('throws for structural daemon-only fallback configs', async () => {
+  it('rejects daemon-only fallback driver configs at config parse time', async () => {
    await initKtxProject({ projectDir: tempDir });
    await writeFile(
      join(tempDir, 'ktx.yaml'),
@ -105,14 +105,13 @@ describe('createKtxCliScanConnector', () => {
      ].join('\n'),
      'utf-8',
    );
-    const project = await loadKtxProject({ projectDir: tempDir });

-    await expect(createKtxCliScanConnector(project, 'warehouse')).rejects.toThrow(
-      'Connection "warehouse" uses driver "duckdb", which has no native standalone KTX scan connector',
+    await expect(loadKtxProject({ projectDir: tempDir })).rejects.toThrow(
+      /connections\.warehouse\.driver:.*Invalid discriminator value/,
    );
  });

-  it('throws a clear error when the connection block has no driver field', async () => {
+  it('rejects connection blocks with no driver field at config parse time', async () => {
    await initKtxProject({ projectDir: tempDir });
    await writeFile(
      join(tempDir, 'ktx.yaml'),
@ -125,10 +124,9 @@ describe('createKtxCliScanConnector', () => {
      ].join('\n'),
      'utf-8',
    );
-    const project = await loadKtxProject({ projectDir: tempDir });

-    await expect(createKtxCliScanConnector(project, 'warehouse')).rejects.toThrow(
-      'Connection "warehouse" has no `driver` field in ktx.yaml',
+    await expect(loadKtxProject({ projectDir: tempDir })).rejects.toThrow(
+      /connections\.warehouse\.driver:.*Invalid discriminator value/,
    );
  });
 });
--- a/packages/cli/src/managed-mcp-daemon.test.ts
+++ b/packages/cli/src/managed-mcp-daemon.test.ts
@ -0,0 +1,133 @@
+import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import {
+  mcpDaemonLayout,
+  readKtxMcpDaemonStatus,
+  startKtxMcpDaemon,
+  stopKtxMcpDaemon,
+  type KtxMcpDaemonChild,
+  type KtxMcpDaemonState,
+} from './managed-mcp-daemon.js';
+
+/** Builds a fake daemon child handle exposing the given pid and a mocked `unref`. */
+function child(pid = 4242): KtxMcpDaemonChild {
+  const unref = vi.fn();
+  return { pid, unref };
+}
+
+/**
+ * Produces a daemon state fixture rooted at `projectDir`; any field present in
+ * `overrides` replaces the fixture default.
+ */
+function state(projectDir: string, overrides: Partial<KtxMcpDaemonState> = {}): KtxMcpDaemonState {
+  const defaults: KtxMcpDaemonState = {
+    schemaVersion: 1,
+    pid: 4242,
+    host: '127.0.0.1',
+    port: 7878,
+    tokenAuth: false,
+    projectDir,
+    startedAt: '2026-05-14T00:00:00.000Z',
+    logPath: join(projectDir, '.ktx/logs/mcp.log'),
+  };
+  return { ...defaults, ...overrides };
+}
+
+// Lifecycle tests for the managed MCP daemon helpers. Process spawning, liveness
+// checks, port probing, and health fetching are all injected as mocks.
+describe('managed MCP daemon lifecycle', () => {
+  let tempDir: string;
+  let projectDir: string;
+
+  // Each test gets a fresh temporary directory containing an empty project directory.
+  beforeEach(async () => {
+    tempDir = await mkdtemp(join(tmpdir(), 'ktx-mcp-daemon-'));
+    projectDir = join(tempDir, 'project');
+    await mkdir(projectDir, { recursive: true });
+  });
+
+  afterEach(async () => {
+    await rm(tempDir, { recursive: true, force: true });
+  });
+
+  it('uses the spec state and log paths', () => {
+    expect(mcpDaemonLayout(projectDir)).toEqual({
+      statePath: join(projectDir, '.ktx/mcp.json'),
+      logPath: join(projectDir, '.ktx/logs/mcp.log'),
+    });
+  });
+
+  it('starts a detached child and writes state without the token value', async () => {
+    const spawnDaemon = vi.fn(() => child(5555));
+    await startKtxMcpDaemon({
+      projectDir,
+      cliVersion: '0.0.0-test',
+      host: '0.0.0.0',
+      port: 7879,
+      token: 'secret-token',
+      allowedHosts: ['mcp.example.test'],
+      allowedOrigins: ['https://mcp.example.test'],
+      binPath: '/repo/packages/cli/dist/bin.js',
+      spawnDaemon,
+      processAlive: vi.fn(() => false),
+      portAvailable: vi.fn(async () => true),
+      now: () => new Date('2026-05-14T00:00:00.000Z'),
+    });
+
+    // The token must travel through the child's environment, never through argv.
+    expect(spawnDaemon).toHaveBeenCalledWith(
+      process.execPath,
+      [
+        '/repo/packages/cli/dist/bin.js',
+        '--project-dir',
+        projectDir,
+        'mcp',
+        'serve-internal',
+        '--host',
+        '0.0.0.0',
+        '--port',
+        '7879',
+        '--allowed-host',
+        'mcp.example.test',
+        '--allowed-origin',
+        'https://mcp.example.test',
+      ],
+      expect.objectContaining({
+        detached: true,
+        env: expect.objectContaining({ KTX_MCP_TOKEN: 'secret-token' }),
+      }),
+    );
+    // The persisted state file must not contain the token value.
+    expect(JSON.stringify(JSON.parse(await readFile(join(projectDir, '.ktx/mcp.json'), 'utf8')))).not.toContain(
+      'secret-token',
+    );
+  });
+
+  it('reports running when the process is alive and health passes', async () => {
+    await mkdir(join(projectDir, '.ktx'), { recursive: true });
+    await writeFile(join(projectDir, '.ktx/mcp.json'), `${JSON.stringify(state(projectDir), null, 2)}\n`);
+
+    const status = await readKtxMcpDaemonStatus({
+      projectDir,
+      processAlive: vi.fn(() => true),
+      fetchHealth: vi.fn(async () => ({ ok: true, body: { status: 'ok', projectDir, port: 7878 } })),
+    });
+
+    expect(status.kind).toBe('running');
+    // Narrow the union so `status.url` type-checks below.
+    if (status.kind !== 'running') {
+      throw new Error(`Expected running status, received ${status.kind}`);
+    }
+    expect(status.url).toBe('http://127.0.0.1:7878/mcp');
+  });
+
+  it('stops a recorded daemon and removes state', async () => {
+    await mkdir(join(projectDir, '.ktx'), { recursive: true });
+    await writeFile(join(projectDir, '.ktx/mcp.json'), `${JSON.stringify(state(projectDir), null, 2)}\n`);
+    // Simulated process table: killing a pid removes it, so the liveness mock then reports it dead.
+    const alive = new Set([4242]);
+    const killProcess = vi.fn((pid: number) => alive.delete(pid));
+
+    await expect(
+      stopKtxMcpDaemon({
+        projectDir,
+        processAlive: vi.fn((pid) => alive.has(pid)),
+        killProcess,
+        stopGraceMs: 1,
+        pollIntervalMs: 1,
+      }),
+    ).resolves.toEqual({ status: 'stopped' });
+
+    expect(killProcess).toHaveBeenCalledWith(4242, 'SIGTERM');
+    await expect(readFile(join(projectDir, '.ktx/mcp.json'), 'utf8')).rejects.toThrow();
+  });
+});
--- a/packages/cli/src/managed-mcp-daemon.ts
+++ b/packages/cli/src/managed-mcp-daemon.ts
@ -0,0 +1,238 @@
+import { spawn } from 'node:child_process';
+import { mkdir, open, readFile, rm, writeFile } from 'node:fs/promises';
+import { createServer } from 'node:net';
+import { dirname, join } from 'node:path';
+import { setTimeout as delay } from 'node:timers/promises';
+import { z } from 'zod';
+
+/** Daemon metadata persisted as JSON in the project's MCP state file. */
+export interface KtxMcpDaemonState {
+  /** State format version; only `1` is accepted. */
+  schemaVersion: 1;
+  /** Process id of the spawned daemon child. */
+  pid: number;
+  /** Host the daemon was started with. */
+  host: string;
+  /** Port the daemon was started with. */
+  port: number;
+  /** Whether a token was supplied at start; the token value itself is not part of this shape. */
+  tokenAuth: boolean;
+  /** Project directory the daemon was started for. */
+  projectDir: string;
+  /** ISO-8601 timestamp recorded when the daemon was started. */
+  startedAt: string;
+  /** Path of the log file that receives the daemon's stdout and stderr. */
+  logPath: string;
+}
+
+/** Minimal view of a spawned child process that the daemon starter relies on. */
+export interface KtxMcpDaemonChild {
+  /** Child pid; the starter treats a missing pid as a failed spawn. */
+  pid?: number;
+  /** Called by the starter after a successful spawn. */
+  unref(): void;
+}
+
+/**
+ * Result of inspecting the daemon:
+ * - `stopped`: no state file exists.
+ * - `running`: the recorded process is alive and the health probe succeeded.
+ * - `stale`: the state file is invalid, the process is gone, or the health probe failed.
+ */
+export type KtxMcpDaemonStatus =
+  | { kind: 'stopped'; detail: string }
+  | { kind: 'running'; detail: string; state: KtxMcpDaemonState; url: string }
+  | { kind: 'stale'; detail: string; state?: KtxMcpDaemonState };
+
+// Runtime validator for the persisted state file; its fields mirror KtxMcpDaemonState.
+const stateSchema = z.object({
+  schemaVersion: z.literal(1),
+  pid: z.number().int().positive(),
+  host: z.string().min(1),
+  // Valid TCP port range.
+  port: z.number().int().min(1).max(65535),
+  tokenAuth: z.boolean(),
+  projectDir: z.string().min(1),
+  startedAt: z.string().min(1),
+  logPath: z.string().min(1),
+});
+
+/** Resolves where the MCP daemon keeps its state file and its log file under `projectDir`. */
+export function mcpDaemonLayout(projectDir: string): { statePath: string; logPath: string } {
+  const statePath = join(projectDir, '.ktx/mcp.json');
+  const logPath = join(projectDir, '.ktx/logs/mcp.log');
+  return { statePath, logPath };
+}
+
+/**
+ * Reports whether a process with the given pid currently exists.
+ *
+ * Uses signal `0`, which only performs the existence and permission check and
+ * delivers nothing to the target.
+ *
+ * @param pid - Process id to probe.
+ * @returns `true` when the process exists (including when it belongs to another
+ *   user and cannot be signalled), otherwise `false`.
+ */
+function defaultProcessAlive(pid: number): boolean {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (error) {
+    // EPERM means the process exists but we lack permission to signal it, so it
+    // is still alive. Any other failure (e.g. ESRCH) is treated as "not running".
+    return (error as { code?: unknown } | null)?.code === 'EPERM';
+  }
+}
+
+/** Sends `signal` to `pid`; a missing process (ESRCH) is ignored, every other failure is rethrown. */
+function defaultKillProcess(pid: number, signal: NodeJS.Signals): void {
+  try {
+    process.kill(pid, signal);
+  } catch (cause) {
+    const alreadyGone = (cause as { code?: unknown }).code === 'ESRCH';
+    if (alreadyGone) {
+      return;
+    }
+    throw cause;
+  }
+}
+
+/**
+ * Loads and validates the persisted daemon state for `projectDir`.
+ *
+ * @returns The parsed state, or `undefined` when the state file does not exist.
+ * @throws Any other read, JSON, or schema validation failure.
+ */
+async function readState(projectDir: string): Promise<KtxMcpDaemonState | undefined> {
+  try {
+    const { statePath } = mcpDaemonLayout(projectDir);
+    const raw = await readFile(statePath, 'utf8');
+    const decoded = JSON.parse(raw) as unknown;
+    return stateSchema.parse(decoded);
+  } catch (cause) {
+    const missingFile = (cause as { code?: unknown }).code === 'ENOENT';
+    if (!missingFile) {
+      throw cause;
+    }
+    return undefined;
+  }
+}
+
+/** Persists `state` as pretty-printed JSON at the project's daemon state path, creating its directory first. */
+async function writeState(projectDir: string, state: KtxMcpDaemonState): Promise<void> {
+  const target = mcpDaemonLayout(projectDir).statePath;
+  await mkdir(dirname(target), { recursive: true });
+  const serialized = `${JSON.stringify(state, null, 2)}\n`;
+  await writeFile(target, serialized, 'utf8');
+}
+
+/**
+ * Probes whether `host:port` can be bound by briefly listening on it.
+ * Resolves `false` on any listen error and `true` once the probe server has closed again.
+ */
+async function defaultPortAvailable(host: string, port: number): Promise<boolean> {
+  return await new Promise((settle) => {
+    const probe = createServer();
+    probe.once('error', () => {
+      settle(false);
+    });
+    probe.listen(port, host, () => {
+      probe.close(() => {
+        settle(true);
+      });
+    });
+  });
+}
+
+/** Default spawner: hands the command, arguments, and options straight to `child_process.spawn`. */
+function defaultSpawnDaemon(
+  command: string,
+  args: string[],
+  options: { detached: boolean; stdio: ['ignore', number, number]; env: NodeJS.ProcessEnv },
+): KtxMcpDaemonChild {
+  const spawned = spawn(command, args, options);
+  return spawned;
+}
+
+/**
+ * Probes the daemon's `/health` endpoint using the host and port recorded in `state`.
+ *
+ * Never rejects: network errors, timeouts, and non-JSON bodies are reported as
+ * `{ ok: false, body: null, detail }`.
+ *
+ * @param state - Recorded daemon state providing `host` and `port`.
+ * @returns `ok` mirrors the HTTP success flag; `detail` is `HTTP <status>` for
+ *   non-2xx responses or the error message when the request itself failed.
+ */
+async function defaultFetchHealth(state: KtxMcpDaemonState): Promise<{ ok: boolean; body: unknown; detail?: string }> {
+  // IPv6 literals must be bracketed inside a URL authority (`[::1]:7878`); without
+  // the brackets `fetch` rejects the URL and a healthy daemon looks stale.
+  const needsBrackets = state.host.includes(':') && !state.host.startsWith('[');
+  const urlHost = needsBrackets ? `[${state.host}]` : state.host;
+  const authority = `${urlHost}:${state.port}`;
+  try {
+    const response = await fetch(`http://${authority}/health`, {
+      headers: { host: authority },
+      // Bound the probe so a wedged daemon cannot hang the caller indefinitely.
+      signal: AbortSignal.timeout(5_000),
+    });
+    const body: unknown = await response.json();
+    return { ok: response.ok, body, detail: response.ok ? undefined : `HTTP ${response.status}` };
+  } catch (error) {
+    return { ok: false, body: null, detail: error instanceof Error ? error.message : String(error) };
+  }
+}
+
+/**
+ * Spawns the MCP server as a detached background process and records its state.
+ *
+ * The child is the current Node executable running `binPath` with the
+ * `mcp serve-internal` subcommand. Its stdout and stderr are appended to the
+ * project's daemon log file. The token, when given, is passed through the
+ * `KTX_MCP_TOKEN` environment variable rather than the argument list.
+ *
+ * `processAlive`, `portAvailable`, `spawnDaemon`, and `now` are optional
+ * overrides for the built-in implementations.
+ *
+ * @returns The recorded state and the daemon's `/mcp` URL.
+ * @throws When a recorded daemon process is still alive, when the port is not
+ *   available, or when the spawned child exposes no pid.
+ */
+export async function startKtxMcpDaemon(options: {
+  projectDir: string;
+  cliVersion: string;
+  host: string;
+  port: number;
+  token?: string;
+  allowedHosts: string[];
+  allowedOrigins: string[];
+  binPath: string;
+  processAlive?: (pid: number) => boolean;
+  portAvailable?: (host: string, port: number) => Promise<boolean>;
+  spawnDaemon?: typeof defaultSpawnDaemon;
+  now?: () => Date;
+}): Promise<{ status: 'started'; state: KtxMcpDaemonState; url: string }> {
+  // An unreadable or invalid state file is treated the same as no state file.
+  const existing = await readState(options.projectDir).catch(() => undefined);
+  const processAlive = options.processAlive ?? defaultProcessAlive;
+  if (existing && processAlive(existing.pid)) {
+    throw new Error(`KTX MCP daemon is already recorded at http://${existing.host}:${existing.port}/mcp`);
+  }
+  const portAvailable = options.portAvailable ?? defaultPortAvailable;
+  if (!(await portAvailable(options.host, options.port))) {
+    throw new Error(`Port ${options.port} is already in use. Choose another port with --port <n>.`);
+  }
+
+  const { logPath } = mcpDaemonLayout(options.projectDir);
+  await mkdir(dirname(logPath), { recursive: true });
+  // Opened in append mode; the descriptor is handed to the child for stdout and stderr.
+  const log = await open(logPath, 'a');
+  try {
+    const args = [
+      options.binPath,
+      '--project-dir',
+      options.projectDir,
+      'mcp',
+      'serve-internal',
+      '--host',
+      options.host,
+      '--port',
+      String(options.port),
+      ...options.allowedHosts.flatMap((host) => ['--allowed-host', host]),
+      ...options.allowedOrigins.flatMap((origin) => ['--allowed-origin', origin]),
+    ];
+    const child = (options.spawnDaemon ?? defaultSpawnDaemon)(process.execPath, args, {
+      detached: true,
+      stdio: ['ignore', log.fd, log.fd],
+      env: {
+        ...process.env,
+        KTX_CLI_VERSION: options.cliVersion,
+        // The token goes through the environment, not argv.
+        ...(options.token ? { KTX_MCP_TOKEN: options.token } : {}),
+      },
+    });
+    if (!child.pid) {
+      throw new Error('Failed to start KTX MCP daemon: child process pid was not available.');
+    }
+    child.unref();
+    const state: KtxMcpDaemonState = {
+      schemaVersion: 1,
+      pid: child.pid,
+      host: options.host,
+      port: options.port,
+      // Only the fact that a token was supplied is recorded, never its value.
+      tokenAuth: Boolean(options.token),
+      projectDir: options.projectDir,
+      startedAt: (options.now ?? (() => new Date()))().toISOString(),
+      logPath,
+    };
+    await writeState(options.projectDir, state);
+    return { status: 'started', state, url: `http://${state.host}:${state.port}/mcp` };
+  } finally {
+    // Close this process's handle on the log file on every exit path.
+    await log.close();
+  }
+}
+
+/**
+ * Classifies the daemon for `projectDir` as stopped, stale, or running.
+ *
+ * - No state file: `stopped`.
+ * - Unreadable state, dead process, or failed health probe: `stale`.
+ * - Otherwise: `running`, together with the daemon's `/mcp` URL.
+ *
+ * `processAlive` and `fetchHealth` override the built-in liveness and health probes.
+ */
+export async function readKtxMcpDaemonStatus(options: {
+  projectDir: string;
+  processAlive?: (pid: number) => boolean;
+  fetchHealth?: (state: KtxMcpDaemonState) => Promise<{ ok: boolean; body: unknown; detail?: string }>;
+}): Promise<KtxMcpDaemonStatus> {
+  let recorded: KtxMcpDaemonState | undefined;
+  try {
+    recorded = await readState(options.projectDir);
+  } catch (cause) {
+    const reason = cause instanceof Error ? cause.message : String(cause);
+    return { kind: 'stale', detail: `MCP daemon state is invalid: ${reason}` };
+  }
+
+  if (recorded === undefined) {
+    const { statePath } = mcpDaemonLayout(options.projectDir);
+    return { kind: 'stopped', detail: `No MCP daemon state at ${statePath}` };
+  }
+
+  const isAlive = options.processAlive ?? defaultProcessAlive;
+  if (!isAlive(recorded.pid)) {
+    return { kind: 'stale', detail: `MCP daemon process ${recorded.pid} is not running`, state: recorded };
+  }
+
+  const probeHealth = options.fetchHealth ?? defaultFetchHealth;
+  const health = await probeHealth(recorded);
+  if (!health.ok) {
+    return { kind: 'stale', detail: health.detail ?? 'MCP daemon health check failed', state: recorded };
+  }
+
+  const url = `http://${recorded.host}:${recorded.port}/mcp`;
+  return { kind: 'running', detail: `KTX MCP daemon running at ${url}`, state: recorded, url };
+}
+
+/**
+ * Stops the recorded daemon for `projectDir` and deletes its state file.
+ *
+ * Sends SIGTERM, polls liveness every `pollIntervalMs` (default 100) until
+ * `stopGraceMs` (default 10 000) has elapsed, then sends SIGKILL if the process
+ * is still alive. Resolves `already-stopped` when no state file exists.
+ */
+export async function stopKtxMcpDaemon(options: {
+  projectDir: string;
+  processAlive?: (pid: number) => boolean;
+  killProcess?: (pid: number, signal: NodeJS.Signals) => void;
+  stopGraceMs?: number;
+  pollIntervalMs?: number;
+}): Promise<{ status: 'stopped' | 'already-stopped' }> {
+  const recorded = await readState(options.projectDir);
+  if (!recorded) {
+    return { status: 'already-stopped' };
+  }
+
+  const isAlive = options.processAlive ?? defaultProcessAlive;
+  const sendSignal = options.killProcess ?? defaultKillProcess;
+  const { pid } = recorded;
+
+  if (isAlive(pid)) {
+    sendSignal(pid, 'SIGTERM');
+    const giveUpAt = Date.now() + (options.stopGraceMs ?? 10_000);
+    // Wait for a graceful exit; the deadline is checked before each liveness probe.
+    while (Date.now() <= giveUpAt && isAlive(pid)) {
+      await delay(options.pollIntervalMs ?? 100);
+    }
+    if (isAlive(pid)) {
+      sendSignal(pid, 'SIGKILL');
+    }
+  }
+
+  await rm(mcpDaemonLayout(options.projectDir).statePath, { force: true });
+  return { status: 'stopped' };
+}
--- a/packages/cli/src/mcp-http-server.test.ts
+++ b/packages/cli/src/mcp-http-server.test.ts
@ -0,0 +1,274 @@
+import { request } from 'node:http';
+import type { AddressInfo } from 'node:net';
+import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { describe, expect, it } from 'vitest';
+import {
+  buildMcpSecurityConfig,
+  isMcpRequestAuthorized,
+  normalizeHostHeader,
+  runKtxMcpHttpServer,
+} from './mcp-http-server.js';
+
+// Covers lowercasing, port stripping, IPv6 bracket removal, and whitespace trimming.
+describe('normalizeHostHeader', () => {
+  it('normalizes host headers before allow-list comparison', () => {
+    expect(normalizeHostHeader('LOCALHOST:7878')).toBe('localhost');
+    expect(normalizeHostHeader('127.0.0.1:7878')).toBe('127.0.0.1');
+    expect(normalizeHostHeader('[::1]:7878')).toBe('::1');
+    expect(normalizeHostHeader('  Example.COM  ')).toBe('example.com');
+  });
+});
+
+// Covers the default loopback allow-list, the token requirement for non-loopback
+// binds, and validation of allowed origins.
+describe('buildMcpSecurityConfig', () => {
+  it('allows loopback hosts without a token', () => {
+    const config = buildMcpSecurityConfig({
+      host: '127.0.0.1',
+      port: 7878,
+      token: undefined,
+      allowedHosts: [],
+      allowedOrigins: [],
+    });
+
+    expect(config.token).toBeUndefined();
+    expect(config.allowedHosts).toEqual(['localhost', '127.0.0.1', '::1']);
+  });
+
+  it('requires a token for non-loopback binding', () => {
+    expect(() =>
+      buildMcpSecurityConfig({
+        host: '0.0.0.0',
+        port: 7878,
+        token: undefined,
+        allowedHosts: [],
+        allowedOrigins: [],
+      }),
+    ).toThrow('Binding KTX MCP to 0.0.0.0 requires --token or KTX_MCP_TOKEN');
+  });
+
+  it('validates allowed origins as full origins', () => {
+    expect(() =>
+      buildMcpSecurityConfig({
+        host: '127.0.0.1',
+        port: 7878,
+        token: undefined,
+        allowedHosts: [],
+        // Missing scheme, so this is not a full origin.
+        allowedOrigins: ['localhost:7878'],
+      }),
+    ).toThrow('Allowed origin must be a full origin URL');
+  });
+});
+
+// Exercises the host, origin, and bearer-token checks against one shared
+// token-protected configuration.
+describe('isMcpRequestAuthorized', () => {
+  const config = buildMcpSecurityConfig({
+    host: '0.0.0.0',
+    port: 7878,
+    token: 'secret-token',
+    allowedHosts: ['mcp.example.test'],
+    allowedOrigins: ['https://mcp.example.test'],
+  });
+
+  it('accepts a valid host, origin, and bearer token', () => {
+    expect(
+      isMcpRequestAuthorized(
+        {
+          path: '/mcp',
+          headers: {
+            host: 'mcp.example.test:7878',
+            origin: 'https://mcp.example.test',
+            authorization: 'Bearer secret-token',
+          },
+        },
+        config,
+      ),
+    ).toEqual({ ok: true });
+  });
+
+  it('rejects bad host headers before MCP handling', () => {
+    expect(
+      isMcpRequestAuthorized(
+        { path: '/health', headers: { host: 'evil.example.test' } },
+        config,
+      ),
+    ).toEqual({ ok: false, status: 403, message: 'Host header is not allowed for KTX MCP.' });
+  });
+
+  it('rejects browser origins unless explicitly allowed', () => {
+    expect(
+      isMcpRequestAuthorized(
+        {
+          path: '/health',
+          headers: { host: 'mcp.example.test', origin: 'https://evil.example.test' },
+        },
+        config,
+      ),
+    ).toEqual({ ok: false, status: 403, message: 'Origin header is not allowed for KTX MCP.' });
+  });
+
+  it('requires bearer auth on /mcp when token auth is enabled', () => {
+    expect(
+      isMcpRequestAuthorized(
+        { path: '/mcp', headers: { host: 'mcp.example.test', authorization: 'Bearer wrong' } },
+        config,
+      ),
+    ).toEqual({ ok: false, status: 401, message: 'Missing or invalid KTX MCP bearer token.' });
+  });
+
+  // /health carries no authorization header here and is still accepted.
+  it('does not require bearer auth on /health', () => {
+    expect(isMcpRequestAuthorized({ path: '/health', headers: { host: 'mcp.example.test' } }, config)).toEqual({
+      ok: true,
+    });
+  });
+});
+
+/**
+ * POSTs `body` as JSON to `path` on 127.0.0.1:`port` and resolves with the
+ * response status, headers, and UTF-8 decoded body. Entries in `headers`
+ * override the defaults.
+ */
+function postJson(port: number, path: string, body: unknown, headers: Record<string, string> = {}) {
+  return new Promise<{ status: number; headers: Record<string, string | string[] | undefined>; body: string }>(
+    (resolve, reject) => {
+      const payload = JSON.stringify(body);
+      const requestHeaders = {
+        host: `127.0.0.1:${port}`,
+        accept: 'application/json, text/event-stream',
+        'content-type': 'application/json',
+        'content-length': Buffer.byteLength(payload),
+        ...headers,
+      };
+      const req = request({ host: '127.0.0.1', port, path, method: 'POST', headers: requestHeaders }, (res) => {
+        const received: Buffer[] = [];
+        res.on('data', (piece: Buffer) => {
+          received.push(piece);
+        });
+        res.on('end', () => {
+          resolve({
+            status: res.statusCode ?? 0,
+            headers: res.headers,
+            body: Buffer.concat(received).toString('utf8'),
+          });
+        });
+      });
+      req.on('error', reject);
+      req.end(payload);
+    },
+  );
+}
+
+/**
+ * Issues a GET for `path` on 127.0.0.1:`port` and resolves with the response
+ * status, headers, and UTF-8 decoded body. Entries in `headers` override the
+ * default `host` header.
+ */
+function get(port: number, path: string, headers: Record<string, string> = {}) {
+  return new Promise<{ status: number; headers: Record<string, string | string[] | undefined>; body: string }>(
+    (resolve, reject) => {
+      const requestHeaders = { host: `127.0.0.1:${port}`, ...headers };
+      const req = request({ host: '127.0.0.1', port, path, method: 'GET', headers: requestHeaders }, (res) => {
+        const received: Buffer[] = [];
+        res.on('data', (piece: Buffer) => {
+          received.push(piece);
+        });
+        res.on('end', () => {
+          resolve({
+            status: res.statusCode ?? 0,
+            headers: res.headers,
+            body: Buffer.concat(received).toString('utf8'),
+          });
+        });
+      });
+      req.on('error', reject);
+      req.end();
+    },
+  );
+}
+
+/** Returns a factory that builds a fresh test MCP server exposing a single `ping` tool. */
+function createTestMcpServer() {
+  const build = () => {
+    const mcp = new McpServer({ name: 'ktx-test', version: '0.0.0-test' });
+    mcp.registerTool('ping', { inputSchema: {} }, async () => {
+      return { content: [{ type: 'text', text: 'pong' }] };
+    });
+    return mcp;
+  };
+  return build;
+}
+
+// End-to-end tests against a real HTTP listener. Each test passes port 0, reads
+// the actual port back from the listening server, and closes the handle in `finally`.
+describe('runKtxMcpHttpServer', () => {
+  it('serves /health with project metadata', async () => {
+    const handle = await runKtxMcpHttpServer({
+      projectDir: '/tmp/ktx-project',
+      host: '127.0.0.1',
+      port: 0,
+      allowedHosts: [],
+      allowedOrigins: [],
+      createMcpServer: createTestMcpServer(),
+    });
+    try {
+      const port = (handle.server.address() as AddressInfo).port;
+      const response = await get(port, '/health');
+      expect(response.status).toBe(200);
+      expect(JSON.parse(response.body)).toEqual({
+        status: 'ok',
+        projectDir: '/tmp/ktx-project',
+        port,
+      });
+    } finally {
+      await handle.close();
+    }
+  });
+
+  it('allocates a stateful MCP session on initialize', async () => {
+    const handle = await runKtxMcpHttpServer({
+      projectDir: '/tmp/ktx-project',
+      host: '127.0.0.1',
+      port: 0,
+      allowedHosts: [],
+      allowedOrigins: [],
+      createMcpServer: createTestMcpServer(),
+    });
+    try {
+      const port = (handle.server.address() as AddressInfo).port;
+      const response = await postJson(port, '/mcp', {
+        jsonrpc: '2.0',
+        id: 1,
+        method: 'initialize',
+        params: {
+          protocolVersion: '2025-06-18',
+          capabilities: {},
+          clientInfo: { name: 'vitest', version: '0.0.0' },
+        },
+      });
+
+      expect(response.status).toBe(200);
+      // A session id header on the initialize response indicates a session was allocated.
+      expect(response.headers['mcp-session-id']).toBeTruthy();
+    } finally {
+      await handle.close();
+    }
+  });
+
+  it('rejects unknown session ids with 404', async () => {
+    const handle = await runKtxMcpHttpServer({
+      projectDir: '/tmp/ktx-project',
+      host: '127.0.0.1',
+      port: 0,
+      allowedHosts: [],
+      allowedOrigins: [],
+      createMcpServer: createTestMcpServer(),
+    });
+    try {
+      const port = (handle.server.address() as AddressInfo).port;
+      // A non-initialize request carrying a session id the server never issued.
+      const response = await postJson(
+        port,
+        '/mcp',
+        { jsonrpc: '2.0', id: 2, method: 'tools/list', params: {} },
+        { 'mcp-session-id': 'missing-session' },
+      );
+
+      expect(response.status).toBe(404);
+      expect(response.body).toContain('Unknown MCP session');
+    } finally {
+      await handle.close();
+    }
+  });
+});
--- a/packages/cli/src/mcp-http-server.ts
+++ b/packages/cli/src/mcp-http-server.ts
@ -0,0 +1,340 @@
+import { randomUUID, timingSafeEqual } from 'node:crypto';
+import { createServer, type IncomingHttpHeaders, type IncomingMessage, type Server, type ServerResponse } from 'node:http';
+import { createDefaultKtxMcpServer, createLocalProjectMcpContextPorts } from '@ktx/context/mcp';
+import { createLocalProjectMemoryCapture } from '@ktx/context/memory';
+import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
+import { isInitializeRequest } from '@modelcontextprotocol/sdk/types.js';
+import type { KtxCliIo } from './cli-runtime.js';
+import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
+import { createKtxCliScanConnector } from './local-scan-connectors.js';
+import { createManagedPythonSemanticLayerComputePort } from './managed-python-command.js';
+import { createManagedDaemonSqlAnalysisPort } from './managed-python-http.js';
+
+/** Loopback host names that seed the allow-list built by `buildMcpSecurityConfig`. */
+const DEFAULT_ALLOWED_HOSTS = ['localhost', '127.0.0.1', '::1'] as const;
+
+/** Raw, unvalidated security settings accepted by `buildMcpSecurityConfig`. */
+export interface McpSecurityConfigInput {
+  /** Host to bind; a non-loopback host requires `token`. */
+  host: string;
+  port: number;
+  /** Bearer token required on `/mcp` when set. */
+  token?: string;
+  /** Extra Host header values to accept in addition to the loopback defaults. */
+  allowedHosts: string[];
+  /** Origins to accept; each must be a full origin URL. */
+  allowedOrigins: string[];
+}
+
+/** Validated security settings produced by `buildMcpSecurityConfig`. */
+export interface McpSecurityConfig {
+  host: string;
+  port: number;
+  /** Present only when a token was supplied. */
+  token?: string;
+  /** Normalized host names: the loopback defaults, the bind host when non-loopback, and configured extras. */
+  allowedHosts: string[];
+  /** Allowed origins in serialized origin form. */
+  allowedOrigins: string[];
+}
+
+/** Outcome of `isMcpRequestAuthorized`: success, or a 401/403 rejection with a message. */
+export type McpAuthorizationResult =
+  | { ok: true }
+  | { ok: false; status: 401 | 403; message: string };
+
+/** True when `host`, once normalized, is `localhost`, `127.0.0.1`, or `::1`. */
+function isLoopbackHost(host: string): boolean {
+  switch (normalizeHostHeader(host)) {
+    case 'localhost':
+    case '127.0.0.1':
+    case '::1':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Reduces a Host header value to a bare, lowercase host name.
+ *
+ * Surrounding whitespace is trimmed. A bracketed IPv6 literal yields the text
+ * inside the brackets. A value containing exactly one colon loses everything
+ * from that colon on (the port). Anything else is returned unchanged.
+ */
+export function normalizeHostHeader(value: string): string {
+  const host = value.trim().toLowerCase();
+  if (host.startsWith('[')) {
+    const end = host.indexOf(']');
+    return end === -1 ? host.slice(1) : host.slice(1, end);
+  }
+  const firstColon = host.indexOf(':');
+  // More than one colon means an unbracketed IPv6 literal, which carries no port to strip.
+  const hasSingleColon = firstColon !== -1 && firstColon === host.lastIndexOf(':');
+  return hasSingleColon ? host.slice(0, firstColon) : host;
+}
+
+/**
+ * Validates that `value` is a bare origin URL and returns its serialized origin.
+ *
+ * @param value - Candidate origin such as `https://mcp.example.test`.
+ * @returns The canonical origin (lowercased scheme and host, default port removed).
+ * @throws When the value does not parse, has no host, carries a path, query, or
+ *   fragment, or uses a scheme whose origin is opaque.
+ */
+function fullOrigin(value: string): string {
+  let parsed: URL;
+  try {
+    parsed = new URL(value);
+  } catch {
+    throw new Error(`Allowed origin must be a full origin URL: ${value}`);
+  }
+  if (!parsed.protocol || !parsed.host || parsed.pathname !== '/' || parsed.search || parsed.hash) {
+    throw new Error(`Allowed origin must be a full origin URL: ${value}`);
+  }
+  // Non-special schemes (e.g. `foo://bar/`) serialize to the opaque origin "null".
+  // Accepting that would allow every request that sends `Origin: null`.
+  if (parsed.origin === 'null') {
+    throw new Error(`Allowed origin must be a full origin URL: ${value}`);
+  }
+  return parsed.origin;
+}
+
+/**
+ * Validates raw security input and derives the effective allow-lists.
+ *
+ * The host allow-list starts with the loopback defaults, then adds the bind host
+ * when it is not loopback, then every configured extra host, all normalized and
+ * de-duplicated. Allowed origins are validated and canonicalized.
+ *
+ * @throws When binding to a non-loopback host without a token, or when an
+ *   allowed origin is not a full origin URL.
+ */
+export function buildMcpSecurityConfig(input: McpSecurityConfigInput): McpSecurityConfig {
+  const loopback = isLoopbackHost(input.host);
+  if (!loopback && !input.token) {
+    throw new Error(`Binding KTX MCP to ${input.host} requires --token or KTX_MCP_TOKEN`);
+  }
+
+  const extraHosts = loopback ? input.allowedHosts : [input.host, ...input.allowedHosts];
+  const uniqueHosts = new Set<string>(DEFAULT_ALLOWED_HOSTS);
+  extraHosts.forEach((candidate) => {
+    uniqueHosts.add(normalizeHostHeader(candidate));
+  });
+  const allowedHosts = [...uniqueHosts];
+  const allowedOrigins = input.allowedOrigins.map(fullOrigin);
+
+  return {
+    host: input.host,
+    port: input.port,
+    ...(input.token ? { token: input.token } : {}),
+    allowedHosts,
+    allowedOrigins,
+  };
+}
+
+/** Looks up header `name` case-insensitively; for multi-valued headers the first value is returned. */
+function headerValue(headers: IncomingHttpHeaders | Record<string, string | undefined>, name: string): string | undefined {
+  const raw = headers[name.toLowerCase()];
+  if (Array.isArray(raw)) {
+    return raw[0];
+  }
+  return raw;
+}
+
+/**
+ * Compares a presented credential with the expected one without leaking, through
+ * timing, how many leading characters matched.
+ */
+function bearerMatches(presented: string | undefined, expected: string): boolean {
+  if (presented === undefined) {
+    return false;
+  }
+  const presentedBytes = Buffer.from(presented, 'utf8');
+  const expectedBytes = Buffer.from(expected, 'utf8');
+  // timingSafeEqual throws on length mismatch, so the lengths are compared first.
+  return presentedBytes.length === expectedBytes.length && timingSafeEqual(presentedBytes, expectedBytes);
+}
+
+/**
+ * Decides whether an incoming request may be handled.
+ *
+ * Checks, in order:
+ * 1. The Host header must be present and, once normalized, be in `config.allowedHosts` (403 otherwise).
+ * 2. An Origin header, when present, must be in `config.allowedOrigins` (403 otherwise).
+ * 3. For the `/mcp` path with a configured token, the Authorization header must
+ *    equal `Bearer <token>` (401 otherwise). Other paths need no token.
+ *
+ * @param request - Request path and headers.
+ * @param config - Validated security configuration.
+ * @returns `{ ok: true }` or a rejection carrying the HTTP status and message.
+ */
+export function isMcpRequestAuthorized(
+  request: { path: string; headers: IncomingHttpHeaders | Record<string, string | undefined> },
+  config: McpSecurityConfig,
+): McpAuthorizationResult {
+  const host = headerValue(request.headers, 'host');
+  if (!host || !config.allowedHosts.includes(normalizeHostHeader(host))) {
+    return { ok: false, status: 403, message: 'Host header is not allowed for KTX MCP.' };
+  }
+  const origin = headerValue(request.headers, 'origin');
+  if (origin && !config.allowedOrigins.includes(origin)) {
+    return { ok: false, status: 403, message: 'Origin header is not allowed for KTX MCP.' };
+  }
+  if (request.path === '/mcp' && config.token) {
+    const auth = headerValue(request.headers, 'authorization');
+    // Constant-time comparison: a plain `!==` on a secret allows timing-based guessing.
+    if (!bearerMatches(auth, `Bearer ${config.token}`)) {
+      return { ok: false, status: 401, message: 'Missing or invalid KTX MCP bearer token.' };
+    }
+  }
+  return { ok: true };
+}
+
+/** Handle returned by `runKtxMcpHttpServer`. */
+export interface KtxMcpHttpServerHandle {
+  /** The underlying Node HTTP server. */
+  server: Server;
+  /** Closes the server; resolves once shutdown has completed. */
+  close(): Promise<void>;
+}
+
+/** Options for `runKtxMcpHttpServer`: the security input plus project and wiring overrides. */
+export interface RunKtxMcpHttpServerOptions extends McpSecurityConfigInput {
+  projectDir: string;
+  /** Version passed to the default MCP server factory; a placeholder is used when omitted. */
+  cliVersion?: string;
+  io?: KtxCliIo;
+  /** Replaces the default MCP server factory; when set, the project is not loaded. */
+  createMcpServer?: () => McpServer;
+  /** Replaces `loadKtxProject` when the default factory is in use. */
+  loadProject?: typeof loadKtxProject;
+}
+
+/** Creates an IO object whose stdout and stderr writers discard everything. */
+function noopIo(): KtxCliIo {
+  const discard = () => ({ write() {} });
+  return {
+    stdout: discard(),
+    stderr: discard(),
+  };
+}
+
+/** Sends `body` as newline-terminated JSON with an explicit content length. */
+function writeJson(res: ServerResponse, status: number, body: object): void {
+  const serialized = JSON.stringify(body) + '\n';
+  const headers = {
+    'content-type': 'application/json',
+    'content-length': Buffer.byteLength(serialized),
+  };
+  res.writeHead(status, headers);
+  res.end(serialized);
+}
+
+/** Sends `body` as a UTF-8 plain-text response with the given status. */
+function writeText(res: ServerResponse, status: number, body: string): void {
+  const headers = { 'content-type': 'text/plain; charset=utf-8' };
+  res.writeHead(status, headers);
+  res.end(body);
+}
+
+/** Extracts the pathname of the request URL (query and fragment dropped); a missing URL counts as `/`. */
+function requestPath(req: IncomingMessage): string {
+  // The base is only there so relative request targets can be parsed.
+  const { pathname } = new URL(req.url ?? '/', 'http://127.0.0.1');
+  return pathname;
+}
+
+/**
+ * Buffers the whole request body and parses it as JSON.
+ *
+ * @returns The parsed value, or `undefined` when the body is empty or whitespace only.
+ * @throws When the body is not valid JSON.
+ */
+async function readJsonBody(req: IncomingMessage): Promise<unknown> {
+  const pieces: Buffer[] = [];
+  for await (const piece of req) {
+    pieces.push(Buffer.isBuffer(piece) ? piece : Buffer.from(piece));
+  }
+  const text = Buffer.concat(pieces).toString('utf8');
+  if (text.trim().length === 0) {
+    return undefined;
+  }
+  return JSON.parse(text) as unknown;
+}
+
+/**
+ * Builds the default MCP server factory for a loaded project.
+ *
+ * The query executor, semantic-layer compute port, SQL analysis port, and context
+ * tool ports are created once here and shared by every server the returned
+ * factory creates. Memory capture is optional: if creating it throws, the
+ * failure is reported on the caller's stderr (when `io` was supplied) and the
+ * server is built without it.
+ *
+ * @returns A factory that creates a new MCP server on each call.
+ */
+async function defaultMcpServerFactory(input: {
+  project: KtxLocalProject;
+  projectDir: string;
+  cliVersion: string;
+  io?: KtxCliIo;
+}): Promise<() => McpServer> {
+  // Fall back to a silent IO so the ports below always have somewhere to write.
+  const io = input.io ?? noopIo();
+  const queryExecutor = createKtxCliIngestQueryExecutor(input.project);
+  const semanticLayerCompute = await createManagedPythonSemanticLayerComputePort({
+    cliVersion: input.cliVersion,
+    installPolicy: 'auto',
+    io,
+  });
+  const sqlAnalysis = createManagedDaemonSqlAnalysisPort({
+    cliVersion: input.cliVersion,
+    projectDir: input.projectDir,
+    installPolicy: 'auto',
+    io,
+  });
+  const contextTools = createLocalProjectMcpContextPorts(input.project, {
+    semanticLayerCompute,
+    queryExecutor,
+    sqlAnalysis,
+    localScan: {
+      createConnector: async (connectionId) => createKtxCliScanConnector(input.project, connectionId),
+    },
+    localIngest: {
+      semanticLayerCompute,
+      queryExecutor,
+    },
+  });
+
+  // Memory capture is best-effort: a failure disables the capability rather than aborting startup.
+  let memoryCapture: ReturnType<typeof createLocalProjectMemoryCapture> | undefined;
+  try {
+    memoryCapture = createLocalProjectMemoryCapture(input.project, { semanticLayerCompute, queryExecutor });
+  } catch (error) {
+    input.io?.stderr.write(`KTX MCP memory_capture disabled: ${error instanceof Error ? error.message : String(error)}\n`);
+  }
+
+  return () =>
+    createDefaultKtxMcpServer({
+      name: 'ktx',
+      version: input.cliVersion,
+      userContext: { userId: 'local' },
+      contextTools,
+      memoryCapture,
+    });
+}
+
+/**
+ * Returns the port reported by `server.address()`, or `fallback` when the address is
+ * not an object (it is `null` or a string).
+ */
+function listenerPort(server: Server, fallback: number): number {
+  const bound = server.address();
+  if (bound !== null && typeof bound === 'object') {
+    return bound.port;
+  }
+  return fallback;
+}
+
+/**
+ * Expands the configured allowed hosts into the list handed to the transport.
+ *
+ * Each host is included bare and with the port the server actually reports. When a
+ * non-zero configured port differs from the reported one, that `host:port` form is
+ * included as well. Duplicates are removed.
+ */
+function transportAllowedHosts(config: McpSecurityConfig, server: Server): string[] {
+  const boundPort = listenerPort(server, config.port);
+  const includeConfiguredPort = config.port !== 0 && config.port !== boundPort;
+  const allowed = new Set<string>(config.allowedHosts);
+  for (const hostName of config.allowedHosts) {
+    allowed.add(`${hostName}:${boundPort}`);
+    if (includeConfiguredPort) {
+      allowed.add(`${hostName}:${config.port}`);
+    }
+  }
+  return Array.from(allowed);
+}
+
+/**
+ * Starts the KTX MCP HTTP server and resolves once it is listening.
+ *
+ * Routes:
+ * - `GET /health` returns a small JSON status document.
+ * - `POST /mcp` without a session id must carry an MCP initialize request and opens a new
+ *   session; with a known session id it is forwarded to that session's transport.
+ * - `GET /mcp` and `DELETE /mcp` require a known `mcp-session-id` header.
+ *
+ * Every request is first checked with `isMcpRequestAuthorized`. Any failure while handling
+ * a request is reported on `options.io` (when provided) and answered with a 500, or by
+ * tearing down the response if headers were already sent, so it never surfaces as an
+ * unhandled promise rejection.
+ *
+ * @param options Server options. When `createMcpServer` is omitted, the project is loaded
+ *   (via `loadProject` or the default loader) and the default MCP server factory is used.
+ * @returns A handle exposing the underlying `server` and an async `close()`.
+ * @throws If the project cannot be loaded or the server fails to start listening.
+ */
+export async function runKtxMcpHttpServer(options: RunKtxMcpHttpServerOptions): Promise<KtxMcpHttpServerHandle> {
+  const config = buildMcpSecurityConfig(options);
+  // The project is only loaded when the caller did not inject its own server factory.
+  const createMcpServer =
+    options.createMcpServer ??
+    (await defaultMcpServerFactory({
+      project: await (options.loadProject ?? loadKtxProject)({ projectDir: options.projectDir }),
+      projectDir: options.projectDir,
+      cliVersion: options.cliVersion ?? '0.0.0-private',
+      io: options.io,
+    }));
+  const sessions = new Map<string, StreamableHTTPServerTransport>();
+
+  /** Creates a transport, registers its session callbacks and connects a fresh MCP server to it. */
+  async function newTransport(): Promise<StreamableHTTPServerTransport> {
+    let transport: StreamableHTTPServerTransport;
+    transport = new StreamableHTTPServerTransport({
+      sessionIdGenerator: () => randomUUID(),
+      onsessioninitialized: (sessionId) => {
+        sessions.set(sessionId, transport);
+      },
+      onsessionclosed: (sessionId) => {
+        sessions.delete(sessionId);
+      },
+      allowedHosts: transportAllowedHosts(config, server),
+      allowedOrigins: config.allowedOrigins,
+      enableDnsRebindingProtection: true,
+    });
+    transport.onclose = () => {
+      if (transport.sessionId) {
+        sessions.delete(transport.sessionId);
+      }
+    };
+    await createMcpServer().connect(transport);
+    return transport;
+  }
+
+  /** Routes a single HTTP request; may reject, the listener below turns that into a response. */
+  async function handleRequest(req: IncomingMessage, res: ServerResponse): Promise<void> {
+    let path: string;
+    try {
+      path = requestPath(req);
+    } catch {
+      // The request target could not be parsed as a URL; answer instead of throwing.
+      writeText(res, 400, 'Invalid request URL');
+      return;
+    }
+
+    const auth = isMcpRequestAuthorized({ path, headers: req.headers }, config);
+    if (!auth.ok) {
+      writeText(res, auth.status, auth.message);
+      return;
+    }
+
+    if (path === '/health' && req.method === 'GET') {
+      const port = listenerPort(server, config.port);
+      writeJson(res, 200, { status: 'ok', projectDir: options.projectDir, port });
+      return;
+    }
+
+    if (path !== '/mcp' || !['POST', 'GET', 'DELETE'].includes(req.method ?? '')) {
+      writeText(res, 404, 'Not found');
+      return;
+    }
+
+    const sessionId = req.headers['mcp-session-id'];
+    const normalizedSessionId = Array.isArray(sessionId) ? sessionId[0] : sessionId;
+    const existing = normalizedSessionId ? sessions.get(normalizedSessionId) : undefined;
+
+    if (req.method === 'POST') {
+      let body: unknown;
+      try {
+        body = await readJsonBody(req);
+      } catch (error) {
+        writeText(res, 400, `Invalid JSON body: ${error instanceof Error ? error.message : String(error)}`);
+        return;
+      }
+      if (existing) {
+        await existing.handleRequest(req, res, body);
+        return;
+      }
+      if (normalizedSessionId) {
+        writeText(res, 404, `Unknown MCP session: ${normalizedSessionId}`);
+        return;
+      }
+      if (!isInitializeRequest(body)) {
+        writeText(res, 400, 'MCP initialize request is required before session traffic.');
+        return;
+      }
+      await (await newTransport()).handleRequest(req, res, body);
+      return;
+    }
+
+    // GET and DELETE only make sense for an established session.
+    if (!existing) {
+      writeText(res, 404, normalizedSessionId ? `Unknown MCP session: ${normalizedSessionId}` : 'Missing MCP session id.');
+      return;
+    }
+    await existing.handleRequest(req, res);
+  }
+
+  const server = createServer((req, res) => {
+    handleRequest(req, res).catch((error: unknown) => {
+      const detail = error instanceof Error ? error.message : String(error);
+      options.io?.stderr.write(`KTX MCP request failed: ${detail}\n`);
+      if (res.headersSent) {
+        // Too late for a status code; drop the connection so the client does not hang.
+        res.destroy();
+      } else {
+        writeText(res, 500, 'Internal server error');
+      }
+    });
+  });
+
+  await new Promise<void>((resolve, reject) => {
+    server.once('error', reject);
+    server.listen(config.port, config.host, () => {
+      server.off('error', reject);
+      resolve();
+    });
+  });
+
+  return {
+    server,
+    async close() {
+      // Snapshot first: closing a transport removes it from `sessions` via its callbacks.
+      const closing = [...sessions.values()].map((transport) => transport.close());
+      const outcomes = await Promise.allSettled(closing);
+      // Always stop the listener, even when a transport failed to close cleanly.
+      await new Promise<void>((resolve, reject) => {
+        server.close((error) => (error ? reject(error) : resolve()));
+      });
+      const failure = outcomes.find(
+        (outcome): outcome is PromiseRejectedResult => outcome.status === 'rejected',
+      );
+      if (failure) {
+        throw failure.reason;
+      }
+    },
+  };
+}
--- a/packages/cli/src/print-command-tree.test.ts
+++ b/packages/cli/src/print-command-tree.test.ts
@ -12,11 +12,12 @@ describe('renderKtxCommandTree', () => {
      .filter((line) => /^ {2}[├└]── \S/.test(line))
      .map((line) => line.replace(/^ {2}[├└]── /, '').trim().split(' ')[0]);

-    for (const expected of ['setup', 'connection', 'ingest', 'sl', 'dev']) {
+    for (const expected of ['setup', 'connection', 'ingest', 'sl', 'mcp', 'dev']) {
      expect(topLevel).toContain(expected);
    }

    expect(output).toContain('│   └── test [connectionId]');
+    expect(output).toContain('│   ├── status                          Show KTX MCP daemon status');
    expect(output).not.toContain('│   ├── add');
    expect(output).not.toContain('│   ├── remove');
    expect(output).not.toContain('│   ├── map');
@ -24,7 +25,6 @@ describe('renderKtxCommandTree', () => {
    expect(output).not.toContain('│   ├── metabase');
    expect(output).not.toContain('│   ├── notion');
    expect(output).not.toContain('scan <connectionId>');
-    expect(output).not.toContain('│   ├── status');
    expect(output).not.toContain('│   ├── replay');
    expect(output).not.toContain('│   └── replay');
    expect(output).not.toContain('│   ├── run');
--- a/packages/cli/src/public-ingest.test.ts
+++ b/packages/cli/src/public-ingest.test.ts
@ -85,7 +85,7 @@ describe('buildPublicIngestPlan', () => {
  it('plans warehouse connections as scan targets and source connections as source ingest targets', () => {
    const project = projectWithConnections({
      warehouse: { driver: 'postgres' },
-      prod_metabase: { driver: 'metabase' },
+      prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' },
      docs: { driver: 'notion' },
    });

@ -745,7 +745,7 @@ describe('runKtxPublicIngest', () => {
    const io = makeIo();
    const project = projectWithConnections({
      warehouse: { driver: 'postgres' },
-      prod_metabase: { driver: 'metabase' },
+      prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' },
    });
    const runScan = vi.fn(async () => 1);
    const runIngest = vi.fn(async () => 0);
--- a/packages/cli/src/scan.ts
+++ b/packages/cli/src/scan.ts
@ -133,6 +133,50 @@ function warningLine(warning: KtxScanWarning): string {
  return `${warning.code}: ${location}${warning.message}`;
 }

+/**
+ * Buckets warnings by their `code`.
+ *
+ * Codes appear in the returned map in the order they are first seen, and warnings keep
+ * their input order inside each bucket.
+ */
+function groupWarningsByCode(warnings: readonly KtxScanWarning[]): Map<string, KtxScanWarning[]> {
+  const byCode = new Map<string, KtxScanWarning[]>();
+  for (const entry of warnings) {
+    let bucket = byCode.get(entry.code);
+    if (bucket === undefined) {
+      bucket = [];
+      byCode.set(entry.code, bucket);
+    }
+    bucket.push(entry);
+  }
+  return byCode;
+}
+
+/**
+ * Builds the one-line, human-readable summary for a group of scan warnings.
+ *
+ * @param code Warning code shared by the group.
+ * @param count Number of warnings in the group; used for the count and pluralisation.
+ * @returns The summary sentence. Unknown codes fall back to a generic "N warnings (code)" line.
+ */
+function describeWarningGroup(code: string, count: number): string {
+  switch (code) {
+    case 'sampling_failed':
+      return `${count} ${plural(count, 'table')} could not be sampled (retries exhausted); descriptions used metadata-only fallback or were skipped.`;
+    case 'description_fallback_used':
+      return `${count} ${plural(count, 'table')} got an AI description from column metadata only (no sample rows available).`;
+    case 'enrichment_failed':
+      return `${count} ${plural(count, 'table/column')} could not be enriched.`;
+    case 'connector_capability_missing':
+      return `${count} ${plural(count, 'table')} affected by missing connector capability.`;
+    case 'statistics_failed':
+      return `${count} statistics ${plural(count, 'lookup')} failed.`;
+    case 'llm_unavailable':
+      return 'LLM provider unavailable; AI enrichment was skipped.';
+    case 'embedding_unavailable':
+      return 'Embedding provider unavailable; embeddings were skipped.';
+    case 'relationship_validation_failed':
+      return `${count} relationship ${plural(count, 'validation')} could not run.`;
+    case 'relationship_llm_invalid_reference':
+      return `${count} LLM-proposed ${plural(count, 'relationship')} referenced unknown columns.`;
+    case 'relationship_llm_proposal_failed':
+      return `${count} LLM relationship ${plural(count, 'proposal')} failed.`;
+    case 'scan_enrichment_backend_not_configured':
+      return 'Scan enrichment backend is not configured; AI stages were skipped.';
+    case 'credential_redacted':
+      // The verb has to agree with the count: "1 credential was", "2 credentials were".
+      return `${count} ${plural(count, 'credential')} ${count === 1 ? 'was' : 'were'} redacted from scan output.`;
+    default:
+      return `${count} ${plural(count, 'warning')} (${code})`;
+  }
+}
+
 function managedDaemonOptionsForScanRun(args: Extract<KtxScanArgs, { command: 'run' }>, io: KtxCliIo) {
  if (args.databaseIntrospectionUrl || !args.cliVersion || !args.runtimeInstallPolicy) {
    return undefined;
@ -153,11 +197,26 @@ function writeNeedsAttention(report: KtxScanReport, io: KtxCliIo): void {
  }
  if (report.warnings.length > 0) {
    io.stdout.write(`  ${report.warnings.length} ${plural(report.warnings.length, 'warning')}\n`);
-    for (const warning of report.warnings.slice(0, 5)) {
-      io.stdout.write(`    - ${warningLine(warning)}\n`);
-    }
-    if (report.warnings.length > 5) {
-      io.stdout.write(`    - ${report.warnings.length - 5} more warnings in the JSON report\n`);
+    const groups = groupWarningsByCode(report.warnings);
+    for (const [code, warnings] of groups) {
+      io.stdout.write(`    - ${describeWarningGroup(code, warnings.length)}\n`);
+      const first = warnings[0];
+      if (first) {
+        io.stdout.write(`        ${warningLine(first)}\n`);
+      }
+      if (warnings.length > 1) {
+        const moreTables = warnings
+          .slice(1)
+          .map((warning) =>
+            warning.table ? (warning.column ? `${warning.table}.${warning.column}` : warning.table) : null,
+          )
+          .filter((value): value is string => value !== null)
+          .slice(0, 3);
+        if (moreTables.length > 0) {
+          const suffix = warnings.length - 1 > moreTables.length ? `, …` : '';
+          io.stdout.write(`        also: ${moreTables.join(', ')}${suffix}\n`);
+        }
+      }
    }
  }
  if (report.capabilityGaps.length > 0) {
--- a/packages/cli/src/setup-agents.test.ts
+++ b/packages/cli/src/setup-agents.test.ts
@ -37,23 +37,28 @@ describe('setup agents', () => {
    await rm(tempDir, { recursive: true, force: true });
  });

-  it('plans project-scoped CLI files for every target', () => {
+  it('plans project-scoped CLI and research files for every target', () => {
    expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'claude-code', scope: 'project', mode: 'cli' })).toEqual([
      { kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
+      { kind: 'file', path: join(tempDir, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' },
      { kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' },
    ]);
    expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'codex', scope: 'project', mode: 'cli' })).toEqual([
      { kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md'), role: 'skill' },
+      { kind: 'file', path: join(tempDir, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' },
      { kind: 'file', path: join(tempDir, '.codex/instructions/ktx.md'), role: 'rule' },
    ]);
    expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'cursor', scope: 'project', mode: 'cli' })).toEqual([
      { kind: 'file', path: join(tempDir, '.cursor/rules/ktx.mdc') },
+      { kind: 'file', path: join(tempDir, '.cursor/rules/ktx-research.mdc'), role: 'research-skill' },
    ]);
    expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'opencode', scope: 'project', mode: 'cli' })).toEqual([
      { kind: 'file', path: join(tempDir, '.opencode/commands/ktx.md') },
+      { kind: 'file', path: join(tempDir, '.opencode/commands/ktx-research.md'), role: 'research-skill' },
    ]);
    expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'universal', scope: 'project', mode: 'cli' })).toEqual([
      { kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md') },
+      { kind: 'file', path: join(tempDir, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' },
    ]);
  });

@ -97,6 +102,31 @@ describe('setup agents', () => {
    expect(io.stderr()).toBe('');
  });

+  it('installs the research skill from the runtime asset', async () => {
+    const io = makeIo();
+
+    await expect(
+      runKtxSetupAgentsStep(
+        {
+          projectDir: tempDir,
+          inputMode: 'disabled',
+          yes: true,
+          agents: true,
+          target: 'universal',
+          scope: 'project',
+          mode: 'cli',
+          skipAgents: false,
+        },
+        io.io,
+      ),
+    ).resolves.toMatchObject({ status: 'ready' });
+
+    const researchSkill = await readFile(join(tempDir, '.agents/skills/ktx-research/SKILL.md'), 'utf-8');
+    expect(researchSkill).toContain('name: ktx-research');
+    expect(researchSkill).toContain('Always run `discover_data` before writing SQL.');
+    expect(researchSkill).toContain('Treat a `dictionary_search` miss as non-authoritative.');
+  });
+
  it('writes PATH-independent launcher commands for skills', async () => {
    const io = makeIo();

@ -123,6 +153,178 @@ describe('setup agents', () => {
    expect(skill).not.toContain('sql execute');
  });

+  it('writes Claude Code project MCP config and tracks the json key', async () => {
+    const io = makeIo();
+
+    await expect(
+      runKtxSetupAgentsStep(
+        {
+          projectDir: tempDir,
+          inputMode: 'disabled',
+          yes: true,
+          agents: true,
+          target: 'claude-code',
+          scope: 'project',
+          mode: 'cli',
+          skipAgents: false,
+        },
+        io.io,
+      ),
+    ).resolves.toMatchObject({ status: 'ready' });
+
+    const mcpJson = JSON.parse(await readFile(join(tempDir, '.mcp.json'), 'utf-8')) as {
+      mcpServers: { ktx: { type: string; url: string; headers?: Record<string, string> } };
+    };
+    expect(mcpJson.mcpServers.ktx).toEqual({ type: 'http', url: 'http://localhost:7878/mcp' });
+    expect(await readKtxAgentInstallManifest(tempDir)).toMatchObject({
+      entries: expect.arrayContaining([{ kind: 'json-key', path: join(tempDir, '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] }]),
+    });
+    expect(io.stdout()).toContain('Run `ktx mcp start` to enable the configured KTX MCP server.');
+  });
+
+  it('writes Cursor project MCP config', async () => {
+    const io = makeIo();
+
+    await runKtxSetupAgentsStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        yes: true,
+        agents: true,
+        target: 'cursor',
+        scope: 'project',
+        mode: 'cli',
+        skipAgents: false,
+      },
+      io.io,
+    );
+
+    const cursorJson = JSON.parse(await readFile(join(tempDir, '.cursor/mcp.json'), 'utf-8')) as {
+      mcpServers: { ktx: { url: string; headers?: Record<string, string> } };
+    };
+    expect(cursorJson.mcpServers.ktx).toEqual({ url: 'http://localhost:7878/mcp' });
+  });
+
+  it('prints Codex and opencode snippets without mutating printed-only config files', async () => {
+    const codexIo = makeIo();
+    await runKtxSetupAgentsStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        yes: true,
+        agents: true,
+        target: 'codex',
+        scope: 'project',
+        mode: 'cli',
+        skipAgents: false,
+      },
+      codexIo.io,
+    );
+    expect(codexIo.stdout()).toContain('[mcp_servers.ktx]');
+    expect(codexIo.stdout()).toContain('url = "http://localhost:7878/mcp"');
+
+    const opencodeIo = makeIo();
+    await runKtxSetupAgentsStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        yes: true,
+        agents: true,
+        target: 'opencode',
+        scope: 'project',
+        mode: 'cli',
+        skipAgents: false,
+      },
+      opencodeIo.io,
+    );
+    expect(opencodeIo.stdout()).toContain('"mcp"');
+    expect(opencodeIo.stdout()).toContain('"type": "remote"');
+    await expect(readFile(join(tempDir, 'opencode.json'), 'utf-8')).rejects.toThrow();
+  });
+
+  it('uses MCP daemon state for port and token metadata without rendering literal tokens', async () => {
+    await mkdir(join(tempDir, '.ktx'), { recursive: true });
+    await writeFile(
+      join(tempDir, '.ktx/mcp.json'),
+      `${JSON.stringify(
+        {
+          schemaVersion: 1,
+          pid: 999999,
+          host: '127.0.0.1',
+          port: 8787,
+          tokenAuth: true,
+          projectDir: tempDir,
+          startedAt: '2026-05-14T00:00:00.000Z',
+          logPath: join(tempDir, '.ktx/logs/mcp.log'),
+        },
+        null,
+        2,
+      )}\n`,
+      'utf-8',
+    );
+    const io = makeIo();
+    const previousToken = process.env.KTX_MCP_TOKEN;
+    process.env.KTX_MCP_TOKEN = 'secret-token';
+
+    try {
+      await runKtxSetupAgentsStep(
+        {
+          projectDir: tempDir,
+          inputMode: 'disabled',
+          yes: true,
+          agents: true,
+          target: 'claude-code',
+          scope: 'project',
+          mode: 'cli',
+          skipAgents: false,
+        },
+        io.io,
+      );
+
+      const rendered = JSON.stringify(JSON.parse(await readFile(join(tempDir, '.mcp.json'), 'utf-8')));
+      expect(rendered).toContain('http://127.0.0.1:8787/mcp');
+      expect(rendered).toContain('Bearer ${KTX_MCP_TOKEN}');
+      expect(rendered).not.toContain('secret-token');
+      expect(io.stdout()).toContain('Run `ktx mcp start` to enable the configured KTX MCP server.');
+    } finally {
+      if (previousToken === undefined) {
+        delete process.env.KTX_MCP_TOKEN;
+      } else {
+        process.env.KTX_MCP_TOKEN = previousToken;
+      }
+    }
+  });
+
+  it('writes Claude Code local MCP config under the project key in ~/.claude.json', async () => {
+    // Point HOME at a throwaway directory so the test never touches the real ~/.claude.json.
+    const home = await mkdtemp(join(tmpdir(), 'ktx-setup-agents-home-'));
+    const previousHome = process.env.HOME;
+    process.env.HOME = home;
+    try {
+      const io = makeIo();
+      await runKtxSetupAgentsStep(
+        {
+          projectDir: tempDir,
+          inputMode: 'disabled',
+          yes: true,
+          agents: true,
+          target: 'claude-code',
+          scope: 'local',
+          mode: 'cli',
+          skipAgents: false,
+        },
+        io.io,
+      );
+
+      const config = JSON.parse(await readFile(join(home, '.claude.json'), 'utf-8')) as {
+        projects: Record<string, { mcpServers: { ktx: { type: string; url: string } } }>;
+      };
+      expect(config.projects[tempDir].mcpServers.ktx).toEqual({ type: 'http', url: 'http://localhost:7878/mcp' });
+    } finally {
+      // Assigning `undefined` to an env var stores the string "undefined", so delete it instead.
+      if (previousHome === undefined) {
+        delete process.env.HOME;
+      } else {
+        process.env.HOME = previousHome;
+      }
+      await rm(home, { recursive: true, force: true });
+    }
+  });
+
  it('removes only manifest-listed files', async () => {
    const io = makeIo();
    await runKtxSetupAgentsStep(
--- a/packages/cli/src/setup-agents.ts
+++ b/packages/cli/src/setup-agents.ts
@ -1,3 +1,4 @@
+import { existsSync } from 'node:fs';
 import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
 import { dirname, join, relative, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
@ -12,9 +13,10 @@ import {
  createKtxSetupPromptAdapter,
  type KtxSetupPromptOption,
 } from './setup-prompts.js';
+import { readKtxMcpDaemonStatus } from './managed-mcp-daemon.js';

 export type KtxAgentTarget = 'claude-code' | 'codex' | 'cursor' | 'opencode' | 'universal';
-export type KtxAgentScope = 'project' | 'global';
+export type KtxAgentScope = 'project' | 'global' | 'local';
 export type KtxAgentInstallMode = 'cli';

 export interface KtxSetupAgentsArgs {
@ -45,18 +47,179 @@ export interface KtxAgentInstallManifest {
  installedAt: string;
  installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>;
  entries: Array<
-    | { kind: 'file'; path: string; role?: 'skill' | 'rule' }
+    | { kind: 'file'; path: string; role?: 'skill' | 'rule' | 'research-skill' }
    | { kind: 'json-key'; path: string; jsonPath: string[] }
  >;
 }

 type InstallEntry = KtxAgentInstallManifest['entries'][number];

+/** Where MCP clients should connect, as resolved from the daemon status or defaults. */
+interface KtxMcpEndpointInfo {
+  /** Full MCP endpoint URL written into client configs and snippets. */
+  url: string;
+  /** When true, generated configs include an Authorization header placeholder. */
+  tokenAuth: boolean;
+  /** True only when the daemon status was reported as running. */
+  running: boolean;
+}
+
+/** Outcome of configuring one agent target to reach the KTX MCP server. */
+interface KtxMcpClientInstallResult {
+  /** Manifest entries for config keys that were written to disk. */
+  entries: InstallEntry[];
+  /** Config snippets to print for targets whose config files are not written. */
+  snippets: string[];
+  /** Follow-up messages for the user, such as a reminder to start the daemon. */
+  notices: string[];
+}
+
 interface KtxCliLauncher {
  command: string;
  args: string[];
 }

+/**
+ * Reads `path` and returns its contents as a JSON object.
+ *
+ * A missing file, or one that is empty or whitespace-only, yields an empty object so
+ * callers can build the config from scratch.
+ *
+ * @throws Error naming `path` when the file holds invalid JSON or a JSON value that is
+ *   not a plain object (for example an array or a string).
+ */
+async function readJsonObject(path: string): Promise<Record<string, unknown>> {
+  if (!existsSync(path)) return {};
+  const raw = await readFile(path, 'utf-8');
+  if (raw.trim().length === 0) return {};
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (error) {
+    // Name the file: it may be a user-level config the user has to repair by hand.
+    throw new Error(`Invalid JSON in ${path}: ${error instanceof Error ? error.message : String(error)}`);
+  }
+  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+    throw new Error(`Expected JSON object in ${path}`);
+  }
+  return parsed as Record<string, unknown>;
+}
+
+/**
+ * Walks `jsonPath` from `root` and returns the object found at the end.
+ *
+ * Mutates `root`: any segment that is missing, or that holds something other than a
+ * plain object (including arrays), is replaced with a fresh empty object on the way down.
+ */
+function objectAtPath(root: Record<string, unknown>, jsonPath: string[]): Record<string, unknown> {
+  return jsonPath.reduce<Record<string, unknown>>((node, key) => {
+    const child = node[key];
+    if (child && typeof child === 'object' && !Array.isArray(child)) {
+      return child as Record<string, unknown>;
+    }
+    const created: Record<string, unknown> = {};
+    node[key] = created;
+    return created;
+  }, root);
+}
+
+/**
+ * Sets the value at `jsonPath` inside the JSON file at `path` and writes the file back.
+ *
+ * Other keys already in the file are kept. Missing parent objects and the parent
+ * directory are created as needed. Output is pretty-printed with two-space indentation
+ * and a trailing newline.
+ */
+async function writeJsonKey(path: string, jsonPath: string[], value: unknown): Promise<void> {
+  const document = await readJsonObject(path);
+  const container = objectAtPath(document, jsonPath.slice(0, -1));
+  const leafKey = jsonPath.at(-1) as string;
+  container[leafKey] = value;
+
+  const serialized = `${JSON.stringify(document, null, 2)}\n`;
+  await mkdir(dirname(path), { recursive: true });
+  await writeFile(path, serialized, 'utf-8');
+}
+
+/**
+ * Works out which MCP endpoint client configs should point at.
+ *
+ * - Daemon running: use the URL and token-auth flag from its status.
+ * - Stale daemon state: rebuild the URL from the recorded host and port; token auth is on
+ *   if the state says so or `KTX_MCP_TOKEN` is set.
+ * - Otherwise (including a failed status read): default to `http://localhost:7878/mcp`,
+ *   with token auth decided by `KTX_MCP_TOKEN`.
+ */
+async function resolveMcpEndpoint(projectDir: string): Promise<KtxMcpEndpointInfo> {
+  // Best effort: a failed status read is treated the same as "no daemon information".
+  const daemon = await readKtxMcpDaemonStatus({ projectDir }).catch(() => null);
+  const envTokenAuth = Boolean(process.env.KTX_MCP_TOKEN);
+
+  if (daemon) {
+    if (daemon.kind === 'running') {
+      return { url: daemon.url, tokenAuth: daemon.state.tokenAuth, running: true };
+    }
+    if (daemon.kind === 'stale' && daemon.state) {
+      const { host, port, tokenAuth } = daemon.state;
+      return {
+        url: `http://${host}:${port}/mcp`,
+        tokenAuth: tokenAuth || envTokenAuth,
+        running: false,
+      };
+    }
+  }
+
+  return { url: 'http://localhost:7878/mcp', tokenAuth: envTokenAuth, running: false };
+}
+
+/**
+ * Returns the Authorization header for client configs, or `undefined` without token auth.
+ *
+ * The value is the literal placeholder text `${KTX_MCP_TOKEN}`, not the token itself, so
+ * no secret is written into generated config.
+ */
+function tokenHeaders(endpoint: KtxMcpEndpointInfo): Record<string, string> | undefined {
+  if (!endpoint.tokenAuth) {
+    return undefined;
+  }
+  return { Authorization: 'Bearer ${KTX_MCP_TOKEN}' };
+}
+
+/** Builds the Claude Code MCP server entry: an `http` server with optional auth headers. */
+function claudeMcpEntry(endpoint: KtxMcpEndpointInfo): Record<string, unknown> {
+  const entry: Record<string, unknown> = { type: 'http', url: endpoint.url };
+  const headers = tokenHeaders(endpoint);
+  if (headers) {
+    entry.headers = headers;
+  }
+  return entry;
+}
+
+/** Builds the Cursor MCP server entry: the endpoint URL with optional auth headers. */
+function cursorMcpEntry(endpoint: KtxMcpEndpointInfo): Record<string, unknown> {
+  const entry: Record<string, unknown> = { url: endpoint.url };
+  const headers = tokenHeaders(endpoint);
+  if (headers) {
+    entry.headers = headers;
+  }
+  return entry;
+}
+
+/**
+ * Renders the text shown to Codex users: a TOML `[mcp_servers.ktx]` snippet, or an
+ * explanatory note instead when the endpoint uses token auth.
+ */
+function codexSnippet(endpoint: KtxMcpEndpointInfo): string {
+  const lines = endpoint.tokenAuth
+    ? [
+        'Codex MCP config does not currently document HTTP headers.',
+        'Run KTX on loopback without token auth for Codex, or configure headers after Codex documents support.',
+      ]
+    : ['[mcp_servers.ktx]', `url = "${endpoint.url}"`];
+  return lines.join('\n');
+}
+
+/** Renders the pretty-printed opencode JSON snippet declaring KTX as a remote MCP server. */
+function opencodeSnippet(endpoint: KtxMcpEndpointInfo): string {
+  const ktxServer: Record<string, unknown> = {
+    type: 'remote',
+    url: endpoint.url,
+    enabled: true,
+  };
+  const headers = tokenHeaders(endpoint);
+  if (headers) {
+    ktxServer.headers = headers;
+  }
+  return JSON.stringify({ mcp: { ktx: ktxServer } }, null, 2);
+}
+
+/**
+ * Picks the Claude Code config file and the JSON path of the KTX entry for a scope.
+ *
+ * - `global`: `mcpServers.ktx` in `$HOME/.claude.json`.
+ * - `local`: `projects.<resolved projectDir>.mcpServers.ktx` in `$HOME/.claude.json`.
+ * - otherwise: `mcpServers.ktx` in `<projectDir>/.mcp.json`.
+ */
+function claudeConfigPath(projectDir: string, scope: KtxAgentScope): { path: string; jsonPath: string[] } {
+  const home = process.env.HOME ?? '';
+  switch (scope) {
+    case 'global':
+      return { path: join(home, '.claude.json'), jsonPath: ['mcpServers', 'ktx'] };
+    case 'local':
+      return {
+        path: join(home, '.claude.json'),
+        jsonPath: ['projects', resolve(projectDir), 'mcpServers', 'ktx'],
+      };
+    default:
+      return { path: join(resolve(projectDir), '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] };
+  }
+}
+
+/**
+ * Picks the Cursor MCP config file for a scope: `$HOME/.cursor/mcp.json` for `global`,
+ * `<projectDir>/.cursor/mcp.json` for every other scope. The entry lives at `mcpServers.ktx`.
+ */
+function cursorConfigPath(projectDir: string, scope: KtxAgentScope): { path: string; jsonPath: string[] } {
+  const home = process.env.HOME ?? '';
+  const baseDir = scope === 'global' ? home : resolve(projectDir);
+  return { path: join(baseDir, '.cursor/mcp.json'), jsonPath: ['mcpServers', 'ktx'] };
+}
+
+/**
+ * Configures one agent target to reach the KTX MCP server.
+ *
+ * Claude Code and Cursor configs are written to disk and reported as `json-key` entries.
+ * Codex and opencode only get a snippet to print. Other targets get no MCP config. A
+ * notice is added whenever the daemon is not currently running.
+ */
+async function installMcpClientConfig(input: {
+  projectDir: string;
+  target: KtxAgentTarget;
+  scope: KtxAgentScope;
+}): Promise<KtxMcpClientInstallResult> {
+  const endpoint = await resolveMcpEndpoint(input.projectDir);
+  const result: KtxMcpClientInstallResult = { entries: [], snippets: [], notices: [] };
+
+  if (!endpoint.running) {
+    result.notices.push('Run `ktx mcp start` to enable the configured KTX MCP server.');
+  }
+
+  switch (input.target) {
+    case 'claude-code': {
+      const { path, jsonPath } = claudeConfigPath(input.projectDir, input.scope);
+      await writeJsonKey(path, jsonPath, claudeMcpEntry(endpoint));
+      result.entries.push({ kind: 'json-key', path, jsonPath });
+      break;
+    }
+    case 'cursor': {
+      const { path, jsonPath } = cursorConfigPath(input.projectDir, input.scope);
+      await writeJsonKey(path, jsonPath, cursorMcpEntry(endpoint));
+      result.entries.push({ kind: 'json-key', path, jsonPath });
+      break;
+    }
+    case 'codex':
+      result.snippets.push(`Codex MCP snippet for ~/.codex/config.toml:\n${codexSnippet(endpoint)}`);
+      break;
+    case 'opencode': {
+      const displayPath =
+        input.scope === 'global'
+          ? '~/.config/opencode/opencode.json'
+          : relative(input.projectDir, join(input.projectDir, 'opencode.json'));
+      result.snippets.push(`opencode MCP snippet for ${displayPath}:\n${opencodeSnippet(endpoint)}`);
+      break;
+    }
+    default:
+      // Remaining targets have no MCP client config to write or print.
+      break;
+  }
+
+  return result;
+}
+
 export function agentInstallManifestPath(projectDir: string): string {
  return join(resolve(projectDir), '.ktx/agents/install-manifest.json');
 }
@ -72,6 +235,7 @@ export function plannedKtxAgentFiles(input: {
      const home = process.env.HOME ?? '';
      return [
        { kind: 'file', path: join(home, '.claude/skills/ktx/SKILL.md'), role: 'skill' as const },
+        { kind: 'file', path: join(home, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' as const },
        { kind: 'file', path: join(home, '.claude/rules/ktx.md'), role: 'rule' as const },
      ];
    }
@ -79,25 +243,44 @@ export function plannedKtxAgentFiles(input: {
      const codexHome = process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex');
      return [
        { kind: 'file', path: join(codexHome, 'skills/ktx/SKILL.md'), role: 'skill' as const },
+        { kind: 'file', path: join(codexHome, 'skills/ktx-research/SKILL.md'), role: 'research-skill' as const },
        { kind: 'file', path: join(codexHome, 'instructions/ktx.md'), role: 'rule' as const },
      ];
    }
+    if (input.target === 'cursor' || input.target === 'opencode') {
+      return [];
+    }
    throw new Error(`Global ${input.target} installation is not supported; omit --global.`);
  }

  const root = resolve(input.projectDir);
-  const cliEntries: Partial<Record<KtxAgentTarget, InstallEntry>> = {
-    'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
-    codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' },
-    cursor: { kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') },
-    opencode: { kind: 'file', path: join(root, '.opencode/commands/ktx.md') },
-    universal: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') },
+  const cliEntries: Partial<Record<KtxAgentTarget, InstallEntry[]>> = {
+    'claude-code': [
+      { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
+      { kind: 'file', path: join(root, '.claude/skills/ktx-research/SKILL.md'), role: 'research-skill' },
+    ],
+    codex: [
+      { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' },
+      { kind: 'file', path: join(root, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' },
+    ],
+    cursor: [
+      { kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') },
+      { kind: 'file', path: join(root, '.cursor/rules/ktx-research.mdc'), role: 'research-skill' },
+    ],
+    opencode: [
+      { kind: 'file', path: join(root, '.opencode/commands/ktx.md') },
+      { kind: 'file', path: join(root, '.opencode/commands/ktx-research.md'), role: 'research-skill' },
+    ],
+    universal: [
+      { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') },
+      { kind: 'file', path: join(root, '.agents/skills/ktx-research/SKILL.md'), role: 'research-skill' },
+    ],
  };
  const ruleEntries: Partial<Record<KtxAgentTarget, InstallEntry>> = {
    'claude-code': { kind: 'file', path: join(root, '.claude/rules/ktx.md'), role: 'rule' },
    codex: { kind: 'file', path: join(root, '.codex/instructions/ktx.md'), role: 'rule' },
  };
-  return [cliEntries[input.target], ruleEntries[input.target]].filter(
+  return [...(cliEntries[input.target] ?? []), ruleEntries[input.target]].filter(
    (entry): entry is InstallEntry => entry !== undefined,
  );
 }
@ -109,6 +292,12 @@ function ktxCliLauncher(): KtxCliLauncher {
  };
 }

+/**
+ * Loads the research skill text from `./skills/research/SKILL.md`, resolved relative to
+ * this module, and guarantees that it ends with a newline.
+ */
+async function readResearchSkillContent(): Promise<string> {
+  const assetUrl = new URL('./skills/research/SKILL.md', import.meta.url);
+  const text = await readFile(fileURLToPath(assetUrl), 'utf-8');
+  if (text.endsWith('\n')) {
+    return text;
+  }
+  return `${text}\n`;
+}
+
 function shellQuote(value: string): string {
  if (/^[A-Za-z0-9_/:=.,@%+-]+$/.test(value)) {
    return value;
@ -283,16 +472,22 @@ export function formatInstallSummary(
  projectDir: string,
 ): string {
  const entriesByTarget = new Map<KtxAgentTarget, InstallEntry[]>();
-  let idx = 0;
  for (const install of installs) {
-    const planned = plannedKtxAgentFiles({ projectDir, ...install });
-    entriesByTarget.set(install.target, entries.slice(idx, idx + planned.length));
-    idx += planned.length;
+    const plannedFilePaths = new Set(
+      plannedKtxAgentFiles({ projectDir, ...install })
+        .filter((entry) => entry.kind === 'file')
+        .map((entry) => entry.path),
+    );
+    entriesByTarget.set(
+      install.target,
+      entries.filter((entry) => entry.kind === 'file' && plannedFilePaths.has(entry.path)),
+    );
  }

  const fileHints: Record<string, string> = {
    skill: 'teaches your agent which KTX commands to run',
    rule: 'tells your agent when to use KTX',
+    'research-skill': 'teaches your agent the KTX MCP research workflow',
  };

  const lines: string[] = [];
@ -304,7 +499,7 @@ export function formatInstallSummary(
        install.scope === 'global' ? entry.path : relative(projectDir, entry.path);
      if (entry.kind === 'file') {
        const isRule = entry.role === 'rule' || fileEntryLabels[install.target] === 'Rule installed';
-        const label = isRule ? 'Rule installed' : fileEntryLabels[install.target];
+        const label = entry.role === 'research-skill' ? 'Research skill installed' : isRule ? 'Rule installed' : fileEntryLabels[install.target];
        const hint = fileHints[isRule ? 'rule' : (entry.role ?? 'skill')] ?? '';
        lines.push(`    + ${label} — ${hint}`);
        lines.push(`      ${displayPath}`);
@ -327,6 +522,8 @@ async function installTarget(input: {
    const content =
      entry.role === 'rule'
        ? ruleInstructionContent({ projectDir: input.projectDir })
+        : entry.role === 'research-skill'
+          ? await readResearchSkillContent()
        : cliInstructionContent({ projectDir: input.projectDir, launcher });
    await mkdir(dirname(entry.path), { recursive: true });
    await writeFile(entry.path, content, 'utf-8');
@ -391,11 +588,25 @@ export async function runKtxSetupAgentsStep(

  const installs = targets.map((target) => ({ target, scope: args.scope, mode }));
  const entries: InstallEntry[] = [];
+  const snippets: string[] = [];
+  const notices = new Set<string>();
  try {
-    for (const install of installs) entries.push(...(await installTarget({ projectDir: args.projectDir, ...install })));
+    for (const install of installs) {
+      entries.push(...(await installTarget({ projectDir: args.projectDir, ...install })));
+      const mcpResult = await installMcpClientConfig({ projectDir: args.projectDir, target: install.target, scope: install.scope });
+      entries.push(...mcpResult.entries);
+      for (const snippet of mcpResult.snippets) snippets.push(snippet);
+      for (const notice of mcpResult.notices) notices.add(notice);
+    }
    await writeManifest(args.projectDir, mergeManifest(args.projectDir, await readKtxAgentInstallManifest(args.projectDir), installs, entries));
    await markAgentsComplete(args.projectDir);
    io.stdout.write(`\nAgent integration complete\n\n${formatInstallSummary(installs, entries, args.projectDir)}\n`);
+    for (const snippet of snippets) {
+      io.stdout.write(`\n${snippet}\n`);
+    }
+    for (const notice of notices) {
+      io.stdout.write(`\n${notice}\n`);
+    }
    return { status: 'ready', projectDir: args.projectDir, installs };
  } catch (error) {
    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
--- a/packages/cli/src/setup-context.test.ts
+++ b/packages/cli/src/setup-context.test.ts
@ -198,7 +198,7 @@ describe('setup context build state', () => {

    await writeKtxSetupContextState(tempDir, {
      runId: 'setup-context-local-abc123',
-      status: 'running',
+      status: 'stale',
      startedAt: '2026-05-09T10:00:00.000Z',
      updatedAt: '2026-05-09T10:00:00.000Z',
      primarySourceConnectionIds: ['warehouse'],
@ -207,6 +207,7 @@ describe('setup context build state', () => {
      artifactPaths: [],
      retryableFailedTargets: [],
      commands: contextBuildCommands(tempDir, 'setup-context-local-abc123'),
+      failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.',
      sourceProgress: [
        {
          connectionId: 'warehouse',
@ -623,34 +624,13 @@ describe('setup context build state', () => {
    expect(io.stderr()).toContain('No databases or context sources are configured for a KTX context build.');
  });

-  it('normalizes legacy detached and paused setup context states to stale', async () => {
-    await writeReadyProject(tempDir);
-    await writeKtxSetupContextState(tempDir, {
-      runId: 'setup-context-local-old',
-      status: 'detached' as never,
-      startedAt: '2026-05-09T09:00:00.000Z',
-      updatedAt: '2026-05-09T09:00:00.000Z',
-      primarySourceConnectionIds: ['warehouse'],
-      contextSourceConnectionIds: [],
-      reportIds: [],
-      artifactPaths: [],
-      retryableFailedTargets: [],
-      commands: contextBuildCommands(tempDir, 'setup-context-local-old'),
-    });
-
-    await expect(readKtxSetupContextState(tempDir)).resolves.toMatchObject({
-      status: 'stale',
-      failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.',
-    });
-  });
-
-  it('starts a fresh foreground build when a stale running state is found', async () => {
+  it('starts a fresh foreground build when stale state is found', async () => {
    await writeReadyProject(tempDir, {
      connections: { warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } } },
    });
    await writeKtxSetupContextState(tempDir, {
-      runId: 'setup-context-local-running',
-      status: 'running',
+      runId: 'setup-context-local-stale',
+      status: 'stale',
      startedAt: '2026-05-09T09:00:00.000Z',
      updatedAt: '2026-05-09T09:00:00.000Z',
      primarySourceConnectionIds: ['warehouse'],
@ -658,7 +638,8 @@ describe('setup context build state', () => {
      reportIds: [],
      artifactPaths: [],
      retryableFailedTargets: [],
-      commands: contextBuildCommands(tempDir, 'setup-context-local-running'),
+      commands: contextBuildCommands(tempDir, 'setup-context-local-stale'),
+      failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.',
    });
    const io = makeIo();
    const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 }));
--- a/packages/cli/src/setup-context.ts
+++ b/packages/cli/src/setup-context.ts
@ -27,10 +27,8 @@ import {

 export type KtxSetupContextBuildStatus =
  | 'not_started'
-  | 'running'
  | 'completed'
  | 'failed'
-  | 'interrupted'
  | 'stale';

 export interface KtxSetupContextCommands {
@ -84,7 +82,6 @@ export interface KtxSetupContextStepArgs {
  forcePrompt?: boolean;
  allowEmpty?: boolean;
  prompt?: boolean;
-  autoWatch?: boolean;
  cliVersion?: string;
  runtimeInstallPolicy?: KtxManagedPythonInstallPolicy;
 }
@ -154,14 +151,8 @@ function normalizeState(projectDir: string, value: unknown): KtxSetupContextStat
  }
  const record = value as Record<string, unknown>;
  const rawStatus = typeof record.status === 'string' ? record.status : 'not_started';
-  const legacyActive = rawStatus === 'detached' || rawStatus === 'paused' || rawStatus === 'running';
-  const status: KtxSetupContextBuildStatus = legacyActive
-    ? 'stale'
-    : rawStatus === 'completed' ||
-        rawStatus === 'failed' ||
-        rawStatus === 'interrupted' ||
-        rawStatus === 'not_started' ||
-        rawStatus === 'stale'
+  const status: KtxSetupContextBuildStatus =
+    rawStatus === 'completed' || rawStatus === 'failed' || rawStatus === 'not_started' || rawStatus === 'stale'
      ? rawStatus
      : 'not_started';
  const runId = typeof record.runId === 'string' && record.runId.length > 0 ? record.runId : undefined;
@ -187,11 +178,7 @@ function normalizeState(projectDir: string, value: unknown): KtxSetupContextStat
      ? record.retryableFailedTargets.filter((item): item is string => typeof item === 'string')
      : [],
    commands: contextBuildCommands(projectDir, runId),
-    ...(typeof record.failureReason === 'string'
-      ? { failureReason: record.failureReason }
-      : legacyActive
-        ? { failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.' }
-        : {}),
+    ...(typeof record.failureReason === 'string' ? { failureReason: record.failureReason } : {}),
    ...(normalizeSourceProgress(record.sourceProgress) ? { sourceProgress: normalizeSourceProgress(record.sourceProgress) } : {}),
  };
 }
@ -552,9 +539,9 @@ async function runBuild(
  const now = deps.now ?? (() => new Date());
  const runId = deps.runIdFactory?.() ?? runIdFactory();
  const startedAt = now().toISOString();
-  const runningState: KtxSetupContextState = {
+  const incompleteState: KtxSetupContextState = {
    runId,
-    status: 'running',
+    status: 'stale',
    startedAt,
    updatedAt: startedAt,
    primarySourceConnectionIds: targets.primarySourceConnectionIds,
@ -563,8 +550,9 @@ async function runBuild(
    artifactPaths: [],
    retryableFailedTargets: [],
    commands: contextBuildCommands(args.projectDir, runId),
+    failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.',
  };
-  await writeKtxSetupContextState(args.projectDir, runningState);
+  await writeKtxSetupContextState(args.projectDir, incompleteState);

  let lastSourceProgress: ContextBuildSourceProgressUpdate[] | undefined;
  const contextBuild = deps.runContextBuild ?? runContextBuild;
@ -584,7 +572,7 @@ async function runBuild(
          const resolvedDir = resolve(args.projectDir);
          mkdirSync(join(resolvedDir, '.ktx', 'setup'), { recursive: true });
          const progressState = normalizeState(resolvedDir, {
-            ...runningState,
+            ...incompleteState,
            sourceProgress: sources,
            updatedAt: new Date().toISOString(),
          });
@ -600,7 +588,7 @@ async function runBuild(
  if (buildResult.exitCode !== 0) {
    const updatedAt = now().toISOString();
    await writeKtxSetupContextState(args.projectDir, {
-      ...runningState,
+      ...incompleteState,
      status: 'failed',
      updatedAt,
      reportIds: completedReportIds,
@ -616,7 +604,7 @@ async function runBuild(
  if (!readiness.ready) {
    const updatedAt = now().toISOString();
    await writeKtxSetupContextState(args.projectDir, {
-      ...runningState,
+      ...incompleteState,
      status: 'failed',
      updatedAt,
      reportIds: completedReportIds,
@ -635,13 +623,14 @@ async function runBuild(
  await markContextComplete(project.projectDir);
  const completedAt = now().toISOString();
  await writeKtxSetupContextState(args.projectDir, {
-    ...runningState,
+    ...incompleteState,
    status: 'completed',
    updatedAt: completedAt,
    completedAt,
    reportIds: completedReportIds,
    artifactPaths: completedArtifactPaths,
    retryableFailedTargets: [],
+    failureReason: undefined,
    ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
  });
  writeSuccess(project, readiness, targets, io);
--- a/packages/cli/src/setup-sources.test.ts
+++ b/packages/cli/src/setup-sources.test.ts
@ -1024,6 +1024,8 @@ describe('setup sources step', () => {
            databaseMappings: { '1': 'warehouse' },
            syncEnabled: { '1': true },
            syncMode: 'ALL',
+            selections: { collections: [], items: [] },
+            defaultTagNames: [],
          },
        },
        deps: {
@ -1181,6 +1183,8 @@ describe('setup sources step', () => {
        databaseMappings: { '1': 'warehouse' },
        syncEnabled: { '1': true },
        syncMode: 'ALL',
+        selections: { collections: [], items: [] },
+        defaultTagNames: [],
      },
    });
    const testPrompts = prompts({
--- a/packages/cli/src/setup-sources.ts
+++ b/packages/cli/src/setup-sources.ts
@ -451,6 +451,8 @@ function buildMetabaseConnection(args: KtxSetupSourcesArgs): KtxProjectConnectio
      databaseMappings: { [String(args.metabaseDatabaseId)]: args.sourceWarehouseConnectionId },
      syncEnabled: { [String(args.metabaseDatabaseId)]: true },
      syncMode: 'ALL',
+      selections: { collections: [], items: [] },
+      defaultTagNames: [],
    },
  };
 }
--- a/packages/cli/src/setup.test.ts
+++ b/packages/cli/src/setup.test.ts
@ -7,7 +7,7 @@ import { writeKtxSetupState } from '@ktx/context/project';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';

 import { localFakeBundleReport, persistLocalBundleReport } from './ingest.test-utils.js';
-import { contextBuildCommands, readKtxSetupContextState, writeKtxSetupContextState } from './setup-context.js';
+import { contextBuildCommands, writeKtxSetupContextState } from './setup-context.js';
 import { runDemoTour } from './setup-demo-tour.js';
 import { formatKtxSetupStatus, readKtxSetupStatus, runKtxSetup } from './setup.js';

@ -276,7 +276,7 @@ describe('setup status', () => {
    });
    await writeKtxSetupContextState(tempDir, {
      runId: 'setup-context-local-abc123',
-      status: 'running',
+      status: 'stale',
      startedAt: '2026-05-09T10:00:00.000Z',
      updatedAt: '2026-05-09T10:01:00.000Z',
      primarySourceConnectionIds: ['warehouse'],
@ -285,6 +285,7 @@ describe('setup status', () => {
      artifactPaths: [],
      retryableFailedTargets: [],
      commands: contextBuildCommands(tempDir, 'setup-context-local-abc123'),
+      failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.',
    });

    await expect(readKtxSetupStatus(tempDir)).resolves.toMatchObject({
@ -311,7 +312,7 @@ describe('setup status', () => {
        '    url: env:DATABASE_URL',
        '  metabase:',
        '    driver: metabase',
-        '    url: env:METABASE_URL',
+        '    api_url: https://metabase.example.test',
        '    api_key_ref: env:METABASE_API_KEY',
        '    warehouse_connection_id: warehouse',
        'llm:',
@ -1619,40 +1620,6 @@ describe('setup status', () => {
    expect(io.stderr()).toContain('KTX context is not ready for agents.');
  });

-  it('does not offer background watch choices from setup status', async () => {
-    await writeFile(
-      join(tempDir, 'ktx.yaml'),
-      [
-        'setup:',
-        '  database_connection_ids:',
-        '    - warehouse',
-        'connections:',
-        '  warehouse:',
-        '    driver: postgres',
-        '    url: env:DATABASE_URL',
-        '',
-      ].join('\n'),
-      'utf-8',
-    );
-    await writeKtxSetupContextState(tempDir, {
-      runId: 'setup-context-local-stale',
-      status: 'running',
-      startedAt: '2026-05-09T09:00:00.000Z',
-      updatedAt: '2026-05-09T09:00:00.000Z',
-      primarySourceConnectionIds: ['warehouse'],
-      contextSourceConnectionIds: [],
-      reportIds: [],
-      artifactPaths: [],
-      retryableFailedTargets: [],
-      commands: contextBuildCommands(tempDir, 'setup-context-local-stale'),
-    });
-
-    const status = await readKtxSetupStatus(tempDir);
-    expect(status.context.status).toBe('stale');
-    const state = await readKtxSetupContextState(tempDir);
-    expect(state.status).toBe('stale');
-  });
-
  it('routes a ready project menu selection to agent setup', async () => {
    const calls: string[] = [];
    const io = makeIo();
--- a/packages/cli/src/setup.ts
+++ b/packages/cli/src/setup.ts
@ -163,10 +163,7 @@ type KtxSetupFlowStatus =
  | 'skipped'
  | 'back'
  | 'missing-input'
-  | 'failed'
-  | 'detached'
-  | 'paused'
-  | 'interrupted';
+  | 'failed';

 export interface KtxSetupEntryMenuPromptAdapter {
  select(options: { message: string; options: KtxSetupPromptOption[] }): Promise<string>;
@ -408,10 +405,6 @@ function setupContextReady(status: KtxSetupStatus): boolean {
  return status.context.ready;
 }

-function setupContextActive(status: KtxSetupStatus): boolean {
-  return status.context.status === 'running';
-}
-
 function writeContextNotReadyForAgents(projectDir: string, io: KtxCliIo): void {
  io.stderr.write('KTX context is not ready for agents.\n\n');
  io.stderr.write(`Build context first:\n  ktx setup --project-dir ${resolve(projectDir)}\n\n`);
@ -451,27 +444,22 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
    args.inputMode !== 'disabled' &&
    !args.agents &&
    (io.stdout.isTTY === true || deps.entryMenuDeps?.prompts !== undefined);
-  let autoWatchActiveBuild = false;

  setupLoop: while (true) {
    entryAction = undefined;
    if (canShowEntryMenu) {
      const status = await readKtxSetupStatus(args.projectDir);
-      if (setupContextActive(status)) {
-        autoWatchActiveBuild = true;
-      } else {
-        entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action;
-        if (entryAction === 'exit') {
-          (deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.');
-          return 0;
-        }
-        if (entryAction === 'status') {
-          io.stdout.write(formatKtxSetupStatus(status));
-          return 0;
-        }
-        if (entryAction === 'demo') {
-          return await runKtxSetupDemoFromEntryMenu(args, io, deps);
-        }
+      entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action;
+      if (entryAction === 'exit') {
+        (deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.');
+        return 0;
+      }
+      if (entryAction === 'status') {
+        io.stdout.write(formatKtxSetupStatus(status));
+        return 0;
+      }
+      if (entryAction === 'demo') {
+        return await runKtxSetupDemoFromEntryMenu(args, io, deps);
      }
    }

@ -500,30 +488,6 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
    const currentStatus = await readKtxSetupStatus(projectResult.projectDir);
    let readyAction: string | undefined;

-    if (args.inputMode !== 'disabled' && !agentsRequested && setupContextActive(currentStatus)) {
-      const contextRunner =
-        deps.context ?? ((contextArgs, contextIo) => runKtxSetupContextStep(contextArgs, contextIo, deps.contextDeps));
-      const contextResult = await contextRunner(
-        {
-          projectDir: projectResult.projectDir,
-          inputMode: args.inputMode,
-          allowEmpty: true,
-          ...(autoWatchActiveBuild ? { autoWatch: true } : {}),
-        },
-        io,
-      );
-      autoWatchActiveBuild = false;
-      if (contextResult.status === 'back') {
-        continue;
-      }
-      if (contextResult.status === 'failed' || contextResult.status === 'missing-input') {
-        return 1;
-      }
-      if (contextResult.status !== 'ready') {
-        return 0;
-      }
-    }
-
    if (args.inputMode !== 'disabled' && !agentsRequested) {
      if (isKtxSetupReady(currentStatus)) {
        readyAction = (await runKtxSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action;
--- a/packages/cli/src/skills/research/SKILL.md
+++ b/packages/cli/src/skills/research/SKILL.md
@ -0,0 +1,49 @@
+---
+name: ktx-research
+description: Use when answering a question that needs data from a KTX-connected database - investigating, analyzing, "how many", "show me", "what's the breakdown of", finding records by value, exploring tables, comparing periods, or any data-investigation request. Triggers even when the user does not say "research"; if the answer requires querying a configured KTX connection, this skill applies.
+---
+
+# KTX Research Workflow
+
+You have access to KTX MCP tools for investigating data. Follow this workflow.
+
+<workflow>
+1. **Discover** - call `discover_data` first to see what exists across wiki, semantic-layer sources, and raw tables. Returns refs only.
+2. **Inspect top hits in parallel** - for each promising ref:
+   - `kind: 'wiki'` -> `wiki_read`
+   - `kind: 'sl_source'`, `kind: 'sl_measure'`, or `kind: 'sl_dimension'` -> `sl_read_source`
+   - `kind: 'table'` or `kind: 'column'` -> `entity_details`
+3. **Resolve literals** - if the user named a value such as "Acme Corp" or "status=shipped", call `dictionary_search` to find which column holds it.
+4. **Query** -
+   - Prefer `sl_query` when the semantic layer covers the question.
+   - Use `sql_execution` only for questions the semantic layer does not cover.
+5. **Capture learnings** - at the end of the turn, call `memory_capture` so future turns benefit. Skip when the answer carries no durable knowledge.
+</workflow>
+
+<rules>
+- Always run `discover_data` before writing SQL. Do not guess table names.
+- Prefer the semantic layer over raw SQL when both can answer the question; measures are the source of truth.
+- Read entity details before writing SQL against an unfamiliar table. Do not assume column names.
+- Treat `sql_execution` as read-only. Writes are rejected by the server.
+- Validate value mentions with `dictionary_search` instead of guessing case or spelling. Treat a `dictionary_search` miss as non-authoritative. The index is built from profile-sampled values, so a missing value may simply have been outside the sample. Follow up with `sql_execution` against the most plausible columns before concluding the value is absent.
+</rules>
+
+<examples>
+**Input:** "How many orders did Acme Corp place last month?"
+
+**Workflow:**
+1. `discover_data({ query: "orders customer monthly" })` finds an orders semantic-layer source.
+2. `sl_read_source({ connectionId: "warehouse", sourceName: "orders_facts" })` confirms the source grain, measures, and dimensions.
+3. `dictionary_search({ values: ["Acme Corp"] })` finds `customers.name`.
+4. `sl_query({ connectionId: "warehouse", measures: ["order_count"], filters: ["customer_name = 'Acme Corp'"] })` answers through the semantic layer.
+5. `memory_capture({ userMessage, assistantMessage })` captures the durable finding.
+
+---
+
+**Input:** "What columns does the events table have?"
+
+**Workflow:**
+1. `discover_data({ query: "events table" })` returns a `table` ref.
+2. `entity_details({ connectionId: "warehouse", entities: [{ table: "analytics.events" }] })` returns columns, types, and foreign keys.
+3. Answer directly. No query is needed.
+</examples>
--- a/packages/cli/src/sl.ts
+++ b/packages/cli/src/sl.ts
@ -213,7 +213,11 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
      if (!source) {
        throw new Error(`Semantic-layer source "${args.connectionId}/${args.sourceName}" was not found`);
      }
-      const result = await validateLocalSlSource(source.yaml, { project, connectionId: args.connectionId });
+      const result = await validateLocalSlSource(source.yaml, {
+        project,
+        connectionId: args.connectionId,
+        sourceName: args.sourceName,
+      });
      if (!result.valid) {
        for (const error of result.errors) {
          io.stderr.write(`${error}\n`);
--- a/packages/cli/src/status-project.ts
+++ b/packages/cli/src/status-project.ts
@ -16,6 +16,7 @@ import {
  red,
  yellow,
 } from './io/symbols.js';
+import { KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js';

 type ProjectStatusLevel = 'ok' | 'warn' | 'fail';
 type ProjectVerdict = 'ready' | 'partial' | 'blocked';
@ -76,6 +77,8 @@ interface WarningItem {
  fix?: string;
 }

+const PROJECT_READY_COMMANDS = KTX_NEXT_STEP_DIRECT_COMMANDS.map((step) => step.command);
+
 function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
 }
@ -139,7 +142,7 @@ function buildLlmStatus(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): Ll
      backend,
      model,
      status: 'fail',
-      detail: 'no LLM configured — ktx ask will not work',
+      detail: 'no LLM configured; research agent will not run',
      fix: 'Run: ktx setup (choose an LLM provider)',
    };
  }
@ -578,7 +581,7 @@ function buildVerdict(
  if (llm.status === 'fail') {
    return {
      verdict: 'blocked',
-      reason: 'LLM not configured — `ktx ask` will not work.',
+      reason: 'LLM not configured; research agent will not run.',
      nextActions: ['ktx setup'],
    };
  }
@ -612,7 +615,7 @@ function buildVerdict(
    return {
      verdict: 'ready',
      reason: 'Ready.',
-      nextActions: ['ktx scan', 'ktx wiki', 'ktx sl ask "…"'],
+      nextActions: [...PROJECT_READY_COMMANDS],
    };
  }

--- a/packages/cli/tsconfig.json
+++ b/packages/cli/tsconfig.json
@ -3,7 +3,8 @@
  "compilerOptions": {
    "outDir": "./dist",
    "rootDir": "./src",
-    "jsx": "react-jsx"
+    "jsx": "react-jsx",
+    "tsBuildInfoFile": "./node_modules/.cache/tsc.tsbuildinfo"
  },
  "include": ["src/**/*.ts", "src/**/*.tsx"],
  "exclude": ["dist", "node_modules"]
--- a/packages/connector-bigquery/src/connector.test.ts
+++ b/packages/connector-bigquery/src/connector.test.ts
@ -100,7 +100,7 @@ const connection = {
  dataset_id: 'analytics',
  credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }),
  location: 'US',
-};
+} as const;

 describe('KtxBigQueryScanConnector', () => {
  it('resolves configuration safely', () => {
--- a/packages/context/package.json
+++ b/packages/context/package.json
@ -153,6 +153,7 @@
    "@types/node": "^25.7.0",
    "@types/pg": "^8.20.0",
    "@vitest/coverage-v8": "^4.1.6",
+    "ajv": "8.20.0",
    "typescript": "^6.0.3",
    "vitest": "^4.1.6"
  },
--- a/packages/context/skills/_shared/identifier-verification.md
+++ b/packages/context/skills/_shared/identifier-verification.md
@ -8,16 +8,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
--- a/packages/context/skills/dbt_ingest/SKILL.md
+++ b/packages/context/skills/dbt_ingest/SKILL.md
@ -14,14 +14,14 @@ Use this skill for **uploaded** dbt projects (`dbt_project.yml` at stage root, `
 |-----|--------|--------|
 | `models:` entry with `columns:` | **Overlay** on the manifest table with the same name (after `discover_data` / `entity_details`) | One SL source per physical table; model name may differ from DB name - resolve with `read_raw_file` + warehouse context. |
 | `sources:` → `tables:` | Same as models; use `identifier` when present instead of logical `name`. | Schema + name must match how the connection sees tables. |
-| Column `description` | `descriptions.user` or merged `descriptions` map on the column | Do not overwrite `dbt` description keys from sync. |
+| Column `description` | `column_overrides[].descriptions.user` on the overlay | Do not overwrite `dbt` description keys from sync. |
 | `data_tests: not_null` / `unique` | Short hint in column `descriptions` or notes: “dbt: not null”, “dbt: unique” | Full structured metadata lands in manifest via **sync**; the skill keeps bundle-time SL text useful for the agent. |
 | `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `discover_data` / filters. |
 | `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `discover_data` / `entity_details` | If the ref cannot be resolved, capture the intent in a wiki page instead. |

 ## Physical schema grounding

-dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `discover_data`, `sl_discover`, or `entity_details` and use only confirmed column names in `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`.
+dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `discover_data`, `sl_discover`, or `entity_details` and use only confirmed column names in `column_overrides:`, computed-only `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`.

 For dbt context-source ingest, the dbt connection is usually not the warehouse connection. Call `sl_discover` without `connectionId` first, then write overlays to the connection that owns the matching manifest-backed source (for example `postgres-warehouse`), not to the dbt connection (for example `dbt-main`). If no matching manifest-backed source is visible on any warehouse connection, do not call `sl_write_source`; record `emit_unmapped_fallback` and keep the fact wiki-only.

@ -41,16 +41,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
@ -61,7 +61,7 @@ SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

 ## 1.1 test hints (descriptions / meta)

-When YAML shows `accepted_values` or `not_null`, add **short** hints into `columns[].descriptions` (e.g. under `user`) or freeform column notes so chat and validation see intent before the next git sync refreshes `constraints` / `enum_values` in `_schema`. Keep hints under a few words when possible.
+When YAML shows `accepted_values` or `not_null`, add **short** hints into `column_overrides[].descriptions` (for example under `user`) or freeform column notes so chat and validation see intent before the next git sync refreshes `constraints` / `enum_values` in `_schema`. Keep hints under a few words when possible.

 ## Overlap with MetricFlow

@ -71,6 +71,6 @@ If the same bundle also has MetricFlow `semantic_models:` / `metrics:`, the **`m

 - Do not run `dbt` CLI or assume `target/` / `manifest.json` exists in the upload.
 - Do not invent column names, grain keys, or measure expressions from dbt model names, descriptions, tests, or common naming patterns.
- Do not write `columns:`, `grain:`, or `measures:` for a dbt model unless those exact column names are confirmed by dbt YAML columns or warehouse schema discovery.
+- Do not write computed `columns:`, `column_overrides:`, `grain:`, or `measures:` for a dbt model unless those exact column names are confirmed by dbt YAML columns or warehouse schema discovery.
 - Do not invent joins from `relationships` tests if the target model/table is not found in SL or the warehouse.
 - Do not read `peerFileIndex` paths - use `read_raw_file` only on `rawFiles` and `dependencyPaths` from the WorkUnit.
--- a/packages/context/skills/historic_sql_patterns/SKILL.md
+++ b/packages/context/skills/historic_sql_patterns/SKILL.md
@ -31,16 +31,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
--- a/packages/context/skills/historic_sql_table_digest/SKILL.md
+++ b/packages/context/skills/historic_sql_table_digest/SKILL.md
@ -27,16 +27,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
--- a/packages/context/skills/live_database_ingest/SKILL.md
+++ b/packages/context/skills/live_database_ingest/SKILL.md
@ -37,16 +37,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
--- a/packages/context/skills/looker_ingest/SKILL.md
+++ b/packages/context/skills/looker_ingest/SKILL.md
@ -34,16 +34,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
--- a/packages/context/skills/lookml_ingest/SKILL.md
+++ b/packages/context/skills/lookml_ingest/SKILL.md
@ -12,7 +12,7 @@ LookML views map to SL sources, `measure:` to measures, `explore: { join: }` to

 | LookML | KTX form | Notes |
 |---|---|---|
-| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** at `<connId>/X.yaml` with `measures`, `columns` (computed), `joins`, `segments` | Manifest-backed; inherit grain/columns |
+| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns |
 | `view: X { derived_table: { sql: … } }` | **Standalone** with top-level `sql:`, explicit `grain:` + `columns:` | No manifest entry exists |
 | `view: X { sql_always_where: <p> }` | **Standalone** with `sql: SELECT * FROM <base> WHERE <p>` | Enforcement, not opt-in |
 | `explore: { join: Y { sql_on: …; relationship: … } }` | `joins:` entry `{ to: Y, on: "<local> = Y.<col>", relationship: … }` | On the overlay or standalone |
@ -64,16 +64,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
@ -85,11 +85,11 @@ SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
 **Required flow before writing any overlay or standalone**:

 1. Call `sl_discover({ query: "<tableName>" })` for each base table you're about to touch. That returns the real columns.
-2. If the table isn't in the manifest, use the warehouse `connectionName`
+2. If the table isn't in the manifest, use the warehouse `connectionId`
   returned by `discover_data` or the target connection chosen from
   `sl_discover`, then call a dialect-appropriate SQL probe with that
-   connection name, for example:
-   `sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
+   connection id, for example:
+   `sql_execution({connectionId: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
   Replace `warehouse`, `analytics`, and `orders` with the verified connection,
   schema or dataset, and table from the WorkUnit evidence.
 3. Use only those names in `sql:`, `columns:`, and `grain:`. Map each `dimension_group` to ONE `{ name: <physical_col>, type: time, role: time }` entry - never one per timeframe.
@ -136,7 +136,8 @@ KTX overlay at `<connId>/fct_labs.yaml`:

 ```yaml
 name: fct_labs
-description: "Lab-order fact table. One row per lab order event."
+descriptions:
+  user: "Lab-order fact table. One row per lab order event."
 columns:
  - name: is_byol
    type: boolean
--- a/packages/context/skills/metabase_ingest/SKILL.md
+++ b/packages/context/skills/metabase_ingest/SKILL.md
@ -57,16 +57,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
@ -79,7 +79,7 @@ SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

 For each card:
 1. Analyze `resolvedSql` + `resultMetadata`: identify base tables, aggregations, joins, filters, column types.
-2. **REQUIRED before any write**: call `sl_discover` for every candidate target source name. The response tells you whether the name is manifest-backed (`Type: table` or `Type: sql`). For manifest-backed names you MUST use the overlay shape (`name:` + `measures:`/`segments:`/`description:` only - no `sql:`, `table:`, `grain:`, or `columns:`); the tool will reject a standalone write and you'll have wasted the call. If `sl_discover` returns nothing for the name, you can write a standalone source. Also call `sl_read_source` on existing sources you intend to extend so you don't duplicate measures.
+2. **REQUIRED before any write**: call `sl_discover` for every candidate target source name. The response tells you whether the name is manifest-backed (`Type: table` or `Type: sql`). For manifest-backed names you MUST use the overlay shape (`name:` plus overlay fields such as `measures:`, `segments:`, `descriptions:`, `joins:`, `disable_joins:`, `column_overrides:`, and computed-only `columns:` entries with `expr` + `type`; no `sql:`, `table:`, `grain:`, or base-table `columns:`); the tool will reject a standalone write and you'll have wasted the call. If `sl_discover` returns nothing for the name, you can write a standalone source. Also call `sl_read_source` on existing sources you intend to extend so you don't duplicate measures.
 3. Include `rawPaths: ["cards/<id>.json"]` on every `sl_write_source`, `sl_edit_source`, and `wiki_write` call. If one artifact generalizes multiple near-duplicate cards, include each contributing card path and no unrelated cards.
 4. Decide:
   - Simple aggregation on a table that already has a source → `sl_edit_source` to add a measure.
@ -98,7 +98,7 @@ measures:
    expr: "<expression>"
 ```

-Overlay shape: `name:` plus any of `measures:`, `segments:`, `descriptions:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name - those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: ["<on-clause>"]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule.
+Overlay shape: `name:` plus any of `measures:`, `segments:`, `descriptions:`, `joins:`, `disable_joins:`, `exclude_columns:`, `column_overrides:`, or computed-only `columns:` entries with `expr` + `type`. Never include `sql:`, `table:`, `grain:`, or base-table `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Use `column_overrides:` for inherited column descriptions. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: ["<on-clause>"]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule.

 **Join discovery:** When your card's SQL references warehouse tables (e.g. in `FROM` or `JOIN` clauses), call `sl_discover({ query: '<table>' })` before writing. The matching manifest entry's `name` is the value you use in `joins: [- to: <name>]` only when the card output exposes a local key that matches the target source grain (for example `account_id = mart_account_segments.account_id`). Do not declare a KTX join just because the card SQL joins that table internally. If the output only exposes display fields such as `account_name`, keep the SQL source self-contained or project the key before adding the join. Use `many_to_one` for FK-to-dimension joins, `one_to_many` for the reverse.

--- a/packages/context/skills/metricflow_ingest/SKILL.md
+++ b/packages/context/skills/metricflow_ingest/SKILL.md
@ -12,7 +12,7 @@ A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to

 | MetricFlow | KTX form | Notes |
 |---|---|---|
-| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** at `<connId>/X.yaml` with `measures`, `columns` (computed), `joins` | The `model:` ref resolves to a manifest table. |
+| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. |
 | `semantic_model: X { model: source('s','t') }` | **Overlay** at `<connId>/X.yaml` over table `t`. | Same shape; `source()` still resolves to a physical table. |
 | `semantic_model: X { model: <literal> }` with no manifest entry | **Standalone** with explicit `sql:`, `grain:`, `columns:` | Happens when the dbt manifest isn't available. |
 | `semantic_model: Y { extends: X }` | **Merge** Y's measures/dimensions/entities into X's overlay, or write a single overlay named for the most-derived child (Y) containing both X's and Y's primitives | Do not emit a second overlay for X - flatten. |
@ -42,16 +42,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
@ -82,9 +82,9 @@ The `model:` field on a semantic_model is a string like `ref('table_name')`, `so

 If `sl_discover` errors because no such table exists, use `discover_data` and
 `entity_details` to find the warehouse target. If a SQL probe is still needed,
-call `sql_execution` with the same warehouse connection name, for example:
-`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
-**Never invent column names** - every column in `columns:`, `grain:`, and
+call `sql_execution` with the same warehouse connection id, for example:
+`sql_execution({connectionId: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
+**Never invent column names** - every column in computed `columns:`, `column_overrides:`, `grain:`, and
 `sql:` must be sourced from raw files, `entity_details`, or a successful SQL
 probe.

--- a/packages/context/skills/notion_synthesize/SKILL.md
+++ b/packages/context/skills/notion_synthesize/SKILL.md
@ -79,16 +79,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
--- a/packages/context/skills/sl/SKILL.md
+++ b/packages/context/skills/sl/SKILL.md
@ -39,6 +39,10 @@ columns:                    # computed dimensions only
  - name: is_large_order
    type: boolean
    expr: "amount > 1000"
+column_overrides:           # metadata patches for inherited columns
+  - name: status
+    descriptions:
+      user: "Order lifecycle status."
 segments:
  - name: paid_non_refunded
    expr: "is_paid = true AND is_refunded = false"
@ -51,6 +55,7 @@ joins:
 Rules:
 - Do **not** repeat base-table columns, grain, `table`, or `source_type` in an overlay - those are inherited.
 - Overlay columns MUST be computed (`expr` + `type`).
+- Use `column_overrides` to add descriptions or metadata to inherited manifest columns. Do not put `type` or `expr` in `column_overrides`.
 - `exclude_columns` hides specific manifest columns; `disable_joins` suppresses specific auto-detected joins.

 ### Standalone table sources
@ -110,7 +115,7 @@ An SQL source is a one-shot answer: the aggregation is frozen, callers cannot re

 ### Columns

-Every standalone column requires `name` and `type`. Overlays have computed columns only.
+Every standalone column requires `name` and `type`. Overlays declare computed columns in `columns:` and patch metadata on inherited manifest columns in `column_overrides:`.

 - `type`: one of `string`, `number`, `boolean`, `time`. Map LookML `date`/`datetime`/`timestamp` → `time`. Map LookML `yesno` → `boolean`.
 - `role` (optional): `time` enables time-granularity queries (month, week, day). `default` is the implicit fallback.
--- a/packages/context/skills/sl_capture/SKILL.md
+++ b/packages/context/skills/sl_capture/SKILL.md
@ -100,7 +100,33 @@ measures:

 **Extract repeated filter bundles into named segments.** If the same predicate appears on multiple measures of the same source, lift it to a `segments[]` entry and have each measure reference it. One edit updates every measure that depends on it.

-**Never write a standalone file on a manifest-backed name.** If `sl_discover({ query: "<table-or-source-name>" })` finds an existing schema for that name, you MUST write an overlay (`name:` + `measures:`/`segments:`/`descriptions:` only - no `sql:`, `table:`, `grain:`, `columns:`, `joins:`). A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name.
+**Never write a standalone file on a manifest-backed name.** If `sl_discover({ query: "<table-or-source-name>" })` finds an existing schema for that name, you MUST write an overlay. A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name.
+
+Overlay before/after examples:
+
+```yaml
+# Wrong: patches an inherited manifest column through columns:
+name: fct_orders
+columns:
+  - name: status
+    descriptions:
+      user: "Order lifecycle status."
+```
+
+```yaml
+# Right: patch inherited columns with column_overrides:
+name: fct_orders
+column_overrides:
+  - name: status
+    descriptions:
+      user: "Order lifecycle status."
+columns:
+  - name: is_large_order
+    type: boolean
+    expr: "amount > 1000"
+```
+
+Overlay YAML is `name:` plus any of `measures:`, `segments:`, `descriptions:`, `joins:`, `disable_joins:`, `exclude_columns:`, `column_overrides:`, or computed-only `columns:` entries with `expr` and `type`. Do not include `sql:`, `table:`, `grain:`, or base-table `columns:`.

 **Prefer overlay decomposition over standalone SQL sources.** Before reaching for `source_type: sql`, check whether the metric decomposes into measures on existing overlays (including cross-source derived measures). Use `source_type: sql` only when:
 - The metric requires per-user/per-entity derivation that cannot be expressed as a single `expr` (e.g., `EXISTS` over a time-windowed subset), OR
@ -188,16 +214,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
@ -213,7 +239,7 @@ SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
 3. `sl_read_source({ connectionId, sourceName })` - read the raw YAML before editing.
 4. For modifications: `sl_edit_source({ connectionId, sourceName, yaml_edits: [{ oldText, newText, reason }] })` with exact-string replacements. `oldText` must match exactly and be unique in the file.
 5. For new sources or full rewrites: `sl_write_source({ connectionId, sourceName, source })` with the full structured source definition.
-6. For join discovery: use `sql_execution({connectionName: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection name and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join.
+6. For join discovery: use `sql_execution({connectionId: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection id and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join.
 7. Cross-reference knowledge: author the edge once on the **wiki** side via `sl_refs: [source_name]` in the page's front-matter. The reverse edge (wiki pages that cite an SL source) is derived automatically by the reconciler - do not add a `knowledge_refs:` field to SL YAMLs.
 8. `sl_validate` - run after writing or editing to surface schema issues, duplicate measure names, and cross-source validation errors. Read-only; the writes are already committed (the squash-at-end flow will collapse them into one commit).

@ -289,7 +315,7 @@ Prior turn: user asked to correlate LTV with protocol count; assistant joined `f
 sl_read_source({ connectionId: "warehouse", sourceName: "fct_orders" })
  → no joins section yet
 sql_execution({
-  connectionName: "warehouse",
+  connectionId: "warehouse",
  sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM public.fct_orders a JOIN public.fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1"
 })
  → confirms cardinality (many orders per MAU row = many_to_one)
--- a/packages/context/skills/wiki_capture/SKILL.md
+++ b/packages/context/skills/wiki_capture/SKILL.md
@ -60,16 +60,16 @@ Before writing a wiki page or SL source on any topic:
 Before emitting any `schema.table` or `schema.table.column` into a wiki body,
 SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:

-2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
+2. `entity_details({connectionId, targets: [{display: "<identifier>"}]})` -
   confirm the identifier resolves; inspect native types, FK/PK, and
   sampleValues.
 3. For literal values from the source, such as status codes or plan tiers,
   check whether they appear in `entity_details` sampleValues for the relevant
   column. If sampleValues is short or the sample may have missed real values,
-   run a `sql_execution` probe with the same warehouse connection name:
-   `sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
+   run a `sql_execution` probe with the same warehouse connection id:
+   `sql_execution({connectionId, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
 4. If the candidate identifier still does not resolve, do one of:
-   - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
+   - Use `sql_execution({connectionId, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
     If it errors, the identifier is fictional.
   - Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
     citing the exact raw path that mentioned it.
--- a/packages/context/src/agent/agent-runner.service.ts
+++ b/packages/context/src/agent/agent-runner.service.ts
@ -1,4 +1,4 @@
-import { KtxMessageBuilder, type KtxLlmProvider, type KtxModelRole } from '@ktx/llm';
+import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider, type KtxModelRole } from '@ktx/llm';
 import { generateText, stepCountIs, type TelemetrySettings, type Tool } from 'ai';
 import { noopLogger, type KtxLogger } from '../core/index.js';
 import { summarizeKtxLlmDebugRequest, type KtxLlmDebugRequestRecorder } from '../llm/index.js';
@ -36,14 +36,6 @@ export interface AgentRunnerServiceDeps {
  logger?: KtxLogger;
 }

-function splitSystemPromptMessages(messages: ReturnType<KtxMessageBuilder['wrapSimple']>['messages']) {
-  const systemMessages = messages.filter((message) => message.role === 'system');
-  return {
-    system: systemMessages.length === 0 ? undefined : systemMessages.length === 1 ? systemMessages[0] : systemMessages,
-    messages: messages.filter((message) => message.role !== 'system'),
-  };
-}
-
 export class AgentRunnerService {
  private readonly logger: KtxLogger;

@ -62,7 +54,7 @@ export class AgentRunnerService {
        tools: params.toolSet,
        model,
      });
-      const promptMessages = splitSystemPromptMessages(built.messages);
+      const promptMessages = splitKtxSystemMessages(built.messages);

      await this.deps.debugRequestRecorder?.record(
        summarizeKtxLlmDebugRequest({
--- a/packages/context/src/connections/local-warehouse-descriptor.test.ts
+++ b/packages/context/src/connections/local-warehouse-descriptor.test.ts
@ -36,7 +36,13 @@ describe('localConnectionToWarehouseDescriptor', () => {
  });

  it('returns null for non-warehouse adapters', () => {
-    expect(localConnectionToWarehouseDescriptor('looker', { driver: 'looker' })).toBeNull();
+    expect(
+      localConnectionToWarehouseDescriptor('looker', {
+        driver: 'looker',
+        base_url: 'https://looker.example.com',
+        client_id: 'client',
+      }),
+    ).toBeNull();
  });
 });

@ -48,7 +54,9 @@ describe('local connection info helpers', () => {
  });

  it('keeps non-warehouse adapter labels for display-only local connection surfaces', () => {
-    expect(localConnectionTypeForConfig('prod-metabase', { driver: 'metabase' })).toBe('metabase');
+    expect(localConnectionTypeForConfig('prod-metabase', { driver: 'metabase', api_url: 'https://metabase.example.com' })).toBe(
+      'metabase',
+    );
    expect(localConnectionTypeForConfig('missing-driver', {} as never)).toBe('unknown');
  });

--- a/packages/context/src/connections/notion-config.ts
+++ b/packages/context/src/connections/notion-config.ts
@ -13,7 +13,20 @@ export const KTX_NOTION_ORG_KNOWLEDGE_WARNING =

 type KtxNotionCrawlMode = 'all_accessible' | 'selected_roots';

-export interface KtxNotionConnectionConfig extends KtxProjectConnectionConfig {
+type RawKtxNotionConnectionConfig = Extract<KtxProjectConnectionConfig, { driver: 'notion' }>;
+
+export type KtxNotionConnectionConfig = Omit<
+  RawKtxNotionConnectionConfig,
+  | 'auth_token'
+  | 'auth_token_ref'
+  | 'crawl_mode'
+  | 'root_page_ids'
+  | 'root_database_ids'
+  | 'root_data_source_ids'
+  | 'max_pages_per_run'
+  | 'max_knowledge_creates_per_run'
+  | 'max_knowledge_updates_per_run'
+> & {
  driver: 'notion';
  auth_token: string | null;
  auth_token_ref: string | null;
@ -24,7 +37,7 @@ export interface KtxNotionConnectionConfig extends KtxProjectConnectionConfig {
  max_pages_per_run: number;
  max_knowledge_creates_per_run: number;
  max_knowledge_updates_per_run: number;
-}
+};

 export interface RedactedKtxNotionConnectionConfig {
  driver: 'notion';
--- a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts
+++ b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts
@ -18,6 +18,9 @@ const sqlAnalysis: SqlAnalysisPort = {
  async analyzeBatch() {
    return new Map();
  },
+  async validateReadOnly() {
+    return { ok: true };
+  },
 };

 const reader: HistoricSqlReader = {
@ -79,6 +82,9 @@ describe('HistoricSqlSourceAdapter', () => {
          ],
        ]);
      },
+      async validateReadOnly() {
+        return { ok: true };
+      },
    };
    const adapter = new HistoricSqlSourceAdapter({
      sqlAnalysis: batchSqlAnalysis,
--- a/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
+++ b/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
@ -159,6 +159,7 @@ function acceptanceSqlAnalysis(): SqlAnalysisPort {
        );
      },
    ),
+    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };
 }

--- a/packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts
+++ b/packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts
@ -83,6 +83,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
        ],
        ['bad-parse', { tablesTouched: [], columnsByClause: {}, error: 'parse failed' }],
      ])),
+      validateReadOnly: vi.fn(async () => ({ ok: true })),
    };

    await stageHistoricSqlAggregatedSnapshot({
@ -207,6 +208,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
          },
        ],
      ])),
+      validateReadOnly: vi.fn(async () => ({ ok: true })),
    };

    await stageHistoricSqlAggregatedSnapshot({
@ -283,6 +285,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
          },
        ],
      ])),
+      validateReadOnly: vi.fn(async () => ({ ok: true })),
    };

    await stageHistoricSqlAggregatedSnapshot({
@ -403,6 +406,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
          },
        ],
      ])),
+      validateReadOnly: vi.fn(async () => ({ ok: true })),
    };

    await stageHistoricSqlAggregatedSnapshot({
--- a/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts
+++ b/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts
@ -3,6 +3,7 @@ import { tmpdir } from 'node:os';
 import { join } from 'node:path';
 import { afterEach, beforeEach, describe, expect, it } from 'vitest';
 import { buildDefaultKtxProjectConfig } from '../../../project/index.js';
+import { connectionConfigSchema } from '../../../project/driver-schemas.js';
 import { KtxYamlMetabaseSourceStateReader, LocalMetabaseDiscoveryCache } from './local-source-state-store.js';

 describe('Metabase YAML source state and discovery cache', () => {
@ -23,10 +24,11 @@ describe('Metabase YAML source state and discovery cache', () => {
      config: {
        ...buildDefaultKtxProjectConfig(),
        connections: {
-          'prod-metabase': {
+          'prod-metabase': connectionConfigSchema.parse({
            driver: 'metabase',
+            api_url: 'https://metabase.example.com',
            mappings,
-          },
+          }),
        },
      },
    };
--- a/packages/context/src/ingest/adapters/metricflow/import-semantic-models.test.ts
+++ b/packages/context/src/ingest/adapters/metricflow/import-semantic-models.test.ts
@ -38,7 +38,7 @@ describe('importMetricflowSemanticModels', () => {
    const scoped = {
      getManifestEntry: vi.fn().mockResolvedValue(null),
      isManifestBacked: vi.fn().mockResolvedValue(false),
-      loadAllSources: vi.fn().mockResolvedValue([]),
+      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      loadSource: vi.fn().mockResolvedValue(null),
      writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
    };
@ -104,7 +104,7 @@ describe('importMetricflowSemanticModels', () => {
    const scoped = {
      getManifestEntry: vi.fn().mockResolvedValue(null),
      isManifestBacked: vi.fn().mockResolvedValue(false),
-      loadAllSources: vi.fn().mockResolvedValue([]),
+      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      loadSource: vi.fn().mockImplementation((connectionId: string, sourceName: string) =>
        Promise.resolve(sourceName === 'orders' ? { name: 'orders' } : null),
      ),
@ -139,7 +139,7 @@ describe('importMetricflowSemanticModels', () => {
    const scoped = {
      getManifestEntry: vi.fn().mockResolvedValue(null),
      isManifestBacked: vi.fn().mockResolvedValue(false),
-      loadAllSources: vi.fn().mockResolvedValue([]),
+      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      loadSource: vi.fn().mockResolvedValue(null),
      writeSource: vi.fn().mockRejectedValueOnce(new Error('cannot write orders')).mockResolvedValue({ warnings: [] }),
    };
@ -190,7 +190,7 @@ describe('importMetricflowSemanticModels', () => {
      isManifestBacked: vi.fn().mockImplementation(async (_connectionId: string, sourceName: string) => {
        return sourceName === 'orders';
      }),
-      loadAllSources: vi.fn().mockResolvedValue([]),
+      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      loadSource: vi.fn().mockResolvedValue(null),
      writeSource: vi.fn().mockImplementation(async (_connectionId: string, source: (typeof written)[number]) => {
        written.push(source);
@ -268,7 +268,7 @@ describe('importMetricflowSemanticModels', () => {
      isManifestBacked: vi.fn().mockImplementation(async (_connectionId: string, sourceName: string) => {
        return sourceName === 'orders';
      }),
-      loadAllSources: vi.fn().mockResolvedValue([]),
+      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      loadSource: vi.fn().mockResolvedValue(null),
      writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
    };
@ -311,7 +311,7 @@ describe('importMetricflowSemanticModels', () => {
    const scoped = {
      getManifestEntry: vi.fn().mockResolvedValue(null),
      isManifestBacked: vi.fn().mockResolvedValue(false),
-      loadAllSources: vi.fn().mockResolvedValue([]),
+      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      loadSource: vi.fn().mockResolvedValue(null),
      writeSource: vi
        .fn()
--- a/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts
+++ b/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts
@ -71,7 +71,7 @@ export async function importMetricflowSemanticModels(
  let crossModelSourcesCreated = 0;

  const preexistingSourceNames = new Set(
-    (await semanticLayerService.loadAllSources(input.connectionId)).map((source) => source.name),
+    (await semanticLayerService.loadAllSources(input.connectionId)).sources.map((source) => source.name),
  );
  const modelContexts: MetricflowSemanticModelImportContext[] = [];
  const sourceNameByModelRef = new Map<string, string>();
--- a/packages/context/src/ingest/ingest-bundle.runner.test.ts
+++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts
@ -187,7 +187,10 @@ const makeDeps = () => {
    loadAllSources: vi
      .fn()
      .mockImplementation((connectionId: string) =>
-        Promise.resolve(connectionId === 'warehouse-2' ? [{ name: 'looker__orders' }] : []),
+        Promise.resolve({
+          sources: connectionId === 'warehouse-2' ? [{ name: 'looker__orders' }] : [],
+          loadErrors: [],
+        }),
      ),
  };
  const slSearchService = {
@ -1347,7 +1350,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
      frontmatter: { sl_refs: ['looker__b2b__sales_pipeline.arr'] },
    });
    deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
-      Promise.resolve([{ name: `${connectionId}_source` }]),
+      Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
    );
    deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
      if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
@ -1447,7 +1450,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
      parseArtifacts: { semanticModels: [{ name: 'orders' }] },
    });
    deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
-      Promise.resolve([{ name: `${connectionId}_source` }]),
+      Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
    );
    const postProcessor = {
      run: vi.fn().mockResolvedValue({
@ -1631,7 +1634,10 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
    const deps = makeDeps();
    deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['postgres-warehouse']);
    deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
-      Promise.resolve(connectionId === 'postgres-warehouse' ? [{ name: 'stg_accounts' }] : []),
+      Promise.resolve({
+        sources: connectionId === 'postgres-warehouse' ? [{ name: 'stg_accounts' }] : [],
+        loadErrors: [],
+      }),
    );

    const runner = buildRunner(deps);
@ -1659,7 +1665,10 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {

  it('does not resolve qualified fallback table refs by source name alone', async () => {
    const deps = makeDeps();
-    deps.semanticLayerService.loadAllSources.mockResolvedValue([{ name: 'orders', table: 'sales.orders' }]);
+    deps.semanticLayerService.loadAllSources.mockResolvedValue({
+      sources: [{ name: 'orders', table: 'sales.orders' }],
+      loadErrors: [],
+    });
    const runner = buildRunner(deps);

    await expect(
--- a/packages/context/src/ingest/ingest-bundle.runner.ts
+++ b/packages/context/src/ingest/ingest-bundle.runner.ts
@ -300,7 +300,7 @@ export class IngestBundleRunner {
    const blocks = await Promise.all(
      connectionIds.map(async (connectionId) => {
        try {
-          const sources = await this.deps.semanticLayerService.loadAllSources(connectionId);
+          const { sources } = await this.deps.semanticLayerService.loadAllSources(connectionId);
          const names = sources.map((source) => source.name).sort((left, right) => left.localeCompare(right));
          const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
          return `## ${connectionId}\n${body}`;
@ -329,7 +329,7 @@ export class IngestBundleRunner {
  ): Promise<boolean> {
    for (const connectionId of connectionIds) {
      try {
-        const sources = await semanticLayerService.loadAllSources(connectionId);
+        const { sources } = await semanticLayerService.loadAllSources(connectionId);
        if (sources.some((source) => semanticSourceMatchesTableRef(source, tableRef))) {
          return true;
        }
@ -1211,7 +1211,7 @@ export class IngestBundleRunner {
        ].sort();
        for (const connectionId of touchedConnections) {
          try {
-            const allSources = await this.deps.semanticLayerService.loadAllSources(connectionId);
+            const { sources: allSources } = await this.deps.semanticLayerService.loadAllSources(connectionId);
            await this.deps.slSearchService.indexSources(connectionId, allSources);
          } catch (err) {
            this.logger.warn(
--- a/packages/context/src/ingest/ingest-runtime-assets.test.ts
+++ b/packages/context/src/ingest/ingest-runtime-assets.test.ts
@ -94,11 +94,15 @@ describe('ingest runtime assets', () => {

  it('packages identifier verification prompt assets', async () => {
    const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
+    const legacyConnectionPrefix = ['connection', 'Name'].join('');
+
    expect(shared).toContain('## Identifier Verification Protocol');
    expect(shared).toContain('discover_data');
    expect(shared).toContain('entity_details');
    expect(shared).toContain('sql_execution');
-    expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
-    expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
+    expect(shared).toContain('sql_execution({connectionId, sql: "SELECT DISTINCT');
+    expect(shared).toContain('sql_execution({connectionId, sql: "SELECT 1 FROM');
+    expect(shared).not.toContain(`entity_details({${legacyConnectionPrefix}`);
+    expect(shared).not.toContain(`sql_execution({${legacyConnectionPrefix}`);
  });
 });
--- a/packages/context/src/ingest/local-adapters.test.ts
+++ b/packages/context/src/ingest/local-adapters.test.ts
@ -97,6 +97,9 @@ describe('local ingest adapters', () => {
      async analyzeBatch() {
        return new Map();
      },
+      async validateReadOnly() {
+        return { ok: true };
+      },
    };
    const adapters = createDefaultLocalIngestAdapters(project, {
      historicSql: {
@ -140,6 +143,9 @@ describe('local ingest adapters', () => {
          async analyzeBatch() {
            return new Map();
          },
+          async validateReadOnly() {
+            return { ok: true };
+          },
        },
        reader,
        queryClient,
@ -166,6 +172,9 @@ describe('local ingest adapters', () => {
          async analyzeBatch() {
            return new Map();
          },
+          async validateReadOnly() {
+            return { ok: true };
+          },
        },
        postgresQueryClient: {
          async executeQuery() {
@ -258,6 +267,9 @@ describe('local ingest adapters', () => {
          async analyzeBatch() {
            return new Map();
          },
+          async validateReadOnly() {
+            return { ok: true };
+          },
        },
        postgresQueryClient: {
          async executeQuery() {
--- a/packages/context/src/ingest/local-mapping-reconcile.test.ts
+++ b/packages/context/src/ingest/local-mapping-reconcile.test.ts
@ -27,11 +27,12 @@ describe('local mapping yaml reconciliation bridge', () => {
    const project = projectWithConnections({
      'prod-metabase': {
        driver: 'metabase',
+        api_url: 'https://metabase.example.com',
        mappings: {
          databaseMappings: { '1': 'prod-warehouse' },
          syncEnabled: { '1': true },
          syncMode: 'ONLY',
-          selections: { collections: [12] },
+          selections: { collections: [12], items: [] },
          defaultTagNames: ['ktx'],
        },
      },
@ -46,6 +47,8 @@ describe('local mapping yaml reconciliation bridge', () => {
    const project = projectWithConnections({
      'prod-looker': {
        driver: 'looker',
+        base_url: 'https://looker.example.com',
+        client_id: 'client',
        mappings: { connectionMappings: { analytics: 'prod-warehouse' } },
      },
      'prod-warehouse': { driver: 'postgres', url: 'postgresql://readonly@db.test/analytics' },
--- a/packages/context/src/ingest/page-triage/page-triage.service.test.ts
+++ b/packages/context/src/ingest/page-triage/page-triage.service.test.ts
@ -227,9 +227,10 @@ describe('PageTriageService', () => {
    });
    generateTextMock
      .mockImplementationOnce((args: any) => {
-        const systemMessage = args.messages.find((m: { role: string }) => m.role === 'system');
+        const systemMessage = args.system ?? args.messages.find((m: { role: string }) => m.role === 'system');
        const userMessage = args.messages.find((m: { role: string }) => m.role === 'user');
-        const systemText = systemMessage.content as string;
+        const systemText =
+          typeof systemMessage === 'string' ? systemMessage : (systemMessage.content as string);
        const userText = userMessage.content as string;
        expect(systemText).toContain(
          'Reusable templates and scripts are durable knowledge regardless of subject matter.',
--- a/packages/context/src/ingest/page-triage/page-triage.service.ts
+++ b/packages/context/src/ingest/page-triage/page-triage.service.ts
@ -1,7 +1,7 @@
 import { createHash } from 'node:crypto';
 import { readdir, readFile } from 'node:fs/promises';
 import { dirname, join, relative } from 'node:path';
-import { KtxMessageBuilder, type KtxLlmProvider } from '@ktx/llm';
+import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider } from '@ktx/llm';
 import { generateText, type ToolSet } from 'ai';
 import pLimit from 'p-limit';
 import { z } from 'zod';
@ -346,10 +346,12 @@ export class PageTriageService {
      tools: {},
      model,
    });
+    const split = splitKtxSystemMessages(built.messages);
    const result = await this.runGenerateText({
      model,
      temperature: 0,
-      messages: built.messages,
+      ...(split.system ? { system: split.system } : {}),
+      messages: split.messages,
      tools: built.tools as ToolSet,
    });
    return result.text;
--- a/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
+++ b/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
@ -1,7 +1,7 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';
+import type { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
 import type { BaseTool, ToolContext } from '../../../tools/index.js';
 import { DiscoverDataTool } from './discover-data.tool.js';
-import type { WarehouseCatalogService } from './warehouse-catalog.service.js';

 describe('DiscoverDataTool', () => {
  const wikiSearchTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType<typeof vi.fn> };
@ -36,7 +36,7 @@ describe('DiscoverDataTool', () => {
    catalog.searchByName.mockResolvedValue([
      {
        kind: 'table',
-        connectionName: 'warehouse',
+        connectionId: 'warehouse',
        ref: { catalog: null, db: 'public', name: 'orders' },
        display: 'public.orders',
        matchedOn: 'name',
@ -45,28 +45,28 @@ describe('DiscoverDataTool', () => {
  });

  it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => {
-    const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context);
+    const result = await tool.call({ query: 'orders', connectionId: 'warehouse', limit: 5 }, context);

    expect(result.markdown).toContain('## Wiki Pages');
    expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content');
    expect(result.markdown).toContain('## Semantic Layer Sources');
    expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML');
    expect(result.markdown).toContain('## Raw Warehouse Schema');
-    expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`');
+    expect(result.markdown).toContain('use `entity_details({connectionId, targets: [{display}]})`');
    expect(result.structured.raw?.hits).toHaveLength(1);
  });

-  it('includes connectionName on raw schema hits so entity_details can follow up', async () => {
+  it('includes connectionId on raw schema hits so entity_details can follow up', async () => {
    const multiConnectionContext: ToolContext = {
      ...context,
      session: { allowedConnectionNames: new Set(['warehouse', 'analytics']) } as any,
    };
-    catalog.searchByName.mockImplementation(async (connectionName: string, query: string) => [
+    catalog.searchByName.mockImplementation(async (connectionId: string, query: string) => [
      {
        kind: 'table',
-        connectionName,
-        ref: { catalog: null, db: 'public', name: `${connectionName}_${query}` },
-        display: `public.${connectionName}_${query}`,
+        connectionId,
+        ref: { catalog: null, db: 'public', name: `${connectionId}_${query}` },
+        display: `public.${connectionId}_${query}`,
        matchedOn: 'name',
      },
    ]);
@ -75,16 +75,16 @@ describe('DiscoverDataTool', () => {

    expect(catalog.searchByName).toHaveBeenCalledWith('analytics', 'orders', 10);
    expect(catalog.searchByName).toHaveBeenCalledWith('warehouse', 'orders', 10);
-    expect(result.markdown).toContain('connectionName=analytics');
-    expect(result.markdown).toContain('connectionName=warehouse');
+    expect(result.markdown).toContain('connectionId=analytics');
+    expect(result.markdown).toContain('connectionId=warehouse');
    expect(result.markdown).toContain(
-      'entity_details({connectionName: "analytics", targets: [{display: "public.analytics_orders"}]})',
+      'entity_details({connectionId: "analytics", targets: [{display: "public.analytics_orders"}]})',
    );
-    expect(result.structured.raw?.hits.map((hit) => hit.connectionName)).toEqual(['analytics', 'warehouse']);
+    expect(result.structured.raw?.hits.map((hit) => hit.connectionId)).toEqual(['analytics', 'warehouse']);
  });

  it('refuses explicit out-of-scope connection names', async () => {
-    const result = await tool.call({ query: 'orders', connectionName: 'billing' }, context);
+    const result = await tool.call({ query: 'orders', connectionId: 'billing' }, context);

    expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
    expect(result.structured).toEqual({ wiki: null, sl: null, raw: null });
@ -99,7 +99,7 @@ describe('DiscoverDataTool', () => {
      structured: { sourceName: 'orders' },
    });

-    const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context);
+    const result = await tool.call({ sourceName: 'orders', connectionId: 'warehouse' }, context);

    expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context);
    expect(wikiSearchTool.call).not.toHaveBeenCalled();
@ -112,8 +112,20 @@ describe('DiscoverDataTool', () => {
    slDiscoverTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } });
    catalog.searchByName.mockResolvedValueOnce([]);

-    const result = await tool.call({ query: 'customer source', connectionName: 'warehouse' }, context);
+    const result = await tool.call({ query: 'customer source', connectionId: 'warehouse' }, context);

    expect(result.markdown).toContain('No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.');
  });
+
+  it('uses connectionId as the optional connection filter', () => {
+    const legacyConnectionField = ['connection', 'Name'].join('');
+
+    expect(tool.parseInput({ query: 'orders', connectionId: 'warehouse', limit: 5 })).toEqual({
+      query: 'orders',
+      connectionId: 'warehouse',
+      limit: 5,
+    });
+
+    expect(() => tool.parseInput({ query: 'orders', [legacyConnectionField]: 'warehouse', limit: 5 })).toThrow();
+  });
 });
--- a/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts
+++ b/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts
@ -1,13 +1,13 @@
 import { z } from 'zod';
+import { WarehouseCatalogService, type RawSchemaHit } from '../../../scan/warehouse-catalog.js';
 import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
-import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';

 const discoverDataInputSchema = z.object({
  query: z.string().optional(),
-  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
+  connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
  limit: z.number().int().positive().max(50).optional().default(10),
  sourceName: z.string().optional(),
-});
+}).strict();

 type DiscoverDataInput = z.input<typeof discoverDataInputSchema>;

@ -62,16 +62,16 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {

  async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
    const allowed = allowedConnectionNames(context);
-    if (input.connectionName && allowed && !allowed.has(input.connectionName)) {
+    if (input.connectionId && allowed && !allowed.has(input.connectionId)) {
      return {
-        markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
+        markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
        structured: { wiki: null, sl: null, raw: null },
      };
    }

    if (input.sourceName) {
      const sl = await this.deps.slDiscoverTool.call(
-        { sourceName: input.sourceName, connectionId: input.connectionName },
+        { sourceName: input.sourceName, connectionId: input.connectionId },
        context,
      );
      return { markdown: sl.markdown, structured: { wiki: null, sl: sl.structured, raw: null } };
@ -93,7 +93,7 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
    }

    const slResult = await this.deps.slDiscoverTool.call(
-      { query: query || undefined, connectionId: input.connectionName },
+      { query: query || undefined, connectionId: input.connectionId },
      context,
    );
    if (totalSources(slResult.structured) > 0) {
@ -107,23 +107,23 @@ export class DiscoverDataTool extends BaseTool<typeof discoverDataInputSchema> {
    }

    const catalog = this.deps.catalogFactory(context);
-    const connections = input.connectionName ? [input.connectionName] : [...(allowed ?? [])].sort();
+    const connections = input.connectionId ? [input.connectionId] : [...(allowed ?? [])].sort();
    const rawHits: RawSchemaHit[] = [];
-    for (const connectionName of connections) {
-      rawHits.push(...(await catalog.searchByName(connectionName, query, limit)));
+    for (const connectionId of connections) {
+      rawHits.push(...(await catalog.searchByName(connectionId, query, limit)));
    }
    if (rawHits.length > 0) {
      parts.push(
        '## Raw Warehouse Schema',
-        '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values',
+        '> use `entity_details({connectionId, targets: [{display}]})` for full DDL + sample values',
      );
      parts.push(
        rawHits
          .slice(0, limit)
          .map(
            (hit) =>
-              `- ${hit.kind}: ${hit.display} [connectionName=${hit.connectionName}] (matched on ${hit.matchedOn}) - ` +
-              `follow up with \`entity_details({connectionName: "${hit.connectionName}", targets: [{display: "${hit.display}"}]})\``,
+              `- ${hit.kind}: ${hit.display} [connectionId=${hit.connectionId}] (matched on ${hit.matchedOn}) - ` +
+              `follow up with \`entity_details({connectionId: "${hit.connectionId}", targets: [{display: "${hit.display}"}]})\``,
          )
          .join('\n'),
      );
--- a/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
+++ b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
@ -3,9 +3,9 @@ import { tmpdir } from 'node:os';
 import { join } from 'node:path';
 import { afterEach, beforeEach, describe, expect, it } from 'vitest';
 import { initKtxProject, type KtxLocalProject } from '../../../project/index.js';
+import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
 import type { ToolContext } from '../../../tools/index.js';
 import { EntityDetailsTool } from './entity-details.tool.js';
-import { WarehouseCatalogService } from './warehouse-catalog.service.js';

 describe('EntityDetailsTool', () => {
  let tempDir: string;
@ -32,11 +32,11 @@ describe('EntityDetailsTool', () => {
    await rm(tempDir, { recursive: true, force: true });
  });

-  async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-1') {
-    const root = `raw-sources/${connectionName}/live-database/${syncId}`;
+  async function seedLiveDatabaseScan(connectionId = 'warehouse', syncId = 'sync-1') {
+    const root = `raw-sources/${connectionId}/live-database/${syncId}`;
    await project.fileStore.writeFile(
      `${root}/connection.json`,
-      JSON.stringify({ connectionId: connectionName, driver: 'postgres', extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
+      JSON.stringify({ connectionId, driver: 'postgres', extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2),
      'ktx',
      'ktx@example.com',
      'seed connection',
@ -84,7 +84,7 @@ describe('EntityDetailsTool', () => {
      `${root}/enrichment/relationship-profile.json`,
      JSON.stringify(
        {
-          connectionId: connectionName,
+          connectionId,
          driver: 'postgres',
          tables: [{ table: { catalog: null, db: 'public', name: 'orders' }, rowCount: 12 }],
          columns: {
@ -109,7 +109,7 @@ describe('EntityDetailsTool', () => {
  }

  it('returns scoped table detail for a display target', async () => {
-    const result = await tool.call({ connectionName: 'warehouse', targets: [{ display: 'public.orders' }] }, context);
+    const result = await tool.call({ connectionId: 'warehouse', targets: [{ display: 'public.orders' }] }, context);

    expect(result.markdown).toContain('### public.orders');
    expect(result.markdown).toContain('- status (text, nullable=false)');
@ -120,7 +120,7 @@ describe('EntityDetailsTool', () => {

  it('resolves display targets that include a column name', async () => {
    const result = await tool.call(
-      { connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] },
+      { connectionId: 'warehouse', targets: [{ display: 'public.orders.status' }] },
      context,
    );

@ -133,7 +133,7 @@ describe('EntityDetailsTool', () => {

  it('reports missing explicit columns instead of returning an empty column list', async () => {
    const result = await tool.call(
-      { connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
+      { connectionId: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
      context,
    );

@ -146,7 +146,7 @@ describe('EntityDetailsTool', () => {
  it('reports missing structured table targets in model-visible markdown', async () => {
    const result = await tool.call(
      {
-        connectionName: 'warehouse',
+        connectionId: 'warehouse',
        targets: [{ catalog: null, db: 'public', name: 'orderz' }],
      },
      context,
@ -161,7 +161,7 @@ describe('EntityDetailsTool', () => {
  it('reports missing structured column targets in model-visible markdown', async () => {
    const result = await tool.call(
      {
-        connectionName: 'warehouse',
+        connectionId: 'warehouse',
        targets: [{ catalog: null, db: 'public', name: 'orders', column: 'plan_tier' }],
      },
      context,
@ -175,7 +175,7 @@ describe('EntityDetailsTool', () => {

  it('returns a no-scan state distinct from not found', async () => {
    const result = await tool.call(
-      { connectionName: 'empty', targets: [{ display: 'public.orders' }] },
+      { connectionId: 'empty', targets: [{ display: 'public.orders' }] },
      { ...context, session: { ...context.session!, allowedConnectionNames: new Set(['empty']) } },
    );

@ -184,9 +184,30 @@ describe('EntityDetailsTool', () => {
  });

  it('refuses out-of-scope connections', async () => {
-    const result = await tool.call({ connectionName: 'billing', targets: [{ display: 'public.orders' }] }, context);
+    const result = await tool.call({ connectionId: 'billing', targets: [{ display: 'public.orders' }] }, context); // NOTE(review): assumes 'billing' is not in the session's allowed connections, which are configured outside this hunk — confirm.

    expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
    expect(result.structured.scanAvailable).toBe(false);
  });
+
+  it('uses connectionId as the public input field', async () => { // Pins the input contract: parseInput returns a connectionId-keyed input unchanged and throws when only the legacy key is supplied.
+    const legacyConnectionField = ['connection', 'Name'].join(''); // Evaluates to 'connectionName'. NOTE(review): presumably assembled at runtime so the legacy identifier does not appear literally in the file — confirm.
+
+    expect(
+      tool.parseInput({
+        connectionId: 'warehouse',
+        targets: [{ display: 'public.orders' }],
+      }),
+    ).toEqual({
+      connectionId: 'warehouse',
+      targets: [{ display: 'public.orders' }],
+    });
+
+    expect(() => // Wrapped in a function so toThrow can observe the exception.
+      tool.parseInput({
+        [legacyConnectionField]: 'warehouse', // This input carries only the legacy key and no connectionId at all.
+        targets: [{ display: 'public.orders' }],
+      }),
+    ).toThrow();
+  });
 });
--- a/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts
+++ b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts
@ -1,7 +1,7 @@
 import { z } from 'zod';
 import type { KtxTableRef } from '../../../scan/types.js';
+import { WarehouseCatalogService, type TableDetail } from '../../../scan/warehouse-catalog.js';
 import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
-import { WarehouseCatalogService, type TableDetail } from './warehouse-catalog.service.js';

 const targetSchema = z.union([
  z.object({ display: z.string().min(1) }),
@ -14,9 +14,9 @@ const targetSchema = z.union([
 ]);

 const entityDetailsInputSchema = z.object({
-  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
+  connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/), // Must start with an ASCII letter or digit; remaining characters may also be "_" or "-".
  targets: z.array(targetSchema).min(1).max(50),
-});
+}).strict(); // strict(): parsing fails when the input has keys that are not declared in this object.

 type EntityDetailsInput = z.infer<typeof entityDetailsInputSchema>;
 type EntityDetailsTarget = EntityDetailsInput['targets'][number];
@ -47,14 +47,14 @@ function appendMissingTargetMarkdown(parts: string[], target: EntityDetailsTarge

 async function resolveTarget(
  catalog: WarehouseCatalogService,
-  connectionName: string,
+  connectionId: string,
  target: EntityDetailsTarget,
 ): Promise<{ resolved: (KtxTableRef & { column?: string }) | null; candidates: KtxTableRef[] }> {
  if ('display' in target) {
-    return catalog.resolveDisplayTarget(connectionName, target.display);
+    return catalog.resolveDisplayTarget(connectionId, target.display);
  }

-  const candidateResolution = await catalog.resolveDisplayTarget(connectionName, targetLabel(target));
+  const candidateResolution = await catalog.resolveDisplayTarget(connectionId, targetLabel(target));
  return {
    resolved: {
      catalog: target.catalog,
@ -107,18 +107,18 @@ export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema>

  async call(input: EntityDetailsInput, context: ToolContext): Promise<ToolOutput<EntityDetailsStructured>> {
    const allowed = allowedConnectionNames(context);
-    if (allowed && !allowed.has(input.connectionName)) {
+    if (allowed && !allowed.has(input.connectionId)) {
      return {
-        markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
+        markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
        structured: { resolved: [], missing: [], scanAvailable: false },
      };
    }

    const catalog = this.catalogFactory(context);
-    const scanAvailable = await catalog.hasScan(input.connectionName);
+    const scanAvailable = await catalog.hasScan(input.connectionId);
    if (!scanAvailable) {
      return {
-        markdown: `No live-database scan available for connection "${input.connectionName}"; run \`ktx scan\` first.`,
+        markdown: `No live-database scan available for connection "${input.connectionId}"; run \`ktx scan\` first.`,
        structured: { resolved: [], missing: [], scanAvailable: false },
      };
    }
@ -128,13 +128,13 @@ export class EntityDetailsTool extends BaseTool<typeof entityDetailsInputSchema>
    const missing: EntityDetailsStructured['missing'] = [];

    for (const target of input.targets) {
-      const resolution = await resolveTarget(catalog, input.connectionName, target);
+      const resolution = await resolveTarget(catalog, input.connectionId, target);
      if (!resolution.resolved) {
        missing.push({ target, candidates: resolution.candidates });
        appendMissingTargetMarkdown(parts, target, resolution.candidates);
        continue;
      }
-      const detail = await catalog.getTable({ connectionName: input.connectionName, ...resolution.resolved });
+      const detail = await catalog.getTable({ connectionId: input.connectionId, ...resolution.resolved });
      if (!detail) {
        missing.push({ target, candidates: resolution.candidates });
        appendMissingTargetMarkdown(parts, target, resolution.candidates);
--- a/packages/context/src/ingest/tools/warehouse-verification/index.ts
+++ b/packages/context/src/ingest/tools/warehouse-verification/index.ts
@ -1,10 +1,10 @@
 import type { KtxFileStorePort } from '../../../core/index.js';
 import type { SlConnectionCatalogPort } from '../../../sl/index.js';
+import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
 import type { BaseTool, ToolContext } from '../../../tools/index.js';
 import { DiscoverDataTool } from './discover-data.tool.js';
 import { EntityDetailsTool } from './entity-details.tool.js';
 import { SqlExecutionTool } from './sql-execution.tool.js';
-import { WarehouseCatalogService } from './warehouse-catalog.service.js';

 export function createWarehouseVerificationTools(deps: {
  connections: SlConnectionCatalogPort;
--- a/packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts
+++ b/packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts
@ -19,7 +19,7 @@ describe('SqlExecutionTool', () => {
    connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });

    const result = await tool.call(
-      { connectionName: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 },
+      { connectionId: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 },
      context,
    );

@ -34,7 +34,7 @@ describe('SqlExecutionTool', () => {
  it.each(['insert into x values (1)', 'drop table x', 'vacuum'])('rejects mutating SQL: %s', async (sql) => {
    connections.executeQuery.mockClear();

-    const result = await tool.call({ connectionName: 'warehouse', sql }, context);
+    const result = await tool.call({ connectionId: 'warehouse', sql }, context);

    expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
    expect(connections.executeQuery).not.toHaveBeenCalled();
@ -44,11 +44,35 @@ describe('SqlExecutionTool', () => {
    connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist'));

    const result = await tool.call(
-      { connectionName: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 },
+      { connectionId: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 },
      context,
    );

    expect(result.markdown).toContain('relation "orbit_analytics.customer" does not exist');
    expect(result.structured.error).toContain('relation "orbit_analytics.customer" does not exist');
  });
+
+  it('uses connectionId as the public input field', () => { // Pins the input contract: parseInput returns a connectionId-keyed input unchanged and throws when only the legacy key is supplied.
+    const legacyConnectionField = ['connection', 'Name'].join(''); // Evaluates to 'connectionName'. NOTE(review): presumably assembled at runtime so the legacy identifier does not appear literally in the file — confirm.
+
+    expect(
+      tool.parseInput({
+        connectionId: 'warehouse',
+        sql: 'select 1',
+        rowLimit: 5, // Explicit value, so the expectation below does not depend on the schema default.
+      }),
+    ).toEqual({
+      connectionId: 'warehouse',
+      sql: 'select 1',
+      rowLimit: 5,
+    });
+
+    expect(() => // Wrapped in a function so toThrow can observe the exception.
+      tool.parseInput({
+        [legacyConnectionField]: 'warehouse', // This input carries only the legacy key and no connectionId at all.
+        sql: 'select 1',
+        rowLimit: 5,
+      }),
+    ).toThrow();
+  });
 });
--- a/packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts
+++ b/packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts
@ -4,10 +4,10 @@ import type { SlConnectionCatalogPort } from '../../../sl/index.js';
 import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';

 const sqlExecutionInputSchema = z.object({
-  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
+  connectionId: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/), // Must start with an ASCII letter or digit; remaining characters may also be "_" or "-".
  sql: z.string().min(1),
  rowLimit: z.number().int().positive().max(1000).optional().default(100),
-});
+}).strict(); // strict(): parsing fails when the input has keys that are not declared in this object.

 type SqlExecutionInput = z.input<typeof sqlExecutionInputSchema>;

@ -54,9 +54,9 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {

  async call(input: SqlExecutionInput, context: ToolContext): Promise<ToolOutput<SqlExecutionStructured>> {
    const allowed = context.session?.allowedConnectionNames;
-    if (allowed && !allowed.has(input.connectionName)) {
+    if (allowed && !allowed.has(input.connectionId)) {
      return {
-        markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
+        markdown: `Connection "${input.connectionId}" is not available to this ingest stage.`,
        structured: {
          headers: [],
          rows: [],
@ -83,7 +83,7 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
    }

    try {
-      const result = await this.connections.executeQuery(input.connectionName, wrappedSql);
+      const result = await this.connections.executeQuery(input.connectionId, wrappedSql);
      const headers = result.headers ?? [];
      const rows = result.rows ?? [];
      const rowCount = result.totalRows ?? rows.length;
--- a/packages/context/src/ingest/wiki-sl-ref-repair.test.ts
+++ b/packages/context/src/ingest/wiki-sl-ref-repair.test.ts
@ -44,23 +44,26 @@ describe('repairWikiSlRefs', () => {
      })),
    };
    const semanticLayerService = {
-      loadAllSources: vi.fn(async () => [
-        {
-          name: 'mart_customer_health',
-          grain: [],
-          columns: [],
-          joins: [],
-          measures: [{ name: 'high_risk_account_count', expr: 'count(*)' }],
-          segments: [{ name: 'high_risk', expr: "risk_level = 'high'" }],
-        },
-        {
-          name: 'int_procurement_qualifying_actions',
-          grain: [],
-          columns: [],
-          joins: [],
-          measures: [],
-        },
-      ]),
+      loadAllSources: vi.fn(async () => ({
+        sources: [
+          {
+            name: 'mart_customer_health',
+            grain: [],
+            columns: [],
+            joins: [],
+            measures: [{ name: 'high_risk_account_count', expr: 'count(*)' }],
+            segments: [{ name: 'high_risk', expr: "risk_level = 'high'" }],
+          },
+          {
+            name: 'int_procurement_qualifying_actions',
+            grain: [],
+            columns: [],
+            joins: [],
+            measures: [],
+          },
+        ],
+        loadErrors: [],
+      })),
    };

    const result = await repairWikiSlRefs({
--- a/packages/context/src/ingest/wiki-sl-ref-repair.ts
+++ b/packages/context/src/ingest/wiki-sl-ref-repair.ts
@ -56,7 +56,8 @@ async function loadVisibleSlRefs(
  const warnings: string[] = [];
  for (const connectionId of connectionIds) {
    try {
-      for (const source of await semanticLayerService.loadAllSources(connectionId)) {
+      const { sources } = await semanticLayerService.loadAllSources(connectionId);
+      for (const source of sources) {
        for (const ref of entityRefsForSource(source)) {
          refs.add(ref);
        }
--- a/packages/context/src/llm/generation.ts
+++ b/packages/context/src/llm/generation.ts
@ -1,4 +1,4 @@
-import { KtxMessageBuilder, type KtxLlmProvider, type KtxModelRole } from '@ktx/llm';
+import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider, type KtxModelRole } from '@ktx/llm';
 import { generateText, Output, type FlexibleSchema, type ToolSet } from 'ai';

 type GenerateTextInput = Parameters<typeof generateText>[0];
@ -29,10 +29,12 @@ export async function generateKtxText(input: GenerateKtxTextInput): Promise<stri
    tools: input.tools ?? {},
    model,
  });
+  const split = splitKtxSystemMessages(built.messages);
  const result = await (input.generateText ?? generateText)({
    model,
    temperature: input.temperature ?? 0,
-    messages: built.messages,
+    ...(split.system ? { system: split.system } : {}),
+    messages: split.messages,
    tools: built.tools as ToolSet,
    ...(hasTools(built.tools as ToolSet)
      ? {
@ -58,10 +60,12 @@ export async function generateKtxObject<TOutput, TSchema>(
    tools: input.tools ?? {},
    model,
  });
+  const split = splitKtxSystemMessages(built.messages);
  const result = await (input.generateText ?? generateText)({
    model,
    temperature: input.temperature ?? 0,
-    messages: built.messages,
+    ...(split.system ? { system: split.system } : {}),
+    messages: split.messages,
    tools: built.tools as ToolSet,
    ...(hasTools(built.tools as ToolSet)
      ? {
--- a/packages/context/src/mcp/context-tools.ts
+++ b/packages/context/src/mcp/context-tools.ts
@ -143,6 +143,45 @@ const scanArtifactReadSchema = z.object({
  path: z.string().min(1),
 });

+const entityDetailsTableRefSchema = z.object({ // Structured table reference: all three keys are required; catalog and db may be null, name must be a non-empty string.
+  catalog: z.string().nullable(),
+  db: z.string().nullable(),
+  name: z.string().min(1),
+});
+
+const entityDetailsSchema = z.object({ // Input schema for the entity_details MCP tool: one connection plus a batch of entities to look up.
+  connectionId: connectionIdSchema,
+  entities: z
+    .array(
+      z.object({
+        table: z.union([z.string().min(1), entityDetailsTableRefSchema]), // Either a non-empty string or a structured { catalog, db, name } reference.
+        columns: z.array(z.string().min(1)).optional(), // Optional list of non-empty column names.
+      }),
+    )
+    .min(1) // At least one entity per call...
+    .max(20), // ...and at most twenty.
+});
+
+const dictionarySearchSchema = z.object({ // Input schema for the dictionary_search MCP tool.
+  values: z.array(z.string().min(1)).min(1).max(20), // Between 1 and 20 non-empty strings.
+  connectionId: connectionIdSchema.optional(), // May be omitted.
+});
+
+const discoverDataKindSchema = z.enum(['wiki', 'sl_source', 'sl_measure', 'sl_dimension', 'table', 'column']); // Allowed entries for the "kinds" array of the discover_data input.
+
+const discoverDataSchema = z.object({ // Input schema for the discover_data MCP tool.
+  query: z.string().min(1),
+  connectionId: connectionIdSchema.optional(),
+  kinds: z.array(discoverDataKindSchema).optional(),
+  limit: z.number().int().min(1).max(50).default(15).optional(), // Integer in 1..50. NOTE(review): default(15) is wrapped by optional(); whether an omitted limit parses to 15 or to undefined depends on the zod version in use — confirm.
+});
+
+const sqlExecutionSchema = z.object({ // Input schema for the sql_execution MCP tool.
+  connectionId: connectionIdSchema,
+  sql: z.string().min(1),
+  maxRows: z.number().int().min(1).max(10_000).default(1000).optional(), // Integer in 1..10000. NOTE(review): default(1000) is wrapped by optional(), so the parsed value may still be undefined depending on the zod version; the sql_execution handler applies its own `?? 1000` fallback.
+});
+
 export function jsonToolResult<T extends object>(structuredContent: T): KtxMcpToolResult<T> {
  return {
    content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }],
@ -361,6 +400,81 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void
    );
  }

+  if (ports.entityDetails) { // The entity_details tool is registered only when ports.entityDetails is set.
+    const entityDetails = ports.entityDetails; // Local alias captured by the handler closure below.
+    registerParsedTool(
+      server,
+      'entity_details',
+      {
+        title: 'Entity Details',
+        description: 'Read raw table and column metadata from the latest KTX live-database scan snapshot.',
+        inputSchema: entityDetailsSchema.shape, // The tool metadata carries the schema's raw shape.
+      },
+      entityDetailsSchema, // NOTE(review): presumably the schema registerParsedTool uses to parse incoming arguments — confirm in registerParsedTool.
+      async (input) => jsonToolResult(await entityDetails.read(input)), // Forwards the input to the port and passes its response to jsonToolResult.
+    );
+  }
+
+  if (ports.dictionarySearch) { // The dictionary_search tool is registered only when ports.dictionarySearch is set.
+    const dictionarySearch = ports.dictionarySearch; // Local alias captured by the handler closure below.
+    registerParsedTool(
+      server,
+      'dictionary_search',
+      {
+        title: 'Dictionary Search',
+        description:
+          'Search profile-sampled warehouse values and report matching connection/source/column locations plus non-authoritative miss reasons.',
+        inputSchema: dictionarySearchSchema.shape, // The tool metadata carries the schema's raw shape.
+      },
+      dictionarySearchSchema, // NOTE(review): presumably the schema registerParsedTool uses to parse incoming arguments — confirm in registerParsedTool.
+      async (input) => jsonToolResult(await dictionarySearch.search(input)), // Forwards the input to the port and passes its response to jsonToolResult.
+    );
+  }
+
+  if (ports.discover) { // The discover_data tool is registered only when ports.discover is set.
+    const discover = ports.discover; // Local alias captured by the handler closure below.
+    registerParsedTool(
+      server,
+      'discover_data',
+      {
+        title: 'Discover Data',
+        description:
+          'Search across KTX wiki pages, semantic-layer sources/measures/dimensions, and raw warehouse schema refs.',
+        inputSchema: discoverDataSchema.shape, // The tool metadata carries the schema's raw shape.
+      },
+      discoverDataSchema, // NOTE(review): presumably the schema registerParsedTool uses to parse incoming arguments — confirm in registerParsedTool.
+      async (input) => jsonToolResult(await discover.search(input)), // Forwards the input to the port and passes its response to jsonToolResult.
+    );
+  }
+
+  if (ports.sqlExecution) { // The sql_execution tool is registered only when ports.sqlExecution is set.
+    const sqlExecution = ports.sqlExecution; // Local alias captured by the handler closure below.
+    registerParsedTool(
+      server,
+      'sql_execution',
+      {
+        title: 'SQL Execution',
+        description:
+          'Execute one parser-validated read-only SQL query against a configured KTX connection and return structured rows.',
+        inputSchema: sqlExecutionSchema.shape, // The tool metadata carries the schema's raw shape.
+      },
+      sqlExecutionSchema, // NOTE(review): presumably the schema registerParsedTool uses to parse incoming arguments — confirm in registerParsedTool.
+      async (input) => {
+        try { // Unlike the sibling tools, failures here are converted into an error result rather than propagated.
+          return jsonToolResult(
+            await sqlExecution.execute({
+              connectionId: input.connectionId,
+              sql: input.sql,
+              maxRows: input.maxRows ?? 1000, // Falls back to 1000 when the parsed input has no maxRows.
+            }),
+          );
+        } catch (error) {
+          return jsonErrorToolResult(error instanceof Error ? error.message : String(error)); // Only the message text is reported; non-Error throwables are stringified.
+        }
+      },
+    );
+  }
+
  if (ports.ingest) {
    const ingest = ports.ingest;
    registerParsedTool(
--- a/packages/context/src/mcp/index.ts
+++ b/packages/context/src/mcp/index.ts
@ -5,6 +5,9 @@ export { createDefaultKtxMcpServer, createKtxMcpServer } from './server.js';
 export type {
  KtxConnectionSummary,
  KtxConnectionsMcpPort,
+  KtxDiscoverDataMcpPort,
+  KtxDictionarySearchMcpPort,
+  KtxEntityDetailsMcpPort,
  KtxIngestDiffSummary,
  KtxIngestMcpPort,
  KtxIngestStatusResponse,
--- a/packages/context/src/mcp/local-project-ports.test.ts
+++ b/packages/context/src/mcp/local-project-ports.test.ts
@ -5,7 +5,12 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { AgentRunnerService } from '../agent/index.js';
 import { FakeSourceAdapter, type MemoryFlowReplayInput } from '../ingest/index.js';
 import { initKtxProject } from '../project/index.js';
-import { createKtxConnectorCapabilities, type KtxScanConnector, type KtxSchemaSnapshot } from '../scan/index.js';
+import {
+  createKtxConnectorCapabilities,
+  type KtxQueryResult,
+  type KtxScanConnector,
+  type KtxSchemaSnapshot,
+} from '../scan/index.js';
 import { writeLocalSlSource } from '../sl/index.js';
 import { createLocalProjectMcpContextPorts } from './local-project-ports.js';

@ -60,16 +65,119 @@ describe('createLocalProjectMcpContextPorts', () => {
    };
  }

-  function testConnector(snapshot = testSnapshot()): KtxScanConnector {
+  function testConnector(snapshot = testSnapshot(), queryResult?: KtxQueryResult): KtxScanConnector { // Builds a stub connector around a snapshot; passing queryResult additionally enables read-only SQL on the stub.
    return {
      id: `test:${snapshot.connectionId}`,
      driver: snapshot.driver,
-      capabilities: createKtxConnectorCapabilities(),
+      capabilities: createKtxConnectorCapabilities({ readOnlySql: queryResult !== undefined }), // readOnlySql is true only when a canned query result was supplied.
      introspect: vi.fn(async () => snapshot),
+      executeReadOnly: queryResult === undefined ? undefined : vi.fn(async () => queryResult), // Left undefined without a canned result; otherwise a mock that resolves to that result regardless of its arguments.
      cleanup: vi.fn(async () => {}),
    };
  }

+  async function seedScanReport(projectDir: string, syncId = 'sync-1'): Promise<void> { // Test helper: writes connection.json, tables/orders.json and scan-report.json for connection 'warehouse' under projectDir using mkdir/writeFile.
+    const root = `raw-sources/warehouse/live-database/${syncId}`; // Project-relative directory for this sync; also recorded inside the report's artifactPaths below.
+    await mkdir(join(projectDir, root, 'tables'), { recursive: true });
+    await writeFile( // 1) Connection descriptor.
+      join(projectDir, root, 'connection.json'),
+      JSON.stringify(
+        {
+          connectionId: 'warehouse',
+          driver: 'postgres',
+          extractedAt: '2026-05-14T09:00:00.000Z',
+          scope: { schemas: ['public'] },
+        },
+        null,
+        2,
+      ),
+      'utf-8',
+    );
+    await writeFile( // 2) A single table, public.orders, with one integer primary-key column "id".
+      join(projectDir, root, 'tables', 'orders.json'),
+      JSON.stringify(
+        {
+          catalog: null,
+          db: 'public',
+          name: 'orders',
+          kind: 'table',
+          comment: 'Customer orders',
+          estimatedRows: 12,
+          columns: [
+            {
+              name: 'id',
+              nativeType: 'integer',
+              normalizedType: 'integer',
+              dimensionType: 'number',
+              nullable: false,
+              primaryKey: true,
+              comment: null,
+            },
+          ],
+          foreignKeys: [],
+        },
+        null,
+        2,
+      ),
+      'utf-8',
+    );
+    await writeFile( // 3) The scan report: a structural, non-dry-run scan with every enrichment stage marked 'skipped'.
+      join(projectDir, root, 'scan-report.json'),
+      JSON.stringify(
+        {
+          connectionId: 'warehouse',
+          driver: 'postgres',
+          syncId, // Mirrors the syncId parameter.
+          runId: 'scan-1', // Fixed run id, independent of syncId.
+          trigger: 'mcp',
+          mode: 'structural',
+          dryRun: false,
+          artifactPaths: {
+            rawSourcesDir: root,
+            reportPath: `${root}/scan-report.json`,
+            manifestShards: [],
+            enrichmentArtifacts: [],
+          },
+          diffSummary: {
+            tablesAdded: 0,
+            tablesModified: 0,
+            tablesDeleted: 0,
+            tablesUnchanged: 1,
+            columnsAdded: 0,
+            columnsModified: 0,
+            columnsDeleted: 0,
+          },
+          manifestShardsWritten: 0,
+          structuralSyncStats: {
+            tablesCreated: 1,
+            tablesUpdated: 0,
+            tablesDeleted: 0,
+            columnsCreated: 0,
+            columnsUpdated: 0,
+            columnsDeleted: 0,
+          },
+          enrichment: {
+            dataDictionary: 'skipped',
+            tableDescriptions: 'skipped',
+            columnDescriptions: 'skipped',
+            embeddings: 'skipped',
+            deterministicRelationships: 'skipped',
+            llmRelationshipValidation: 'skipped',
+            statisticalValidation: 'skipped',
+          },
+          capabilityGaps: [],
+          warnings: [],
+          relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
+          enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
+          createdAt: '2026-05-14T09:00:00.000Z',
+        },
+        null,
+        2,
+      ),
+      'utf-8',
+    );
+  }
+
  it('lists local project connections from ktx.yaml', async () => {
    const project = await initKtxProject({ projectDir: tempDir });
    project.config.connections.warehouse = {
@ -119,6 +227,382 @@ describe('createLocalProjectMcpContextPorts', () => {
    expect(connector.cleanup).toHaveBeenCalled();
  });

+  it('executes MCP SQL only after parser-backed validation passes', async () => { // Happy path: the stubbed validator approves the SQL, so the connector's executeReadOnly runs and its result is returned.
+    const project = await initKtxProject({ projectDir: tempDir });
+    project.config.connections.warehouse = {
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+    };
+    const connector = testConnector(testSnapshot(), { // Supplying a query result makes testConnector expose an executeReadOnly mock.
+      headers: ['id'],
+      headerTypes: ['integer'],
+      rows: [[1]],
+      totalRows: 1,
+      rowCount: 1,
+    });
+    const createConnector = vi.fn(async () => connector);
+    const sqlAnalysis = {
+      analyzeForFingerprint: vi.fn(),
+      analyzeBatch: vi.fn(),
+      validateReadOnly: vi.fn(async () => ({ ok: true, error: null })), // Stubbed validator: always reports the SQL as read-only.
+    };
+    const ports = createLocalProjectMcpContextPorts(project, {
+      sqlAnalysis,
+      localScan: {
+        createConnector,
+      },
+    });
+
+    await expect(
+      ports.sqlExecution?.execute({
+        connectionId: 'warehouse',
+        sql: 'select id from public.orders',
+        maxRows: 5,
+      }),
+    ).resolves.toEqual({ // toEqual does not tolerate extra defined keys, so the connector result's totalRows must not appear in the response.
+      headers: ['id'],
+      headerTypes: ['integer'],
+      rows: [[1]],
+      rowCount: 1,
+    });
+    expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select id from public.orders', 'postgres'); // The validator receives the unmodified SQL plus 'postgres' as its second argument.
+    expect(createConnector).toHaveBeenCalledWith('warehouse');
+    expect(connector.executeReadOnly).toHaveBeenCalledWith(
+      {
+        connectionId: 'warehouse',
+        sql: 'select id from public.orders',
+        maxRows: 5,
+      },
+      { runId: 'mcp-sql-execution' }, // The second argument is expected to carry this fixed run id.
+    );
+    expect(connector.cleanup).toHaveBeenCalled(); // cleanup must have been invoked by the time execute resolves.
+  });
+
+  it('rejects MCP SQL before connector execution when parser validation fails', async () => { // Failure path: a validator error must reject execute with that message and leave executeReadOnly uncalled.
+    const project = await initKtxProject({ projectDir: tempDir });
+    project.config.connections.warehouse = {
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+    };
+    const connector = testConnector(testSnapshot(), { // A canned result is supplied so executeReadOnly exists and the not-called assertion below is meaningful.
+      headers: ['id'],
+      rows: [[1]],
+      totalRows: 1,
+      rowCount: 1,
+    });
+    const sqlAnalysis = {
+      analyzeForFingerprint: vi.fn(),
+      analyzeBatch: vi.fn(),
+      validateReadOnly: vi.fn(async () => ({ // Stubbed validator: always reports a failure with a fixed error message.
+        ok: false,
+        error: 'SQL contains read/write operation: Insert',
+      })),
+    };
+    const ports = createLocalProjectMcpContextPorts(project, {
+      sqlAnalysis,
+      localScan: {
+        createConnector: vi.fn(async () => connector),
+      },
+    });
+
+    await expect(
+      ports.sqlExecution?.execute({
+        connectionId: 'warehouse',
+        sql: 'with x as (insert into t values (1) returning *) select * from x', // An INSERT nested inside a WITH clause: the statement begins like a read query but contains a write.
+        maxRows: 1000,
+      }),
+    ).rejects.toThrow('SQL contains read/write operation: Insert'); // The rejection must include the validator's error text.
+    expect(connector.executeReadOnly).not.toHaveBeenCalled();
+  });
+
+  it('exposes local scan entity details through MCP ports', async () => { // With a seeded scan on disk, entityDetails.read resolves an ok result for public.orders and its "id" column.
+    const project = await initKtxProject({ projectDir: tempDir });
+    project.config.connections.warehouse = {
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+    };
+    await seedScanReport(project.projectDir); // Writes the default 'sync-1' scan artifacts for connection 'warehouse'.
+    const ports = createLocalProjectMcpContextPorts(project); // No options: this test needs neither a connector nor a SQL validator.
+
+    await expect(
+      ports.entityDetails?.read({
+        connectionId: 'warehouse',
+        entities: [{ table: 'public.orders', columns: ['id'] }], // Table given in its string form.
+      }),
+    ).resolves.toMatchObject({ // Partial match: only the listed fields are checked.
+      results: [
+        {
+          ok: true,
+          connectionId: 'warehouse',
+          display: 'public.orders',
+          columns: [{ name: 'id', nativeType: 'integer' }],
+          snapshot: { syncId: 'sync-1', scanRunId: 'scan-1' }, // Matches the syncId and runId that seedScanReport writes into scan-report.json.
+        },
+      ],
+    });
+  });
+
+  it('returns a structured local entity details error when no scan exists', async () => { // Without any scan artifacts, read is expected to resolve (not reject) with a per-entity scan_missing error.
+    const project = await initKtxProject({ projectDir: tempDir });
+    project.config.connections.warehouse = {
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+    };
+    const ports = createLocalProjectMcpContextPorts(project); // Note: seedScanReport is deliberately not called in this test.
+
+    await expect(
+      ports.entityDetails?.read({
+        connectionId: 'warehouse',
+        entities: [{ table: 'public.orders' }],
+      }),
+    ).resolves.toMatchObject({ // Partial match: only ok, connectionId and the error code are checked.
+      results: [
+        {
+          ok: false,
+          connectionId: 'warehouse',
+          error: { code: 'scan_missing' },
+        },
+      ],
+    });
+  });
+
+  it('exposes local dictionary search through MCP ports', async () => { // Seeds a relationship profile with sampled values and checks that searching for 'paid' reports the orders.status column.
+    const project = await initKtxProject({ projectDir: tempDir });
+    project.config.connections.warehouse = {
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+    };
+    await project.fileStore.writeFile( // Written through the project's file store, unlike seedScanReport which writes straight to disk.
+      'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
+      `${JSON.stringify(
+        {
+          connectionId: 'warehouse',
+          driver: 'postgres',
+          sqlAvailable: true,
+          queryCount: 4,
+          tables: [],
+          columns: {
+            'orders.status': {
+              table: { catalog: null, db: 'public', name: 'orders' },
+              column: 'status',
+              nativeType: 'text',
+              normalizedType: 'string',
+              distinctCount: 2,
+              sampleValues: ['paid', 'refunded'], // The searched value 'paid' is one of these samples.
+            },
+          },
+          warnings: [],
+        },
+        null,
+        2,
+      )}\n`,
+      'ktx',
+      'ktx@example.com',
+      'Seed dictionary profile',
+    );
+
+    const ports = createLocalProjectMcpContextPorts(project);
+
+    await expect(ports.dictionarySearch?.search({ values: ['paid'] })).resolves.toMatchObject({ // No connectionId is passed; the result is still expected to list 'warehouse' as searched.
+      searched: [{ connectionId: 'warehouse', status: 'ready' }],
+      results: [
+        {
+          value: 'paid',
+          matches: [{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', matchedValue: 'paid' }],
+          misses: [],
+        },
+      ],
+    });
+  });
+
+  it('reports missing local dictionary profiles through MCP ports', async () => { // With no profile artifact seeded, the search resolves with empty coverage and a no_profile_artifact miss.
+    const project = await initKtxProject({ projectDir: tempDir });
+    project.config.connections.warehouse = {
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+    };
+
+    const ports = createLocalProjectMcpContextPorts(project);
+
+    await expect(ports.dictionarySearch?.search({ values: ['paid'] })).resolves.toEqual({ // toEqual pins the response shape, including the null/zero coverage fields.
+      searched: [
+        {
+          connectionId: 'warehouse',
+          coverage: {
+            sampledRows: null,
+            valuesPerColumn: null,
+            profiledColumns: 0,
+            syncId: null,
+            profiledAt: null,
+          },
+          status: 'no_profile_artifact',
+        },
+      ],
+      results: [
+        {
+          value: 'paid',
+          matches: [],
+          misses: [{ connectionId: 'warehouse', reason: 'no_profile_artifact' }], // The miss reason repeats the connection-level status above.
+        },
+      ],
+    });
+  });
+
+  it('exposes local project discover_data across wiki, semantic-layer, and raw schema', async () => { // Seeds one wiki page, one semantic-layer source and one scanned table, then expects a single search to surface all three kinds.
+    const project = await initKtxProject({ projectDir: tempDir });
+    project.config.connections.warehouse = {
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+    };
+    await project.fileStore.writeFile( // Seed 1: a wiki page with front matter followed by a one-line body.
+      'wiki/global/orders-playbook.md',
+      [
+        '---',
+        'summary: Paid order operations',
+        'tags: [orders]',
+        'refs: []',
+        'sl_refs: []',
+        'usage_mode: auto',
+        '---',
+        '',
+        'Paid orders are used for customer activity analysis.',
+        '',
+      ].join('\n'),
+      'ktx',
+      'ktx@example.com',
+      'seed wiki',
+    );
+    await project.fileStore.writeFile( // Seed 2: semantic-layer source "orders" with one column and one measure.
+      'semantic-layer/warehouse/orders.yaml',
+      [
+        'name: orders',
+        'descriptions:',
+        '  user: Paid order facts',
+        'table: public.orders',
+        'grain: [id]',
+        'columns:',
+        '  - name: status',
+        '    type: string',
+        '    descriptions:',
+        '      user: Payment status',
+        'measures:',
+        '  - name: order_count',
+        '    expr: count(*)',
+        '    description: Number of paid orders',
+        '',
+      ].join('\n'),
+      'ktx',
+      'ktx@example.com',
+      'seed sl',
+    );
+    await project.fileStore.writeFile( // Seed 3a: scan connection descriptor for sync-1.
+      'raw-sources/warehouse/live-database/sync-1/connection.json',
+      JSON.stringify({ connectionId: 'warehouse', driver: 'postgres', extractedAt: '2026-05-14T09:00:00.000Z' }, null, 2),
+      'ktx',
+      'ktx@example.com',
+      'seed connection',
+    );
+    await project.fileStore.writeFile( // Seed 3b: scanned table public.orders with a "status" column.
+      'raw-sources/warehouse/live-database/sync-1/tables/public-orders.json',
+      JSON.stringify(
+        {
+          catalog: null,
+          db: 'public',
+          name: 'orders',
+          kind: 'table',
+          comment: 'Orders table',
+          estimatedRows: 10,
+          columns: [
+            {
+              name: 'status',
+              nativeType: 'text',
+              normalizedType: 'text',
+              dimensionType: 'string',
+              nullable: false,
+              primaryKey: false,
+              comment: 'Order status',
+              sampleValues: ['paid'],
+            },
+          ],
+          foreignKeys: [],
+        },
+        null,
+        2,
+      ),
+      'ktx',
+      'ktx@example.com',
+      'seed table',
+    );
+    await project.fileStore.writeFile( // Seed 3c: scan report for sync-1, marked as an enriched (not structural) scan.
+      'raw-sources/warehouse/live-database/sync-1/scan-report.json',
+      JSON.stringify(
+        {
+          connectionId: 'warehouse',
+          driver: 'postgres',
+          syncId: 'sync-1',
+          runId: 'scan-1',
+          trigger: 'mcp',
+          mode: 'enriched',
+          dryRun: false,
+          artifactPaths: {
+            rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
+            reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
+            manifestShards: [],
+            enrichmentArtifacts: [],
+          },
+          diffSummary: {
+            tablesAdded: 1,
+            tablesModified: 0,
+            tablesDeleted: 0,
+            tablesUnchanged: 0,
+            columnsAdded: 0,
+            columnsModified: 0,
+            columnsDeleted: 0,
+          },
+          manifestShardsWritten: 0,
+          structuralSyncStats: {
+            tablesCreated: 0,
+            tablesUpdated: 0,
+            tablesDeleted: 0,
+            columnsCreated: 0,
+            columnsUpdated: 0,
+            columnsDeleted: 0,
+          },
+          enrichment: {
+            dataDictionary: 'completed',
+            tableDescriptions: 'completed',
+            columnDescriptions: 'completed',
+            embeddings: 'skipped',
+            deterministicRelationships: 'skipped',
+            llmRelationshipValidation: 'skipped',
+            statisticalValidation: 'skipped',
+          },
+          capabilityGaps: [],
+          warnings: [],
+          relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
+          enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
+          createdAt: '2026-05-14T09:00:00.000Z',
+        },
+        null,
+        2,
+      ),
+      'ktx',
+      'ktx@example.com',
+      'seed scan report',
+    );
+
+    const ports = createLocalProjectMcpContextPorts(project);
+    const results = await ports.discover?.search({ query: 'paid orders', connectionId: 'warehouse', limit: 10 });
+
+    expect(results).toEqual( // arrayContaining: order and any additional hits (e.g. measure or column results) are not constrained.
+      expect.arrayContaining([
+        expect.objectContaining({ kind: 'wiki', id: 'orders-playbook' }), // No connectionId is asserted for the wiki hit.
+        expect.objectContaining({ kind: 'sl_source', id: 'orders', connectionId: 'warehouse' }),
+        expect.objectContaining({ kind: 'table', id: 'public.orders', connectionId: 'warehouse' }),
+      ]),
+    );
+  });
+
  it('triggers canonical bundle ingest and reads status, report, and replay through MCP ports', async () => {
    const project = await initKtxProject({ projectDir: tempDir });
    project.config.connections.warehouse = {
--- a/packages/context/src/mcp/local-project-ports.ts
+++ b/packages/context/src/mcp/local-project-ports.ts
@ -18,6 +18,7 @@ import {
 import { createLocalKtxEmbeddingProviderFromConfig, KtxIngestEmbeddingPortAdapter } from '../llm/index.js';
 import type { KtxLocalProject } from '../project/index.js';
 import {
+  createKtxEntityDetailsService,
  getLocalScanReport,
  getLocalScanStatus,
  type KtxConnectionDriver,
@ -26,8 +27,11 @@ import {
  type LocalScanMcpOptions,
  runLocalScan,
 } from '../scan/index.js';
+import { createKtxDiscoverDataService } from '../search/index.js';
+import type { SqlAnalysisDialect, SqlAnalysisPort } from '../sql-analysis/index.js';
 import {
  compileLocalSlQuery,
+  createKtxDictionarySearchService,
  type LocalSlSourceSearchResult,
  type LocalSlSourceSummary,
  listLocalSlSources,
@ -44,6 +48,7 @@ import type {
  KtxScanArtifactReadResponse,
  KtxScanArtifactSummary,
  KtxScanArtifactType,
+  KtxSqlExecutionResponse,
 } from './types.js';

 const LOCAL_AUTHOR = 'ktx';
@ -53,6 +58,7 @@ const SL_SHAPE_WARNING = 'Local stdio validation checks YAML shape only; Python
 interface CreateLocalProjectMcpContextPortsOptions {
  semanticLayerCompute?: KtxSemanticLayerComputePort;
  queryExecutor?: KtxSqlQueryExecutorPort;
+  sqlAnalysis?: SqlAnalysisPort;
  localIngest?: LocalIngestMcpOptions;
  localScan?: LocalScanMcpOptions;
  embeddingService?: KtxEmbeddingPort | null;
@ -77,6 +83,10 @@ function dialectForDriver(driver: string | undefined): string {
  return map[normalized] ?? 'postgres';
 }

+/**
+ * Resolves the SQL-analysis dialect for a connection driver by delegating to
+ * `dialectForDriver`.
+ *
+ * NOTE(review): the `as` cast is unchecked. It assumes every string that
+ * `dialectForDriver` can return is a valid `SqlAnalysisDialect`; confirm
+ * against the dialect map.
+ */
+function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
+  return dialectForDriver(driver) as SqlAnalysisDialect;
+}
+
 function assertSafePathToken(kind: string, value: string): string {
  if (
    value.trim().length === 0 ||
@ -378,6 +388,53 @@ function statusFromIngestReport(report: IngestReportSnapshot): KtxIngestStatusRe
  };
 }

+/**
+ * Executes caller-supplied SQL against a configured connection, but only after
+ * the SQL-analysis port has accepted the statement as read-only.
+ *
+ * Checks run in a fixed order, and each failure throws an `Error`:
+ * 1. the connection id must pass `assertSafeConnectionId` and exist in the project config;
+ * 2. `options.sqlAnalysis` must be present and `validateReadOnly` must report `ok`;
+ * 3. `options.localScan.createConnector` must be present;
+ * 4. the created connector must advertise `capabilities.readOnlySql` and implement `executeReadOnly`.
+ *
+ * @param project - Local project whose `config.connections` is consulted.
+ * @param options - Port options supplying the SQL analyzer and the connector factory.
+ * @param input - Target connection id, SQL text, and the `maxRows` value forwarded to the connector.
+ * @returns Headers, rows, and row count from the connector; `headerTypes` is included only when the connector reports it.
+ * @throws Error when any of the checks above fails, or when the connector itself throws.
+ */
+async function executeValidatedReadOnlySql(
+  project: KtxLocalProject,
+  options: CreateLocalProjectMcpContextPortsOptions,
+  input: { connectionId: string; sql: string; maxRows: number },
+): Promise<KtxSqlExecutionResponse> {
+  const connectionId = assertSafeConnectionId(input.connectionId);
+  const connection = project.config.connections[connectionId];
+  if (!connection) {
+    throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
+  }
+  if (!options.sqlAnalysis) {
+    throw new Error('sql_execution requires parser-backed SQL validation.');
+  }
+  // The statement is validated with the dialect derived from the connection's driver.
+  const validation = await options.sqlAnalysis.validateReadOnly(input.sql, sqlAnalysisDialectForDriver(connection.driver));
+  if (!validation.ok) {
+    throw new Error(validation.error ?? 'SQL is not read-only.');
+  }
+  const createConnector = options.localScan?.createConnector;
+  if (!createConnector) {
+    throw new Error('sql_execution requires a local scan connector factory.');
+  }
+
+  // Stays null if createConnector throws, so the cleanup below receives null in that case.
+  let connector: KtxScanConnector | null = null;
+  try {
+    connector = await createConnector(connectionId);
+    if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) {
+      throw new Error(`Connection "${connectionId}" does not support read-only SQL execution.`);
+    }
+    // NOTE(review): maxRows is forwarded as-is; this function does no bounds check.
+    // Presumably the tool input schema validates it; confirm.
+    const result = await connector.executeReadOnly(
+      {
+        connectionId,
+        sql: input.sql,
+        maxRows: input.maxRows,
+      },
+      { runId: 'mcp-sql-execution' },
+    );
+    return {
+      headers: result.headers,
+      ...(result.headerTypes ? { headerTypes: result.headerTypes } : {}),
+      rows: result.rows,
+      // Fall back to the number of returned rows when the connector omits rowCount.
+      rowCount: result.rowCount ?? result.rows.length,
+    };
+  } finally {
+    // Always hand the connector to cleanup, including after the capability error above.
+    await cleanupConnector(connector);
+  }
+}
+
 export function createLocalProjectMcpContextPorts(
  project: KtxLocalProject,
  options: CreateLocalProjectMcpContextPortsOptions = {},
@ -575,8 +632,31 @@ export function createLocalProjectMcpContextPorts(
        });
      },
    },
+    entityDetails: {
+      async read(input) {
+        return createKtxEntityDetailsService(project).read(input);
+      },
+    },
+    dictionarySearch: {
+      async search(input) {
+        return createKtxDictionarySearchService(project).search(input);
+      },
+    },
+    discover: {
+      async search(input) {
+        return createKtxDiscoverDataService(project, { userId: 'local', embeddingService }).search(input);
+      },
+    },
  };

+  if (options.sqlAnalysis && options.localScan?.createConnector) {
+    ports.sqlExecution = {
+      async execute(input) {
+        return executeValidatedReadOnlySql(project, options, input);
+      },
+    };
+  }
+
  if (options.localIngest) {
    ports.ingest = {
      async trigger(input) {
--- a/packages/context/src/mcp/server.test.ts
+++ b/packages/context/src/mcp/server.test.ts
@ -6,11 +6,16 @@ import { createLocalProjectMemoryCapture } from '../memory/index.js';
 import { initKtxProject } from '../project/index.js';
 import { createKtxMcpServer } from './server.js';
 import type {
+  KtxDiscoverDataMcpPort,
+  KtxDictionarySearchMcpPort,
+  KtxEntityDetailsMcpPort,
  KtxIngestMcpPort,
  KtxKnowledgeMcpPort,
  KtxMcpContextPorts,
  KtxScanMcpPort,
  KtxSemanticLayerMcpPort,
+  KtxSqlExecutionMcpPort,
+  KtxSqlExecutionResponse,
  MemoryCapturePort,
 } from './types.js';

@ -64,6 +69,242 @@ describe('createKtxMcpServer', () => {
    });
  });

+  // Only `sqlExecution` is supplied, so exactly one tool should be registered. The handler
+  // result must carry the port response both as pretty-printed JSON text and as
+  // structuredContent, and the port must receive the tool arguments unchanged.
+  // The port is a mock here, so the parser gate itself is not exercised by this test.
+  it('registers parser-gated sql_execution when the host provides a SQL execution port', async () => {
+    const fake = makeFakeServer();
+    const response: KtxSqlExecutionResponse = {
+      headers: ['status', 'count'],
+      headerTypes: ['text', 'bigint'],
+      rows: [['paid', 42]],
+      rowCount: 1,
+    };
+    const sqlExecution: KtxSqlExecutionMcpPort = {
+      execute: vi.fn<KtxSqlExecutionMcpPort['execute']>().mockResolvedValue(response),
+    };
+
+    createKtxMcpServer({
+      server: fake.server,
+      userContext: { userId: 'local-user' },
+      contextTools: {
+        sqlExecution,
+      },
+    });
+
+    expect(fake.tools.map((tool) => tool.name)).toEqual(['sql_execution']);
+    await expect(
+      getTool(fake.tools, 'sql_execution').handler({
+        connectionId: 'warehouse',
+        sql: 'select status, count(*) from public.orders group by status',
+        maxRows: 50,
+      }),
+    ).resolves.toEqual({
+      content: [
+        {
+          type: 'text',
+          text: JSON.stringify(
+            {
+              headers: ['status', 'count'],
+              headerTypes: ['text', 'bigint'],
+              rows: [['paid', 42]],
+              rowCount: 1,
+            },
+            null,
+            2,
+          ),
+        },
+      ],
+      structuredContent: {
+        headers: ['status', 'count'],
+        headerTypes: ['text', 'bigint'],
+        rows: [['paid', 42]],
+        rowCount: 1,
+      },
+    });
+    expect(sqlExecution.execute).toHaveBeenCalledWith({
+      connectionId: 'warehouse',
+      sql: 'select status, count(*) from public.orders group by status',
+      maxRows: 50,
+    });
+  });
+
+  // Supplies only an `entityDetails` port backed by a mock, then checks that `entity_details`
+  // is the sole registered tool, that the handler surfaces the mocked result in
+  // structuredContent, and that the port receives the tool arguments unchanged.
+  it('registers entity_details when the host provides an entity-details port', async () => {
+    const fake = makeFakeServer();
+    const entityDetails: KtxEntityDetailsMcpPort = {
+      read: vi.fn<KtxEntityDetailsMcpPort['read']>().mockResolvedValue({
+        results: [
+          {
+            ok: true,
+            connectionId: 'warehouse',
+            tableRef: { catalog: null, db: 'public', name: 'orders' },
+            display: 'public.orders',
+            kind: 'table',
+            comment: 'Customer orders',
+            estimatedRows: 12,
+            columns: [
+              {
+                name: 'id',
+                nativeType: 'integer',
+                normalizedType: 'integer',
+                dimensionType: 'number',
+                nullable: false,
+                primaryKey: true,
+                comment: null,
+              },
+            ],
+            foreignKeys: [],
+            snapshot: {
+              syncId: 'sync-1',
+              extractedAt: '2026-05-14T09:00:00.000Z',
+              scanRunId: 'scan-1',
+            },
+          },
+        ],
+      }),
+    };
+
+    createKtxMcpServer({
+      server: fake.server,
+      userContext: { userId: 'local-user' },
+      contextTools: { entityDetails },
+    });
+
+    expect(fake.tools.map((tool) => tool.name)).toEqual(['entity_details']);
+    await expect(
+      getTool(fake.tools, 'entity_details').handler({
+        connectionId: 'warehouse',
+        entities: [{ table: 'public.orders', columns: ['id'] }],
+      }),
+    ).resolves.toMatchObject({
+      structuredContent: {
+        results: [
+          {
+            ok: true,
+            connectionId: 'warehouse',
+            display: 'public.orders',
+            columns: [{ name: 'id' }],
+          },
+        ],
+      },
+    });
+    expect(entityDetails.read).toHaveBeenCalledWith({
+      connectionId: 'warehouse',
+      entities: [{ table: 'public.orders', columns: ['id'] }],
+    });
+  });
+
+  // Supplies only a `dictionarySearch` port backed by a mock, then checks that
+  // `dictionary_search` is the sole registered tool, that the handler surfaces the mocked
+  // coverage and matches in structuredContent, and that the port receives the arguments unchanged.
+  it('registers dictionary_search when the host provides a dictionary-search port', async () => {
+    const fake = makeFakeServer();
+    const dictionarySearch: KtxDictionarySearchMcpPort = {
+      search: vi.fn<KtxDictionarySearchMcpPort['search']>().mockResolvedValue({
+        searched: [
+          {
+            connectionId: 'warehouse',
+            coverage: {
+              sampledRows: null,
+              valuesPerColumn: null,
+              profiledColumns: 1,
+              syncId: 'sync-1',
+              profiledAt: null,
+            },
+            status: 'ready',
+          },
+        ],
+        results: [
+          {
+            value: 'paid',
+            matches: [
+              {
+                connectionId: 'warehouse',
+                sourceName: 'orders',
+                columnName: 'status',
+                matchedValue: 'paid',
+                cardinality: 3,
+              },
+            ],
+            misses: [],
+          },
+        ],
+      }),
+    };
+
+    createKtxMcpServer({
+      server: fake.server,
+      userContext: { userId: 'local-user' },
+      contextTools: { dictionarySearch },
+    });
+
+    expect(fake.tools.map((tool) => tool.name)).toEqual(['dictionary_search']);
+    await expect(
+      getTool(fake.tools, 'dictionary_search').handler({
+        connectionId: 'warehouse',
+        values: ['paid'],
+      }),
+    ).resolves.toMatchObject({
+      structuredContent: {
+        searched: [{ connectionId: 'warehouse', status: 'ready' }],
+        results: [
+          {
+            value: 'paid',
+            matches: [{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status' }],
+            misses: [],
+          },
+        ],
+      },
+    });
+    expect(dictionarySearch.search).toHaveBeenCalledWith({
+      connectionId: 'warehouse',
+      values: ['paid'],
+    });
+  });
+
+  // Supplies only a `discover` port backed by a mock, then checks that `discover_data` is the
+  // sole registered tool, that the handler surfaces the mocked hit list in structuredContent,
+  // and that query, connectionId, kinds, and limit reach the port unchanged.
+  it('registers discover_data when the host provides a discover port', async () => {
+    const fake = makeFakeServer();
+    const discover: KtxDiscoverDataMcpPort = {
+      search: vi.fn<KtxDiscoverDataMcpPort['search']>().mockResolvedValue([
+        {
+          kind: 'table',
+          id: 'public.orders',
+          score: 1,
+          summary: 'Orders table',
+          snippet: 'id, status',
+          matchedOn: 'name',
+          connectionId: 'warehouse',
+          tableRef: { catalog: null, db: 'public', name: 'orders' },
+        },
+      ]),
+    };
+
+    createKtxMcpServer({
+      server: fake.server,
+      userContext: { userId: 'local-user' },
+      contextTools: { discover },
+    });
+
+    expect(fake.tools.map((tool) => tool.name)).toEqual(['discover_data']);
+    await expect(
+      getTool(fake.tools, 'discover_data').handler({
+        query: 'orders',
+        connectionId: 'warehouse',
+        kinds: ['table'],
+        limit: 5,
+      }),
+    ).resolves.toMatchObject({
+      structuredContent: [
+        {
+          kind: 'table',
+          id: 'public.orders',
+          connectionId: 'warehouse',
+          tableRef: { catalog: null, db: 'public', name: 'orders' },
+        },
+      ],
+    });
+    expect(discover.search).toHaveBeenCalledWith({
+      query: 'orders',
+      connectionId: 'warehouse',
+      kinds: ['table'],
+      limit: 5,
+    });
+  });
+
  it('registers memory capture tools without host app dependencies', async () => {
    const fake = makeFakeServer();
    const capture: MemoryCapturePort = {
--- a/packages/context/src/mcp/types.ts
+++ b/packages/context/src/mcp/types.ts
@ -1,7 +1,11 @@
 import type { IngestReportSnapshot, MemoryFlowReplayInput, TableUsageOutput } from '../ingest/index.js';
 import type { MemoryCaptureService } from '../memory/index.js';
+import type { KtxEntityDetailsInput, KtxEntityDetailsResponse } from '../scan/entity-details.js';
 import type { KtxScanMode, KtxScanReport } from '../scan/index.js';
+import type { KtxDiscoverDataInput, KtxDiscoverDataResponse } from '../search/index.js';
 import type {
+  KtxDictionarySearchInput,
+  KtxDictionarySearchResponse,
  SemanticLayerQueryInput,
  SlDictionaryMatch,
  SlSearchLaneSummary,
@ -312,10 +316,37 @@ export interface KtxScanMcpPort {
  readArtifact?(input: { runId: string; path: string }): Promise<KtxScanArtifactReadResponse | null>;
 }

+/** Host-provided port for entity-detail lookups; the MCP server tests register the `entity_details` tool when it is supplied. */
+export interface KtxEntityDetailsMcpPort {
+  read(input: KtxEntityDetailsInput): Promise<KtxEntityDetailsResponse>;
+}
+
+/** Host-provided port for dictionary value search; the MCP server tests register the `dictionary_search` tool when it is supplied. */
+export interface KtxDictionarySearchMcpPort {
+  search(input: KtxDictionarySearchInput): Promise<KtxDictionarySearchResponse>;
+}
+
+/** Host-provided port for data discovery search; the MCP server tests register the `discover_data` tool when it is supplied. */
+export interface KtxDiscoverDataMcpPort {
+  search(input: KtxDiscoverDataInput): Promise<KtxDiscoverDataResponse>;
+}
+
+/** Tabular result returned by a SQL execution port. */
+export interface KtxSqlExecutionResponse {
+  /** Column names. NOTE(review): presumably in the same order as each row's cells; confirm against the connector. */
+  headers: string[];
+  /** Column type names, present only when the executor reports them. */
+  headerTypes?: string[];
+  /** Result rows, each an array of cell values. */
+  rows: unknown[][];
+  /** Row count reported for the result. */
+  rowCount: number;
+}
+
+/** Host-provided port for SQL execution; the MCP server tests register the `sql_execution` tool when it is supplied. */
+export interface KtxSqlExecutionMcpPort {
+  execute(input: { connectionId: string; sql: string; maxRows: number }): Promise<KtxSqlExecutionResponse>;
+}
+
 export interface KtxMcpContextPorts {
   connections?: KtxConnectionsMcpPort;
   knowledge?: KtxKnowledgeMcpPort;
   semanticLayer?: KtxSemanticLayerMcpPort;
+  // Optional ports behind the entity_details, dictionary_search, discover_data, and
+  // sql_execution tools; each tool is registered only when its port is provided.
+  entityDetails?: KtxEntityDetailsMcpPort;
+  dictionarySearch?: KtxDictionarySearchMcpPort;
+  discover?: KtxDiscoverDataMcpPort;
+  sqlExecution?: KtxSqlExecutionMcpPort;
   ingest?: KtxIngestMcpPort;
   scan?: KtxScanMcpPort;
 }
--- a/packages/context/src/memory/memory-agent.service.ingest.test.ts
+++ b/packages/context/src/memory/memory-agent.service.ingest.test.ts
@ -89,7 +89,7 @@ const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
    embeddingService: { computeEmbedding: vi.fn() },
    semanticLayerService: {
      forWorktree: vi.fn().mockReturnThis(),
-      loadAllSources: vi.fn().mockResolvedValue([]),
+      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      readSourceFile: vi.fn(),
    },
    slSearchService: { indexSources: vi.fn(), buildSearchText: vi.fn() },
--- a/packages/context/src/memory/memory-agent.service.ts
+++ b/packages/context/src/memory/memory-agent.service.ts
@ -308,7 +308,7 @@ export class MemoryAgentService {
    // Reindex SL search if any SL actions actually landed on main.
    if (hasSL && finalActions.some((a) => a.target === 'sl')) {
      try {
-        const allSources = await this.deps.semanticLayerService.loadAllSources(input.connectionId!);
+        const { sources: allSources } = await this.deps.semanticLayerService.loadAllSources(input.connectionId!);
        await this.deps.slSearchService.indexSources(input.connectionId!, allSources);
      } catch (e) {
        this.logger.warn(
@ -610,7 +610,7 @@ export class MemoryAgentService {

  private async buildSlIndex(connectionId: string): Promise<string> {
    const [sources, warehouseLine] = await Promise.all([
-      this.deps.semanticLayerService.loadAllSources(connectionId),
+      this.deps.semanticLayerService.loadAllSources(connectionId).then((result) => result.sources),
      this.buildWarehouseLine(connectionId),
    ]);
    const indexLines =
--- a/packages/context/src/memory/memory-runtime-assets.test.ts
+++ b/packages/context/src/memory/memory-runtime-assets.test.ts
@ -166,17 +166,17 @@ describe('memory runtime assets', () => {
    }
  });

-  it('ships only the KTX connectionName sql_execution call shape in writer guidance', async () => {
+  it('ships only the KTX connectionId sql_execution call shape in writer guidance', async () => {
    const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
    const bodies = [{ name: '_shared/identifier-verification.md', body: shared }];

-    expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
-    expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
+    expect(shared).toContain('sql_execution({connectionId, sql: "SELECT DISTINCT');
+    expect(shared).toContain('sql_execution({connectionId, sql: "SELECT 1 FROM');

    for (const skillName of verificationWriterSkills) {
      const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
      bodies.push({ name: `${skillName}/SKILL.md`, body });
-      expect(body).toContain('sql_execution({connectionName');
+      expect(body).toContain('sql_execution({connectionId');
      expect(body).not.toContain('sql_execution({ sql');
      expect(body).not.toContain('session shape');
      expect(body).not.toContain('connection is already pinned by the ingest session');
@ -186,8 +186,8 @@ describe('memory runtime assets', () => {
      const calls = sqlExecutionCallBlocks(body);
      expect(calls.length, `${name} should contain sql_execution guidance`).toBeGreaterThan(0);
      expect(
-        calls.filter((call) => !call.includes('connectionName')),
-        `${name} has sql_execution calls without connectionName`,
+        calls.filter((call) => !call.includes('connectionId')),
+        `${name} has sql_execution calls without connectionId`,
      ).toEqual([]);
      expect(body, `${name} has a connectionless multiline sql_execution call`).not.toMatch(
        /sql_execution\(\{\s*sql\s*:/,
--- a/packages/context/src/project/config.test.ts
+++ b/packages/context/src/project/config.test.ts
@ -509,4 +509,11 @@ describe('generateKtxProjectConfigJsonSchema', () => {
    const relationships = scan?.properties?.relationships as { properties?: Record<string, { description?: string }> };
    expect(relationships?.properties?.acceptThreshold?.description).toMatch(/auto-accepted/);
  });
+
+  // Substring check on the serialized JSON schema: it confirms the mapping field names are
+  // emitted somewhere, but does not assert that they are nested under `connections`.
+  it('emits the mappings shapes under connections', () => {
+    const serialized = JSON.stringify(schema);
+    expect(serialized).toContain('databaseMappings');
+    expect(serialized).toContain('connectionMappings');
+    expect(serialized).toContain('expectedLookerConnectionName');
+  });
 });
--- a/packages/context/src/project/config.ts
+++ b/packages/context/src/project/config.ts
@ -1,6 +1,7 @@
 import { KTX_MODEL_ROLES } from '@ktx/llm';
 import YAML from 'yaml';
 import * as z from 'zod';
+import { connectionConfigSchema } from './driver-schemas.js';

 const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway'] as const;
 const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'] as const;
@ -206,12 +207,7 @@ const storageSchema = z
  })
  .describe('Storage backends and commit policy for KTX state and search indexes.');

-const connectionSchema = z
-  .looseObject({
-    driver: z.string().min(1).optional().describe('Connector driver identifier (e.g. "postgres", "bigquery", "snowflake").'),
-    url: z.string().optional().describe('Connection URL or DSN. Format depends on the driver; may contain environment-variable references.'),
-  })
-  .describe('A single database/connector connection entry. Additional driver-specific fields are accepted and passed through.');
+// Connection entries are validated by the driver-discriminated union from driver-schemas.ts.
+// NOTE(review): the loose schema this replaces had `driver` optional and accepted any
+// non-empty string; confirm that existing ktx.yaml entries without a known `driver`
+// are expected to fail validation now.
+const connectionSchema = connectionConfigSchema;

 const agentSchema = z
  .strictObject({
--- a/packages/context/src/project/driver-schemas.test.ts
+++ b/packages/context/src/project/driver-schemas.test.ts
@ -0,0 +1,140 @@
+import { describe, expect, it } from 'vitest';
+import { connectionConfigSchema } from './driver-schemas.js';
+
+// Warehouse-driver coverage: every listed driver literal parses with a URL, unknown
+// extra fields survive parsing, and an unrecognised driver value is rejected.
+describe('connectionConfigSchema (driver discriminated union)', () => {
+  it.each([
+    ['postgres', 'postgres://user:pass@host:5432/db'], // pragma: allowlist secret
+    ['postgresql', 'postgresql://user:pass@host:5432/db'], // pragma: allowlist secret
+    ['mysql', 'mysql://user:pass@host:3306/db'], // pragma: allowlist secret
+    ['snowflake', 'snowflake://account/db'],
+    ['bigquery', 'bigquery://project/dataset'],
+    ['sqlite', 'sqlite:///tmp/db.sqlite'],
+    ['clickhouse', 'clickhouse://host:8123/db'],
+    ['sqlserver', 'sqlserver://host:1433;database=db'],
+  ])('parses %s warehouse connection', (driver, url) => {
+    expect(connectionConfigSchema.parse({ driver, url })).toMatchObject({ driver, url });
+  });
+
+  it('preserves unknown warehouse fields via looseObject passthrough', () => {
+    const parsed = connectionConfigSchema.parse({
+      driver: 'postgres',
+      url: 'postgres://x',
+      historicSql: { enabled: true },
+      context: { queryHistory: { enabled: false } },
+    });
+    expect(parsed).toMatchObject({
+      driver: 'postgres',
+      historicSql: { enabled: true },
+      context: { queryHistory: { enabled: false } },
+    });
+  });
+
+  it('rejects an unknown driver', () => {
+    expect(() => connectionConfigSchema.parse({ driver: 'nope', url: 'x' })).toThrow();
+  });
+});
+
+// Metabase, Looker, and LookML connections: each parses with its own `mappings` shape, and
+// Metabase mapping keys that are not positive-integer strings are rejected.
+describe('connectionConfigSchema - context source drivers with mappings', () => {
+  it('parses a metabase connection with mappings', () => {
+    const parsed = connectionConfigSchema.parse({
+      driver: 'metabase',
+      api_url: 'https://metabase.example.com',
+      api_key_ref: 'env:METABASE_API_KEY', // pragma: allowlist secret
+      mappings: {
+        databaseMappings: { '3': 'prod-warehouse' },
+        syncEnabled: { '3': true },
+        syncMode: 'ONLY',
+      },
+    });
+    expect(parsed).toMatchObject({
+      driver: 'metabase',
+      api_url: 'https://metabase.example.com',
+      mappings: {
+        databaseMappings: { '3': 'prod-warehouse' },
+        syncMode: 'ONLY',
+      },
+    });
+  });
+
+  it('parses a looker connection with connectionMappings', () => {
+    const parsed = connectionConfigSchema.parse({
+      driver: 'looker',
+      base_url: 'https://looker.example.com',
+      client_id: 'abc',
+      client_secret_ref: 'env:LOOKER_CLIENT_SECRET', // pragma: allowlist secret
+      mappings: { connectionMappings: { bigquery_prod: 'wh' } },
+    });
+    expect(parsed.mappings).toEqual({ connectionMappings: { bigquery_prod: 'wh' } });
+  });
+
+  it('parses a lookml connection with expectedLookerConnectionName', () => {
+    const parsed = connectionConfigSchema.parse({
+      driver: 'lookml',
+      repoUrl: 'https://github.com/acme/looker.git',
+      branch: 'main',
+      mappings: { expectedLookerConnectionName: 'bigquery_prod' },
+    });
+    expect(parsed.mappings).toEqual({ expectedLookerConnectionName: 'bigquery_prod' });
+  });
+
+  // The only invalid part of this input is the non-numeric key 'abc'.
+  it('rejects metabase mapping with non-integer database key', () => {
+    expect(() =>
+      connectionConfigSchema.parse({
+        driver: 'metabase',
+        api_url: 'https://x',
+        mappings: { databaseMappings: { abc: 'wh' } },
+      }),
+    ).toThrow();
+  });
+});
+
+// Notion, dbt, and MetricFlow connections: happy-path parses for each driver, plus a
+// rejection of a Notion `crawl_mode` value outside the allowed enum.
+describe('connectionConfigSchema - notion / dbt / metricflow', () => {
+  it('parses a notion connection with selected_roots crawl', () => {
+    const parsed = connectionConfigSchema.parse({
+      driver: 'notion',
+      auth_token_ref: 'env:NOTION_TOKEN',
+      crawl_mode: 'selected_roots',
+      root_page_ids: ['abc', 'def'],
+      max_pages_per_run: 500,
+    });
+    expect(parsed).toMatchObject({
+      driver: 'notion',
+      crawl_mode: 'selected_roots',
+      root_page_ids: ['abc', 'def'],
+      max_pages_per_run: 500,
+    });
+  });
+
+  it('rejects notion with unknown crawl_mode', () => {
+    expect(() =>
+      connectionConfigSchema.parse({
+        driver: 'notion',
+        auth_token_ref: 'env:NOTION_TOKEN',
+        crawl_mode: 'everything',
+      }),
+    ).toThrow();
+  });
+
+  it('parses a dbt connection from a local source_dir', () => {
+    const parsed = connectionConfigSchema.parse({
+      driver: 'dbt',
+      source_dir: '/tmp/dbt-project',
+      target: 'dev',
+    });
+    expect(parsed).toMatchObject({ driver: 'dbt', source_dir: '/tmp/dbt-project', target: 'dev' });
+  });
+
+  it('parses a metricflow connection with nested config', () => {
+    const parsed = connectionConfigSchema.parse({
+      driver: 'metricflow',
+      metricflow: {
+        repoUrl: 'https://github.com/acme/sl.git',
+        branch: 'main',
+      },
+    });
+    expect(parsed).toMatchObject({
+      driver: 'metricflow',
+      metricflow: { repoUrl: 'https://github.com/acme/sl.git' },
+    });
+  });
+});
--- a/packages/context/src/project/driver-schemas.ts
+++ b/packages/context/src/project/driver-schemas.ts
@ -0,0 +1,205 @@
+import * as z from 'zod';
+import {
+  lookerMappingsSchema,
+  lookmlMappingsSchema,
+  metabaseMappingsSchema,
+} from './mappings-yaml-schema.js';
+
+// Driver identifiers treated as warehouse connections.
+// NOTE(review): in this file the tuple only feeds the `WarehouseDriver` type, while
+// `warehouseConnectionSchemas` repeats the same names by hand; keep the two lists in sync.
+const warehouseDrivers = [
+  'postgres',
+  'postgresql',
+  'mysql',
+  'snowflake',
+  'bigquery',
+  'sqlite',
+  'clickhouse',
+  'sqlserver',
+] as const;
+
+// Union of the literal driver names listed in `warehouseDrivers`.
+type WarehouseDriver = (typeof warehouseDrivers)[number];
+
+/**
+ * Builds the schema for one warehouse driver: a loose object whose `driver` must equal
+ * the given literal and whose `url` is an optional non-empty string.
+ *
+ * The `const` type parameter keeps the literal driver type on the `driver` field.
+ *
+ * @param driver - Warehouse driver literal this schema matches.
+ * @returns A zod loose-object schema described with the driver name.
+ */
+function warehouseConnectionSchema<const Driver extends WarehouseDriver>(driver: Driver) {
+  return z
+    .looseObject({
+      driver: z.literal(driver),
+      url: z
+        .string()
+        .min(1)
+        .optional()
+        .describe('Warehouse connection URL or DSN; may contain environment-variable references like env:DATABASE_URL.'),
+    })
+    .describe(
+      `${driver} warehouse connection. Additional driver-tunable fields (e.g. historicSql, context.queryHistory) are accepted and passed through.`,
+    );
+}
+
+// One schema per warehouse driver, written out as a literal tuple (`as const`) and spread
+// into the discriminated union at the bottom of this file.
+const warehouseConnectionSchemas = [
+  warehouseConnectionSchema('postgres'),
+  warehouseConnectionSchema('postgresql'),
+  warehouseConnectionSchema('mysql'),
+  warehouseConnectionSchema('snowflake'),
+  warehouseConnectionSchema('bigquery'),
+  warehouseConnectionSchema('sqlite'),
+  warehouseConnectionSchema('clickhouse'),
+  warehouseConnectionSchema('sqlserver'),
+] as const;
+
+// Error text used when a mapping key is not a positive-integer string.
+function positiveIntKeyMessage(field: string): string {
+  return `${field} keys must be positive-integer strings (e.g. "1", "42")`;
+}
+
+// Decimal digits with no leading zero, i.e. "1", "42", but not "0", "07", or "abc".
+const positiveIntKeyRegex = /^[1-9]\d*$/;
+
+// Metabase mappings with an extra key check: every key of `databaseMappings` and
+// `syncEnabled` must be a positive-integer string. One issue is reported per offending
+// key, `databaseMappings` first and `syncEnabled` second.
+const metabaseMappingsStrictSchema = metabaseMappingsSchema.superRefine((value, ctx) => {
+  const keyedFields = [
+    ['databaseMappings', value.databaseMappings],
+    ['syncEnabled', value.syncEnabled],
+  ] as const;
+
+  for (const [field, record] of keyedFields) {
+    const badKeys = Object.keys(record ?? {}).filter((key) => !positiveIntKeyRegex.test(key));
+    for (const key of badKeys) {
+      ctx.addIssue({
+        code: 'custom',
+        path: [field, key],
+        message: positiveIntKeyMessage(field),
+      });
+    }
+  }
+});
+
+// Connection entry for driver 'metabase'. `api_url` is the only required field besides
+// `driver`; both credential fields are optional, so this schema does not itself require
+// a credential. Mappings go through the strict key check defined above.
+const metabaseConnectionSchema = z
+  .looseObject({
+    driver: z.literal('metabase'),
+    api_url: z.string().url().describe('Metabase instance API URL (e.g. https://metabase.example.com).'),
+    api_key: z.string().min(1).optional().describe('Literal Metabase API key. Prefer api_key_ref for safety.'),
+    api_key_ref: z
+      .string()
+      .min(1)
+      .optional()
+      .describe('Reference to Metabase API key (e.g. env:METABASE_API_KEY or file:/path).'),
+    network_proxy: z.looseObject({}).optional().describe('Optional network proxy configuration (snake_case form).'),
+    networkProxy: z.looseObject({}).optional().describe('Optional network proxy configuration (camelCase form).'),
+    mappings: metabaseMappingsStrictSchema
+      .optional()
+      .describe('Metabase database-to-warehouse mappings and sync configuration.'),
+  })
+  .describe('Metabase context-source connection.');
+
+// Connection entry for driver 'looker'. `base_url` and `client_id` are required; the
+// client secret may be given literally or by reference, and both forms are optional here.
+const lookerConnectionSchema = z
+  .looseObject({
+    driver: z.literal('looker'),
+    base_url: z.string().url().describe('Looker instance base URL (e.g. https://looker.example.com).'),
+    client_id: z.string().min(1).describe('Looker OAuth client ID.'),
+    client_secret: z.string().min(1).optional().describe('Literal Looker OAuth client secret. Prefer client_secret_ref.'),
+    client_secret_ref: z
+      .string()
+      .min(1)
+      .optional()
+      .describe('Reference to Looker OAuth client secret (e.g. env:LOOKER_CLIENT_SECRET).'),
+    mappings: lookerMappingsSchema.optional().describe('Looker connection-name to KTX warehouse mappings.'),
+  })
+  .describe('Looker context-source connection.');
+
+// Connection entry for driver 'lookml'. Only `repoUrl` is required besides `driver`.
+// Note the camelCase `repoUrl`, in contrast to the snake_case `repo_url` on the dbt schema.
+const lookmlConnectionSchema = z
+  .looseObject({
+    driver: z.literal('lookml'),
+    repoUrl: z
+      .string()
+      .min(1)
+      .describe('Git URL of the LookML project (https, ssh, or file:). Field is camelCase by convention.'),
+    branch: z.string().min(1).optional().describe('Git branch (default "main" downstream).'),
+    path: z.string().optional().describe('Subdirectory within the repo when the LookML project lives in a monorepo.'),
+    auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos (e.g. env:GITHUB_TOKEN).'),
+    mappings: lookmlMappingsSchema.optional().describe('LookML expected-connection mapping for ingest gating.'),
+  })
+  .describe('LookML context-source connection.');
+
+// Connection entry for driver 'notion'. Every field other than `driver` is optional.
+// NOTE(review): the `crawl_mode` description says "selected_roots" requires at least one
+// root id list, but nothing in this schema enforces that; presumably it is validated
+// downstream; confirm.
+const notionConnectionSchema = z
+  .looseObject({
+    driver: z.literal('notion'),
+    auth_token: z.string().min(1).optional().describe('Literal Notion integration token. Prefer auth_token_ref.'),
+    auth_token_ref: z
+      .string()
+      .min(1)
+      .optional()
+      .describe('Reference to Notion integration token (e.g. env:NOTION_TOKEN).'),
+    crawl_mode: z
+      .enum(['selected_roots', 'all_accessible'])
+      .optional()
+      .describe(
+        'Crawl scope. "selected_roots" requires at least one of root_page_ids, root_database_ids, root_data_source_ids.',
+      ),
+    root_page_ids: z.array(z.string().min(1)).optional().describe('Notion page IDs to crawl when crawl_mode is selected_roots.'),
+    root_database_ids: z
+      .array(z.string().min(1))
+      .optional()
+      .describe('Notion database IDs to crawl when crawl_mode is selected_roots.'),
+    root_data_source_ids: z
+      .array(z.string().min(1))
+      .optional()
+      .describe('Notion data source IDs to crawl when crawl_mode is selected_roots.'),
+    // Per-run limits: integers bounded to 1..10000 pages, 0..25 creates, and 0..100 updates.
+    max_pages_per_run: z
+      .number()
+      .int()
+      .min(1)
+      .max(10000)
+      .optional()
+      .describe('Maximum Notion pages fetched in a single ingest run.'),
+    max_knowledge_creates_per_run: z
+      .number()
+      .int()
+      .min(0)
+      .max(25)
+      .optional()
+      .describe('Maximum new wiki pages created per run.'),
+    max_knowledge_updates_per_run: z
+      .number()
+      .int()
+      .min(0)
+      .max(100)
+      .optional()
+      .describe('Maximum existing wiki pages updated per run.'),
+  })
+  .describe('Notion context-source connection.');
+
+// Connection entry for driver 'dbt'. Every field other than `driver` is optional, so this
+// schema accepts an entry with neither `source_dir` nor `repo_url`.
+// NOTE(review): presumably one of the two is required downstream; confirm.
+const dbtConnectionSchema = z
+  .looseObject({
+    driver: z.literal('dbt'),
+    source_dir: z.string().min(1).optional().describe('Absolute or project-relative path to a local dbt project.'),
+    repo_url: z.string().min(1).optional().describe('Git URL of the dbt project (https, ssh, or file:).'),
+    branch: z.string().min(1).optional().describe('Git branch when using repo_url.'),
+    path: z.string().optional().describe('Subdirectory within the repo when the dbt project lives in a monorepo.'),
+    auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos.'),
+    profiles_path: z.string().optional().describe('Override path to dbt profiles.yml.'),
+    target: z.string().min(1).optional().describe('dbt target name (e.g. dev, prod).'),
+    project_name: z.string().min(1).optional().describe('Override auto-detected dbt project name.'),
+  })
+  .describe('dbt context-source connection.');
+
+// Connection entry for driver 'metricflow'. Unlike the other context sources, its settings
+// live in a required nested `metricflow` block, whose `repoUrl` is itself required.
+const metricflowConnectionSchema = z
+  .looseObject({
+    driver: z.literal('metricflow'),
+    metricflow: z
+      .looseObject({
+        repoUrl: z.string().min(1).describe('Git URL of the MetricFlow / SL project.'),
+        branch: z.string().min(1).optional().describe('Git branch (default "main").'),
+        path: z.string().optional().describe('Subdirectory within the repo when the SL config lives in a monorepo.'),
+        auth_token_ref: z.string().min(1).optional().describe('Reference to Git auth token for private repos.'),
+      })
+      .describe('Nested MetricFlow configuration block.'),
+  })
+  .describe('MetricFlow / SL context-source connection.');
+
+/**
+ * Schema for a single connection entry, discriminated on `driver`. The member schema is
+ * chosen by the `driver` literal, so a missing or unrecognised `driver` fails to parse.
+ */
+export const connectionConfigSchema = z.discriminatedUnion('driver', [
+  ...warehouseConnectionSchemas,
+  metabaseConnectionSchema,
+  lookerConnectionSchema,
+  lookmlConnectionSchema,
+  notionConnectionSchema,
+  dbtConnectionSchema,
+  metricflowConnectionSchema,
+]);
+
+/** Parsed shape of a connection entry, inferred from `connectionConfigSchema`. */
+export type KtxConnectionConfig = z.infer<typeof connectionConfigSchema>;
--- a/packages/context/src/project/index.ts
+++ b/packages/context/src/project/index.ts
@ -15,6 +15,7 @@ export {
  serializeKtxProjectConfig,
  validateKtxProjectConfig,
 } from './config.js';
+export type { KtxConnectionConfig } from './driver-schemas.js';
 export type { LocalGitFileStoreDeps } from './local-git-file-store.js';
 export { LocalGitFileStore } from './local-git-file-store.js';
 export { ktxLocalStateDbPath } from './local-state-db.js';
--- a/packages/context/src/project/mappings-yaml-schema.test.ts
+++ b/packages/context/src/project/mappings-yaml-schema.test.ts
@ -1,5 +1,8 @@
 import { describe, expect, it } from 'vitest';
 import {
+  lookerMappingsSchema,
+  lookmlMappingsSchema,
+  metabaseMappingsSchema,
  parseConnectionMappingBootstrap,
  parseLookmlMappingBootstrap,
  parseLookerMappingBootstrap,
@ -82,4 +85,17 @@ describe('ktx.yaml mapping bootstrap schema', () => {
      }),
    ).toMatchObject({ adapter: 'looker', connectionId: 'prod-looker' });
  });
+
+  // Pushes one minimal example through each exported mapping schema. The Metabase case also checks
+  // that the omitted `syncMode` comes back as 'ALL'.
+  it('exports mapping shapes that parse documented examples', () => {
+    expect(metabaseMappingsSchema.parse({ databaseMappings: { '1': 'wh' } })).toMatchObject({
+      databaseMappings: { '1': 'wh' },
+      syncMode: 'ALL',
+    });
+    expect(lookerMappingsSchema.parse({ connectionMappings: { x: 'wh' } })).toEqual({
+      connectionMappings: { x: 'wh' },
+    });
+    expect(lookmlMappingsSchema.parse({ expectedLookerConnectionName: 'x' })).toEqual({
+      expectedLookerConnectionName: 'x',
+    });
+  });
 });
--- a/packages/context/src/project/mappings-yaml-schema.ts
+++ b/packages/context/src/project/mappings-yaml-schema.ts
@ -1,5 +1,4 @@
 import * as z from 'zod';
-import type { KtxProjectConnectionConfig } from './config.js';

 const metabaseSyncModeSchema = z.enum(['ALL', 'ONLY', 'EXCEPT']);
 const positiveIntegerValueSchema = z.number().int().positive();
@ -11,24 +10,48 @@ const metabaseSelectionsSchema = z
    items: z.array(positiveIntegerValueSchema).default([]),
  });

-const metabaseMappingsSchema = z
+export const metabaseMappingsSchema = z
  .object({
-    databaseMappings: z.record(z.string(), stringTargetSchema).default({}),
-    syncEnabled: z.record(z.string(), z.boolean()).default({}),
-    syncMode: metabaseSyncModeSchema.default('ALL'),
-    selections: metabaseSelectionsSchema.default({ collections: [], items: [] }),
-    defaultTagNames: z.array(z.string().min(1)).default([]),
-  });
+    databaseMappings: z
+      .record(z.string(), stringTargetSchema)
+      .default({})
+      .describe('Map of Metabase database ID (positive integer string) to KTX connection ID. Use null to explicitly unmap.'),
+    syncEnabled: z
+      .record(z.string(), z.boolean())
+      .default({})
+      .describe('Per-Metabase-database sync toggle, keyed by Metabase database ID string.'),
+    syncMode: metabaseSyncModeSchema
+      .default('ALL')
+      .describe('Sync scope: ALL ingests every mapped DB; ONLY restricts to syncEnabled=true; EXCEPT excludes syncEnabled=true.'),
+    selections: metabaseSelectionsSchema
+      .default({ collections: [], items: [] })
+      .describe('Optional Metabase collection and item IDs to scope ingest.'),
+    defaultTagNames: z
+      .array(z.string().min(1))
+      .default([])
+      .describe('Default tag names applied to ingested Metabase artifacts.'),
+  })
+  .describe('Metabase database-to-warehouse mapping and sync configuration.');

-const lookerMappingsSchema = z
+export const lookerMappingsSchema = z
  .object({
-    connectionMappings: z.record(z.string().min(1), stringTargetSchema).default({}),
-  });
+    connectionMappings: z
+      .record(z.string().min(1), stringTargetSchema)
+      .default({})
+      .describe('Map of Looker connection name to KTX connection ID. Use null to explicitly unmap.'),
+  })
+  .describe('Looker connection-to-warehouse mapping configuration.');

-const lookmlMappingsSchema = z
+export const lookmlMappingsSchema = z
  .object({
-    expectedLookerConnectionName: z.string().min(1).nullable().default(null),
-  });
+    expectedLookerConnectionName: z
+      .string()
+      .min(1)
+      .nullable()
+      .default(null)
+      .describe('Looker connection name that LookML models must declare; mismatches block sl_write_source at ingest time.'),
+  })
+  .describe('LookML connection-name expectation for ingest gating.');

 export type MetabaseMappingBootstrap = {
  adapter: 'metabase';
@ -54,6 +77,11 @@ export type LookmlMappingBootstrap = {

 export type ConnectionMappingBootstrap = MetabaseMappingBootstrap | LookerMappingBootstrap | LookmlMappingBootstrap;

+/**
+ * Minimal connection shape accepted by the mapping bootstrap parsers in this file.
+ * `driver` and `mappings` are both typed `unknown`: `driverOf` stringifies and lower-cases `driver`,
+ * and the parsers narrow `mappings` at runtime before handing it to a schema.
+ */
+type MappingConnectionInput = Record<string, unknown> & {
+  driver?: unknown;
+  mappings?: unknown;
+};
+
 function recordValue(value: unknown): Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value) ? (value as Record<string, unknown>) : {};
 }
@ -66,13 +94,13 @@ function assertPositiveIntegerKeys(field: string, record: Record<string, unknown
  }
 }

-function driverOf(connection: KtxProjectConnectionConfig): string {
+function driverOf(connection: MappingConnectionInput): string {
  return String(connection.driver ?? '').toLowerCase();
 }

 export function parseMetabaseMappingBootstrap(
  connectionId: string,
-  connection: KtxProjectConnectionConfig,
+  connection: MappingConnectionInput,
 ): MetabaseMappingBootstrap {
  const rawMappings = recordValue(connection.mappings);
  assertPositiveIntegerKeys('databaseMappings', recordValue(rawMappings.databaseMappings));
@ -91,7 +119,7 @@ export function parseMetabaseMappingBootstrap(

 export function parseLookerMappingBootstrap(
  connectionId: string,
-  connection: KtxProjectConnectionConfig,
+  connection: MappingConnectionInput,
 ): LookerMappingBootstrap {
  const parsed = lookerMappingsSchema.parse(recordValue(connection.mappings));
  return {
@ -103,7 +131,7 @@ export function parseLookerMappingBootstrap(

 export function parseLookmlMappingBootstrap(
  connectionId: string,
-  connection: KtxProjectConnectionConfig,
+  connection: MappingConnectionInput,
 ): LookmlMappingBootstrap {
  const parsed = lookmlMappingsSchema.parse(recordValue(connection.mappings));
  return {
@ -115,7 +143,7 @@ export function parseLookmlMappingBootstrap(

 export function parseConnectionMappingBootstrap(
  connectionId: string,
-  connection: KtxProjectConnectionConfig,
+  connection: MappingConnectionInput,
 ): ConnectionMappingBootstrap | null {
  if (!connection.mappings || typeof connection.mappings !== 'object' || Array.isArray(connection.mappings)) {
    return null;
--- a/packages/context/src/scan/description-generation.test.ts
+++ b/packages/context/src/scan/description-generation.test.ts
@ -203,11 +203,11 @@ describe('KtxDescriptionGenerator', () => {
    expect(generateText).toHaveBeenCalledWith(
      expect.objectContaining({
        temperature: 0.2,
+        system: expect.objectContaining({
+          role: 'system',
+          content: expect.stringContaining('Please provide a concise description in 12 words or less.'),
+        }),
        messages: expect.arrayContaining([
-          expect.objectContaining({
-            role: 'system',
-            content: expect.stringContaining('Please provide a concise description in 12 words or less.'),
-          }),
          expect.objectContaining({
            role: 'user',
            content: expect.stringContaining('<column_name> status </column_name>'),
@ -215,6 +215,8 @@ describe('KtxDescriptionGenerator', () => {
        ]),
      }),
    );
+    const lastCall = vi.mocked(generateText).mock.calls.at(-1)?.[0];
+    expect(lastCall?.messages?.some((message) => message.role === 'system')).toBe(false);
  });

  it('samples through the connector when column values are not pre-fetched', async () => {
@ -391,3 +393,289 @@ describe('KtxDescriptionGenerator', () => {
    expect(cache.set).toHaveBeenCalledWith('__connection:Warehouse', 'Commerce orders');
  });
 });
+
+describe('KtxDescriptionGenerator resilience', () => {
+  // Builds a logger stub whose four level methods are vitest mocks, so tests can assert on log calls.
+  function createLogger() {
+    return {
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+    };
+  }
+
+  // sampleTable rejects twice and then resolves: expect three calls in total, one warn log per
+  // failed attempt, and no scan warnings because sampling ultimately succeeded.
+  it('retries sampleTable on transient failure and uses sampled rows when it eventually succeeds', async () => {
+    const sampleTable = vi
+      .fn<NonNullable<KtxScanConnector['sampleTable']>>()
+      .mockRejectedValueOnce(new Error('pool: transient ECONNRESET'))
+      .mockRejectedValueOnce(new Error('pool: transient ECONNRESET'))
+      .mockResolvedValue({
+        headers: ['id', 'status'],
+        rows: [
+          [1, 'paid'],
+          [2, 'refunded'],
+        ],
+        totalRows: 2,
+      });
+    const connector: KtxScanConnector = {
+      ...createConnector(),
+      sampleTable,
+    };
+    const logger = createLogger();
+    const warnings: Array<{ code: string; table?: string }> = [];
+    const generator = new KtxDescriptionGenerator({
+      llmProvider: createLlmProvider('Commerce orders'),
+      logger,
+      onWarning: (warning) => warnings.push({ code: warning.code, ...(warning.table ? { table: warning.table } : {}) }),
+      settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24, concurrencyLimit: 2 },
+    });
+
+    const description = await generator.generateTableDescription({
+      connectionId: 'conn-1',
+      connector,
+      context: { runId: 'run-1' },
+      dataSourceType: 'POSTGRESQL',
+      table: { catalog: null, db: 'public', name: 'orders' },
+    });
+
+    expect(description).toBe('Commerce orders');
+    expect(sampleTable).toHaveBeenCalledTimes(3);
+    expect(logger.warn).toHaveBeenCalledTimes(2);
+    expect(warnings).toEqual([]);
+  });
+
+  // sampleTable always rejects: after three attempts the generator still returns the LLM text,
+  // reports `sampling_failed` followed by `description_fallback_used`, and the user prompt lists
+  // column metadata (name, native type, comment) instead of sample rows.
+  it('falls back to metadata-only prompt when sampleTable retries exhaust', async () => {
+    const sampleTable = vi
+      .fn<NonNullable<KtxScanConnector['sampleTable']>>()
+      .mockRejectedValue(new Error('pool: connection refused'));
+    const connector: KtxScanConnector = {
+      ...createConnector(),
+      sampleTable,
+    };
+    const logger = createLogger();
+    const warnings: Array<{ code: string; table?: string; metadata?: Record<string, unknown> }> = [];
+    const generator = new KtxDescriptionGenerator({
+      llmProvider: createLlmProvider('Customer reference data'),
+      logger,
+      onWarning: (warning) =>
+        warnings.push({
+          code: warning.code,
+          ...(warning.table ? { table: warning.table } : {}),
+          ...(warning.metadata ? { metadata: warning.metadata } : {}),
+        }),
+      settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24, concurrencyLimit: 2 },
+    });
+
+    const description = await generator.generateTableDescription({
+      connectionId: 'conn-1',
+      connector,
+      context: { runId: 'run-1' },
+      dataSourceType: 'POSTGRESQL',
+      table: {
+        catalog: null,
+        db: 'public',
+        name: 'customers',
+        columns: [
+          { name: 'id', nativeType: 'uuid' },
+          { name: 'email', nativeType: 'text', comment: 'Primary contact email' },
+        ],
+      },
+    });
+
+    expect(description).toBe('Customer reference data');
+    expect(sampleTable).toHaveBeenCalledTimes(3);
+    expect(warnings.map((warning) => warning.code)).toEqual(['sampling_failed', 'description_fallback_used']);
+    expect(warnings[1]?.metadata?.reason).toBe('sampling_failed');
+    const userPrompt = (vi.mocked(generateText).mock.calls.at(-1)?.[0] as { messages: Array<{ role: string; content: string }> })
+      .messages.find((message) => message.role === 'user')?.content;
+    expect(userPrompt).toContain('Columns (metadata only, no sample rows)');
+    expect(userPrompt).toContain('email (text)');
+    expect(userPrompt).toContain('Primary contact email');
+  });
+
+  // Sampling and the LLM both fail: the description is null and the warnings are `sampling_failed`
+  // followed by `enrichment_failed`.
+  it('emits enrichment_failed and returns null when both sampling and metadata-only LLM fail', async () => {
+    const sampleTable = vi
+      .fn<NonNullable<KtxScanConnector['sampleTable']>>()
+      .mockRejectedValue(new Error('pool: connection refused'));
+    const connector: KtxScanConnector = {
+      ...createConnector(),
+      sampleTable,
+    };
+    const warnings: string[] = [];
+    const generator = new KtxDescriptionGenerator({
+      llmProvider: createFailingLlmProvider(),
+      onWarning: (warning) => warnings.push(warning.code),
+      settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
+    });
+
+    const description = await generator.generateTableDescription({
+      connectionId: 'conn-1',
+      connector,
+      context: { runId: 'run-1' },
+      dataSourceType: 'POSTGRESQL',
+      table: { catalog: null, db: 'public', name: 'orphan', columns: [{ name: 'id' }] },
+    });
+
+    expect(description).toBeNull();
+    expect(warnings).toEqual(['sampling_failed', 'enrichment_failed']);
+  });
+
+  // Connector without `sampleTable`: a description is still produced, and the warnings are
+  // `connector_capability_missing` followed by `description_fallback_used`.
+  it('uses metadata-only fallback when connector has no sampleTable', async () => {
+    const connector = createConnector();
+    const samplerWithoutTable: KtxScanConnector = {
+      ...connector,
+      sampleTable: undefined,
+    };
+    const warnings: string[] = [];
+    const generator = new KtxDescriptionGenerator({
+      llmProvider: createLlmProvider('Orders mart'),
+      onWarning: (warning) => warnings.push(warning.code),
+      settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
+    });
+
+    const description = await generator.generateTableDescription({
+      connectionId: 'conn-1',
+      connector: samplerWithoutTable,
+      context: { runId: 'run-1' },
+      dataSourceType: 'POSTGRESQL',
+      table: {
+        catalog: null,
+        db: 'public',
+        name: 'mart_orders',
+        columns: [{ name: 'order_id', nativeType: 'uuid' }],
+      },
+    });
+
+    expect(description).toBe('Orders mart');
+    expect(warnings).toEqual(['connector_capability_missing', 'description_fallback_used']);
+  });
+
+  // The first sampleTable call aborts the scan signal before throwing: the call must reject with
+  // 'aborted', make no further attempts, and emit no scan warnings.
+  it('aborts retry loop when the scan context signal fires', async () => {
+    const controller = new AbortController();
+    const sampleTable = vi.fn<NonNullable<KtxScanConnector['sampleTable']>>().mockImplementation(async () => {
+      controller.abort();
+      throw new Error('first attempt blew up');
+    });
+    const connector: KtxScanConnector = {
+      ...createConnector(),
+      sampleTable,
+    };
+    const warnings: string[] = [];
+    const generator = new KtxDescriptionGenerator({
+      llmProvider: createLlmProvider('should not be called'),
+      onWarning: (warning) => warnings.push(warning.code),
+      settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
+    });
+
+    await expect(
+      generator.generateTableDescription({
+        connectionId: 'conn-1',
+        connector,
+        context: { runId: 'run-1', signal: controller.signal },
+        dataSourceType: 'POSTGRESQL',
+        table: { catalog: null, db: 'public', name: 'orders' },
+      }),
+    ).rejects.toThrow('aborted');
+
+    expect(sampleTable).toHaveBeenCalledTimes(1);
+    expect(warnings).toEqual([]);
+  });
+
+  // Connector without `sampleColumn`: the column is still described from its rawDescriptions, a
+  // warning is logged, and the prompt marks the sample values as unavailable.
+  it('generates column descriptions from rawDescriptions when sampleColumn is unavailable', async () => {
+    const samplerWithoutColumn: KtxScanConnector = {
+      ...createConnector(),
+      sampleColumn: undefined,
+    };
+    const logger = createLogger();
+    const generator = new KtxDescriptionGenerator({
+      llmProvider: createLlmProvider('Payment lifecycle state'),
+      logger,
+      settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
+    });
+
+    const result = await generator.generateColumnDescriptions({
+      connectionId: 'conn-1',
+      connector: samplerWithoutColumn,
+      context: { runId: 'run-1' },
+      dataSourceType: 'POSTGRESQL',
+      supportsNestedAnalysis: false,
+      table: {
+        catalog: null,
+        db: 'public',
+        name: 'orders',
+        columns: [{ name: 'status', rawDescriptions: { db: 'order lifecycle state' } }],
+      },
+    });
+
+    expect(result.columnDescriptions).toEqual([['status', 'Payment lifecycle state']]);
+    expect(logger.warn).toHaveBeenCalled();
+    const userPrompt = (
+      vi.mocked(generateText).mock.calls.at(-1)?.[0] as { messages: Array<{ role: string; content: string }> }
+    ).messages.find((message) => message.role === 'user')?.content;
+    expect(userPrompt).toContain('<sample_values> unavailable </sample_values>');
+    expect(userPrompt).toContain('<db_documentation> order lifecycle state </db_documentation>');
+  });
+
+  // sampleColumn always rejects: after three attempts the column is still described, using its
+  // rawDescriptions as the only input.
+  it('generates column descriptions from rawDescriptions when sampleColumn retries exhaust', async () => {
+    const sampleColumn = vi
+      .fn<NonNullable<KtxScanConnector['sampleColumn']>>()
+      .mockRejectedValue(new Error('pool: connection refused'));
+    const flakyConnector: KtxScanConnector = {
+      ...createConnector(),
+      sampleColumn,
+    };
+    const generator = new KtxDescriptionGenerator({
+      llmProvider: createLlmProvider('Customer reference identifier'),
+      settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
+    });
+
+    const result = await generator.generateColumnDescriptions({
+      connectionId: 'conn-1',
+      connector: flakyConnector,
+      context: { runId: 'run-1' },
+      dataSourceType: 'POSTGRESQL',
+      supportsNestedAnalysis: false,
+      table: {
+        catalog: null,
+        db: 'public',
+        name: 'orders',
+        columns: [{ name: 'customer_id', rawDescriptions: { db: 'FK to customers.id' } }],
+      },
+    });
+
+    expect(sampleColumn).toHaveBeenCalledTimes(3);
+    expect(result.columnDescriptions).toEqual([['customer_id', 'Customer reference identifier']]);
+  });
+
+  // Sampling yields only nulls and the column has no rawDescriptions: the description is null and
+  // generateText is never called. The mock is cleared first so earlier tests do not count.
+  it('skips column LLM call only when neither samples nor rawDescriptions are available', async () => {
+    const sampleColumn = vi
+      .fn<NonNullable<KtxScanConnector['sampleColumn']>>()
+      .mockResolvedValue({ values: [null, null], nullCount: 2, distinctCount: 0 });
+    const connector: KtxScanConnector = {
+      ...createConnector(),
+      sampleColumn,
+    };
+    vi.mocked(generateText).mockClear();
+    const generator = new KtxDescriptionGenerator({
+      llmProvider: createLlmProvider('should not be called'),
+      settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
+    });
+
+    const result = await generator.generateColumnDescriptions({
+      connectionId: 'conn-1',
+      connector,
+      context: { runId: 'run-1' },
+      dataSourceType: 'POSTGRESQL',
+      supportsNestedAnalysis: false,
+      table: {
+        catalog: null,
+        db: 'public',
+        name: 'orders',
+        columns: [{ name: 'opaque_blob' }],
+      },
+    });
+
+    expect(result.columnDescriptions).toEqual([['opaque_blob', null]]);
+    expect(generateText).not.toHaveBeenCalled();
+  });
+});
--- a/packages/context/src/scan/description-generation.ts
+++ b/packages/context/src/scan/description-generation.ts
@ -5,11 +5,18 @@ import type {
  KtxColumnSampleResult,
  KtxScanContext,
  KtxScanLoggerPort,
+  KtxScanWarning,
  KtxTableRef,
  KtxTableSampleInput,
  KtxTableSampleResult,
 } from './types.js';

+/**
+ * Column metadata used by the table-description prompt when no sample rows are available.
+ * The prompt renders each entry as `name (nativeType) — comment`, leaving out the optional parts
+ * when they are missing or empty.
+ */
+interface KtxDescriptionTableColumn {
+  name: string;
+  nativeType?: string | null;
+  comment?: string | null;
+}
+
 export interface KtxDescriptionCachePort {
  buildTableKey(table: KtxTableRef): string;
  buildColumnKey(table: KtxTableRef, columnName: string): string;
@ -53,6 +60,7 @@ export interface KtxDescriptionColumnTable extends KtxTableRef {

 export interface KtxDescriptionTableInput extends KtxTableRef {
  rawDescriptions?: Record<string, string>;
+  columns?: KtxDescriptionTableColumn[];
 }

 export interface KtxColumnAnalysisResult {
@ -72,7 +80,8 @@ export interface KtxColumnDescriptionPromptInput {

 export interface KtxTableDescriptionPromptInput {
  tableName: string;
-  sampleData: KtxTableSampleResult;
+  sampleData?: KtxTableSampleResult;
+  columns?: KtxDescriptionTableColumn[];
  dataSourceType: string;
  rawDescriptions?: Record<string, string>;
 }
@ -114,6 +123,7 @@ export interface KtxDescriptionGeneratorOptions {
  llmProvider: KtxLlmProvider;
  cache?: KtxDescriptionCachePort;
  logger?: KtxScanLoggerPort;
+  onWarning?: (warning: KtxScanWarning) => void;
  settings: KtxDescriptionGenerationSettings;
 }

@ -136,6 +146,66 @@ function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
 }

+/**
+ * Thrown when a delay or retry loop is cancelled through an `AbortSignal`.
+ * The message is always 'aborted'; code in this file recognises it with `instanceof` and rethrows
+ * it instead of treating it as an ordinary failure.
+ */
+class KtxAbortedError extends Error {
+  constructor() {
+    super('aborted');
+    this.name = 'KtxAbortedError';
+  }
+}
+
+/**
+ * Waits `ms` milliseconds, ending early if `signal` is aborted.
+ *
+ * @param ms - How long to wait, in milliseconds.
+ * @param signal - Optional abort signal; without one this is a plain timer.
+ * @throws KtxAbortedError if `signal` is already aborted or aborts during the wait.
+ */
+async function delayWithAbort(ms: number, signal?: AbortSignal): Promise<void> {
+  if (signal?.aborted) {
+    throw new KtxAbortedError();
+  }
+  await new Promise<void>((resolve, reject) => {
+    // Runs at most once: cancels the pending timer and fails the wait.
+    const abortHandler = (): void => {
+      clearTimeout(handle);
+      reject(new KtxAbortedError());
+    };
+    const handle = setTimeout(() => {
+      // Detach the listener so a completed wait leaves nothing registered on the signal.
+      signal?.removeEventListener('abort', abortHandler);
+      resolve();
+    }, ms);
+    signal?.addEventListener('abort', abortHandler, { once: true });
+  });
+}
+
+/** Options accepted by `retryAsync`. */
+interface RetryAsyncOptions {
+  /** Maximum number of calls to make; `retryAsync` raises values below 1 to 1. */
+  attempts: number;
+  /** Wait before the second attempt, in milliseconds; the wait doubles after each further failure. */
+  baseDelayMs: number;
+  /** When aborted, stops the loop; checked before each attempt and while waiting between attempts. */
+  signal?: AbortSignal;
+  /**
+   * Called after each failed attempt with the error and the 1-based attempt number.
+   * Not called when the failure is a `KtxAbortedError`.
+   */
+  onAttemptFailure?: (error: unknown, attempt: number) => void;
+}
+
+/**
+ * Calls `fn` until it resolves, retrying failures with exponential backoff.
+ *
+ * The wait before attempt `k + 1` is `baseDelayMs * 2 ** (k - 1)` milliseconds.
+ *
+ * @param fn - Operation to run; invoked again from scratch on every attempt.
+ * @param options - Attempt budget, base delay, optional abort signal and per-failure callback.
+ * @returns The value of the first attempt that resolves.
+ * @throws KtxAbortedError when `options.signal` is aborted before, during or between attempts.
+ * @throws The error of the last attempt once the attempt budget is used up.
+ */
+async function retryAsync<T>(fn: () => Promise<T>, options: RetryAsyncOptions): Promise<T> {
+  // Normalise the budget to a whole number of at least 1. A NaN budget used to skip the loop and
+  // reject with `undefined`; a fractional one never matched the last-attempt check below and so
+  // paid for one extra, useless delay.
+  const attempts = Number.isNaN(options.attempts) ? 1 : Math.max(1, Math.floor(options.attempts));
+  let lastError: unknown;
+  for (let attempt = 1; attempt <= attempts; attempt += 1) {
+    if (options.signal?.aborted) {
+      throw new KtxAbortedError();
+    }
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error;
+      if (error instanceof KtxAbortedError) {
+        throw error;
+      }
+      options.onAttemptFailure?.(error, attempt);
+      // An abort that lands during an attempt must surface as an abort on every attempt, including
+      // the last one; otherwise callers mistake a cancelled scan for an ordinary failure and carry on.
+      if (options.signal?.aborted) {
+        throw new KtxAbortedError();
+      }
+      if (attempt === attempts) {
+        break;
+      }
+      const delay = options.baseDelayMs * 2 ** (attempt - 1);
+      await delayWithAbort(delay, options.signal);
+    }
+  }
+  throw lastError;
+}
+
 function toTableRef(table: KtxTableRef): KtxTableRef {
  return {
    catalog: table.catalog,
@ -205,11 +275,12 @@ Example:
    systemParts.push(wordLimitLine(input.maxWords));
  }

+  const sampleValuesContent = valuesStr.length > 0 ? valuesStr : 'unavailable';
  let user = `<table_context> ${input.tableContext} </table_context>

 <column_name> ${input.columnName} </column_name>

-<sample_values> ${valuesStr} </sample_values>
+<sample_values> ${sampleValuesContent} </sample_values>
 `;

  const sources = descriptionSources(input.rawDescriptions);
@ -228,16 +299,6 @@ Example:
 export function buildKtxTableDescriptionPrompt(
  input: KtxTableDescriptionPromptInput & { maxWords?: number },
 ): KtxDescriptionPrompt {
-  const columnInfo: string[] = [];
-  for (let index = 0; index < Math.min(input.sampleData.headers.length, 10); index += 1) {
-    const header = input.sampleData.headers[index];
-    const sampleValues = input.sampleData.rows
-      .slice(0, 3)
-      .map((row) => row[index])
-      .filter((value) => value !== null && value !== undefined);
-    columnInfo.push(`${header}: ${sampleValues.map((value) => String(value)).join(', ')}`);
-  }
-
  const systemParts: string[] = [
    `Analyze database tables and provide a concise description.

@ -256,9 +317,38 @@ Example: "Information about healthcare professionals used for workforce manageme
    systemParts.push(wordLimitLine(input.maxWords));
  }

+  const hasSamples = !!input.sampleData && input.sampleData.rows.length > 0;
+  let columnsLine: string;
+  let rowsLine: string;
+  if (hasSamples) {
+    const sampleData = input.sampleData!;
+    const columnInfo: string[] = [];
+    for (let index = 0; index < Math.min(sampleData.headers.length, 10); index += 1) {
+      const header = sampleData.headers[index];
+      const sampleValues = sampleData.rows
+        .slice(0, 3)
+        .map((row) => row[index])
+        .filter((value) => value !== null && value !== undefined);
+      columnInfo.push(`${header}: ${sampleValues.map((value) => String(value)).join(', ')}`);
+    }
+    columnsLine = `Columns and sample data: ${columnInfo.join(' | ')}`;
+    rowsLine = `Total rows in sample: ${sampleData.rows.length}`;
+  } else if (input.columns && input.columns.length > 0) {
+    const columnInfo = input.columns.slice(0, 30).map((column) => {
+      const typePart = column.nativeType ? ` (${column.nativeType})` : '';
+      const commentPart = column.comment ? ` — ${column.comment}` : '';
+      return `${column.name}${typePart}${commentPart}`;
+    });
+    columnsLine = `Columns (metadata only, no sample rows): ${columnInfo.join(' | ')}`;
+    rowsLine = 'Sample rows: unavailable';
+  } else {
+    columnsLine = 'Columns: unavailable';
+    rowsLine = 'Sample rows: unavailable';
+  }
+
  let user = `Table: ${input.tableName}
-Columns and sample data: ${columnInfo.join(' | ')}
-Total rows in sample: ${input.sampleData.rows.length}
+${columnsLine}
+${rowsLine}
 Data source type: ${input.dataSourceType}`;

  const sources = descriptionSources(input.rawDescriptions);
@ -313,12 +403,14 @@ export class KtxDescriptionGenerator {
  private readonly llmProvider: KtxLlmProvider;
  private readonly cache?: KtxDescriptionCachePort;
  private readonly logger?: KtxScanLoggerPort;
+  private readonly onWarning?: (warning: KtxScanWarning) => void;
  private readonly settings: ResolvedKtxDescriptionGenerationSettings;

  constructor(options: KtxDescriptionGeneratorOptions) {
    this.llmProvider = options.llmProvider;
    this.cache = options.cache;
    this.logger = options.logger;
+    this.onWarning = options.onWarning;
    this.settings = {
      columnMaxWords: options.settings.columnMaxWords,
      tableMaxWords: options.settings.tableMaxWords,
@ -366,26 +458,82 @@ export class KtxDescriptionGenerator {
      }
    }

-    if (!input.connector.sampleTable) {
-      this.logger?.warn('KTX scan connector does not support table sampling for table description generation', {
+    const sampleTable = input.connector.sampleTable;
+    let sampleData: KtxTableSampleResult | null = null;
+    let fallbackReason: 'capability_missing' | 'sampling_failed' | 'empty_sample' | null = null;
+
+    if (!sampleTable) {
+      fallbackReason = 'capability_missing';
+      this.logger?.warn('KTX scan connector does not support table sampling; falling back to metadata-only prompt', {
        connectorId: input.connector.id,
        table: input.table.name,
      });
-      return 'Table not found';
+      this.onWarning?.({
+        code: 'connector_capability_missing',
+        message: `Connector ${input.connector.id} does not support sampleTable; using metadata-only description prompt`,
+        table: input.table.name,
+        recoverable: true,
+        metadata: { connectorId: input.connector.id, capability: 'sampleTable' },
+      });
+    } else {
+      try {
+        sampleData = await retryAsync(
+          () =>
+            sampleTable(
+              {
+                connectionId: input.connectionId,
+                table: tableRef,
+                limit: 20,
+              },
+              input.context,
+            ),
+          {
+            attempts: 3,
+            baseDelayMs: 200,
+            signal: input.context.signal,
+            onAttemptFailure: (error, attempt) => {
+              this.logger?.warn(
+                `sampleTable attempt ${attempt} failed for ${input.table.name}: ${errorMessage(error)}`,
+                {
+                  connectorId: input.connector.id,
+                  table: input.table.name,
+                  attempt,
+                },
+              );
+            },
+          },
+        );
+        if (sampleData.rows.length === 0) {
+          fallbackReason = 'empty_sample';
+          this.logger?.warn('sampleTable returned no rows; using metadata-only prompt', {
+            connectorId: input.connector.id,
+            table: input.table.name,
+          });
+        }
+      } catch (error) {
+        if (error instanceof KtxAbortedError) {
+          throw error;
+        }
+        fallbackReason = 'sampling_failed';
+        this.logger?.error(`sampleTable exhausted retries for ${input.table.name}: ${errorMessage(error)}`, {
+          connectorId: input.connector.id,
+          table: input.table.name,
+        });
+        this.onWarning?.({
+          code: 'sampling_failed',
+          message: `Failed to sample table ${input.table.name} after retries: ${errorMessage(error)}`,
+          table: input.table.name,
+          recoverable: true,
+          metadata: { connectorId: input.connector.id, error: errorMessage(error) },
+        });
+      }
    }

    try {
-      const sampleData = await input.connector.sampleTable(
-        {
-          connectionId: input.connectionId,
-          table: tableRef,
-          limit: 20,
-        },
-        input.context,
-      );
      const prompt = buildKtxTableDescriptionPrompt({
        tableName: input.table.name,
-        sampleData,
+        ...(fallbackReason === null && sampleData ? { sampleData } : {}),
+        ...(input.table.columns && input.table.columns.length > 0 ? { columns: input.table.columns } : {}),
        dataSourceType: input.dataSourceType,
        rawDescriptions: input.table.rawDescriptions,
        maxWords: this.settings.tableMaxWords,
@ -394,10 +542,38 @@ export class KtxDescriptionGenerator {
      if (cacheKey && description) {
        await this.cache?.set(cacheKey, description);
      }
+      if (description && fallbackReason !== null) {
+        this.onWarning?.({
+          code: 'description_fallback_used',
+          message: `Generated table description without sample rows for ${input.table.name} (reason: ${fallbackReason})`,
+          table: input.table.name,
+          recoverable: true,
+          metadata: { connectorId: input.connector.id, reason: fallbackReason },
+        });
+      }
+      if (!description) {
+        this.onWarning?.({
+          code: 'enrichment_failed',
+          message: `Failed to generate description for table ${input.table.name}`,
+          table: input.table.name,
+          recoverable: true,
+          metadata: { connectorId: input.connector.id, usedFallback: fallbackReason !== null },
+        });
+      }
      return description;
    } catch (error) {
-      this.logger?.error(`Error generating table description: ${errorMessage(error)}`);
-      return 'Table not found';
+      this.logger?.error(`Error generating table description: ${errorMessage(error)}`, {
+        connectorId: input.connector.id,
+        table: input.table.name,
+      });
+      this.onWarning?.({
+        code: 'enrichment_failed',
+        message: `Failed to generate description for table ${input.table.name}: ${errorMessage(error)}`,
+        table: input.table.name,
+        recoverable: true,
+        metadata: { connectorId: input.connector.id },
+      });
+      return null;
    }
  }

@ -496,33 +672,64 @@ export class KtxDescriptionGenerator {
      let columnValues = column.sampleValues;
      if (!columnValues || columnValues.length === 0) {
        if (!input.connector.sampleColumn) {
-          this.logger?.warn('KTX scan connector does not support column sampling for column description generation', {
+          this.logger?.warn('KTX scan connector does not support column sampling; using available metadata only', {
            connectorId: input.connector.id,
            table: input.table.name,
            column: column.name,
          });
-          return {
-            columnName: column.name,
-            description: null,
-            skipped: false,
-            processed: false,
-          };
+          columnValues = [];
+        } else {
+          const sampleColumn = input.connector.sampleColumn;
+          try {
+            const sample = await retryAsync(
+              () =>
+                sampleColumn(
+                  {
+                    connectionId: input.connectionId,
+                    table: tableRef,
+                    column: column.name,
+                    limit: 50,
+                  },
+                  input.context,
+                ),
+              {
+                attempts: 3,
+                baseDelayMs: 200,
+                signal: input.context.signal,
+                onAttemptFailure: (error, attempt) => {
+                  this.logger?.warn(
+                    `sampleColumn attempt ${attempt} failed for ${input.table.name}.${column.name}: ${errorMessage(error)}`,
+                    {
+                      connectorId: input.connector.id,
+                      table: input.table.name,
+                      column: column.name,
+                      attempt,
+                    },
+                  );
+                },
+              },
+            );
+            columnValues = sample.values;
+          } catch (error) {
+            if (error instanceof KtxAbortedError) {
+              throw error;
+            }
+            this.logger?.warn(
+              `sampleColumn exhausted retries for ${input.table.name}.${column.name}; using available metadata only: ${errorMessage(error)}`,
+              {
+                connectorId: input.connector.id,
+                table: input.table.name,
+                column: column.name,
+              },
+            );
+            columnValues = [];
+          }
        }
-
-        const sample = await input.connector.sampleColumn(
-          {
-            connectionId: input.connectionId,
-            table: tableRef,
-            column: column.name,
-            limit: 50,
-          },
-          input.context,
-        );
-        columnValues = sample.values;
      }

      const nonNullValues = (columnValues ?? []).filter((value) => value !== null && value !== undefined);
-      if (nonNullValues.length === 0) {
+      const hasRawDescriptions = descriptionSources(column.rawDescriptions).length > 0;
+      if (nonNullValues.length === 0 && !hasRawDescriptions) {
        return {
          columnName: column.name,
          description: null,
@ -553,7 +760,14 @@ export class KtxDescriptionGenerator {
        processed: description !== null,
      };
    } catch (error) {
-      this.logger?.error(`Error analyzing column '${column.name}': ${errorMessage(error)}`);
+      if (error instanceof KtxAbortedError) {
+        throw error;
+      }
+      this.logger?.error(`Error analyzing column '${column.name}': ${errorMessage(error)}`, {
+        connectorId: input.connector.id,
+        table: input.table.name,
+        column: column.name,
+      });
      return {
        columnName: column.name,
        description: null,
--- a/packages/context/src/scan/entity-details.test.ts
+++ b/packages/context/src/scan/entity-details.test.ts
@ -0,0 +1,291 @@
+import { mkdtemp, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { initKtxProject, type KtxLocalProject } from '../project/index.js';
+import { createKtxEntityDetailsService } from './entity-details.js';
+import type { KtxConnectionDriver, KtxScanReport, KtxSchemaTable } from './types.js';
+
+describe('createKtxEntityDetailsService', () => {
+  let tempDir: string;
+  let project: KtxLocalProject;
+
+  // Each test gets a fresh KTX project inside its own temporary directory.
+  beforeEach(async () => {
+    tempDir = await mkdtemp(join(tmpdir(), 'ktx-entity-details-service-'));
+    project = await initKtxProject({ projectDir: join(tempDir, 'project') });
+  });
+
+  // Remove the temporary directory (and the project inside it) after every test.
+  afterEach(async () => {
+    await rm(tempDir, { recursive: true, force: true });
+  });
+
+  /**
+   * Builds a scan-report fixture for the given connection, sync and run.
+   *
+   * `driver` defaults to 'postgres' and `createdAt` to '2026-05-14T09:00:00.000Z'.
+   * Artifact paths are rooted at `raw-sources/<connectionId>/live-database/<syncId>`,
+   * and every enrichment stage is reported as 'skipped'.
+   */
+  function scanReport(input: {
+    connectionId: string;
+    syncId: string;
+    runId: string;
+    driver?: KtxConnectionDriver;
+    createdAt?: string;
+  }): KtxScanReport {
+    const rawSourcesDir = `raw-sources/${input.connectionId}/live-database/${input.syncId}`;
+    return {
+      connectionId: input.connectionId,
+      driver: input.driver ?? 'postgres',
+      syncId: input.syncId,
+      runId: input.runId,
+      trigger: 'mcp',
+      mode: 'structural',
+      dryRun: false,
+      artifactPaths: {
+        rawSourcesDir,
+        reportPath: `${rawSourcesDir}/scan-report.json`,
+        manifestShards: [],
+        enrichmentArtifacts: [],
+      },
+      diffSummary: {
+        tablesAdded: 0,
+        tablesModified: 0,
+        tablesDeleted: 0,
+        tablesUnchanged: 1,
+        columnsAdded: 0,
+        columnsModified: 0,
+        columnsDeleted: 0,
+      },
+      manifestShardsWritten: 0,
+      structuralSyncStats: {
+        tablesCreated: 1,
+        tablesUpdated: 0,
+        tablesDeleted: 0,
+        columnsCreated: 0,
+        columnsUpdated: 0,
+        columnsDeleted: 0,
+      },
+      enrichment: {
+        dataDictionary: 'skipped',
+        tableDescriptions: 'skipped',
+        columnDescriptions: 'skipped',
+        embeddings: 'skipped',
+        deterministicRelationships: 'skipped',
+        llmRelationshipValidation: 'skipped',
+        statisticalValidation: 'skipped',
+      },
+      capabilityGaps: [],
+      warnings: [],
+      relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
+      enrichmentState: { resumedStages: [], completedStages: [], failedStages: [] },
+      createdAt: input.createdAt ?? '2026-05-14T09:00:00.000Z',
+    };
+  }
+
+  /**
+   * Builds an `orders` table fixture with two columns (`id`, `status`) and one
+   * foreign key to `public.customers`.
+   *
+   * `db` defaults to 'public' and `estimatedRows` to 12 only when the option is
+   * omitted; an explicit `null` is passed through, as the parameter type allows.
+   */
+  function ordersTable(input: { db?: string | null; estimatedRows?: number | null } = {}): KtxSchemaTable {
+    // `??` would also replace an explicit null, so test for undefined instead.
+    const db = input.db === undefined ? 'public' : input.db;
+    const estimatedRows = input.estimatedRows === undefined ? 12 : input.estimatedRows;
+    return {
+      catalog: null,
+      db,
+      name: 'orders',
+      kind: 'table',
+      comment: 'Customer orders',
+      estimatedRows,
+      columns: [
+        {
+          name: 'id',
+          nativeType: 'integer',
+          normalizedType: 'integer',
+          dimensionType: 'number',
+          nullable: false,
+          primaryKey: true,
+          comment: 'Order id',
+        },
+        {
+          name: 'status',
+          nativeType: 'text',
+          normalizedType: 'text',
+          dimensionType: 'string',
+          nullable: false,
+          primaryKey: false,
+          comment: 'Order status',
+        },
+      ],
+      foreignKeys: [
+        {
+          fromColumn: 'customer_id',
+          toCatalog: null,
+          toDb: 'public',
+          toTable: 'customers',
+          toColumn: 'id',
+          constraintName: 'orders_customer_id_fkey',
+        },
+      ],
+    };
+  }
+
+  /**
+   * Writes one scan's artifacts into the project's file store: `connection.json`,
+   * one JSON file per table under `tables/`, and finally `scan-report.json`.
+   *
+   * `connectionId` defaults to 'warehouse' and `tables` to a single `ordersTable()`.
+   * `extractedAt`, when given, is used both as the report's `createdAt` and as the
+   * `extractedAt` recorded in `connection.json`.
+   */
+  async function seedScan(input: {
+    connectionId?: string;
+    syncId: string;
+    runId: string;
+    driver?: KtxConnectionDriver;
+    extractedAt?: string;
+    tables?: KtxSchemaTable[];
+  }): Promise<void> {
+    const connectionId = input.connectionId ?? 'warehouse';
+    const report = scanReport({
+      connectionId,
+      syncId: input.syncId,
+      runId: input.runId,
+      driver: input.driver,
+      createdAt: input.extractedAt,
+    });
+    const root = report.artifactPaths.rawSourcesDir;
+    await project.fileStore.writeFile(
+      `${root}/connection.json`,
+      JSON.stringify(
+        {
+          connectionId,
+          driver: report.driver,
+          extractedAt: input.extractedAt ?? report.createdAt,
+          scope: { schemas: ['public'] },
+        },
+        null,
+        2,
+      ),
+      'ktx',
+      'ktx@example.com',
+      'seed connection',
+    );
+    // One file per table; a null `db` falls back to the 'default' file-name prefix.
+    for (const table of input.tables ?? [ordersTable()]) {
+      await project.fileStore.writeFile(
+        `${root}/tables/${table.db ?? 'default'}-${table.name}.json`,
+        JSON.stringify(table, null, 2),
+        'ktx',
+        'ktx@example.com',
+        `seed ${table.name}`,
+      );
+    }
+    await project.fileStore.writeFile(
+      `${root}/scan-report.json`,
+      JSON.stringify(report, null, 2),
+      'ktx',
+      'ktx@example.com',
+      'seed scan report',
+    );
+  }
+
+  // Two scans are seeded; the answer must come from the second one (sync-2),
+  // which is distinguishable by its estimatedRows value of 99.
+  it('returns the latest scan snapshot table details for a display string', async () => {
+    await seedScan({ syncId: 'sync-1', runId: 'scan-old', extractedAt: '2026-05-14T08:00:00.000Z' });
+    await seedScan({
+      syncId: 'sync-2',
+      runId: 'scan-new',
+      extractedAt: '2026-05-14T09:00:00.000Z',
+      tables: [ordersTable({ estimatedRows: 99 })],
+    });
+    const service = createKtxEntityDetailsService(project);
+
+    const result = await service.read({
+      connectionId: 'warehouse',
+      entities: [{ table: 'public.orders' }],
+    });
+
+    expect(result.results).toHaveLength(1);
+    expect(result.results[0]).toMatchObject({
+      ok: true,
+      connectionId: 'warehouse',
+      display: 'public.orders',
+      estimatedRows: 99,
+      snapshot: {
+        syncId: 'sync-2',
+        scanRunId: 'scan-new',
+        extractedAt: '2026-05-14T09:00:00.000Z',
+      },
+      columns: [
+        { name: 'id', nativeType: 'integer', primaryKey: true },
+        { name: 'status', nativeType: 'text', nullable: false },
+      ],
+    });
+  });
+
+  // The table is addressed with a structured ref here; only `status` is requested,
+  // yet the foreign key on `customer_id` must still be reported.
+  it('filters requested columns while keeping full-table foreign keys', async () => {
+    await seedScan({ syncId: 'sync-1', runId: 'scan-1' });
+    const service = createKtxEntityDetailsService(project);
+
+    const result = await service.read({
+      connectionId: 'warehouse',
+      entities: [{ table: { catalog: null, db: 'public', name: 'orders' }, columns: ['status'] }],
+    });
+
+    expect(result.results[0]).toMatchObject({
+      ok: true,
+      columns: [{ name: 'status' }],
+      foreignKeys: [
+        {
+          fromColumn: 'customer_id',
+          toDb: 'public',
+          toTable: 'customers',
+          toColumn: 'id',
+        },
+      ],
+    });
+  });
+
+  // No scan is seeded, so every requested entity must come back as a
+  // `scan_missing` error (asserted with exact equality, i.e. no `snapshot` field).
+  it('returns a structured missing-scan error', async () => {
+    const service = createKtxEntityDetailsService(project);
+
+    const result = await service.read({
+      connectionId: 'warehouse',
+      entities: [{ table: 'public.orders' }],
+    });
+
+    expect(result.results).toEqual([
+      {
+        ok: false,
+        connectionId: 'warehouse',
+        table: 'public.orders',
+        error: {
+          code: 'scan_missing',
+          message: 'No live-database scan found for connection "warehouse"; run `ktx ingest warehouse` or `ktx scan warehouse`.',
+        },
+      },
+    ]);
+  });
+
+  // `orders` exists in both `public` and `archive`; a bare name must be rejected
+  // with both candidates listed, ordered by display string.
+  it('reports ambiguous bare table names across schemas', async () => {
+    await seedScan({
+      syncId: 'sync-1',
+      runId: 'scan-1',
+      tables: [ordersTable({ db: 'public' }), ordersTable({ db: 'archive' })],
+    });
+    const service = createKtxEntityDetailsService(project);
+
+    const result = await service.read({
+      connectionId: 'warehouse',
+      entities: [{ table: 'orders' }],
+    });
+
+    expect(result.results[0]).toMatchObject({
+      ok: false,
+      error: {
+        code: 'ambiguous_table',
+        candidates: [
+          { tableRef: { catalog: null, db: 'archive', name: 'orders' }, display: 'archive.orders' },
+          { tableRef: { catalog: null, db: 'public', name: 'orders' }, display: 'public.orders' },
+        ],
+      },
+    });
+  });
+
+  // One requested column exists and one does not: the whole entity fails, the
+  // message names only the missing column, and candidates list the table's columns.
+  it('reports missing requested columns with available column candidates', async () => {
+    await seedScan({ syncId: 'sync-1', runId: 'scan-1' });
+    const service = createKtxEntityDetailsService(project);
+
+    const result = await service.read({
+      connectionId: 'warehouse',
+      entities: [{ table: 'public.orders', columns: ['status', 'plan_tier'] }],
+    });
+
+    expect(result.results[0]).toMatchObject({
+      ok: false,
+      error: {
+        code: 'column_not_found',
+        message: 'Column(s) not found on public.orders: plan_tier',
+        candidates: ['id', 'status'],
+      },
+    });
+  });
+});
--- a/packages/context/src/scan/entity-details.ts
+++ b/packages/context/src/scan/entity-details.ts
@ -0,0 +1,315 @@
+import type { KtxLocalProject } from '../project/index.js';
+import { readLocalScanStructuralSnapshot } from './local-structural-artifacts.js';
+import type {
+  KtxConnectionDriver,
+  KtxScanReport,
+  KtxSchemaColumn,
+  KtxSchemaSnapshot,
+  KtxSchemaTable,
+  KtxTableRef,
+} from './types.js';
+
+/** A table reference given either as a display string or as a structured ref. */
+export type KtxEntityDetailsTableInput = string | KtxTableRef;
+
+/** Request for details about one or more tables of a single connection. */
+export interface KtxEntityDetailsInput {
+  connectionId: string;
+  entities: Array<{
+    table: KtxEntityDetailsTableInput;
+    /** Optional column filter; when omitted or empty, all columns are returned. */
+    columns?: string[];
+  }>;
+}
+
+/** Identifies the scan snapshot that a result was answered from. */
+export interface KtxEntityDetailsSnapshotInfo {
+  syncId: string;
+  extractedAt: string;
+  /** Run id taken from the scan report, or null when the report has none. */
+  scanRunId: string | null;
+}
+
+/** Column fields exposed in an entity-details result. */
+export interface KtxEntityDetailsColumn {
+  name: string;
+  nativeType: string;
+  normalizedType: string;
+  dimensionType: KtxSchemaColumn['dimensionType'];
+  nullable: boolean;
+  primaryKey: boolean;
+  comment: string | null;
+}
+
+/** Successful result for one requested table. */
+export interface KtxEntityDetailsRecord {
+  ok: true;
+  connectionId: string;
+  tableRef: KtxTableRef;
+  display: string;
+  kind: KtxSchemaTable['kind'];
+  comment: string | null;
+  estimatedRows: number | null;
+  /** The requested columns, or every column when no filter was given. */
+  columns: KtxEntityDetailsColumn[];
+  /** Foreign keys of the whole table; not narrowed by the column filter. */
+  foreignKeys: KtxSchemaTable['foreignKeys'];
+  snapshot: KtxEntityDetailsSnapshotInfo;
+}
+
+/** Machine-readable reasons an entity lookup can fail. */
+export type KtxEntityDetailsErrorCode = 'scan_missing' | 'table_not_found' | 'ambiguous_table' | 'column_not_found';
+
+/** Failed result for one requested table; `table` echoes the caller's input. */
+export interface KtxEntityDetailsErrorResult {
+  ok: false;
+  connectionId: string;
+  table: KtxEntityDetailsTableInput;
+  /** Set when a scan was found; absent on `scan_missing` results. */
+  snapshot?: KtxEntityDetailsSnapshotInfo;
+  error: {
+    code: KtxEntityDetailsErrorCode;
+    message: string;
+    /** Table candidates for table errors, or column names for `column_not_found`. */
+    candidates?: Array<{ tableRef: KtxTableRef; display: string }> | string[];
+  };
+}
+
+/** One result per requested entity, in request order. */
+export interface KtxEntityDetailsResponse {
+  results: Array<KtxEntityDetailsRecord | KtxEntityDetailsErrorResult>;
+}
+
+/** The scan report chosen as latest, together with its structural snapshot. */
+interface LatestScan {
+  report: KtxScanReport;
+  snapshot: KtxSchemaSnapshot;
+}
+
+/** Outcome of resolving a table input: the table, or null plus an error payload. */
+interface ResolveResult {
+  table: KtxSchemaTable | null;
+  error?: Omit<KtxEntityDetailsErrorResult['error'], 'message'> & { message: string };
+}
+
+/** Lower-cases an identifier for case-insensitive comparison; null/undefined become ''. */
+function normalize(value: string | null | undefined): string {
+  const text = value ?? '';
+  return text.toLowerCase();
+}
+
+/**
+ * Compares two table refs part by part (catalog, db, name), ignoring case and
+ * treating null/undefined parts as empty strings.
+ */
+function refsEqual(left: KtxTableRef, right: KtxTableRef): boolean {
+  const parts = ['catalog', 'db', 'name'] as const;
+  return parts.every((part) => normalize(left[part]) === normalize(right[part]));
+}
+
+/**
+ * Trims whitespace from one identifier segment, then strips a single leading
+ * quote character (", ', ` or [) and a single trailing one (", ', ` or ]).
+ */
+function cleanIdentifierPart(part: string): string {
+  const trimmed = part.trim();
+  return trimmed.replace(/^["'`\[]|["'`\]]$/g, '');
+}
+
+/**
+ * Splits a dotted display string into cleaned identifier segments, dropping
+ * any segment that is empty after cleaning.
+ */
+function splitDisplay(display: string): string[] {
+  const segments: string[] = [];
+  for (const raw of display.trim().split('.')) {
+    const cleaned = cleanIdentifierPart(raw);
+    if (cleaned) {
+      segments.push(cleaned);
+    }
+  }
+  return segments;
+}
+
+/**
+ * Renders a table ref as a display string: the bare name for sqlite, otherwise
+ * the non-empty parts of catalog, db and name joined with '.'.
+ */
+function displayForTable(driver: KtxConnectionDriver, table: KtxTableRef): string {
+  if (driver === 'sqlite') {
+    return table.name;
+  }
+  const segments: string[] = [];
+  for (const part of [table.catalog, table.db, table.name]) {
+    if (part) {
+      segments.push(part);
+    }
+  }
+  return segments.join('.');
+}
+
+/** Extracts the identifying (catalog, db, name) triple from a schema table. */
+function tableRef(table: KtxSchemaTable): KtxTableRef {
+  const { catalog, db, name } = table;
+  return { catalog, db, name };
+}
+
+/**
+ * Turns tables into `{ tableRef, display }` candidates for error payloads,
+ * ordered by display string using `localeCompare`.
+ */
+function candidateList(
+  driver: KtxConnectionDriver,
+  tables: KtxSchemaTable[],
+): Array<{ tableRef: KtxTableRef; display: string }> {
+  const candidates = tables.map((table) => {
+    const ref = tableRef(table);
+    const display = displayForTable(driver, table);
+    return { tableRef: ref, display };
+  });
+  // `candidates` is a fresh array from map(), so sorting in place is safe.
+  candidates.sort((a, b) => a.display.localeCompare(b.display));
+  return candidates;
+}
+
+/**
+ * Parses a display string into a table ref according to the driver's naming depth.
+ *
+ * - sqlite: exactly one segment (name only).
+ * - bigquery / snowflake / sqlserver: exactly three segments (catalog.db.name).
+ * - any other driver: two segments (db.name) or three (catalog.db.name).
+ *
+ * @returns the parsed ref, or null when the segment count does not fit the driver.
+ */
+function parseDisplayRef(driver: KtxConnectionDriver, display: string): KtxTableRef | null {
+  const parts = splitDisplay(display);
+  const [first, second, third] = parts;
+
+  if (driver === 'sqlite') {
+    return parts.length === 1 ? { catalog: null, db: null, name: first! } : null;
+  }
+
+  const requiresCatalog = driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver';
+  switch (parts.length) {
+    case 3:
+      return { catalog: first!, db: second!, name: third! };
+    case 2:
+      return requiresCatalog ? null : { catalog: null, db: first!, name: second! };
+    default:
+      return null;
+  }
+}
+
+/**
+ * Resolves a caller-supplied table input against the snapshot's tables.
+ *
+ * Resolution order:
+ * 1. A structured ref is matched case-insensitively on catalog, db and name.
+ * 2. A display string that parses for the snapshot's driver is matched the same way.
+ * 3. Otherwise the string is treated as a bare table name and matched on name
+ *    alone; more than one match yields `ambiguous_table`.
+ *
+ * @returns `{ table }` on success, or `{ table: null, error }` with code
+ *   `table_not_found` (candidates: every table) or `ambiguous_table`
+ *   (candidates: the matching tables).
+ */
+function resolveTable(snapshot: KtxSchemaSnapshot, input: KtxEntityDetailsTableInput): ResolveResult {
+  const notFound = (display: string): ResolveResult => ({
+    table: null,
+    error: {
+      code: 'table_not_found',
+      message: `Table not found in latest scan: ${display}`,
+      candidates: candidateList(snapshot.driver, snapshot.tables),
+    },
+  });
+  const findByRef = (ref: KtxTableRef): KtxSchemaTable | null =>
+    snapshot.tables.find((candidate) => refsEqual(candidate, ref)) ?? null;
+
+  if (typeof input !== 'string') {
+    const table = findByRef(input);
+    return table ? { table } : notFound(displayForTable(snapshot.driver, input));
+  }
+
+  const parsed = parseDisplayRef(snapshot.driver, input);
+  if (parsed) {
+    const table = findByRef(parsed);
+    return table ? { table } : notFound(input);
+  }
+
+  // Bare-name fallback. Clean the name the same way qualified names are cleaned,
+  // so `"orders"` or ` orders ` still match a table called `orders`. Inputs with
+  // several segments keep the raw comparison they had before.
+  const segments = splitDisplay(input);
+  const bareName = segments.length === 1 ? segments[0]! : input;
+  const byName = snapshot.tables.filter((candidate) => normalize(candidate.name) === normalize(bareName));
+  if (byName.length === 1) {
+    return { table: byName[0]! };
+  }
+  if (byName.length > 1) {
+    return {
+      table: null,
+      error: {
+        code: 'ambiguous_table',
+        message: `Table name "${input}" is ambiguous across schemas/catalogs; pass a structured table ref.`,
+        candidates: candidateList(snapshot.driver, byName),
+      },
+    };
+  }
+  return notFound(input);
+}
+
+/** Projects a schema column onto the fields exposed in entity-details results. */
+function toColumn(column: KtxSchemaColumn): KtxEntityDetailsColumn {
+  const { name, nativeType, normalizedType, dimensionType, nullable, primaryKey, comment } = column;
+  return { name, nativeType, normalizedType, dimensionType, nullable, primaryKey, comment };
+}
+
+/**
+ * Summarises which scan a result came from: sync id and run id from the report
+ * (run id falls back to null), extraction time from the snapshot.
+ */
+function snapshotInfo(report: KtxScanReport, snapshot: KtxSchemaSnapshot): KtxEntityDetailsSnapshotInfo {
+  const { syncId, runId } = report;
+  const { extractedAt } = snapshot;
+  return { syncId, extractedAt, scanRunId: runId ?? null };
+}
+
+/**
+ * Reads a file from the project's file store and parses its content as JSON.
+ * The result is cast to `T` without validation.
+ */
+async function readJson<T>(project: KtxLocalProject, path: string): Promise<T> {
+  const file = await project.fileStore.readFile(path);
+  return JSON.parse(file.content) as T;
+}
+
+/**
+ * Loads the most recent live-database scan for a connection.
+ *
+ * Looks for `scan-report.json` files under `raw-sources/<connectionId>/live-database`
+ * and takes the last path in default string sort order.
+ * NOTE(review): this presumes sync ids sort chronologically as strings — confirm.
+ *
+ * @returns the report and its structural snapshot, or null when the directory
+ *   cannot be listed or holds no scan report.
+ */
+async function latestScan(project: KtxLocalProject, connectionId: string): Promise<LatestScan | null> {
+  const reportSuffix = '/scan-report.json';
+  const liveDatabaseRoot = `raw-sources/${connectionId}/live-database`;
+
+  let listing;
+  try {
+    listing = await project.fileStore.listFiles(liveDatabaseRoot);
+  } catch {
+    // Any listing failure is treated the same as "no scan yet".
+    return null;
+  }
+
+  const reportPaths = listing.files.filter((path) => path.endsWith(reportSuffix)).sort();
+  const newestReportPath = reportPaths.at(-1);
+  if (!newestReportPath) {
+    return null;
+  }
+
+  const report = await readJson<KtxScanReport>(project, newestReportPath);
+  // Fall back to the report's own directory when the report does not record one.
+  const rawSourcesDir =
+    report.artifactPaths.rawSourcesDir ?? newestReportPath.slice(0, -reportSuffix.length);
+  const snapshot = await readLocalScanStructuralSnapshot({
+    project,
+    connectionId,
+    driver: report.driver,
+    rawSourcesDir,
+    extractedAtFallback: report.createdAt,
+  });
+  return { report, snapshot };
+}
+
+/**
+ * Creates a service that answers table/column detail lookups from the latest
+ * live-database scan stored in the project.
+ *
+ * `read` returns one result per requested entity, in request order:
+ * - `scan_missing` for every entity when no scan can be found;
+ * - `table_not_found` / `ambiguous_table` when the table cannot be resolved;
+ * - `column_not_found` when any requested column is absent, listing the missing
+ *   names as the caller spelled them and the table's columns as candidates;
+ * - otherwise a success record with the requested columns (all columns when no
+ *   filter is given) and the table's full foreign-key list.
+ *
+ * Column names are matched case-insensitively.
+ */
+export function createKtxEntityDetailsService(project: KtxLocalProject) {
+  return {
+    async read(input: KtxEntityDetailsInput): Promise<KtxEntityDetailsResponse> {
+      const scan = await latestScan(project, input.connectionId);
+      if (!scan) {
+        return {
+          results: input.entities.map((entity) => ({
+            ok: false,
+            connectionId: input.connectionId,
+            table: entity.table,
+            error: {
+              code: 'scan_missing',
+              message: `No live-database scan found for connection "${input.connectionId}"; run \`ktx ingest ${input.connectionId}\` or \`ktx scan ${input.connectionId}\`.`,
+            },
+          })),
+        };
+      }
+
+      const driver = scan.snapshot.driver;
+      const info = snapshotInfo(scan.report, scan.snapshot);
+      const results: KtxEntityDetailsResponse['results'] = [];
+      for (const entity of input.entities) {
+        const resolved = resolveTable(scan.snapshot, entity.table);
+        const table = resolved.table;
+        if (!table) {
+          const requestedDisplay =
+            typeof entity.table === 'string' ? entity.table : displayForTable(driver, entity.table);
+          results.push({
+            ok: false,
+            connectionId: input.connectionId,
+            table: entity.table,
+            snapshot: info,
+            // resolveTable always sets an error alongside a null table; the
+            // fallback only replaces a non-null assertion.
+            error: resolved.error ?? {
+              code: 'table_not_found',
+              message: `Table not found in latest scan: ${requestedDisplay}`,
+            },
+          });
+          continue;
+        }
+
+        // Normalized name -> first spelling the caller used, so that error
+        // messages echo the request instead of a lower-cased form.
+        const requested = new Map<string, string>();
+        for (const column of entity.columns ?? []) {
+          const key = normalize(column);
+          if (!requested.has(key)) {
+            requested.set(key, column);
+          }
+        }
+
+        const columns =
+          requested.size > 0
+            ? table.columns.filter((column) => requested.has(normalize(column.name)))
+            : table.columns;
+
+        // Decide on the missing names themselves, not on a length comparison:
+        // a table with columns differing only by case can return more columns
+        // than were requested without anything being missing.
+        const found = new Set(columns.map((column) => normalize(column.name)));
+        const missing = [...requested]
+          .filter(([key]) => !found.has(key))
+          .map(([, asRequested]) => asRequested);
+        if (missing.length > 0) {
+          results.push({
+            ok: false,
+            connectionId: input.connectionId,
+            table: entity.table,
+            snapshot: info,
+            error: {
+              code: 'column_not_found',
+              message: `Column(s) not found on ${displayForTable(driver, table)}: ${missing.join(', ')}`,
+              candidates: table.columns.map((column) => column.name),
+            },
+          });
+          continue;
+        }
+
+        results.push({
+          ok: true,
+          connectionId: input.connectionId,
+          tableRef: tableRef(table),
+          display: displayForTable(driver, table),
+          kind: table.kind,
+          comment: table.comment,
+          estimatedRows: table.estimatedRows,
+          columns: columns.map(toColumn),
+          foreignKeys: table.foreignKeys,
+          snapshot: info,
+        });
+      }
+      return { results };
+    },
+  };
+}
--- a/packages/context/src/scan/index.ts
+++ b/packages/context/src/scan/index.ts
@ -60,6 +60,24 @@ export {
  ktxScanErrorMessage,
  skippedKtxScanEnrichmentSummary,
 } from './enrichment-summary.js';
+export type {
+  KtxEntityDetailsColumn,
+  KtxEntityDetailsErrorCode,
+  KtxEntityDetailsErrorResult,
+  KtxEntityDetailsInput,
+  KtxEntityDetailsRecord,
+  KtxEntityDetailsResponse,
+  KtxEntityDetailsSnapshotInfo,
+  KtxEntityDetailsTableInput,
+} from './entity-details.js';
+export { createKtxEntityDetailsService } from './entity-details.js';
+export type {
+  DisplayTargetResolution,
+  RawSchemaHit,
+  TableDetail,
+  WarehouseCatalogServiceDeps,
+} from './warehouse-catalog.js';
+export { WarehouseCatalogService } from './warehouse-catalog.js';
 export type {
  KtxColumnSampleUpdate,
  KtxDescriptionSource,
--- a/packages/context/src/scan/local-enrichment.test.ts
+++ b/packages/context/src/scan/local-enrichment.test.ts
@ -404,6 +404,41 @@ describe('local scan enrichment', () => {
    expect(result.resolvedRelationships).toBeNull();
  });

+  // The connector's sampleTable always throws, so enrichment must record
+  // warnings and log through the logger supplied on the scan context.
+  it('forwards context.logger and emits warnings when sampleTable fails repeatedly', async () => {
+    const failingConnector: KtxScanConnector = {
+      ...connector(),
+      sampleTable: vi.fn(async () => {
+        throw new Error('pool: ECONNRESET');
+      }),
+    };
+    const logger = {
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+    };
+
+    const result = await runLocalScanEnrichment({
+      connectionId: 'warehouse',
+      mode: 'enriched',
+      detectRelationships: false,
+      connector: failingConnector,
+      context: { runId: 'scan-run-warnings', logger },
+      providers: createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 }),
+    });
+
+    const codes = result.warnings.map((warning) => warning.code);
+    expect(codes).toContain('sampling_failed');
+    expect(codes).toContain('description_fallback_used');
+    expect(result.warnings.some((warning) => warning.table === 'customers')).toBe(true);
+    expect(logger.warn).toHaveBeenCalled();
+    expect(logger.error).toHaveBeenCalled();
+    // Each of the two tables produced sampling_failed + description_fallback_used, so 2 + 2 = 4 warnings minimum.
+    expect(result.warnings.length).toBeGreaterThanOrEqual(4);
+    // Sampling was retried 3× for each of the 2 tables = 6 calls
+    expect(failingConnector.sampleTable).toHaveBeenCalledTimes(6);
+  });
+
  it('runs configured deterministic enrichment with descriptions and embeddings', async () => {
    const result = await runLocalScanEnrichment({
      connectionId: 'warehouse',
--- a/packages/context/src/scan/local-enrichment.ts
+++ b/packages/context/src/scan/local-enrichment.ts
@ -298,6 +298,18 @@ function descriptionTable(table: KtxSchemaTable): KtxDescriptionColumnTable {
  };
 }

+/**
+ * Reduces a table's columns to name, native type and comment, with a missing
+ * native type or comment normalised to null.
+ */
+function tableMetadataColumns(table: KtxSchemaTable): Array<{
+  name: string;
+  nativeType?: string | null;
+  comment?: string | null;
+}> {
+  return table.columns.map(({ name, nativeType, comment }) => {
+    return {
+      name,
+      nativeType: nativeType ?? null,
+      comment: comment ?? null,
+    };
+  });
+}
+
 function embeddingBatchSize(maxBatchSize: number): number {
  return Number.isInteger(maxBatchSize) && maxBatchSize > 0 ? maxBatchSize : 100;
 }
@ -308,9 +320,19 @@ async function generateDescriptions(input: {
  context: KtxScanContext;
  providers: KtxLocalScanEnrichmentProviders;
  progress?: KtxProgressPort;
+  warnings?: KtxScanWarning[];
 }): Promise<KtxLocalScanEnrichmentResult['descriptionUpdates']> {
+  const warningSink = input.warnings;
  const generator = new KtxDescriptionGenerator({
    llmProvider: input.providers.llm,
+    ...(input.context.logger ? { logger: input.context.logger } : {}),
+    ...(warningSink
+      ? {
+          onWarning: (warning: KtxScanWarning) => {
+            warningSink.push(warning);
+          },
+        }
+      : {}),
    settings: {
      columnMaxWords: 16,
      tableMaxWords: 24,
@ -355,6 +377,7 @@ async function generateDescriptions(input: {
            db: table.db,
            name: table.name,
            rawDescriptions: table.comment ? { db: table.comment } : {},
+            columns: tableMetadataColumns(table),
          },
        });
        return {
@ -559,6 +582,7 @@ export async function runLocalScanEnrichment(
          context: input.context,
          providers,
          progress: descriptionProgress,
+          warnings,
        }),
    });
    const embeddingProgress = progress?.startPhase(0.2);
--- a/Show more
+++ b/Show more