mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
fix(setup): unblock clean Linux installs and add enabled_tables allowlist
- Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal.
This commit is contained in:
parent
c513d61dca
commit
2403f58eff
12 changed files with 307 additions and 8 deletions
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"title": "Getting Started",
|
||||
"defaultOpen": true,
|
||||
"pages": ["introduction", "quickstart"]
|
||||
"pages": ["introduction", "quickstart", "troubleshooting-linux"]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -296,7 +296,7 @@ surface.
|
|||
| Anthropic health check fails | API key, model id, or access is invalid | Fix `ANTHROPIC_API_KEY` or rerun setup with a different key or model |
|
||||
| Vertex AI health check fails | Vertex API, Claude access, project, location, or IAM permissions are missing | Check the project, location, Application Default Credentials, and Vertex AI permissions |
|
||||
| OpenAI embeddings fail | `OPENAI_API_KEY` is missing or invalid | Export the key or choose local sentence-transformers embeddings |
|
||||
| Local embeddings fail | Managed Python runtime cannot install or start | Run `ktx dev runtime status`, then install the local embeddings runtime |
|
||||
| Local embeddings fail | Managed Python runtime cannot install or start | See [Troubleshooting clean Linux install](/docs/getting-started/troubleshooting-linux) — usually missing Python 3.13 or an IPv6 proxy env var |
|
||||
| Database test fails | Credentials, network access, database, warehouse, or schema is wrong | Test the same values with the database's native client, then rerun setup |
|
||||
| Context is not built | Setup saved configuration but skipped or interrupted the build | Run `ktx setup` or `ktx ingest --all` |
|
||||
| Agent integration is incomplete | Setup skipped the agents step or installed a different target | Run `ktx setup --agents --target <target>` |
|
||||
|
|
|
|||
163
docs-site/content/docs/getting-started/troubleshooting-linux.mdx
Normal file
163
docs-site/content/docs/getting-started/troubleshooting-linux.mdx
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
---
|
||||
title: Troubleshooting clean Linux install
|
||||
description: Known gotchas when installing KTX from scratch on a clean Linux host (Ubuntu, Debian, container images). Read this before debugging managed-runtime or daemon failures.
|
||||
---
|
||||
|
||||
This page documents the friction a coding agent (or human) will hit when running `npm install -g @kaelio/ktx@next` on a clean Linux host with no Python ≥ 3.13 installed, and during the first `ktx setup` on that host. Each item lists the symptom, the cause, and the exact recovery command.
|
||||
|
||||
## Prerequisites that aren't always satisfied
|
||||
|
||||
KTX needs:
|
||||
|
||||
| Tool | Minimum version | Why |
|
||||
|------|-----------------|-----|
|
||||
| Node.js | 22 | Runs the CLI |
|
||||
| `uv` | 0.5+ | Manages the local Python runtime (semantic-layer daemon, local embeddings) |
|
||||
| Python | 3.13 | KTX's managed Python runtime targets `>=3.13`. The system Python on Ubuntu 24.04 is 3.12. |
|
||||
|
||||
If `uv` is not on `PATH`, install it:
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
source $HOME/.local/bin/env # or: export PATH="$HOME/.local/bin:$PATH"
|
||||
```
|
||||
|
||||
Install Python 3.13 via `uv` so it sits alongside whatever the system ships:
|
||||
|
||||
```bash
|
||||
uv python install 3.13
|
||||
```
|
||||
|
||||
You do not need to make 3.13 the system default. KTX's runtime installer will pick it up when you set `UV_PYTHON=3.13` for the install command (see below).
|
||||
|
||||
## Symptom: `ktx dev runtime install` fails on the venv step
|
||||
|
||||
The install log (`~/.ktx/runtime/<version>/install.log`) shows something like:
|
||||
|
||||
```
|
||||
$ uv venv /home/runner/.ktx/runtime/<version>/.venv
|
||||
Using CPython 3.12.3 interpreter at: /usr/bin/python3
|
||||
...
|
||||
Package requires Python >=3.13 but the running Python is 3.12.3
|
||||
```
|
||||
|
||||
**Cause:** `uv venv` picked the system Python (3.12) when it built the runtime virtualenv. KTX's wheels declare `requires-python = ">=3.13"`, so the subsequent install fails.
|
||||
|
||||
**Fix:** install Python 3.13 (above), then force the runtime installer to use it:
|
||||
|
||||
```bash
|
||||
uv python install 3.13
|
||||
UV_PYTHON=3.13 ktx dev runtime install --feature local-embeddings --yes --force
|
||||
```
|
||||
|
||||
The `--force` flag rebuilds the venv. Without it, the failed venv from the previous attempt is reused.
|
||||
|
||||
## Symptom: managed Python daemon crashes immediately with `URL parse error`
|
||||
|
||||
The daemon stderr (`<project>/.ktx/runtime/daemon.stderr.log`) contains an httpx traceback ending in something like:
|
||||
|
||||
```
|
||||
File ".../httpx/_client.py", line 698, in __init__
|
||||
URLPattern(key): None
|
||||
File ".../httpx/_urls.py", line ..., in __init__
|
||||
raise InvalidURL(...)
|
||||
```
|
||||
|
||||
**Cause:** an environment variable holds a value httpx cannot parse — typically `NO_PROXY` or `no_proxy` containing an **IPv6 CIDR** such as `fd07:b51a:cc66:f0::/64`. OrbStack and some Docker network setups inject this by default. httpx interprets every comma-separated entry as a URL pattern and rejects raw IPv6 CIDRs.
|
||||
|
||||
**Fix:** scrub the bad entries before starting the daemon. The simplest workaround is to unset proxy vars entirely for daemon-related commands:
|
||||
|
||||
```bash
|
||||
unset HTTP_PROXY HTTPS_PROXY NO_PROXY http_proxy https_proxy no_proxy
|
||||
ktx dev runtime start --feature local-embeddings
|
||||
```
|
||||
|
||||
If you need proxy entries to remain set for outbound HTTP, keep only the IPv4 + hostname entries:
|
||||
|
||||
```bash
|
||||
export NO_PROXY="localhost,127.0.0.1,*.orb.internal,*.orb.local"
|
||||
```
|
||||
|
||||
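This issue is tracked for an upstream fix in the daemon itself: it should sanitize unparseable entries before constructing httpx clients. In the meantime, the KTX CLI drops `NO_PROXY`/`no_proxy` entries containing `::` from the environment it passes to the managed daemon it spawns, so the manual workaround above is mainly needed on CLI versions that predate that change.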
|
||||
|
||||
## Symptom: `ktx setup` keeps connecting to an old daemon port
|
||||
|
||||
Running `ktx setup` more than once can leave orphan `ktx-daemon` processes. Each `setup` invocation may spawn a fresh daemon on a new port and write a new `daemon.json`, while the old one keeps running. Subsequent setup attempts may pick the stale port and fail with a connection-refused error or a `500` health check.
|
||||
|
||||
**Fix:** stop all daemons and remove the state files before re-running setup:
|
||||
|
||||
```bash
|
||||
pkill -9 -f ktx-daemon || true
|
||||
rm -f ~/.ktx/runtime/*/daemon.json
|
||||
rm -f /path/to/project/.ktx/runtime/daemon.json
|
||||
```
|
||||
|
||||
Then start the daemon explicitly **before** re-running setup so `setup` reuses it:
|
||||
|
||||
```bash
|
||||
unset HTTP_PROXY HTTPS_PROXY NO_PROXY http_proxy https_proxy no_proxy
|
||||
ktx dev runtime start --feature local-embeddings
|
||||
```
|
||||
|
||||
## Symptom: `ktx status --json` reports a connection as failed but `ktx connection test <id>` passes
|
||||
|
||||
`ktx status` may cache a failure record from a prior bad run (for example, when the daemon was crashing). A successful `ktx connection test` does not always invalidate the cache.
|
||||
|
||||
**Fix:** re-run a fast ingest, which writes a fresh status record:
|
||||
|
||||
```bash
|
||||
ktx ingest <connection-id> --fast
|
||||
ktx status --json
|
||||
```
|
||||
|
||||
## A minimal "clean Linux install" recipe
|
||||
|
||||
If you only want one working sequence, this one works from a fresh Ubuntu 24.04 container with Node 22 and Claude Code installed:
|
||||
|
||||
```bash
|
||||
# 1. Prerequisites
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
source $HOME/.local/bin/env
|
||||
uv python install 3.13
|
||||
npm install -g @kaelio/ktx@next
|
||||
|
||||
# 2. Pre-warm the managed Python runtime with the right Python
|
||||
UV_PYTHON=3.13 ktx dev runtime install --feature local-embeddings --yes --force
|
||||
|
||||
# 3. Start the daemon with a clean proxy env
|
||||
unset HTTP_PROXY HTTPS_PROXY NO_PROXY http_proxy https_proxy no_proxy
|
||||
ktx dev runtime start --feature local-embeddings
|
||||
|
||||
# 4. Scripted setup (replace DATABASE_URL with your warehouse)
|
||||
mkdir -p /work/project
|
||||
cd /work/project
|
||||
export ANTHROPIC_API_KEY=... # already in env from your Claude Code session
|
||||
export DATABASE_URL=postgresql://...
|
||||
|
||||
ktx setup \
|
||||
--no-input \
|
||||
--yes \
|
||||
--project-dir /work/project \
|
||||
--llm-backend anthropic \
|
||||
--anthropic-api-key-env ANTHROPIC_API_KEY \
|
||||
--anthropic-model claude-sonnet-4-6 \
|
||||
--embedding-backend sentence-transformers \
|
||||
--database postgres \
|
||||
--new-database-connection-id warehouse \
|
||||
--database-url env:DATABASE_URL \
|
||||
--skip-sources \
|
||||
--skip-agents
|
||||
|
||||
# 5. Build schema context
|
||||
ktx ingest warehouse --fast
|
||||
|
||||
# 6. Verify
|
||||
ktx status --json
|
||||
ktx connection test warehouse
|
||||
```
|
||||
|
||||
Success looks like:
|
||||
|
||||
- `ktx status --json` reports `"verdict": "ready"`
|
||||
- `ktx connection test warehouse` exits 0 with `Status: ok`
|
||||
- `semantic-layer/warehouse/_schema/` contains generated YAML files
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import { source } from "@/lib/source";
|
||||
import { readDocsPageMarkdown } from "@/lib/docs-markdown";
|
||||
|
||||
const siteOrigin = "https://docs.kaelio.com/ktx";
|
||||
const siteOrigin = process.env.KTX_DOCS_ORIGIN ?? "https://docs.kaelio.com/ktx";
|
||||
|
||||
export type LlmDocsPage = {
|
||||
title: string;
|
||||
|
|
@ -61,6 +61,7 @@ ${link("/docs/ai-resources/agent-instructions", "Agent Instructions", "Suggested
|
|||
|
||||
${link("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for")}
|
||||
${link("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context")}
|
||||
${link("/docs/getting-started/troubleshooting-linux", "Troubleshooting clean Linux install", "READ FIRST if installing from scratch on Linux/container — covers Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe")}
|
||||
${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and wiki pages")}
|
||||
|
||||
## Machine-Readable Documentation
|
||||
|
|
|
|||
2
docs-site/next-env.d.ts
vendored
2
docs-site/next-env.d.ts
vendored
|
|
@ -1,6 +1,6 @@
|
|||
/// <reference types="next" />
|
||||
/// <reference types="next/image-types/global" />
|
||||
import "./.next/types/routes.d.ts";
|
||||
import "./.next/dev/types/routes.d.ts";
|
||||
|
||||
// NOTE: This file should not be edited
|
||||
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { join } from 'node:path';
|
|||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
readManagedPythonDaemonStatus,
|
||||
sanitizeProxyEnv,
|
||||
startManagedPythonDaemon,
|
||||
stopAllManagedPythonDaemons,
|
||||
stopManagedPythonDaemon,
|
||||
|
|
@ -404,3 +405,38 @@ describe('managed Python daemon lifecycle', () => {
|
|||
expect(await readFile(layout(tempDir).daemonStatePath, 'utf8')).toContain('"pid": 4242');
|
||||
});
|
||||
});
|
||||
|
||||
// Unit tests for sanitizeProxyEnv. They pin the contract that NO_PROXY/no_proxy
// entries containing `::` are removed, that the variable is deleted outright
// when nothing survives, and that IPv4/hostname/wildcard entries and unrelated
// environment variables come back unchanged.
describe('sanitizeProxyEnv', () => {
|
||||
it('removes IPv6 CIDR entries from NO_PROXY that crash httpx', () => {
|
||||
const cleaned = sanitizeProxyEnv({
|
||||
NO_PROXY: 'localhost,127.0.0.1,fd07:b51a:cc66:f0::/64,*.orb.internal',
|
||||
no_proxy: 'localhost,127.0.0.1,fd07:b51a:cc66:f0::/64,*.orb.internal',
|
||||
});
|
||||
expect(cleaned.NO_PROXY).toBe('localhost,127.0.0.1,*.orb.internal');
|
||||
expect(cleaned.no_proxy).toBe('localhost,127.0.0.1,*.orb.internal');
|
||||
});
|
||||
|
||||
it('deletes NO_PROXY entirely when every entry is unparseable', () => {
|
||||
const cleaned = sanitizeProxyEnv({ NO_PROXY: 'fd07::/64,::1' });
|
||||
expect(cleaned.NO_PROXY).toBeUndefined();
|
||||
});
|
||||
|
||||
it('preserves IPv4 addresses, IPv4 CIDRs, hostnames, and wildcards', () => {
|
||||
const cleaned = sanitizeProxyEnv({
|
||||
NO_PROXY: '127.0.0.0/8,10.0.0.1,localhost,*.example.com',
|
||||
});
|
||||
expect(cleaned.NO_PROXY).toBe('127.0.0.0/8,10.0.0.1,localhost,*.example.com');
|
||||
});
|
||||
|
||||
it('leaves other env vars untouched', () => {
|
||||
const cleaned = sanitizeProxyEnv({ PATH: '/usr/bin', NO_PROXY: '::1', FOO: 'bar' });
|
||||
expect(cleaned.PATH).toBe('/usr/bin');
|
||||
expect(cleaned.FOO).toBe('bar');
|
||||
expect(cleaned.NO_PROXY).toBeUndefined();
|
||||
});
|
||||
|
||||
it('does nothing when NO_PROXY is not set', () => {
|
||||
const cleaned = sanitizeProxyEnv({ PATH: '/usr/bin' });
|
||||
expect(cleaned).toEqual({ PATH: '/usr/bin' });
|
||||
});
|
||||
|
||||
});
|
||||
|
|
|
|||
|
|
@ -697,7 +697,7 @@ export async function startManagedPythonDaemon(
|
|||
detached: true,
|
||||
stdio: ['ignore', stdout.fd, stderr.fd],
|
||||
env: {
|
||||
...process.env,
|
||||
...sanitizeProxyEnv(process.env),
|
||||
KTX_DAEMON_VERSION: options.cliVersion,
|
||||
},
|
||||
},
|
||||
|
|
@ -807,3 +807,32 @@ export async function stopAllManagedPythonDaemons(
|
|||
scanErrors: discovery.scanErrors,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Filter NO_PROXY/no_proxy values to remove entries httpx cannot parse.
 *
 * httpx (used by the Python daemon via huggingface_hub / sentence-transformers)
 * treats each comma-separated NO_PROXY entry as a URL pattern. Raw IPv6 CIDR
 * blocks like `fd07:b51a:cc66:f0::/64` raise `InvalidURL` and crash the daemon.
 * OrbStack and similar Docker setups inject such entries by default.
 *
 * An entry is dropped when it either
 *  - contains `::` (zero-compressed IPv6, with or without a prefix length), or
 *  - is an IPv6 CIDR written without compression, e.g.
 *    `fd07:b51a:cc66:f0:0:0:0:0/64` (two or more `:` before the `/`).
 *
 * IPv4 addresses, IPv4 CIDRs, hostnames, `host:port` pairs, and wildcard hosts
 * are kept intact. Entries are trimmed and empty entries are discarded.
 *
 * @param env Environment to sanitize. It is not mutated.
 * @returns A shallow copy of `env` in which `NO_PROXY` / `no_proxy` have been
 *   filtered. A variable whose entries are all dropped is removed from the
 *   copy entirely; an unset or empty variable is left exactly as it was.
 */
export function sanitizeProxyEnv(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
  const result: NodeJS.ProcessEnv = { ...env };
  for (const key of ['NO_PROXY', 'no_proxy']) {
    const value = result[key];
    if (typeof value !== 'string' || value.length === 0) continue;
    const kept = value
      .split(',')
      .map((entry) => entry.trim())
      .filter((entry) => entry.length > 0 && !isUnparseableIpv6ProxyEntry(entry));
    if (kept.length === 0) {
      // Remove the key instead of leaving an empty string behind.
      delete result[key];
    } else {
      result[key] = kept.join(',');
    }
  }
  return result;
}

/** Returns true when a single NO_PROXY entry is an IPv6 form that must be dropped. */
function isUnparseableIpv6ProxyEntry(entry: string): boolean {
  // `::` only occurs in zero-compressed IPv6 literals.
  if (entry.includes('::')) return true;
  // Entries carrying an explicit scheme are passed through untouched.
  if (entry.includes('://')) return false;
  const slashIndex = entry.indexOf('/');
  if (slashIndex === -1) return false;
  // `host:port/...` has at most one colon before the slash; an IPv6 network
  // address written out in full has at least two.
  const colonCount = entry.slice(0, slashIndex).split(':').length - 1;
  return colonCount >= 2;
}
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ describe('installManagedPythonRuntime', () => {
|
|||
expect(result.status).toBe('installed');
|
||||
expect(commands).toEqual([
|
||||
{ command: 'uv', args: ['--version'] },
|
||||
{ command: 'uv', args: ['venv', result.layout.venvDir] },
|
||||
{ command: 'uv', args: ['venv', result.layout.venvDir, '--python', '3.13'] },
|
||||
{
|
||||
command: 'uv',
|
||||
args: ['pip', 'install', '--python', result.layout.pythonPath, result.asset.wheelPath],
|
||||
|
|
|
|||
|
|
@ -12,6 +12,16 @@ const execFileAsync = promisify(execFile);
|
|||
export const runtimeFeatureSchema = z.enum(['core', 'local-embeddings']);
|
||||
export type KtxRuntimeFeature = z.infer<typeof runtimeFeatureSchema>;
|
||||
|
||||
/**
|
||||
* Python version the managed runtime venv must be built with. KTX's bundled
|
||||
* wheels declare `requires-python = ">=3.13"`; without an explicit `--python`
|
||||
* flag, `uv venv` may pick a too-old system Python (Ubuntu 24.04 ships 3.12)
|
||||
* and the subsequent `uv pip install` fails late with a confusing "package
|
||||
* requires Python >=3.13" error. Pinning here pushes uv to auto-download the
|
||||
* right interpreter via its python-management feature.
|
||||
*/
|
||||
export const MANAGED_RUNTIME_PYTHON_VERSION = '3.13';
|
||||
|
||||
const runtimeAssetManifestSchema = z.object({
|
||||
schemaVersion: z.literal(1),
|
||||
distributionName: z.literal('kaelio-ktx'),
|
||||
|
|
@ -334,7 +344,7 @@ export async function installManagedPythonRuntime(
|
|||
exec,
|
||||
logPath: layout.installLogPath,
|
||||
command: 'uv',
|
||||
args: ['venv', layout.venvDir],
|
||||
args: ['venv', layout.venvDir, '--python', MANAGED_RUNTIME_PYTHON_VERSION],
|
||||
env: uvEnv,
|
||||
});
|
||||
const wheelSpec = features.includes('local-embeddings') ? `${asset.wheelPath}[local-embeddings]` : asset.wheelPath;
|
||||
|
|
|
|||
|
|
@ -216,4 +216,40 @@ describe('createDaemonLiveDatabaseIntrospection', () => {
|
|||
);
|
||||
expect(runJson).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// With `enabled_tables: ['public.orders']` on the connection, the extracted
// snapshot must contain only the table whose `db.name` is `public.orders`.
// NOTE(review): `daemonResponse` is a fixture defined outside this view;
// presumably it contains more than one table — confirm against the fixture.
it('filters out tables not on the enabled_tables allowlist', async () => {
|
||||
const runJson = vi.fn(async () => daemonResponse);
|
||||
const introspection = createDaemonLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgres://localhost:5432/warehouse',
|
||||
enabled_tables: ['public.orders'],
|
||||
},
|
||||
},
|
||||
schemas: ['public'],
|
||||
runJson,
|
||||
});
|
||||
|
||||
const snapshot = await introspection.extractSchema('warehouse');
|
||||
expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual(['public.orders']);
|
||||
});
|
||||
|
||||
// An empty `enabled_tables` array must behave like no allowlist at all: the
// snapshot is expected to list both `customers` and `orders`.
// NOTE(review): only the empty-array case is exercised here; the "omitted"
// case named in the title is not covered by this test body.
it('passes through every table when enabled_tables is omitted or empty', async () => {
|
||||
const runJson = vi.fn(async () => daemonResponse);
|
||||
const introspection = createDaemonLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgres://localhost:5432/warehouse',
|
||||
enabled_tables: [],
|
||||
},
|
||||
},
|
||||
schemas: ['public'],
|
||||
runJson,
|
||||
});
|
||||
|
||||
const snapshot = await introspection.extractSchema('warehouse');
|
||||
expect(snapshot.tables.map((table) => table.name)).toEqual(['customers', 'orders']);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -243,11 +243,29 @@ export function createDaemonLiveDatabaseIntrospection(
|
|||
const raw = requestJson
|
||||
? await requestJson('/database/introspect', payload)
|
||||
: await runJson('database-introspect', payload);
|
||||
return mapDaemonSnapshot(raw, {
|
||||
const snapshot = mapDaemonSnapshot(raw, {
|
||||
connectionId,
|
||||
extractedAt: now().toISOString(),
|
||||
schemas,
|
||||
});
|
||||
return applyEnabledTablesFilter(snapshot, connection);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Restrict a schema snapshot to the tables named in the connection's
 * `enabled_tables` allowlist.
 *
 * Tables are matched on `"<db>.<name>"` when the table has a truthy `db`, and
 * on the bare `name` otherwise. The snapshot is returned unchanged (same
 * object) when `enabled_tables` is missing, is not an array, is empty, or
 * contains no string entries.
 *
 * @param snapshot Snapshot produced by introspection.
 * @param connection Connection config that may carry `enabled_tables`.
 * @returns The original snapshot, or a shallow copy with `tables` filtered.
 */
function applyEnabledTablesFilter(
  snapshot: KtxSchemaSnapshot,
  connection: KtxProjectConnectionConfig,
): KtxSchemaSnapshot {
  const { enabled_tables: configured } = connection as { enabled_tables?: unknown };
  if (!Array.isArray(configured) || configured.length === 0) {
    return snapshot;
  }

  // Only string entries can ever match a table name; ignore anything else.
  const permitted = new Set<string>();
  for (const candidate of configured) {
    if (typeof candidate === 'string') {
      permitted.add(candidate);
    }
  }
  if (permitted.size === 0) {
    return snapshot;
  }

  const tables = snapshot.tables.filter((table) =>
    permitted.has(table.db ? `${table.db}.${table.name}` : table.name),
  );
  return { ...snapshot, tables };
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,12 @@ function warehouseConnectionSchema<const Driver extends WarehouseDriver>(driver:
|
|||
.min(1)
|
||||
.optional()
|
||||
.describe('Warehouse connection URL or DSN; may contain environment-variable references like env:DATABASE_URL.'),
|
||||
enabled_tables: z
|
||||
.array(z.string().min(1))
|
||||
.optional()
|
||||
.describe(
|
||||
'Optional allowlist of fully-qualified table names ("schema.table") to ingest. When set, live-database ingest discards any table whose schema-qualified name is not in this list. Useful for smoke-testing deep ingest on a single table.',
|
||||
),
|
||||
})
|
||||
.describe(
|
||||
`${driver} warehouse connection. Additional driver-tunable fields (e.g. historicSql, context.queryHistory) are accepted and passed through.`,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue